commit
8dfc05848c
82 changed files with 4368 additions and 0 deletions
-
25bak/wsgi.py_20231109
-
14config.ini
-
0db.sqlite3
-
BINlog_util/__pycache__/set_logger.cpython-310.pyc
-
BINlog_util/__pycache__/set_logger.cpython-36.pyc
-
33log_util/set_logger.py
-
20manage.py
-
1start.sh
-
1stop_uwsgi.sh
-
20test.py
-
0text_analysis/__init__.py
-
BINtext_analysis/__pycache__/__init__.cpython-310.pyc
-
BINtext_analysis/__pycache__/__init__.cpython-36.pyc
-
BINtext_analysis/__pycache__/__init__.cpython-38.pyc
-
BINtext_analysis/__pycache__/read_config.cpython-310.pyc
-
BINtext_analysis/__pycache__/settings.cpython-310.pyc
-
BINtext_analysis/__pycache__/settings.cpython-36.pyc
-
BINtext_analysis/__pycache__/settings.cpython-38.pyc
-
BINtext_analysis/__pycache__/src.cpython-36.pyc
-
BINtext_analysis/__pycache__/urls.cpython-310.pyc
-
BINtext_analysis/__pycache__/urls.cpython-36.pyc
-
BINtext_analysis/__pycache__/urls.cpython-38.pyc
-
BINtext_analysis/__pycache__/views.cpython-310.pyc
-
BINtext_analysis/__pycache__/views.cpython-36.pyc
-
BINtext_analysis/__pycache__/views.cpython-38.pyc
-
BINtext_analysis/__pycache__/wsgi.cpython-310.pyc
-
BINtext_analysis/__pycache__/wsgi.cpython-36.pyc
-
BINtext_analysis/__pycache__/wsgi.cpython-38.pyc
-
101text_analysis/bak/views-0702.py
-
86text_analysis/bak/views.py
-
140text_analysis/bak/views.py_0704
-
96text_analysis/bak/views.py_1109
-
100text_analysis/bak/views.py_1201bak
-
102text_analysis/bak/views.py_20240418
-
102text_analysis/bak/views.py_20240612
-
142text_analysis/bak/views.py_20240930
-
87text_analysis/bak/views.pyold
-
101text_analysis/bak/views_0107.py
-
101text_analysis/bak/views_0412.py
-
101text_analysis/bak/views_0415.py
-
142text_analysis/bak/views_20241021.py
-
10text_analysis/read_config.py
-
14text_analysis/request.py
-
148text_analysis/settings.py
-
18text_analysis/src.py
-
BINtext_analysis/tools/__pycache__/cusException.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/mysql_helper.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/process.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/to_kafka.cpython-310.pyc
-
BINtext_analysis/tools/__pycache__/to_kafka.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/to_kafka.cpython-38.pyc
-
BINtext_analysis/tools/__pycache__/tool.cpython-310.pyc
-
BINtext_analysis/tools/__pycache__/tool.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/tool.cpython-38.pyc
-
BINtext_analysis/tools/__pycache__/tools.cpython-36.pyc
-
74text_analysis/tools/bak/to_kafka.py
-
105text_analysis/tools/bak/tool.py
-
114text_analysis/tools/bak/tool.py0821
-
181text_analysis/tools/bak/tool.py1109
-
170text_analysis/tools/bak/tool.py_1107
-
170text_analysis/tools/bak/tool.py_1107_final
-
173text_analysis/tools/bak/tool.py_20240418
-
170text_analysis/tools/bak/tool_1107_final.py
-
25text_analysis/tools/cusException.py
-
65text_analysis/tools/kakfa_util.py
-
0text_analysis/tools/logs/results.log
-
338text_analysis/tools/mysql_helper.py
-
51text_analysis/tools/process.py
-
171text_analysis/tools/seleniumTest.py
-
25text_analysis/tools/to_kafka.py
-
74text_analysis/tools/to_kafka_pykafka.py
-
178text_analysis/tools/tool.py
-
44text_analysis/tools/zk_util.py
-
13text_analysis/urls.py
-
148text_analysis/views.py
-
148text_analysis/views.py_bak
-
142text_analysis/views_20241023.py
-
16text_analysis/wsgi.py
-
8uwsgi.ini
-
0wsgi.log
-
35wsgi.py
-
25wsgi.py_20231109
@ -0,0 +1,25 @@ |
|||
""" |
|||
WSGI config for Zhijian_Project_WebService project. |
|||
|
|||
It exposes the WSGI callable as a module-level variable named ``application``. |
|||
|
|||
For more information on this file, see |
|||
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
|||
""" |
|||
|
|||
import os

import threading
from text_analysis.views import chatgpt
# Run `chatgpt` in a daemon thread as a side effect of importing this module,
# before the WSGI application object is created. Being a daemon thread, it
# does not keep the process alive on shutdown.
# NOTE(review): if the server imports this module in a master process and
# then forks workers, the thread would presumably exist only in the master -
# confirm against the uWSGI configuration.
t = threading.Thread(target=chatgpt, name='chatgpt')
t.daemon = True
t.start()

from django.core.wsgi import get_wsgi_application

# setdefault: an already exported DJANGO_SETTINGS_MODULE takes precedence.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings")
# Module-level WSGI callable.
application = get_wsgi_application()
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,14 @@ |
|||
[zookeeper] |
|||
;zk地址 |
|||
zkhost=172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181 |
|||
;节点 |
|||
node=/analyze |
|||
|
|||
[kafka] |
|||
;服务器地址 |
|||
bootstrap_servers=172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092 |
|||
;topic |
|||
topic=produce_analyze |
|||
|
|||
[gptmodel] |
|||
url=https://api.openai.com/v1/chat/completions |
@ -0,0 +1,33 @@ |
|||
#coding:utf8 |
|||
import logging |
|||
import os |
|||
import sys |
|||
from logging.handlers import TimedRotatingFileHandler |
|||
import re |
|||
# cur_dir = os.path.dirname( os.path.abspath(__file__)) or os.getcwd() |
|||
# sys.path.append(cur_dir + '/log_util') |
|||
def set_logger(filename):
    """Return a logger named *filename* that writes to a daily-rotating file.

    The logger is set to INFO and gets one ``TimedRotatingFileHandler`` that
    rolls over at midnight and keeps three rotated files. Calling this
    function again with the same *filename* returns the same logger without
    attaching a second handler, so records are not written twice.

    Args:
        filename: Path of the active log file; also used as the logger name.
            Missing parent directories are created.

    Returns:
        logging.Logger: The configured logger.
    """
    # Loggers are cached by name, so the same filename yields the same object.
    logger = logging.getLogger(filename)
    logger.setLevel(logging.INFO)

    # A file handler stores its target as an absolute path in `baseFilename`;
    # if one for this file is already attached there is nothing left to do.
    target = os.path.abspath(filename)
    for existing in logger.handlers:
        if isinstance(existing, TimedRotatingFileHandler) and existing.baseFilename == target:
            return logger

    # Opening the log file fails when its directory does not exist yet.
    os.makedirs(os.path.dirname(target), exist_ok=True)

    # when="MIDNIGHT", interval=1: roll over at 00:00 every day, one file per
    # day. backupCount is the number of rotated files that are kept.
    file_handler = TimedRotatingFileHandler(
        filename=filename, when="MIDNIGHT", encoding="utf-8", interval=1, backupCount=3
    )
    # With filename="mylog" this suffix produces names like mylog.2020-02-25.log
    file_handler.suffix = "%Y-%m-%d.log"
    # extMatch is the compiled pattern used to recognise rotated-file
    # suffixes. It has to agree with `suffix`, otherwise expired logs are
    # never deleted. The dot is escaped so only a literal ".log" matches.
    file_handler.extMatch = re.compile(r"^\d{4}-\d{2}-\d{2}\.log$")
    # Output format of each record.
    file_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s] [%(process)d] [%(levelname)s] - %(module)s.%(funcName)s (%(filename)s:%(lineno)d) - %(message)s"
        )
    )
    logger.addHandler(file_handler)
    return logger
@ -0,0 +1,20 @@ |
|||
#!/usr/bin/env python |
|||
import os |
|||
import sys |
|||
import threading |
|||
from text_analysis.views import chatgpt |
|||
import queue |
|||
import django |
|||
# global task_queue |
|||
# task_queue = queue.Queue() |
|||
|
|||
if __name__ == "__main__":
    # Start `chatgpt` in a daemon thread before handing control to Django's
    # command runner. This happens for every invocation of this script,
    # whatever command is given in sys.argv.
    t = threading.Thread(target=chatgpt, name='chatgpt')
    t.daemon = True
    t.start()
    # setdefault: an already exported DJANGO_SETTINGS_MODULE takes precedence.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings")
    django.setup()
    # Imported only after the settings module has been selected.
    from django.core.management import execute_from_command_line
    execute_from_command_line(sys.argv)
|||
|
|||
|
@ -0,0 +1 @@ |
|||
# Launch uWSGI in the background with the settings in uwsgi.ini, loading
# wsgi.py; uwsgi.ini, wsgi.py and wsgi.log are resolved relative to the
# current directory, and the daemon's output goes to wsgi.log.
/opt/crawl/anaconda2/envs/py36/bin/uwsgi --ini uwsgi.ini --file wsgi.py --daemonize wsgi.log
@ -0,0 +1 @@ |
|||
# Force-kill whatever is listening on TCP port 9012.
# -t prints bare PIDs (no header to filter out); -sTCP:LISTEN leaves out
# processes that only hold a client connection involving that port.
pids=$(lsof -t -i:9012 -sTCP:LISTEN)
# Call kill only when something was found, so running this against an
# already stopped service does not invoke kill without arguments.
if [ -n "$pids" ]; then
    kill -9 $pids
fi
20
test.py
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,101 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module reads the task by key and stores
        # its outcome under raw_data["result"], so valid JSON that is not an
        # object (e.g. `[]` or `"text"`) is rejected here rather than being
        # acknowledged and failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Process queued tasks forever: call the chat-completions API and publish the outcome.

    Polls the module-level ``task_queue``; when it is empty the loop logs and
    sleeps for 10 seconds. For every task the prompt and model settings come
    from ``get_content``, the request goes to the OpenAI endpoint, and the
    task dict - extended with a ``"result"`` entry - is sent through
    ``to_kafka.send_kafka``. Failures are reported the same way with
    ``successCode`` "0" and the traceback in ``errorLog``.

    This function never returns, so callers are expected to run it in its
    own thread. No exception raised while handling a task escapes the loop.
    """
    while True:
        if task_queue.qsize() == 0:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
            continue

        logging.info("取任务队列长度{}".format(task_queue.qsize()))
        raw_data = task_queue.get()
        # Bound before the try block so the error path never reads an unbound
        # name, or the template left over from the previous task.
        res_tmp = {}
        try:
            # One empty slot per requested output field.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            logging.info("prompt为{}".format(data["prompt"]))
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            logging.info("GPT返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            # fieldType: 0 means the answer is plain text, 1 means JSON.
            fieldType = raw_data["input"]['fieldType']
            if fieldType == 0:
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
            else:
                res = parse_gptResult(res_tmp, result)
                if res:
                    res_tmp_json = json.dumps(res, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json, "status": 2, "message": "GPT返回结果非json格式"}
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_log = traceback.format_exc()
            try:
                raw_data["result"] = {
                    "successCode": "0",
                    "errorLog": error_log,
                    "results": json.dumps(res_tmp, ensure_ascii=False),
                    "status": 2,
                    "message": "异常",
                }
                logging.info("调用gpt失败{}-{}".format(raw_data, error_log))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed (the task is not a dict, or the
                # Kafka send raised). Log it and keep the loop alive.
                logging.error(traceback.format_exc())
|||
|
@ -0,0 +1,86 @@ |
|||
#coding:utf8 |
|||
import os, sys |
|||
import io |
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
logging=set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
@csrf_exempt
def chatGpt(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module stores its outcome under
        # raw_data["result"], so valid JSON that is not an object (e.g. `[]`
        # or `"text"`) is rejected here rather than being acknowledged and
        # failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
def chatgpt():
    """Process queued tasks forever: call the chat-completions API and publish the outcome.

    Polls the module-level ``task_queue`` and sleeps 10 seconds when it is
    empty. For every task the prompt and model settings come from
    ``get_content``, the request goes to the OpenAI endpoint, and the task
    dict - extended with a ``"result"`` entry holding the first choice's
    message content - is sent through ``to_kafka.send_kafka``. On failure the
    ``errorLog`` field carries the API response body when a response was
    received, otherwise the traceback.

    This function never returns, so callers are expected to run it in its
    own thread. No exception raised while handling a task escapes the loop.
    """
    while True:
        if task_queue.qsize() == 0:
            time.sleep(10)
            continue

        # Reset per task so the error path never sees an unbound name or the
        # response that belonged to the previous task.
        raw_data = None
        response = None
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            # Without a timeout a stalled connection would block this single
            # consumer indefinitely.
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": result}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_log = traceback.format_exc()
            try:
                # Compare against None instead of testing truthiness: a
                # requests Response is falsy for 4xx/5xx statuses, which is
                # exactly when its body explains the failure.
                if response is not None and response.text:
                    error_log = response.text
                raw_data["result"] = {"successCode": "0", "errorLog": error_log, "results": ""}
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed (no usable task dict, or the Kafka
                # send raised). Log it and keep the loop alive.
                logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,140 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
import time |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
|
|||
|
|||
# global task_queue |
|||
task_queue = queue.Queue() |
|||
# global stop_dict |
|||
stop_dict={} |
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module reads the task by key and stores
        # its outcome under raw_data["result"], so valid JSON that is not an
        # object (e.g. `[]` or `"text"`) is rejected here rather than being
        # acknowledged and failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Process queued tasks forever: call the chat-completions API and publish the outcome.

    Polls the module-level ``task_queue``; when it is empty the loop logs and
    sleeps for 10 seconds. A task whose ``scenes_id`` is present in
    ``stop_dict`` with a different ``version`` is logged and dropped. For
    every other task the prompt and model settings come from ``get_content``,
    the request goes to the OpenAI endpoint, and the task dict - extended
    with a ``"result"`` entry - is sent through ``to_kafka.send_kafka``.
    Failures are reported the same way with ``successCode`` "0" and the
    traceback in ``errorLog``.

    This function never returns, so callers are expected to run it in its
    own thread. No exception raised while handling a task escapes the loop.
    """
    while True:
        if task_queue.qsize() == 0:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
            continue

        # Reset per task so the error path never reads an unbound name, or
        # values left over from the previous task.
        raw_data = None
        res_tmp = {}
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            logging.info("当前version信息为:{}".format(stop_dict))
            # Drop tasks of a scene whose recorded version differs from the
            # version this task was created with.
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                continue
            # One empty slot per requested output field.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            logging.info("prompt为{}".format(data["prompt"]))
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            logging.info("GPT返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            # fieldType: 0 means the answer is plain text, 1 means JSON.
            fieldType = raw_data["input"]['fieldType']
            if fieldType == 0:
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
            else:
                res = parse_gptResult(res_tmp, result)
                if res:
                    res_tmp_json = json.dumps(res, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json, "status": 2, "message": "GPT返回结果非json格式"}
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_log = traceback.format_exc()
            try:
                raw_data["result"] = {
                    "successCode": "0",
                    "errorLog": error_log,
                    "results": json.dumps(res_tmp, ensure_ascii=False),
                    "status": 2,
                    "message": "异常",
                }
                logging.info("调用gpt失败{}-{}".format(raw_data, error_log))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed (no usable task dict, or the Kafka
                # send raised). Log it and keep the loop alive.
                logging.error(traceback.format_exc())
|||
|
|||
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror its updates into ``stop_dict``.

    Connects to the ZooKeeper ensemble, registers a data watch on
    ``/analyze`` and then blocks forever. Each time the node's data changes,
    the payload is parsed as JSON and ``stop_dict[scenes_id]`` is replaced by
    ``{"version": ..., "operation": ...}`` taken from it.

    The function only returns when connecting, registering the watch or the
    keep-alive loop raises; the error is logged and the client is stopped
    and closed if it had been started.
    """
    try:
        # Production environment
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        try:
            # Register the watcher.
            @zk.DataWatch("/analyze")
            def watch_node(data, stat, event):
                # Only react to data changes, not to the initial call or to
                # other event types.
                if event is None or event.type != EventType.CHANGED:
                    return
                try:
                    data, stat = zk.get("/analyze")
                    logging.info("执行删除操作:{}".format(data))
                    d = json.loads(data)
                    # Publish the complete entry in one assignment so a
                    # concurrent reader of stop_dict never finds an entry
                    # that lacks its "version" key.
                    stop_dict[d["scenes_id"]] = {
                        "version": d["version"],
                        "operation": d["operation"],
                    }
                except Exception:
                    # A malformed payload is logged here instead of being
                    # raised into the watch callback machinery.
                    logging.error(traceback.format_exc())

            # Keep this function running so the watch stays registered.
            while True:
                time.sleep(1)
        finally:
            logging.info("Stopping...")
            # Release the connection.
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,96 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module reads the task by key and stores
        # its outcome under raw_data["result"], so valid JSON that is not an
        # object (e.g. `[]` or `"text"`) is rejected here rather than being
        # acknowledged and failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Process queued tasks forever: call the chat-completions API and publish the outcome.

    Polls the module-level ``task_queue`` and sleeps 10 seconds when it is
    empty. For every task the prompt and model settings come from
    ``get_content``, the request goes to the OpenAI endpoint, and the first
    choice's message content is stored under ``"content"`` in a dict that has
    one key per entry of the task's ``"output"``. The task dict - extended
    with a ``"result"`` entry - is sent through ``to_kafka.send_kafka``. On
    failure the ``errorLog`` field carries the API response body when a
    response was received, otherwise the traceback.

    This function never returns, so callers are expected to run it in its
    own thread. No exception raised while handling a task escapes the loop.
    """
    while True:
        if task_queue.qsize() == 0:
            time.sleep(10)
            continue

        # Reset per task so the error path never sees an unbound name or the
        # response that belonged to the previous task.
        raw_data = None
        response = None
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            data = get_content(raw_data, logging)
            # One empty slot per requested output field.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            logging.info("prompt为{}".format(data["prompt"]))
            # Without a timeout a stalled connection would block this single
            # consumer indefinitely.
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            logging.info("GPT返回值:{}".format(response))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            res_tmp["content"] = result
            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            trace = traceback.format_exc()
            try:
                # Compare against None instead of testing truthiness: a
                # requests Response is falsy for 4xx/5xx statuses, which is
                # exactly when its body explains the failure.
                if response is not None and response.text:
                    error_log = response.text
                else:
                    error_log = trace
                raw_data["result"] = {"successCode": "0", "errorLog": error_log, "results": ""}
                logging.info("解析失败{}-{}".format(raw_data, trace))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed (no usable task dict, or the Kafka
                # send raised). Log it and keep the loop alive.
                logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,100 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module reads the task by key and stores
        # its outcome under raw_data["result"], so valid JSON that is not an
        # object (e.g. `[]` or `"text"`) is rejected here rather than being
        # acknowledged and failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Process queued tasks forever: call the chat-completions API and publish the outcome.

    Polls the module-level ``task_queue`` and sleeps 10 seconds when it is
    empty. For every task the prompt and model settings come from
    ``get_content``, the request goes to the OpenAI endpoint, and the first
    choice's message content is stored under ``"content"`` in a dict that has
    one key per entry of the task's ``"output"``. The task dict - extended
    with a ``"result"`` entry - is sent through ``to_kafka.send_kafka``. On
    failure ``errorLog`` carries the API response body when a response was
    received, otherwise the traceback, and ``results`` carries the output
    dict as far as it was filled.

    This function never returns, so callers are expected to run it in its
    own thread. No exception raised while handling a task escapes the loop.
    """
    while True:
        if task_queue.qsize() == 0:
            time.sleep(10)
            continue

        # Reset per task so the error path never sees an unbound name, or
        # values that belonged to the previous task.
        raw_data = None
        response = None
        res_tmp = {}
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            # One empty slot per requested output field.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            logging.info("prompt为{}".format(data["prompt"]))
            # Without a timeout a stalled connection would block this single
            # consumer indefinitely.
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            logging.info("GPT返回值:{}".format(response))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            res_tmp["content"] = result
            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            trace = traceback.format_exc()
            try:
                # Compare against None instead of testing truthiness: a
                # requests Response is falsy for 4xx/5xx statuses, which is
                # exactly when its body explains the failure.
                if response is not None and response.text:
                    error_log = response.text
                else:
                    error_log = trace
                raw_data["result"] = {
                    "successCode": "0",
                    "errorLog": error_log,
                    "results": json.dumps(res_tmp, ensure_ascii=False),
                }
                logging.info("解析失败{}-{}".format(raw_data, trace))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed (no usable task dict, or the Kafka
                # send raised). Log it and keep the loop alive.
                logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,102 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module reads the task by key and stores
        # its outcome under raw_data["result"], so valid JSON that is not an
        # object (e.g. `[]` or `"text"`) is rejected here rather than being
        # acknowledged and failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Process queued tasks forever: call the chat-completions API and publish the outcome.

    Polls the module-level ``task_queue``; when it is empty the loop logs and
    sleeps for 10 seconds. For every task the prompt and model settings come
    from ``get_content``, the request goes to the OpenAI endpoint, and the
    task dict - extended with a ``"result"`` entry - is sent through
    ``to_kafka.send_kafka``. Failures are reported the same way with
    ``successCode`` "0" and the traceback in ``errorLog``.

    This function never returns, so callers are expected to run it in its
    own thread. No exception raised while handling a task escapes the loop.
    """
    while True:
        if task_queue.qsize() == 0:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
            continue

        logging.info("取任务队列长度{}".format(task_queue.qsize()))
        raw_data = task_queue.get()
        # Bound before the try block so the error path never reads an unbound
        # name, or the template left over from the previous task.
        res_tmp = {}
        try:
            # One empty slot per requested output field.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            logging.info("prompt为{}".format(data["prompt"]))
            # Without a timeout a stalled connection would block this single
            # consumer indefinitely.
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            logging.info("GPT返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            # fieldType: 0 means the answer is plain text, 1 means JSON.
            fieldType = raw_data["input"]['fieldType']
            if fieldType == 0:
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
            else:
                res = parse_gptResult(res_tmp, result)
                if res:
                    res_tmp_json = json.dumps(res, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
                else:
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json}
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_log = traceback.format_exc()
            try:
                raw_data["result"] = {
                    "successCode": "0",
                    "errorLog": error_log,
                    "results": json.dumps(res_tmp, ensure_ascii=False),
                }
                logging.info("调用gpt失败{}-{}".format(raw_data, error_log))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed (the task is not a dict, or the
                # Kafka send raised). Log it and keep the loop alive.
                logging.error(traceback.format_exc())
|||
|
|||
|
|||
|
@ -0,0 +1,102 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Queue a task submitted as a JSON object in a POST body.

    The parsed body is put on the module-level ``task_queue`` unchanged; the
    response only acknowledges that the task was queued.

    Args:
        request: The incoming Django request.

    Returns:
        HttpResponse: JSON text ``{"code": ..., "msg": ...}``. ``code`` is 1
        when the task was queued and 0 when the method is not POST or the
        body is not a JSON object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The queue consumer in this module reads the task by key and stores
        # its outcome under raw_data["result"], so valid JSON that is not an
        # object (e.g. `[]` or `"text"`) is rejected here rather than being
        # acknowledged and failing later in the consumer.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks, call the chat-completions API, publish the outcome.

    For every task taken from ``task_queue`` the prompt is built with
    ``get_content``, posted to the OpenAI endpoint, and the outcome (success
    or failure) is stored under ``raw_data["result"]`` and handed to
    ``to_kafka.send_kafka``. The loop never returns; when the queue is empty
    it sleeps for 10 seconds.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            if not isinstance(raw_data, dict):
                # A non-object task cannot carry a result; drop it instead of
                # letting the error handler below crash the worker.
                logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                continue
            # Bound before the try so the error handler can always serialise it,
            # even when the task has no "output" field.
            res_tmp = {}
            try:
                res_tmp = {key: "" for key in raw_data["output"]}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"],
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"]),
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                result = json.loads(response.text)['choices'][0]['message']['content']
                # fieldType: 0 means plain text, anything else means JSON output.
                if raw_data["input"]['fieldType'] == 0:
                    res_tmp["content"] = result
                    raw_data["result"] = {"successCode": "1", "errorLog": "",
                                          "results": json.dumps(res_tmp, ensure_ascii=False),
                                          "status": 1, "message": "成功"}
                else:
                    parsed = parse_gptResult(res_tmp, result)
                    if parsed:
                        raw_data["result"] = {"successCode": "1", "errorLog": "",
                                              "results": json.dumps(parsed, ensure_ascii=False),
                                              "status": 1, "message": "成功"}
                    else:
                        raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!",
                                              "results": json.dumps(res_tmp, ensure_ascii=False),
                                              "status": 2, "message": "GPT返回结果非json格式"}
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                error_text = traceback.format_exc()
                raw_data["result"] = {"successCode": "0", "errorLog": error_text,
                                      "results": json.dumps(res_tmp, ensure_ascii=False),
                                      "status": 2, "message": "异常"}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_text))
                to_kafka.send_kafka(raw_data, logging)
        else:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
|||
|
|||
|
|||
|
@ -0,0 +1,142 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
import time |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
|
|||
|
|||
# global task_queue |
|||
task_queue = queue.Queue() |
|||
# global stop_dict |
|||
stop_dict={} |
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a task posted as a JSON object and enqueue it for the worker.

    The response body is always a JSON object with ``code`` (1 when the task
    was queued, 0 otherwise) and a human-readable ``msg``.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The worker indexes the task by key, so anything but a JSON object
        # (list, string, number, null) must be rejected here.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks, call the chat-completions API, publish the outcome.

    Tasks whose ``scenes_id`` appears in ``stop_dict`` with a different
    ``version`` are skipped. For every other task the prompt is built with
    ``get_content``, posted to the OpenAI endpoint, and the outcome (success
    or failure) is stored under ``raw_data["result"]`` and handed to
    ``to_kafka.send_kafka``. The loop never returns; when the queue is empty
    it sleeps for 10 seconds.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            if not isinstance(raw_data, dict):
                # A non-object task cannot carry a result; drop it instead of
                # letting the error handler below crash the worker.
                logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                continue
            # Reset per task: otherwise the error handler could report the
            # previous task's fields (or hit an unbound name on the first task).
            res_tmp = {}
            try:
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logging.info("当前version信息为:{}".format(stop_dict))
                if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                    continue
                res_tmp = {key: "" for key in raw_data["output"]}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                # NOTE(review): assumed to apply to every task, not only those
                # with an "id" field; the original indentation was unavailable.
                res_tmp["isLast"] = 1
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"],
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"]),
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                result = json.loads(response.text)['choices'][0]['message']['content']
                # fieldType: 0 means plain text, anything else means JSON output.
                if raw_data["input"]['fieldType'] == 0:
                    res_tmp["content"] = result
                    raw_data["result"] = {"successCode": "1", "errorLog": "",
                                          "results": json.dumps(res_tmp, ensure_ascii=False),
                                          "status": 1, "message": "成功"}
                else:
                    parsed = parse_gptResult(res_tmp, result)
                    if parsed:
                        parsed["isLast"] = 1
                        raw_data["result"] = {"successCode": "1", "errorLog": "",
                                              "results": json.dumps(parsed, ensure_ascii=False),
                                              "status": 1, "message": "成功"}
                    else:
                        raw_data["result"] = {"successCode": "0",
                                              "errorLog": "GPT返回值不是json格式,无法解析!{}".format(result),
                                              "results": json.dumps(res_tmp, ensure_ascii=False),
                                              "status": 2, "message": "GPT返回结果非json格式"}
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                error_text = traceback.format_exc()
                raw_data["result"] = {"successCode": "0", "errorLog": error_text,
                                      "results": json.dumps(res_tmp, ensure_ascii=False),
                                      "status": 2, "message": "异常"}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_text))
                to_kafka.send_kafka(raw_data, logging)
        else:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
|||
|
|||
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror its updates into ``stop_dict``.

    On every CHANGED event the node is read, parsed as JSON and
    ``stop_dict[scenes_id]`` is replaced by ``{"version", "operation"}`` taken
    from that payload. The function blocks forever (sleeping in 1-second
    steps) to keep the watch alive; any failure is logged, never raised.
    """
    try:
        # Production environment
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        try:
            # Register the watcher
            @zk.DataWatch("/analyze")
            def watch_node(data, stat, event):
                if event is None or event.type != EventType.CHANGED:
                    return
                try:
                    data, stat = zk.get("/analyze")
                    logging.info("执行删除操作:{}".format(data))
                    d = json.loads(data)
                    # Publish the complete entry in one assignment so the worker
                    # thread never observes an entry without its "version" key.
                    stop_dict[d["scenes_id"]] = {
                        "version": d["version"],
                        "operation": d["operation"],
                    }
                except Exception:
                    # A malformed payload must not take the watcher down.
                    logging.error(traceback.format_exc())

            # Keep running so the watch keeps receiving node changes
            while True:
                time.sleep(1)
        except (KeyboardInterrupt, SystemExit):
            logging.info("Stopping...")
        finally:
            # Close the connection on every exit path
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,87 @@ |
|||
#coding:utf8 |
|||
import os, sys |
|||
import io |
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
logging=set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a task posted as a JSON object and enqueue it for the worker.

    The response body is always a JSON object with ``code`` (1 when the task
    was queued, 0 otherwise) and a human-readable ``msg``.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The worker indexes the task by key, so anything but a JSON object
        # (list, string, number, null) must be rejected here.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks, call the chat-completions API, publish the outcome.

    For every task taken from ``task_queue`` the prompt is built with
    ``get_content``, posted to the OpenAI endpoint, and the outcome is stored
    under ``raw_data["result"]`` and handed to ``to_kafka.send_kafka``. On
    failure the API's response body is reported when one was received,
    otherwise the traceback. The loop never returns; when the queue is empty
    it sleeps for 10 seconds.
    """
    while True:
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                if not isinstance(raw_data, dict):
                    # A non-object task cannot carry a result; drop it.
                    logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                    continue
                # Reset per task so the handler never sees a previous task's
                # response (or an unbound name when the request was never sent).
                response = None
                try:
                    data = get_content(raw_data, logging)
                    logging.info("问题:{}".format(data))
                    url = "https://api.openai.com/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8",
                        "Authorization": "Bearer " + data["authorization"],
                    }
                    payload = json.dumps({
                        "model": data["model"],
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"]),
                    })
                    # A timeout keeps a stalled connection from blocking the worker forever.
                    response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                    result = json.loads(response.text)['choices'][0]['message']['content']
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": result}
                    logging.info(raw_data)
                    to_kafka.send_kafka(raw_data, logging)
                except Exception:
                    error_text = traceback.format_exc()
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                    # Compare against None explicitly: a requests Response is
                    # falsy for 4xx/5xx statuses, which is exactly when its
                    # body holds the API's error message.
                    if response is not None and response.text:
                        raw_data["result"]["errorLog"] = response.text
                    else:
                        raw_data["result"]["errorLog"] = error_text
                    logging.info("解析失败{}-{}".format(raw_data, error_text))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                time.sleep(10)
        except Exception:
            # Last-resort guard so an unexpected error cannot end the worker loop.
            logging.info(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,101 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a task posted as a JSON object and enqueue it for the worker.

    The response body is always a JSON object with ``code`` (1 when the task
    was queued, 0 otherwise) and a human-readable ``msg``.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The worker indexes the task by key, so anything but a JSON object
        # (list, string, number, null) must be rejected here.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks, call the chat-completions API, publish the outcome.

    For every task taken from ``task_queue`` the prompt is built with
    ``get_content``, posted to the OpenAI endpoint, and the answer is stored
    in the ``content`` field of the result record, which is handed to
    ``to_kafka.send_kafka`` (failures are published as well). The loop never
    returns; when the queue is empty it sleeps for 10 seconds.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            if not isinstance(raw_data, dict):
                # A non-object task cannot carry a result; drop it instead of
                # letting the error handler below crash the worker.
                logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                continue
            # Bound before the try so the error handler can always serialise it,
            # even when the task has no "output" field.
            res_tmp = {}
            try:
                res_tmp = {key: "" for key in raw_data["output"]}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"],
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"]),
                })
                logging.info("prompt为{}".format(data["prompt"]))
                # A timeout keeps a stalled connection from blocking the worker forever.
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                res_tmp["content"] = json.loads(response.text)['choices'][0]['message']['content']
                raw_data["result"] = {"successCode": "1", "errorLog": "",
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                error_text = traceback.format_exc()
                raw_data["result"] = {"successCode": "0", "errorLog": error_text,
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_text))
                to_kafka.send_kafka(raw_data, logging)
        else:
            time.sleep(10)
|||
|
|||
|
|||
|
@ -0,0 +1,101 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a task posted as a JSON object and enqueue it for the worker.

    The response body is always a JSON object with ``code`` (1 when the task
    was queued, 0 otherwise) and a human-readable ``msg``.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The worker indexes the task by key, so anything but a JSON object
        # (list, string, number, null) must be rejected here.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks, call the chat-completions API, publish the outcome.

    For every task taken from ``task_queue`` the prompt is built with
    ``get_content``, posted to the OpenAI endpoint, and the answer is stored
    in the ``content`` field of the result record, which is handed to
    ``to_kafka.send_kafka`` (failures are published as well). The loop never
    returns; when the queue is empty it sleeps for 10 seconds.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            if not isinstance(raw_data, dict):
                # A non-object task cannot carry a result; drop it instead of
                # letting the error handler below crash the worker.
                logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                continue
            # Bound before the try so the error handler can always serialise it,
            # even when the task has no "output" field.
            res_tmp = {}
            try:
                res_tmp = {key: "" for key in raw_data["output"]}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"],
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"]),
                })
                logging.info("prompt为{}".format(data["prompt"]))
                # A timeout keeps a stalled connection from blocking the worker forever.
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                res_tmp["content"] = json.loads(response.text)['choices'][0]['message']['content']
                raw_data["result"] = {"successCode": "1", "errorLog": "",
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                error_text = traceback.format_exc()
                raw_data["result"] = {"successCode": "0", "errorLog": error_text,
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_text))
                to_kafka.send_kafka(raw_data, logging)
        else:
            time.sleep(10)
|||
|
|||
|
|||
|
@ -0,0 +1,101 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
|
|||
import time |
|||
|
|||
global task_queue |
|||
task_queue = queue.Queue() |
|||
|
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a task posted as a JSON object and enqueue it for the worker.

    The response body is always a JSON object with ``code`` (1 when the task
    was queued, 0 otherwise) and a human-readable ``msg``.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The worker indexes the task by key, so anything but a JSON object
        # (list, string, number, null) must be rejected here.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks and call the chat-completions API.

    For every task taken from ``task_queue`` the prompt is built with
    ``get_content`` and posted to the OpenAI endpoint. When the requested
    output has a ``content`` field the answer is stored there as text;
    otherwise it is parsed with ``parse_gptResult``. The outcome is stored
    under ``raw_data["result"]``. The loop never returns; when the queue is
    empty it sleeps for 10 seconds.

    NOTE(review): publishing to Kafka is commented out in this revision, so
    the computed result is not forwarded anywhere; confirm this is intended.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            if not isinstance(raw_data, dict):
                # A non-object task cannot carry a result; drop it instead of
                # letting the error handler below crash the worker.
                logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                continue
            # Bound before the try so the error handler can always serialise it,
            # even when the task has no "output" field.
            res_tmp = {}
            try:
                res_tmp = {key: "" for key in raw_data["output"]}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"],
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"]),
                })
                logging.info("prompt为{}".format(data["prompt"]))
                # A timeout keeps a stalled connection from blocking the worker forever.
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                result = json.loads(response.text)['choices'][0]['message']['content']
                if "content" in res_tmp:
                    res_tmp["content"] = result
                    raw_data["result"] = {"successCode": "1", "errorLog": "",
                                          "results": json.dumps(res_tmp, ensure_ascii=False)}
                else:
                    parsed = parse_gptResult(res_tmp, result)
                    if parsed:
                        raw_data["result"] = {"successCode": "1", "errorLog": "",
                                              "results": json.dumps(parsed, ensure_ascii=False)}
                    else:
                        raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!",
                                              "results": json.dumps(res_tmp, ensure_ascii=False)}
                # to_kafka.send_kafka(raw_data, logging)
            except Exception:
                error_text = traceback.format_exc()
                raw_data["result"] = {"successCode": "0", "errorLog": error_text,
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_text))
                # to_kafka.send_kafka(raw_data, logging)
        else:
            time.sleep(10)
|||
|
|||
|
|||
|
@ -0,0 +1,142 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
import time |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
|
|||
|
|||
# global task_queue |
|||
task_queue = queue.Queue() |
|||
# global stop_dict |
|||
stop_dict={} |
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a task posted as a JSON object and enqueue it for the worker.

    The response body is always a JSON object with ``code`` (1 when the task
    was queued, 0 otherwise) and a human-readable ``msg``.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # The worker indexes the task by key, so anything but a JSON object
        # (list, string, number, null) must be rejected here.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take queued tasks, call the chat-completions API, publish the outcome.

    Tasks whose ``scenes_id`` appears in ``stop_dict`` with a different
    ``version`` are skipped. For every other task the prompt is built with
    ``get_content``, posted to the OpenAI endpoint, and the outcome (success
    or failure) is stored under ``raw_data["result"]`` and handed to
    ``to_kafka.send_kafka``. The loop never returns; when the queue is empty
    it sleeps for 10 seconds.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            if not isinstance(raw_data, dict):
                # A non-object task cannot carry a result; drop it instead of
                # letting the error handler below crash the worker.
                logging.error("任务数据不是json对象,已丢弃:{}".format(raw_data))
                continue
            # Reset per task: otherwise the error handler could report the
            # previous task's fields (or hit an unbound name on the first task).
            res_tmp = {}
            try:
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logging.info("当前version信息为:{}".format(stop_dict))
                if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                    continue
                res_tmp = {key: "" for key in raw_data["output"]}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                # NOTE(review): assumed to apply to every task, not only those
                # with an "id" field; the original indentation was unavailable.
                res_tmp["isLast"] = 1
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"],
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"]),
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                result = json.loads(response.text)['choices'][0]['message']['content']
                # fieldType: 0 means plain text, anything else means JSON output.
                if raw_data["input"]['fieldType'] == 0:
                    res_tmp["content"] = result
                    raw_data["result"] = {"successCode": "1", "errorLog": "",
                                          "results": json.dumps(res_tmp, ensure_ascii=False),
                                          "status": 1, "message": "成功"}
                else:
                    parsed = parse_gptResult(res_tmp, result)
                    if parsed:
                        parsed["isLast"] = 1
                        raw_data["result"] = {"successCode": "1", "errorLog": "",
                                              "results": json.dumps(parsed, ensure_ascii=False),
                                              "status": 1, "message": "成功"}
                    else:
                        raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!",
                                              "results": json.dumps(res_tmp, ensure_ascii=False),
                                              "status": 2, "message": "GPT返回结果非json格式"}
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                error_text = traceback.format_exc()
                raw_data["result"] = {"successCode": "0", "errorLog": error_text,
                                      "results": json.dumps(res_tmp, ensure_ascii=False),
                                      "status": 2, "message": "异常"}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_text))
                to_kafka.send_kafka(raw_data, logging)
        else:
            time.sleep(10)
|||
|
|||
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror its updates into ``stop_dict``.

    On every CHANGED event the node is read, parsed as JSON and
    ``stop_dict[scenes_id]`` is replaced by ``{"version", "operation"}`` taken
    from that payload. The function blocks forever (sleeping in 1-second
    steps) to keep the watch alive; any failure is logged, never raised.
    """
    try:
        # Production environment
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        try:
            # Register the watcher
            @zk.DataWatch("/analyze")
            def watch_node(data, stat, event):
                if event is None or event.type != EventType.CHANGED:
                    return
                try:
                    data, stat = zk.get("/analyze")
                    logging.info("执行删除操作:{}".format(data))
                    d = json.loads(data)
                    # Publish the complete entry in one assignment so the worker
                    # thread never observes an entry without its "version" key.
                    stop_dict[d["scenes_id"]] = {
                        "version": d["version"],
                        "operation": d["operation"],
                    }
                except Exception:
                    # A malformed payload must not take the watcher down.
                    logging.error(traceback.format_exc())

            # Keep running so the watch keeps receiving node changes
            while True:
                time.sleep(1)
        except (KeyboardInterrupt, SystemExit):
            logging.info("Stopping...")
        finally:
            # Close the connection on every exit path
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,10 @@ |
|||
import configparser |
|||
|
|||
# Load the configuration file
def load_config(config_file='./config.ini'):
    """Read an INI file and return the populated parser.

    Args:
        config_file: Path of the INI file, read as UTF-8. Defaults to
            ``./config.ini``, resolved against the current working directory.

    Returns:
        A ``configparser.ConfigParser``. ``ConfigParser.read`` skips files it
        cannot open, so a missing file yields a parser with no sections.
    """
    parser = configparser.ConfigParser()
    parser.read(config_file, encoding='utf-8')
    return parser
@ -0,0 +1,14 @@ |
|||
#coding:utf8 |
|||
# import leida_ner_bert_crf |
|||
|
|||
import requests |
|||
|
|||
# Posts a fixed payload to the service endpoint and prints the response body.
url = "http://172.18.1.166:9000/leidaduikang"

# Raw request body, sent exactly as written (no JSON re-encoding).
payload = '{"inputUrl":"/home/bfdadmin/leidabert/Project_leidaduikang/AInputdata/content_100.xlsx"}'
headers = {'user-agent': "vscode-restclient", 'header name': "header value"}

response = requests.post(url, data=payload, headers=headers, timeout=1000000)

print(response.text)
|||
|
@ -0,0 +1,148 @@ |
|||
""" |
|||
Django settings for Zhijian_Project_WebService project. |
|||
|
|||
Generated by 'django-admin startproject' using Django 1.8. |
|||
|
|||
For more information on this file, see |
|||
https://docs.djangoproject.com/en/1.8/topics/settings/ |
|||
|
|||
For the full list of settings and their values, see |
|||
https://docs.djangoproject.com/en/1.8/ref/settings/ |
|||
""" |
|||
|
|||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...) |
|||
import os |
|||
|
|||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.8/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Set DJANGO_SECRET_KEY in the environment to override it; the literal is kept
# only as a fallback so existing deployments keep working unchanged.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    '330r)_!^qhd7$!w4)$y@4=p2bd*vlxf%4z(bx-fx-1i3txagvz',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Defaults to True as before; set DJANGO_DEBUG=false to turn it off.
DEBUG = os.environ.get('DJANGO_DEBUG', 'true').strip().lower() in ('1', 'true', 'yes', 'on')

# Comma-separated host list from DJANGO_ALLOWED_HOSTS; defaults to every host.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '*').split(',')
    if host.strip()
]
|||
|
|||
|
|||
# Application definition |
|||
|
|||
# Installed Django applications.
INSTALLED_APPS = (
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
)

# Middleware, listed in the order it is declared for this project.
MIDDLEWARE = [
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    # 'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.middleware.security.SecurityMiddleware',
]

ROOT_URLCONF = 'text_analysis.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'text_analysis.wsgi.application'


# Database
# https://docs.djangoproject.com/en/1.8/ref/settings/#databases
# No DATABASES setting is active; the SQLite configuration is kept commented out.

# DATABASES = {
#     'default': {
#         'ENGINE': 'django.db.backends.sqlite3',
#         'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
#     }
# }


# Internationalization
# https://docs.djangoproject.com/en/1.8/topics/i18n/

LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'Asia/Shanghai'
USE_I18N = True
USE_L10N = True
USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.8/howto/static-files/

STATIC_URL = '/static/'
|||
|
|||
# U_LOGFILE_SIZE = 1 * 1024 * 1024  # maximum size of a single log file (1 MiB)
# U_LOGFILE_COUNT = 7  # number of rotated log files to keep
|||
# |
|||
# LOGGING = { |
|||
# 'version': 1, |
|||
# 'disable_existing_loggers': True, # 禁用所有已经存在的日志配置 |
|||
# 'filters': { |
|||
# 'require_debug_false': { |
|||
# '()': 'django.utils.log.RequireDebugFalse' |
|||
# } |
|||
# }, |
|||
# 'formatters': { |
|||
# 'verbose': { |
|||
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] %(module)s %(process)d %(thread)d %(message)s' |
|||
# }, |
|||
# 'simple': { |
|||
# 'format': '%(levelname)s %(asctime)s @ %(process)d %(message)s' |
|||
# }, |
|||
# 'complete': { |
|||
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] (%(pathname)s/%(funcName)s:%(lineno)d) - %(message)s' |
|||
# }, |
|||
# 'online': { |
|||
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] - %(message)s' |
|||
# } |
|||
# }, |
|||
# 'handlers': { |
|||
# 'text': { |
|||
# 'level': 'DEBUG', |
|||
# #'class': 'logging.handlers.RotatingFileHandler', |
|||
# 'class': 'logging.handlers.TimedRotatingFileHandler', |
|||
# 'when': 'H', |
|||
# 'interval': 1, |
|||
# 'backupCount': U_LOGFILE_COUNT, |
|||
# 'formatter': 'complete', |
|||
# 'filename': os.path.join(BASE_DIR, 'logs/resultNew.log').replace('\\', '/'), |
|||
# } |
|||
# }, |
|||
# 'loggers': { |
|||
# 'text': { |
|||
# 'handlers': ['text'], |
|||
# 'level': 'DEBUG', |
|||
# 'propagate': False, |
|||
# } |
|||
# } |
|||
# } |
@ -0,0 +1,18 @@ |
|||
# coding:utf8

# NOTE(review): the commented-out helper below embeds a database host, user
# name and password in source. If it is ever revived, load them from
# configuration instead of hard-coding them.
# def mySql():
#     try:
#         db = pymysql.connect(host='172.26.28.30', user='crawl', passwd='crawl13', db='test', port=3306,
#                              charset='utf8', cursorclass=pymysql.cursors.DictCursor)
#         if db.open:
#             print("MySQL连接成功!")
#         else:
#             print("MySQL连接失败!")
#         db.close()
#     except:
#         print(traceback.format_exc())

# Executed at import time: prints a fixed test message.
print("这是一个测试!!")
|||
|
|||
|
@ -0,0 +1,74 @@ |
|||
#coding:utf8 |
|||
import traceback |
|||
from pykafka import KafkaClient |
|||
# from pykafka import partitioners |
|||
# from pykafka.simpleconsumer import OwnedPartition, OffsetType |
|||
import json |
|||
from tqdm import tqdm |
|||
# from kafka import KafkaProducer |
|||
from pykafka.simpleconsumer import OwnedPartition, OffsetType |
|||
|
|||
def send_kafka(data,logging):
    """Serialize *data* to JSON and publish it to the ``analyze`` Kafka topic.

    Best effort: any failure is logged through *logging* and swallowed, so
    the caller never sees an exception.

    :param data: JSON-serializable payload.
    :param logging: logger-like object exposing ``info`` and ``error``.
    :return: None
    """
    topic = 'analyze'
    try:
        # ensure_ascii=False keeps non-ASCII text as-is in the message body.
        payload = json.dumps(data, ensure_ascii=False).encode('utf-8')
        client = KafkaClient(hosts='172.26.28.30:9092', socket_timeout_ms=10 * 1000)
        # get_producer() returns an asynchronous producer. Leaving the
        # ``with`` block stops it, which waits for queued messages to be sent
        # and shuts its worker threads down; without that, a message could be
        # dropped and every call would leak a running producer.
        with client.topics[topic].get_producer() as producer:
            producer.produce(payload)
        logging.info("数据推入kafka!")
    except Exception:
        logging.error(traceback.format_exc())
        logging.error('写入kafka失败')
|||
# def kafkaProduce(topic,resultData): |
|||
# producer = KafkaProducer(bootstrap_servers = '{}'.format("172.26.28.30:9092")) |
|||
# topics = topic.split(',') |
|||
# for tc in topics: |
|||
# future = producer.send(tc,resultData) |
|||
# producer.flush() |
|||
|
|||
def consumer():
    """Print the offset and decoded value of each message on the ``analyze`` topic.

    Connects to the broker at 172.26.28.30:9092 and iterates a simple
    consumer created with consumer group ``my_consumer_group``,
    ``auto_offset_reset=OffsetType.LATEST`` and ``reset_offset_on_start=True``.
    """
    kafka_client = KafkaClient(hosts='172.26.28.30:9092')
    analyze_topic = kafka_client.topics['analyze']
    simple_consumer = analyze_topic.get_simple_consumer(
        consumer_group='my_consumer_group',
        auto_offset_reset=OffsetType.LATEST,
        reset_offset_on_start=True,
    )

    # Consume messages, skipping empty results.
    for message in simple_consumer:
        if message is None:
            continue
        print(message.offset, message.value.decode())
|||
|
|||
if __name__=="__main__":
    # Script entry point: run the demo consumer defined above.
    # send_kafka()
    consumer()
@ -0,0 +1,105 @@ |
|||
#coding:utf8 |
|||
import re |
|||
|
|||
def get_content(inputdata,logging):
    """Assemble the request parameters, filling the prompt's placeholders.

    ``{{key}}`` is replaced with ``inputdata["metadata"]["user"][key]`` and
    ``@@key@@`` with ``inputdata["data"][key]``. A placeholder whose key is
    not present is left in the prompt unchanged.

    :param inputdata: original JSON payload; must contain ``metadata.admin``
        (``prompt``, ``authorization``, ``model``, ``temperature``, ``top_p``,
        ``n``) and ``data``. ``metadata.user`` is only read when the prompt
        contains a ``{{...}}`` placeholder.
    :param logging: not used by this function; kept for interface compatibility.
    :return: dict with keys ``prompt``, ``authorization``, ``model``,
        ``temperature``, ``top_p`` and ``n``.
    :raises KeyError: if a required key is missing.
    """
    admin = inputdata["metadata"]["admin"]
    data = inputdata["data"]
    prompt = admin["prompt"]

    def _fill(text, pattern, values):
        """Replace every placeholder whose key is in *values*; keep the others."""
        def _swap(match):
            key = match.group(1)
            # A callable replacement is inserted verbatim: backslashes or
            # "\1" inside the data are not treated as escapes or group
            # references, and str() accepts non-string values.
            return str(values[key]) if key in values else match.group(0)
        return re.sub(pattern, _swap, text)

    # Non-greedy patterns so several placeholders in one prompt are matched
    # individually instead of as one span from the first to the last marker.
    user_pattern = r"\{\{(.*?)\}\}"
    if re.search(user_pattern, prompt):
        prompt = _fill(prompt, user_pattern, inputdata["metadata"]["user"])
    prompt = _fill(prompt, r"@@(.*?)@@", data)

    return {
        "prompt": prompt,
        "authorization": admin["authorization"],
        "model": admin["model"],
        "temperature": admin["temperature"],
        "top_p": admin["top_p"],
        "n": admin["n"],
    }
|||
|
|||
|
|||
if __name__=="__main__":
    # Manual smoke test: build a sample payload and print the assembled
    # request parameters.
    import logging

    # NOTE(review): the "authorization" value below is an API key committed to
    # source; it should be revoked and supplied through configuration.
    inputdata={
        "id":1,
        "module":"ChatGPT",
        "version":1,
        "name":"信息抽取",
        "describe":"此步骤进行相关信息抽取",
        "metadata":{
            "position":[
                100,
                200
            ],
            "output":{
                "output_type":"table",
                "label_col":[
                    "文件名称",
                    "识别内容",
                    "文件路径",
                    "文件大小",
                    "上传时间",
                    "GPT处理结果"
                ]
            },
            "input":{
                "input_type":"text",
                "label":[
                    "3_文件名称",
                    "3_识别内容",
                    "3_文件路径",
                    "3_文件大小",
                    "3_上传时间"
                ]
            },
            "admin":{
                "prompt":"下面我给出一段数据,请抽取相关内容。需抽取的内容是{{tag}}。数据为@@3_识别内容@@",
                "authorization":"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",
                "model":"gpt-3.5-turbo",
                "temperature":"0.2",
                "top_p":"1",
                # get_content reads admin["n"]; the key used to be "N", which
                # raised KeyError.
                "n":"1",
                "user_input":[
                    {
                        "keyname":"tag",
                        "keydesc":"需抽取内容"
                    }
                ]
            },
            "user":{
                "tag":"专利号,专利名称,申请人"
            }
        },
        "data":{
            "3_文件名称":"测试的专利文档.pdf",
            "3_识别内容":"\n证书号第2353566号\n发明专利证书\n发明名称:一种浅海大型复杂沙波区地形重构方法\n发 明 人:张华国;傅斌;何谢错;厉冬玲;史爱琴;楼璘林\n专 利 号:ZL 2015 1 0071764.4\n专利申请日:2015年02月11日 专利权人:国家海洋局第二海洋研究所 授权公告日:2017年01月18日\n本发明经过本局依照中华人民共和国专利法进行审查,决定授予专利权,颁发本证书 并在专利登记簿上予以登记-专利权自授权公告之日起生效。\n本专利的专利权期限为二十年,自申请日起算。专利权人应当依照专利法及其实施细 则规定缴纳年费。本专利的年费应当在每年02月11日前缴纳。未按照规定缴纳年费的, 专利权自应当缴纳年费期满之日起终止„\n专利证书记载专利权登记时的法律状况。专利权的转移、质押、无效、终止、恢复和 专利权人的姓名或名称、国籍、地址变更等事项记载在专利登记簿上。 \n",
            "3_文件路径":"http://10.0.32.50:/data2/lybtmp/install/知识包专利/测试的专利文档.pdf",
            "3_文件大小":"250KB",
            "3_上传时间":1687835515
        },
        "next_app_id":[
        ],
        "wait_condition":[
        ],
        "start_tag":"false"
    }
    # get_content takes two positional arguments; the call used to omit the
    # logger and raised TypeError.
    a=get_content(inputdata,logging.getLogger(__name__))
    print(a)
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,114 @@ |
|||
#coding:utf8 |
|||
import re |
|||
|
|||
def get_content(inputdata,logging):
    """Assemble the request parameters, filling the prompt's placeholders.

    ``{{key}}`` is replaced with ``inputdata["metadata"]["user"][key]`` and
    ``@@key@@`` with ``inputdata["data"][key]``. A placeholder whose key is
    not present is left in the prompt unchanged.

    :param inputdata: original JSON payload; must contain ``metadata.admin``
        (``prompt``, ``authorization``, ``model``, ``temperature``, ``top_p``,
        ``n``) and ``data``. ``metadata.user`` is only read when the prompt
        contains a ``{{...}}`` placeholder.
    :param logging: not used by this function; kept for interface compatibility.
    :return: dict with keys ``prompt``, ``authorization``, ``model``,
        ``temperature``, ``top_p`` and ``n``.
    :raises KeyError: if a required key is missing.
    """
    admin = inputdata["metadata"]["admin"]
    data = inputdata["data"]
    prompt = admin["prompt"]

    def _fill(text, pattern, values):
        """Replace every placeholder whose key is in *values*; keep the others."""
        def _swap(match):
            key = match.group(1)
            # A callable replacement is inserted verbatim: backslashes or
            # "\1" inside the data are not treated as escapes or group
            # references, and str() accepts non-string values.
            return str(values[key]) if key in values else match.group(0)
        return re.sub(pattern, _swap, text)

    # Non-greedy patterns so several placeholders in one prompt are matched
    # individually instead of as one span from the first to the last marker.
    user_pattern = r"\{\{(.*?)\}\}"
    if re.search(user_pattern, prompt):
        prompt = _fill(prompt, user_pattern, inputdata["metadata"]["user"])
    prompt = _fill(prompt, r"@@(.*?)@@", data)

    return {
        "prompt": prompt,
        "authorization": admin["authorization"],
        "model": admin["model"],
        "temperature": admin["temperature"],
        "top_p": admin["top_p"],
        "n": admin["n"],
    }
|||
|
|||
|
|||
if __name__=="__main__":
    # Manual smoke test: run get_content on a sample payload whose prompt
    # references the "7_软件著作过滤器" data field, then print the result.
    # NOTE(review): the "authorization" value below is an API key committed to
    # source; it should be revoked and supplied through configuration.
    inputdata={
        "metadata":{
            "output":{
                "output_type":"table",
                "label_col":[
                    "软件著作抽取结果"
                ]
            },
            "input":{
                "input_type":"text",
                "label":[
                    "7_软件著作过滤器"
                ]
            },
            "address":"http://172.18.1.181:9011/chatGpt/",
            "admin":{
                "authorization":"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",
                "top_p":"1",
                "user_input":[
                    {
                        "keyname":"tag",
                        "keydesc":""
                    }
                ],
                "temperature":"0.2",
                "model":"gpt-3.5-turbo-16k",
                "prompt":"请在下面这句话中提取出:证书号、软件名称、著作权人,以json格式输出,找不到的字段赋值为空字符串,不要有多余的文字输出,只输出json结构。@@7_软件著作过滤器@@",
                "n":"1"
            },
            "index":1
        },
        "data":{
            "1_项目文件上传":"[{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/05/1/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileId\":\"cd6592f0389bb1da25afbb44901f9cde\",\"fileName\":\"1-基于时间序列遥感 影像洪涝检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/08/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileId\":\"944eec1cf98f216ea953459dac4dd505\",\"fileName\":\"3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/09/1/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileId\":\"eb378cb9ee914323f601500378dfad76\",\"fileName\":\"4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\" }]",
            "2_文件分类信息":"{\"软件著作\":4}",
            "3_OCR识别内容":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}",
            "businessKey":"185aef3b1c810799a6be8314abf6512c",
            "7_软件著作过滤器":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}"
        },
        "created":1691004265000,
        "module":"OCR",
        "start_tag":"false",
        "last_edit":1692464331000,
        "next_app_id":[
            {
                "start_id":86,
                "edge_id":49,
                "end_id":90
            }
        ],
        "transfer_id":11,
        "blueprint_id":3,
        "scenes_id":3,
        "scenario":{
            "dataloss":1,
            "autoCommitTriggerLast":1,
            "maxErrors":3,
            "autoCommit":1,
            "freshVariables":1
        },
        "wait_condition":[

        ],
        "scheduling":{
            "interval":-1,
            "type":"single"
        },
        "name":"软件著作抽取",
        "businessKey":"185aef3b1c810799a6be8314abf6512c",
        "id":86,
        "describe":"软件著作抽取"
    }
    # The second argument is the logger slot; this file's get_content never
    # touches it, so the empty string is harmless here.
    a=get_content(inputdata,"")
    print(a)
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,181 @@ |
|||
#coding:utf8 |
|||
import re |
|||
from jsonpath_ng import parse |
|||
import json |
|||
import traceback |
|||
|
|||
def get_content(inputdata,logging):
    """Assemble the request parameters, expanding the prompt's placeholders.

    ``{{key}}`` is replaced with ``inputdata["metadata"]["user"][key]``.
    Each ``@@rule@@`` placeholder is resolved as follows:

    * ``field:jsonpath`` -- ``inputdata["data"][field]`` is decoded as JSON
      and the first value selected by the JSONPath expression is inserted.
    * ``field`` -- the raw value ``inputdata["data"][field]`` is inserted.

    A placeholder whose field is not in the data is left untouched. A rule
    whose resolution raises is logged and its placeholder is removed.

    :param inputdata: original JSON payload with ``metadata.admin`` (``prompt``,
        ``authorization``, ``model``, ``temperature``, ``top_p``, ``n``) and
        ``data``; ``metadata.user`` is read only when a ``{{...}}``
        placeholder is present.
    :param logging: logger-like object exposing ``info``.
    :return: dict with keys ``prompt``, ``authorization``, ``model``,
        ``temperature``, ``top_p`` and ``n``.
    :raises KeyError: if a required key is missing.
    """
    admin = inputdata["metadata"]["admin"]
    data = inputdata["data"]
    prompt = admin["prompt"]

    user_pattern = r"\{\{(.*?)\}\}"
    if re.search(user_pattern, prompt):
        user_data = inputdata["metadata"]["user"]

        def _swap_user(match):
            key = match.group(1)
            # Callable replacement: the value is inserted verbatim, with no
            # escape or group-reference processing.
            return str(user_data[key]) if key in user_data else match.group(0)

        prompt = re.sub(user_pattern, _swap_user, prompt)

    for rule in re.findall("@@(.*?)@@", prompt):
        placeholder = "@@{}@@".format(rule)
        try:
            if ':' in rule:
                # maxsplit=1 keeps a JSONPath that itself contains ':'
                # (for example a slice) in one piece.
                field, path = rule.split(':', 1)
                if field not in data:
                    continue
                # Resolve the field according to the given JSONPath rule.
                json_obj = json.loads(data[field])
                value = [match.value for match in parse(path).find(json_obj)][0]
            elif rule in data:
                value = data[rule]
            else:
                continue
            # str.replace inserts the value literally; re.sub would interpret
            # backslashes in it and reject non-string values.
            prompt = prompt.replace(placeholder, str(value))
        except Exception:
            prompt = prompt.replace(placeholder, '')
            logging.info("动态字段获取数据失败。{}-{}".format(rule, traceback.format_exc()))
    logging.info("拼接后的问题:{}".format(prompt))

    return {
        "prompt": prompt,
        "authorization": admin["authorization"],
        "model": admin["model"],
        "temperature": admin["temperature"],
        "top_p": admin["top_p"],
        "n": admin["n"],
    }
|||
|
|||
# def get_content(inputdata,logging): |
|||
# """ |
|||
# 重新组装参数 |
|||
# :param inputdata:原json数据 |
|||
# :return: 组装的prompt及其他参数 |
|||
# """ |
|||
# res={} |
|||
# admin=inputdata["metadata"]["admin"] |
|||
# data=inputdata["data"] |
|||
# prompt=admin["prompt"] |
|||
# if_user=re.findall("{{(.*)}}",prompt) |
|||
# if_data=re.findall("@@(.*)@@",prompt) |
|||
# if if_user != []: |
|||
# user_data=inputdata["metadata"]["user"] |
|||
# if if_user[0] in user_data.keys(): |
|||
# tmp=user_data[if_user[0]] |
|||
# prompt=re.sub("{{(.*)}}",tmp,prompt) |
|||
# if if_data!=[] and if_data[0] in data.keys(): |
|||
# tmp1=data[if_data[0]] |
|||
# prompt=re.sub("@@(.*)@@",tmp1,prompt) |
|||
# res["prompt"]=prompt |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["model"]=admin["model"] |
|||
# res["temperature"]=admin["temperature"] |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["top_p"]=admin["top_p"] |
|||
# res["n"]=admin["n"] |
|||
# return res |
|||
|
|||
if __name__=="__main__":
    # Ad-hoc check: list the @@...@@ placeholder rules found in a sample prompt.
    sample_prompt = "用@@11_任务拆分:$.lang@@,生成@@11_任务拆分:$.quantity@@条@@11_任务拆分:$.age@@的@@11_任务拆分:$.sex@@发布@@11_任务拆分:$.emotion@@的@@11_任务拆分:$.subject@@的@@11_任务拆分:$.content_type@@。以JSON数组泛型是String类型的格式进行输出,结构外层需要用“resultList”进行接收,不用多余的文字。"

    placeholder_rules = re.findall("@@(.*?)@@", sample_prompt)
    print(placeholder_rules)
|||
# inputdata={ |
|||
# "metadata":{ |
|||
# "output":{ |
|||
# "output_type":"table", |
|||
# "label_col":[ |
|||
# "相似内容抽取" |
|||
# ] |
|||
# }, |
|||
# "input":{ |
|||
# "input_type":"text", |
|||
# "label":[ |
|||
# "3_相似内容过滤器" |
|||
# ] |
|||
# }, |
|||
# "address":"http://172.18.1.181:9011/chatGpt/", |
|||
# "admin":{ |
|||
# "authorization":"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", |
|||
# "top_p":"1", |
|||
# "user_input":[ |
|||
# { |
|||
# "keyname":"tag", |
|||
# "keydesc":"" |
|||
# } |
|||
# ], |
|||
# "temperature":"1", |
|||
# "model":"gpt-3.5-turbo-16k", |
|||
# "prompt":"以JSON数组泛型是String类型的格式进行输出,不用多余的文字。参考”@@11_任务拆分:$.quantity@@不仅仅是一种工具,更是一种改变世界的力量“生成@@11_任务拆分:$.quantity@@条@@11_任务拆分:$.lang@@的@@11_任务拆分:$.content_type@@", |
|||
# "n":"1" |
|||
# }, |
|||
# "index":3, |
|||
# "user":{ |
|||
# "tag":"" |
|||
# } |
|||
# }, |
|||
# "data":{ |
|||
# "10_任务提取":"[{\"age\":\"18~24岁\",\"collection_element\":\"0,1\",\"collection_quantity\":1438,\"collection_task\":\"https://twitter.com/MFA_China\",\"collection_type\":0,\"content_type\":\"发帖内容\",\"create_user_id\":\"652468062228768915\",\"del\":0,\"emotion\":\"积极/乐观\",\"id\":178,\"lang\":\"英语\",\"model_status\":1,\"model_type\":1,\"name\":\"TW用户-发言办公室01-04~12-31\",\"quantity\":10,\"sex\":\"男性\",\"site_id\":181,\"status\":0,\"subject\":\"社会问题和时事主题\",\"tenant_id\":237,\"type\":1},{\"age\":\"18~24岁\",\"collection_element\":\"0,1\",\"collection_quantity\":444378,\"collection_task\":\"hongkong\",\"collection_type\":1,\"content_type\":\"发帖内容\",\"create_user_id\":\"652468062228768915\",\"del\":0,\"emotion\":\"积极/乐观\",\"id\":179,\"lang\":\"英语\",\"model_status\":1,\"model_type\":1,\"name\":\"TW关键词-hongkong01-04~12-31\",\"quantity\":10,\"sex\":\"女性\",\"site_id\":181,\"status\":0,\"subject\":\"旅行和探险主题\",\"tenant_id\":237,\"type\":1},{\"age\":\"18~24岁\",\"collection_element\":\"0,1\",\"collection_quantity\":256,\"collection_task\":\"https://www.facebook.com/tsaiingwen\",\"collection_type\":0,\"content_type\":\"发帖内容\",\"create_user_id\":\"652468062228768915\",\"del\":0,\"emotion\":\"积极/乐观\",\"id\":180,\"lang\":\"英语\",\"model_status\":1,\"model_type\":2,\"name\":\"FB用户-蔡英文-01-04~12-31\",\"quantity\":10,\"sex\":\"男性\",\"site_id\":182,\"status\":0,\"subject\":\"科技和创新主题\",\"tenant_id\":237,\"type\":1},{\"age\":\"18~24岁\",\"collection_element\":\"0,1\",\"collection_quantity\":1253,\"collection_task\":\"台湾新闻\",\"collection_type\":1,\"content_type\":\"发帖内容\",\"create_user_id\":\"652468062228768915\",\"del\":0,\"emotion\":\"积极/乐观\",\"id\":183,\"lang\":\"英语\",\"model_status\":1,\"model_type\":2,\"name\":\"FB关键词-台湾新闻-0110~12-13\",\"quantity\":10,\"sex\":\"女性\",\"site_id\":182,\"status\":0,\"subject\":\"健康和生活方式主题\",\"tenant_id\":237,\"type\":1}]", |
|||
# "3_相似内容过滤器":"{\"age\":\"18~24岁\",\"collection_element\":\"0,1\",\"collection_quantity\":1253,\"collection_task\":\"台湾新闻\",\"collection_type\":1,\"content_type\":\"发帖内容\",\"create_user_id\":\"652468062228768915\",\"del\":0,\"emotion\":\"积极/乐观\",\"id\":183,\"lang\":\"英语\",\"model_status\":1,\"model_type\":2,\"name\":\"FB关键词-台湾新闻-0110~12-13\",\"quantity\":10,\"sex\":\"女性\",\"site_id\":182,\"size\":21,\"status\":0,\"subject\":\"健康和生活方式主题\",\"tenant_id\":237,\"type\":1}", |
|||
# "11_任务拆分":"{\"tenant_id\":237,\"create_user_id\":\"652468062228768915\",\"collection_quantity\":1253,\"quantity\":10,\"subject\":\"健康和生活方式主题\",\"sex\":\"女性\",\"model_type\":2,\"del\":0,\"type\":1,\"collection_element\":\"0,1\",\"collection_type\":1,\"model_status\":1,\"collection_task\":\"台湾新闻\",\"emotion\":\"积极/乐观\",\"content_type\":\"发帖内容\",\"size\":21,\"name\":\"FB关键词-台湾新闻-0110~12-13\",\"site_id\":182,\"id\":183,\"lang\":\"英语\",\"age\":\"18~24岁\",\"status\":0}", |
|||
# "1_mysql数据查询":"{\"resultList\": [{\"id\": 178, \"tenant_id\": 237, \"name\": \"TW用户-发言办公室01-04~12-31\", \"site_id\": 181, \"collection_type\": 0, \"collection_element\": \"0,1\", \"collection_task\": \"https://twitter.com/MFA_China\", \"status\": 0, \"collection_quantity\": 1438, \"create_user\": null, \"create_user_id\": \"652468062228768915\", \"update_user\": null, \"update_user_id\": null, \"del\": 0, \"type\": 1, \"model_type\": 1, \"quantity\": 10, \"content_type\": \"发帖内容\", \"model_status\": 1, \"lang\": \"英语\", \"age\": \"18~24岁\", \"sex\": \"男性\", \"emotion\": \"积极/乐观\", \"subject\": \"社会问题和时事主题\", \"similar_content\": null}, {\"id\": 179, \"tenant_id\": 237, \"name\": \"TW关键词-hongkong01-04~12-31\", \"site_id\": 181, \"collection_type\": 1, \"collection_element\": \"0,1\", \"collection_task\": \"hongkong\", \"status\": 0, \"collection_quantity\": 444378, \"create_user\": null, \"create_user_id\": \"652468062228768915\", \"update_user\": null, \"update_user_id\": null, \"del\": 0, \"type\": 1, \"model_type\": 1, \"quantity\": 10, \"content_type\": \"发帖内容\", \"model_status\": 1, \"lang\": \"英语\", \"age\": \"18~24岁\", \"sex\": \"女性\", \"emotion\": \"积极/乐观\", \"subject\": \"旅行和探险主题\", \"similar_content\": null}, {\"id\": 180, \"tenant_id\": 237, \"name\": \"FB用户-蔡英文-01-04~12-31\", \"site_id\": 182, \"collection_type\": 0, \"collection_element\": \"0,1\", \"collection_task\": \"https://www.facebook.com/tsaiingwen\", \"status\": 0, \"collection_quantity\": 256, \"create_user\": null, \"create_user_id\": \"652468062228768915\", \"update_user\": null, \"update_user_id\": null, \"del\": 0, \"type\": 1, \"model_type\": 2, \"quantity\": 10, \"content_type\": \"发帖内容\", \"model_status\": 1, \"lang\": \"英语\", \"age\": \"18~24岁\", \"sex\": \"男性\", \"emotion\": \"积极/乐观\", \"subject\": \"科技和创新主题\", \"similar_content\": null}, {\"id\": 183, \"tenant_id\": 237, \"name\": \"FB关键词-台湾新闻-0110~12-13\", \"site_id\": 182, \"collection_type\": 1, \"collection_element\": \"0,1\", 
\"collection_task\": \"台湾新闻\", \"status\": 0, \"collection_quantity\": 1253, \"create_user\": null, \"create_user_id\": \"652468062228768915\", \"update_user\": null, \"update_user_id\": null, \"del\": 0, \"type\": 1, \"model_type\": 2, \"quantity\": 10, \"content_type\": \"发帖内容\", \"model_status\": 1, \"lang\": \"英语\", \"age\": \"18~24岁\", \"sex\": \"女性\", \"emotion\": \"积极/乐观\", \"subject\": \"健康和生活方式主题\", \"similar_content\": null}]}" |
|||
# }, |
|||
# "created":1691004265000, |
|||
# "module":"ChatGPT", |
|||
# "start_tag":"false", |
|||
# "multi_branch":0, |
|||
# "last_edit":1693932236000, |
|||
# "next_app_id":[ |
|||
# { |
|||
# "start_id":188, |
|||
# "edge_id":92, |
|||
# "end_id":190 |
|||
# } |
|||
# ], |
|||
# "transfer_id":5, |
|||
# "version":1, |
|||
# "blueprint_id":6, |
|||
# "scenes_id":7, |
|||
# "scenario":{ |
|||
# "dataloss":1, |
|||
# "autoCommitTriggerLast":1, |
|||
# "maxErrors":3, |
|||
# "autoCommit":1, |
|||
# "freshVariables":1 |
|||
# }, |
|||
# "wait_condition":[ |
|||
# |
|||
# ], |
|||
# "scheduling":{ |
|||
# "interval":-1, |
|||
# "type":"single" |
|||
# }, |
|||
# "name":"相似内容抽取", |
|||
# "id":188, |
|||
# "position":[ |
|||
# 100, |
|||
# 200 |
|||
# ], |
|||
# "describe":"相似内容抽取" |
|||
# } |
|||
# a=get_content(inputdata,"") |
|||
# print(a) |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,170 @@ |
|||
#coding:utf8 |
|||
import re |
|||
from jsonpath_ng import parse |
|||
import json |
|||
import traceback |
|||
|
|||
def parse_data(raw_data,para):
    """Return the first value selected by a ``field:jsonpath`` rule.

    ``raw_data["data"][field]`` must hold a JSON string; it is decoded and
    queried with the JSONPath expression.

    :param raw_data: payload whose ``data`` entry maps field names to JSON strings.
    :param para: rule of the form ``field:jsonpath`` (converted with ``str``).
    :return: value of the first JSONPath match.
    :raises KeyError: if the field is not in ``raw_data["data"]``.
    :raises IndexError: if the rule has no ``:`` separator or nothing matches.
    """
    # maxsplit=1: only the first ':' separates field from path, so a JSONPath
    # containing ':' (for example a slice) is no longer truncated.
    parts = str(para).split(":", 1)
    datasource = json.loads(raw_data['data'][parts[0]])
    # Build the JSONPath expression and select the matching JSON elements.
    expr = parse(parts[1])
    values = [found.value for found in expr.find(datasource)]
    return values[0]
|||
|
|||
def get_content(inputdata,logging):
    """Assemble the request parameters, expanding ``@@rule@@`` placeholders.

    Each rule found in ``inputdata["input"]["prompt"]`` is resolved as follows:

    * ``field:jsonpath#json#jsonpath2`` -- the value selected by
      ``field:jsonpath`` is itself a JSON string; it is decoded and queried
      with ``jsonpath2``.
    * ``field:jsonpath`` -- resolved by ``parse_data``.
    * ``field`` -- the raw value ``inputdata["data"][field]``; when the key
      is absent the placeholder is left in the prompt untouched.

    A rule whose resolution raises is logged and its placeholder is removed.

    :param inputdata: original JSON payload with ``input`` (``prompt``,
        ``authorization``, ``model``, ``temperature``, ``top_p``, ``n``)
        and ``data``.
    :param logging: logger-like object exposing ``info``.
    :return: dict with keys ``prompt``, ``authorization``, ``model``,
        ``temperature``, ``top_p`` and ``n``.
    :raises KeyError: if a required ``input`` key is missing.
    """
    params = inputdata["input"]  # not called ``input``: that shadows the builtin
    data = inputdata["data"]
    prompt = params["prompt"]

    for rule in re.findall("@@(.*?)@@", prompt):
        placeholder = "@@{}@@".format(rule)
        try:
            if "#json#" in rule:
                parm = rule.split("#json#")
                nested = json.loads(parse_data(inputdata, parm[0]))
                value = [match.value for match in parse(parm[1]).find(nested)][0]
            elif ":" in rule:
                value = parse_data(inputdata, rule)
            elif rule in data:
                value = data[rule]
            else:
                continue
            # str.replace inserts the value literally. re.sub would interpret
            # backslashes in it as escapes or group references and raise on
            # non-string values (numbers from JSON), which used to make the
            # placeholder vanish silently.
            prompt = prompt.replace(placeholder, str(value))
        except Exception:
            prompt = prompt.replace(placeholder, '')
            logging.info("动态字段获取数据失败。{}-{}".format(rule, traceback.format_exc()))
    logging.info("拼接后的问题:{}".format(prompt))

    return {
        "prompt": prompt,
        "authorization": params["authorization"],
        "model": params["model"],
        "temperature": params["temperature"],
        "top_p": params["top_p"],
        "n": params["n"],
    }
|||
|
|||
# def get_content(inputdata,logging): |
|||
# """ |
|||
# 重新组装参数 |
|||
# :param inputdata:原json数据 |
|||
# :return: 组装的prompt及其他参数 |
|||
# """ |
|||
# res={} |
|||
# admin=inputdata["metadata"]["admin"] |
|||
# data=inputdata["data"] |
|||
# prompt=admin["prompt"] |
|||
# if_user=re.findall("{{(.*)}}",prompt) |
|||
# if_data=re.findall("@@(.*)@@",prompt) |
|||
# if if_user != []: |
|||
# user_data=inputdata["metadata"]["user"] |
|||
# if if_user[0] in user_data.keys(): |
|||
# tmp=user_data[if_user[0]] |
|||
# prompt=re.sub("{{(.*)}}",tmp,prompt) |
|||
# if if_data!=[] and if_data[0] in data.keys(): |
|||
# tmp1=data[if_data[0]] |
|||
# prompt=re.sub("@@(.*)@@",tmp1,prompt) |
|||
# res["prompt"]=prompt |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["model"]=admin["model"] |
|||
# res["temperature"]=admin["temperature"] |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["top_p"]=admin["top_p"] |
|||
# res["n"]=admin["n"] |
|||
# return res |
|||
|
|||
if __name__=="__main__":
    # Manual smoke test: expand a prompt that uses both the plain
    # "field:jsonpath" rule and the nested "#json#" rule, then print the result.
    import logging

    logging.basicConfig(level=logging.INFO)

    # NOTE(review): the "authorization" value below is an API key committed to
    # source; it should be revoked and supplied through configuration.
    inputdata={
        "output":{
            "id":"id",
            "content":"content"
        },
        "address":"http://172.18.1.181:9011/chatGpt/",
        "input":{
            "authorization":"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",
            "top_p":"1",
            "temperature":"1",
            "model":"gpt-3.5-turbo-16k",
            "prompt":"根据下面内容:@@1_Youtube采集:$['content']#json#$['test1']@@。生成一条@@1_Youtube采集:$['Count']@@字的关于中国正面的新闻,标题用title,内容用content,以json格式输出。",
            "n":"1"
        },
        "data":{
            "1_Youtube采集":"{\"isDownload\":\"true\",\"content\":\"{\\\"test1\\\":\\\"22222\\\"}\",\"Count\":\"555\"}"
        },
        "created":1691004265000,
        "module":"ChatGPT",
        "start_tag":"false",
        "multi_branch":0,
        "last_edit":1698927821000,
        "next_app_id":[
            {
                "start_id":316,
                "edge_id":200,
                "end_id":317
            }
        ],
        "transfer_id":3,
        "version":1,
        "blueprint_id":12,
        "scenes_id":12,
        "scenario":{
            "dataloss":1,
            "autoCommitTriggerLast":1,
            "maxErrors":3,
            "autoCommit":1,
            "freshVariables":1
        },
        "wait_condition":[

        ],
        "scheduling":{
            "interval":-1,
            "type":"single"
        },
        "name":"正面引导",
        "businessKey":"78278a5168e45304",
        "id":316,
        "position":[
            100,
            200
        ],
        "describe":"正面引导"
    }
    # get_content calls .info() on its second argument, so it needs a real
    # logger; the empty string passed before raised AttributeError.
    a=get_content(inputdata,logging.getLogger(__name__))
    print(a)
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,170 @@ |
|||
#coding:utf8 |
|||
import re |
|||
from jsonpath_ng import parse |
|||
import json |
|||
import traceback |
|||
|
|||
def parse_data(raw_data,para):
    """Return the first value selected by a ``field:jsonpath`` rule.

    ``raw_data["data"][field]`` must hold a JSON string; it is decoded and
    queried with the JSONPath expression.

    :param raw_data: payload whose ``data`` entry maps field names to JSON strings.
    :param para: rule of the form ``field:jsonpath`` (converted with ``str``).
    :return: value of the first JSONPath match.
    :raises KeyError: if the field is not in ``raw_data["data"]``.
    :raises IndexError: if the rule has no ``:`` separator or nothing matches.
    """
    # maxsplit=1: only the first ':' separates field from path, so a JSONPath
    # containing ':' (for example a slice) is no longer truncated.
    parts = str(para).split(":", 1)
    datasource = json.loads(raw_data['data'][parts[0]])
    # Build the JSONPath expression and select the matching JSON elements.
    expr = parse(parts[1])
    values = [found.value for found in expr.find(datasource)]
    return values[0]
|||
|
|||
def get_content(inputdata,logging):
    """Assemble the request parameters, expanding ``@@rule@@`` placeholders.

    Each rule found in ``inputdata["input"]["prompt"]`` is resolved as follows:

    * ``field:jsonpath#json#jsonpath2`` -- the value selected by
      ``field:jsonpath`` is itself a JSON string; it is decoded and queried
      with ``jsonpath2``.
    * ``field:jsonpath`` -- resolved by ``parse_data``.
    * ``field`` -- the raw value ``inputdata["data"][field]``; when the key
      is absent the placeholder is left in the prompt untouched.

    A rule whose resolution raises is logged and its placeholder is removed.

    :param inputdata: original JSON payload with ``input`` (``prompt``,
        ``authorization``, ``model``, ``temperature``, ``top_p``, ``n``)
        and ``data``.
    :param logging: logger-like object exposing ``info``.
    :return: dict with keys ``prompt``, ``authorization``, ``model``,
        ``temperature``, ``top_p`` and ``n``.
    :raises KeyError: if a required ``input`` key is missing.
    """
    params = inputdata["input"]  # not called ``input``: that shadows the builtin
    data = inputdata["data"]
    prompt = params["prompt"]

    for rule in re.findall("@@(.*?)@@", prompt):
        placeholder = "@@{}@@".format(rule)
        try:
            if "#json#" in rule:
                parm = rule.split("#json#")
                nested = json.loads(parse_data(inputdata, parm[0]))
                value = [match.value for match in parse(parm[1]).find(nested)][0]
            elif ":" in rule:
                value = parse_data(inputdata, rule)
            elif rule in data:
                value = data[rule]
            else:
                continue
            # str.replace inserts the value literally. re.sub would interpret
            # backslashes in it as escapes or group references and raise on
            # non-string values (numbers from JSON), which used to make the
            # placeholder vanish silently.
            prompt = prompt.replace(placeholder, str(value))
        except Exception:
            prompt = prompt.replace(placeholder, '')
            logging.info("动态字段获取数据失败。{}-{}".format(rule, traceback.format_exc()))
    logging.info("拼接后的问题:{}".format(prompt))

    return {
        "prompt": prompt,
        "authorization": params["authorization"],
        "model": params["model"],
        "temperature": params["temperature"],
        "top_p": params["top_p"],
        "n": params["n"],
    }
|||
|
|||
# def get_content(inputdata,logging): |
|||
# """ |
|||
# 重新组装参数 |
|||
# :param inputdata:原json数据 |
|||
# :return: 组装的prompt及其他参数 |
|||
# """ |
|||
# res={} |
|||
# admin=inputdata["metadata"]["admin"] |
|||
# data=inputdata["data"] |
|||
# prompt=admin["prompt"] |
|||
# if_user=re.findall("{{(.*)}}",prompt) |
|||
# if_data=re.findall("@@(.*)@@",prompt) |
|||
# if if_user != []: |
|||
# user_data=inputdata["metadata"]["user"] |
|||
# if if_user[0] in user_data.keys(): |
|||
# tmp=user_data[if_user[0]] |
|||
# prompt=re.sub("{{(.*)}}",tmp,prompt) |
|||
# if if_data!=[] and if_data[0] in data.keys(): |
|||
# tmp1=data[if_data[0]] |
|||
# prompt=re.sub("@@(.*)@@",tmp1,prompt) |
|||
# res["prompt"]=prompt |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["model"]=admin["model"] |
|||
# res["temperature"]=admin["temperature"] |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["top_p"]=admin["top_p"] |
|||
# res["n"]=admin["n"] |
|||
# return res |
|||
|
|||
if __name__=="__main__":
    # Manual smoke test: expand a prompt that uses both the plain
    # "field:jsonpath" rule and the nested "#json#" rule, then print the result.
    import logging

    logging.basicConfig(level=logging.INFO)

    # NOTE(review): the "authorization" value below is an API key committed to
    # source; it should be revoked and supplied through configuration.
    inputdata={
        "output":{
            "id":"id",
            "content":"content"
        },
        "address":"http://172.18.1.181:9011/chatGpt/",
        "input":{
            "authorization":"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",
            "top_p":"1",
            "temperature":"1",
            "model":"gpt-3.5-turbo-16k",
            "prompt":"根据下面内容:@@1_Youtube采集:$['content']#json#$['test1']@@。生成一条@@1_Youtube采集:$['Count']@@字的关于中国正面的新闻,标题用title,内容用content,以json格式输出。",
            "n":"1"
        },
        "data":{
            "1_Youtube采集":"{\"isDownload\":\"true\",\"content\":\"{\\\"test1\\\":\\\"22222\\\"}\",\"Count\":\"555\"}"
        },
        "created":1691004265000,
        "module":"ChatGPT",
        "start_tag":"false",
        "multi_branch":0,
        "last_edit":1698927821000,
        "next_app_id":[
            {
                "start_id":316,
                "edge_id":200,
                "end_id":317
            }
        ],
        "transfer_id":3,
        "version":1,
        "blueprint_id":12,
        "scenes_id":12,
        "scenario":{
            "dataloss":1,
            "autoCommitTriggerLast":1,
            "maxErrors":3,
            "autoCommit":1,
            "freshVariables":1
        },
        "wait_condition":[

        ],
        "scheduling":{
            "interval":-1,
            "type":"single"
        },
        "name":"正面引导",
        "businessKey":"78278a5168e45304",
        "id":316,
        "position":[
            100,
            200
        ],
        "describe":"正面引导"
    }
    # get_content calls .info() on its second argument, so it needs a real
    # logger; the empty string passed before raised AttributeError.
    a=get_content(inputdata,logging.getLogger(__name__))
    print(a)
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
173
text_analysis/tools/bak/tool.py_20240418
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,170 @@ |
|||
#coding:utf8 |
|||
import re |
|||
from jsonpath_ng import parse |
|||
import json |
|||
import traceback |
|||
|
|||
def parse_data(raw_data,para):
    """Return the first value selected by a ``field:jsonpath`` rule.

    ``raw_data["data"][field]`` must hold a JSON string; it is decoded and
    queried with the JSONPath expression.

    :param raw_data: payload whose ``data`` entry maps field names to JSON strings.
    :param para: rule of the form ``field:jsonpath`` (converted with ``str``).
    :return: value of the first JSONPath match.
    :raises KeyError: if the field is not in ``raw_data["data"]``.
    :raises IndexError: if the rule has no ``:`` separator or nothing matches.
    """
    # maxsplit=1: only the first ':' separates field from path, so a JSONPath
    # containing ':' (for example a slice) is no longer truncated.
    parts = str(para).split(":", 1)
    datasource = json.loads(raw_data['data'][parts[0]])
    # Build the JSONPath expression and select the matching JSON elements.
    expr = parse(parts[1])
    values = [found.value for found in expr.find(datasource)]
    return values[0]
|||
|
|||
def get_content(inputdata, logging):
    """Fill the ``@@...@@`` placeholders of the prompt and collect model params.

    Supported placeholder forms (the text between a pair of ``@@``):

    * ``<source>:<jsonpath>#json#<jsonpath>`` - the first part is resolved with
      ``parse_data``, its result is parsed as JSON and the second path is
      applied to it.
    * ``<source>:<jsonpath>`` - resolved directly with ``parse_data``.
    * ``<key>`` - replaced by ``inputdata['data'][key]`` when the key exists;
      otherwise the placeholder is left untouched.

    A placeholder whose resolution raises is removed from the prompt and the
    failure is logged.

    :param inputdata: original JSON payload; ``input`` and ``data`` are read.
    :param logging: logger-like object providing ``info``.
    :return: dict with ``prompt``, ``authorization``, ``model``,
        ``temperature``, ``top_p`` and ``n``.
    """
    params = inputdata["input"]
    data = inputdata["data"]
    prompt = params["prompt"]
    for rule in re.findall("@@(.*?)@@", prompt):
        placeholder = "@@{}@@".format(rule)
        try:
            if "#json#" in rule:
                parm = rule.split("#json#")
                nested = json.loads(parse_data(inputdata, parm[0]))
                expr = parse(parm[1])
                value = [found.value for found in expr.find(nested)][0]
            elif ":" in rule:
                value = parse_data(inputdata, rule)
            elif rule in data:
                value = data[rule]
            else:
                # Unknown plain key: keep the placeholder as it is.
                continue
            # Literal substitution: the resolved text must not be treated as a
            # regex replacement template (backslashes or group references in
            # the data would otherwise be interpreted or raise), and values
            # that are not strings are converted instead of failing.
            prompt = prompt.replace(placeholder, str(value))
        except Exception:
            prompt = prompt.replace(placeholder, '')
            logging.info("动态字段获取数据失败。{}-{}".format(rule, traceback.format_exc()))
    logging.info("拼接后的问题:{}".format(prompt))
    return {
        "prompt": prompt,
        "authorization": params["authorization"],
        "model": params["model"],
        "temperature": params["temperature"],
        "top_p": params["top_p"],
        "n": params["n"],
    }
|||
|
|||
# def get_content(inputdata,logging): |
|||
# """ |
|||
# 重新组装参数 |
|||
# :param inputdata:原json数据 |
|||
# :return: 组装的prompt及其他参数 |
|||
# """ |
|||
# res={} |
|||
# admin=inputdata["metadata"]["admin"] |
|||
# data=inputdata["data"] |
|||
# prompt=admin["prompt"] |
|||
# if_user=re.findall("{{(.*)}}",prompt) |
|||
# if_data=re.findall("@@(.*)@@",prompt) |
|||
# if if_user != []: |
|||
# user_data=inputdata["metadata"]["user"] |
|||
# if if_user[0] in user_data.keys(): |
|||
# tmp=user_data[if_user[0]] |
|||
# prompt=re.sub("{{(.*)}}",tmp,prompt) |
|||
# if if_data!=[] and if_data[0] in data.keys(): |
|||
# tmp1=data[if_data[0]] |
|||
# prompt=re.sub("@@(.*)@@",tmp1,prompt) |
|||
# res["prompt"]=prompt |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["model"]=admin["model"] |
|||
# res["temperature"]=admin["temperature"] |
|||
# res["authorization"]=admin["authorization"] |
|||
# res["top_p"]=admin["top_p"] |
|||
# res["n"]=admin["n"] |
|||
# return res |
|||
|
|||
if __name__ == "__main__":
    # Manual smoke test for get_content().
    import logging as _logging
    import os

    _logging.basicConfig(level=_logging.INFO)

    inputdata = {
        "output": {
            "id": "id",
            "content": "content"
        },
        "address": "http://172.18.1.181:9011/chatGpt/",
        "input": {
            # The API key is read from the environment instead of being
            # committed to the repository.
            "authorization": os.environ.get("OPENAI_API_KEY", ""),
            "top_p": "1",
            "temperature": "1",
            "model": "gpt-3.5-turbo-16k",
            "prompt": "根据下面内容:@@1_Youtube采集:$['content']#json#$['test1']@@。生成一条@@1_Youtube采集:$['Count']@@字的关于中国正面的新闻,标题用title,内容用content,以json格式输出。",
            "n": "1"
        },
        "data": {
            "1_Youtube采集": "{\"isDownload\":\"true\",\"content\":\"{\\\"test1\\\":\\\"22222\\\"}\",\"Count\":\"555\"}"
        },
        "created": 1691004265000,
        "module": "ChatGPT",
        "start_tag": "false",
        "multi_branch": 0,
        "last_edit": 1698927821000,
        "next_app_id": [
            {
                "start_id": 316,
                "edge_id": 200,
                "end_id": 317
            }
        ],
        "transfer_id": 3,
        "version": 1,
        "blueprint_id": 12,
        "scenes_id": 12,
        "scenario": {
            "dataloss": 1,
            "autoCommitTriggerLast": 1,
            "maxErrors": 3,
            "autoCommit": 1,
            "freshVariables": 1
        },
        "wait_condition": [],
        "scheduling": {
            "interval": -1,
            "type": "single"
        },
        "name": "正面引导",
        "businessKey": "78278a5168e45304",
        "id": 316,
        "position": [
            100,
            200
        ],
        "describe": "正面引导"
    }
    # get_content() calls logging.info(), so a real logger is required here;
    # the empty string that used to be passed raised AttributeError.
    a = get_content(inputdata, _logging.getLogger(__name__))
    print(a)
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,25 @@ |
|||
# -*- coding:utf-8 -*- |
|||
|
|||
class pt_v_Exception(Exception):
    """Error whose text reports that the pt rule was not hit in the cache."""

    _TEXT = 'pt规则未在缓存中命中'

    def __str__(self):
        # The message is fixed; constructor arguments do not affect it.
        return self._TEXT
|||
|
|||
class dt_v_Exception(Exception):
    """Error whose text reports that the dt rule was not hit in the cache."""

    _TEXT = 'dt规则未在缓存中命中'

    def __str__(self):
        # The message is fixed; constructor arguments do not affect it.
        return self._TEXT
|||
|
|||
class dt_v_attr_Exception(Exception):
    """Error whose text reports that the dt_attrcode rule was not hit in the cache."""

    _TEXT = 'dt_attrcode规则未在缓存中命中'

    def __str__(self):
        # The message is fixed; constructor arguments do not affect it.
        return self._TEXT
|||
|
|||
class dt_v_codeid_Exception(Exception):
    """Error whose text reports that the dt_codeid rule was not hit in the cache."""

    _TEXT = 'dt_codeid规则未在缓存中命中'

    def __str__(self):
        # The message is fixed; constructor arguments do not affect it.
        return self._TEXT
|||
|
|||
class dt_v_senti_Exception(Exception):
    """Error whose text reports that the dt_senti rule was not hit in the cache."""

    _TEXT = 'dt_senti规则未在缓存中命中'

    def __str__(self):
        # The message is fixed; constructor arguments do not affect it.
        return self._TEXT
|||
|
|||
class dt_v_res_Exception(Exception):
    """Error whose text reports that the dt_resverse rule was not hit in the cache."""

    _TEXT = 'dt_resverse规则未在缓存中命中'

    def __str__(self):
        # The message is fixed; constructor arguments do not affect it.
        return self._TEXT
@ -0,0 +1,65 @@ |
|||
# coding=utf-8 |
|||
from kafka import KafkaProducer |
|||
from kafka import KafkaConsumer |
|||
import json |
|||
import traceback |
|||
import time |
|||
import traceback |
|||
import datetime |
|||
import queue |
|||
from logUtil import get_logger |
|||
""" |
|||
写到kafka |
|||
""" |
|||
def kafkaProduce(topic, resultData, address):
    """Send ``resultData`` to every topic of a comma-separated topic list.

    :param topic: one topic name, or several separated by ``,``.
    :param resultData: message value handed to ``KafkaProducer.send``.
    :param address: Kafka bootstrap server address(es).
    """
    producer = KafkaProducer(bootstrap_servers='{}'.format(address), request_timeout_ms=120000)
    try:
        for tc in topic.split(','):
            future = producer.send(tc, resultData)
            # Wait (at most 60 s) for the send to complete so failures surface here.
            result = future.get(timeout=60)
            producer.flush()
            print(result)
    finally:
        # The producer used to be abandoned without being closed; release it
        # even when a send fails.
        producer.close()
|||
|
|||
#写入文件 |
|||
def writeTxt(filePath, result):
    """Append ``result`` plus a newline to ``filePath`` (UTF-8).

    Backslash escape sequences inside ``result`` (such as ``\\uXXXX``) are
    decoded before writing.

    :param filePath: path of the file to append to.
    :param result: text to write.
    """
    # ``f.close`` was referenced without being called, so the handle was never
    # closed explicitly; the context manager closes it deterministically.
    with open(filePath, 'a', encoding='utf-8') as f:
        f.write(result.encode('utf-8').decode('unicode_escape') + '\n')
|||
|
|||
def KafkaConsume(topic, address, group_id, task_queue, logger):
    '''
    Watch a Kafka topic and push every message onto the task queue.

    :param topic: topic to subscribe to.
    :param address: Kafka bootstrap server address(es).
    :param group_id: consumer group id.
    :param task_queue: queue that receives each message decoded as UTF-8 text.
    :param logger: logger used for progress and error reporting.
    :return: None; returns only after an exception has been logged.
    '''
    consumer = None
    try:
        consumer = KafkaConsumer(topic, auto_offset_reset='earliest', fetch_max_bytes=1024768000,
                                 fetch_max_wait_ms=5000, bootstrap_servers=address, group_id=group_id)
        i = 1
        while True:
            for msg in consumer:
                logger.info('第{}条数据'.format(i))
                data = str(msg.value, encoding="utf-8")
                logger.info(data)
                task_queue.put(data)
                i = i + 1
            # Reached only when the consumer iterator ends.
            logger.info('暂无任务------')
            time.sleep(10)
    except Exception:
        # This function is run in a background thread by its visible caller,
        # so the failure is written to the supplied logger (previously it was
        # only printed and the ``logger`` argument was ignored).
        logger.error('kafka未知异常----')
        logger.error(traceback.format_exc())
    finally:
        # Leave the consumer group cleanly instead of abandoning the consumer.
        if consumer is not None:
            consumer.close()
|||
|
|||
def writeTxt(filePath, result):
    """Append ``result`` followed by a newline to ``filePath``.

    The file is opened with the platform's default text encoding.

    :param filePath: path of the file to append to.
    :param result: text to write.
    """
    # ``f.close`` was referenced without being called, so the handle was never
    # closed explicitly; the context manager closes it deterministically.
    with open(filePath, 'a') as f:
        f.write(result + '\n')
|||
|
|||
if __name__ == '__main__':
    # Manual smoke test: publish one sample record to the ``test`` topic.
    resultData = {'id': '中文', 'url': 'https://zh.wikipedia.org/zh/%E8%94%A1%E8%8B%B1%E6%96%87'}
    # ensure_ascii=False emits the non-ASCII text directly. The former
    # ``.encode('utf-8').decode('unicode_escape').encode()`` round trip also
    # un-escaped ``\"``, ``\\`` and ``\n`` inside the JSON (producing invalid
    # JSON) and failed on characters outside the BMP.
    kafkaProduce('test', json.dumps(resultData, ensure_ascii=False).encode('utf-8'), '172.26.28.30:9092')
    #task_queue = queue.Queue()
    #KafkaConsume('fq-Taobao-eccontent','39.129.129.172:6666,39.129.129.172:6668,39.129.129.172:6669,39.129.129.172:6670,39.129.129.172:6671','news_sche_8',task_queue,logger)
    # KafkaConsume('zxbnewstopic','120.133.14.71:9992','group3',task_queue,logger)
@ -0,0 +1,338 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
import re |
|||
# from log_util.set_logger import set_logger |
|||
# logging = set_logger('logs/error.log') |
|||
import pymysql.cursors |
|||
import traceback |
|||
|
|||
def mysqlConn(data, logging):
    """Connect to a MySQL database and list its tables.

    :param data: mapping with ``Host``, ``Port``, ``Database``, ``User`` and
        ``Password``.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``.
        ``results`` holds the table names; ``successCode`` is ``"0"`` when no
        table exists or when an error occurred (traceback in ``errorLog``).
    :raises KeyError: if a connection setting is missing from ``data``.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        with db.cursor() as cursor:
            cursor.execute("SHOW TABLES")
            tables = cursor.fetchall()
        if tables:
            # Each DictCursor row is a one-column dict; take that single value.
            res["results"] = [list(row.values())[0] for row in tables]
        else:
            res["successCode"] = "0"
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # query raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
def getTableColumnNames(data, logging):
    """Return the column names of one MySQL table.

    :param data: mapping with ``Host``, ``Port``, ``Database``, ``User``,
        ``Password`` and ``Table``.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``.
        ``results`` holds the ``Field`` value of every ``DESCRIBE`` row;
        ``successCode`` is ``"0"`` when nothing is returned or on error.
    :raises KeyError: if a required setting is missing from ``data``.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    p_table = data["Table"]
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        # NOTE(review): the table name is concatenated into the statement
        # unquoted; it must come from a trusted source.
        sql = "DESCRIBE " + p_table
        with db.cursor() as cursor:
            cursor.execute(sql)
            tables = cursor.fetchall()
        if tables:
            res["results"] = [row['Field'] for row in tables]
        else:
            res["successCode"] = "0"
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # query raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
def mysqlInsert(input, logging):
    """Insert one row described by ``input['metadata']['admin']``.

    The admin mapping supplies the connection settings plus ``Table``,
    ``columnName`` (list of column names) and ``values`` (list of values in
    the same order).

    :param input: task payload.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``;
        ``successCode`` is ``"0"`` and ``errorLog`` holds the traceback on error.
    :raises KeyError: if a required setting is missing.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    data = input["metadata"]["admin"]
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    p_table = data["Table"]
    p_columnName = data["columnName"]
    cN = '(' + ','.join(p_columnName) + ') '
    val = tuple(data["values"])
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        # Values are bound as parameters. NOTE(review): table and column names
        # are still concatenated and must come from a trusted source.
        sql = "insert into " + p_table + cN + "values (" + ','.join(['%s'] * len(val)) + ")"
        with db.cursor() as cursor:
            cursor.execute(sql, val)
        db.commit()
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # statement raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
def mysqlUpdate(input, logging):
    """Run an UPDATE built from ``input['metadata']``.

    The SET clause comes from ``get_updateSet(input)`` and the WHERE clause
    from ``get_filter`` applied to the admin ``Filter`` entry.

    :param input: task payload.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``;
        ``successCode`` is ``"0"`` and ``errorLog`` holds the traceback on error.
    :raises KeyError: if a required setting is missing.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    data = input["metadata"]["admin"]
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    p_table = data["Table"]
    p_set = get_updateSet(input)
    where = get_filter(data["Filter"])
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        # NOTE(review): the statement is assembled by string concatenation;
        # table name, SET and WHERE parts must come from a trusted source.
        sql = "UPDATE " + p_table + p_set + where
        print(sql)
        with db.cursor() as cursor:
            cursor.execute(sql)
        db.commit()
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # statement raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
def mysqlExecute(input, logging):
    """Execute the raw SQL statement in ``input['metadata']['admin']['Execute']``.

    When the statement produces a result set, the rows are returned as a JSON
    string in ``results``; otherwise the transaction is committed.

    :param input: task payload.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``;
        ``successCode`` is ``"0"`` and ``errorLog`` holds the traceback on error.
    :raises KeyError: if a required setting is missing.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    data = input["metadata"]["admin"]
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    execute = data["Execute"]
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        with db.cursor() as cursor:
            # NOTE(review): arbitrary SQL from the payload is executed as is.
            cursor.execute(execute)
            # DB-API: ``description`` is None when no result set was produced.
            # The former ``'select' in sql`` substring test skipped the commit
            # for e.g. ``INSERT ... SELECT`` and for any statement that merely
            # mentions "select" in an identifier.
            if cursor.description is not None:
                result = cursor.fetchall()
                res["results"] = json.dumps(result, ensure_ascii=False)
            else:
                db.commit()
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # statement raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
# def process_where(data): |
|||
# ''' |
|||
# 组装where |
|||
# :param data: data["Filter"],{"key":"age","value":"20","operator":">"},{"logicalSymbol":"and"},{"key":"weight","value":"50","operator":"<"} |
|||
# :return: WHERE age>20 and weight<50 |
|||
# ''' |
|||
# if data=="" or data==[]: |
|||
# return "" |
|||
# where = " WHERE " |
|||
# for line in data: |
|||
# if "key" in line.keys(): |
|||
# val = line["value"] |
|||
# if isinstance(val, str): |
|||
# val = "\'" + val + "\'" |
|||
# tmp = str(line["key"]) + " " + line["operator"] + " " + str(val) |
|||
# where += tmp |
|||
# else: |
|||
# where += " " + line["logicalSymbol"] + " " |
|||
# return where |
|||
# |
|||
# def process_filter(data): |
|||
# ''' |
|||
# 组装key,value,operator |
|||
# :param data: data["Filter"],{"key":"age",value:"20","operator":"="} |
|||
# :return: age=20 |
|||
# ''' |
|||
# if data=="" or data==[]: |
|||
# return "" |
|||
# res=data["key"]+" "+data["operator"]+" "+data["value"] |
|||
# return res |
|||
|
|||
def get_updateSet(input):
    """Build the ``SET`` clause of an UPDATE statement.

    Every entry of ``input['metadata']['admin']['Set']`` has the form
    ``<left>=<right>``. A side containing ``##name##`` is replaced by
    ``input['metadata']['user'][name]``; a side without a placeholder is used
    literally (it used to raise ``IndexError``).

    :param input: task payload.
    :return: string of the form ``" SET a=1,b=2"``.
    :raises KeyError: if a referenced user key is missing.
    :raises ValueError: if an entry contains no ``=``.
    """
    metadata = input["metadata"]
    user = metadata["user"]
    assignments = []
    for line in metadata["admin"]["Set"]:
        sides = []
        # Split on the first '=' only, so a literal right-hand side may
        # contain '=' itself.
        for part in line.split("=", 1):
            user_match = re.findall('##(.*?)##', part)
            sides.append(user[user_match[0]] if user_match else part.strip())
        if len(sides) != 2:
            raise ValueError("Set entry must look like 'column=value': {!r}".format(line))
        # NOTE(review): values are inserted without quoting or escaping, so
        # they must come from a trusted source.
        assignments.append(str(sides[0]) + "=" + str(sides[1]))
    return " SET " + ",".join(assignments)
|||
|
|||
def get_filter(data):
    """Build a ``WHERE`` clause from a nested OR/AND filter description.

    ``data['OR']`` is a list of groups, each with an ``AND`` list of
    ``{"key", "operator", "value"}`` conditions. Conditions in a group are
    joined with ``and``; groups are joined with ``or``. String values are
    wrapped in single quotes.

    :param data: filter mapping; anything without an ``OR`` entry yields ``""``.
    :return: ``" WHERE ..."`` or ``""`` when there is nothing to filter on.
    """
    if not isinstance(data, dict) or "OR" not in data:
        return ""
    groups = []
    for single_and in data["OR"]:
        conditions = []
        for cond in single_and["AND"]:
            val = cond["value"]
            if isinstance(val, str):
                # NOTE(review): the value is quoted but not escaped; a quote
                # inside it breaks the statement, so it must be trusted.
                val = "'" + val + "'"
            conditions.append(str(cond["key"]) + cond["operator"] + str(val))
        # Joining by position avoids the previous "is this dict equal to the
        # last one" test, which dropped the connective whenever a condition
        # (or group) was repeated.
        groups.append(" and ".join(conditions))
    if not groups:
        return ""
    return " WHERE " + " or ".join(groups)
|||
|
|||
|
|||
def mysqlQuery(input, logging):
    """Run a SELECT described by ``input['metadata']['admin']``.

    ``columnNames`` selects the columns (``*`` when empty) and ``Filter`` is
    turned into a WHERE clause by ``get_filter``.

    :param input: task payload.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``;
        ``results`` is the fetched rows serialised as a JSON string.
        ``successCode`` is ``"0"`` and ``errorLog`` holds the traceback on error.
    :raises KeyError: if a required setting is missing.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    data = input["metadata"]["admin"]
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    p_table = data["Table"]
    p_columnNames = data["columnNames"]
    column = ','.join(p_columnNames) if p_columnNames else '*'
    where = get_filter(data["Filter"])
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        # NOTE(review): the statement is assembled by string concatenation;
        # column names, table name and filter must come from a trusted source.
        sql = "SELECT " + column + " From " + p_table + where
        with db.cursor() as cursor:
            cursor.execute(sql)
            result = cursor.fetchall()
        res["results"] = json.dumps(result, ensure_ascii=False)
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # query raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
def mysqlDelete(input, logging):
    """Run a DELETE described by ``input['metadata']['admin']``.

    The WHERE clause is produced by ``get_filter`` from the ``Filter`` entry.

    :param input: task payload.
    :param logging: logger-like object providing ``error``.
    :return: dict with ``successCode``, ``errorLog`` and ``results``;
        ``successCode`` is ``"0"`` and ``errorLog`` holds the traceback on error.
    :raises KeyError: if a required setting is missing.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    data = input["metadata"]["admin"]
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    p_table = data["Table"]
    where = get_filter(data["Filter"])
    db = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        # NOTE(review): when the filter yields an empty WHERE clause this
        # deletes every row of the table; the statement is also built by
        # string concatenation and must only receive trusted input.
        sql = "DELETE From " + p_table + where
        with db.cursor() as cursor:
            cursor.execute(sql)
        db.commit()
    except Exception:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
    finally:
        # Release the connection on every path; it used to leak whenever the
        # statement raised.
        if db is not None and db.open:
            db.close()
    return res
|||
|
|||
|
|||
if __name__ == "__main__":
    # Manual smoke test for mysqlUpdate().
    import logging as _logging
    import os

    _logging.basicConfig(level=_logging.INFO)

    input = {
        "metadata": {
            "admin": {
                "type": "query",
                "Table": "student",
                "columnNames": ["name", "age"],
                "Set": ["##tag1##=##value1##", "##tag2##=##value2##"],
                "Filter": {
                    "OR": [
                        {
                            "AND": [{"key": "age", "value": 20, "operator": ">"},
                                    {"key": "weight", "value": 50, "operator": "<"}]
                        },
                        {
                            "AND": [{"key": "name", "value": "ff", "operator": "="}]
                        }
                    ]
                },
                "Host": "172.26.28.30",
                "Port": "3306",
                "Database": "test",
                "User": "crawl",
                # The password is read from the environment instead of being
                # committed to the repository.
                "Password": os.environ.get("MYSQL_PASSWORD", "")
            },
            # get_updateSet() reads input["metadata"]["user"]; this mapping
            # used to sit next to "metadata", which raised KeyError.
            "user": {
                "tag1": "age",
                "tag2": "weight",
                "value1": 2,
                "value2": 100
            }
        }
    }
    # mysqlUpdate() calls logging.error() on failure, so pass a real logger
    # rather than an empty string.
    res = mysqlUpdate(input, _logging.getLogger(__name__))
    print(res)
@ -0,0 +1,51 @@ |
|||
#coding:utf8 |
|||
import os, sys |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from text_analysis.tools import to_kafka |
|||
from tools.mysql_helper import mysqlConn,mysqlInsert,mysqlQuery,mysqlExecute,mysqlUpdate,mysqlDelete,getTableColumnNames |
|||
import traceback |
|||
import time |
|||
from log_util.set_logger import set_logger |
|||
logging=set_logger('results.log') |
|||
|
|||
from views import task_queue |
|||
|
|||
def process_data():
    """Worker loop: take tasks from ``task_queue``, run them, publish results.

    ``raw_data["metadata"]["admin"]["type"]`` selects the MySQL operation
    (execute, query, insert, update or delete). The handler's result is stored
    in ``raw_data["result"]`` and the task is sent to Kafka. An unknown type
    leaves the result as an empty string. The loop never returns.
    """
    handlers = {
        'insert': mysqlInsert,
        'execute': mysqlExecute,
        'update': mysqlUpdate,
        'query': mysqlQuery,
        'delete': mysqlDelete,
    }
    while True:
        try:
            if task_queue.qsize() > 0:
                try:
                    raw_data = task_queue.get()
                    res = ""
                    logging.info("启动数据处理线程——")
                    logging.info(raw_data)
                    flag = raw_data["metadata"]["admin"]["type"]
                    handler = handlers.get(flag)
                    if handler is not None:
                        res = handler(raw_data, logging)
                    raw_data["result"] = res
                    logging.info("************写入kafka***********")
                    # send_kafka(data, logging) needs the logger as its second
                    # argument; calling it with one argument raised TypeError,
                    # so no result was ever published.
                    to_kafka.send_kafka(raw_data, logging)
                except Exception:
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                    raw_data["result"]["errorLog"] = traceback.format_exc()
                    to_kafka.send_kafka(raw_data, logging)
            else:
                logging.info("暂无任务,进入休眠--")
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
@ -0,0 +1,171 @@ |
|||
# -*- coding: utf-8 -*- |
|||
import time |
|||
import threading |
|||
from selenium import webdriver |
|||
import json |
|||
from urllib.parse import urljoin |
|||
from kakfa_util import KafkaConsume |
|||
from kakfa_util import kafkaProduce |
|||
from logUtil import get_logger |
|||
from Go_fastDfs import uploadFile |
|||
import traceback |
|||
import queue |
|||
import configparser |
|||
import os, sys |
|||
import re |
|||
logger = get_logger("./logs/crawlWebsrcCode.log") |
|||
#加载配置文件 |
|||
configFile = './config.ini' |
|||
# 创建配置文件对象 |
|||
con = configparser.ConfigParser() |
|||
# 读取文件 |
|||
con.read(configFile, encoding='utf-8') |
|||
kafkaConfig = dict(con.items('kafka'))#kafka配置信息 |
|||
goFastdfsConfig = dict(con.items('goFastdfs'))#goFastdfs配置信息 |
|||
class Spider(object):
    """Headless-Chrome helper that saves one page as an MHTML snapshot."""

    def __init__(self, url):
        """Start a browser session for ``url`` (the page is loaded later)."""
        self.chromeOptions = self.get_profile()
        self.browser = self.get_browser()
        self.url = url

    def get_profile(self):
        """Return the Chrome options used for the session."""
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument('--headless')  # headless Chrome
        chromeOptions.add_argument('--disable-gpu')  # disable the GPU
        # chromeOptions.add_argument('window-size=1280,800')  # fixed browser resolution
        chromeOptions.add_argument("--no-sandbox")
        return chromeOptions

    def get_browser(self):
        """Create the Chrome driver with the options from ``get_profile``."""
        browser = webdriver.Chrome("D:\\工作使用\\zhaoshang\\chromedriver.exe", chrome_options=self.chromeOptions)
        return browser

    def _get_page(self, path):
        '''
        Load the page, capture it as MHTML, write it to a file and return the path.

        :param path: prefix (directory) the snapshot file name is appended to.
        :return: tuple ``(pathName, title)`` - file written and page title.
        '''
        self.browser.get(self.url)
        time.sleep(5)
        logger.info("休眠结束")
        # Scroll down in steps of 10000 pixels to reach the bottom of the page.
        scrollTop = 10000
        for num in range(1, 10):
            js = "var q=document.documentElement.scrollTop={}".format(scrollTop * num)
            logger.info("第{}次滚动".format(num))
            self.browser.execute_script(js)
            time.sleep(5)
        # Run the Chrome DevTools command that returns the page as MHTML.
        res = self.browser.execute_cdp_cmd('Page.captureSnapshot', {})
        # Fetch the page title; fall back to a placeholder when unavailable.
        title = '无标题'
        try:
            title = self.browser.find_element_by_css_selector("title").get_attribute("textContent")
        except Exception:
            logger.error('获取标题异常----')
            traceback.print_exc()
        # Page titles regularly contain characters that are not valid in file
        # names (path separators, ':', '?', line breaks ...), which made
        # open() fail. Only the file name is sanitised; the title returned to
        # the caller stays unchanged.
        safe_title = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip() or '无标题'
        pathName = '{}{}.mhtml'.format(path, safe_title)
        # Explicit UTF-8 so pages with characters outside the platform default
        # encoding can still be written.
        with open(pathName, 'w', newline='', encoding='utf-8') as f:
            f.write(res['data'])
        return pathName, title
|||
if __name__ == '__main__':
    # Task queue shared by the Kafka reader thread and the crawl loop below.
    task_queue = queue.Queue()
    # Thread that keeps reading tasks from Kafka.
    logger.info("开启读取kafka线程---")
    t = threading.Thread(target=KafkaConsume, name='LoopThread',
                         args=(kafkaConfig['read_topic'], kafkaConfig['address'],
                               kafkaConfig['group_id'], task_queue, logger))
    t.daemon = True
    t.start()

    # Compiled once; the pattern itself is unchanged.
    url_pattern = re.compile(u'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]')

    def send_result(resultData):
        """Publish one result record to the configured Kafka topics."""
        # ensure_ascii=False keeps non-ASCII text readable without the former
        # unicode_escape round trip, which also un-escaped quotes and
        # backslashes (invalid JSON) and failed on characters outside the BMP.
        kafkaProduce(kafkaConfig['data_topics'],
                     json.dumps(resultData, ensure_ascii=False).encode('utf-8'),
                     kafkaConfig['address'])

    def send_failure(task_id, message):
        """Publish a code-500 record for ``task_id`` and pause briefly."""
        send_result({'code': 500, 'id': task_id, 'message': message})
        time.sleep(2)

    # Take tasks and keep each page in its original format.
    while True:
        task = None
        try:
            if task_queue.qsize() == 0:
                logger.info("暂无任务,进入休眠--")
                time.sleep(10)
                continue
            taskStr = task_queue.get()
            logger.info('当前任务:{}'.format(taskStr))
            task = json.loads(taskStr)
            if not url_pattern.search(task['url']):
                # The message had no placeholder, so the URL was never logged.
                logger.error('非正确url:{}'.format(task['url']))
                send_failure(task['id'], '非正确url')
                continue
            l = Spider(task['url'])
            try:
                pathName, title = l._get_page(goFastdfsConfig['path'])
            finally:
                # Always end the browser session, also when the capture fails.
                l.browser.quit()
            try:
                # Upload to go-fastdfs and write the outcome to Kafka.
                if '404 Not Found' in title:
                    logger.error('页面404,无效')
                    send_failure(task['id'], '页面404')
                    continue
                try:
                    uploadStr = uploadFile('{}upload'.format(goFastdfsConfig['uploadaddress']), pathName, logger)
                    uploadJson = json.loads(uploadStr)
                except Exception:
                    logger.error('文件上传异常----')
                    traceback.print_exc()
                    send_failure(task['id'], '文件上传失败')
                    continue
                send_result({
                    'code': 200,
                    'id': task['id'],
                    'url': goFastdfsConfig['downloadaddress'] + uploadJson['path'],
                    'title': title,
                    'delMd5': uploadJson['md5'],
                    'uploadTime': uploadJson['mtime'],
                    'message': '成功'
                })
                logger.info('数据写入成功')
            finally:
                # Remove the local snapshot on every outcome, so failed tasks
                # no longer leave files behind.
                if os.path.exists(pathName):
                    os.remove(pathName)
                    logger.info('清除文件:{}'.format(pathName))
                else:
                    logger.info('要删除的文件不存在:{}'.format(pathName))
        except Exception:
            logger.error('未知异常----')
            traceback.print_exc()
            # ``task`` is reset for every iteration, so a failure before the
            # task was parsed can no longer raise NameError or report the id
            # of an earlier task.
            failed_id = task.get('id') if isinstance(task, dict) else None
            try:
                send_failure(failed_id, '未知异常')
            except Exception:
                # Keep the loop alive when the failure report cannot be sent.
                logger.error(traceback.format_exc())
|||
|
@ -0,0 +1,25 @@ |
|||
#coding:utf8 |
|||
import traceback |
|||
import json |
|||
from kafka import KafkaProducer |
|||
from text_analysis.read_config import load_config |
|||
config=load_config() |
|||
|
|||
def send_kafka(data, logging):
    """Serialise ``data`` as JSON and publish it to the configured topic(s).

    The topic list is read from ``config["kafka"]["topic"]``. Failures are
    logged and not re-raised.

    :param data: JSON-serialisable object to send.
    :param logging: logger-like object providing ``info`` and ``error``.
    """
    try:
        topic = config["kafka"]["topic"]
        data1 = json.dumps(data, ensure_ascii=False)
        kafkaProduce(topic, bytes(data1, encoding='utf-8'))
        logging.info("数据推入kafka!")
    except Exception:
        # Report failures at ERROR level so they are not lost among routine
        # INFO messages.
        logging.error(traceback.format_exc())
        logging.error('写入kafka失败')
|||
|
|||
def kafkaProduce(topic, resultData):
    """Send ``resultData`` to every topic of a comma-separated topic list.

    The broker address is read from ``config["kafka"]["bootstrap_servers"]``.

    :param topic: one topic name, or several separated by ``,``.
    :param resultData: message value (bytes) handed to ``KafkaProducer.send``.
    """
    producer = KafkaProducer(bootstrap_servers='{}'.format(config["kafka"]["bootstrap_servers"]),
                             max_request_size=52428800)
    try:
        for tc in topic.split(','):
            producer.send(tc, resultData)
        producer.flush()
    finally:
        # A producer is created per call; without close() each call left one
        # behind.
        producer.close()
@ -0,0 +1,74 @@ |
|||
#coding:utf8 |
|||
import traceback |
|||
from pykafka import KafkaClient |
|||
# from pykafka import partitioners |
|||
# from pykafka.simpleconsumer import OwnedPartition, OffsetType |
|||
import json |
|||
from tqdm import tqdm |
|||
# from kafka import KafkaProducer |
|||
from pykafka.simpleconsumer import OwnedPartition, OffsetType |
|||
|
|||
def send_kafka(data, logging):
    """Serialise ``data`` as JSON and publish it to the ``analyze`` topic.

    Failures are logged and not re-raised.

    :param data: JSON-serialisable object to send.
    :param logging: logger-like object providing ``info`` and ``error``.
    """
    try:
        topic = 'analyze'
        client = KafkaClient(hosts='172.26.28.30:9092', socket_timeout_ms=10 * 1000)
        data1 = json.dumps(data, ensure_ascii=False)
        # The producer was never stopped before, so a queued message could be
        # dropped when the object was discarded. Leaving the with-block stops
        # the producer.
        with client.topics[topic].get_producer() as producer:
            producer.produce(bytes(data1, encoding='utf-8'))
        logging.info("数据推入kafka!")
    except Exception:
        logging.error(traceback.format_exc())
        logging.error('写入kafka失败')
|||
# def kafkaProduce(topic,resultData): |
|||
# producer = KafkaProducer(bootstrap_servers = '{}'.format("172.26.28.30:9092")) |
|||
# topics = topic.split(',') |
|||
# for tc in topics: |
|||
# future = producer.send(tc,resultData) |
|||
# producer.flush() |
|||
|
|||
def consumer():
    """Print offset and decoded value of every message on the ``analyze`` topic.

    Debug helper: connects to the broker at ``172.26.28.30:9092`` as consumer
    group ``my_consumer_group``, with offsets reset to the latest position on
    start.
    """
    client = KafkaClient(hosts='172.26.28.30:9092')
    analyze_topic = client.topics['analyze']
    simple_consumer = analyze_topic.get_simple_consumer(
        consumer_group='my_consumer_group',
        auto_offset_reset=OffsetType.LATEST,
        reset_offset_on_start=True)

    # Consume the data.
    for message in simple_consumer:
        if message is None:
            continue
        print(message.offset, message.value.decode())
|||
|
|||
if __name__ == "__main__":
    # Run as a script: print the messages of the topic (see consumer()).
    # send_kafka()
    consumer()
178
text_analysis/tools/tool.py
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,44 @@ |
|||
''' |
|||
监听数据 |
|||
"{ |
|||
"scenes_id":2222, |
|||
"operation":"stop", |
|||
"version":5 |
|||
}" |
|||
scenes_id=2222 |
|||
version!=0 |
|||
|
|||
''' |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
import time |
|||
|
|||
# 连接到ZooKeeper服务器 |
|||
zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181') |
|||
zk.start() |
|||
|
|||
# 定义数据变更时的回调函数 |
|||
def data_change_listener(event):
    """Report a change or deletion of the ``/analyze`` node.

    :param event: watch event; its ``type`` is compared with
        ``EventType.CHANGED`` and ``EventType.DELETED``.
    """
    if event.type == EventType.CHANGED:
        data, stat = zk.get("/analyze")
        # The f prefix was missing, so the literal text
        # "{data.decode('utf-8')}" was printed instead of the node content.
        print(f"Data changed on node /analyze: {data.decode('utf-8')}")
    elif event.type == EventType.DELETED:
        print("Node /analyze has been deleted")
|||
|
|||
# 设置监听器 |
|||
@zk.DataWatch("/analyze")
def watch_node(data, stat, event):
    """DataWatch callback for ``/analyze``; forwards real events to the listener."""
    # Calls that carry no event are ignored.
    if event is None:
        return
    data_change_listener(event)
|||
|
|||
# 保持程序运行以监听节点变化 |
|||
try:
    # Keep the process alive so the watch keeps receiving node changes.
    while True:
        print("ok")
        time.sleep(1)
except KeyboardInterrupt:
    print("Stopping...")
finally:
    # Close the connection on every exit path, not only after Ctrl+C.
    zk.stop()
    zk.close()
@ -0,0 +1,13 @@ |
|||
from django.conf.urls import include, url |
|||
from django.contrib import admin |
|||
from text_analysis import views |
|||
|
|||
# URL routing table: only the chatGptNew endpoint is active; the MySQL
# endpoints below are kept disabled.
urlpatterns = [
    url(r'^chatGptNew', views.chatGptNew, name='chatGptNew'),
    # url(r'^mysqlConnection',views.mysqlConnection, name='mysqlConnection'),
    # url(r'^mysqlField', views.mysqlField, name='mysqlField')
]
|||
|
|||
|
@ -0,0 +1,148 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
import time |
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
# import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
import time |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
|
|||
import queue |
|||
task_queue = queue.PriorityQueue() |
|||
stop_dict={} |
|||
from text_analysis.read_config import load_config |
|||
config=load_config() |
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a ChatGPT task via POST and put it on the task queue.

    The request body must be JSON. The task is queued as
    ``(priority, timestamp, payload)``: priority ``-1`` when the payload has
    ``"trace": true``, otherwise ``1``.

    :param request: Django request object.
    :return: ``HttpResponse`` with a JSON body ``{"code", "msg"}``; ``code``
        is 1 when the task was queued and 0 otherwise.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # Lower numbers are served first by the PriorityQueue, so traced
        # requests overtake normal ones.
        # NOTE(review): two entries with equal priority and equal timestamp
        # would make the queue compare the payload dicts (TypeError) - confirm
        # whether a tie-breaker is needed.
        priority = -1 if "trace" in raw_data.keys() and raw_data["trace"] == True else 1
        task_queue.put((priority, time.time(), raw_data))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except Exception:
        # Catch Exception rather than everything, so interpreter-exit signals
        # are not reported as a malformed request.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take tasks from ``task_queue``, call the GPT endpoint, publish to Kafka.

    Runs forever. For each dequeued ``(priority, timestamp, raw_data)`` entry it
    builds an empty result skeleton from ``raw_data["output"]``, drops the task
    if ``stop_dict`` holds a different version for its ``scenes_id``, posts the
    prompt to ``config["gptmodel"]["url"]`` and stores the outcome under
    ``raw_data["result"]`` before handing ``raw_data`` to
    ``to_kafka.send_kafka``. Failures are reported through the same channel
    with ``successCode`` "0".
    """
    # SECURITY NOTE(review): proxy credentials are hard-coded in source; they
    # should be moved to configuration/secret storage. Left unchanged here so
    # runtime behavior stays the same.
    proxies = {
        'http': 'http://jian.mao:maojian123@@oversea_vpn.baifendian.com:3128',
        'https': 'http://jian.mao:maojian123@@oversea_vpn.baifendian.com:3128'
    }
    while True:
        # Reset per iteration so the error handler can never see (or crash on)
        # the previous task's data when the failure happens before/at dequeue.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() <= 0:
                time.sleep(10)
                continue
            p, t, raw_data = task_queue.get(timeout=1)
            logging.info("当前任务队列长度{}".format(task_queue.qsize()+1))
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            res_tmp["isLast"] = 1
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            logging.info("当前version信息为:{}".format(stop_dict))
            # A recorded version that differs from the task's means the task was stopped.
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                continue
            data = get_content(raw_data, logging)
            url = config["gptmodel"]["url"]
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180, proxies=proxies)
            logging.info("GPT返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            # fieldType: 0 means plain text, anything else is parsed as JSON.
            fieldType = raw_data["input"]['fieldType']
            if fieldType == 0:
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
            else:
                res = parse_gptResult(res_tmp, result)
                if res:
                    res["isLast"] = 1
                    res_tmp_json = json.dumps(res, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json, "status": 2, "message": "GPT返回结果非json格式"}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except queue.Empty:
            # Another worker took the task between qsize() and get().
            logging.info("该线程任务队列为空,等待新任务")
        except Exception:
            error_log = traceback.format_exc()
            if raw_data is None:
                # Nothing was dequeued, so there is no task to report against.
                logging.error(error_log)
                continue
            try:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": error_log, "results": res_tmp_json, "status": 2, "message": "异常"}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_log))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed; keep the worker thread alive.
                logging.error(traceback.format_exc())
|||
|
|||
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror stop requests into ``stop_dict``.

    Connects to the ensemble named by ``config['zookeeper']['zkhost']``,
    registers a data watch and then sleeps in a loop so the watch stays
    active. Every CHANGED event re-reads the node, parses it as JSON and
    records ``version`` and ``operation`` under its ``scenes_id``. Any error
    is logged; the function does not raise.
    """
    try:
        # Production ensemble comes from configuration.
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        zk.start()

        # Register the listener.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # The initial invocation has no event; only react to data changes.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            d = json.loads(data)
            # Publish the complete entry in one assignment: worker threads read
            # stop_dict[scenes_id]["version"] concurrently and must never see a
            # half-built entry without the "version" key.
            stop_dict[d["scenes_id"]] = {
                "version": d["version"],
                "operation": d["operation"],
            }

        # Keep the thread alive so node changes keep being delivered.
        try:
            while True:
                time.sleep(1)
        except Exception:
            logging.info("Stopping...")
        # Close the connection.
        zk.stop()
        zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,148 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
# import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
import time |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
|
|||
import queue |
|||
task_queue = queue.PriorityQueue() |
|||
stop_dict={} |
|||
from text_analysis.read_config import load_config |
|||
config=load_config() |
|||
|
|||
import itertools


class _QueuedTask(dict):
    """Request payload that can be ordered inside the priority queue.

    Queue entries are ``(priority, payload)`` tuples. With plain dict payloads
    two entries of equal priority make the heap compare the dicts, which
    raises ``TypeError``. This subclass behaves like the original dict for
    consumers but orders by arrival sequence, so ties resolve first-in,
    first-out.
    """

    _sequence = itertools.count()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._order = next(self._sequence)

    def __lt__(self, other):
        return self._order < getattr(other, "_order", self._order)


@csrf_exempt
def chatGptNew(request):
    """Accept a POSTed JSON task and put it on ``task_queue``.

    Requests whose body carries ``"trace": true`` are enqueued with priority
    -1, everything else with priority 1. Entries keep the ``(priority,
    payload)`` shape; the payload is wrapped in ``_QueuedTask`` so equal
    priorities no longer fail when the queue orders them. The response is a
    JSON document with ``code`` 1 on success and ``code`` 0 on a non-POST
    request or on any failure while parsing/enqueueing the body.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # Evaluated before wrapping so non-object JSON is still rejected here.
        traced = "trace" in raw_data.keys() and raw_data["trace"] == True
        priority = -1 if traced else 1
        task_queue.put((priority, _QueuedTask(raw_data)))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take tasks from ``task_queue``, call the GPT endpoint, publish to Kafka.

    Runs forever. For each dequeued ``(priority, raw_data)`` entry it builds an
    empty result skeleton from ``raw_data["output"]``, drops the task if
    ``stop_dict`` holds a different version for its ``scenes_id``, posts the
    prompt to ``config["gptmodel"]["url"]`` and stores the outcome under
    ``raw_data["result"]`` before handing ``raw_data`` to
    ``to_kafka.send_kafka``. Failures are reported through the same channel
    with ``successCode`` "0".
    """
    # SECURITY NOTE(review): proxy credentials are hard-coded in source; they
    # should be moved to configuration/secret storage. Left unchanged here so
    # runtime behavior stays the same.
    proxies = {
        'http': 'http://jian.mao:maojian123@@oversea_vpn.baifendian.com:3128',
        'https': 'http://jian.mao:maojian123@@oversea_vpn.baifendian.com:3128'
    }
    while True:
        # Reset per iteration so the error handler can never see (or crash on)
        # the previous task's data when the failure happens before/at dequeue.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() <= 0:
                time.sleep(10)
                continue
            p, raw_data = task_queue.get(timeout=1)
            logging.info("当前任务队列长度{}".format(task_queue.qsize()+1))
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            res_tmp["isLast"] = 1
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            logging.info("当前version信息为:{}".format(stop_dict))
            # A recorded version that differs from the task's means the task was stopped.
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                continue
            data = get_content(raw_data, logging)
            url = config["gptmodel"]["url"]
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180, proxies=proxies)
            logging.info("GPT返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            # fieldType: 0 means plain text, anything else is parsed as JSON.
            fieldType = raw_data["input"]['fieldType']
            if fieldType == 0:
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
            else:
                res = parse_gptResult(res_tmp, result)
                if res:
                    res["isLast"] = 1
                    res_tmp_json = json.dumps(res, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json, "status": 2, "message": "GPT返回结果非json格式"}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except queue.Empty:
            # Another worker took the task between qsize() and get().
            logging.info("该线程任务队列为空,等待新任务")
        except Exception:
            error_log = traceback.format_exc()
            if raw_data is None:
                # Nothing was dequeued, so there is no task to report against.
                logging.error(error_log)
                continue
            try:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": error_log, "results": res_tmp_json, "status": 2, "message": "异常"}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_log))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed; keep the worker thread alive.
                logging.error(traceback.format_exc())
|||
|
|||
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror stop requests into ``stop_dict``.

    Connects to the ensemble named by ``config['zookeeper']['zkhost']``,
    registers a data watch and then sleeps in a loop so the watch stays
    active. Every CHANGED event re-reads the node, parses it as JSON and
    records ``version`` and ``operation`` under its ``scenes_id``. Any error
    is logged; the function does not raise.
    """
    try:
        # Production ensemble comes from configuration.
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        zk.start()

        # Register the listener.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # The initial invocation has no event; only react to data changes.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            d = json.loads(data)
            # Publish the complete entry in one assignment: worker threads read
            # stop_dict[scenes_id]["version"] concurrently and must never see a
            # half-built entry without the "version" key.
            stop_dict[d["scenes_id"]] = {
                "version": d["version"],
                "operation": d["operation"],
            }

        # Keep the thread alive so node changes keep being delivered.
        try:
            while True:
                time.sleep(1)
        except Exception:
            logging.info("Stopping...")
        # Close the connection.
        zk.stop()
        zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,142 @@ |
|||
# coding:utf8 |
|||
import os, sys |
|||
import io |
|||
|
|||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
|||
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
|||
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
|||
sys.path.append(cur_dir) |
|||
sys.path.append(par_dir) |
|||
import json |
|||
from django.http import HttpResponse |
|||
from text_analysis.tools import to_kafka |
|||
from django.views.decorators.csrf import csrf_exempt |
|||
from log_util.set_logger import set_logger |
|||
|
|||
logging = set_logger('logs/results.log') |
|||
import traceback |
|||
import queue |
|||
import requests |
|||
from text_analysis.tools.tool import get_content,parse_gptResult |
|||
import uuid |
|||
import time |
|||
from kazoo.client import KazooClient |
|||
from kazoo.protocol.states import EventType |
|||
|
|||
|
|||
# global task_queue |
|||
task_queue = queue.Queue() |
|||
# global stop_dict |
|||
stop_dict={} |
|||
|
|||
@csrf_exempt
def chatGptNew(request):
    """Accept a POSTed JSON task and put it on ``task_queue``.

    The body must decode to a JSON object. Other JSON values (lists, strings,
    numbers) are rejected here because the worker indexes the task by string
    keys and writes its result back into it. The response is a JSON document
    with ``code`` 1 on success and ``code`` 0 on a non-POST request or on an
    unusable body.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        task_queue.put(raw_data)
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
|||
|
|||
|
|||
def chatgpt():
    """Worker loop: take tasks from ``task_queue``, call the OpenAI chat API, publish to Kafka.

    Runs forever, sleeping 10 seconds whenever the queue is empty. Each task is
    dropped if ``stop_dict`` holds a different version for its ``scenes_id``;
    otherwise the prompt is posted to the chat-completions endpoint and the
    outcome is stored under ``raw_data["result"]`` before ``raw_data`` is
    handed to ``to_kafka.send_kafka``. Failures are reported through the same
    channel with ``successCode`` "0".
    """
    while True:
        if task_queue.qsize() <= 0:
            time.sleep(10)
            continue

        # Reset per task so the error handler can never see (or crash on) the
        # previous task's data when the failure happens early.
        raw_data = None
        res_tmp = {}
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            logging.info("当前version信息为:{}".format(stop_dict))
            # A recorded version that differs from the task's means the task was stopped.
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                continue
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            res_tmp["isLast"] = 1
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            logging.info("prompt为{}".format(data["prompt"]))
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            logging.info("GPT返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            # fieldType: 0 means plain text, anything else is parsed as JSON.
            fieldType = raw_data["input"]['fieldType']
            if fieldType == 0:
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
            else:
                res = parse_gptResult(res_tmp, result)
                if res:
                    res["isLast"] = 1
                    res_tmp_json = json.dumps(res, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json, "status": 2, "message": "GPT返回结果非json格式"}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_log = traceback.format_exc()
            if raw_data is None:
                # Nothing was dequeued, so there is no task to report against.
                logging.error(error_log)
                continue
            try:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": error_log, "results": res_tmp_json, "status": 2, "message": "异常"}
                logging.info("调用gpt失败{}-{}".format(raw_data, error_log))
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting itself failed; keep the worker thread alive.
                logging.error(traceback.format_exc())
|||
|
|||
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror stop requests into ``stop_dict``.

    Connects to the hard-coded production ensemble, registers a data watch and
    then sleeps in a loop so the watch stays active. Every CHANGED event
    re-reads the node, parses it as JSON and records ``version`` and
    ``operation`` under its ``scenes_id``. Any error is logged; the function
    does not raise.
    """
    try:
        # Production environment.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test environment:
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        # Register the listener.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # The initial invocation has no event; only react to data changes.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            d = json.loads(data)
            # Publish the complete entry in one assignment: worker threads read
            # stop_dict[scenes_id]["version"] concurrently and must never see a
            # half-built entry without the "version" key.
            stop_dict[d["scenes_id"]] = {
                "version": d["version"],
                "operation": d["operation"],
            }

        # Keep the thread alive so node changes keep being delivered.
        try:
            while True:
                time.sleep(1)
        except Exception:
            logging.info("Stopping...")
        # Close the connection.
        zk.stop()
        zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|||
|
|||
|
@ -0,0 +1,16 @@ |
|||
""" |
|||
WSGI config for Zhijian_Project_WebService project. |
|||
|
|||
It exposes the WSGI callable as a module-level variable named ``application``. |
|||
|
|||
For more information on this file, see |
|||
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
|||
""" |
|||
|
|||
import os

from django.core.wsgi import get_wsgi_application

# Select the project settings unless the environment already names a module.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "text_analysis.settings"

# Module-level WSGI callable.
application = get_wsgi_application()
@ -0,0 +1,8 @@ |
|||
[uwsgi] |
|||
http = 0.0.0.0:9012 |
|||
chdir = ../chatGptNew |
|||
wsgi-file = ../chatGptNew/wsgi.py |
|||
processes = 1 |
|||
threads = 2 |
|||
listen = 1024 |
|||
http-timeout=21600 |
@ -0,0 +1,35 @@ |
|||
""" |
|||
WSGI config for Zhijian_Project_WebService project. |
|||
|
|||
It exposes the WSGI callable as a module-level variable named ``application``. |
|||
|
|||
For more information on this file, see |
|||
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
|||
""" |
|||
|
|||
import os
import threading
from text_analysis.views import chatgpt,zk_monitoring

# Start 5 chatgpt worker threads. They are daemons, so they do not keep the
# process alive on shutdown.
num_threads = 5
chatgpt_threads = [
    threading.Thread(target=chatgpt, daemon=True) for _ in range(num_threads)
]
for thread in chatgpt_threads:
    thread.start()

# Start the ZooKeeper monitoring thread, also as a daemon.
t = threading.Thread(target=zk_monitoring, name='zk_monitoring', daemon=True)
t.start()

from django.core.wsgi import get_wsgi_application

# Select the project settings unless the environment already names a module.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "text_analysis.settings"

# Module-level WSGI callable.
application = get_wsgi_application()
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,25 @@ |
|||
""" |
|||
WSGI config for Zhijian_Project_WebService project. |
|||
|
|||
It exposes the WSGI callable as a module-level variable named ``application``. |
|||
|
|||
For more information on this file, see |
|||
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
|||
""" |
|||
|
|||
import os
import threading
from text_analysis.views import chatgpt

# Start the single chatgpt worker as a daemon thread, so it does not keep the
# process alive on shutdown.
t = threading.Thread(target=chatgpt, name='chatgpt', daemon=True)
t.start()

from django.core.wsgi import get_wsgi_application

# Select the project settings unless the environment already names a module.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "text_analysis.settings"

# Module-level WSGI callable.
application = get_wsgi_application()
|||
|
|||
|
|||
|
|||
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue