|
|
# coding:utf8
"""Module bootstrap: UTF-8 stdout, import paths, logger, shared task queue and config."""
import io
import json
import os
import queue
import sys
import time
import traceback
import uuid

import requests
from django.http import HttpResponse
from django.views.decorators.csrf import csrf_exempt
from jsonpath_ng import jsonpath, parse
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType

# Force UTF-8 on stdout so non-ASCII (Chinese) console output is not mangled.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# Make this directory and its parent importable regardless of the CWD.
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)

# Project imports must come after the sys.path adjustments above.
from text_analysis.tools import to_kafka
from text_analysis.tools.tool import parse_data
from text_analysis.chroma1 import LangChainChroma
from text_analysis.read_config import load_config
from log_util.set_logger import set_logger

logging = set_logger('logs/results.log')

# Pending work items as (priority, enqueue_time, payload); lower priority runs first.
task_queue = queue.PriorityQueue()
# scenes_id -> {"version": ..., "operation": ...}; populated by the ZooKeeper watcher.
stop_dict = {}

config = load_config()
@csrf_exempt
def createChroma(request):
    """Accept a task via POST JSON and enqueue it onto ``task_queue``.

    Payloads flagged with ``"trace": true`` are given priority -1 so they
    jump ahead of normal tasks (priority 1) in the PriorityQueue; ties are
    broken by arrival time.

    Returns a JSON ``HttpResponse``: ``{"code": 1, ...}`` on success,
    ``{"code": 0, ...}`` for bad JSON or a non-POST request.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        # `== True` (not `is True`) kept deliberately: it also accepts 1.
        priority = -1 if raw_data.get("trace") == True else 1  # noqa: E712
        task_queue.put((priority, time.time(), raw_data))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except Exception:
        # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit propagate.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
def upload():
    """Worker loop: take tasks from ``task_queue``, index their content into a
    Chroma collection and publish the result to Kafka.

    Runs forever; intended to be started in a background (daemon) thread.
    """
    while True:
        # Reset per-iteration state so the except-branch can never hit a
        # NameError when a failure occurs before these are (re)assigned —
        # the original bare handler referenced unbound names in that case.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                priority, enqueue_ts, raw_data = task_queue.get(timeout=1)
                logging.info("当前任务队列长度{}".format(task_queue.qsize() + 1))
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                res_tmp["isLast"] = 1
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                # Skip data belonging to a stopped/superseded task version
                # (a newer version was registered via the ZooKeeper watcher).
                if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,数据过滤掉")
                    continue

                def _resolve(field):
                    # Input fields are either literal values or jsonpath
                    # references (marked by ':$['), which must be resolved.
                    value = raw_data["input"][field]
                    return value if ':$[' not in value else parse_data(raw_data, value)

                content = _resolve("content")
                fieldName = _resolve("fieldName")
                dataId = _resolve("dataId")
                # Always create the result envelope up front: the original
                # only created it on the happy path, so the empty-input
                # branch raised KeyError writing raw_data["result"][...].
                raw_data["result"] = {"successCode": "", "errorLog": "", "results": ""}
                if content and fieldName and dataId:
                    vector_db = LangChainChroma(fieldName)
                    docs = vector_db.text_splitter.split_text(content)
                    res, db_count = vector_db.add_documents(docs, dataId)
                    vector_db.db_close()
                    logging.info('当前数据划分{}个块。数据库{}共有{}个块'.format(len(res), fieldName, db_count))
                    res_tmp['resultsID'] = res
                    if res:
                        res_tmp["status"] = 1
                        raw_data["result"]["successCode"] = "1"
                        raw_data["result"]["status"] = 1
                        raw_data["result"]["message"] = "成功"
                    else:
                        res_tmp["status"] = 3
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["status"] = 2
                        raw_data["result"]["message"] = "异常"
                else:
                    res_tmp["status"] = 3
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = "请检查content/fieldName/dataId,要求非空"
                    raw_data["result"]["status"] = 2
                    raw_data["result"]["message"] = "请检查content/fieldName/dataId,要求非空"
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("结果数据为:{}".format(raw_data))
                to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending tasks — sleep before polling the queue again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Best-effort failure report; only possible when a task payload
            # was actually dequeued.
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp["status"] = 3
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "异常"
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` for task stop/pause commands.

    On a CHANGED event the node payload (JSON with ``scenes_id``,
    ``version`` and ``operation``) is recorded into the shared
    ``stop_dict`` so the upload worker can drop data for stopped or
    superseded task versions. Blocks forever; run in a dedicated thread.
    """
    try:
        # Production ensemble, taken from the loaded config.
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        # Test environment:
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                # `scene_id` (was `id`) — avoid shadowing the builtin.
                scene_id = d["scenes_id"]
                stop_dict[scene_id] = {
                    "version": d["version"],
                    "operation": d["operation"],
                }

        # Keep this thread alive so the watch stays registered.
        try:
            while True:
                time.sleep(1)
        except (KeyboardInterrupt, SystemExit):
            # Was a bare `except:`; these are the signals that realistically
            # interrupt the sleep loop.
            logging.info("Stopping...")
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|