Knowledge Base Query Application

# coding:utf8
import os, sys
import io
from jsonpath_ng import jsonpath, parse
import uuid
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from datetime import datetime, timedelta
logging = set_logger('logs/results.log')
import traceback
# import queue
# import requests
# from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from text_analysis.tools.tool import parse_data, promptPro
from text_analysis.chroma1 import LangChainChroma
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# Task queue shared between the Django view (producer) and the upload worker (consumer)
import queue
task_queue = queue.PriorityQueue()
# Latest version per scenes_id as published through ZooKeeper; used to drop stale tasks
stop_dict = {}
from text_analysis.read_config import load_config
config = load_config()
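
The only configuration value this module reads is config['zookeeper']['zkhost'], used by zk_monitoring() below. Purely as an illustration (the file format and concrete hosts are assumptions, not taken from the source), load_config() is expected to yield something shaped like:

    # Hypothetical shape of the object returned by load_config(); only the
    # 'zookeeper' -> 'zkhost' entry is actually used in this module.
    example_config = {
        "zookeeper": {
            # Comma-separated ZooKeeper ensemble, host:port pairs (placeholder values)
            "zkhost": "zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181"
        }
    }
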
@csrf_exempt
def promptSim(request):
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            # Trace requests get priority -1 so they are processed before normal tasks (priority 1)
            if "trace" in raw_data.keys() and raw_data["trace"] == True:
                task_queue.put((-1, time.time(), raw_data))
            else:
                task_queue.put((1, time.time(), raw_data))
            return HttpResponse(json.dumps({"code": 1, "msg": "Request OK!"}, ensure_ascii=False))
        except Exception:
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "Request JSON is malformed!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "Wrong request method; use POST instead"}, ensure_ascii=False))
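
For illustration, a task the promptSim view would accept might look like the request below. The URL path and all values are placeholders (assumptions); the field names, however, are the ones upload() reads from each queued task:

    # Hypothetical client call; the route and all values are placeholders.
    import requests

    payload = {
        "scenes_id": "demo-scene",          # task id, compared against stop_dict
        "version": "1",                     # task version, compared against stop_dict
        "trace": False,                     # True puts the task at the front of the priority queue
        "input": {
            "topn": "3",                    # literal value or a ":$[...]" reference resolved by parse_data
            "prompt": "What does the knowledge base say about topic X?",
            "fieldName": "demo_collection"  # Chroma collection to search
        },
        "output": ["id", "isLast", "promptRes"]  # keys of the result object assembled in upload()
    }
    resp = requests.post("http://localhost:8000/promptSim", json=payload)
    print(resp.json())  # {"code": 1, "msg": "Request OK!"}
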
def upload():
    while True:
        # raw_data / res_tmp start empty so the error handler below can tell
        # whether a task was actually in flight when an exception occurred
        raw_data, res_tmp = None, {}
        try:
            if task_queue.qsize() > 0:
                p, t, raw_data = task_queue.get(timeout=1)
                logging.info("Current task queue length: {}".format(task_queue.qsize() + 1))
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                res_tmp["isLast"] = 1
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logging.info("Task data: {}".format(raw_data))
                logging.info("Current version info: {}".format(stop_dict))
                # Drop tasks whose version no longer matches the latest one recorded by the ZooKeeper watcher
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logging.info("Task has been stopped; filtering out its data")
                    continue
                # Each input field is either a literal value or a ":$[...]" reference resolved by parse_data
                if ":$[" not in raw_data["input"]["topn"]:
                    topn = raw_data["input"]["topn"]
                else:
                    topn = parse_data(raw_data, raw_data["input"]["topn"])
                if ":$[" not in raw_data["input"]["prompt"]:
                    prompt = raw_data["input"]["prompt"]
                else:
                    prompt = parse_data(raw_data, raw_data["input"]["prompt"])
                if ":$[" not in raw_data["input"]["fieldName"]:
                    fieldName = raw_data["input"]["fieldName"]
                else:
                    fieldName = parse_data(raw_data, raw_data["input"]["fieldName"])
                # Retrieve the top-n most similar documents from the Chroma vector store
                vector_db = LangChainChroma(fieldName)
                docs = vector_db.search(prompt, int(topn))
                vector_db.db_close()
                logging.info("Similar context retrieved from the vector database: {}".format(docs))
                # Assemble the final prompt with the retrieved context (capped at 50,000 characters)
                res = promptPro(prompt, docs)
                logging.info("Generated context: {}".format(res))
                res_tmp["promptRes"] = res
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
                raw_data["result"]["status"] = 1
                raw_data["result"]["message"] = "Success"
                logging.info("Result data: {}".format(raw_data))
                to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending tasks; sleep before polling again
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report the failure back through Kafka if a task was being processed
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "Exception"
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
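
This file does not show how upload() is launched; a common arrangement, sketched here purely as an assumption, is to run it as a daemon thread next to the Django request handlers so promptSim() can return immediately after enqueueing a task:

    # Hypothetical startup sketch (not part of the original file): run the
    # queue consumer in a background thread alongside the Django views.
    import threading

    worker = threading.Thread(target=upload, daemon=True)
    worker.start()
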
def zk_monitoring():
    try:
        # Production environment
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register a watcher on the /analyze node
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("Executing delete operation: {}".format(data))
                d = json.loads(data)
                task_id = d["scenes_id"]
                stop_dict[task_id] = {}
                stop_dict[task_id]["version"] = d["version"]
                stop_dict[task_id]["operation"] = d["operation"]
        # Keep the process alive so the watch keeps firing on node changes
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            logging.info("Stopping...")
            # Close the connection
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
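
zk_monitoring() expects the /analyze node to hold a JSON document with scenes_id, version and operation fields. A minimal publisher-side sketch, with placeholder hosts and values (the "stop" operation string is an assumption), might look like:

    # Hypothetical controller-side sketch: updating the watched node fires the
    # EventType.CHANGED watch registered in zk_monitoring().
    from kazoo.client import KazooClient
    import json

    zk = KazooClient(hosts="zk1.example.com:2181")
    zk.start()
    notice = {"scenes_id": "demo-scene", "version": "2", "operation": "stop"}
    zk.set("/analyze", json.dumps(notice).encode("utf-8"))
    zk.stop()
    zk.close()
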