chroma新增、删除、知识库应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

121 lines
5.0 KiB

  1. # coding:utf8
  2. import os, sys
  3. import io
  4. from jsonpath_ng import jsonpath, parse
  5. import uuid
  6. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
  7. cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
  8. par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
  9. sys.path.append(cur_dir)
  10. sys.path.append(par_dir)
  11. import json
  12. from django.http import HttpResponse
  13. from text_analysis.tools import to_kafka
  14. from django.views.decorators.csrf import csrf_exempt
  15. from log_util.set_logger import set_logger
  16. logging = set_logger('logs/results.log')
  17. import traceback
  18. import queue
  19. import requests
  20. from text_analysis.tools.tool import parse_data
  21. from text_analysis.chroma1 import LangChainChroma
  22. import time
  23. from datetime import datetime
  24. import os
  25. # 任务队列
  26. global task_queue
  27. task_queue = queue.Queue()
  28. # LC = LangChainChroma()
  29. @csrf_exempt
  30. def createChroma(request):
  31. if request.method == 'POST':
  32. try:
  33. raw_data = json.loads(request.body)
  34. task_queue.put(raw_data)
  35. return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
  36. except:
  37. logging.error(traceback.format_exc())
  38. return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
  39. else:
  40. return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
  41. def upload():
  42. while True:
  43. try:
  44. if task_queue.qsize() > 0:
  45. logging.info("取任务队列长度{}".format(task_queue.qsize()))
  46. raw_data = task_queue.get()
  47. output=raw_data["output"]
  48. res_tmp={key: "" for key in output}
  49. if "id" in res_tmp.keys():
  50. res_tmp["id"]=str(uuid.uuid4())
  51. res_tmp["isLast"]=1
  52. logging.info("任务数据为:{}".format(raw_data))
  53. # chunkSize=parse_data(raw_data,raw_data["input"]["chunkSize"])
  54. if ':$[' not in raw_data["input"]["content"]:
  55. content=raw_data["input"]["content"]
  56. else:
  57. content=parse_data(raw_data,raw_data["input"]["content"])
  58. if ':$[' not in raw_data["input"]["fieldName"]:
  59. fieldName=raw_data["input"]["fieldName"]
  60. else:
  61. fieldName=parse_data(raw_data,raw_data["input"]["fieldName"])
  62. if ':$[' not in raw_data["input"]["dataId"]:
  63. dataId=raw_data["input"]["dataId"]
  64. else:
  65. dataId=parse_data(raw_data,raw_data["input"]["dataId"])
  66. # dataId=raw_data["dataId"]
  67. if content and fieldName and dataId:
  68. vector_db=LangChainChroma(fieldName)
  69. docs=vector_db.text_splitter.split_text(content)
  70. res,db_count=vector_db.add_documents(docs,dataId)
  71. vector_db.db_close()
  72. logging.info('当前数据划分{}个块。数据库{}共有{}个块'.format(len(res), fieldName,db_count))
  73. # res=LC.addChroma(content,fieldName,logging,chunkSize)
  74. res_tmp['resultsID']=res
  75. raw_data["result"] = {"successCode": "", "errorLog": "", "results": ""}
  76. if res:
  77. res_tmp["status"]=1
  78. raw_data["result"]["successCode"] = "1"
  79. raw_data["result"]["status"] = 1
  80. raw_data["result"]["message"] = "成功"
  81. else:
  82. res_tmp["status"]=3
  83. raw_data["result"]["successCode"] = "0"
  84. raw_data["result"]["status"] = 2
  85. raw_data["result"]["message"] = "异常"
  86. else:
  87. res_tmp["status"] = 3
  88. raw_data["result"]["successCode"] = "0"
  89. raw_data["result"]["errorLog"] = "请检查content/fieldName/dataId,要求非空"
  90. raw_data["result"]["status"] = 2
  91. raw_data["result"]["message"] = "请检查content/fieldName/dataId,要求非空"
  92. res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
  93. raw_data["result"]["results"]=res_tmp_json
  94. logging.info("结果数据为:{}".format(raw_data))
  95. to_kafka.send_kafka(raw_data, logging)
  96. else:
  97. # 暂无任务,进入休眠
  98. time.sleep(10)
  99. except:
  100. raw_data["result"]={}
  101. raw_data["result"]["successCode"] = "0"
  102. raw_data["result"]["errorLog"] = traceback.format_exc()
  103. res_tmp["status"] = 3
  104. raw_data["result"]["status"] = 2
  105. raw_data["result"]["message"] = "异常"
  106. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  107. logging.error(traceback.format_exc())
  108. to_kafka.send_kafka(raw_data, logging)