语音识别应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

208 lines
9.6 KiB

  1. # coding:utf8
  2. import os, sys
  3. import io
  4. from jsonpath_ng import jsonpath, parse
  5. import uuid
  6. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
  7. cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
  8. par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
  9. sys.path.append(cur_dir)
  10. sys.path.append(par_dir)
  11. import json
  12. from django.http import HttpResponse
  13. from text_analysis.tools import to_kafka
  14. from django.views.decorators.csrf import csrf_exempt
  15. from log_util.set_logger import set_logger
  16. logging = set_logger('logs/results.log')
  17. import traceback
  18. import queue
  19. import requests
  20. from text_analysis.tools.tool import parse_data
  21. import time
  22. from datetime import datetime
  23. import os
  24. # 任务队列
  25. global task_queue
  26. task_queue = queue.Queue()
  27. # 数据队列
  28. global data_queue
  29. data_queue = queue.Queue()
  30. @csrf_exempt
  31. def ASRNew(request):
  32. if request.method == 'POST':
  33. try:
  34. raw_data = json.loads(request.body)
  35. task_queue.put(raw_data)
  36. return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
  37. except:
  38. logging.error(traceback.format_exc())
  39. return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
  40. else:
  41. return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
  42. def upload():
  43. while True:
  44. try:
  45. if task_queue.qsize() > 0:
  46. logging.info("取任务队列长度{}".format(task_queue.qsize()))
  47. raw_data = task_queue.get()
  48. output=raw_data["output"]
  49. res_tmp={key: "" for key in output}
  50. if "id" in res_tmp.keys():
  51. res_tmp["id"]=str(uuid.uuid4())
  52. # index = raw_data["metadata"]["index"]
  53. # datasource = raw_data["metadata"]["admin"]["datasource"]
  54. # if datasource not in raw_data["data"].keys():
  55. # logging.info("找不到相关数据源!—{}".format(raw_data))
  56. # continue
  57. # allFile = raw_data["data"][datasource]
  58. # currentFile = eval(allFile)[index]
  59. logging.info("任务数据为:{}".format(raw_data))
  60. url=raw_data["input"]["fileUrl"]
  61. if "json" in url:
  62. parm = url.split("#")
  63. data1 = parse_data(raw_data, parm[0])
  64. data1_json = json.loads(data1)
  65. expr = parse(parm[2])
  66. match = [match.value for match in expr.find(data1_json)]
  67. video_url = match[0]
  68. else:
  69. video_url = parse_data(raw_data, url)
  70. fileName=video_url.rsplit('/')[-1]
  71. if "http" not in video_url:
  72. file = "https://caiji.percent.cn/" + video_url.lstrip("/")
  73. else:
  74. file=video_url
  75. # name=raw_data["metadata"]["admin"]["fileName"]
  76. # if '$.' in name:
  77. # # json.path表达式动态获取value
  78. # datasources = str(name).split(':')
  79. # # 0是数据源,1是JsonPath 表达式
  80. # datasourcestr = raw_data["data"][datasources[0]]
  81. # datasource = json.loads(datasourcestr)
  82. # # 创建 JsonPath 表达式对象
  83. # expr = parse(datasources[1])
  84. # # 使用表达式来选择 JSON 元素
  85. # match = [match.value for match in expr.find(datasource)]
  86. # fileName = match[0]
  87. currentFile={"fileName":fileName,"fileUrl":file}
  88. language = raw_data["input"]["fromLanguage"]
  89. # 从gofast获取视频
  90. myfile = requests.get(file)
  91. starttime = datetime.now().strftime('%Y-%m-%d')
  92. path = 'inputdata/' + starttime
  93. if not os.path.exists(path):
  94. os.makedirs(path)
  95. with open(path + '/' + fileName, 'wb') as f:
  96. f.write(myfile.content)
  97. logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
  98. # 访问视频上传接口
  99. url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
  100. data = {'fromLanguage': language}
  101. f = open(path + '/' + fileName, 'rb')
  102. files = {'file': f}
  103. response = requests.post(url, data=data, files=files)
  104. logging.info("上传后接口返回值:{}-{}".format(response,response.text))
  105. d = json.loads(response.text)
  106. if "code" in d.keys() and d["code"] == 200:
  107. # 接口返回值data中存放视频获取结果的key
  108. result = d["data"]
  109. raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result,"file":currentFile}
  110. data_queue.put(raw_data)
  111. logging.info("视频上传成功{}".format(raw_data))
  112. # to_kafka.send_kafka(raw_data,logging)
  113. else:
  114. logging.info("视频上传失败{}-{}".format(raw_data, d))
  115. f.close()
  116. # Todo删除视频文件
  117. else:
  118. # 暂无任务,进入休眠
  119. time.sleep(10)
  120. except:
  121. raw_data["result"]={}
  122. raw_data["result"]["successCode"] = "0"
  123. raw_data["result"]["errorLog"] = traceback.format_exc()
  124. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  125. logging.error(traceback.format_exc())
  126. to_kafka.send_kafka(raw_data, logging)
  127. def getResult():
  128. while True:
  129. # 3秒钟结果获取一次
  130. time.sleep(3)
  131. try:
  132. if data_queue.qsize() > 0:
  133. logging.info("取数据队列长度{}".format(data_queue.qsize()))
  134. raw_data = data_queue.get()
  135. logging.info("任务数据为:{}".format(raw_data))
  136. # print(raw_data)
  137. output=raw_data["output"]
  138. res_tmp={key: "" for key in output}
  139. if "id" in res_tmp.keys():
  140. res_tmp["id"]=str(uuid.uuid4())
  141. # 根据视频key访问获取结果接口
  142. dataKey = raw_data["result"]["dataKey"]
  143. url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
  144. params = {'taskId': dataKey}
  145. response = requests.get(url, params=params)
  146. logging.info("ASR网站返回值:{}-{}".format(response,response.text))
  147. d = json.loads(response.text)
  148. if "code" in d.keys() and d["code"] == 200:
  149. results = ""
  150. if d["data"]["code"] == "1" and d["data"]["sentences"]:
  151. for sentence in d["data"]["sentences"]:
  152. if results:
  153. results += ' ' + sentence["text"]
  154. else:
  155. results = sentence["text"]
  156. if "content" in res_tmp.keys():
  157. res_tmp["content"]=results
  158. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  159. logging.info("视频解析获取结果成功{}".format(raw_data))
  160. to_kafka.send_kafka(raw_data, logging)
  161. elif d["data"]["code"] == "1" and not d["data"]["sentences"]:
  162. results =""
  163. if "content" in res_tmp.keys():
  164. res_tmp["content"]=results
  165. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  166. logging.info("视频解析获取结果成功{}".format(raw_data))
  167. to_kafka.send_kafka(raw_data, logging)
  168. elif d["data"]["code"] == "0":
  169. # 正在解析中,将任务再次放回数据队列
  170. data_queue.put(raw_data)
  171. logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
  172. else:
  173. # 解析失败
  174. raw_data["result"]["successCode"] = "0"
  175. raw_data["result"]["errorLog"] = response.text
  176. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  177. logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
  178. to_kafka.send_kafka(raw_data, logging)
  179. else:
  180. raw_data["result"]["successCode"] = "0"
  181. raw_data["result"]["errorLog"] = response.text
  182. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  183. logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
  184. to_kafka.send_kafka(raw_data, logging)
  185. else:
  186. # 暂无任务,进入休眠
  187. time.sleep(10)
  188. except:
  189. raw_data["result"]["successCode"] = "0"
  190. raw_data["result"]["errorLog"] = traceback.format_exc()
  191. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  192. logging.error(traceback.format_exc())
  193. to_kafka.send_kafka(raw_data, logging)