语音识别应用
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

186 lines
8.3 KiB

  1. # coding:utf8
  2. import os, sys
  3. import io
  4. from jsonpath_ng import jsonpath, parse
  5. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
  6. cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
  7. par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
  8. sys.path.append(cur_dir)
  9. sys.path.append(par_dir)
  10. import json
  11. from django.http import HttpResponse
  12. from text_analysis.tools import to_kafka
  13. from django.views.decorators.csrf import csrf_exempt
  14. from log_util.set_logger import set_logger
  15. logging = set_logger('logs/results.log')
  16. import traceback
  17. import queue
  18. import requests
  19. # from text_analysis.tools.tool import get_data
  20. import time
  21. from datetime import datetime
  22. import os
  23. # 任务队列
  24. global task_queue
  25. task_queue = queue.Queue()
  26. # 数据队列
  27. global data_queue
  28. data_queue = queue.Queue()
  29. @csrf_exempt
  30. def ASR(request):
  31. if request.method == 'POST':
  32. try:
  33. raw_data = json.loads(request.body)
  34. task_queue.put(raw_data)
  35. return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
  36. except:
  37. logging.error(traceback.format_exc())
  38. return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
  39. else:
  40. return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
  41. def upload():
  42. while True:
  43. try:
  44. if task_queue.qsize() > 0:
  45. logging.info("取任务队列长度{}".format(task_queue.qsize()))
  46. raw_data = task_queue.get()
  47. # index = raw_data["metadata"]["index"]
  48. # datasource = raw_data["metadata"]["admin"]["datasource"]
  49. # if datasource not in raw_data["data"].keys():
  50. # logging.info("找不到相关数据源!—{}".format(raw_data))
  51. # continue
  52. # allFile = raw_data["data"][datasource]
  53. # currentFile = eval(allFile)[index]
  54. url=raw_data["metadata"]["admin"]["fileUrl"]
  55. if '$.' in url:
  56. # json.path表达式动态获取value
  57. datasources = str(url).split(':')
  58. # 0是数据源,1是JsonPath 表达式
  59. datasourcestr = raw_data["data"][datasources[0]]
  60. # print(datasourcestr)
  61. datasource = json.loads(datasourcestr)
  62. # 创建 JsonPath 表达式对象
  63. expr = parse(datasources[1])
  64. # 使用表达式来选择 JSON 元素
  65. match = [match.value for match in expr.find(datasource)]
  66. video_url = match[0]
  67. fileName=video_url.rsplit('/')[-1]
  68. if "http" not in video_url:
  69. file = "https://caiji.percent.cn/" + video_url.lstrip("/")
  70. else:
  71. file=video_url
  72. # print(file)
  73. # name=raw_data["metadata"]["admin"]["fileName"]
  74. # if '$.' in name:
  75. # # json.path表达式动态获取value
  76. # datasources = str(name).split(':')
  77. # # 0是数据源,1是JsonPath 表达式
  78. # datasourcestr = raw_data["data"][datasources[0]]
  79. # datasource = json.loads(datasourcestr)
  80. # # 创建 JsonPath 表达式对象
  81. # expr = parse(datasources[1])
  82. # # 使用表达式来选择 JSON 元素
  83. # match = [match.value for match in expr.find(datasource)]
  84. # fileName = match[0]
  85. currentFile={"content":"","fileName":fileName,"fileUrl":file}
  86. language = raw_data["metadata"]["admin"]["fromLanguage"]
  87. # 从gofast获取视频
  88. myfile = requests.get(file)
  89. starttime = datetime.now().strftime('%Y-%m-%d')
  90. path = 'inputdata/' + starttime
  91. if not os.path.exists(path):
  92. os.makedirs(path)
  93. with open(path + '/' + fileName, 'wb') as f:
  94. f.write(myfile.content)
  95. logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
  96. # 访问视频上传接口
  97. url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
  98. data = {'fromLanguage': language}
  99. f = open(path + '/' + fileName, 'rb')
  100. files = {'file': f}
  101. response = requests.post(url, data=data, files=files)
  102. d = json.loads(response.text)
  103. if "code" in d.keys() and d["code"] == 200:
  104. # 接口返回值data中存放视频获取结果的key
  105. result = d["data"]
  106. raw_data["result"] = {"successCode": "1", "errorLog": "", "results": currentFile, "dataKey": result}
  107. data_queue.put(raw_data)
  108. logging.info("视频上传成功{}".format(raw_data))
  109. # to_kafka.send_kafka(raw_data,logging)
  110. else:
  111. logging.info("视频上传失败{}-{}".format(raw_data, d))
  112. f.close()
  113. # Todo删除视频文件
  114. else:
  115. # 暂无任务,进入休眠
  116. time.sleep(10)
  117. except:
  118. logging.error(traceback.format_exc())
  119. def getResult():
  120. while True:
  121. # 3秒钟结果获取一次
  122. time.sleep(3)
  123. try:
  124. if data_queue.qsize() > 0:
  125. logging.info("取数据队列长度{}".format(data_queue.qsize()))
  126. raw_data = data_queue.get()
  127. # print(raw_data)
  128. # 根据视频key访问获取结果接口
  129. dataKey = raw_data["result"]["dataKey"]
  130. url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
  131. params = {'taskId': dataKey}
  132. response = requests.get(url, params=params)
  133. # print(response.text)
  134. d = json.loads(response.text)
  135. if "code" in d.keys() and d["code"] == 200:
  136. results = ""
  137. if d["data"]["code"] == "1":
  138. for sentence in d["data"]["sentences"]:
  139. results += sentence["text"]
  140. raw_data["result"]["results"]["content"] = results
  141. raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
  142. logging.info("视频解析获取结果成功{}".format(raw_data))
  143. to_kafka.send_kafka(raw_data, logging)
  144. elif d["data"]["code"] == "0":
  145. # 正在解析中,将任务再次放回数据队列
  146. data_queue.put(raw_data)
  147. logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
  148. else:
  149. # 解析失败
  150. raw_data["result"]["successCode"] = "0"
  151. raw_data["result"]["errorLog"] = response.text
  152. raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
  153. logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
  154. to_kafka.send_kafka(raw_data, logging)
  155. else:
  156. raw_data["result"]["successCode"] = "0"
  157. raw_data["result"]["errorLog"] = response.text
  158. raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
  159. logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
  160. to_kafka.send_kafka(raw_data, logging)
  161. else:
  162. # 暂无任务,进入休眠
  163. time.sleep(10)
  164. except:
  165. raw_data["result"]["successCode"] = "0"
  166. raw_data["result"]["errorLog"] = traceback.format_exc()
  167. raw_data["result"]["results"] = ""
  168. logging.error(traceback.format_exc())
  169. to_kafka.send_kafka(raw_data, logging)