语音识别应用
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

264 lines
12 KiB

  1. # coding:utf8
  2. import os, sys
  3. import io
  4. from jsonpath_ng import jsonpath, parse
  5. import uuid
  6. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
  7. cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
  8. par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
  9. sys.path.append(cur_dir)
  10. sys.path.append(par_dir)
  11. import json
  12. from django.http import HttpResponse
  13. from text_analysis.tools import to_kafka
  14. from django.views.decorators.csrf import csrf_exempt
  15. from log_util.set_logger import set_logger
  16. logging = set_logger('logs/results.log')
  17. import traceback
  18. import queue
  19. import requests
  20. from text_analysis.tools.tool import parse_data
  21. import time
  22. from datetime import datetime
  23. import os
  24. from kazoo.client import KazooClient
  25. from kazoo.protocol.states import EventType
  26. # 任务队列
  27. # global task_queue
  28. task_queue = queue.Queue()
  29. # 数据队列
  30. # global data_queue
  31. data_queue = queue.Queue()
  32. stop_dict={}
  33. @csrf_exempt
  34. def ASRNew(request):
  35. if request.method == 'POST':
  36. try:
  37. raw_data = json.loads(request.body)
  38. task_queue.put(raw_data)
  39. return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
  40. except:
  41. logging.error(traceback.format_exc())
  42. return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
  43. else:
  44. return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
  45. def upload():
  46. while True:
  47. try:
  48. if task_queue.qsize() > 0:
  49. logging.info("取任务队列长度{}".format(task_queue.qsize()))
  50. raw_data = task_queue.get()
  51. output=raw_data["output"]
  52. res_tmp={key: "" for key in output}
  53. if "id" in res_tmp.keys():
  54. res_tmp["id"]=str(uuid.uuid4())
  55. logging.info("任务数据为:{}".format(raw_data))
  56. logging.info("当前version信息为:{}".format(stop_dict))
  57. task_id=raw_data["scenes_id"]
  58. task_version=raw_data["version"]
  59. if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
  60. logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
  61. continue
  62. url=raw_data["input"]["fileUrl"]
  63. if "json" in url:
  64. parm = url.split("#")
  65. data1 = parse_data(raw_data, parm[0])
  66. data1_json = json.loads(data1)
  67. expr = parse(parm[2])
  68. match = [match.value for match in expr.find(data1_json)]
  69. video_url = match[0]
  70. else:
  71. video_url = parse_data(raw_data, url)
  72. fileName=video_url.rsplit('/')[-1]
  73. if "http" not in video_url:
  74. file = "https://caiji.percent.cn/" + video_url.lstrip("/")
  75. else:
  76. file=video_url
  77. # name=raw_data["metadata"]["admin"]["fileName"]
  78. # if '$.' in name:
  79. # # json.path表达式动态获取value
  80. # datasources = str(name).split(':')
  81. # # 0是数据源,1是JsonPath 表达式
  82. # datasourcestr = raw_data["data"][datasources[0]]
  83. # datasource = json.loads(datasourcestr)
  84. # # 创建 JsonPath 表达式对象
  85. # expr = parse(datasources[1])
  86. # # 使用表达式来选择 JSON 元素
  87. # match = [match.value for match in expr.find(datasource)]
  88. # fileName = match[0]
  89. currentFile={"fileName":fileName,"fileUrl":file}
  90. language = raw_data["input"]["fromLanguage"]
  91. # 从gofast获取视频
  92. myfile = requests.get(file)
  93. starttime = datetime.now().strftime('%Y-%m-%d')
  94. path = 'inputdata/' + starttime
  95. if not os.path.exists(path):
  96. os.makedirs(path)
  97. with open(path + '/' + fileName, 'wb') as f:
  98. f.write(myfile.content)
  99. logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
  100. # 访问视频上传接口
  101. # video=1视频,0音频。
  102. video=1
  103. if fileName[-3:]=="m4a" or fileName[-3:]=="mp3" or fileName[-3:]=="wav":
  104. url="https://realtime.pdeepmatrix.com/apis/file/asr/upload"
  105. video=0
  106. else:
  107. url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
  108. data = {'fromLanguage': language}
  109. f = open(path + '/' + fileName, 'rb')
  110. files = {'file': f}
  111. response = requests.post(url, data=data, files=files,verify=False)
  112. logging.info("上传后接口返回值:{}-{}".format(response,response.text))
  113. d = json.loads(response.text)
  114. if "code" in d.keys() and d["code"] == 200:
  115. # 接口返回值data中存放视频获取结果的key
  116. result = d["data"]
  117. raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result,"video":video,"file":currentFile}
  118. data_queue.put(raw_data)
  119. logging.info("视频上传成功{}".format(raw_data))
  120. # to_kafka.send_kafka(raw_data,logging)
  121. else:
  122. logging.info("视频上传失败{}-{}".format(raw_data, d))
  123. f.close()
  124. # Todo删除视频文件
  125. else:
  126. # 暂无任务,进入休眠
  127. time.sleep(10)
  128. except:
  129. raw_data["result"]={}
  130. raw_data["result"]["successCode"] = "0"
  131. raw_data["result"]["status"]=2
  132. raw_data["result"]["message"]="视频/音频上传异常"
  133. raw_data["result"]["errorLog"] = traceback.format_exc()
  134. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  135. logging.error(traceback.format_exc())
  136. to_kafka.send_kafka(raw_data, logging)
  137. def getResult():
  138. while True:
  139. # 3秒钟结果获取一次
  140. time.sleep(3)
  141. try:
  142. if data_queue.qsize() > 0:
  143. logging.info("取数据队列长度{}".format(data_queue.qsize()))
  144. raw_data = data_queue.get()
  145. logging.info("任务数据为:{}".format(raw_data))
  146. task_id=raw_data["scenes_id"]
  147. task_version=raw_data["version"]
  148. if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
  149. logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
  150. continue
  151. output=raw_data["output"]
  152. res_tmp={key: "" for key in output}
  153. if "id" in res_tmp.keys():
  154. res_tmp["id"]=str(uuid.uuid4())
  155. # 根据视频key访问获取结果接口
  156. dataKey = raw_data["result"]["dataKey"]
  157. params = {'taskId': dataKey}
  158. language = raw_data["input"]["fromLanguage"]
  159. data = {'fromLanguage': language,'taskId': dataKey}
  160. if raw_data["result"]["video"]==1:
  161. url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
  162. response = requests.get(url, params=params, verify=False)
  163. else:
  164. url ="https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
  165. response = requests.post(url, data=data, verify=False)
  166. logging.info("ASR网站返回值:{}-{}".format(response,response.text))
  167. d = json.loads(response.text)
  168. if "code" in d.keys() and d["code"] == 200:
  169. results = ""
  170. if d["data"]["code"] == "1" and d["data"]["sentences"]:
  171. for sentence in d["data"]["sentences"]:
  172. if results:
  173. results += ' ' + sentence["text"]
  174. else:
  175. results = sentence["text"]
  176. if "content" in res_tmp.keys():
  177. res_tmp["content"]=results
  178. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  179. raw_data["result"]["status"]=1
  180. raw_data["result"]["message"]="成功"
  181. logging.info("视频解析获取结果成功{}".format(raw_data))
  182. to_kafka.send_kafka(raw_data, logging)
  183. elif d["data"]["code"] == "1" and not d["data"]["sentences"]:
  184. results =""
  185. if "content" in res_tmp.keys():
  186. res_tmp["content"]=results
  187. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  188. raw_data["result"]["status"]=1
  189. raw_data["result"]["message"]="成功"
  190. logging.info("视频解析获取结果成功{}".format(raw_data))
  191. to_kafka.send_kafka(raw_data, logging)
  192. elif d["data"]["code"] == "0":
  193. # 正在解析中,将任务再次放回数据队列
  194. data_queue.put(raw_data)
  195. logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
  196. else:
  197. # 解析失败
  198. raw_data["result"]["successCode"] = "0"
  199. raw_data["result"]["errorLog"] = response.text
  200. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  201. raw_data["result"]["status"]=2
  202. raw_data["result"]["message"]="视频/音频解析异常"
  203. logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
  204. to_kafka.send_kafka(raw_data, logging)
  205. else:
  206. raw_data["result"]["successCode"] = "0"
  207. raw_data["result"]["errorLog"] = response.text
  208. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  209. raw_data["result"]["status"] = 2
  210. raw_data["result"]["message"] = "视频/音频解析异常"
  211. logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
  212. to_kafka.send_kafka(raw_data, logging)
  213. else:
  214. # 暂无任务,进入休眠
  215. time.sleep(10)
  216. except:
  217. raw_data["result"]["successCode"] = "0"
  218. raw_data["result"]["errorLog"] = traceback.format_exc()
  219. raw_data["result"]["status"] = 2
  220. raw_data["result"]["message"] = "视频/音频解析异常"
  221. raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
  222. logging.error(traceback.format_exc())
  223. to_kafka.send_kafka(raw_data, logging)
  224. def zk_monitoring():
  225. try:
  226. #线上环境
  227. zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
  228. #测试环境
  229. # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
  230. zk.start()
  231. # 设置监听器
  232. @zk.DataWatch("/analyze")
  233. def watch_node(data, stat, event):
  234. if event is not None and event.type == EventType.CHANGED:
  235. data, stat = zk.get("/analyze")
  236. logging.info("执行删除操作:{}".format(data))
  237. d = json.loads(data)
  238. id = d["scenes_id"]
  239. stop_dict[id] = {}
  240. stop_dict[id]["version"] = d["version"]
  241. stop_dict[id]["operation"] = d["operation"]
  242. # 保持程序运行以监听节点变化
  243. try:
  244. while True:
  245. time.sleep(1)
  246. except:
  247. logging.info("Stopping...")
  248. # 关闭连接
  249. zk.stop()
  250. zk.close()
  251. except:
  252. logging.error(traceback.format_exc())