# coding:utf8
"""ASR task service: HTTP intake view plus background upload/poll workers.

``ASRNew`` accepts task descriptions over HTTP and puts them on ``task_queue``.
``upload`` downloads each task's media file and submits it to the remote ASR
service, then hands the task to ``data_queue``. ``getResult`` polls the remote
service for the transcript and publishes the outcome through
``to_kafka.send_kafka``. ``zk_monitoring`` records version information read
from ZooKeeper in ``stop_dict`` so the workers can skip superseded tasks.
"""
import os
import sys
import io
from jsonpath_ng import jsonpath, parse
import uuid

sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
# The project packages imported below are resolved through these entries,
# so the path setup has to run before those imports.
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger

# NOTE: this is the project logger returned by set_logger, not the stdlib
# ``logging`` module.
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import parse_data
import time
from datetime import datetime
import os
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType

# Task queue: raw task payloads waiting to be downloaded and uploaded.
# global task_queue
task_queue = queue.Queue()
# Data queue: uploaded tasks waiting for their recognition result.
# global data_queue
data_queue = queue.Queue()
# scenes_id -> {"version": ..., "operation": ...}, filled by zk_monitoring.
stop_dict = {}


def _json_response(code, msg):
    """Return an HttpResponse whose body is the JSON object {code, msg}."""
    return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))


def _new_result_template(raw_data):
    """Build the empty result record from the task's declared output fields.

    Every key listed in ``raw_data["output"]`` maps to ``""``; an ``"id"``
    key, when present, receives a fresh UUID string.
    """
    res_tmp = {key: "" for key in raw_data["output"]}
    if "id" in res_tmp:
        res_tmp["id"] = str(uuid.uuid4())
    return res_tmp


def _is_paused(raw_data):
    """Return True when stop_dict holds a different version for this task's scene."""
    task_id = raw_data["scenes_id"]
    task_version = raw_data["version"]
    entry = stop_dict.get(task_id)
    return entry is not None and task_version != entry["version"]


def _mark_failed(raw_data, res_tmp, message, error_log, reset_result=False):
    """Write the failure fields into ``raw_data["result"]``.

    With ``reset_result`` the previous result dict is discarded first;
    otherwise existing keys (dataKey, video, file, ...) are kept.
    """
    if reset_result or not isinstance(raw_data.get("result"), dict):
        raw_data["result"] = {}
    result = raw_data["result"]
    result["successCode"] = "0"
    result["errorLog"] = error_log
    result["results"] = json.dumps(res_tmp, ensure_ascii=False)
    result["status"] = 2
    result["message"] = message


def _report_exception(raw_data, res_tmp, message, reset_result=False):
    """Log the active exception and publish a failure record for the task.

    Must be called from inside an ``except`` block. Nothing is published when
    no task was dequeued (``raw_data`` is not a dict). Errors raised while
    reporting are logged rather than propagated, so that a reporting problem
    cannot terminate the calling worker loop.
    """
    error_log = traceback.format_exc()
    logging.error(error_log)
    if not isinstance(raw_data, dict):
        return
    try:
        _mark_failed(raw_data, res_tmp, message, error_log, reset_result=reset_result)
        to_kafka.send_kafka(raw_data, logging)
    except Exception:
        logging.error(traceback.format_exc())


def _join_sentence_texts(sentences):
    """Join the ``"text"`` of each sentence with single spaces.

    A separator is only inserted once some text has been accumulated, so
    leading empty texts do not produce leading spaces. A falsy ``sentences``
    value yields ``""``.
    """
    joined = ""
    for sentence in sentences or []:
        piece = sentence["text"]
        joined = joined + ' ' + piece if joined else piece
    return joined


@csrf_exempt
def ASRNew(request):
    """Accept a task as a JSON object via POST and enqueue it for upload.

    Returns a JSON body with ``code`` 1 when the task was queued, or ``code``
    0 when the method is not POST or the body is not a JSON object.
    """
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        raw_data = json.loads(request.body)
        # The workers index the payload by key; anything but an object would
        # only fail later, inside the background thread.
        if not isinstance(raw_data, dict):
            raise ValueError("task payload must be a JSON object")
    except Exception:
        logging.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
    task_queue.put(raw_data)
    return _json_response(1, "请求正常!")


def upload():
    """Worker loop: download each queued task's media and submit it for ASR.

    For every task taken from ``task_queue`` the media URL is resolved, the
    file is downloaded under ``inputdata/<date>/`` and posted to the remote
    upload endpoint. On an accepted upload the task, extended with a
    ``result`` record holding the returned key, is put on ``data_queue``.
    Any exception is reported through ``to_kafka.send_kafka`` as a failed
    task. Runs forever; sleeps 10 seconds when the queue is empty.
    """
    while True:
        # Reset per iteration so the error handler never reports a failure
        # against a task left over from a previous iteration.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                res_tmp = _new_result_template(raw_data)
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                if _is_paused(raw_data):
                    logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                    continue

                file_url = raw_data["input"]["fileUrl"]
                if "json" in file_url:
                    # '#'-separated spec: part 0 is resolved with parse_data
                    # to a JSON string, part 2 is a JSONPath into it.
                    parm = file_url.split("#")
                    source_json = json.loads(parse_data(raw_data, parm[0]))
                    matches = [m.value for m in parse(parm[2]).find(source_json)]
                    video_url = matches[0]
                else:
                    video_url = parse_data(raw_data, file_url)

                fileName = video_url.rsplit('/')[-1]
                if "http" not in video_url:
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file = video_url
                # name=raw_data["metadata"]["admin"]["fileName"]
                # if '$.' in name:
                #     # fetch the value dynamically through a JSONPath expression
                #     datasources = str(name).split(':')
                #     # 0 is the data source, 1 is the JSONPath expression
                #     datasourcestr = raw_data["data"][datasources[0]]
                #     datasource = json.loads(datasourcestr)
                #     # build the JSONPath expression object
                #     expr = parse(datasources[1])
                #     # use the expression to select JSON elements
                #     match = [match.value for match in expr.find(datasource)]
                #     fileName = match[0]
                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]

                # Fetch the media file from gofast.
                # NOTE(review): no timeout is set on the HTTP calls in this
                # loop, so a stalled server blocks the worker indefinitely.
                myfile = requests.get(file)
                day_dir = 'inputdata/' + datetime.now().strftime('%Y-%m-%d')
                if not os.path.exists(day_dir):
                    os.makedirs(day_dir)
                local_path = day_dir + '/' + fileName
                with open(local_path, 'wb') as out:
                    out.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))

                # Pick the upload endpoint. video=1 means video, 0 means audio.
                if fileName[-3:] in ("m4a", "mp3", "wav"):
                    upload_url = "https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                    video = 0
                else:
                    upload_url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                    video = 1

                # The context manager closes the handle even when the request
                # raises.
                # NOTE(review): verify=False disables TLS certificate checks.
                with open(local_path, 'rb') as media:
                    response = requests.post(
                        upload_url,
                        data={'fromLanguage': language},
                        files={'file': media},
                        verify=False,
                    )
                logging.info("上传后接口返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if d.get("code") == 200:
                    # "data" in the response holds the key used later to
                    # fetch the recognition result.
                    raw_data["result"] = {
                        "successCode": "1",
                        "errorLog": "",
                        "results": "",
                        "dataKey": d["data"],
                        "video": video,
                        "file": currentFile,
                    }
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # TODO: delete the downloaded media file
            else:
                # No task available; sleep.
                time.sleep(10)
        except Exception:
            _report_exception(raw_data, res_tmp, "视频/音频上传异常", reset_result=True)


def getResult():
    """Worker loop: poll the remote service for results of uploaded tasks.

    Every 3 seconds one task is taken from ``data_queue`` and its result
    endpoint is queried. A finished task is published through
    ``to_kafka.send_kafka`` with the joined sentence texts; a task still in
    progress is put back on the queue; anything else is published as a
    failure. Runs forever; sleeps a further 10 seconds when the queue is
    empty.
    """
    while True:
        # Poll for results once every 3 seconds.
        time.sleep(3)
        # Reset per iteration so the error handler never touches stale data.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                logging.info("任务数据为:{}".format(raw_data))
                if _is_paused(raw_data):
                    logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                    continue

                res_tmp = _new_result_template(raw_data)
                res_tmp["isLast"] = 1
                res_tmp["fileName"] = raw_data["result"]["file"]["fileName"]

                # Query the result endpoint with the key returned at upload.
                dataKey = raw_data["result"]["dataKey"]
                language = raw_data["input"]["fromLanguage"]
                if raw_data["result"]["video"] == 1:
                    response = requests.get(
                        "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult",
                        params={'taskId': dataKey},
                        verify=False,
                    )
                else:
                    response = requests.post(
                        "https://realtime.pdeepmatrix.com/apis/file/asr/getResult",
                        data={'fromLanguage': language, 'taskId': dataKey},
                        verify=False,
                    )
                logging.info("ASR网站返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)

                # The inner task state is only read when the outer call
                # succeeded.
                task_state = d["data"]["code"] if d.get("code") == 200 else None
                if task_state == "1":
                    # Finished; an empty sentence list yields empty content.
                    results = _join_sentence_texts(d["data"]["sentences"])
                    if "content" in res_tmp:
                        res_tmp["content"] = results
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["status"] = 1
                    raw_data["result"]["message"] = "成功"
                    logging.info("视频解析获取结果成功{}".format(raw_data))
                    to_kafka.send_kafka(raw_data, logging)
                elif task_state == "0":
                    # Still being processed: put the task back on the queue.
                    data_queue.put(raw_data)
                    logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                else:
                    # Recognition failed or the call itself was rejected.
                    _mark_failed(raw_data, res_tmp, "视频/音频解析异常", response.text)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No task available; sleep.
                time.sleep(10)
        except Exception:
            _report_exception(raw_data, res_tmp, "视频/音频解析异常")


def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and record updates in stop_dict.

    On every CHANGED event the node is re-read and parsed as JSON, and
    ``stop_dict[scenes_id]`` is set to the node's ``version`` and
    ``operation``. Blocks until interrupted, then closes the connection.
    Errors are logged, not raised.
    """
    try:
        # Production environment
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        try:
            # Register the watcher.
            @zk.DataWatch("/analyze")
            def watch_node(data, stat, event):
                if event is not None and event.type == EventType.CHANGED:
                    data, stat = zk.get("/analyze")
                    logging.info("执行删除操作:{}".format(data))
                    d = json.loads(data)
                    # Publish the entry in one assignment so worker threads
                    # never observe it without its "version" key.
                    stop_dict[d["scenes_id"]] = {
                        "version": d["version"],
                        "operation": d["operation"],
                    }

            # Keep running so node changes continue to be observed.
            try:
                while True:
                    time.sleep(1)
            except (KeyboardInterrupt, SystemExit):
                logging.info("Stopping...")
        finally:
            # Close the connection.
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())