You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
264 lines
12 KiB
264 lines
12 KiB
# coding:utf8
|
|
import os, sys
|
|
import io
|
|
from jsonpath_ng import jsonpath, parse
|
|
import uuid
|
|
# Re-wrap stdout so everything printed by this process is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
|
|
# Directory containing this file (falls back to the working directory when
# the dirname comes back empty).
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
|
|
# Parent of cur_dir.
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
|
|
# Put both directories on the import path; the project-local imports further
# down (text_analysis, log_util) presumably rely on this -- TODO confirm.
sys.path.append(cur_dir)
|
|
sys.path.append(par_dir)
|
|
import json
|
|
from django.http import HttpResponse
|
|
from text_analysis.tools import to_kafka
|
|
from django.views.decorators.csrf import csrf_exempt
|
|
from log_util.set_logger import set_logger
|
|
|
|
# NOTE: `logging` here is the object returned by the project's set_logger(),
# writing to logs/results.log -- it is not the stdlib `logging` module.
logging = set_logger('logs/results.log')
|
|
import traceback
|
|
import queue
|
|
import requests
|
|
from text_analysis.tools.tool import parse_data
|
|
import time
|
|
from datetime import datetime
|
|
import os
|
|
from kazoo.client import KazooClient
|
|
from kazoo.protocol.states import EventType
|
|
# Task queue: ASRNew() puts parsed request payloads here and upload()
# consumes them.
|
|
# global task_queue
|
|
task_queue = queue.Queue()
|
|
# Data queue: upload() puts successfully uploaded tasks here and getResult()
# consumes them (and re-queues tasks that are still being processed).
|
|
# global data_queue
|
|
data_queue = queue.Queue()
|
|
# scenes_id -> {"version": ..., "operation": ...}; written by the ZooKeeper
# watcher in zk_monitoring() and read by upload()/getResult() to drop tasks
# whose version differs from the recorded one.
stop_dict={}
|
|
|
|
@csrf_exempt
def ASRNew(request):
    """Accept an ASR task over HTTP POST and queue it for the upload worker.

    The request body must be JSON. The parsed payload is put on
    ``task_queue`` unchanged; no field validation happens here.

    Args:
        request: the incoming Django request.

    Returns:
        An ``HttpResponse`` whose body is a JSON string: ``{"code": 1, ...}``
        when the task was queued, ``{"code": 0, ...}`` when the method is not
        POST or the body could not be parsed.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))

    try:
        raw_data = json.loads(request.body)
        task_queue.put(raw_data)
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except Exception:
        # `Exception` rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not turned into a "bad JSON" response.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
|
|
|
|
|
|
def upload():
    """Worker loop: download each queued media file and upload it to the ASR service.

    Runs forever. For every task taken from ``task_queue`` it:

    1. drops the task when ``stop_dict`` holds a different version for the
       task's ``scenes_id``;
    2. resolves the media URL from ``input.fileUrl``. When that value contains
       "json" it is split on "#": part 0 is handed to ``parse_data`` and
       part 2 is used as a JSONPath expression on the parsed result;
       otherwise the whole value is handed to ``parse_data``;
    3. downloads the file into ``inputdata/<YYYY-MM-DD>/``;
    4. posts the file to the audio endpoint (names ending in m4a/mp3/wav) or
       to the video endpoint (everything else);
    5. on a ``code == 200`` reply stores the returned key under
       ``raw_data["result"]["dataKey"]`` and puts the task on ``data_queue``.

    Any exception raised while handling a task is logged and reported through
    ``to_kafka.send_kafka`` with ``successCode`` "0" and ``status`` 2.
    """
    while True:
        # Reset the per-task state on every pass so the error handler below
        # can never report a previous task, or hit an unbound name, when this
        # iteration fails before the task has been read.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() <= 0:
                # Nothing queued: sleep before polling again.
                time.sleep(10)
                continue

            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()

            # Result skeleton: one empty value per requested output key.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            logging.info("任务数据为:{}".format(raw_data))
            logging.info("当前version信息为:{}".format(stop_dict))

            # Skip tasks whose version differs from the one recorded for
            # this scenes_id.
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                continue

            # Resolve the media URL.
            url = raw_data["input"]["fileUrl"]
            if "json" in url:
                parm = url.split("#")
                data1 = parse_data(raw_data, parm[0])
                data1_json = json.loads(data1)
                expr = parse(parm[2])
                matches = [found.value for found in expr.find(data1_json)]
                video_url = matches[0]
            else:
                video_url = parse_data(raw_data, url)

            fileName = video_url.rsplit('/')[-1]
            if "http" not in video_url:
                # Relative path: prefix it with the file-server base URL.
                file_url = "https://caiji.percent.cn/" + video_url.lstrip("/")
            else:
                file_url = video_url
            currentFile = {"fileName": fileName, "fileUrl": file_url}
            language = raw_data["input"]["fromLanguage"]

            # Download the media file.
            # NOTE(review): no timeout and no HTTP status check here -- an
            # error page would be saved and uploaded as if it were media.
            myfile = requests.get(file_url)
            starttime = datetime.now().strftime('%Y-%m-%d')
            path = 'inputdata/' + starttime
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(path, exist_ok=True)
            local_path = path + '/' + fileName
            with open(local_path, 'wb') as f:
                f.write(myfile.content)
            logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))

            # Choose the upload endpoint. video: 1 = video, 0 = audio.
            video = 1
            if fileName.endswith(("m4a", "mp3", "wav")):
                upload_url = "https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                video = 0
            else:
                upload_url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"

            data = {'fromLanguage': language}
            # `with` closes the handle even when the POST raises.
            with open(local_path, 'rb') as f:
                # NOTE(review): verify=False disables TLS certificate
                # verification for this request -- confirm this is intended.
                response = requests.post(upload_url, data=data, files={'file': f}, verify=False)
            logging.info("上传后接口返回值:{}-{}".format(response, response.text))

            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                # d["data"] is the key later used to fetch the result.
                result = d["data"]
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result, "video": video, "file": currentFile}
                data_queue.put(raw_data)
                logging.info("视频上传成功{}".format(raw_data))
            else:
                # NOTE(review): a rejected upload is only logged; nothing is
                # sent to Kafka for it, unlike the exception path below.
                logging.info("视频上传失败{}-{}".format(raw_data, d))
            # TODO: delete the downloaded file once it has been uploaded.
        except Exception:
            error_log = traceback.format_exc()
            logging.error(error_log)
            if not isinstance(raw_data, dict):
                # No task was read, or the payload is not a JSON object:
                # there is nothing a failure result could be attached to.
                continue
            raw_data["result"] = {
                "successCode": "0",
                "status": 2,
                "message": "视频/音频上传异常",
                "errorLog": error_log,
                "results": json.dumps(res_tmp, ensure_ascii=False),
            }
            to_kafka.send_kafka(raw_data, logging)
|
|
|
|
|
|
def getResult():
    """Worker loop: poll the ASR service for uploaded tasks and publish the outcome.

    Runs forever, waking every 3 seconds (plus 10 more when ``data_queue`` is
    empty). For every task taken from ``data_queue`` it:

    1. drops the task when ``stop_dict`` holds a different version for the
       task's ``scenes_id``;
    2. queries the video or audio result endpoint, chosen by
       ``raw_data["result"]["video"]``, using ``raw_data["result"]["dataKey"]``;
    3. depending on the reply:
       - top-level ``code`` 200 and ``data.code`` "1": joins the sentence
         texts with single spaces into the ``content`` output (if requested)
         and sends the task to Kafka with ``status`` 1;
       - top-level ``code`` 200 and ``data.code`` "0": the service is still
         processing, so the task is put back on ``data_queue``;
       - anything else: sends the task to Kafka with ``successCode`` "0" and
         ``status`` 2, carrying the raw response text as ``errorLog``.

    Any exception raised while handling a task is logged and reported to Kafka
    as a failure as well.
    """
    while True:
        # Fetch results at most once every 3 seconds.
        time.sleep(3)
        # Reset the per-task state on every pass so the error handler below
        # can never report a previous task, or hit an unbound name, when this
        # iteration fails early.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() <= 0:
                # Nothing queued: sleep before polling again.
                time.sleep(10)
                continue

            logging.info("取数据队列长度{}".format(data_queue.qsize()))
            raw_data = data_queue.get()
            logging.info("任务数据为:{}".format(raw_data))

            # Skip tasks whose version differs from the one recorded for
            # this scenes_id.
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                continue

            # Result skeleton: one empty value per requested output key.
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())

            # Query the result endpoint with the key returned by the upload.
            # NOTE(review): verify=False disables TLS certificate verification
            # for these requests -- confirm this is intended.
            dataKey = raw_data["result"]["dataKey"]
            language = raw_data["input"]["fromLanguage"]
            if raw_data["result"]["video"] == 1:
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                response = requests.get(url, params={'taskId': dataKey}, verify=False)
            else:
                url = "https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                response = requests.post(url, data={'fromLanguage': language, 'taskId': dataKey}, verify=False)
            logging.info("ASR网站返回值:{}-{}".format(response, response.text))

            d = json.loads(response.text)
            api_ok = "code" in d and d["code"] == 200
            # data.code is only consulted when the top-level code is 200.
            state = d["data"]["code"] if api_ok else None

            if state == "1":
                # Finished. An empty or missing-value sentence list simply
                # yields an empty transcript.
                results = ""
                for sentence in d["data"]["sentences"] or []:
                    if results:
                        results += ' ' + sentence["text"]
                    else:
                        results = sentence["text"]
                if "content" in res_tmp:
                    res_tmp["content"] = results
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["status"] = 1
                raw_data["result"]["message"] = "成功"
                logging.info("视频解析获取结果成功{}".format(raw_data))
                to_kafka.send_kafka(raw_data, logging)
            elif state == "0":
                # Still being processed: put the task back on the data queue.
                data_queue.put(raw_data)
                logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
            else:
                # Either the call itself or the analysis failed.
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = response.text
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频解析异常"
                logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_log = traceback.format_exc()
            logging.error(error_log)
            if not isinstance(raw_data, dict):
                # No task was read on this pass: nothing to report against.
                continue
            failure = raw_data.setdefault("result", {})
            failure["successCode"] = "0"
            failure["errorLog"] = error_log
            failure["status"] = 2
            failure["message"] = "视频/音频解析异常"
            failure["results"] = json.dumps(res_tmp, ensure_ascii=False)
            to_kafka.send_kafka(raw_data, logging)
|
|
|
|
|
|
def zk_monitoring(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181'):
    """Watch the ZooKeeper node ``/analyze`` and record task versions in ``stop_dict``.

    Every time the node's data changes, its JSON content is read and
    ``stop_dict[scenes_id]`` is set to ``{"version": ..., "operation": ...}``.
    The call then blocks, sleeping in one-second steps, until it is
    interrupted; the client is stopped and closed on the way out.

    Args:
        hosts: ZooKeeper connection string. The default is the production
            ensemble; the test ensemble noted in the original source is
            '172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181'.

    Failures while connecting or shutting down are logged, not raised.
    """
    try:
        zk = KazooClient(hosts=hosts)
        zk.start()

        # Register the watcher on the node.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-change events (not the initial call, nor
            # other event types).
            if event is None or event.type != EventType.CHANGED:
                return
            try:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                scenes_id = d["scenes_id"]
                # Build the entry completely before publishing it, so the
                # worker loops never find an entry without "version" -- not
                # mid-update, and not after a payload with missing keys.
                stop_dict[scenes_id] = {
                    "version": d["version"],
                    "operation": d["operation"],
                }
            except Exception:
                # Log a bad payload here instead of letting the error
                # propagate into the watch callback machinery.
                logging.error(traceback.format_exc())

        # Keep this call alive so the watcher keeps receiving changes.
        try:
            while True:
                time.sleep(1)
        except (KeyboardInterrupt, SystemExit):
            logging.info("Stopping...")
        finally:
            # Always release the connection, whatever ended the loop.
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
|
|
|
|
|