commit
859a791e3a
69 changed files with 4459 additions and 0 deletions
-
26config.ini
-
BINlog_util/__pycache__/set_logger.cpython-36.pyc
-
BINlog_util/__pycache__/set_logger.cpython-37.pyc
-
BINlog_util/__pycache__/set_logger.cpython-38.pyc
-
33log_util/set_logger.py
-
0logs/results.log
-
22manage.py
-
34src.py
-
1start.sh
-
1stop_uwsgi.sh
-
36test.py
-
0text_analysis/__init__.py
-
BINtext_analysis/__pycache__/__init__.cpython-36.pyc
-
BINtext_analysis/__pycache__/__init__.cpython-37.pyc
-
BINtext_analysis/__pycache__/__init__.cpython-38.pyc
-
BINtext_analysis/__pycache__/read_config.cpython-38.pyc
-
BINtext_analysis/__pycache__/settings.cpython-36.pyc
-
BINtext_analysis/__pycache__/settings.cpython-37.pyc
-
BINtext_analysis/__pycache__/settings.cpython-38.pyc
-
BINtext_analysis/__pycache__/urls.cpython-36.pyc
-
BINtext_analysis/__pycache__/urls.cpython-37.pyc
-
BINtext_analysis/__pycache__/urls.cpython-38.pyc
-
BINtext_analysis/__pycache__/views.cpython-36.pyc
-
BINtext_analysis/__pycache__/views.cpython-37.pyc
-
BINtext_analysis/__pycache__/views.cpython-38.pyc
-
BINtext_analysis/__pycache__/views.cpython-39.pyc
-
BINtext_analysis/__pycache__/wsgi.cpython-36.pyc
-
140text_analysis/bak/views.py0831
-
151text_analysis/bak/views.py0922_1
-
184text_analysis/bak/views.py0922_2
-
189text_analysis/bak/views.py1031
-
187text_analysis/bak/views.py_1109
-
208text_analysis/bak/views.py_1220
-
219text_analysis/bak/views.py_20240517
-
231text_analysis/bak/views.py_20240607
-
264text_analysis/bak/views.py_20240705
-
266text_analysis/bak/views.py_20240819
-
186text_analysis/bak/views.py_old
-
6text_analysis/eg.py
-
10text_analysis/read_config.py
-
14text_analysis/request.py
-
148text_analysis/settings.py
-
BINtext_analysis/tools/__pycache__/cusException.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/mysql_helper.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/process.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/to_kafka.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/to_kafka.cpython-37.pyc
-
BINtext_analysis/tools/__pycache__/to_kafka.cpython-38.pyc
-
BINtext_analysis/tools/__pycache__/tool.cpython-36.pyc
-
BINtext_analysis/tools/__pycache__/tool.cpython-37.pyc
-
BINtext_analysis/tools/__pycache__/tool.cpython-38.pyc
-
BINtext_analysis/tools/__pycache__/tools.cpython-36.pyc
-
129text_analysis/tools/bak/tool.py0822
-
25text_analysis/tools/cusException.py
-
67text_analysis/tools/kakfa_util.py
-
338text_analysis/tools/mysql_helper.py
-
51text_analysis/tools/process.py
-
171text_analysis/tools/seleniumTest.py
-
25text_analysis/tools/to_kafka.py
-
132text_analysis/tools/tool.py
-
13text_analysis/urls.py
-
268text_analysis/views.py
-
266text_analysis/views.py_20240819
-
271text_analysis/views_20240903.py
-
16text_analysis/wsgi.py
-
8uwsgi.ini
-
58wsgi.log
-
35wsgi.py
-
30wsgi.py_0228
@ -0,0 +1,26 @@ |
|||||
|
[zookeeper] |
||||
|
;zk地址 |
||||
|
zkhost=node-01:12181,node-02:12181,node-03:12181 |
||||
|
;节点 |
||||
|
node=/analyze |
||||
|
|
||||
|
[kafka] |
||||
|
;服务器地址 |
||||
|
bootstrap_servers=node-01:19092,node-02:19092,node-03:19092 |
||||
|
;topic |
||||
|
topic=produce_analyze |
||||
|
|
||||
|
[gofast] |
||||
|
;gofast前缀 |
||||
|
;url=https://caiji.percent.cn/ |
||||
|
url=http://8.152.196.157:8081/ |
||||
|
|
||||
|
[asr] |
||||
|
;音频上传 |
||||
|
mp3_upload=http://voice.pontoaplus.com/apis/file/asr/upload |
||||
|
;音频结果获取 |
||||
|
mp3_getResult=http://voice.pontoaplus.com/apis/file/asr/getResult |
||||
|
;视频上传 |
||||
|
video_upload=http://voice.pontoaplus.com/apis/media/analysis/upload |
||||
|
;视频结果获取 |
||||
|
video_getResult=http://voice.pontoaplus.com/apis/media/analysis/getResult |
||||
@ -0,0 +1,33 @@ |
|||||
|
#coding:utf8
"""Factory for daily-rotating file loggers."""
import logging
import os
import sys
from logging.handlers import TimedRotatingFileHandler
import re


def set_logger(filename):
    """Return a logger that writes INFO+ records to *filename*.

    The file rolls over at midnight (when="MIDNIGHT", interval=1) and at
    most three rotated files are kept (backupCount=3), named like
    ``<filename>.YYYY-MM-DD.log``.

    Calling this twice with the same name returns the same logger object
    without attaching a second handler, which would duplicate every line.
    """
    # Logger registry is keyed by name, so reuse filename as the name.
    logger = logging.getLogger(filename)
    logger.setLevel(logging.INFO)
    if logger.handlers:
        # Already configured by an earlier call — reuse as-is.
        return logger
    file_handler = TimedRotatingFileHandler(
        filename=filename, when="MIDNIGHT", encoding="utf-8", interval=1, backupCount=3
    )
    # Rotated files get this date suffix appended to the base name,
    # e.g. mylog -> mylog.2020-02-25.log
    file_handler.suffix = "%Y-%m-%d.log"
    # extMatch must match the suffix exactly or expired files are never
    # deleted. The dot is escaped here — the original pattern's bare "."
    # matched any character.
    file_handler.extMatch = re.compile(r"^\d{4}-\d{2}-\d{2}\.log$")
    file_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s] [%(process)d] [%(levelname)s] - %(module)s.%(funcName)s (%(filename)s:%(lineno)d) - %(message)s"
        )
    )
    logger.addHandler(file_handler)
    return logger
||||
@ -0,0 +1,22 @@ |
|||||
|
#!/usr/bin/env python
"""Entry point: start the background worker threads, then hand over to Django.

The upload/getResult workers drain the module-level queues defined in
text_analysis.views while the Django command (e.g. runserver/uwsgi) serves HTTP.
"""
import os
import sys
import threading

# DJANGO_SETTINGS_MODULE must be configured BEFORE text_analysis.views is
# imported: that module pulls in django.http at import time. The original
# set it only after the threads were already running.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings")

import django

from text_analysis.views import upload, getResult

if __name__ == "__main__":
    django.setup()

    # Daemon threads: they die with the main process, no explicit shutdown.
    for worker in (upload, getResult):
        t = threading.Thread(target=worker, name=worker.__name__)
        t.daemon = True
        t.start()

    from django.core.management import execute_from_command_line
    execute_from_command_line(sys.argv)
||||
|
|
||||
|
|
||||
@ -0,0 +1,34 @@ |
|||||
|
#coding:utf8
"""Manual smoke test for the remote media-ASR HTTP API.

upload() posts a local test.mp4 and prints the JSON reply (which contains
the taskId); getResults() polls the result endpoint for a known taskId.
"""
import requests


def upload():
    """POST test.mp4 to the upload endpoint and print the raw response."""
    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
    # form-data fields
    data = {
        'fromLanguage': 'zh'
    }
    # Use a context manager so the handle is closed after the request
    # (the original opened the file inline and leaked it).
    with open('test.mp4', 'rb') as fh:
        response = requests.post(url, data=data, files={'file': fh})
    print(response.text)
    # sample reply: {"code":200,"message":"SUCCESS","data":"3a42ea9594b641c39e40d1497ca29be9"}


def getResults():
    """Fetch and print the recognition result for a hard-coded taskId."""
    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
    # taskId comes from a previous upload() reply
    params = {
        'taskId': '4ef21e404b7240acb14bbd5fe63227fc'
    }
    response = requests.get(url, params=params)
    print(response.text)
    # sample: {"code":200,"message":"SUCCESS","data":{"sentences":[{"begin_time":1130,"text":"视频解析、语音识别。"}]...}


getResults()
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1 @@ |
|||||
|
# Launch the app under uwsgi (daemonized; log output goes to wsgi.log).
../../environment/python3.8/bin/uwsgi --ini uwsgi.ini --file wsgi.py --daemonize wsgi.log
||||
@ -0,0 +1 @@ |
|||||
|
# Forcefully kill whatever process is listening on port 9014 (the uwsgi server).
lsof -i:9014 |grep -v 'PID' | awk '{print $2}'| xargs kill -9
||||
@ -0,0 +1,36 @@ |
|||||
|
#coding=utf8
"""Scratch script: make sure a dated output directory exists.

Historic request-posting experiments that lived here as commented-out code
have been removed; only the directory bootstrap remains live.
"""
import sys
import requests
import json
import time
from datetime import datetime
import os

# Directory named after today's date (YYYY-MM-DD), relative to the cwd.
path = datetime.now().strftime('%Y-%m-%d')
# exist_ok avoids the check-then-create race of the original
# "if not exists: makedirs" sequence.
os.makedirs(path, exist_ok=True)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,140 @@ |
|||||
|
#coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
logging=set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import get_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
#任务队列 |
||||
|
global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
#数据队列 |
||||
|
global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
|
||||
|
@csrf_exempt
def ASR(request):
    """Accept a POSTed JSON task, enqueue it, and answer with a status JSON."""
    if request.method != 'POST':
        # Guard clause: anything but POST is rejected outright.
        body = {"code": 0, "msg": "请求方式错误,改为post请求"}
        return HttpResponse(json.dumps(body, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
    except:
        logging.error(traceback.format_exc())
        body = {"code": 0, "msg": "请求json格式不正确!"}
    else:
        body = {"code": 1, "msg": "请求正常!"}
    return HttpResponse(json.dumps(body, ensure_ascii=False))
||||
|
|
||||
|
def upload():
    """Worker loop: take tasks off task_queue, download the referenced video
    from gofast, push it to the remote ASR upload API, and on success hand the
    task (with the returned dataKey) to data_queue for getResult().

    Runs forever; intended to be started once in a daemon thread.
    """
    while True:
        try:
            if task_queue.qsize() >0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Which entry of the datasource's file list this task refers to.
                index=raw_data["metadata"]["index"]
                datasource=raw_data["metadata"]["admin"]["datasource"]
                if datasource not in raw_data["data"].keys():
                    # Unknown datasource: log and drop the task.
                    logging.info("找不到相关数据源!—{}".format(raw_data))
                    continue
                allFile=raw_data["data"][datasource]
                # NOTE(review): eval() on externally supplied text — prefer
                # json.loads / ast.literal_eval; eval executes arbitrary code.
                currentFile=eval(allFile)[index]
                file=currentFile["fileUrl"]
                fileName=currentFile["fileName"]
                # Fetch the video from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                # One input directory per day.
                path='inputdata/'+starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path+'/'+fileName, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传{}".format(fileName))
                # Push the file to the remote upload endpoint.
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': 'zh'}
                # NOTE(review): this handle is never closed (leaks one fd per task).
                files = {'file': open(path+'/'+fileName, 'rb')}
                response = requests.post(url, data=data, files=files)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"]==200:
                    # d["data"] is the key used later to poll for the result.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "","dataKey":result}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败,接口返回值{}".format(d))
            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # Never let the worker thread die; log and keep looping.
            logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop: poll the remote getResult API for every task on data_queue.

    code=="1": done — concatenate sentence texts and forward to kafka.
    code=="0": still processing — requeue the task and retry later.
    anything else / HTTP-level failure: mark the task failed and forward.
    Runs forever; intended to be started once in a daemon thread.
    """
    while True:
        # Throttle: poll at most every 3 seconds.
        time.sleep(3)
        try:
            if data_queue.qsize() >0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                print(raw_data)
                # dataKey was returned by the upload endpoint.
                dataKey=raw_data["result"]["dataKey"]
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                params = {'taskId': dataKey}
                response = requests.get(url, params=params)
                # print(response.text)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"]==200:
                    results=""
                    if d["data"]["code"]=="1":
                        # Finished: stitch the recognized sentences together.
                        for sentence in d["data"]["sentences"]:
                            results+=sentence["text"]
                        raw_data["result"]["results"] =results
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"]=="0":
                        # Still being processed — put the task back and retry.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data,d))
                    else:
                        # Remote analysis failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # Request-level failure (no/other "code").
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"]=response.text
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data,d))
                    to_kafka.send_kafka(raw_data, logging)

            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # NOTE(review): raw_data may be unbound here if the exception was
            # raised before the queue.get on the first iteration — confirm.
            raw_data["result"]["successCode"]="0"
            raw_data["result"]["errorLog"]=traceback.format_exc()
            logging.error(traceback.format_exc())
            to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,151 @@ |
|||||
|
#coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
logging=set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import get_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
#任务队列 |
||||
|
global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
#数据队列 |
||||
|
global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
|
||||
|
@csrf_exempt
def ASR(request):
    """Accept a POSTed JSON task, enqueue it, and answer with a status JSON."""
    if request.method != 'POST':
        # Guard clause: anything but POST is rejected outright.
        body = {"code": 0, "msg": "请求方式错误,改为post请求"}
        return HttpResponse(json.dumps(body, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
    except:
        logging.error(traceback.format_exc())
        body = {"code": 0, "msg": "请求json格式不正确!"}
    else:
        body = {"code": 1, "msg": "请求正常!"}
    return HttpResponse(json.dumps(body, ensure_ascii=False))
||||
|
|
||||
|
def upload():
    """Worker loop (0922_1 revision): drain task_queue, download the video from
    gofast, upload it to the remote ASR API, and enqueue the task (carrying the
    returned dataKey plus the file record) on data_queue for getResult().

    Runs forever; intended to be started once in a daemon thread.
    """
    while True:
        try:
            if task_queue.qsize() >0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Which entry of the datasource's file list this task refers to.
                index=raw_data["metadata"]["index"]
                datasource=raw_data["metadata"]["admin"]["datasource"]
                if datasource not in raw_data["data"].keys():
                    # Unknown datasource: log and drop the task.
                    logging.info("找不到相关数据源!—{}".format(raw_data))
                    continue
                allFile=raw_data["data"][datasource]
                # NOTE(review): eval() on externally supplied text — prefer
                # json.loads / ast.literal_eval; eval executes arbitrary code.
                currentFile=eval(allFile)[index]
                # Placeholder; filled in by getResult() once recognition is done.
                currentFile["content"]=""
                file=currentFile["fileUrl"]
                if "http" not in file:
                    # Relative path: prepend the gofast host.
                    file="https://caiji.percent.cn/"+file.lstrip("/")
                fileName=currentFile["fileName"]
                language=raw_data["metadata"]["admin"]["fromLanguage"]
                # Fetch the video from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                # One input directory per day.
                path='inputdata/'+starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path+'/'+fileName, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Push the file to the remote upload endpoint.
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                f=open(path+'/'+fileName, 'rb')
                files = {'file': f}
                response = requests.post(url, data=data, files=files)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"]==200:
                    # d["data"] is the key used later to poll for the result.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": currentFile,"dataKey":result}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data,d))
                f.close()
                # TODO: delete the downloaded video file afterwards.
            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # Never let the worker thread die; log and keep looping.
            logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop (0922_1 revision): poll the remote getResult API for tasks
    on data_queue. On completion the file record gains the recognized text as
    "content" and the whole record is serialized into result["results"] before
    being forwarded to kafka. Still-running tasks are requeued; failures are
    marked with successCode "0". Runs forever in a daemon thread.
    """
    while True:
        # Throttle: poll at most every 3 seconds.
        time.sleep(3)
        try:
            if data_queue.qsize() >0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                # print(raw_data)
                # dataKey was returned by the upload endpoint.
                dataKey=raw_data["result"]["dataKey"]
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                params = {'taskId': dataKey}
                response = requests.get(url, params=params)
                # print(response.text)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"]==200:
                    results=""
                    if d["data"]["code"]=="1":
                        # Finished: stitch the recognized sentences together.
                        for sentence in d["data"]["sentences"]:
                            results+=sentence["text"]
                        raw_data["result"]["results"]["content"] =results
                        # Serialize the file record for downstream consumers.
                        raw_data["result"]["results"]=json.dumps(raw_data["result"]["results"],ensure_ascii=False)
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"]=="0":
                        # Still being processed — put the task back and retry.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data,d))
                    else:
                        # Remote analysis failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"]=json.dumps(raw_data["result"]["results"],ensure_ascii=False)
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # Request-level failure (no/other "code").
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"]=response.text
                    raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data,d))
                    to_kafka.send_kafka(raw_data, logging)

            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # NOTE(review): raw_data may be unbound here if the exception was
            # raised before the queue.get on the first iteration — confirm.
            raw_data["result"]["successCode"]="0"
            raw_data["result"]["errorLog"]=traceback.format_exc()
            raw_data["result"]["results"]=""
            logging.error(traceback.format_exc())
            to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,184 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
|
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import get_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# 任务队列 |
||||
|
global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
# 数据队列 |
||||
|
global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASR(request):
    """Accept a POSTed JSON task, enqueue it, and answer with a status JSON."""
    if request.method != 'POST':
        # Guard clause: anything but POST is rejected outright.
        body = {"code": 0, "msg": "请求方式错误,改为post请求"}
        return HttpResponse(json.dumps(body, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
    except:
        logging.error(traceback.format_exc())
        body = {"code": 0, "msg": "请求json格式不正确!"}
    else:
        body = {"code": 1, "msg": "请求正常!"}
    return HttpResponse(json.dumps(body, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop (0922_2 revision): resolve the video URL via a JsonPath
    expression from metadata.admin.fileUrl, download the video, upload it to
    the remote ASR API, and enqueue the task on data_queue for getResult().

    Runs forever; intended to be started once in a daemon thread.
    """
    while True:
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # index = raw_data["metadata"]["index"]
                # datasource = raw_data["metadata"]["admin"]["datasource"]
                # if datasource not in raw_data["data"].keys():
                #     logging.info("找不到相关数据源!—{}".format(raw_data))
                #     continue
                # allFile = raw_data["data"][datasource]
                # currentFile = eval(allFile)[index]
                url=raw_data["metadata"]["admin"]["fileUrl"]
                # NOTE(review): if url does NOT contain '$.', video_url/fileName/
                # file are never assigned and the code below raises NameError.
                if '$.' in url:
                    # Resolve the value dynamically with a JsonPath expression.
                    # NOTE(review): split(':') breaks if the JsonPath itself
                    # contains a colon — confirm the expected url format.
                    datasources = str(url).split(':')
                    # [0] is the datasource name, [1] the JsonPath expression.
                    datasourcestr = raw_data["data"][datasources[0]]
                    # print(datasourcestr)
                    datasource = json.loads(datasourcestr)
                    # Compile the JsonPath expression.
                    expr = parse(datasources[1])
                    # Select the matching JSON elements.
                    match = [match.value for match in expr.find(datasource)]
                    video_url = match[0]
                    fileName=video_url.rsplit('/')[-1]
                    # NOTE(review): when video_url already contains "http",
                    # `file` stays unbound — likely missing an else branch.
                    if "http" not in video_url:
                        file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                # print(file)
                # name=raw_data["metadata"]["admin"]["fileName"]
                # if '$.' in name:
                #     datasources = str(name).split(':')
                #     datasourcestr = raw_data["data"][datasources[0]]
                #     datasource = json.loads(datasourcestr)
                #     expr = parse(datasources[1])
                #     match = [match.value for match in expr.find(datasource)]
                #     fileName = match[0]

                # "content" is filled in by getResult() once recognition is done.
                currentFile={"content":"","fileName":fileName,"fileUrl":file}
                language = raw_data["metadata"]["admin"]["fromLanguage"]

                # Fetch the video from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                # One input directory per day.
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path + '/' + fileName, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Push the file to the remote upload endpoint.
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                f = open(path + '/' + fileName, 'rb')
                files = {'file': f}
                response = requests.post(url, data=data, files=files)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # d["data"] is the key used later to poll for the result.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": currentFile, "dataKey": result}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                f.close()
                # TODO: delete the downloaded video file afterwards.
            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # Never let the worker thread die; log and keep looping.
            logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop (0922_2 revision): poll the remote getResult API for tasks
    on data_queue. On completion the file record gains the recognized text as
    "content" and is serialized into result["results"] before being forwarded
    to kafka. Still-running tasks are requeued; failures are marked with
    successCode "0". Runs forever in a daemon thread.
    """
    while True:
        # Throttle: poll at most every 3 seconds.
        time.sleep(3)
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                # print(raw_data)
                # dataKey was returned by the upload endpoint.
                dataKey = raw_data["result"]["dataKey"]
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                params = {'taskId': dataKey}
                response = requests.get(url, params=params)
                # print(response.text)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    results = ""
                    if d["data"]["code"] == "1":
                        # Finished: stitch the recognized sentences together.
                        for sentence in d["data"]["sentences"]:
                            results += sentence["text"]
                        raw_data["result"]["results"]["content"] = results
                        # Serialize the file record for downstream consumers.
                        raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still being processed — put the task back and retry.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # Remote analysis failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # Request-level failure (no/other "code").
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)

            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # NOTE(review): raw_data may be unbound here if the exception was
            # raised before the queue.get on the first iteration — confirm.
            raw_data["result"]["successCode"] = "0"
            raw_data["result"]["errorLog"] = traceback.format_exc()
            raw_data["result"]["results"] = ""
            logging.error(traceback.format_exc())
            to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,189 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import get_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# 任务队列 |
||||
|
global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
# 数据队列 |
||||
|
global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASR(request):
    """Accept a POSTed JSON task, enqueue it, and answer with a status JSON."""
    if request.method != 'POST':
        # Guard clause: anything but POST is rejected outright.
        body = {"code": 0, "msg": "请求方式错误,改为post请求"}
        return HttpResponse(json.dumps(body, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
    except:
        logging.error(traceback.format_exc())
        body = {"code": 0, "msg": "请求json格式不正确!"}
    else:
        body = {"code": 1, "msg": "请求正常!"}
    return HttpResponse(json.dumps(body, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop (1031 revision): resolve the video URL via a JsonPath
    expression from input.fileUrl, download the video, upload it to the
    remote ASR API, and enqueue the task on data_queue for getResult().

    Runs forever; intended to be started once in a daemon thread.
    """
    while True:
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # index = raw_data["metadata"]["index"]
                # datasource = raw_data["metadata"]["admin"]["datasource"]
                # if datasource not in raw_data["data"].keys():
                #     logging.info("找不到相关数据源!—{}".format(raw_data))
                #     continue
                # allFile = raw_data["data"][datasource]
                # currentFile = eval(allFile)[index]
                url=raw_data["input"]["fileUrl"]
                # NOTE(review): if url does NOT contain '$.', video_url/fileName/
                # file are never assigned and the code below raises NameError.
                if '$.' in url:
                    # Resolve the value dynamically with a JsonPath expression.
                    # NOTE(review): split(':') breaks if the JsonPath itself
                    # contains a colon — confirm the expected url format.
                    datasources = str(url).split(':')
                    # [0] is the datasource name, [1] the JsonPath expression.
                    datasourcestr = raw_data["data"][datasources[0]]
                    # print(datasourcestr)
                    datasource = json.loads(datasourcestr)
                    # Compile the JsonPath expression.
                    expr = parse(datasources[1])
                    # Select the matching JSON elements.
                    match = [match.value for match in expr.find(datasource)]
                    video_url = match[0]
                    fileName=video_url.rsplit('/')[-1]
                    # NOTE(review): when video_url already contains "http",
                    # `file` stays unbound — likely missing an else branch.
                    if "http" not in video_url:
                        file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                # print(file)
                # name=raw_data["metadata"]["admin"]["fileName"]
                # if '$.' in name:
                #     datasources = str(name).split(':')
                #     datasourcestr = raw_data["data"][datasources[0]]
                #     datasource = json.loads(datasourcestr)
                #     expr = parse(datasources[1])
                #     match = [match.value for match in expr.find(datasource)]
                #     fileName = match[0]

                currentFile={"fileName":fileName,"fileUrl":file}
                language = raw_data["input"]["fromLanguage"]

                # Fetch the video from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                # One input directory per day.
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path + '/' + fileName, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Push the file to the remote upload endpoint.
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                f = open(path + '/' + fileName, 'rb')
                files = {'file': f}
                response = requests.post(url, data=data, files=files)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # d["data"] is the key used later to poll for the result.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result,"file":currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                f.close()
                # TODO: delete the downloaded video file afterwards.
            else:
                # Queue empty — sleep before polling again.
                time.sleep(10)
        except:
            # Never let the worker thread die; log and keep looping.
            logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop: poll the ASR service for transcription results.

    Pops uploaded tasks from data_queue, queries the getResult endpoint with
    the task's dataKey, and pushes the outcome to Kafka. Tasks still being
    processed (data.code == "0") are re-queued for the next poll cycle.
    """
    while True:
        # Poll at most once every 3 seconds.
        time.sleep(3)
        # Default so the except handler never hits an unbound name when the
        # failure happens before a task is dequeued.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                # Query the result endpoint with the task's media key.
                dataKey = raw_data["result"]["dataKey"]
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                params = {'taskId': dataKey}
                response = requests.get(url, params=params)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    if d["data"]["code"] == "1":
                        # Transcription finished: concatenate sentence texts.
                        results = ""
                        for sentence in d["data"]["sentences"]:
                            results += sentence["text"]
                        if "content" in res_tmp.keys():
                            res_tmp["content"] = results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still transcribing — requeue for the next poll.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # Transcription failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # HTTP-level failure from the result endpoint.
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            # Fix: the original bare `except:` referenced raw_data even when
            # the exception fired before a task was dequeued (NameError).
            logging.error(traceback.format_exc())
            if raw_data is not None:
                result = raw_data.setdefault("result", {})
                result["successCode"] = "0"
                result["errorLog"] = traceback.format_exc()
                result["results"] = ""
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,187 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# Task queue: requests accepted by ASRNew, consumed by the upload() worker.
task_queue = queue.Queue()
# Data queue: uploaded tasks awaiting ASR results, consumed by getResult().
data_queue = queue.Queue()
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it.

    Returns {"code": 1} on success, {"code": 0} with a message for a
    malformed body or a non-POST request. Actual processing happens
    asynchronously in the upload()/getResult() worker loops.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except Exception:
            # Fix: narrowed the bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; only the body parse is expected to fail here.
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop: download each queued media file and push it to the ASR upload API.

    Resolves the media URL from the task input (optionally via a JsonPath
    expression), downloads it into inputdata/<date>/, posts it to the upload
    endpoint, and on success enqueues the task on data_queue for result polling.
    """
    while True:
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                url = raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<data-ref>#<...>#<jsonpath>": resolve the JSON payload,
                    # then pull the media URL out with a JsonPath expression.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [m.value for m in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName = video_url.rsplit('/')[-1]
                # Relative paths are served from the collection host.
                if "http" not in video_url:
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file = video_url
                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]
                # Fetch the media file from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                local_path = path + '/' + fileName
                with open(local_path, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Call the upload endpoint.
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                # Fix: `with` guarantees the handle closes even if the POST
                # raises (the original leaked the descriptor on any exception).
                with open(local_path, 'rb') as f:
                    files = {'file': f}
                    response = requests.post(url, data=data, files=files)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # "data" holds the key used later to fetch the transcription.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result, "file": currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # TODO: delete the downloaded media file.
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            # Fix: narrowed from a bare `except:`.
            logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop: poll the ASR service for transcription results.

    Pops uploaded tasks from data_queue, queries the getResult endpoint with
    the task's dataKey, and pushes the outcome to Kafka. Tasks still being
    processed (data.code == "0") are re-queued for the next poll cycle.
    """
    while True:
        # Poll at most once every 3 seconds.
        time.sleep(3)
        # Defaults so the except handler never hits unbound names when the
        # failure happens before a task is dequeued.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                # Query the result endpoint with the task's media key.
                dataKey = raw_data["result"]["dataKey"]
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                params = {'taskId': dataKey}
                response = requests.get(url, params=params)
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    if d["data"]["code"] == "1":
                        # Transcription finished: concatenate sentence texts.
                        results = ""
                        for sentence in d["data"]["sentences"]:
                            results += sentence["text"]
                        if "content" in res_tmp.keys():
                            res_tmp["content"] = results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still transcribing — requeue for the next poll.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # Transcription failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # HTTP-level failure from the result endpoint.
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            # Fix: the original bare `except:` referenced raw_data even when
            # the exception fired before a task was dequeued (NameError).
            logging.error(traceback.format_exc())
            if raw_data is not None:
                result = raw_data.setdefault("result", {})
                result["successCode"] = "0"
                result["errorLog"] = traceback.format_exc()
                result["results"] = ""
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,208 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# Task queue: requests accepted by ASRNew, consumed by the upload() worker.
task_queue = queue.Queue()
# Data queue: uploaded tasks awaiting ASR results, consumed by getResult().
data_queue = queue.Queue()
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it.

    Returns {"code": 1} on success, {"code": 0} with a message for a
    malformed body or a non-POST request. Actual processing happens
    asynchronously in the upload()/getResult() worker loops.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except Exception:
            # Fix: narrowed the bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; only the body parse is expected to fail here.
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop: download each queued media file and push it to the ASR upload API.

    Resolves the media URL from the task input (optionally via a JsonPath
    expression), downloads it into inputdata/<date>/, posts it to the upload
    endpoint, and on success enqueues the task on data_queue for result polling.
    On failure the task is reported to Kafka with successCode "0".
    """
    while True:
        # Defaults so the except handler never hits unbound names when the
        # failure happens before/while dequeuing a task.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                logging.info("任务数据为:{}".format(raw_data))
                url = raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<data-ref>#<...>#<jsonpath>": resolve the JSON payload,
                    # then pull the media URL out with a JsonPath expression.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [m.value for m in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName = video_url.rsplit('/')[-1]
                # Relative paths are served from the collection host.
                if "http" not in video_url:
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file = video_url
                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]
                # Fetch the media file from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                local_path = path + '/' + fileName
                with open(local_path, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Call the upload endpoint.
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                # Fix: `with` guarantees the handle closes even if the POST
                # raises (the original leaked the descriptor on any exception).
                with open(local_path, 'rb') as f:
                    files = {'file': f}
                    response = requests.post(url, data=data, files=files)
                logging.info("上传后接口返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # "data" holds the key used later to fetch the transcription.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result, "file": currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # TODO: delete the downloaded media file.
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Fix: report downstream only if a task was dequeued — the
            # original bare `except:` could NameError on raw_data/res_tmp.
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop: poll the ASR service for transcription results.

    Pops uploaded tasks from data_queue, queries the getResult endpoint with
    the task's dataKey, and pushes the outcome to Kafka. Tasks still being
    processed (data.code == "0") are re-queued for the next poll cycle.
    """
    while True:
        # Poll at most once every 3 seconds.
        time.sleep(3)
        # Defaults so the except handler never hits unbound names when the
        # failure happens before a task is dequeued.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                logging.info("任务数据为:{}".format(raw_data))
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                # Query the result endpoint with the task's media key.
                dataKey = raw_data["result"]["dataKey"]
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                params = {'taskId': dataKey}
                response = requests.get(url, params=params)
                logging.info("ASR网站返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    if d["data"]["code"] == "1":
                        # Finished. The two original branches (with / without
                        # sentences) are merged: joining an empty list yields "".
                        sentences = d["data"]["sentences"] or []
                        results = ' '.join(s["text"] for s in sentences)
                        if "content" in res_tmp.keys():
                            res_tmp["content"] = results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still transcribing — requeue for the next poll.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # Transcription failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # HTTP-level failure from the result endpoint.
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Fix: report downstream only if a task was dequeued — the
            # original bare `except:` could NameError on raw_data/res_tmp.
            if raw_data is not None:
                result = raw_data.setdefault("result", {})
                result["successCode"] = "0"
                result["errorLog"] = traceback.format_exc()
                result["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,219 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# Task queue: requests accepted by ASRNew, consumed by the upload() worker.
task_queue = queue.Queue()
# Data queue: uploaded tasks awaiting ASR results, consumed by getResult().
data_queue = queue.Queue()
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it.

    Returns {"code": 1} on success, {"code": 0} with a message for a
    malformed body or a non-POST request. Actual processing happens
    asynchronously in the upload()/getResult() worker loops.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except Exception:
            # Fix: narrowed the bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; only the body parse is expected to fail here.
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop: download each queued media file and push it to the ASR upload API.

    Resolves the media URL from the task input (optionally via a JsonPath
    expression), downloads it into inputdata/<date>/, then posts it to either
    the audio (m4a/mp3/wav) or the video upload endpoint. On success the task
    is enqueued on data_queue (with a "video" flag telling getResult which
    result endpoint to hit); on failure it is reported to Kafka.
    """
    while True:
        # Defaults so the except handler never hits unbound names when the
        # failure happens before/while dequeuing a task.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                logging.info("任务数据为:{}".format(raw_data))
                url = raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<data-ref>#<...>#<jsonpath>": resolve the JSON payload,
                    # then pull the media URL out with a JsonPath expression.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [m.value for m in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName = video_url.rsplit('/')[-1]
                # Relative paths are served from the collection host.
                if "http" not in video_url:
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file = video_url
                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]
                # Fetch the media file from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                local_path = path + '/' + fileName
                with open(local_path, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Choose the upload endpoint: video=1 for video, 0 for audio.
                video = 1
                if fileName.endswith(("m4a", "mp3", "wav")):
                    url = "https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                    video = 0
                else:
                    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                # Fix: `with` guarantees the handle closes even if the POST
                # raises (the original leaked the descriptor on any exception).
                with open(local_path, 'rb') as f:
                    files = {'file': f}
                    response = requests.post(url, data=data, files=files, verify=False)
                logging.info("上传后接口返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # "data" holds the key used later to fetch the transcription.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result, "video": video, "file": currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # TODO: delete the downloaded media file.
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Fix: report downstream only if a task was dequeued — the
            # original bare `except:` could NameError on raw_data/res_tmp.
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Worker loop: poll the ASR service for transcription results.

    Pops uploaded tasks from data_queue, queries the video or audio result
    endpoint (selected by the task's result["video"] flag), and pushes the
    outcome to Kafka. Tasks still being processed (data.code == "0") are
    re-queued for the next poll cycle.
    """
    while True:
        # Poll at most once every 3 seconds.
        time.sleep(3)
        # Defaults so the except handler never hits unbound names when the
        # failure happens before a task is dequeued.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                logging.info("任务数据为:{}".format(raw_data))
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                # Query the result endpoint with the task's media key.
                dataKey = raw_data["result"]["dataKey"]
                params = {'taskId': dataKey}
                language = raw_data["input"]["fromLanguage"]
                data = {'fromLanguage': language, 'taskId': dataKey}
                if raw_data["result"]["video"] == 1:
                    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                    response = requests.get(url, params=params, verify=False)
                else:
                    url = "https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                    response = requests.post(url, data=data, verify=False)
                logging.info("ASR网站返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    if d["data"]["code"] == "1":
                        # Finished. The two original branches (with / without
                        # sentences) are merged: joining an empty list yields "".
                        sentences = d["data"]["sentences"] or []
                        results = ' '.join(s["text"] for s in sentences)
                        if "content" in res_tmp.keys():
                            res_tmp["content"] = results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still transcribing — requeue for the next poll.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # Transcription failed.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # HTTP-level failure from the result endpoint.
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Fix: report downstream only if a task was dequeued — the
            # original bare `except:` could NameError on raw_data/res_tmp.
            if raw_data is not None:
                result = raw_data.setdefault("result", {})
                result["successCode"] = "0"
                result["errorLog"] = traceback.format_exc()
                result["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,231 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# Task queue: requests accepted by ASRNew, consumed by the upload() worker.
task_queue = queue.Queue()
# Data queue: uploaded tasks awaiting ASR results, consumed by getResult().
data_queue = queue.Queue()
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it.

    Returns {"code": 1} on success, {"code": 0} with a message for a
    malformed body or a non-POST request. Actual processing happens
    asynchronously in the upload()/getResult() worker loops.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except Exception:
            # Fix: narrowed the bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; only the body parse is expected to fail here.
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop: download each queued media file and push it to the ASR upload API.

    Resolves the media URL from the task input (optionally via a JsonPath
    expression), downloads it into inputdata/<date>/, then posts it to either
    the audio (m4a/mp3/wav) or the video upload endpoint. On success the task
    is enqueued on data_queue (with a "video" flag telling getResult which
    result endpoint to hit); on failure it is reported to Kafka with
    status 2 / successCode "0".
    """
    while True:
        # Defaults so the except handler never hits unbound names when the
        # failure happens before/while dequeuing a task.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                output = raw_data["output"]
                # Result skeleton: one empty string per requested output field.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                logging.info("任务数据为:{}".format(raw_data))
                url = raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<data-ref>#<...>#<jsonpath>": resolve the JSON payload,
                    # then pull the media URL out with a JsonPath expression.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [m.value for m in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName = video_url.rsplit('/')[-1]
                # Relative paths are served from the collection host.
                if "http" not in video_url:
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file = video_url
                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]
                # Fetch the media file from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                local_path = path + '/' + fileName
                with open(local_path, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Choose the upload endpoint: video=1 for video, 0 for audio.
                video = 1
                if fileName.endswith(("m4a", "mp3", "wav")):
                    url = "https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                    video = 0
                else:
                    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                # Fix: `with` guarantees the handle closes even if the POST
                # raises (the original leaked the descriptor on any exception).
                with open(local_path, 'rb') as f:
                    files = {'file': f}
                    response = requests.post(url, data=data, files=files, verify=False)
                logging.info("上传后接口返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # "data" holds the key used later to fetch the transcription.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result, "video": video, "file": currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # TODO: delete the downloaded media file.
            else:
                # No pending tasks — sleep before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Fix: report downstream only if a task was dequeued — the
            # original bare `except:` could NameError on raw_data/res_tmp.
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频上传异常"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Poll data_queue for uploaded tasks and fetch their ASR/analysis results.

    Runs forever. Every 3 seconds one task is taken from ``data_queue``; the
    remote result endpoint is queried (HTTP GET for video tasks, HTTP POST
    for audio tasks). Finished results are serialized into the task's
    ``result`` dict and sent to Kafka; tasks still being processed are put
    back on the queue; failures are reported to Kafka as well.
    """
    while True:
        # 3秒钟结果获取一次 (poll once every 3 seconds)
        time.sleep(3)
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep longer)
                time.sleep(10)
                continue
            logging.info("取数据队列长度{}".format(data_queue.qsize()))
            raw_data = data_queue.get()
            logging.info("任务数据为:{}".format(raw_data))
            # Pre-fill every requested output key with an empty string.
            res_tmp = {key: "" for key in raw_data["output"]}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())

            def report(status, message, log_line, success_code=None, error_log=None):
                # Serialize the partial result, update bookkeeping fields and
                # ship the task to Kafka (shared by success and failure paths;
                # the original duplicated this block four times).
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["status"] = status
                raw_data["result"]["message"] = message
                if success_code is not None:
                    raw_data["result"]["successCode"] = success_code
                if error_log is not None:
                    raw_data["result"]["errorLog"] = error_log
                logging.info(log_line)
                to_kafka.send_kafka(raw_data, logging)

            # 根据视频key访问获取结果接口 (query the result endpoint with the task key)
            dataKey = raw_data["result"]["dataKey"]
            language = raw_data["input"]["fromLanguage"]
            if raw_data["result"]["video"] == 1:
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                response = requests.get(url, params={'taskId': dataKey}, verify=False)
            else:
                url = "https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                response = requests.post(url, data={'fromLanguage': language, 'taskId': dataKey}, verify=False)
            logging.info("ASR网站返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                if d["data"]["code"] == "1":
                    # Finished: join the sentence texts with single spaces
                    # (empty string when there are no sentences).
                    results = ' '.join(s["text"] for s in (d["data"]["sentences"] or []))
                    if "content" in res_tmp:
                        res_tmp["content"] = results
                    report(1, "成功", "视频解析获取结果成功{}".format(raw_data))
                elif d["data"]["code"] == "0":
                    # 正在解析中,将任务再次放回数据队列 (still processing: requeue)
                    data_queue.put(raw_data)
                    logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                else:
                    # 解析失败 (remote reports the task failed)
                    report(2, "视频/音频解析异常",
                           "视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d),
                           success_code="0", error_log=response.text)
            else:
                report(2, "视频/音频解析异常",
                       "视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d),
                       success_code="0", error_log=response.text)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report to Kafka when a task was actually dequeued; the
            # original handler raised NameError on unbound raw_data/res_tmp
            # if the failure happened while polling, killing the worker loop.
            if raw_data is not None and "result" in raw_data:
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频解析异常"
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,264 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
from kazoo.client import KazooClient |
||||
|
from kazoo.protocol.states import EventType |
||||
|
# 任务队列 |
||||
|
# global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
# 数据队列 |
||||
|
# global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
stop_dict={} |
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """Accept a POSTed JSON task and enqueue it for the upload worker.

    Returns a JSON body with code 1 on success, code 0 for a non-POST
    request or an unparsable body.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Pull tasks from task_queue, download the media file and upload it for ASR.

    Runs forever. For each task the media URL is resolved (optionally through
    a JsonPath expression), the file is downloaded from gofast into a dated
    local directory, then uploaded to the audio or video analysis endpoint.
    On success the task (carrying the returned dataKey) is queued on
    ``data_queue`` for getResult(); on error a failure message is sent to Kafka.
    """
    while True:
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep)
                time.sleep(10)
                continue
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            # Pre-fill every requested output key with an empty string.
            res_tmp = {key: "" for key in raw_data["output"]}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            logging.info("任务数据为:{}".format(raw_data))
            logging.info("当前version信息为:{}".format(stop_dict))
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                # Task was paused / superseded by a newer version: drop it.
                logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                continue
            url = raw_data["input"]["fileUrl"]
            if "json" in url:
                # "<data-expr>#<sep>#<jsonpath>": resolve the payload first,
                # then pull the media URL out with a JsonPath expression.
                parm = url.split("#")
                data1_json = json.loads(parse_data(raw_data, parm[0]))
                expr = parse(parm[2])
                video_url = [m.value for m in expr.find(data1_json)][0]
            else:
                video_url = parse_data(raw_data, url)
            fileName = video_url.rsplit('/')[-1]
            # Relative paths come from the gofast store; prefix its host.
            if "http" not in video_url:
                file = "https://caiji.percent.cn/" + video_url.lstrip("/")
            else:
                file = video_url
            currentFile = {"fileName": fileName, "fileUrl": file}
            language = raw_data["input"]["fromLanguage"]
            # 从gofast获取视频 (download the media file from gofast)
            myfile = requests.get(file)
            path = 'inputdata/' + datetime.now().strftime('%Y-%m-%d')
            # exist_ok avoids the check-then-create race between workers.
            os.makedirs(path, exist_ok=True)
            local_path = path + '/' + fileName
            with open(local_path, 'wb') as f:
                f.write(myfile.content)
            logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
            # 访问视频上传接口; video=1视频,0音频 (pick endpoint: 1 video, 0 audio)
            if fileName.endswith(("m4a", "mp3", "wav")):
                url = "https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                video = 0
            else:
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                video = 1
            data = {'fromLanguage': language}
            # `with` guarantees the handle is closed even when the POST raises;
            # the original `f = open(...)` leaked the descriptor on exception.
            with open(local_path, 'rb') as f:
                response = requests.post(url, data=data, files={'file': f}, verify=False)
            logging.info("上传后接口返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                # 接口返回值data中存放视频获取结果的key (data holds the result key)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "",
                                      "dataKey": d["data"], "video": video, "file": currentFile}
                data_queue.put(raw_data)
                logging.info("视频上传成功{}".format(raw_data))
            else:
                logging.info("视频上传失败{}-{}".format(raw_data, d))
            # Todo删除视频文件 (TODO: delete the downloaded file)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report when a task was actually dequeued; the original
            # handler raised NameError on unbound raw_data/res_tmp when the
            # exception came from polling, which killed the worker loop.
            if raw_data is not None:
                raw_data["result"] = {"successCode": "0", "status": 2,
                                      "message": "视频/音频上传异常",
                                      "errorLog": traceback.format_exc(),
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Poll data_queue for uploaded tasks and fetch their ASR/analysis results.

    Runs forever. Every 3 seconds one task is taken from ``data_queue``;
    paused/superseded tasks (tracked in ``stop_dict``) are dropped. The
    remote result endpoint is queried (GET for video, POST for audio);
    finished results go to Kafka, in-progress tasks are requeued, failures
    are reported to Kafka.
    """
    while True:
        # 3秒钟结果获取一次 (poll once every 3 seconds)
        time.sleep(3)
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep longer)
                time.sleep(10)
                continue
            logging.info("取数据队列长度{}".format(data_queue.qsize()))
            raw_data = data_queue.get()
            logging.info("任务数据为:{}".format(raw_data))
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                # Task was paused / superseded by a newer version: drop it.
                logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                continue
            # Pre-fill every requested output key with an empty string.
            res_tmp = {key: "" for key in raw_data["output"]}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())

            def report(status, message, log_line, success_code=None, error_log=None):
                # Serialize the partial result, update bookkeeping fields and
                # ship the task to Kafka (shared by success and failure paths;
                # the original duplicated this block four times).
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["status"] = status
                raw_data["result"]["message"] = message
                if success_code is not None:
                    raw_data["result"]["successCode"] = success_code
                if error_log is not None:
                    raw_data["result"]["errorLog"] = error_log
                logging.info(log_line)
                to_kafka.send_kafka(raw_data, logging)

            # 根据视频key访问获取结果接口 (query the result endpoint with the task key)
            dataKey = raw_data["result"]["dataKey"]
            language = raw_data["input"]["fromLanguage"]
            if raw_data["result"]["video"] == 1:
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                response = requests.get(url, params={'taskId': dataKey}, verify=False)
            else:
                url = "https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                response = requests.post(url, data={'fromLanguage': language, 'taskId': dataKey}, verify=False)
            logging.info("ASR网站返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                if d["data"]["code"] == "1":
                    # Finished: join the sentence texts with single spaces
                    # (empty string when there are no sentences).
                    results = ' '.join(s["text"] for s in (d["data"]["sentences"] or []))
                    if "content" in res_tmp:
                        res_tmp["content"] = results
                    report(1, "成功", "视频解析获取结果成功{}".format(raw_data))
                elif d["data"]["code"] == "0":
                    # 正在解析中,将任务再次放回数据队列 (still processing: requeue)
                    data_queue.put(raw_data)
                    logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                else:
                    # 解析失败 (remote reports the task failed)
                    report(2, "视频/音频解析异常",
                           "视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d),
                           success_code="0", error_log=response.text)
            else:
                report(2, "视频/音频解析异常",
                       "视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d),
                       success_code="0", error_log=response.text)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report to Kafka when a task was actually dequeued; the
            # original handler raised NameError on unbound raw_data/res_tmp
            # if the failure happened while polling, killing the worker loop.
            if raw_data is not None and "result" in raw_data:
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频解析异常"
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def zk_monitoring():
    """Watch the ZooKeeper /analyze node and record pause/version updates.

    Each CHANGED event re-reads the node and stores the payload's version
    and operation in the module-level ``stop_dict`` keyed by scenes_id, so
    the upload/getResult workers can drop superseded tasks.
    """
    try:
        # 线上环境 (production ensemble)
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # 测试环境 (test ensemble)
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        # 设置监听器 (register the data watch)
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to content changes; re-read the node for fresh data.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene = payload["scenes_id"]
            stop_dict[scene] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # 保持程序运行以监听节点变化 (keep the thread alive so the watch fires);
        # the bare except is deliberate: it also catches KeyboardInterrupt.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # 关闭连接 (shut the client down cleanly)
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
@ -0,0 +1,266 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
from kazoo.client import KazooClient |
||||
|
from kazoo.protocol.states import EventType |
||||
|
# 任务队列 |
||||
|
# global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
# 数据队列 |
||||
|
# global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
stop_dict={} |
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """Accept a POSTed JSON task and enqueue it for the upload worker.

    Returns a JSON body with code 1 on success, code 0 for a non-POST
    request or an unparsable body.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Pull tasks from task_queue, download the media file and upload it for ASR.

    Runs forever. For each task the media URL is resolved (optionally through
    a JsonPath expression), the file is downloaded from gofast into a dated
    local directory, then uploaded to the audio or video analysis endpoint.
    On success the task (carrying the returned dataKey) is queued on
    ``data_queue`` for getResult(); on error a failure message is sent to Kafka.
    """
    while True:
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep)
                time.sleep(10)
                continue
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            # Pre-fill every requested output key with an empty string.
            res_tmp = {key: "" for key in raw_data["output"]}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            logging.info("任务数据为:{}".format(raw_data))
            logging.info("当前version信息为:{}".format(stop_dict))
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                # Task was paused / superseded by a newer version: drop it.
                logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                continue
            url = raw_data["input"]["fileUrl"]
            if "json" in url:
                # "<data-expr>#<sep>#<jsonpath>": resolve the payload first,
                # then pull the media URL out with a JsonPath expression.
                parm = url.split("#")
                data1_json = json.loads(parse_data(raw_data, parm[0]))
                expr = parse(parm[2])
                video_url = [m.value for m in expr.find(data1_json)][0]
            else:
                video_url = parse_data(raw_data, url)
            fileName = video_url.rsplit('/')[-1]
            # Relative paths come from the gofast store; prefix its host.
            if "http" not in video_url:
                file = "https://caiji.percent.cn/" + video_url.lstrip("/")
            else:
                file = video_url
            currentFile = {"fileName": fileName, "fileUrl": file}
            language = raw_data["input"]["fromLanguage"]
            # 从gofast获取视频 (download the media file from gofast)
            myfile = requests.get(file)
            path = 'inputdata/' + datetime.now().strftime('%Y-%m-%d')
            # exist_ok avoids the check-then-create race between workers.
            os.makedirs(path, exist_ok=True)
            local_path = path + '/' + fileName
            with open(local_path, 'wb') as f:
                f.write(myfile.content)
            logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
            # 访问视频上传接口; video=1视频,0音频 (pick endpoint: 1 video, 0 audio)
            if fileName.endswith(("m4a", "mp3", "wav")):
                url = "https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                video = 0
            else:
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                video = 1
            data = {'fromLanguage': language}
            # `with` guarantees the handle is closed even when the POST raises;
            # the original `f = open(...)` leaked the descriptor on exception.
            with open(local_path, 'rb') as f:
                response = requests.post(url, data=data, files={'file': f}, verify=False)
            logging.info("上传后接口返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                # 接口返回值data中存放视频获取结果的key (data holds the result key)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "",
                                      "dataKey": d["data"], "video": video, "file": currentFile}
                data_queue.put(raw_data)
                logging.info("视频上传成功{}".format(raw_data))
            else:
                logging.info("视频上传失败{}-{}".format(raw_data, d))
            # Todo删除视频文件 (TODO: delete the downloaded file)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report when a task was actually dequeued; the original
            # handler raised NameError on unbound raw_data/res_tmp when the
            # exception came from polling, which killed the worker loop.
            if raw_data is not None:
                raw_data["result"] = {"successCode": "0", "status": 2,
                                      "message": "视频/音频上传异常",
                                      "errorLog": traceback.format_exc(),
                                      "results": json.dumps(res_tmp, ensure_ascii=False)}
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Poll data_queue for uploaded tasks and fetch their ASR/analysis results.

    Runs forever. Every 3 seconds one task is taken from ``data_queue``;
    paused/superseded tasks (tracked in ``stop_dict``) are dropped. The
    remote result endpoint is queried (GET for video, POST for audio);
    finished results go to Kafka, in-progress tasks are requeued, failures
    are reported to Kafka.
    """
    while True:
        # 3秒钟结果获取一次 (poll once every 3 seconds)
        time.sleep(3)
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep longer)
                time.sleep(10)
                continue
            logging.info("取数据队列长度{}".format(data_queue.qsize()))
            raw_data = data_queue.get()
            logging.info("任务数据为:{}".format(raw_data))
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                # Task was paused / superseded by a newer version: drop it.
                logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                continue
            # Pre-fill every requested output key with an empty string.
            res_tmp = {key: "" for key in raw_data["output"]}
            if "id" in res_tmp:
                res_tmp["id"] = str(uuid.uuid4())
            # This version marks the record as final and carries the original
            # file name through to the output.
            res_tmp["isLast"] = 1
            res_tmp["fileName"] = raw_data["result"]["file"]["fileName"]

            def report(status, message, log_line, success_code=None, error_log=None):
                # Serialize the partial result, update bookkeeping fields and
                # ship the task to Kafka (shared by success and failure paths;
                # the original duplicated this block four times).
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["status"] = status
                raw_data["result"]["message"] = message
                if success_code is not None:
                    raw_data["result"]["successCode"] = success_code
                if error_log is not None:
                    raw_data["result"]["errorLog"] = error_log
                logging.info(log_line)
                to_kafka.send_kafka(raw_data, logging)

            # 根据视频key访问获取结果接口 (query the result endpoint with the task key)
            dataKey = raw_data["result"]["dataKey"]
            language = raw_data["input"]["fromLanguage"]
            if raw_data["result"]["video"] == 1:
                url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                response = requests.get(url, params={'taskId': dataKey}, verify=False)
            else:
                url = "https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                response = requests.post(url, data={'fromLanguage': language, 'taskId': dataKey}, verify=False)
            logging.info("ASR网站返回值:{}-{}".format(response, response.text))
            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                if d["data"]["code"] == "1":
                    # Finished: join the sentence texts with single spaces
                    # (empty string when there are no sentences).
                    results = ' '.join(s["text"] for s in (d["data"]["sentences"] or []))
                    if "content" in res_tmp:
                        res_tmp["content"] = results
                    report(1, "成功", "视频解析获取结果成功{}".format(raw_data))
                elif d["data"]["code"] == "0":
                    # 正在解析中,将任务再次放回数据队列 (still processing: requeue)
                    data_queue.put(raw_data)
                    logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                else:
                    # 解析失败 (remote reports the task failed)
                    report(2, "视频/音频解析异常",
                           "视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d),
                           success_code="0", error_log=response.text)
            else:
                report(2, "视频/音频解析异常",
                       "视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d),
                       success_code="0", error_log=response.text)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report to Kafka when a task was actually dequeued; the
            # original handler raised NameError on unbound raw_data/res_tmp
            # if the failure happened while polling, killing the worker loop.
            if raw_data is not None and "result" in raw_data:
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频解析异常"
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def zk_monitoring():
    """Watch the ZooKeeper /analyze node and record pause/version updates.

    Each CHANGED event re-reads the node and stores the payload's version
    and operation in the module-level ``stop_dict`` keyed by scenes_id, so
    the upload/getResult workers can drop superseded tasks.
    """
    try:
        # 线上环境 (production ensemble)
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # 测试环境 (test ensemble)
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        # 设置监听器 (register the data watch)
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to content changes; re-read the node for fresh data.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene = payload["scenes_id"]
            stop_dict[scene] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # 保持程序运行以监听节点变化 (keep the thread alive so the watch fires);
        # the bare except is deliberate: it also catches KeyboardInterrupt.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # 关闭连接 (shut the client down cleanly)
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
@ -0,0 +1,186 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
|
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
# from text_analysis.tools.tool import get_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
|
||||
|
# 任务队列 |
||||
|
global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
# 数据队列 |
||||
|
global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASR(request):
    """Accept a POSTed JSON task and enqueue it for the upload worker.

    Returns a JSON body with code 1 on success, code 0 for a non-POST
    request or an unparsable body.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task_queue.put(json.loads(request.body))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """(legacy) Pull tasks, download media from gofast and upload for analysis.

    Runs forever. The media URL comes from metadata.admin.fileUrl as a
    "datasource:jsonpath" pair; the resolved file is downloaded into a dated
    local directory and posted to the video analysis upload endpoint. On
    success the task is queued on ``data_queue`` for getResult().
    """
    while True:
        try:
            if task_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep)
                time.sleep(10)
                continue
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            url = raw_data["metadata"]["admin"]["fileUrl"]
            if '$.' in url:
                # "datasource:jsonpath" — resolve the value dynamically:
                # part 0 names the datasource, part 1 is the JsonPath.
                datasources = str(url).split(':')
                datasource = json.loads(raw_data["data"][datasources[0]])
                expr = parse(datasources[1])
                video_url = [m.value for m in expr.find(datasource)][0]
            else:
                # Fix: the original left video_url unbound here and crashed
                # with NameError on the next line; treat the url as literal.
                video_url = url
            fileName = video_url.rsplit('/')[-1]
            # Relative paths come from the gofast store; prefix its host.
            if "http" not in video_url:
                file = "https://caiji.percent.cn/" + video_url.lstrip("/")
            else:
                file = video_url
            currentFile = {"content": "", "fileName": fileName, "fileUrl": file}
            language = raw_data["metadata"]["admin"]["fromLanguage"]
            # 从gofast获取视频 (download the media file from gofast)
            myfile = requests.get(file)
            path = 'inputdata/' + datetime.now().strftime('%Y-%m-%d')
            # exist_ok avoids the check-then-create race between workers.
            os.makedirs(path, exist_ok=True)
            local_path = path + '/' + fileName
            with open(local_path, 'wb') as f:
                f.write(myfile.content)
            logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
            # 访问视频上传接口 (post the file to the analysis upload endpoint);
            # `with` guarantees the handle is closed even when the POST raises
            # (the original `f = open(...)` leaked it on exception).
            upload_url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
            with open(local_path, 'rb') as f:
                response = requests.post(upload_url, data={'fromLanguage': language}, files={'file': f})
            d = json.loads(response.text)
            if "code" in d and d["code"] == 200:
                # 接口返回值data中存放视频获取结果的key (data holds the result key)
                raw_data["result"] = {"successCode": "1", "errorLog": "",
                                      "results": currentFile, "dataKey": d["data"]}
                data_queue.put(raw_data)
                logging.info("视频上传成功{}".format(raw_data))
            else:
                logging.info("视频上传失败{}-{}".format(raw_data, d))
            # Todo删除视频文件 (TODO: delete the downloaded file)
        except Exception:
            logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def getResult():
    """(legacy) Poll data_queue and fetch video analysis results.

    Runs forever. Every 3 seconds one task is taken from ``data_queue`` and
    the media analysis result endpoint is queried; finished results go to
    Kafka, in-progress tasks are requeued, failures are reported to Kafka.
    """
    while True:
        # 3秒钟结果获取一次 (poll once every 3 seconds)
        time.sleep(3)
        raw_data = None
        try:
            if data_queue.qsize() == 0:
                # 暂无任务,进入休眠 (no task available, sleep longer)
                time.sleep(10)
                continue
            logging.info("取数据队列长度{}".format(data_queue.qsize()))
            raw_data = data_queue.get()
            # 根据视频key访问获取结果接口 (query the result endpoint with the task key)
            dataKey = raw_data["result"]["dataKey"]
            url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
            response = requests.get(url, params={'taskId': dataKey})
            d = json.loads(response.text)

            def fail():
                # Mark the task failed and forward it to Kafka (the original
                # duplicated this block in two branches).
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = response.text
                raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
                logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                to_kafka.send_kafka(raw_data, logging)

            if "code" in d and d["code"] == 200:
                if d["data"]["code"] == "1":
                    # Finished: concatenate all sentence texts.
                    results = ''.join(sentence["text"] for sentence in d["data"]["sentences"])
                    raw_data["result"]["results"]["content"] = results
                    raw_data["result"]["results"] = json.dumps(raw_data["result"]["results"], ensure_ascii=False)
                    logging.info("视频解析获取结果成功{}".format(raw_data))
                    to_kafka.send_kafka(raw_data, logging)
                elif d["data"]["code"] == "0":
                    # 正在解析中,将任务再次放回数据队列 (still processing: requeue)
                    data_queue.put(raw_data)
                    logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                else:
                    # 解析失败 (remote reports the task failed)
                    fail()
            else:
                fail()
        except Exception:
            logging.error(traceback.format_exc())
            # Only report to Kafka when a task was actually dequeued; the
            # original handler raised NameError on unbound raw_data if the
            # failure happened while polling, killing the worker loop.
            if raw_data is not None and "result" in raw_data:
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["results"] = ""
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,6 @@ |
|||||
|
#coding:utf8
# Debug helper: print a snapshot of the shared task queue owned by the
# views module, without consuming any items.

from views import data_queue

# NOTE(review): reading Queue.queue directly bypasses the queue's lock;
# acceptable for ad-hoc inspection, not for production logic.
print(list(data_queue.queue))
||||
|
|
||||
@ -0,0 +1,10 @@ |
|||||
|
import configparser |
||||
|
|
||||
|
# Load the configuration file
def load_config(config_file='./config.ini'):
    """Load an INI configuration file into a ConfigParser.

    Args:
        config_file: Path to the INI file. Defaults to ``./config.ini``,
            preserving the original hard-coded behaviour (resolved relative
            to the current working directory).

    Returns:
        configparser.ConfigParser: parser populated from *config_file*.
        Note: ``ConfigParser.read`` silently ignores missing files, so the
        returned parser may be empty.
    """
    # Create the parser and read the file as UTF-8.
    con = configparser.ConfigParser()
    con.read(config_file, encoding='utf-8')
    return con
||||
@ -0,0 +1,14 @@ |
|||||
|
#coding:utf8
# import leida_ner_bert_crf

# Ad-hoc smoke test: POST a server-local xlsx path to the "leidaduikang"
# NER service and print the raw response body.
import requests

url = "http://172.18.1.166:9000/leidaduikang"

payload = "{\"inputUrl\":\"/home/bfdadmin/leidabert/Project_leidaduikang/AInputdata/content_100.xlsx\"}"
headers = {'user-agent': "vscode-restclient",'header name': "header value"}

# NOTE(review): timeout=1000000 seconds effectively disables the timeout —
# presumably intentional for a long-running batch job; confirm.
response = requests.request("POST", url, timeout=1000000,data=payload, headers=headers)

print(response.text)
||||
|
|
||||
@ -0,0 +1,148 @@ |
|||||
|
""" |
||||
|
Django settings for Zhijian_Project_WebService project. |
||||
|
|
||||
|
Generated by 'django-admin startproject' using Django 1.8. |
||||
|
|
||||
|
For more information on this file, see |
||||
|
https://docs.djangoproject.com/en/1.8/topics/settings/ |
||||
|
|
||||
|
For the full list of settings and their values, see |
||||
|
https://docs.djangoproject.com/en/1.8/ref/settings/ |
||||
|
""" |
||||
|
|
||||
|
# Build paths inside the project like this: os.path.join(BASE_DIR, ...) |
||||
|
import os |
||||
|
|
||||
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
||||
|
|
||||
|
|
||||
|
# Quick-start development settings - unsuitable for production |
||||
|
# See https://docs.djangoproject.com/en/1.8/howto/deployment/checklist/ |
||||
|
|
||||
|
# SECURITY WARNING: keep the secret key used in production secret! |
||||
|
SECRET_KEY = '330r)_!^qhd7$!w4)$y@4=p2bd*vlxf%4z(bx-fx-1i3txagvz' |
||||
|
|
||||
|
# SECURITY WARNING: don't run with debug turned on in production! |
||||
|
DEBUG = True |
||||
|
|
||||
|
ALLOWED_HOSTS = ['*'] |
||||
|
|
||||
|
|
||||
|
# Application definition |
||||
|
|
||||
|
INSTALLED_APPS = ( |
||||
|
'django.contrib.admin', |
||||
|
'django.contrib.auth', |
||||
|
'django.contrib.contenttypes', |
||||
|
'django.contrib.sessions', |
||||
|
'django.contrib.messages', |
||||
|
'django.contrib.staticfiles', |
||||
|
) |
||||
|
|
||||
|
MIDDLEWARE = [ |
||||
|
'django.contrib.sessions.middleware.SessionMiddleware', |
||||
|
'django.middleware.common.CommonMiddleware', |
||||
|
'django.middleware.csrf.CsrfViewMiddleware', |
||||
|
'django.contrib.auth.middleware.AuthenticationMiddleware', |
||||
|
# 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', |
||||
|
'django.contrib.messages.middleware.MessageMiddleware', |
||||
|
'django.middleware.clickjacking.XFrameOptionsMiddleware', |
||||
|
'django.middleware.security.SecurityMiddleware', |
||||
|
] |
||||
|
|
||||
|
ROOT_URLCONF = 'text_analysis.urls' |
||||
|
|
||||
|
TEMPLATES = [ |
||||
|
{ |
||||
|
'BACKEND': 'django.template.backends.django.DjangoTemplates', |
||||
|
'DIRS': [], |
||||
|
'APP_DIRS': True, |
||||
|
'OPTIONS': { |
||||
|
'context_processors': [ |
||||
|
'django.template.context_processors.debug', |
||||
|
'django.template.context_processors.request', |
||||
|
'django.contrib.auth.context_processors.auth', |
||||
|
'django.contrib.messages.context_processors.messages', |
||||
|
], |
||||
|
}, |
||||
|
}, |
||||
|
] |
||||
|
|
||||
|
WSGI_APPLICATION = 'text_analysis.wsgi.application' |
||||
|
|
||||
|
|
||||
|
# Database |
||||
|
# https://docs.djangoproject.com/en/1.8/ref/settings/#databases |
||||
|
|
||||
|
# DATABASES = { |
||||
|
# 'default': { |
||||
|
# 'ENGINE': 'django.db.backends.sqlite3', |
||||
|
# 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), |
||||
|
# } |
||||
|
# } |
||||
|
|
||||
|
|
||||
|
# Internationalization |
||||
|
# https://docs.djangoproject.com/en/1.8/topics/i18n/ |
||||
|
|
||||
|
LANGUAGE_CODE = 'en-us' |
||||
|
|
||||
|
TIME_ZONE = 'Asia/Shanghai' |
||||
|
|
||||
|
USE_I18N = True |
||||
|
|
||||
|
USE_L10N = True |
||||
|
|
||||
|
USE_TZ = True |
||||
|
|
||||
|
|
||||
|
# Static files (CSS, JavaScript, Images) |
||||
|
# https://docs.djangoproject.com/en/1.8/howto/static-files/ |
||||
|
|
||||
|
STATIC_URL = '/static/' |
||||
|
|
||||
|
# U_LOGFILE_SIZE = 1 * 1024 * 1024 # 单日志文件最大100M |
||||
|
# U_LOGFILE_COUNT = 7 # 保留10个日志文件 |
||||
|
# |
||||
|
# LOGGING = { |
||||
|
# 'version': 1, |
||||
|
# 'disable_existing_loggers': True, # 禁用所有已经存在的日志配置 |
||||
|
# 'filters': { |
||||
|
# 'require_debug_false': { |
||||
|
# '()': 'django.utils.log.RequireDebugFalse' |
||||
|
# } |
||||
|
# }, |
||||
|
# 'formatters': { |
||||
|
# 'verbose': { |
||||
|
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] %(module)s %(process)d %(thread)d %(message)s' |
||||
|
# }, |
||||
|
# 'simple': { |
||||
|
# 'format': '%(levelname)s %(asctime)s @ %(process)d %(message)s' |
||||
|
# }, |
||||
|
# 'complete': { |
||||
|
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] (%(pathname)s/%(funcName)s:%(lineno)d) - %(message)s' |
||||
|
# }, |
||||
|
# 'online': { |
||||
|
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] - %(message)s' |
||||
|
# } |
||||
|
# }, |
||||
|
# 'handlers': { |
||||
|
# 'text': { |
||||
|
# 'level': 'DEBUG', |
||||
|
# #'class': 'logging.handlers.RotatingFileHandler', |
||||
|
# 'class': 'logging.handlers.TimedRotatingFileHandler', |
||||
|
# 'when': 'H', |
||||
|
# 'interval': 1, |
||||
|
# 'backupCount': U_LOGFILE_COUNT, |
||||
|
# 'formatter': 'complete', |
||||
|
# 'filename': os.path.join(BASE_DIR, 'logs/resultNew.log').replace('\\', '/'), |
||||
|
# } |
||||
|
# }, |
||||
|
# 'loggers': { |
||||
|
# 'text': { |
||||
|
# 'handlers': ['text'], |
||||
|
# 'level': 'DEBUG', |
||||
|
# 'propagate': False, |
||||
|
# } |
||||
|
# } |
||||
|
# } |
||||
@ -0,0 +1,129 @@ |
|||||
|
#coding:utf8 |
||||
|
import re |
||||
|
|
||||
|
def get_data(inputdata):
    """Reassemble request parameters from the raw JSON payload.

    :param inputdata: original JSON payload (currently unused)
    :return: dict of assembled parameters — an empty placeholder for now
    """
    # Stub: intentionally returns an empty parameter set.
    return {}
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
def get_content(inputdata, logging):
    """Assemble the ChatGPT request parameters from a task payload.

    Placeholders in the admin prompt are expanded:
      * ``{{name}}`` is replaced with ``inputdata["metadata"]["user"][name]``
      * ``@@name@@`` is replaced with ``inputdata["data"][name]``

    :param inputdata: original JSON task payload
    :param logging: logger (unused; kept for interface compatibility)
    :return: dict with prompt, authorization, model, temperature, top_p, n
    """
    res = {}
    admin = inputdata["metadata"]["admin"]
    data = inputdata["data"]
    prompt = admin["prompt"]
    if_user = re.findall("{{(.*)}}", prompt)
    if_data = re.findall("@@(.*)@@", prompt)
    if if_user != []:
        user_data = inputdata["metadata"]["user"]
        if if_user[0] in user_data.keys():
            tmp = user_data[if_user[0]]
            # Bug fix: use a callable replacement so backslashes in the value
            # are inserted literally instead of being parsed as regex escape
            # sequences (which raised "bad escape" or mangled the text).
            prompt = re.sub("{{(.*)}}", lambda m: tmp, prompt)
    if if_data != [] and if_data[0] in data.keys():
        tmp1 = data[if_data[0]]
        prompt = re.sub("@@(.*)@@", lambda m: tmp1, prompt)
    res["prompt"] = prompt
    # Deduplicated: "authorization" was assigned twice in the original.
    res["authorization"] = admin["authorization"]
    res["model"] = admin["model"]
    res["temperature"] = admin["temperature"]
    res["top_p"] = admin["top_p"]
    res["n"] = admin["n"]
    return res
||||
|
|
||||
|
|
||||
|
|
||||
|
if __name__=="__main__": |
||||
|
inputdata={ |
||||
|
"metadata":{ |
||||
|
"output":{ |
||||
|
"output_type":"table", |
||||
|
"label_col":[ |
||||
|
"软件著作抽取结果" |
||||
|
] |
||||
|
}, |
||||
|
"input":{ |
||||
|
"input_type":"text", |
||||
|
"label":[ |
||||
|
"7_软件著作过滤器" |
||||
|
] |
||||
|
}, |
||||
|
"address":"http://172.18.1.181:9011/chatGpt/", |
||||
|
"admin":{ |
||||
|
"authorization":"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", |
||||
|
"top_p":"1", |
||||
|
"user_input":[ |
||||
|
{ |
||||
|
"keyname":"tag", |
||||
|
"keydesc":"" |
||||
|
} |
||||
|
], |
||||
|
"temperature":"0.2", |
||||
|
"model":"gpt-3.5-turbo-16k", |
||||
|
"prompt":"请在下面这句话中提取出:证书号、软件名称、著作权人,以json格式输出,找不到的字段赋值为空字符串,不要有多余的文字输出,只输出json结构。@@7_软件著作过滤器@@", |
||||
|
"n":"1" |
||||
|
}, |
||||
|
"index":1 |
||||
|
}, |
||||
|
"data":{ |
||||
|
"1_项目文件上传":"[{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/05/1/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileId\":\"cd6592f0389bb1da25afbb44901f9cde\",\"fileName\":\"1-基于时间序列遥感 影像洪涝检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/08/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileId\":\"944eec1cf98f216ea953459dac4dd505\",\"fileName\":\"3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/09/1/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileId\":\"eb378cb9ee914323f601500378dfad76\",\"fileName\":\"4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\" }]", |
||||
|
"2_文件分类信息":"{\"软件著作\":4}", |
||||
|
"3_OCR识别内容":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}", |
||||
|
"businessKey":"185aef3b1c810799a6be8314abf6512c", |
||||
|
"7_软件著作过滤器":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}" |
||||
|
}, |
||||
|
"created":1691004265000, |
||||
|
"module":"OCR", |
||||
|
"start_tag":"false", |
||||
|
"last_edit":1692464331000, |
||||
|
"next_app_id":[ |
||||
|
{ |
||||
|
"start_id":86, |
||||
|
"edge_id":49, |
||||
|
"end_id":90 |
||||
|
} |
||||
|
], |
||||
|
"transfer_id":11, |
||||
|
"blueprint_id":3, |
||||
|
"scenes_id":3, |
||||
|
"scenario":{ |
||||
|
"dataloss":1, |
||||
|
"autoCommitTriggerLast":1, |
||||
|
"maxErrors":3, |
||||
|
"autoCommit":1, |
||||
|
"freshVariables":1 |
||||
|
}, |
||||
|
"wait_condition":[ |
||||
|
|
||||
|
], |
||||
|
"scheduling":{ |
||||
|
"interval":-1, |
||||
|
"type":"single" |
||||
|
}, |
||||
|
"name":"软件著作抽取", |
||||
|
"businessKey":"185aef3b1c810799a6be8314abf6512c", |
||||
|
"id":86, |
||||
|
"describe":"软件著作抽取" |
||||
|
} |
||||
|
a=get_content(inputdata,"") |
||||
|
print(a) |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,25 @@ |
|||||
|
# -*- coding:utf-8 -*- |
||||
|
|
||||
|
class pt_v_Exception(Exception):
    """Raised when no pt rule is matched in the cache."""

    # Fixed human-readable description (Chinese, as emitted to logs).
    _MSG = 'pt规则未在缓存中命中'

    def __str__(self):
        return self._MSG
||||
|
|
||||
|
class dt_v_Exception(Exception):
    """Raised when no dt rule is matched in the cache."""

    # Fixed human-readable description (Chinese, as emitted to logs).
    _MSG = 'dt规则未在缓存中命中'

    def __str__(self):
        return self._MSG
||||
|
|
||||
|
class dt_v_attr_Exception(Exception):
    """Raised when no dt_attrcode rule is matched in the cache."""

    # Fixed human-readable description (Chinese, as emitted to logs).
    _MSG = 'dt_attrcode规则未在缓存中命中'

    def __str__(self):
        return self._MSG
||||
|
|
||||
|
class dt_v_codeid_Exception(Exception):
    """Raised when no dt_codeid rule is matched in the cache."""

    # Fixed human-readable description (Chinese, as emitted to logs).
    _MSG = 'dt_codeid规则未在缓存中命中'

    def __str__(self):
        return self._MSG
||||
|
|
||||
|
class dt_v_senti_Exception(Exception):
    """Raised when no dt_senti rule is matched in the cache."""

    # Fixed human-readable description (Chinese, as emitted to logs).
    _MSG = 'dt_senti规则未在缓存中命中'

    def __str__(self):
        return self._MSG
||||
|
|
||||
|
class dt_v_res_Exception(Exception):
    """Raised when no dt_resverse rule is matched in the cache."""

    # Fixed human-readable description (Chinese, as emitted to logs).
    _MSG = 'dt_resverse规则未在缓存中命中'

    def __str__(self):
        return self._MSG
||||
@ -0,0 +1,67 @@ |
|||||
|
# coding=utf-8 |
||||
|
from kafka import KafkaProducer |
||||
|
from kafka import KafkaConsumer |
||||
|
import json |
||||
|
import traceback |
||||
|
import time |
||||
|
import traceback |
||||
|
import datetime |
||||
|
import queue |
||||
|
from logUtil import get_logger |
||||
|
|
||||
|
logger = get_logger("crawlWebsrcCode.log") |
||||
|
""" |
||||
|
写到kafka |
||||
|
""" |
||||
|
def kafkaProduce(topic,resultData,address):
    """Publish *resultData* to one or more Kafka topics.

    :param topic: comma-separated topic name(s)
    :param resultData: message payload (bytes)
    :param address: Kafka bootstrap server address string
    :raises: kafka errors propagate to the caller (e.g. send timeout)
    """
    producer = KafkaProducer(bootstrap_servers = '{}'.format(address),request_timeout_ms=120000)
    try:
        topics = topic.split(',')
        for tc in topics:
            future = producer.send(tc,resultData)
            # Block until the broker acknowledges so failures surface here.
            result = future.get(timeout=60)
            producer.flush()
            print (result)
    finally:
        # Bug fix: the original created a new producer on every call and
        # never closed it, leaking broker connections.
        producer.close()
||||
|
|
||||
|
# Append one line to a text file.
def writeTxt(filePath,result):
    """Append *result* (plus a newline) to *filePath* as UTF-8 text.

    The value is round-tripped through ``unicode_escape`` so literal
    ``\\uXXXX`` sequences in *result* become real characters.
    NOTE(review): this round-trip mangles non-ASCII input that is already
    decoded text — confirm callers only pass escaped ASCII.
    """
    # Bug fix: the original called ``f.close`` without parentheses, so the
    # handle was never closed; a context manager releases it deterministically.
    with open(filePath, 'a', encoding='utf-8') as f:
        f.write(result.encode('utf-8').decode('unicode_escape') + '\n')
||||
|
|
||||
|
def KafkaConsume(topic,address,group_id,task_queue,logger):
    '''
    Monitor Kafka and push each consumed message onto the task queue.
    :param topic: topic name to subscribe to
    :param address: bootstrap server address(es)
    :param group_id: consumer group id
    :param task_queue: queue.Queue that receives decoded message strings
    :param logger: logger instance (unused in the body; printing is used)
    :return: does not return under normal operation (infinite loop)
    '''
    try:
        consumer = KafkaConsumer(topic, auto_offset_reset='earliest',fetch_max_bytes=1024768000,fetch_max_wait_ms=5000, bootstrap_servers=address,group_id = group_id)
        i = 1
        while True:
            # NOTE(review): KafkaConsumer iteration normally blocks waiting
            # for messages, so the for/else "idle" branch below only runs if
            # iteration ever completes — confirm the intended idle behaviour.
            for msg in consumer:
                print('第{}条数据'.format(i))
                # Message values arrive as bytes; decode to UTF-8 text.
                data = str(msg.value, encoding = "utf-8")
                print(data)
                task_queue.put(data)
                i = i+1
            else:
                print('暂无任务------')
                time.sleep(10)
    except Exception as e:
        # Any consumer failure ends the loop; caller decides whether to restart.
        print('kafka未知异常----')
        traceback.print_exc()
||||
|
|
||||
|
def writeTxt(filePath,result):
    """Append *result* (plus a newline) to *filePath*.

    NOTE(review): this second definition shadows the earlier ``writeTxt``
    in the module (no unicode_escape round-trip, platform-default encoding);
    that behaviour is kept unchanged.
    """
    # Bug fix: ``f.close`` was referenced without calling it, leaking the
    # file handle; use a context manager instead.
    with open(filePath, 'a') as f:
        f.write(result + '\n')
||||
|
|
||||
|
if __name__ == '__main__':
    # Manual smoke test: drain a production topic into a local in-memory
    # queue. The commented lines exercise the producer path instead.
    # resultData = {'id': '中文', 'url': 'https://zh.wikipedia.org/zh/%E8%94%A1%E8%8B%B1%E6%96%87'}
    # kafkaProduce('test', json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),'121.4.41.194:8008')
    task_queue = queue.Queue()
    KafkaConsume('fq-Taobao-eccontent','39.129.129.172:6666,39.129.129.172:6668,39.129.129.172:6669,39.129.129.172:6670,39.129.129.172:6671','news_sche_8',task_queue,logger)
    # KafkaConsume('zxbnewstopic','120.133.14.71:9992','group3',task_queue,logger)
||||
@ -0,0 +1,338 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
import re |
||||
|
# from log_util.set_logger import set_logger |
||||
|
# logging = set_logger('logs/error.log') |
||||
|
import pymysql.cursors |
||||
|
import traceback |
||||
|
|
||||
|
def mysqlConn(data,logging):
    """Connectivity check: list all table names in the configured database.

    :param data: dict with Host/Port/Database/User/Password
    :param logging: logger used for error reporting
    :return: dict {successCode, errorLog, results}; results holds the list
             of table names; successCode is "0" on failure or empty database
    """
    res={"successCode":"1","errorLog":"","results":""}
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        sql = "SHOW TABLES"
        cursor.execute(sql)
        tables = cursor.fetchall()
        if tables:
            # Each DictCursor row is a one-item dict; keep only the name value.
            table_names = list(map(lambda x: list(x.values())[0], tables))
            res["results"] = table_names
        else:
            # An empty database is reported as a failure, by convention.
            res["successCode"] = "0"
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
def getTableColumnNames(data,logging):
    """List the column names of one table via ``DESCRIBE``.

    :param data: dict with Host/Port/Database/User/Password/Table
    :param logging: logger used for error reporting
    :return: dict {successCode, errorLog, results}; results holds the list
             of column names; successCode is "0" on failure or unknown table
    """
    res={"successCode":"1","errorLog":"","results":""}
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # NOTE(review): the table name is concatenated directly into SQL —
        # safe only if p_table comes from trusted configuration.
        sql = "DESCRIBE "+p_table
        cursor.execute(sql)
        tables = cursor.fetchall()
        if tables:
            # DESCRIBE rows carry the column name under the 'Field' key.
            table_names = list(map(lambda x: x['Field'], tables))
            res["results"] = table_names
        else:
            res["successCode"] = "0"
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
def mysqlInsert(input,logging):
    """Insert one row using settings from ``input["metadata"]["admin"]``.

    Expects Host/Port/Database/User/Password/Table plus ``columnName``
    (list of column names) and ``values`` (list of values, same order).

    :return: dict {successCode, errorLog, results}; successCode "0" on error
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    p_columnName=data["columnName"]
    # Build the "(col1,col2,...) " fragment of the INSERT statement.
    cN='('+','.join(p_columnName)+') '
    p_values=data["values"]
    val=tuple(p_values)
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # Values are bound via %s placeholders (safe); table/column names are
        # concatenated and must come from trusted configuration.
        sql = "insert into " + p_table + cN + "values ("+ ','.join(['%s'] * len(val)) + ")"
        cursor.execute(sql,val)
        db.commit()
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
def mysqlUpdate(input,logging):
    """Run an UPDATE built from metadata: SET from ``get_updateSet`` and
    WHERE from ``get_filter``.

    :param input: task payload; admin section holds connection info,
                  Table, Set and Filter
    :return: dict {successCode, errorLog, results}; successCode "0" on error
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    # p_set=data["Set"]
    p_set=get_updateSet(input)
    # where=process_where(data["Filter"])
    where=get_filter(data["Filter"])
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # NOTE(review): SET/WHERE fragments interpolate values directly into
        # SQL (no parameter binding) — injection risk if input is untrusted.
        sql = "UPDATE " + p_table + p_set + where
        print(sql)
        cursor.execute(sql)
        db.commit()
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
def mysqlExecute(input,logging):
    """Execute an arbitrary SQL statement supplied in admin["Execute"].

    SELECT-like statements return fetched rows serialized as JSON in
    ``results``; everything else is committed.

    :return: dict {successCode, errorLog, results}; successCode "0" on error

    WARNING: executes caller-supplied SQL verbatim — the payload source
    must be fully trusted.
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    execute=data["Execute"]
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        cursor.execute(execute)
        # Crude read/write detection: any statement containing "select"
        # (case-insensitive) is treated as a query.
        if 'select' in execute.lower():
            result = cursor.fetchall()
            res["results"]=json.dumps(result,ensure_ascii=False)
        else:
            db.commit()
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
# def process_where(data): |
||||
|
# ''' |
||||
|
# 组装where |
||||
|
# :param data: data["Filter"],{"key":"age","value":"20","operator":">"},{"logicalSymbol":"and"},{"key":"weight","value":"50","operator":"<"} |
||||
|
# :return: WHERE age>20 and weight<50 |
||||
|
# ''' |
||||
|
# if data=="" or data==[]: |
||||
|
# return "" |
||||
|
# where = " WHERE " |
||||
|
# for line in data: |
||||
|
# if "key" in line.keys(): |
||||
|
# val = line["value"] |
||||
|
# if isinstance(val, str): |
||||
|
# val = "\'" + val + "\'" |
||||
|
# tmp = str(line["key"]) + " " + line["operator"] + " " + str(val) |
||||
|
# where += tmp |
||||
|
# else: |
||||
|
# where += " " + line["logicalSymbol"] + " " |
||||
|
# return where |
||||
|
# |
||||
|
# def process_filter(data): |
||||
|
# ''' |
||||
|
# 组装key,value,operator |
||||
|
# :param data: data["Filter"],{"key":"age",value:"20","operator":"="} |
||||
|
# :return: age=20 |
||||
|
# ''' |
||||
|
# if data=="" or data==[]: |
||||
|
# return "" |
||||
|
# res=data["key"]+" "+data["operator"]+" "+data["value"] |
||||
|
# return res |
||||
|
|
||||
|
def get_updateSet(input):
    """Build the ``SET`` clause of an UPDATE statement.

    Each entry of ``metadata.admin.Set`` looks like ``##key1##=##key2##``;
    the ``##``-wrapped names are resolved through ``metadata.user`` and the
    pairs are joined into `` SET lhs=rhs,...``.

    :param input: task payload containing metadata.user and metadata.admin.Set
    :return: string beginning with `` SET ``
    """
    metadata = input["metadata"]
    user = metadata["user"]
    assignments = []
    for entry in metadata["admin"]["Set"]:
        resolved = []
        # Split "lhs=rhs" and resolve each ##name## marker via the user map.
        for side in entry.split("="):
            markers = re.findall('##(.*?)##', side)
            if markers != []:
                resolved.append(user[markers[0]])
        assignments.append(str(resolved[0]) + "=" + str(resolved[1]))
    return " SET " + ",".join(assignments)
||||
|
|
||||
|
def get_filter(data): |
||||
|
if "OR" not in data.keys(): |
||||
|
return "" |
||||
|
op_or=data["OR"] |
||||
|
res = "" |
||||
|
if len(op_or) == 1: |
||||
|
tmp = [] |
||||
|
line = op_or[0]["AND"] |
||||
|
for single_line in line: |
||||
|
val = single_line["value"] |
||||
|
if isinstance(val, str): |
||||
|
val = "\'" + val + "\'" |
||||
|
tmp.append(str(single_line["key"]) + single_line["operator"] + str(val)) |
||||
|
if single_line != line[-1]: |
||||
|
tmp.append("and") |
||||
|
res = " WHERE "+" ".join(tmp) |
||||
|
elif len(op_or) > 1: |
||||
|
tmp = [] |
||||
|
for single_and in op_or: |
||||
|
line = single_and["AND"] |
||||
|
for sigle_line in line: |
||||
|
val = sigle_line["value"] |
||||
|
if isinstance(val, str): |
||||
|
val = "\'" + val + "\'" |
||||
|
tmp.append(str(sigle_line["key"]) + sigle_line["operator"] + str(val)) |
||||
|
if sigle_line != line[-1]: |
||||
|
tmp.append("and") |
||||
|
if single_and != op_or[-1]: |
||||
|
tmp.append("or") |
||||
|
res = " WHERE "+" ".join(tmp) |
||||
|
return res |
||||
|
|
||||
|
|
||||
|
def mysqlQuery(input,logging):
    """Run a SELECT built from Table/columnNames/Filter and return rows.

    ``columnNames`` empty → ``SELECT *``; one entry → that column; several
    entries → comma-joined list. The WHERE clause comes from ``get_filter``.

    :return: dict {successCode, errorLog, results}; results is the fetched
             rows serialized as JSON; successCode "0" on error
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    p_columnNames=data["columnNames"]
    # p_filter=data["Filter"]
    column='*'
    if len(p_columnNames)==1:
        column=p_columnNames[0]
    elif len(p_columnNames)>1:
        column=','.join(p_columnNames)
    where=get_filter(data["Filter"])
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # NOTE(review): identifiers and filter values are concatenated into
        # the SQL text — injection risk if the payload is untrusted.
        sql = "SELECT " + column +" From "+ p_table + where
        # print(sql)
        cursor.execute(sql)
        result = cursor.fetchall()
        res["results"]=json.dumps(result,ensure_ascii=False)
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
def mysqlDelete(input,logging):
    """Run a DELETE on the configured table, filtered via ``get_filter``.

    NOTE: if the Filter yields an empty WHERE clause, this deletes every
    row in the table.

    :return: dict {successCode, errorLog, results}; successCode "0" on error
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    # where=process_where(data["Filter"])
    where=get_filter(data["Filter"])
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # NOTE(review): table name and filter values are concatenated into
        # the SQL text — injection risk if the payload is untrusted.
        sql = "DELETE From "+ p_table + where
        cursor.execute(sql)
        db.commit()
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
||||
|
|
||||
|
|
||||
|
if __name__=="__main__": |
||||
|
input={"metadata":{"admin":{ |
||||
|
"type":"query", |
||||
|
"Table":"student", |
||||
|
"columnNames":["name","age"], |
||||
|
"Set":["##tag1##=##value1##","##tag2##=##value2##"], |
||||
|
"Filter":{ |
||||
|
"OR":[ |
||||
|
{ |
||||
|
"AND":[{"key":"age","value":20,"operator":">"},{"key":"weight","value":50,"operator":"<"}] |
||||
|
}, |
||||
|
{ |
||||
|
"AND":[{"key":"name","value":"ff","operator":"="}] |
||||
|
} |
||||
|
] |
||||
|
}, |
||||
|
"Host":"172.26.28.30", |
||||
|
"Port":"3306", |
||||
|
"Database":"test", |
||||
|
"User":"crawl", |
||||
|
"Password":"crawl123" |
||||
|
}}, |
||||
|
"user": { |
||||
|
"tag1": "age", |
||||
|
"tag2": "weight", |
||||
|
"value1": 2, |
||||
|
"value2": 100 |
||||
|
} |
||||
|
} |
||||
|
res=mysqlUpdate(input,"") |
||||
|
print(res) |
||||
@ -0,0 +1,51 @@ |
|||||
|
#coding:utf8 |
||||
|
import os, sys |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from tools.mysql_helper import mysqlConn,mysqlInsert,mysqlQuery,mysqlExecute,mysqlUpdate,mysqlDelete,getTableColumnNames |
||||
|
import traceback |
||||
|
import time |
||||
|
from log_util.set_logger import set_logger |
||||
|
logging=set_logger('results.log') |
||||
|
|
||||
|
from views import task_queue |
||||
|
|
||||
|
def process_data():
    """Worker loop: pull MySQL tasks off the shared queue, dispatch on the
    task type, and publish the result payload to Kafka.

    Runs forever; sleeps 10 seconds whenever the queue is empty. Errors in
    one task are reported back via Kafka and do not stop the loop.
    """
    while True:
        try:
            # print("task_queue:",task_queue)
            if task_queue.qsize() >0:
                try:
                    raw_data = task_queue.get()
                    res = ""
                    logging.info("启动数据处理线程——")
                    logging.info(raw_data)
                    flag = raw_data["metadata"]["admin"]["type"]
                    # Dispatch on task type: execute / query / insert / update / delete.
                    # NOTE(review): an unknown type leaves res == "" and still
                    # publishes to Kafka — confirm that is intended.
                    if flag == 'insert':
                        res = mysqlInsert(raw_data, logging)
                    elif flag == 'execute':
                        res = mysqlExecute(raw_data, logging)
                    elif flag == 'update':
                        res = mysqlUpdate(raw_data, logging)
                    elif flag == 'query':
                        res = mysqlQuery(raw_data, logging)
                    elif flag == 'delete':
                        res = mysqlDelete(raw_data, logging)
                    raw_data["result"] = res
                    logging.info("************写入kafka***********")
                    to_kafka.send_kafka(raw_data)
                except:
                    # Best-effort error report back to Kafka; assumes raw_data
                    # was dequeued before the failure occurred.
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                    raw_data["result"]["errorLog"] = traceback.format_exc()
                    to_kafka.send_kafka(raw_data)
            else:
                logging.info("暂无任务,进入休眠--")
                print("222222222222222222222222")
                time.sleep(10)
        except:
            # Never let the worker thread die; log and keep looping.
            logging.error(traceback.format_exc())
||||
@ -0,0 +1,171 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
import time |
||||
|
import threading |
||||
|
from selenium import webdriver |
||||
|
import json |
||||
|
from urllib.parse import urljoin |
||||
|
from kakfa_util import KafkaConsume |
||||
|
from kakfa_util import kafkaProduce |
||||
|
from logUtil import get_logger |
||||
|
from Go_fastDfs import uploadFile |
||||
|
import traceback |
||||
|
import queue |
||||
|
import configparser |
||||
|
import os, sys |
||||
|
import re |
||||
|
logger = get_logger("./logs/crawlWebsrcCode.log") |
||||
|
#加载配置文件 |
||||
|
configFile = './config.ini' |
||||
|
# 创建配置文件对象 |
||||
|
con = configparser.ConfigParser() |
||||
|
# 读取文件 |
||||
|
con.read(configFile, encoding='utf-8') |
||||
|
kafkaConfig = dict(con.items('kafka'))#kafka配置信息 |
||||
|
goFastdfsConfig = dict(con.items('goFastdfs'))#goFastdfs配置信息 |
||||
|
class Spider(object):
    """Headless-Chrome page snapshotter: loads a URL, scrolls to the bottom
    to trigger lazy content, and saves the rendered page as .mhtml."""

    def __init__(self,url):
        self.chromeOptions = self.get_profile()
        self.browser = self.get_browser()
        self.url = url

    def get_profile(self):
        """Build Chrome options for headless crawling."""
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument('--headless')  # headless mode
        chromeOptions.add_argument('--disable-gpu')  # disable the GPU
        # chromeOptions.add_argument('window-size=1280,800')  # fixed viewport
        chromeOptions.add_argument("--no-sandbox")
        return chromeOptions

    def get_browser(self):
        """Launch Chrome through a hard-coded Windows chromedriver path.

        NOTE(review): the driver path is machine-specific — move it to
        configuration before deploying elsewhere.
        """
        browser = webdriver.Chrome("D:\\工作使用\\zhaoshang\\chromedriver.exe",chrome_options=self.chromeOptions)
        return browser

    def _get_page(self,path):
        '''
        Capture the page in its original format, write it to a file and
        return the path.
        :param path: directory prefix for the output file
        :return: (pathName, title) — output file path and the page title
        '''
        self.browser.get(self.url)
        time.sleep(5)
        logger.info("休眠结束")
        # Scroll down repeatedly (10000 px per step, 9 steps) to reach the
        # bottom and let lazily loaded content render.
        scrollTop = 10000
        for num in range(1,10):
            js = "var q=document.documentElement.scrollTop={}".format(scrollTop*num)
            logger.info("第{}次滚动".format(num))
            self.browser.execute_script(js)
            time.sleep(5)
        # Chrome DevTools command: capture the full page as MHTML.
        res = self.browser.execute_cdp_cmd('Page.captureSnapshot', {})
        # Extract the document title to use in the output file name.
        title = '无标题'
        try:
            title = self.browser.find_element_by_css_selector("title").get_attribute("textContent")
        except Exception as e:
            logger.error('获取标题异常----')
            traceback.print_exc()
        # NOTE(review): the title goes straight into the file name — titles
        # containing path separators will break the write; confirm upstream
        # sanitization.
        pathName = '{}{}.mhtml'.format(path,title)
        with open(pathName, 'w',newline='') as f:
            f.write(res['data'])
        return pathName,title
||||
|
def _report_error(task_id, message):
    """Publish a code-500 result for *task_id* to the data topics, then pause 2s.

    Factors out the three duplicated error-reporting sites in the main loop.
    The odd encode/decode round-trip un-escapes ``\\uXXXX`` sequences before
    sending, matching the original wire format.
    """
    resultData = {
        'code': 500,
        'id': task_id,
        'message': message
    }
    kafkaProduce(kafkaConfig['data_topics'],
                 json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                 kafkaConfig['address'])
    time.sleep(2)


if __name__ == '__main__':
    # Task queue fed by the kafka consumer thread.
    task_queue = queue.Queue()
    logger.info("开启读取kafka线程---")
    t = threading.Thread(target=KafkaConsume, name='LoopThread',
                         args=(kafkaConfig['read_topic'], kafkaConfig['address'],
                               kafkaConfig['group_id'], task_queue, logger))
    t.daemon = True
    t.start()
    # URL sanity check; compiled once instead of per task.
    URL_PATTERN = re.compile(u'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]')
    # Main loop: take a task, snapshot the page, upload the MHTML, report result.
    while True:
        try:
            if task_queue.qsize() > 0:
                taskStr = task_queue.get()
                logger.info('当前任务:{}'.format(taskStr))
                task = json.loads(taskStr)
                if URL_PATTERN.search(task['url']):
                    l = Spider(task['url'])
                    pathName, title = l._get_page(goFastdfsConfig['path'])
                    l.browser.quit()
                    # A 404 page snapshots fine but is useless: report and skip.
                    if '404 Not Found' in title:
                        logger.error('页面404,无效')
                        _report_error(task['id'], '页面404')
                        continue
                    try:
                        uploadStr = uploadFile('{}upload'.format(goFastdfsConfig['uploadaddress']), pathName, logger)
                        uploadJson = json.loads(uploadStr)
                    except Exception:
                        logger.error('文件上传异常----')
                        traceback.print_exc()
                        _report_error(task['id'], '文件上传失败')
                        continue
                    resultData = {
                        'code': 200,
                        'id': task['id'],
                        'url': goFastdfsConfig['downloadaddress'] + uploadJson['path'],
                        'title': title,
                        'delMd5': uploadJson['md5'],
                        'uploadTime': uploadJson['mtime'],
                        'message': '成功'
                    }
                    kafkaProduce(kafkaConfig['data_topics'],
                                 json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                                 kafkaConfig['address'])
                    logger.info('数据写入成功')
                    # Remove the local snapshot once it is safely uploaded.
                    if os.path.exists(pathName):
                        os.remove(pathName)
                        logger.info('清除文件:{}'.format(pathName))
                    else:
                        logger.info('要删除的文件不存在:{}'.format(pathName))
                else:
                    # BUG FIX: original was '非正确url:'.format(task['url']) —
                    # the placeholder was missing, so the URL was never logged.
                    logger.error('非正确url:{}'.format(task['url']))
                    _report_error(task['id'], '非正确url')
                    continue
            else:
                logger.info("暂无任务,进入休眠--")
                time.sleep(10)
        except Exception:
            logger.error('未知异常----')
            traceback.print_exc()
            # FIX: 'task' may be unbound if json.loads failed — guard the report
            # so the error handler itself cannot crash the loop.
            try:
                _report_error(task['id'], '未知异常')
            except Exception:
                time.sleep(2)
|
|
||||
@ -0,0 +1,25 @@ |
|||||
|
#coding:utf8 |
||||
|
import traceback |
||||
|
import json |
||||
|
from kafka import KafkaProducer |
||||
|
from text_analysis.read_config import load_config |
||||
|
config=load_config() |
||||
|
|
||||
|
def send_kafka(data, logging):
    """Serialize *data* as JSON and publish it to the configured kafka topic.

    :param data: JSON-serializable payload (typically the task dict).
    :param logging: logger instance used for success / failure reporting.

    Failures are logged and swallowed on purpose (best-effort delivery).
    """
    try:
        # Removed dead local 'producer = None' — the producer is created
        # inside kafkaProduce(), never here.
        topic = config["kafka"]["topic"]
        payload = json.dumps(data, ensure_ascii=False)
        kafkaProduce(topic, bytes(payload, encoding='utf-8'))
        logging.info("数据推入kafka!")
    except Exception:
        logging.info(traceback.format_exc())
        logging.info('写入kafka失败')
||||
|
|
||||
|
def kafkaProduce(topic, resultData):
    """Send *resultData* (bytes) to every topic in the comma-separated *topic*.

    :param topic: one topic name, or several separated by commas.
    :param resultData: raw message bytes.

    FIX: the producer is now flushed once and closed in a ``finally`` block —
    the original created a new KafkaProducer per call and never closed it,
    leaking sockets under repeated use.
    """
    producer = KafkaProducer(
        bootstrap_servers='{}'.format(config["kafka"]["bootstrap_servers"]),
        max_request_size=52428800)
    try:
        for tc in topic.split(','):
            producer.send(tc, resultData)
        producer.flush()
    finally:
        producer.close()
||||
@ -0,0 +1,132 @@ |
|||||
|
#coding:utf8 |
||||
|
import re |
||||
|
import json |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
|
||||
|
def parse_data(raw_data, url):
    """Resolve a ``"<data-key>:<jsonpath>"`` reference against ``raw_data['data']``.

    :param raw_data: task dict; ``raw_data['data']`` maps node names to JSON
        strings produced by upstream pipeline nodes.
    :param url: string of the form ``"<key>:<jsonpath expression>"``.
    :return: the first value matched by the JsonPath expression.
    """
    all_result = raw_data['data']
    # NOTE(review): split(":") is unbounded — a ':' inside the jsonpath part
    # would shift the fields; presumably keys never contain ':' — confirm.
    param_split = str(url).split(":")
    datasourcestr = all_result[param_split[0]]
    datasource = json.loads(datasourcestr)
    # Build the JsonPath expression object
    expr = parse(param_split[1])
    # Apply the expression to select matching JSON elements
    match = [match.value for match in expr.find(datasource)]
    # NOTE(review): raises IndexError when the expression matches nothing.
    val = match[0]
    return val
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
def get_content(inputdata, logging):
    """Build the request parameters for the chat-completion call.

    Substitutes a ``{{key}}`` placeholder in the admin prompt with the value
    from ``metadata.user``, and an ``@@key@@`` placeholder with the value from
    the task ``data`` section.

    :param inputdata: original task JSON (dict).
    :param logging: logger (unused here; kept for interface compatibility).
    :return: dict with prompt, authorization, model, temperature, top_p, n.
    """
    res = {}
    admin = inputdata["metadata"]["admin"]
    data = inputdata["data"]
    prompt = admin["prompt"]
    if_user = re.findall("{{(.*)}}", prompt)
    if_data = re.findall("@@(.*)@@", prompt)
    if if_user:
        user_data = inputdata["metadata"]["user"]
        if if_user[0] in user_data:
            tmp = user_data[if_user[0]]
            # BUG FIX: a callable replacement inserts the value literally; the
            # original passed it as a template string, so backslashes in the
            # value were treated as regex escapes (re.error on e.g. '\w').
            prompt = re.sub("{{(.*)}}", lambda m: tmp, prompt)
    if if_data and if_data[0] in data:
        tmp1 = data[if_data[0]]
        prompt = re.sub("@@(.*)@@", lambda m: tmp1, prompt)
    res["prompt"] = prompt
    # FIX: 'authorization' was assigned twice in the original; once is enough.
    res["authorization"] = admin["authorization"]
    res["model"] = admin["model"]
    res["temperature"] = admin["temperature"]
    res["top_p"] = admin["top_p"]
    res["n"] = admin["n"]
    return res
||||
|
|
||||
|
|
||||
|
|
||||
|
if __name__=="__main__":
    # Manual smoke test: drive get_content() with a captured OCR pipeline
    # payload (software-copyright certificate extraction task).
    inputdata={
        "metadata":{
            "output":{
                "output_type":"table",
                "label_col":[
                    "软件著作抽取结果"
                ]
            },
            "input":{
                "input_type":"text",
                "label":[
                    "7_软件著作过滤器"
                ]
            },
            "address":"http://172.18.1.181:9011/chatGpt/",
            "admin":{
                "authorization":"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",
                "top_p":"1",
                "user_input":[
                    {
                        "keyname":"tag",
                        "keydesc":""
                    }
                ],
                "temperature":"0.2",
                "model":"gpt-3.5-turbo-16k",
                "prompt":"请在下面这句话中提取出:证书号、软件名称、著作权人,以json格式输出,找不到的字段赋值为空字符串,不要有多余的文字输出,只输出json结构。@@7_软件著作过滤器@@",
                "n":"1"
            },
            "index":1
        },
        "data":{
            "1_项目文件上传":"[{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/05/1/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileId\":\"cd6592f0389bb1da25afbb44901f9cde\",\"fileName\":\"1-基于时间序列遥感 影像洪涝检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/08/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileId\":\"944eec1cf98f216ea953459dac4dd505\",\"fileName\":\"3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/09/1/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileId\":\"eb378cb9ee914323f601500378dfad76\",\"fileName\":\"4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\" }]",
            "2_文件分类信息":"{\"软件著作\":4}",
            "3_OCR识别内容":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}",
            "businessKey":"185aef3b1c810799a6be8314abf6512c",
            "7_软件著作过滤器":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}"
        },
        "created":1691004265000,
        "module":"OCR",
        "start_tag":"false",
        "last_edit":1692464331000,
        "next_app_id":[
            {
                "start_id":86,
                "edge_id":49,
                "end_id":90
            }
        ],
        "transfer_id":11,
        "blueprint_id":3,
        "scenes_id":3,
        "scenario":{
            "dataloss":1,
            "autoCommitTriggerLast":1,
            "maxErrors":3,
            "autoCommit":1,
            "freshVariables":1
        },
        "wait_condition":[

        ],
        "scheduling":{
            "interval":-1,
            "type":"single"
        },
        "name":"软件著作抽取",
        "businessKey":"185aef3b1c810799a6be8314abf6512c",
        "id":86,
        "describe":"软件著作抽取"
    }
    # Print the assembled prompt/parameters for visual inspection.
    a=get_content(inputdata,"")
    print(a)
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,13 @@ |
|||||
|
from django.conf.urls import include, url |
||||
|
from django.contrib import admin |
||||
|
from text_analysis import views |
||||
|
|
||||
|
# URL routing for the text_analysis app: a single POST endpoint that enqueues
# ASR (speech-to-text) tasks; legacy mysql routes are kept below for reference.
urlpatterns = [

    url(r'^ASRNew',views.ASRNew, name='ASRNew'),
    # url(r'^mysqlConnection',views.mysqlConnection, name='mysqlConnection'),
    # url(r'^mysqlField', views.mysqlField, name='mysqlField')

]
||||
|
|
||||
|
|
||||
@ -0,0 +1,268 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
from kazoo.client import KazooClient |
||||
|
from kazoo.protocol.states import EventType |
||||
|
# 任务队列 |
||||
|
import queue |
||||
|
task_queue = queue.PriorityQueue() |
||||
|
# 数据队列 |
||||
|
data_queue = queue.Queue() |
||||
|
stop_dict={} |
||||
|
from text_analysis.read_config import load_config |
||||
|
config=load_config() |
||||
|
|
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """Accept an ASR task via POST and enqueue it.

    Tasks flagged with ``trace: true`` get priority -1 (served first by the
    PriorityQueue); everything else gets priority 1. Non-POST requests and
    unparseable bodies are rejected with code 0.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
        traced = "trace" in raw_data.keys() and raw_data["trace"] == True
        priority = -1 if traced else 1
        # time.time() breaks priority ties so dict payloads never get compared.
        task_queue.put((priority, time.time(), raw_data))
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Worker loop: pull tasks from ``task_queue``, download the media file
    from gofast, push it to the ASR upload endpoint and enqueue the task on
    ``data_queue`` for result polling by ``getResult()``.

    Runs forever. Any per-task failure is reported to kafka with
    successCode 0 / status 2 and the loop continues.
    """
    while True:
        # FIX: bind these before the try so the except handler cannot hit a
        # NameError (or report a stale task) when failure happens early.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                p, t, raw_data = task_queue.get(timeout=1)
                logging.info("当前任务队列长度{}".format(task_queue.qsize() + 1))
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                # Drop work for scenes that were paused (version mismatch in stop_dict).
                if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                    continue

                url = raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<ref>#<sep>#<jsonpath>": resolve the reference, then
                    # apply the jsonpath to the resolved JSON document.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [m.value for m in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName = video_url.rsplit('/')[-1]
                if "http" not in video_url:
                    file = config["gofast"]["url"] + video_url.lstrip("/")
                else:
                    file = video_url
                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]
                # Download the media from gofast into a per-day local directory.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                local_path = path + '/' + fileName
                with open(local_path, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Pick the upload endpoint: video=1 for video, 0 for pure audio.
                video = 1
                if fileName[-3:] in ("m4a", "mp3", "wav"):
                    url = config["asr"]["mp3_upload"]
                    video = 0
                else:
                    url = config["asr"]["video_upload"]
                data = {'fromLanguage': language}
                # FIX: context manager — the original leaked the handle when
                # requests.post raised before f.close() was reached.
                with open(local_path, 'rb') as f:
                    response = requests.post(url, data=data, files={'file': f}, verify=False)
                logging.info("上传后接口返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d and d["code"] == 200:
                    # d["data"] is the task key used later to poll for results.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "",
                                          "dataKey": result, "video": video, "file": currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # Todo删除视频文件
            else:
                # No pending tasks: sleep before polling the queue again.
                time.sleep(10)
        except:
            logging.error(traceback.format_exc())
            # Only report when a task was actually dequeued this iteration.
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频上传异常"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def _asr_success(raw_data, res_tmp, results):
    """Fill the result payload with *results*, mark it successful, send to kafka."""
    if "content" in res_tmp:
        res_tmp["content"] = results
    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
    raw_data["result"]["status"] = 1
    raw_data["result"]["message"] = "成功"
    logging.info("视频解析获取结果成功{}".format(raw_data))
    to_kafka.send_kafka(raw_data, logging)


def _asr_failure(raw_data, res_tmp, error_log):
    """Mark the task failed (successCode 0 / status 2) and send to kafka."""
    raw_data["result"]["successCode"] = "0"
    raw_data["result"]["errorLog"] = error_log
    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
    raw_data["result"]["status"] = 2
    raw_data["result"]["message"] = "视频/音频解析异常"
    to_kafka.send_kafka(raw_data, logging)


def getResult():
    """Worker loop: poll the ASR service every 3s for tasks queued by upload().

    Finished transcripts are pushed to kafka; still-processing tasks are
    re-queued on ``data_queue``; failures are reported with status 2.
    """
    while True:
        time.sleep(3)  # poll interval
        # FIX: bind before the try so the except handler cannot NameError when
        # an exception fires before a task was dequeued.
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                logging.info("任务数据为:{}".format(raw_data))
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                # Drop work for scenes that were paused (version mismatch).
                if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                    continue
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp:
                    res_tmp["id"] = str(uuid.uuid4())
                res_tmp["isLast"] = 1
                res_tmp["fileName"] = raw_data["result"]["file"]["fileName"]
                # Poll the result endpoint with the key returned at upload time.
                dataKey = raw_data["result"]["dataKey"]
                language = raw_data["input"]["fromLanguage"]
                data = {'fromLanguage': language, 'taskId': dataKey}
                if raw_data["result"]["video"] == 1:
                    url = config["asr"]["video_getResult"] + "?taskId={}".format(dataKey)
                    response = requests.get(url, verify=False)
                else:
                    url = config["asr"]["mp3_getResult"]
                    response = requests.post(url, data=data, verify=False)

                logging.info("ASR网站返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d and d["code"] == 200:
                    if d["data"]["code"] == "1":
                        # Done: join sentence texts with single spaces (empty
                        # transcript when no sentences were produced).
                        sentences = d["data"]["sentences"] or []
                        results = ' '.join(s["text"] for s in sentences)
                        _asr_success(raw_data, res_tmp, results)
                    elif d["data"]["code"] == "0":
                        # Still processing: re-queue and retry next cycle.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        _asr_failure(raw_data, res_tmp, response.text)
                else:
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    _asr_failure(raw_data, res_tmp, response.text)
            else:
                # No pending results: sleep longer before checking again.
                time.sleep(10)
        except:
            logging.error(traceback.format_exc())
            if raw_data is not None:
                raw_data.setdefault("result", {})
                _asr_failure(raw_data, res_tmp, traceback.format_exc())
||||
|
|
||||
|
|
||||
|
def zk_monitoring():
    """Watch the ``/analyze`` zookeeper node for pause/stop commands.

    Each change payload carries ``scenes_id`` / ``version`` / ``operation``;
    the parsed values are stored in the module-level ``stop_dict``, which
    upload() and getResult() consult to drop work for paused scenes.
    Blocks forever; intended to run in its own thread.
    """
    try:
        # Production ensemble (test hosts kept commented for reference).
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Data watch: fires on every change to the node's content.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                # logging.info("执行删除操作:{}".format(data))
                try:
                    d = json.loads(data)
                    id = d["scenes_id"]
                    stop_dict[id] = {}
                    stop_dict[id]["version"] = d["version"]
                    stop_dict[id]["operation"] = d["operation"]
                except:
                    # Malformed payloads are ignored on purpose (best effort).
                    pass
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # Close the connection on interruption.
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
@ -0,0 +1,266 @@ |
|||||
|
# coding:utf8 |
||||
|
import os, sys |
||||
|
import io |
||||
|
from jsonpath_ng import jsonpath, parse |
||||
|
import uuid |
||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8') |
||||
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd() |
||||
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir)) |
||||
|
sys.path.append(cur_dir) |
||||
|
sys.path.append(par_dir) |
||||
|
import json |
||||
|
from django.http import HttpResponse |
||||
|
from text_analysis.tools import to_kafka |
||||
|
from django.views.decorators.csrf import csrf_exempt |
||||
|
from log_util.set_logger import set_logger |
||||
|
|
||||
|
logging = set_logger('logs/results.log') |
||||
|
import traceback |
||||
|
import queue |
||||
|
import requests |
||||
|
from text_analysis.tools.tool import parse_data |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
import os |
||||
|
from kazoo.client import KazooClient |
||||
|
from kazoo.protocol.states import EventType |
||||
|
# 任务队列 |
||||
|
# global task_queue |
||||
|
task_queue = queue.Queue() |
||||
|
# 数据队列 |
||||
|
# global data_queue |
||||
|
data_queue = queue.Queue() |
||||
|
stop_dict={} |
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """Legacy (bak) handler: accept a task via POST and enqueue it FIFO.

    Unlike the current version there is no trace-based prioritisation —
    tasks go straight onto a plain ``queue.Queue``.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except:
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Legacy (bak) worker loop: download media from gofast, push it to the
    (hardcoded) ASR upload endpoint, and queue the task for result polling.

    NOTE(review): if an exception fires before raw_data/res_tmp are bound,
    the except handler raises NameError or reports a stale task — fixed in
    the current views.py. The upload file handle also leaks if requests.post
    raises before f.close().
    """
    while True:
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Result skeleton: one empty field per declared output column.
                output=raw_data["output"]
                res_tmp={key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"]=str(uuid.uuid4())
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                # Drop work for scenes paused via zookeeper (version mismatch).
                if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                    logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                    continue

                url=raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<ref>#<sep>#<jsonpath>": resolve ref, then jsonpath into it.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [match.value for match in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName=video_url.rsplit('/')[-1]
                if "http" not in video_url:
                    # Relative paths are served from the hardcoded gofast host.
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file=video_url
                # name=raw_data["metadata"]["admin"]["fileName"]
                # if '$.' in name:
                #     # json.path表达式动态获取value
                #     datasources = str(name).split(':')
                #     # 0是数据源,1是JsonPath 表达式
                #     datasourcestr = raw_data["data"][datasources[0]]
                #     datasource = json.loads(datasourcestr)
                #     # 创建 JsonPath 表达式对象
                #     expr = parse(datasources[1])
                #     # 使用表达式来选择 JSON 元素
                #     match = [match.value for match in expr.find(datasource)]
                #     fileName = match[0]

                currentFile={"fileName":fileName,"fileUrl":file}
                language = raw_data["input"]["fromLanguage"]
                # Download the media from gofast into a per-day local directory.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path + '/' + fileName, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # video=1 for video files, 0 for pure audio.
                video=1
                if fileName[-3:]=="m4a" or fileName[-3:]=="mp3" or fileName[-3:]=="wav":
                    url="https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                    video=0
                else:
                    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                f = open(path + '/' + fileName, 'rb')
                files = {'file': f}
                response = requests.post(url, data=data, files=files,verify=False)
                logging.info("上传后接口返回值:{}-{}".format(response,response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # d["data"] is the key used later to poll for the result.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result,"video":video,"file":currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                f.close()
                # Todo删除视频文件
            else:
                # No pending tasks: sleep before polling the queue again.
                time.sleep(10)
        except:
            # Report the failure to kafka with an empty result skeleton.
            raw_data["result"]={}
            raw_data["result"]["successCode"] = "0"
            raw_data["result"]["status"]=2
            raw_data["result"]["message"]="视频/音频上传异常"
            raw_data["result"]["errorLog"] = traceback.format_exc()
            raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
            logging.error(traceback.format_exc())
            to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Legacy (bak) worker loop: poll the hardcoded ASR endpoints every 3s
    for tasks queued by upload(); push transcripts to kafka, re-queue tasks
    still being processed, report failures with status 2.

    NOTE(review): the except handler assumes raw_data/res_tmp are bound —
    an early exception raises NameError; fixed in the current views.py.
    """
    while True:
        # Poll interval between result checks.
        time.sleep(3)
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                logging.info("任务数据为:{}".format(raw_data))
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                # Drop work for scenes paused via zookeeper (version mismatch).
                if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                    logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                    continue
                output=raw_data["output"]
                res_tmp={key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"]=str(uuid.uuid4())
                res_tmp["isLast"]=1
                res_tmp["fileName"]=raw_data["result"]["file"]["fileName"]
                # Poll with the key handed back at upload time.
                dataKey = raw_data["result"]["dataKey"]
                params = {'taskId': dataKey}
                language = raw_data["input"]["fromLanguage"]
                data = {'fromLanguage': language,'taskId': dataKey}
                if raw_data["result"]["video"]==1:
                    url = "https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                    response = requests.get(url, params=params, verify=False)
                else:
                    url ="https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                    response = requests.post(url, data=data, verify=False)
                logging.info("ASR网站返回值:{}-{}".format(response,response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    results = ""
                    if d["data"]["code"] == "1" and d["data"]["sentences"]:
                        # Finished with content: join sentence texts with spaces.
                        for sentence in d["data"]["sentences"]:
                            if results:
                                results += ' ' + sentence["text"]
                            else:
                                results = sentence["text"]
                        if "content" in res_tmp.keys():
                            res_tmp["content"]=results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"]["status"]=1
                        raw_data["result"]["message"]="成功"
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "1" and not d["data"]["sentences"]:
                        # Finished but empty transcript: still a success.
                        results =""
                        if "content" in res_tmp.keys():
                            res_tmp["content"]=results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"]["status"]=1
                        raw_data["result"]["message"]="成功"
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still processing: re-queue and retry next cycle.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # Service reported a parse failure for this task.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"]["status"]=2
                        raw_data["result"]["message"]="视频/音频解析异常"
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # HTTP-level / envelope failure from the ASR service.
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["status"] = 2
                    raw_data["result"]["message"] = "视频/音频解析异常"
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending results: sleep longer before checking again.
                time.sleep(10)
        except:
            raw_data["result"]["successCode"] = "0"
            raw_data["result"]["errorLog"] = traceback.format_exc()
            raw_data["result"]["status"] = 2
            raw_data["result"]["message"] = "视频/音频解析异常"
            raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
            logging.error(traceback.format_exc())
            to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def zk_monitoring():
    """Legacy (bak) zookeeper watcher for ``/analyze`` pause/stop commands.

    Differences from the current version: ensemble hosts are hardcoded and
    the payload parsing is NOT wrapped in try/except — a malformed payload
    kills the watch callback.
    """
    try:
        # Production ensemble (test hosts kept commented for reference).
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Data watch: fires on every change to the node's content.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # Close the connection on interruption.
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
@ -0,0 +1,271 @@ |
|||||
|
# coding:utf8
import os, sys
import io
from jsonpath_ng import jsonpath, parse
import uuid
# Force UTF-8 stdout so Chinese log text prints correctly under uWSGI.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# Make both this directory and its parent importable.
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger

logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import parse_data
import time
from datetime import datetime
import os
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# Task queue: raw requests accepted by the ASRNew view, consumed by upload().
task_queue = queue.Queue()
# Data queue: uploaded tasks awaiting results, consumed by getResult().
data_queue = queue.Queue()
# scenes_id -> {"version": ..., "operation": ...}; maintained by zk_monitoring().
stop_dict={}
||||
|
|
||||
|
@csrf_exempt
def ASRNew(request):
    """Django view: accept an ASR task as a JSON POST body.

    The parsed payload is put on the module-level ``task_queue`` for the
    background ``upload`` worker.  Returns a JSON body with ``code`` 1 on
    success, 0 on a malformed body or a non-POST request.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are not swallowed; any parse/queue error is logged and reported.
        except Exception:
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
||||
|
|
||||
|
|
||||
|
def upload():
    """Background worker: upload media files from ``task_queue`` to the ASR service.

    Resolves each task's file URL (optionally through a JsonPath expression),
    downloads the media from gofast into ``inputdata/<date>/``, posts it to the
    audio or video upload endpoint, and on success queues the task (carrying
    the returned dataKey) onto ``data_queue`` for ``getResult``.  Any failure
    is reported to Kafka with an error result.  Runs forever.
    """
    while True:
        # Reset per-iteration state so the except handler never reports a
        # stale task from a previous iteration (or hits a NameError).
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Pre-build the result skeleton from the requested output keys.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                # A version mismatch in stop_dict means the task was paused.
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务上传,过滤掉。{}".format(raw_data))
                    continue

                url = raw_data["input"]["fileUrl"]
                if "json" in url:
                    # "<data-url>#<sep>#<jsonpath>": fetch the JSON payload,
                    # then pick the real media URL with the JsonPath expression.
                    parm = url.split("#")
                    data1 = parse_data(raw_data, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[2])
                    match = [match.value for match in expr.find(data1_json)]
                    video_url = match[0]
                else:
                    video_url = parse_data(raw_data, url)
                fileName = video_url.rsplit('/')[-1]
                # Relative paths are served from the collection host.
                if "http" not in video_url:
                    file = "https://caiji.percent.cn/" + video_url.lstrip("/")
                else:
                    file = video_url

                currentFile = {"fileName": fileName, "fileUrl": file}
                language = raw_data["input"]["fromLanguage"]
                # Download the media from gofast.
                myfile = requests.get(file)
                starttime = datetime.now().strftime('%Y-%m-%d')
                path = 'inputdata/' + starttime
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path + '/' + fileName, 'wb') as f:
                    f.write(myfile.content)
                logging.info("视频从gofast下载完毕,开始上传-{}".format(fileName))
                # Pick the upload endpoint; video=1 video, 0 audio.
                video = 1
                if fileName[-3:] == "m4a" or fileName[-3:] == "mp3" or fileName[-3:] == "wav":
                    # url="https://realtime.pdeepmatrix.com/apis/file/asr/upload"
                    # Fix: the URL previously carried a trailing space, which
                    # becomes part of the request path.
                    url = "http://172.18.1.155:6611/apis/file/asr/upload"
                    video = 0
                else:
                    # url = "https://realtime.pdeepmatrix.com/apis/media/analysis/upload"
                    url = "http://172.18.1.155:6611/apis/media/analysis/upload"
                data = {'fromLanguage': language}
                # ``with`` guarantees the handle is closed even if the POST
                # raises (the old explicit f.close() was skipped on error).
                with open(path + '/' + fileName, 'rb') as f:
                    files = {'file': f}
                    response = requests.post(url, data=data, files=files, verify=False)
                logging.info("上传后接口返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    # d["data"] holds the key used later to fetch the result.
                    result = d["data"]
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": "", "dataKey": result, "video": video, "file": currentFile}
                    data_queue.put(raw_data)
                    logging.info("视频上传成功{}".format(raw_data))
                    # to_kafka.send_kafka(raw_data,logging)
                else:
                    logging.info("视频上传失败{}-{}".format(raw_data, d))
                # Todo: delete the downloaded media file.
            else:
                # No pending task: sleep before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report a failure when a task was actually dequeued.
            if raw_data is not None:
                raw_data["result"] = {}
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频上传异常"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def getResult():
    """Background worker: poll the ASR service for results of uploaded media.

    Every 3 seconds takes a task from ``data_queue`` and queries the result
    endpoint with its dataKey.  Finished transcripts are sent to Kafka;
    still-processing tasks are re-queued; failures are sent to Kafka with an
    error result.  Runs forever.
    """
    while True:
        # Fixed polling cadence.
        time.sleep(3)
        # Reset per-iteration state so the except handler never reports a
        # stale task from a previous iteration (or hits a NameError).
        raw_data = None
        res_tmp = {}
        try:
            if data_queue.qsize() > 0:
                logging.info("取数据队列长度{}".format(data_queue.qsize()))
                raw_data = data_queue.get()
                logging.info("任务数据为:{}".format(raw_data))
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                # A version mismatch in stop_dict means the task was paused.
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停获取结果任务,过滤掉。{}".format(raw_data))
                    continue
                # Pre-build the result skeleton from the requested output keys.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                res_tmp["isLast"] = 1
                res_tmp["fileName"] = raw_data["result"]["file"]["fileName"]
                # Query the result endpoint with the upload dataKey.
                dataKey = raw_data["result"]["dataKey"]
                params = {'taskId': dataKey}
                language = raw_data["input"]["fromLanguage"]
                data = {'fromLanguage': language, 'taskId': dataKey}
                if raw_data["result"]["video"] == 1:
                    # url="https://realtime.pdeepmatrix.com/apis/media/analysis/getResult"
                    url = "http://172.18.1.155:6611/apis/media/analysis/getResult?taskId={}".format(dataKey)
                    response = requests.get(url, verify=False)
                else:
                    # url ="https://realtime.pdeepmatrix.com/apis/file/asr/getResult"
                    url = "http://172.18.1.155:6611/apis/file/asr/getResult"
                    response = requests.post(url, data=data, verify=False)

                logging.info("ASR网站返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                if "code" in d.keys() and d["code"] == 200:
                    if d["data"]["code"] == "1":
                        # Finished.  The two original success branches were
                        # identical except for the sentence concatenation, so
                        # they are merged: join texts, "" when no sentences.
                        sentences = d["data"]["sentences"] or []
                        results = ' '.join(sentence["text"] for sentence in sentences)
                        if "content" in res_tmp.keys():
                            res_tmp["content"] = results
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"]["status"] = 1
                        raw_data["result"]["message"] = "成功"
                        logging.info("视频解析获取结果成功{}".format(raw_data))
                        to_kafka.send_kafka(raw_data, logging)
                    elif d["data"]["code"] == "0":
                        # Still processing: put the task back on the queue.
                        data_queue.put(raw_data)
                        logging.info("视频未解析完毕,放回队列等待{}-{}".format(raw_data, d))
                    else:
                        # The service reported a parse failure.
                        raw_data["result"]["successCode"] = "0"
                        raw_data["result"]["errorLog"] = response.text
                        raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"]["status"] = 2
                        raw_data["result"]["message"] = "视频/音频解析异常"
                        logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                        to_kafka.send_kafka(raw_data, logging)
                else:
                    # Transport-level failure (non-200 application code).
                    raw_data["result"]["successCode"] = "0"
                    raw_data["result"]["errorLog"] = response.text
                    raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["status"] = 2
                    raw_data["result"]["message"] = "视频/音频解析异常"
                    logging.info("视频解析获取结果失败,数据{},接口返回值{}".format(raw_data, d))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending task: sleep a bit longer before polling again.
                time.sleep(10)
        except Exception:
            logging.error(traceback.format_exc())
            # Only report a failure when a task was actually dequeued.
            if raw_data is not None:
                raw_data.setdefault("result", {})
                raw_data["result"]["successCode"] = "0"
                raw_data["result"]["errorLog"] = traceback.format_exc()
                raw_data["result"]["status"] = 2
                raw_data["result"]["message"] = "视频/音频解析异常"
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logging)
||||
|
|
||||
|
|
||||
|
def zk_monitoring():
    """Listen on the ZooKeeper node "/analyze" and record pause/version info.

    Each CHANGED event's JSON payload is stored in the module-level
    ``stop_dict`` under its ``scenes_id``; the worker loops consult that map
    to skip paused tasks.  Blocks forever while the watch is active.
    """
    try:
        # Production ensemble (test-environment hosts kept below, disabled).
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to content changes on the node.
            if event is None or event.type != EventType.CHANGED:
                return
            payload, _stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(payload))
            info = json.loads(payload)
            scene = info["scenes_id"]
            stop_dict[scene] = {
                "version": info["version"],
                "operation": info["operation"],
            }

        try:
            # Sleep-loop forever so the watch stays registered.
            while True:
                time.sleep(1)
        except:
            # Bare except on purpose: also triggered by KeyboardInterrupt,
            # so the connection is released on shutdown.
            logging.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())
||||
|
|
||||
|
|
||||
@ -0,0 +1,16 @@ |
|||||
|
""" |
||||
|
WSGI config for Zhijian_Project_WebService project. |
||||
|
|
||||
|
It exposes the WSGI callable as a module-level variable named ``application``. |
||||
|
|
||||
|
For more information on this file, see |
||||
|
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
||||
|
""" |
||||
|
|
||||
|
import os |
||||
|
|
||||
|
from django.core.wsgi import get_wsgi_application |
||||
|
|
||||
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings") |
||||
|
|
||||
|
application = get_wsgi_application() |
||||
@ -0,0 +1,8 @@ |
|||||
|
[uwsgi] |
||||
|
http = 0.0.0.0:9014 |
||||
|
chdir = ../asrNew |
||||
|
wsgi-file = ../asrNew/wsgi.py |
||||
|
processes = 1 |
||||
|
threads = 2 |
||||
|
listen = 1024 |
||||
|
http-timeout=21600 |
||||
@ -0,0 +1,58 @@ |
|||||
|
*** Starting uWSGI 2.0.21 (64bit) on [Thu Jan 2 14:58:11 2025] *** |
||||
|
compiled with version: 11.2.0 on 24 October 2023 19:53:56 |
||||
|
os: Linux-3.10.0-1127.19.1.el7.x86_64 #1 SMP Tue Aug 25 17:23:54 UTC 2020 |
||||
|
nodename: node-04 |
||||
|
machine: x86_64 |
||||
|
clock source: unix |
||||
|
pcre jit disabled |
||||
|
detected number of CPU cores: 64 |
||||
|
current working directory: /opt/analyze/apps/asrNew |
||||
|
detected binary path: /opt/analyze/environment/python3.8/bin/uwsgi |
||||
|
uWSGI running as root, you can use --uid/--gid/--chroot options |
||||
|
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) *** |
||||
|
chdir() to ../asrNew |
||||
|
*** WARNING: you are running uWSGI without its master process manager *** |
||||
|
your processes number limit is 1031041 |
||||
|
your memory page size is 4096 bytes |
||||
|
detected max file descriptor number: 65535 |
||||
|
lock engine: pthread robust mutexes |
||||
|
thunder lock: disabled (you can enable it with --thunder-lock) |
||||
|
Listen queue size is greater than the system max net.core.somaxconn (128). |
||||
|
*** Starting uWSGI 2.0.21 (64bit) on [Thu Jan 2 15:05:08 2025] *** |
||||
|
compiled with version: 11.2.0 on 24 October 2023 19:53:56 |
||||
|
os: Linux-3.10.0-1127.19.1.el7.x86_64 #1 SMP Tue Aug 25 17:23:54 UTC 2020 |
||||
|
nodename: node-04 |
||||
|
machine: x86_64 |
||||
|
clock source: unix |
||||
|
pcre jit disabled |
||||
|
detected number of CPU cores: 64 |
||||
|
current working directory: /opt/analyze/apps/asrNew |
||||
|
detected binary path: /opt/analyze/environment/python3.8/bin/uwsgi |
||||
|
uWSGI running as root, you can use --uid/--gid/--chroot options |
||||
|
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) *** |
||||
|
chdir() to ../asrNew |
||||
|
*** WARNING: you are running uWSGI without its master process manager *** |
||||
|
your processes number limit is 1031041 |
||||
|
your memory page size is 4096 bytes |
||||
|
detected max file descriptor number: 65535 |
||||
|
lock engine: pthread robust mutexes |
||||
|
thunder lock: disabled (you can enable it with --thunder-lock) |
||||
|
uWSGI http bound on 0.0.0.0:9014 fd 4 |
||||
|
spawned uWSGI http 1 (pid: 32756) |
||||
|
uwsgi socket 0 bound to TCP address 127.0.0.1:39733 (port auto-assigned) fd 3 |
||||
|
uWSGI running as root, you can use --uid/--gid/--chroot options |
||||
|
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) *** |
||||
|
Python version: 3.8.16 (default, Jun 12 2023, 18:09:05) [GCC 11.2.0] |
||||
|
Python main interpreter initialized at 0x22e11b0 |
||||
|
uWSGI running as root, you can use --uid/--gid/--chroot options |
||||
|
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) *** |
||||
|
python threads support enabled |
||||
|
your server socket listen backlog is limited to 1024 connections |
||||
|
your mercy for graceful operations on workers is 60 seconds |
||||
|
mapped 83376 bytes (81 KB) for 2 cores |
||||
|
*** Operational MODE: threaded *** |
||||
|
WSGI app 0 (mountpoint='') ready in 0 seconds on interpreter 0x22e11b0 pid: 32755 (default app) |
||||
|
uWSGI running as root, you can use --uid/--gid/--chroot options |
||||
|
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) *** |
||||
|
*** uWSGI is running in multiple interpreter mode *** |
||||
|
spawned uWSGI worker 1 (and the only) (pid: 32755, cores: 2) |
||||
@ -0,0 +1,35 @@ |
|||||
|
""" |
||||
|
WSGI config for Zhijian_Project_WebService project. |
||||
|
|
||||
|
It exposes the WSGI callable as a module-level variable named ``application``. |
||||
|
|
||||
|
For more information on this file, see |
||||
|
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
||||
|
""" |
||||
|
|
||||
|
import os |
||||
|
|
||||
|
import threading |
||||
|
from text_analysis.views import upload,getResult,zk_monitoring |
||||
|
|
||||
|
t = threading.Thread(target=upload, name='upload') |
||||
|
t.daemon = True |
||||
|
t.start() |
||||
|
|
||||
|
r = threading.Thread(target=getResult, name='getResult') |
||||
|
r.daemon = True |
||||
|
r.start() |
||||
|
|
||||
|
#启动zk监听线程 |
||||
|
t = threading.Thread(target=zk_monitoring, name='zk_monitoring') |
||||
|
t.daemon = True |
||||
|
t.start() |
||||
|
|
||||
|
from django.core.wsgi import get_wsgi_application |
||||
|
|
||||
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings") |
||||
|
application = get_wsgi_application() |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
@ -0,0 +1,30 @@ |
|||||
|
""" |
||||
|
WSGI config for Zhijian_Project_WebService project. |
||||
|
|
||||
|
It exposes the WSGI callable as a module-level variable named ``application``. |
||||
|
|
||||
|
For more information on this file, see |
||||
|
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/ |
||||
|
""" |
||||
|
|
||||
|
import os |
||||
|
|
||||
|
import threading |
||||
|
from text_analysis.views import upload,getResult |
||||
|
|
||||
|
t = threading.Thread(target=upload, name='upload') |
||||
|
t.daemon = True |
||||
|
t.start() |
||||
|
|
||||
|
r = threading.Thread(target=getResult, name='getResult') |
||||
|
r.daemon = True |
||||
|
r.start() |
||||
|
|
||||
|
from django.core.wsgi import get_wsgi_application |
||||
|
|
||||
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings") |
||||
|
application = get_wsgi_application() |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue