假新闻识别应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

158 lines
6.6 KiB

#coding:utf8
# Module-level setup for the fake-news identification service: stdout encoding,
# import path, logger, shared task queue, stop registry and configuration.
# NOTE: statement order matters here -- the sys.path additions run before the
# project-local imports that follow them.
import os, sys
import io
# Re-wrap stdout so text written to it is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# Directory of this file (falling back to the working directory) and its parent.
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
# Append both directories to the module search path.
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka,tool
from text_analysis.tools import pred
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
# `logging` here is the object returned by set_logger, not the stdlib module.
logging=set_logger('logs/results.log')
import traceback
import queue
from text_analysis.cusException import userFile_Exception,chainFile_Exception
import requests
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# Task queue
# (`queue` is imported a second time below; the repeat is harmless.)
import queue
# Shared between the HTTP view (producer) and predict_news (consumer).
# Entries are (priority, enqueue timestamp, request payload) tuples.
task_queue = queue.PriorityQueue()
# Filled by zk_monitoring: scenes_id -> {"version": ..., "operation": ...}.
stop_dict={}
from text_analysis.read_config import load_config
# Configuration object; the 'database' and 'zookeeper' sections are read below.
config=load_config()
@csrf_exempt
def fakeNewIdentification(request):
    """Accept a fake-news identification request and enqueue it for processing.

    The request body must be a JSON object. It is placed on ``task_queue`` as a
    ``(priority, enqueue_time, payload)`` tuple; payloads whose ``"trace"``
    value equals ``True`` get priority ``-1`` (served first), all others ``1``.

    Args:
        request: Django ``HttpRequest``.

    Returns:
        ``HttpResponse`` whose body is JSON: ``{"code": 1, ...}`` when the task
        was queued, ``{"code": 0, ...}`` for a non-POST request or a body that
        could not be parsed/queued.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))

    try:
        raw_data = json.loads(request.body)
        # Reject valid JSON that is not an object (list, string, number, ...)
        # explicitly instead of relying on an AttributeError further down.
        if not isinstance(raw_data, dict):
            raise ValueError("request body must be a JSON object")
        # `== True` (not `is True`) is kept deliberately: a JSON value of 1
        # has always been treated as a trace request as well.
        priority = -1 if raw_data.get("trace") == True else 1
        # NOTE(review): if two entries ever share both priority and timestamp,
        # the queue falls back to comparing the payload dicts, which raises
        # TypeError. Fixing that needs a tie-breaker field and a matching
        # change in the consumer -- confirm whether it is worth doing.
        task_queue.put((priority, time.time(), raw_data))
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not reported as a malformed request.
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))

    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
def _failure_result(error_log, recognition_result, message):
    """Build the ``result`` record published when a task cannot be completed.

    Args:
        error_log: Text stored under ``"errorLog"``.
        recognition_result: Text stored under ``"recognitionResult"`` inside
            the JSON-encoded ``"results"`` payload.
        message: Text stored under ``"message"``.

    Returns:
        dict with ``successCode`` "0" and ``status`` 2.
    """
    payload = {'pageType': 'fakeNewsPage', 'recognitionResult': recognition_result}
    return {
        "successCode": "0",
        "errorLog": error_log,
        # Default json.dumps escaping is used here (no ensure_ascii=False),
        # unlike the success path, so the failure payload stays unchanged.
        "results": json.dumps(payload),
        "status": 2,
        "message": message,
    }


def _process_task(raw_data, dbConfig):
    """Run prediction for one queued task and publish one message per result row.

    Args:
        raw_data: Request payload dict; ``"scenes_id"`` and ``"version"`` are read.
        dbConfig: Database settings passed through to ``tool.mysqlData``.

    Raises:
        chainFile_Exception: The propagation-chain query returned nothing.
    """
    # +1 because this task has already been removed from the queue.
    logging.info("当前任务队列长度{}".format(task_queue.qsize()+1))
    logging.info("任务数据-{}".format(raw_data))
    task_id = raw_data["scenes_id"]
    task_version = raw_data["version"]
    logging.info("当前version信息为:{}".format(stop_dict))
    # Drop tasks whose version differs from the one recorded for this scene.
    if task_id in stop_dict and task_version != stop_dict[task_id]["version"]:
        logging.info("已暂停任务,数据过滤掉")
        return

    res = {"successCode": "1", "errorLog": "", "results": {}, "status": 1, "message": "成功"}
    # Account data. An empty result is not rejected here.
    userData = tool.mysqlData(raw_data, logging, "1", dbConfig)
    logging.info("账号数据获取完毕!-长度{}".format(len(userData)))
    # Propagation-chain data; required.
    postChain = tool.mysqlData(raw_data, logging, "0", dbConfig)
    if not postChain:
        raise chainFile_Exception
    logging.info("传播链数据获取完毕!-长度{}".format(len(postChain)))

    # presumably a pandas DataFrame (indexed with .iloc below) -- TODO confirm
    news = pred.predict_news(userData, postChain, logging)
    # End-of-task marker carried on every published message.
    res['isLast'] = True
    total = len(news)
    for index in range(total):
        row_dict = news.iloc[index].to_dict()
        row_dict['pageType'] = 'fakeNewsPage'
        # NOTE(review): raises IndexError when userData is empty and news is
        # not; the task is then reported through the generic failure path.
        row_dict['postId'] = userData[0]['postId']
        if index == total - 1:
            row_dict["isLast"] = 1
        res["results"] = json.dumps(row_dict, ensure_ascii=False)
        raw_data["result"] = res
        logging.info("共{}条数据,第{}条数据输出-{}".format(total, index+1, raw_data))
        to_kafka.send_kafka(raw_data, logging)


def _handle_task(raw_data, dbConfig):
    """Process one task; on failure publish a failure record for that task."""
    try:
        _process_task(raw_data, dbConfig)
    except userFile_Exception:
        raw_data["result"] = _failure_result("用户数据为空!", '用户数据为空', "用户数据为空")
        logging.info("该条请求用户数据为空-{}".format(raw_data))
        to_kafka.send_kafka(raw_data, logging)
    except chainFile_Exception:
        raw_data["result"] = _failure_result("关系链数据为空!", '关系链数据为空', "关系链数据为空")
        logging.info("该条请求关系链数据为空-{}".format(raw_data))
        to_kafka.send_kafka(raw_data, logging)
    except Exception:
        error_text = traceback.format_exc()
        raw_data["result"] = _failure_result(error_text, "", "异常")
        logging.info(error_text)
        to_kafka.send_kafka(raw_data, logging)


def predict_news():
    """Consume ``task_queue`` forever, running prediction for each queued task.

    Never returns. Each task is dequeued first and only then processed, so a
    failure record is always attached to the task that actually failed (the
    previous layout could reuse the prior iteration's payload, or hit an
    unbound name, when the failure happened before a task was obtained).
    """
    dbConfig = dict(config.items('database'))
    while True:
        try:
            # Block until a task arrives instead of polling qsize() and sleeping.
            _priority, _queued_at, raw_data = task_queue.get(timeout=10)
        except queue.Empty:
            continue
        except Exception:
            # No task was obtained, so there is nothing to report downstream.
            logging.error(traceback.format_exc())
            continue

        try:
            _handle_task(raw_data, dbConfig)
        except Exception:
            # Publishing the failure record itself failed; log it and keep the
            # worker alive for the next task.
            logging.error(traceback.format_exc())
def zk_monitoring():
    """Watch the ZooKeeper node ``/analyze`` and mirror its content into ``stop_dict``.

    Connects using ``config['zookeeper']['zkhost']``, registers a data watch on
    ``/analyze`` and then blocks, sleeping in one-second steps, until the
    process is interrupted. Every CHANGED event re-reads the node, parses it as
    JSON and records ``{"version", "operation"}`` under its ``scenes_id``.

    Connection or setup errors are logged, not raised.
    """
    try:
        # Production environment
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        # Register the listener
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to content changes of an existing node.
            if event is None or event.type != EventType.CHANGED:
                return
            try:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                # Build the entry completely before publishing it, so a reader
                # on another thread never sees a half-filled dict.
                stop_dict[d["scenes_id"]] = {
                    "version": d["version"],
                    "operation": d["operation"],
                }
            except Exception:
                # Malformed node content must not go unreported; log it through
                # the application logger.
                logging.error(traceback.format_exc())

        # Keep the function alive so the watch keeps receiving node changes.
        try:
            while True:
                time.sleep(1)
        except (KeyboardInterrupt, SystemExit):
            logging.info("Stopping...")
        finally:
            # Close the connection
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())