Fake News Identification Application (假新闻识别应用)

#coding:utf8
import os, sys
import io

# Force UTF-8 stdout so Chinese log/response text prints correctly
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# Make this package and its parent importable
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)

import json
import queue
import time
import traceback
import requests
from django.http import HttpResponse
from django.views.decorators.csrf import csrf_exempt
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType

from text_analysis.tools import to_kafka, tool
from text_analysis.tools import pred
from text_analysis.cusException import userFile_Exception, chainFile_Exception
from text_analysis.read_config import load_config
from log_util.set_logger import set_logger

# Project logger (note: the name deliberately shadows the stdlib logging module)
logging = set_logger('logs/results.log')

# Task queue: trace tasks are enqueued with priority -1 so they are dequeued first
task_queue = queue.PriorityQueue()
# scenes_id -> latest version/operation pushed via ZooKeeper; used to drop stale tasks
stop_dict = {}

config = load_config()
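
# Illustrative sketch only: the config loaded above is expected to provide at least a
# [database] section (read wholesale into dbConfig in predict_news and handed to
# tool.mysqlData) and a [zookeeper] section with a "zkhost" entry (used by zk_monitoring).
# The exact database keys are consumed inside tool.mysqlData and are not shown in this
# file, so the key names below are assumptions:
#
#   [database]
#   host = 127.0.0.1    ; hypothetical key
#   port = 3306         ; hypothetical key
#
#   [zookeeper]
#   zkhost = host1:2181,host2:2181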

@csrf_exempt
def fakeNewIdentification(request):
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            # Trace requests jump the queue: a lower priority value is dequeued first
            if "trace" in raw_data.keys() and raw_data["trace"] == True:
                task_queue.put((-1, time.time(), raw_data))
            else:
                task_queue.put((1, time.time(), raw_data))
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except Exception:
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
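
# A minimal client-side sketch (not used by the service itself) of how a caller might
# submit a task to the view above. The URL path and payload values are assumptions --
# routing lives in the project's urls.py, which is not part of this file; only the keys
# "scenes_id", "version" and "trace" are actually read by this module.
def _example_submit_task():
    payload = {
        "scenes_id": 123,   # hypothetical value
        "version": "v1",    # hypothetical value
        "trace": True,      # True => enqueued with priority -1 and processed first
    }
    # Hypothetical host, port and route
    resp = requests.post("http://127.0.0.1:8000/fakeNewIdentification", json=payload)
    return resp.json()      # e.g. {"code": 1, "msg": "请求正常!"}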

def predict_news():
    """Background worker: pulls tasks off the priority queue, runs prediction, pushes results to Kafka."""
    dbConfig = dict(config.items('database'))
    while True:
        try:
            if task_queue.qsize() > 0:
                p, t, raw_data = task_queue.get(timeout=1)
                # +1 accounts for the task just taken off the queue
                logging.info("当前任务队列长度{}".format(task_queue.qsize() + 1))
                logging.info("任务数据-{}".format(raw_data))
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logging.info("当前version信息为:{}".format(stop_dict))
                # Drop tasks whose version no longer matches the latest one pushed via ZooKeeper
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,数据过滤掉")
                    continue
                res = {"successCode": "1", "errorLog": "", "results": {}, "status": 1, "message": "成功"}
                # Account data
                userData = tool.mysqlData(raw_data, logging, "1", dbConfig)
                # if not userData:
                #     raise userFile_Exception
                logging.info("账号数据获取完毕!-长度{}".format(len(userData)))
                # Propagation-chain data
                postChain = tool.mysqlData(raw_data, logging, "0", dbConfig)
                if not postChain:
                    raise chainFile_Exception
                logging.info("传播链数据获取完毕!-长度{}".format(len(postChain)))
                news = pred.predict_news(userData, postChain, logging)
                # End-of-batch flag
                res['isLast'] = True
                # Emit one Kafka message per predicted row
                for i in range(len(news)):
                    row_dict = news.iloc[i].to_dict()
                    row_dict['pageType'] = 'fakeNewsPage'
                    # postId
                    row_dict['postId'] = userData[0]['postId']
                    if i == len(news) - 1:
                        row_dict["isLast"] = 1
                    res["results"] = json.dumps(row_dict, ensure_ascii=False)
                    res["status"] = 1
                    res["message"] = "成功"
                    raw_data["result"] = res
                    logging.info("共{}条数据,第{}条数据输出-{}".format(len(news), i + 1, raw_data))
                    to_kafka.send_kafka(raw_data, logging)
            else:
                # No pending task; sleep for a while
                time.sleep(10)
        except userFile_Exception:
            res = {"successCode": "0", "errorLog": "用户数据为空!", "results": {}, "status": 2, "message": "异常"}
            results = {}
            results['pageType'] = 'fakeNewsPage'
            results['recognitionResult'] = '用户数据为空'
            res['results'] = json.dumps(results)
            res["status"] = 2
            res["message"] = "用户数据为空"
            raw_data["result"] = res
            logging.info("该条请求用户数据为空-{}".format(raw_data))
            to_kafka.send_kafka(raw_data, logging)
        except chainFile_Exception:
            res = {"successCode": "0", "errorLog": "关系链数据为空!", "results": {}, "status": 2, "message": "异常"}
            results = {}
            results['pageType'] = 'fakeNewsPage'
            results['recognitionResult'] = '关系链数据为空'
            res['results'] = json.dumps(results)
            res["status"] = 2
            res["message"] = "关系链数据为空"
            raw_data["result"] = res
            logging.info("该条请求关系链数据为空-{}".format(raw_data))
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            # Catch-all: report the traceback back through Kafka (assumes raw_data was read above)
            res = {"successCode": "0", "errorLog": "", "results": {}, "status": 2, "message": "异常"}
            results = {}
            results['pageType'] = 'fakeNewsPage'
            results['recognitionResult'] = ""
            res['results'] = json.dumps(results)
            res["status"] = 2
            res["message"] = "异常"
            raw_data["result"] = res
            raw_data["result"]["errorLog"] = traceback.format_exc()
            logging.info(traceback.format_exc())
            to_kafka.send_kafka(raw_data, logging)
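
# A small standalone illustration (never called by the service) of why trace tasks are
# handled first: PriorityQueue orders its tuples element by element, so priority -1 beats 1,
# and the enqueue timestamp breaks ties within the same priority.
def _example_priority_order():
    q = queue.PriorityQueue()
    q.put((1, time.time(), {"task": "normal"}))
    q.put((-1, time.time(), {"task": "trace"}))
    first = q.get()    # (-1, ..., {"task": "trace"}) -- the trace task comes out first
    second = q.get()   # (1, ..., {"task": "normal"})
    return first, second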

def zk_monitoring():
    """Watch the /analyze node in ZooKeeper and record pause/stop operations per scenes_id."""
    try:
        # Production environment
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        # Test environment
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register a data watch on the node
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing
        try:
            while True:
                time.sleep(1)
        except:
            # Typically reached on interruption; shut down the ZooKeeper client
            logging.info("Stopping...")
            zk.stop()
            zk.close()
    except Exception:
        logging.error(traceback.format_exc())
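
# This module only defines the HTTP view and the two long-running loops; how they are
# started is not shown in this file. A minimal sketch, assuming they are launched as
# daemon threads during application start-up (e.g. from the project's urls.py or an
# AppConfig.ready hook -- both hypothetical here):
def _example_start_background_workers():
    import threading
    threading.Thread(target=predict_news, daemon=True).start()
    threading.Thread(target=zk_monitoring, daemon=True).start()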