用户水军识别应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

207 lines
9.5 KiB

  1. #coding:utf8
  2. import os, sys
  3. import io
  4. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
  5. cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
  6. par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
  7. sys.path.append(cur_dir)
  8. sys.path.append(par_dir)
  9. import json
  10. from django.http import HttpResponse
  11. from text_analysis.tools import to_kafka
  12. from django.views.decorators.csrf import csrf_exempt
  13. from log_util.set_logger import set_logger
  14. logging=set_logger('logs/results.log')
  15. import traceback
  16. import queue
  17. import requests
  18. import time
  19. from datetime import datetime
  20. import os
  21. import joblib
  22. #任务队列
  23. global task_queue
  24. task_queue = queue.Queue()
  25. @csrf_exempt
  26. def robotIdentification(request):
  27. if request.method == 'POST':
  28. try:
  29. raw_data = json.loads(request.body)
  30. task_queue.put(raw_data)
  31. return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
  32. except:
  33. logging.error(traceback.format_exc())
  34. return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
  35. else:
  36. return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
  37. def predict():
  38. while True:
  39. if task_queue.qsize() >0:
  40. try:
  41. logging.info("取任务队列长度{}".format(task_queue.qsize()))
  42. raw_data = task_queue.get()
  43. logging.info("原始数据-{}".format(raw_data))
  44. # raw_data = {"user_file": {"accountId": "39234393", "accountName": "hello", "nickName": "Johnson Leung",
  45. # "fansCount": 308, "likeCount": 92707, "postCount": 14237,
  46. # "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}",
  47. # "authentication": 0},
  48. # "post_file": {"count": 1, "LikeCount": 12, "CommentsCount": 1, "ShareCount": 1,
  49. # "length": 150, "tags": 0, "https": 0, "at": 0, "diffdate": 1}}
  50. # 用户数据
  51. res = {"successCode": "1", "errorLog": "", "results": {}}
  52. #获取历史数据源
  53. all_result = raw_data['data']
  54. user_data = []
  55. data=raw_data["metadata"]["admin"]
  56. #{"user_file": "9_获取用户信息", "post_file": "10_获取用户发帖信息"}
  57. user_file_result = json.loads(all_result[data['user_file']])
  58. post_file_result = json.loads(all_result[data['post_file']])
  59. if user_file_result['resultList']:
  60. data['user_file'] = user_file_result['resultList'][0]
  61. logging.info('用户数据:{}'.format(data['user_file']))
  62. else:
  63. data['user_file'] ={}
  64. if post_file_result['resultList']:
  65. data['post_file'] = post_file_result['resultList'][0]
  66. logging.info('帖子数据:{}'.format(data['post_file']))
  67. else:
  68. data['post_file'] = {}
  69. #识别结果返回值
  70. recognition_code = "0"
  71. try:
  72. user_data_otherInfo_1 = 0 if data["user_file"]["otherInfo"].strip() == "" else 1
  73. except:
  74. user_data_otherInfo_1 = 0
  75. try:
  76. user_data_nickName_2 = 0 if data["user_file"]["nickName"].strip() == "" else 1
  77. except:
  78. user_data_nickName_2 = 0
  79. try:
  80. user_data_fansCount_3 = int(data["user_file"]["fansCount"])
  81. except:
  82. user_data_fansCount_3 = 0
  83. try:
  84. user_data_likeCount_4 = int(data["user_file"]["likeCount"])
  85. except:
  86. user_data_likeCount_4 = 0
  87. try:
  88. user_data_postCount_5 = int(data["user_file"]["postCount"])
  89. except:
  90. user_data_postCount_5 = 0
  91. try:
  92. user_data_authentication_6 = int(data["user_file"]["authentication"])
  93. except:
  94. user_data_authentication_6 = 0
  95. user_data.extend(
  96. [user_data_otherInfo_1, user_data_nickName_2, user_data_fansCount_3, user_data_likeCount_4,
  97. user_data_postCount_5, user_data_authentication_6])
  98. # 帖子数据
  99. if data["post_file"]=={}:
  100. recognition_code = "-1"
  101. else:
  102. post_data = []
  103. try:
  104. post_data_count_1 = int(data["post_file"]["count"])
  105. except:
  106. post_data_count_1 = 0
  107. try:
  108. post_data_LikeCount_2 = int(data["post_file"]["LikeCount"])
  109. except:
  110. post_data_LikeCount_2 = 0
  111. try:
  112. post_data_CommentsCount_3 = int(data["post_file"]["CommentsCount"])
  113. except:
  114. post_data_CommentsCount_3 = 0
  115. try:
  116. post_data_ShareCount_4 = int(data["post_file"]["ShareCount"])
  117. except:
  118. post_data_ShareCount_4 = 0
  119. try:
  120. post_data_length_5 = int(data["post_file"]["length"])
  121. except:
  122. post_data_length_5 = 0
  123. try:
  124. post_data_tags_6 = int(data["post_file"]["tags"])
  125. except:
  126. post_data_tags_6 = 0
  127. try:
  128. post_data_https_7 = int(data["post_file"]["https"])
  129. except:
  130. post_data_https_7 = 0
  131. try:
  132. post_data_at_8 = int(data["post_file"]["at"])
  133. except:
  134. post_data_at_8 = 0
  135. try:
  136. post_data_diffdate_9 = int(data["post_file"]["diffdate"])
  137. except:
  138. post_data_diffdate_9 = 0
  139. post_data.extend(
  140. [post_data_count_1, post_data_LikeCount_2, post_data_CommentsCount_3, post_data_ShareCount_4,
  141. post_data_length_5, post_data_tags_6, post_data_https_7, post_data_at_8, post_data_diffdate_9])
  142. features = [user_data + post_data]
  143. bot_user = joblib.load(cur_dir+"/model/bot_user.pkl") # 加载训练好的模型
  144. result = bot_user.predict(features)
  145. recognition_code = str(result[0])
  146. # logging.info("预测模型结果为{}".format(result))
  147. results = {}
  148. # 用户id
  149. results['accountId'] = data["user_file"]["accountId"]
  150. # 用户昵称
  151. results['nickName'] = data["user_file"]["nickName"]
  152. # 用户账号
  153. results['accountName'] = data["user_file"]["accountName"]
  154. if recognition_code == '0':
  155. results['recognitionResult'] = '非机器人'
  156. results['recognitionCode'] = recognition_code
  157. elif recognition_code == '1':
  158. results['recognitionResult'] = '机器人'
  159. results['recognitionCode'] = recognition_code
  160. else:
  161. results['recognitionResult'] = '未知识别结果'
  162. results['recognitionCode'] = recognition_code
  163. res['results'] = json.dumps(results)
  164. raw_data["result"] = res
  165. # raw_data_json=json.dumps(raw_data)
  166. logging.info("增加预测数据-{}".format(raw_data))
  167. to_kafka.send_kafka(raw_data, logging)
  168. except:
  169. res = {"successCode": "0", "errorLog": "", "results": {}}
  170. raw_data["result"] = res
  171. raw_data["result"]["error"] = traceback.format_exc()
  172. # raw_data_json=json.dumps(raw_data)
  173. logging.info(traceback.format_exc())
  174. to_kafka.send_kafka(raw_data, logging)
  175. else:
  176. #暂无任务,进入休眠
  177. time.sleep(10)
  178. if __name__ == '__main__':
  179. all_result = {"9_获取用户发帖信息":"{\"resultList\": [{\"count\": \"10\", \"LikeCount\": \"1\", \"CommentsCount\": \"0.1\", \"ShareCount\": \"0.4\", \"length\": \"241.8000\", \"tags\": \"5.80000000\", \"https\": \"1.20000000\", \"at\": \"0.40000000\", \"diffdate\": \"170269\"}]}","8_获取用户信息":"{\"resultList\": [{\"accountId\": \"1368232444323799043\", \"accountName\": \"Ujjal best Tech@UjjalKumarGho19\", \"nickName\": \"UjjalKumarGho19\", \"fansCount\": \"660\", \"likeCount\": \"2096\", \"postCount\": \"579\", \"otherInfo\": \"\", \"authentication\": 1}]}"}
  180. data={}
  181. #{"user_file": "9_获取用户信息", "post_file": "10_获取用户发帖信息"}
  182. user_file_result = json.loads(all_result[data['user_file']])
  183. post_file_result = json.loads(all_result[data['post_file']])
  184. if user_file_result['resultList']:
  185. resultList = user_file_result['resultList']
  186. data['user_file'] = resultList[0]
  187. else:
  188. data['user_file'] ={}
  189. if post_file_result['resultList']:
  190. data['post_file'] = post_file_result['resultList'][0]
  191. else:
  192. data['post_file'] = {}
  193. print(data)