#coding:utf8
"""Bot-account recognition service: Django view plus queue worker.

``robotIdentification`` accepts a POSTed JSON task and puts it on the
in-process ``task_queue``.  ``predict`` is an endless worker loop that takes
tasks off that queue, builds a feature vector from the user/post records
referenced by the task, runs the pickled ``bot_user`` model on it and
publishes the enriched task through ``to_kafka.send_kafka``.

NOTE(review): nothing in this file starts ``predict``; presumably it is
launched in a background thread elsewhere in the project -- confirm.
"""
import os
import sys
import io

# Module-level side effects kept from the original, in the original order:
# stdout is re-wrapped as UTF-8 and both this directory and its parent are
# appended to sys.path *before* the project-local imports below.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
import time
from datetime import datetime
import joblib

# Task queue shared between the HTTP view (producer) and predict() (consumer).
task_queue = queue.Queue()

# Location of the trained classifier, relative to this file.
_MODEL_PATH = cur_dir + "/model/bot_user.pkl"
# Last loaded model together with the file mtime it was loaded at.
_model_cache = {"mtime": None, "model": None}

# Integer-valued feature fields, in the order they are fed to the model.
_USER_INT_FIELDS = ("fansCount", "likeCount", "postCount", "authentication")
_POST_INT_FIELDS = ("count", "LikeCount", "CommentsCount", "ShareCount",
                    "length", "tags", "https", "at", "diffdate")

# Human-readable label for each recognition code the service reports.
_RECOGNITION_LABELS = {'0': '非机器人', '1': '机器人'}
_UNKNOWN_LABEL = '未知识别结果'


@csrf_exempt
def robotIdentification(request):
    """Queue a bot-recognition task received as a JSON POST body.

    Returns an ``HttpResponse`` whose body is a JSON object with ``code`` 1
    when the task was queued, or ``code`` 0 when the request is not a POST,
    the body is not valid JSON, or the JSON value is not an object.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        raw_data = json.loads(request.body)
    except Exception:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    if not isinstance(raw_data, dict):
        # The worker indexes the task by string keys and attaches a "result"
        # entry to it, so anything other than a JSON object is unusable.
        logging.error("请求json不是对象类型-{}".format(type(raw_data).__name__))
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    task_queue.put(raw_data)
    return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))


def _has_text(record, key):
    """Return 1 when ``record[key]`` is a non-blank string, otherwise 0."""
    try:
        return 0 if record[key].strip() == "" else 1
    except (KeyError, TypeError, AttributeError):
        # Missing key, non-dict record, or a value without .strip().
        return 0


def _to_int(record, key):
    """Return ``int(record[key])``, or 0 when it is missing or unconvertible.

    NOTE(review): decimal strings such as "241.8000" (see the sample in the
    ``__main__`` section) are rejected by ``int()`` and therefore become 0.
    That is the original behaviour and is kept, because the inputs the
    trained model expects are not visible from this file -- confirm.
    """
    try:
        return int(record[key])
    except (KeyError, TypeError, ValueError, OverflowError):
        return 0


def _user_features(user):
    """Build the six user-level features in the order the model is given."""
    features = [_has_text(user, "otherInfo"), _has_text(user, "nickName")]
    features.extend(_to_int(user, field) for field in _USER_INT_FIELDS)
    return features


def _post_features(post):
    """Build the nine post-level features in the order the model is given."""
    return [_to_int(post, field) for field in _POST_INT_FIELDS]


def _load_model():
    """Return the trained model, re-reading it only when the file changed.

    The file used to be unpickled for every task.  Caching on the file's
    modification time avoids that work while still picking up a replaced
    model file without a restart.  joblib.load unpickles the file, so the
    model file must come from a trusted source.
    """
    mtime = os.path.getmtime(_MODEL_PATH)
    if _model_cache["model"] is None or _model_cache["mtime"] != mtime:
        _model_cache["model"] = joblib.load(_MODEL_PATH)
        _model_cache["mtime"] = mtime
    return _model_cache["model"]


def _first_or_empty(result):
    """Return the first entry of ``result['resultList']`` or ``{}`` if empty."""
    result_list = result['resultList']
    return result_list[0] if result_list else {}


def _process_task(raw_data):
    """Classify one task and publish it with a ``result`` entry attached.

    ``raw_data["metadata"]["admin"]`` maps ``user_file`` / ``post_file`` to
    keys of ``raw_data["data"]``; each referenced value is a JSON string
    holding a ``resultList``.  Any exception propagates to the caller, which
    reports the task as failed.
    """
    all_result = raw_data['data']
    admin = raw_data["metadata"]["admin"]
    # Parse both sources before touching ``admin`` so that a malformed post
    # payload leaves the task's metadata unmodified.
    user_file_result = json.loads(all_result[admin['user_file']])
    post_file_result = json.loads(all_result[admin['post_file']])

    # NOTE(review): as in the original, the source names inside ``admin`` are
    # overwritten in place by the extracted records, so the records are part
    # of the message sent to Kafka.  Kept because consumers may rely on it.
    user = admin['user_file'] = _first_or_empty(user_file_result)
    if user_file_result['resultList']:
        logging.info('用户数据:{}'.format(user))
    post = admin['post_file'] = _first_or_empty(post_file_result)
    if post_file_result['resultList']:
        logging.info('帖子数据:{}'.format(post))

    user_data = _user_features(user)
    if post == {}:
        # Without post data the model is not run.
        recognition_code = "-1"
    else:
        features = [user_data + _post_features(post)]
        result = _load_model().predict(features)
        recognition_code = str(result[0])

    # An empty user record raises KeyError here and fails the task, exactly
    # as the original code did.
    results = {
        'accountId': user["accountId"],
        'nickName': user["nickName"],
        'accountName': user["accountName"],
        'recognitionResult': _RECOGNITION_LABELS.get(recognition_code, _UNKNOWN_LABEL),
        'recognitionCode': recognition_code,
    }
    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": json.dumps(results)}
    logging.info("增加预测数据-{}".format(raw_data))
    to_kafka.send_kafka(raw_data, logging)


def _report_failure(raw_data, error_text):
    """Log a failed task and, when possible, publish it with the traceback.

    Never raises: an exception escaping from here would end the worker loop
    and leave every later task unprocessed.
    """
    logging.error(error_text)
    if not isinstance(raw_data, dict):
        # Nothing to attach the error to (task missing or not a JSON object).
        return
    try:
        raw_data["result"] = {"successCode": "0", "errorLog": "", "results": {}, "error": error_text}
        to_kafka.send_kafka(raw_data, logging)
    except Exception:
        logging.error(traceback.format_exc())


def predict():
    """Run forever, processing queued tasks one at a time.

    Sleeps for 10 seconds whenever the queue is empty.  A failing task is
    reported through ``_report_failure`` and the loop carries on.
    """
    while True:
        if task_queue.qsize() > 0:
            raw_data = None
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                logging.info("原始数据-{}".format(raw_data))
                _process_task(raw_data)
            except Exception:
                _report_failure(raw_data, traceback.format_exc())
        else:
            # No task pending; sleep before polling again.
            time.sleep(10)


if __name__ == '__main__':
    # Manual check of the record-extraction step on a canned payload.
    all_result = {
        "9_获取用户发帖信息": (
            '{"resultList": [{"count": "10", "LikeCount": "1", '
            '"CommentsCount": "0.1", "ShareCount": "0.4", '
            '"length": "241.8000", "tags": "5.80000000", '
            '"https": "1.20000000", "at": "0.40000000", '
            '"diffdate": "170269"}]}'
        ),
        "8_获取用户信息": (
            '{"resultList": [{"accountId": "1368232444323799043", '
            '"accountName": "Ujjal best Tech@UjjalKumarGho19", '
            '"nickName": "UjjalKumarGho19", "fansCount": "660", '
            '"likeCount": "2096", "postCount": "579", "otherInfo": "", '
            '"authentication": 1}]}'
        ),
    }
    # The mapping must name keys that exist in all_result; it used to be an
    # empty dict, which made the lookups below fail with KeyError.
    data = {"user_file": "8_获取用户信息", "post_file": "9_获取用户发帖信息"}
    user_file_result = json.loads(all_result[data['user_file']])
    post_file_result = json.loads(all_result[data['post_file']])
    if user_file_result['resultList']:
        resultList = user_file_result['resultList']
        data['user_file'] = resultList[0]
    else:
        data['user_file'] = {}
    if post_file_result['resultList']:
        data['post_file'] = post_file_result['resultList'][0]
    else:
        data['post_file'] = {}
    print(data)