You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
207 lines
9.5 KiB
207 lines
9.5 KiB
#coding:utf8
|
|
import os, sys
|
|
import io
|
|
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
|
|
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
|
|
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
|
|
sys.path.append(cur_dir)
|
|
sys.path.append(par_dir)
|
|
import json
|
|
from django.http import HttpResponse
|
|
from text_analysis.tools import to_kafka
|
|
from django.views.decorators.csrf import csrf_exempt
|
|
from log_util.set_logger import set_logger
|
|
logging=set_logger('logs/results.log')
|
|
import traceback
|
|
import queue
|
|
import requests
|
|
import time
|
|
from datetime import datetime
|
|
import os
|
|
import joblib
|
|
# Task queue shared by this module: robotIdentification() puts parsed request
# bodies on it and predict() takes them off. (A `global` statement is only
# meaningful inside a function, so none is needed at module level.)
task_queue = queue.Queue()
|
|
|
|
|
|
@csrf_exempt
def robotIdentification(request):
    """Accept a robot-identification task over HTTP POST and queue it.

    The request body is parsed as JSON and, when it is a JSON object, put on
    the module-level ``task_queue``. No prediction happens here; the view
    only enqueues the task and acknowledges it.

    Args:
        request: The incoming Django request.

    Returns:
        An ``HttpResponse`` whose body is a JSON object with ``code``
        (1 when the task was queued, 0 otherwise) and a ``msg`` string.
    """
    def _reply(code, msg):
        # Every answer of this view has the same {"code", "msg"} shape.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    if request.method != 'POST':
        return _reply(0, "请求方式错误,改为post请求")

    try:
        raw_data = json.loads(request.body)
    except Exception:
        # Catch-all kept on purpose (the body may also fail to be read), but
        # without trapping SystemExit / KeyboardInterrupt like a bare except.
        logging.error(traceback.format_exc())
        return _reply(0, "请求json格式不正确!")

    if not isinstance(raw_data, dict):
        # predict() indexes the task by string keys and attaches a "result"
        # entry to it, so anything other than a JSON object cannot be
        # processed; reject it here instead of queueing it.
        logging.error("request body is not a JSON object: {}".format(type(raw_data).__name__))
        return _reply(0, "请求json格式不正确!")

    task_queue.put(raw_data)
    return _reply(1, "请求正常!")
|
|
|
|
def _has_text(record, key):
    """Return 1 if ``record[key]`` is a non-blank string, otherwise 0.

    Any lookup or type problem (missing key, value without ``strip``,
    ``record`` not being subscriptable) counts as "no text" and yields 0.
    """
    try:
        return 0 if record[key].strip() == "" else 1
    except Exception:
        return 0


def _to_int(record, key):
    """Return ``record[key]`` converted to ``int``, or 0 when that fails.

    Values that ``int()`` accepts are converted exactly as before. Decimal
    strings such as ``"241.8000"`` make ``int()`` raise ``ValueError``; they
    are now parsed as a float and truncated instead of collapsing to 0.
    NOTE(review): truncation is assumed to match what the model was trained
    on -- confirm against the training pipeline.
    """
    try:
        value = record[key]
        try:
            return int(value)
        except ValueError:
            return int(float(value))
    except Exception:
        # Missing key, None, non-numeric text, nan/inf, wrong container type.
        return 0


def predict():
    """Consume queued tasks forever, classify each one and send it to Kafka.

    Loop body, per task taken from ``task_queue``:

    1. ``raw_data['metadata']['admin']`` names, under ``user_file`` and
       ``post_file``, the keys of ``raw_data['data']`` that hold JSON strings
       with a ``resultList``. The first entry of each list (or ``{}`` when
       the list is empty) replaces the key name in ``admin`` -- i.e. the
       task's metadata is modified in place, as in the original code.
    2. Six user features and nine post features are derived; anything
       missing or unparsable becomes 0.
    3. If the post record is ``{}`` the recognition code is ``"-1"``;
       otherwise the model at ``<cur_dir>/model/bot_user.pkl`` is asked and
       the code is ``str(prediction[0])``.
    4. The outcome is stored under ``raw_data['result']`` and the whole task
       is passed to ``to_kafka.send_kafka``.

    If any step raises, a failure result (``successCode`` "0" plus the
    traceback under ``error``) is attached and sent instead. When the queue
    is empty the loop sleeps 10 seconds before polling again.

    This function never returns.
    """
    user_count_keys = ("fansCount", "likeCount", "postCount", "authentication")
    post_count_keys = ("count", "LikeCount", "CommentsCount", "ShareCount",
                       "length", "tags", "https", "at", "diffdate")
    labels = {'0': '非机器人', '1': '机器人'}

    # Loaded on first use and then reused, instead of re-reading the pickle
    # for every task. NOTE(review): a model file replaced on disk is now only
    # picked up after predict() is restarted.
    bot_user = None

    while True:
        if task_queue.qsize() <= 0:
            # No task pending: sleep before polling again.
            time.sleep(10)
            continue

        raw_data = None
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            logging.info("原始数据-{}".format(raw_data))

            res = {"successCode": "1", "errorLog": "", "results": {}}

            # Upstream step outputs, and the mapping that tells which of them
            # carry the user record and the post record.
            all_result = raw_data['data']
            data = raw_data["metadata"]["admin"]
            user_file_result = json.loads(all_result[data['user_file']])
            post_file_result = json.loads(all_result[data['post_file']])

            # From here on data['user_file'] / data['post_file'] hold the
            # records themselves rather than the key names.
            if user_file_result['resultList']:
                data['user_file'] = user_file_result['resultList'][0]
                logging.info('用户数据:{}'.format(data['user_file']))
            else:
                data['user_file'] = {}
            if post_file_result['resultList']:
                data['post_file'] = post_file_result['resultList'][0]
                logging.info('帖子数据:{}'.format(data['post_file']))
            else:
                data['post_file'] = {}

            user_file = data["user_file"]
            post_file = data["post_file"]

            # User features: two "has text" flags followed by four counters.
            user_data = [_has_text(user_file, "otherInfo"), _has_text(user_file, "nickName")]
            user_data.extend(_to_int(user_file, key) for key in user_count_keys)

            if post_file == {}:
                # Without post data the model is not consulted.
                recognition_code = "-1"
            else:
                post_data = [_to_int(post_file, key) for key in post_count_keys]
                if bot_user is None:
                    # The pickle is a local artifact shipped next to this
                    # file; joblib.load must never be pointed at untrusted data.
                    bot_user = joblib.load(cur_dir + "/model/bot_user.pkl")
                result = bot_user.predict([user_data + post_data])
                recognition_code = str(result[0])

            # These three lookups intentionally raise when the user record
            # lacks a field, which routes the task to the failure branch.
            results = {
                'accountId': user_file["accountId"],
                'nickName': user_file["nickName"],
                'accountName': user_file["accountName"],
                'recognitionResult': labels.get(recognition_code, '未知识别结果'),
                'recognitionCode': recognition_code,
            }

            res['results'] = json.dumps(results)
            raw_data["result"] = res
            logging.info("增加预测数据-{}".format(raw_data))
            to_kafka.send_kafka(raw_data, logging)
        except Exception:
            error_text = traceback.format_exc()
            logging.info(error_text)
            try:
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": {},
                                      "error": error_text}
                to_kafka.send_kafka(raw_data, logging)
            except Exception:
                # Reporting the failure failed as well (task is not a dict,
                # nothing was dequeued, or Kafka is unreachable). Log it and
                # keep the worker loop alive for the next task.
                logging.error(traceback.format_exc())
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke check of the record-extraction step used by predict():
    # pick the first entry of each step's resultList and print the outcome.
    all_result = {"9_获取用户发帖信息":"{\"resultList\": [{\"count\": \"10\", \"LikeCount\": \"1\", \"CommentsCount\": \"0.1\", \"ShareCount\": \"0.4\", \"length\": \"241.8000\", \"tags\": \"5.80000000\", \"https\": \"1.20000000\", \"at\": \"0.40000000\", \"diffdate\": \"170269\"}]}","8_获取用户信息":"{\"resultList\": [{\"accountId\": \"1368232444323799043\", \"accountName\": \"Ujjal best Tech@UjjalKumarGho19\", \"nickName\": \"UjjalKumarGho19\", \"fansCount\": \"660\", \"likeCount\": \"2096\", \"postCount\": \"579\", \"otherInfo\": \"\", \"authentication\": 1}]}"}

    # Maps each logical input to the key of all_result that holds it. This
    # was previously an empty dict, so the lookups below raised KeyError
    # before anything could be printed.
    data = {"user_file": "8_获取用户信息", "post_file": "9_获取用户发帖信息"}

    user_file_result = json.loads(all_result[data['user_file']])
    post_file_result = json.loads(all_result[data['post_file']])

    # Replace each key name with the first record found, or {} when the
    # step returned no rows.
    if user_file_result['resultList']:
        resultList = user_file_result['resultList']
        data['user_file'] = resultList[0]
    else:
        data['user_file'] = {}
    if post_file_result['resultList']:
        data['post_file'] = post_file_result['resultList'][0]
    else:
        data['post_file'] = {}

    print(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|