用户水军识别应用
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

101 lines
3.3 KiB

#coding:utf8
import joblib
import json
import os
# Directory containing this script; used to locate the bundled model file.
# `__file__` is not defined in every execution context (e.g. interactive
# sessions or exec'd source), so fall back to the working directory there.
try:
    cur_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    cur_dir = os.getcwd()
import numpy as np
class MyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native types."""

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        NumPy integers become ``int``, NumPy floats become ``float`` and
        NumPy arrays become (nested) lists.

        Raises:
            TypeError: if ``obj`` is of any other unsupported type (raised
                by the base-class implementation).
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Defer to the base class so unsupported objects raise TypeError
        # instead of being silently encoded as null.
        return super(MyEncoder, self).default(obj)
# Sample input: one account profile ("user_file") plus post statistics
# ("post_file").
raw_data = {
    "user_file": {
        "accountId": "39234393",
        "accountName": "hello",
        "nickName": "Johnson Leung",
        "fansCount": 308,
        "likeCount": 92707,
        "postCount": 14237,
        "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}",
        "authentication": 0,
    },
    "post_file": {
        "count": 1,
        "LikeCount": 12,
        "CommentsCount": 1,
        "ShareCount": 1,
        "length": 150,
        "tags": 0,
        "https": 0,
        "at": 0,
        "diffdate": 1,
    },
}

# Response envelope; "results" is overwritten once a prediction is available.
res = {"successCode": "1", "errorLog": "", "results": {}}

# Errors that indicate a missing or malformed field.  Only these are mapped
# to the default value, so KeyboardInterrupt/SystemExit are no longer
# swallowed the way a bare ``except:`` would.
_FIELD_ERRORS = (KeyError, TypeError, ValueError, AttributeError, OverflowError)


def _has_text(data, section, field):
    """Return 1 if data[section][field] is a non-blank string, else 0.

    A missing section/field or a value without ``strip()`` counts as 0.
    """
    try:
        return 0 if data[section][field].strip() == "" else 1
    except _FIELD_ERRORS:
        return 0


def _as_int(data, section, field):
    """Return data[section][field] converted with int(), or 0 on failure."""
    try:
        return int(data[section][field])
    except _FIELD_ERRORS:
        return 0


# Field names, in the order their values appear in the feature vectors.
# NOTE(review): this order presumably has to match the order used when the
# model was trained -- confirm before reordering.
_USER_FLAG_FIELDS = ("otherInfo", "nickName")
_USER_INT_FIELDS = ("fansCount", "likeCount", "postCount", "authentication")
_POST_INT_FIELDS = ("count", "LikeCount", "CommentsCount", "ShareCount",
                    "length", "tags", "https", "at", "diffdate")

# User features: two presence flags followed by four integer counters.
user_data = []
user_data.extend(_has_text(raw_data, "user_file", name)
                 for name in _USER_FLAG_FIELDS)
user_data.extend(_as_int(raw_data, "user_file", name)
                 for name in _USER_INT_FIELDS)

# Post features: nine integer values.
post_data = []
post_data.extend(_as_int(raw_data, "post_file", name)
                 for name in _POST_INT_FIELDS)
# Build a one-row feature matrix: user features followed by post features.
combined_row = user_data + post_data
features = [combined_row]

# Location of the serialized model, relative to this script's directory.
model_path = cur_dir + "/model/bot_user.pkl"
print(model_path)

# Load the trained model and run it on the single feature row.
bot_user = joblib.load(model_path)
result = bot_user.predict(features)

# Store the prediction for that row in the response envelope, attach the
# envelope to the input record, and print the combined record.
res["results"] = result[0]
raw_data["result"] = res
print(raw_data)