话题水军识别应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

90 lines
3.6 KiB

#coding:utf8
import joblib
#accountName:johnsonleung
# Example payload scored when predict() is called without arguments.
_SAMPLE_RAW_DATA = {
    "user_file": {
        "accountId": "39234393",
        "accountName": "hello",
        "nickName": "Johnson Leung",
        "fansCount": 308,
        "likeCount": 92707,
        "postCount": 14237,
        "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}",
        "authentication": 0,
    },
    "post_file": {
        "count": 1,
        "LikeCount": 12,
        "CommentsCount": 1,
        "ShareCount": 1,
        "length": 150,
        "tags": 0,
        "https": 0,
        "at": 0,
        "diffdate": 1,
    },
}

# Path of the trained model, resolved against the current working directory.
_MODEL_PATH = "model/bot_user.pkl"

# Feature order fed to the model: the two presence flags, then the user
# counters, then the post aggregates. Do not reorder these tuples.
_USER_FLAG_FIELDS = ("otherInfo", "nickName")
_USER_INT_FIELDS = ("fansCount", "likeCount", "postCount", "authentication")
_POST_INT_FIELDS = (
    "count",
    "LikeCount",
    "CommentsCount",
    "ShareCount",
    "length",
    "tags",
    "https",
    "at",
    "diffdate",
)

# Failures that mean "field missing or malformed"; anything else (for example
# KeyboardInterrupt) is allowed to propagate instead of being swallowed.
_FIELD_ERRORS = (
    KeyError,
    IndexError,
    TypeError,
    ValueError,
    OverflowError,
    AttributeError,
)


def _presence_flag(raw_data, section, key):
    """Return 1 if raw_data[section][key] is a non-blank string, else 0."""
    try:
        return 0 if raw_data[section][key].strip() == "" else 1
    except _FIELD_ERRORS:
        return 0


def _int_field(raw_data, section, key):
    """Return int(raw_data[section][key]), or 0 if it is missing or invalid."""
    try:
        return int(raw_data[section][key])
    except _FIELD_ERRORS:
        return 0


def predict(raw_data=None, model=None):
    """Build the 15-value feature row for one account and classify it.

    Args:
        raw_data: Mapping with a "user_file" and a "post_file" sub-mapping.
            When omitted, the built-in sample payload is used.
            "user_file" fields read: otherInfo and nickName (turned into 0/1
            flags: 1 when the text is non-blank) and fansCount, likeCount,
            postCount, authentication (converted with int()).
            "post_file" fields read, all converted with int():
              1. count: total number of posts
              2. LikeCount: average number of likes per post
              3. CommentsCount: average number of comments per post
              4. ShareCount: average number of shares per post
              5. length: average post text length
              6. tags: average number of "#" in the post text
              7. https: average number of "https" in the post text
              8. at: average number of "@" in the post text
              9. diffdate: minimum over all posts of
                 (max(post publish time, crawl time) - post publish time)
            Any field that is missing or cannot be converted counts as 0.
        model: Object exposing predict(features). When omitted, the model is
            loaded from "model/bot_user.pkl" with joblib.

    Returns:
        Whatever the model's predict() returns for the single feature row.
        The same value is also printed.
    """
    if raw_data is None:
        raw_data = _SAMPLE_RAW_DATA

    # User features.
    user_data = [
        _presence_flag(raw_data, "user_file", key) for key in _USER_FLAG_FIELDS
    ]
    user_data.extend(
        _int_field(raw_data, "user_file", key) for key in _USER_INT_FIELDS
    )

    # Post features.
    post_data = [
        _int_field(raw_data, "post_file", key) for key in _POST_INT_FIELDS
    ]

    # A single sample, wrapped in an outer list.
    features = [user_data + post_data]

    if model is None:
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; only load model files from a trusted source.
        model = joblib.load(_MODEL_PATH)  # load the trained model

    result = model.predict(features)
    print(result)
    return result
# Module-level call: runs one prediction whenever this file is executed or imported.
predict()