You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
90 lines
3.6 KiB
90 lines
3.6 KiB
#coding:utf8
|
|
import joblib
|
|
#accountName:johnsonleung
|
|
# Order in which features are handed to the model:
# [['otherInfo', 'nickName', 'fansCount', 'likeCount', 'postCount',
#   'authentication', 'count', 'LikeCount', 'CommentsCount', 'ShareCount',
#   'length', 'tags', 'https', 'at', 'diffdate']]
_USER_TEXT_FIELDS = ("otherInfo", "nickName")
_USER_INT_FIELDS = ("fansCount", "likeCount", "postCount", "authentication")
_POST_INT_FIELDS = (
    "count",
    "LikeCount",
    "CommentsCount",
    "ShareCount",
    "length",
    "tags",
    "https",
    "at",
    "diffdate",
)

# Errors that mean "field missing or malformed"; anything else (for example
# KeyboardInterrupt or SystemExit) is allowed to propagate.
_FIELD_ERRORS = (LookupError, TypeError, ValueError, AttributeError, OverflowError)

# Sample record used when predict() is called without arguments.
_SAMPLE_RAW_DATA = {
    "user_file": {
        "accountId": "39234393",
        "accountName": "hello",
        "nickName": "Johnson Leung",
        "fansCount": 308,
        "likeCount": 92707,
        "postCount": 14237,
        "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}",
        "authentication": 0,
    },
    "post_file": {
        "count": 1,
        "LikeCount": 12,
        "CommentsCount": 1,
        "ShareCount": 1,
        "length": 150,
        "tags": 0,
        "https": 0,
        "at": 0,
        "diffdate": 1,
    },
}


def _presence_flag(raw_data, section, key):
    """Return 1 if raw_data[section][key] is a non-blank string, else 0."""
    try:
        return 0 if raw_data[section][key].strip() == "" else 1
    except _FIELD_ERRORS:
        return 0


def _int_field(raw_data, section, key):
    """Return raw_data[section][key] converted with int(), or 0 on failure."""
    try:
        return int(raw_data[section][key])
    except _FIELD_ERRORS:
        return 0


def _build_features(raw_data):
    """Return the 15-element feature row (user features, then post features)."""
    # User data.
    user_data = [_presence_flag(raw_data, "user_file", key) for key in _USER_TEXT_FIELDS]
    user_data.extend(_int_field(raw_data, "user_file", key) for key in _USER_INT_FIELDS)
    # Post data.
    post_data = [_int_field(raw_data, "post_file", key) for key in _POST_INT_FIELDS]
    return user_data + post_data


def predict(raw_data=None, model=None, model_path="model/bot_user.pkl"):
    """Build the feature row for one account and run the bot-user model on it.

    Args:
        raw_data: Mapping with a "user_file" and a "post_file" sub-mapping.
            When omitted, a built-in sample record is used, so ``predict()``
            keeps working without arguments.
        model: Optional already-loaded object exposing ``predict(features)``.
            When omitted, the model is loaded from ``model_path``.
        model_path: Path of the serialized model passed to ``joblib.load``.

    Returns:
        Whatever ``model.predict`` returns for the single feature row. The
        value is also printed.

    Inputs the caller has to compute for "post_file" (translated from the
    original notes):
        1. count: total number of posts
        2. LikeCount: average number of likes per post
        3. CommentsCount: average number of comments per post
        4. ShareCount: average number of shares per post
        5. length: average text length of the posts
        6. tags: average number of "#" in the post text
        7. https: average number of "https" in the post text
        8. at: average number of "@" in the post text
        9. diffdate: minimum over all posts of (the maximum of post publish
           time and crawl time, minus the post's own publish time)

    Missing or malformed fields fall back to 0 instead of raising.
    """
    if raw_data is None:
        raw_data = _SAMPLE_RAW_DATA

    features = [_build_features(raw_data)]

    if model is None:
        # Load the trained model.
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; only load model files from a trusted source.
        model = joblib.load(model_path)

    result = model.predict(features)
    print(result)
    return result
|
|
# Module-level call: the prediction runs whenever this file is executed or imported.
# NOTE(review): consider an `if __name__ == "__main__":` guard if this module is ever imported.
predict()
|