#coding:utf8
"""Score one account with the pre-trained bot-user classifier.

A 15-value feature row is built from a user profile (``user_file``) and
aggregated post statistics (``post_file``) and handed to the model's
``predict`` method.
"""

# accountName:johnsonleung
_SAMPLE_RAW_DATA = {
    "user_file": {
        "accountId": "39234393",
        "accountName": "hello",
        "nickName": "Johnson Leung",
        "fansCount": 308,
        "likeCount": 92707,
        "postCount": 14237,
        "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}",
        "authentication": 0,
    },
    "post_file": {
        "count": 1,
        "LikeCount": 12,
        "CommentsCount": 1,
        "ShareCount": 1,
        "length": 150,
        "tags": 0,
        "https": 0,
        "at": 0,
        "diffdate": 1,
    },
}

_DEFAULT_MODEL_PATH = "model/bot_user.pkl"

# Failures that mean "field missing or not convertible". Listed explicitly so
# that KeyboardInterrupt / SystemExit are no longer swallowed the way the
# previous bare ``except:`` clauses did.
_FIELD_ERRORS = (
    LookupError,
    TypeError,
    ValueError,
    AttributeError,
    OverflowError,
)

# The model expects the features in exactly this order:
# ['otherInfo', 'nickName', 'fansCount', 'likeCount', 'postCount',
#  'authentication', 'count', 'LikeCount', 'CommentsCount', 'ShareCount',
#  'length', 'tags', 'https', 'at', 'diffdate']

# User fields encoded as 0/1 "is the text non-blank" flags.
_USER_FLAG_FIELDS = ("otherInfo", "nickName")

# User fields converted with int().
_USER_INT_FIELDS = ("fansCount", "likeCount", "postCount", "authentication")

# Post statistics the caller has to compute beforehand:
#   1. count:         total number of posts
#   2. LikeCount:     average number of likes per post
#   3. CommentsCount: average number of comments per post
#   4. ShareCount:    average number of shares per post
#   5. length:        average length of the post text
#   6. tags:          average number of "#" occurrences in the post text
#   7. https:         average number of "https" occurrences in the post text
#   8. at:            average number of "@" occurrences in the post text
#   9. diffdate:      minimum over all posts of
#                     (max(publish time of post A, crawl time) - publish time of A)
_POST_INT_FIELDS = (
    "count",
    "LikeCount",
    "CommentsCount",
    "ShareCount",
    "length",
    "tags",
    "https",
    "at",
    "diffdate",
)


def _presence_flag(raw_data, section, field):
    """Return 1 if ``raw_data[section][field]`` is a non-blank string, else 0.

    A missing section/field or a value without ``strip()`` also yields 0.
    """
    try:
        return 0 if raw_data[section][field].strip() == "" else 1
    except _FIELD_ERRORS:
        return 0


def _int_feature(raw_data, section, field):
    """Return ``int(raw_data[section][field])``, or 0 if missing/unconvertible.

    ``int()`` truncates toward zero, so fractional values lose their
    fractional part.
    """
    try:
        return int(raw_data[section][field])
    except _FIELD_ERRORS:
        return 0


def _build_features(raw_data):
    """Return the 15 features for one account as a flat list, in model order."""
    user_data = [
        _presence_flag(raw_data, "user_file", name)
        for name in _USER_FLAG_FIELDS
    ]
    user_data.extend(
        _int_feature(raw_data, "user_file", name) for name in _USER_INT_FIELDS
    )
    post_data = [
        _int_feature(raw_data, "post_file", name) for name in _POST_INT_FIELDS
    ]
    return user_data + post_data


def predict(raw_data=None, model=None, model_path=_DEFAULT_MODEL_PATH):
    """Predict whether an account is a bot, print the result and return it.

    Args:
        raw_data: Mapping with a ``"user_file"`` and a ``"post_file"``
            sub-mapping. Missing or unconvertible fields count as 0.
            Defaults to the built-in sample record.
        model: Object exposing ``predict(rows)``. When omitted, the model is
            loaded from ``model_path`` with ``joblib.load``.
        model_path: Location of the serialized model; only used when
            ``model`` is not given.

    Returns:
        Whatever ``model.predict`` returns for the single feature row.
    """
    if raw_data is None:
        raw_data = _SAMPLE_RAW_DATA

    features = [_build_features(raw_data)]

    if model is None:
        # Imported lazily so callers that inject an already-loaded model do
        # not need joblib installed.
        import joblib

        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code -- only point model_path at trusted model files.
        model = joblib.load(model_path)  # load the trained model

    result = model.predict(features)
    print(result)
    return result


if __name__ == "__main__":
    predict()