话题水军识别应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

90 lines
3.6 KiB

  1. #coding:utf8
  2. import joblib
  3. #accountName:johnsonleung
  4. def predict():
  5. raw_data = {"user_file":{"accountId": "39234393", "accountName": "hello", "nickName": "Johnson Leung", "fansCount": 308,"likeCount": 92707,"postCount": 14237, "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}", "authentication": 0},
  6. "post_file":{"count":1,"LikeCount":12,"CommentsCount":1,"ShareCount":1,"length":150,"tags":0,"https":0,"at":0,"diffdate":1}}
  7. '''
  8. 1.count:
  9. 2.LikeCount
  10. 3.CommentsCount
  11. 4.ShareCount
  12. 5.length
  13. 6.tags#”数量的平均值
  14. 7.httpshttps
  15. 8.at@
  16. 9.diffdate:(A发表时间和抓取时间的最大值-A的发表时间)
  17. '''
  18. #用户数据
  19. user_data=[]
  20. try:
  21. user_data_otherInfo_1 = 0 if raw_data["user_file"]["otherInfo"].strip() == "" else 1
  22. except:
  23. user_data_otherInfo_1=0
  24. try:
  25. user_data_nickName_2 = 0 if raw_data["user_file"]["nickName"].strip() == "" else 1
  26. except:
  27. user_data_nickName_2=0
  28. try:
  29. user_data_fansCount_3 = int(raw_data["user_file"]["fansCount"])
  30. except:
  31. user_data_fansCount_3=0
  32. try:
  33. user_data_likeCount_4=int(raw_data["user_file"]["likeCount"])
  34. except:
  35. user_data_likeCount_4=0
  36. try:
  37. user_data_postCount_5=int(raw_data["user_file"]["postCount"])
  38. except:
  39. user_data_postCount_5=0
  40. try:
  41. user_data_authentication_6=int(raw_data["user_file"]["authentication"])
  42. except:
  43. user_data_authentication_6=0
  44. user_data.extend([user_data_otherInfo_1,user_data_nickName_2,user_data_fansCount_3,user_data_likeCount_4,user_data_postCount_5,user_data_authentication_6])
  45. #帖子数据
  46. post_data=[]
  47. try:
  48. post_data_count_1 = int(raw_data["post_file"]["count"])
  49. except:
  50. post_data_count_1=0
  51. try:
  52. post_data_LikeCount_2 = int(raw_data["post_file"]["LikeCount"])
  53. except:
  54. post_data_LikeCount_2=0
  55. try:
  56. post_data_CommentsCount_3 = int(raw_data["post_file"]["CommentsCount"])
  57. except:
  58. post_data_CommentsCount_3=0
  59. try:
  60. post_data_ShareCount_4 = int(raw_data["post_file"]["ShareCount"])
  61. except:
  62. post_data_ShareCount_4=0
  63. try:
  64. post_data_length_5 = int(raw_data["post_file"]["length"])
  65. except:
  66. post_data_length_5=0
  67. try:
  68. post_data_tags_6 = int(raw_data["post_file"]["tags"])
  69. except:
  70. post_data_tags_6=0
  71. try:
  72. post_data_https_7 = int(raw_data["post_file"]["https"])
  73. except:
  74. post_data_https_7=0
  75. try:
  76. post_data_at_8 = int(raw_data["post_file"]["at"])
  77. except:
  78. post_data_at_8=0
  79. try:
  80. post_data_diffdate_9 = int(raw_data["post_file"]["diffdate"])
  81. except:
  82. post_data_diffdate_9=0
  83. post_data.extend([post_data_count_1,post_data_LikeCount_2,post_data_CommentsCount_3,post_data_ShareCount_4,post_data_length_5,post_data_tags_6,post_data_https_7,post_data_at_8,post_data_diffdate_9])
  84. features=[user_data+post_data]
  85. bot_user = joblib.load("model/bot_user.pkl") # 加载训练好的模型
  86. result=bot_user.predict(features)
  87. print(result)
  88. # 参数顺序[['otherInfo', 'nickName', 'fansCount', 'likeCount','postCount', 'authentication', 'count', 'LikeCount', 'CommentsCount', 'ShareCount','length', 'tags', 'https', 'at', 'diffdate']]
# Script entry: run one prediction with no arguments and print the result.
predict()