用户水军识别应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

100 lines
3.3 KiB

  1. #coding:utf8
  2. import joblib
  3. import json
  4. import os
  5. cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
  6. import numpy as np
  7. class MyEncoder(json.JSONEncoder):
  8. def default(self, obj):
  9. if isinstance(obj, np.integer):
  10. return int(obj)
  11. elif isinstance(obj, np.floating):
  12. return float(obj)
  13. elif isinstance(obj, np.ndarray):
  14. return obj.tolist()
  15. raw_data = {"user_file": {"accountId": "39234393", "accountName": "hello", "nickName": "Johnson Leung",
  16. "fansCount": 308, "likeCount": 92707, "postCount": 14237,
  17. "otherInfo": "{\"\"otherInfo\"\":\"\"{\"\"bio\"\": \"\"Huge}",
  18. "authentication": 0},
  19. "post_file": {"count": 1, "LikeCount": 12, "CommentsCount": 1, "ShareCount": 1,
  20. "length": 150, "tags": 0, "https": 0, "at": 0, "diffdate": 1}}
  21. # 用户数据
  22. res = {"successCode": "1", "errorLog": "", "results": {}}
  23. user_data = []
  24. try:
  25. user_data_otherInfo_1 = 0 if raw_data["user_file"]["otherInfo"].strip() == "" else 1
  26. except:
  27. user_data_otherInfo_1 = 0
  28. try:
  29. user_data_nickName_2 = 0 if raw_data["user_file"]["nickName"].strip() == "" else 1
  30. except:
  31. user_data_nickName_2 = 0
  32. try:
  33. user_data_fansCount_3 = int(raw_data["user_file"]["fansCount"])
  34. except:
  35. user_data_fansCount_3 = 0
  36. try:
  37. user_data_likeCount_4 = int(raw_data["user_file"]["likeCount"])
  38. except:
  39. user_data_likeCount_4 = 0
  40. try:
  41. user_data_postCount_5 = int(raw_data["user_file"]["postCount"])
  42. except:
  43. user_data_postCount_5 = 0
  44. try:
  45. user_data_authentication_6 = int(raw_data["user_file"]["authentication"])
  46. except:
  47. user_data_authentication_6 = 0
  48. user_data.extend(
  49. [user_data_otherInfo_1, user_data_nickName_2, user_data_fansCount_3, user_data_likeCount_4,
  50. user_data_postCount_5, user_data_authentication_6])
  51. # 帖子数据
  52. post_data = []
  53. try:
  54. post_data_count_1 = int(raw_data["post_file"]["count"])
  55. except:
  56. post_data_count_1 = 0
  57. try:
  58. post_data_LikeCount_2 = int(raw_data["post_file"]["LikeCount"])
  59. except:
  60. post_data_LikeCount_2 = 0
  61. try:
  62. post_data_CommentsCount_3 = int(raw_data["post_file"]["CommentsCount"])
  63. except:
  64. post_data_CommentsCount_3 = 0
  65. try:
  66. post_data_ShareCount_4 = int(raw_data["post_file"]["ShareCount"])
  67. except:
  68. post_data_ShareCount_4 = 0
  69. try:
  70. post_data_length_5 = int(raw_data["post_file"]["length"])
  71. except:
  72. post_data_length_5 = 0
  73. try:
  74. post_data_tags_6 = int(raw_data["post_file"]["tags"])
  75. except:
  76. post_data_tags_6 = 0
  77. try:
  78. post_data_https_7 = int(raw_data["post_file"]["https"])
  79. except:
  80. post_data_https_7 = 0
  81. try:
  82. post_data_at_8 = int(raw_data["post_file"]["at"])
  83. except:
  84. post_data_at_8 = 0
  85. try:
  86. post_data_diffdate_9 = int(raw_data["post_file"]["diffdate"])
  87. except:
  88. post_data_diffdate_9 = 0
  89. post_data.extend(
  90. [post_data_count_1, post_data_LikeCount_2, post_data_CommentsCount_3, post_data_ShareCount_4,
  91. post_data_length_5, post_data_tags_6, post_data_https_7, post_data_at_8, post_data_diffdate_9])
  92. features = [user_data + post_data]
  93. print(cur_dir + "/model/bot_user.pkl")
  94. bot_user = joblib.load(cur_dir + "/model/bot_user.pkl") # 加载训练好的模型
  95. result = bot_user.predict(features)
  96. res["results"] = result[0]
  97. # logging.info("预测模型结果为{}".format(result))
  98. raw_data["result"] = res
  99. # print(raw_data)
  100. print(raw_data)