算法暴露接口(xhs、dy、ks、wx、hnw)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

152 lines
4.8 KiB

7 months ago
  1. # coding:utf-8
  2. import argparse
  3. import os, sys
  4. # 相对路径补充
  5. root_path = os.path.abspath(os.path.dirname(__file__)).split('api-py')[0] + "api-py"
  6. sys.path.append(root_path)
  7. from concurrent.futures.thread import ThreadPoolExecutor
  8. from utils.MysqlData import MysqlPoolClient, CRAWLER_DB_CONF_DY
  9. from utils.tool import download_q
  10. from loguru import logger
  11. class DouyinTtwid():
  12. """
  13. ttwid
  14. signature
  15. """
  16. def __init__(self, is_proxy: bool):
  17. self.is_proxy = is_proxy
  18. self.sql_list = []
  19. def get_cookie(self, ck=None):
  20. """
  21. noncettwid)
  22. :param ck:
  23. :return:
  24. """
  25. headers = {
  26. "authority": "www.douyin.com",
  27. "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  28. "accept-language": "zh-CN,zh;q=0.9",
  29. "cache-control": "no-cache",
  30. "pragma": "no-cache",
  31. "sec-ch-ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
  32. "sec-ch-ua-mobile": "?0",
  33. "sec-ch-ua-platform": "\"macOS\"",
  34. "sec-fetch-dest": "document",
  35. "sec-fetch-mode": "navigate",
  36. "sec-fetch-site": "none",
  37. "sec-fetch-user": "?1",
  38. "upgrade-insecure-requests": "1",
  39. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
  40. }
  41. url = "https://www.douyin.com/"
  42. if ck:
  43. status, response = download_q(url, headers, cookies=ck, is_proxy=self.is_proxy)
  44. key = response.cookies["ttwid"]
  45. logger.info(f"获得 ttwid 成功:{key}")
  46. self.ttwid = key
  47. else:
  48. status, response = download_q(url, headers, cookies={}, is_proxy=self.is_proxy)
  49. key = response.cookies["__ac_nonce"]
  50. logger.info(f"获得 __ac_nonce 成功:{key}")
  51. self.nonce = key
  52. return key
  53. def get_signature(self, nonce):
  54. """
  55. node signature
  56. :param nonce:
  57. :return:
  58. """
  59. url = f"http://127.0.0.1:3000/douyin/get_signature?nonce={nonce}"
  60. status, response = download_q(url, {}, {})
  61. signature = response.text
  62. logger.info(f"成功获取signature:{signature}; nonce: {nonce}")
  63. return signature
  64. def run(self):
  65. """
  66. :return:
  67. """
  68. try:
  69. __ac_nonce = self.get_cookie()
  70. if __ac_nonce:
  71. __ac_signature = self.get_signature(__ac_nonce)
  72. ck = {"__ac_nonce": __ac_nonce, "__ac_signature": __ac_signature, "__ac_referer": "__ac_blank"}
  73. ttwid = self.get_cookie(ck)
  74. if ttwid:
  75. sql_ttwid = f"ttwid={ttwid};"
  76. sql = "INSERT INTO `douyin_cookie_video_ly_copy2_test`(`cookie`, `status`, `source`, `time`) " \
  77. "VALUES('%s', '%s', '%s', NOW());" % (sql_ttwid, 2, 0)
  78. self.sql_list.append(sql)
  79. return ttwid
  80. else:
  81. logger.error("获得 ttwid 失败 ")
  82. else:
  83. logger.error("获得 nonce 失败 ")
  84. except Exception as e:
  85. # traceback.print_exc()
  86. logger.error(e)
  87. def insert_data(sql_list):
  88. """
  89. :param sql_list:
  90. :return:
  91. """
  92. client = MysqlPoolClient(CRAWLER_DB_CONF_DY)
  93. for sql in sql_list:
  94. try:
  95. logger.success(f"insert cookie -> {sql}")
  96. client.getOne(sql)
  97. except Exception as e:
  98. logger.error(f"insert cookie -> {sql}")
  99. def write_file(l):
  100. """
  101. :param l:
  102. :return:
  103. """
  104. with open("ttwid.txt", "w") as f:
  105. f.write("\n".join(l))
  106. f.close()
  107. logger.info("文件保存成功")
  108. def create_by_thread(douyin, count):
  109. """
  110. :param slid:
  111. :param count:
  112. :return:
  113. """
  114. with ThreadPoolExecutor(max_workers=3) as t:
  115. obj_list = []
  116. for i in range(count * 2):
  117. obj = t.submit(douyin.run)
  118. obj_list.append(obj)
  119. insert_data(douyin.sql_list)
  120. # write_file(douyin.sql_list)
  121. logger.info(f"[sum] 并发任务 需要生成数量 {count}, 实际抓取数量 {count*2}, 实际生成数量 {len(douyin.sql_list)}, 成功率 {len(douyin.sql_list)/(count*2)}")
  122. if __name__ == '__main__':
  123. dy = DouyinTtwid(is_proxy=True)
  124. parser = argparse.ArgumentParser(description='get douyin.com cookie')
  125. parser.add_argument('-c', type=int, default=100, help="needed cookie count;default count=100;")
  126. args = parser.parse_args()
  127. args_count = args.c
  128. create_by_thread(dy, args_count)