|
|
# coding:utf-8 import argparse import os, sys # 相对路径补充 root_path = os.path.abspath(os.path.dirname(__file__)).split('api-py')[0] + "api-py" sys.path.append(root_path)
from concurrent.futures.thread import ThreadPoolExecutor from utils.MysqlData import MysqlPoolClient, CRAWLER_DB_CONF_DY from utils.tool import download_q from loguru import logger
class DouyinTtwid():
    """Obtain ``ttwid`` cookies from the Douyin home page.

    :meth:`run` performs one attempt: request the home page to receive an
    ``__ac_nonce`` cookie, ask a signing service on 127.0.0.1:3000 for the
    matching ``__ac_signature``, then request the home page again with both
    cookies to receive ``ttwid``. Every success is queued in ``sql_list`` as
    a ready-to-run INSERT statement.
    """

    def __init__(self, is_proxy: bool):
        """
        :param is_proxy: forwarded unchanged to ``download_q`` for the
            Douyin home-page requests.
        """
        self.is_proxy = is_proxy
        # INSERT statements appended by successful ``run`` calls.
        self.sql_list = []
        # Most recent cookie values seen by ``get_cookie``. Declared here so
        # the attributes exist before the first request is made.
        self.nonce = None
        self.ttwid = None

    def get_cookie(self, ck=None):
        """Request the Douyin home page and return one cookie it sets.

        Without ``ck`` the request carries no cookies and ``__ac_nonce`` is
        read from the response; with ``ck`` those cookies are sent and
        ``ttwid`` is read instead.

        :param ck: cookies to send with the request, or a falsy value.
        :return: the cookie value, or ``None`` when the response does not
            carry the expected cookie.
        """
        headers = {
            "authority": "www.douyin.com",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "sec-ch-ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"macOS\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "none",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
        }
        url = "https://www.douyin.com/"

        # The two modes differ only in the cookies sent and the cookie read.
        cookie_name = "ttwid" if ck else "__ac_nonce"
        _status, response = download_q(
            url, headers, cookies=ck if ck else {}, is_proxy=self.is_proxy
        )

        # Use .get() rather than indexing: a missing cookie must come back as
        # a falsy value so the caller's failure branch runs, instead of
        # surfacing as a bare KeyError.
        # NOTE(review): assumes response.cookies is mapping-like (e.g. a
        # requests cookie jar) -- confirm against download_q.
        key = response.cookies.get(cookie_name)
        if not key:
            return None

        logger.info(f"获得 {cookie_name} 成功:{key}")
        if ck:
            self.ttwid = key
        else:
            self.nonce = key
        return key

    def get_signature(self, nonce):
        """Ask the local signing service for the signature of ``nonce``.

        :param nonce: the ``__ac_nonce`` cookie value.
        :return: the response body text, used as ``__ac_signature``.
        """
        url = f"http://127.0.0.1:3000/douyin/get_signature?nonce={nonce}"
        _status, response = download_q(url, {}, {})
        signature = response.text
        logger.info(f"成功获取signature:{signature}; nonce: {nonce}")
        return signature

    @staticmethod
    def _build_insert_sql(ttwid):
        """Return the INSERT statement that stores ``ttwid`` as a cookie row."""
        # NOTE(review): the value comes from a remote response and is
        # interpolated into the statement unescaped. Switch to a
        # parameterized query once the insert path accepts parameters.
        sql_ttwid = f"ttwid={ttwid};"
        return (
            "INSERT INTO `douyin_cookie_video_ly_copy2_test`(`cookie`, `status`, `source`, `time`) "
            "VALUES('%s', '%s', '%s', NOW());" % (sql_ttwid, 2, 0)
        )

    def run(self):
        """Perform one full attempt to obtain a ``ttwid``.

        On success the matching INSERT statement is appended to
        ``sql_list``. Every failure, including any exception, is logged
        rather than raised.

        :return: the ``ttwid`` value on success, otherwise ``None``.
        """
        try:
            nonce = self.get_cookie()
            if not nonce:
                logger.error("获得 nonce 失败 ")
                return None

            signature = self.get_signature(nonce)
            ck = {
                "__ac_nonce": nonce,
                "__ac_signature": signature,
                "__ac_referer": "__ac_blank",
            }
            ttwid = self.get_cookie(ck)
            if not ttwid:
                logger.error("获得 ttwid 失败 ")
                return None

            self.sql_list.append(self._build_insert_sql(ttwid))
            return ttwid
        except Exception as e:
            # Deliberate catch-all: one failed attempt is logged and dropped
            # instead of propagating to the caller.
            logger.error(e)
        return None
def insert_data(sql_list):
    """Execute each SQL statement through a ``MysqlPoolClient``.

    The client is built from ``CRAWLER_DB_CONF_DY``. A statement that raises
    is logged together with its error and skipped; the remaining statements
    are still executed.

    :param sql_list: complete SQL statements to run, in order.
    :return: None
    """
    if not sql_list:
        # Nothing to write: do not build a database client for no work.
        return

    client = MysqlPoolClient(CRAWLER_DB_CONF_DY)
    for sql in sql_list:
        try:
            # NOTE(review): getOne is used to run an INSERT; whether it
            # commits is not visible from here -- confirm in MysqlPoolClient.
            client.getOne(sql)
        except Exception as e:
            # Report the cause; without it the error line is
            # indistinguishable from the success line apart from its level.
            logger.error(f"insert cookie -> {sql}; error: {e}")
        else:
            # Log success only after the statement actually ran.
            logger.success(f"insert cookie -> {sql}")
def write_file(l):
    """Write the given strings to ``ttwid.txt``, separated by newlines.

    The file is opened in ``"w"`` mode, so any existing content is replaced.
    No trailing newline is written.

    :param l: iterable of strings to save.
    :return: None
    """
    # Explicit encoding keeps the output independent of the platform default.
    # The ``with`` block closes the file, so no manual close() is needed.
    with open("ttwid.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(l))
    logger.info("文件保存成功")
def create_by_thread(douyin, count, max_workers=3):
    """Run ``douyin.run`` concurrently, store the results, and log a summary.

    ``count * 2`` attempts are submitted to a thread pool. Once all of them
    have finished, the statements collected in ``douyin.sql_list`` are
    passed to ``insert_data``.

    :param douyin: object exposing ``run()`` and ``sql_list``.
    :param count: number of cookies wanted; twice as many attempts are made.
    :param max_workers: size of the thread pool (default 3).
    :return: None
    """
    attempts = count * 2

    # Leaving the ``with`` block waits for every submitted task, so
    # ``sql_list`` is complete before it is read below.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for _ in range(attempts):
            pool.submit(douyin.run)

    insert_data(douyin.sql_list)
    # write_file(douyin.sql_list)

    produced = len(douyin.sql_list)
    # Guard the ratio: a count of 0 (or less) means no attempts were made.
    rate = produced / attempts if attempts > 0 else 0.0
    logger.info(f"[sum] 并发任务 需要生成数量 {count}, 实际抓取数量 {attempts}, 实际生成数量 {produced}, 成功率 {rate}")
if __name__ == '__main__':
    # Script entry point: build the generator, read the wanted cookie count
    # from the command line (-c, default 100), then run the concurrent job.
    generator = DouyinTtwid(is_proxy=True)

    cli = argparse.ArgumentParser(description='get douyin.com cookie')
    cli.add_argument(
        '-c',
        type=int,
        default=100,
        help="needed cookie count;default count=100;",
    )
    wanted = cli.parse_args().c

    create_by_thread(generator, wanted)
|