You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
152 lines
4.8 KiB
152 lines
4.8 KiB
# coding:utf-8
|
|
import argparse
|
|
import os, sys
|
|
# Relative-path bootstrap: make the "api-py" project root importable so the
# `utils.*` imports that follow resolve when this file is run as a script.
# Everything up to the first "api-py" in this file's directory is kept and
# "api-py" is appended again to form the root.
_script_dir = os.path.abspath(os.path.dirname(__file__))
root_path = _script_dir.partition('api-py')[0] + "api-py"
sys.path.append(root_path)
|
|
|
|
from concurrent.futures.thread import ThreadPoolExecutor
|
|
from utils.MysqlData import MysqlPoolClient, CRAWLER_DB_CONF_DY
|
|
from utils.tool import download_q
|
|
from loguru import logger
|
|
|
|
|
|
class DouyinTtwid:
    """
    Generate ``ttwid`` cookies from the Douyin homepage.

    Flow implemented by :meth:`run`: request the homepage to obtain the
    ``__ac_nonce`` cookie, ask the local node service for the matching
    signature, then request the homepage again with both values to obtain
    the ``ttwid`` cookie.  Each ``ttwid`` obtained is queued in
    ``self.sql_list`` as an ``INSERT`` statement.
    """

    # Homepage that hands out the ``__ac_nonce`` / ``ttwid`` cookies.
    HOME_URL = "https://www.douyin.com/"
    # Local node service endpoint that computes the signature for a nonce.
    SIGNATURE_URL = "http://127.0.0.1:3000/douyin/get_signature"
    # Table the generated INSERT statements target.
    COOKIE_TABLE = "douyin_cookie_video_ly_copy2_test"

    # Browser-like request headers sent to the homepage.  Kept as a class
    # constant and copied per request so it is not rebuilt on every call.
    _HEADERS = {
        "authority": "www.douyin.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "none",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    }

    def __init__(self, is_proxy: bool):
        """
        :param is_proxy: forwarded to ``download_q`` as ``is_proxy`` for the
            homepage requests.
        """
        self.is_proxy = is_proxy
        # INSERT statements accumulated by run(), one per ttwid obtained.
        self.sql_list = []
        # Most recent values seen by get_cookie(); initialised here so that
        # reading them before the first request does not raise AttributeError.
        self.nonce = None
        self.ttwid = None

    @staticmethod
    def _escape_sql_literal(value):
        """Escape backslashes and single quotes for a quoted SQL string literal."""
        # NOTE(review): assumes the server uses MySQL's default backslash-escape
        # mode (i.e. NO_BACKSLASH_ESCAPES is not enabled) - confirm.
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    @classmethod
    def _build_insert_sql(cls, ttwid):
        """Build the INSERT statement that stores ``ttwid=<value>;`` as a cookie row."""
        cookie = cls._escape_sql_literal(f"ttwid={ttwid};")
        return (
            f"INSERT INTO `{cls.COOKIE_TABLE}`(`cookie`, `status`, `source`, `time`) "
            f"VALUES('{cookie}', '2', '0', NOW());"
        )

    def get_cookie(self, ck=None):
        """
        Request the homepage and read one cookie from the response.

        Without ``ck`` the ``__ac_nonce`` cookie is read and stored on
        ``self.nonce``; with a truthy ``ck`` those cookies are sent and the
        ``ttwid`` cookie is read and stored on ``self.ttwid``.

        :param ck: cookies to send with the request, or None / empty to send none
        :return: the cookie value, or None when the response does not carry it
        """
        cookie_name = "ttwid" if ck else "__ac_nonce"
        _status, response = download_q(
            self.HOME_URL,
            dict(self._HEADERS),
            cookies=ck if ck else {},
            is_proxy=self.is_proxy,
        )
        try:
            key = response.cookies[cookie_name]
        except KeyError:
            # Report a missing cookie as "no value" so run() can take its
            # dedicated failure branch instead of a generic exception path.
            logger.error(f"response has no {cookie_name} cookie")
            return None

        logger.info(f"获得 {cookie_name} 成功:{key}")
        # When one instance is shared between threads these attributes simply
        # hold whichever call finished last; run() relies on the return value.
        if ck:
            self.ttwid = key
        else:
            self.nonce = key
        return key

    def get_signature(self, nonce):
        """
        Ask the node service for the signature that belongs to ``nonce``.

        :param nonce: the ``__ac_nonce`` value
        :return: the response body text of the signature service
        """
        from urllib.parse import quote

        # Percent-encode the nonce so unusual characters cannot break the query string.
        encoded_nonce = quote(str(nonce), safe="")
        url = f"{self.SIGNATURE_URL}?nonce={encoded_nonce}"
        _status, response = download_q(url, {}, {})
        signature = response.text
        logger.info(f"成功获取signature:{signature}; nonce: {nonce}")
        return signature

    def run(self):
        """
        Entry point: obtain one ``ttwid`` and queue its INSERT statement.

        :return: the ``ttwid`` value on success, otherwise None (failures are
            logged, never raised)
        """
        try:
            # Plain local names: identifiers starting with a double underscore
            # are name-mangled inside a class body.
            ac_nonce = self.get_cookie()
            if not ac_nonce:
                logger.error("获得 nonce 失败 ")
                return None

            ac_signature = self.get_signature(ac_nonce)
            ck = {
                "__ac_nonce": ac_nonce,
                "__ac_signature": ac_signature,
                "__ac_referer": "__ac_blank",
            }
            ttwid = self.get_cookie(ck)
            if not ttwid:
                logger.error("获得 ttwid 失败 ")
                return None

            self.sql_list.append(self._build_insert_sql(ttwid))
            return ttwid
        except Exception as e:
            # Task boundary for the worker threads: log with traceback and
            # report failure rather than letting the exception escape.
            logger.exception(e)
            return None
|
|
|
|
|
|
def insert_data(sql_list):
    """
    Execute the queued INSERT statements against the crawler database.

    Each statement is executed independently: a failure is logged together
    with its cause and the remaining statements are still attempted.

    :param sql_list: iterable of SQL statement strings; empty or None is a no-op
    :return: None
    """
    if not sql_list:
        # Nothing to store: do not create a database client for zero statements.
        return

    client = MysqlPoolClient(CRAWLER_DB_CONF_DY)
    for sql in sql_list:
        try:
            client.getOne(sql)
        except Exception as e:
            # Best effort per statement: report the cause and carry on.
            logger.error(f"insert cookie failed -> {sql}; error: {e!r}")
        else:
            # Only report success once the statement has actually been executed.
            logger.success(f"insert cookie -> {sql}")
|
|
|
|
|
|
def write_file(l, path="ttwid.txt"):
    """
    Save the given strings to a text file, one per line.

    The file is overwritten if it already exists.

    :param l: iterable of strings to write, joined with newlines
    :param path: destination file; defaults to ``ttwid.txt`` in the current
        working directory
    :return: None
    """
    # Explicit encoding keeps the output identical across platforms; the
    # ``with`` block closes the file, so no manual close() is needed.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(l))
    logger.info("文件保存成功")
|
|
|
|
|
|
def create_by_thread(douyin, count, max_workers=3):
    """
    Run ``douyin.run`` concurrently, then store the generated statements.

    Twice as many attempts as requested are submitted, and the resulting
    ``douyin.sql_list`` is passed to ``insert_data`` once every attempt has
    finished.

    :param douyin: object exposing ``run()`` and a ``sql_list`` attribute
    :param count: number of cookies wanted; ``count * 2`` attempts are made
    :param max_workers: size of the thread pool (default 3)
    :return: None
    """
    attempts = max(count, 0) * 2

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(douyin.run) for _ in range(attempts)]
    # Leaving the ``with`` block waits for every submitted task, so sql_list
    # is complete before anything is inserted.

    for future in futures:
        error = future.exception()
        if error is not None:
            # Surface failures that escaped the task instead of dropping them.
            logger.error(f"worker task failed: {error!r}")

    insert_data(douyin.sql_list)
    # write_file(douyin.sql_list)

    generated = len(douyin.sql_list)
    # Guard the ratio: a count of 0 would otherwise divide by zero.
    success_rate = generated / attempts if attempts > 0 else 0.0
    logger.info(f"[sum] 并发任务 需要生成数量 {count}, 实际抓取数量 {attempts}, 实际生成数量 {generated}, 成功率 {success_rate}")
|
|
|
|
|
|
if __name__ == '__main__':
    # Command line: -c is the number of cookies wanted (defaults to 100).
    cli = argparse.ArgumentParser(description='get douyin.com cookie')
    cli.add_argument(
        '-c',
        type=int,
        default=100,
        help="needed cookie count;default count=100;",
    )
    wanted = cli.parse_args().c

    # Generate the cookies concurrently with a proxy-enabled generator.
    create_by_thread(DouyinTtwid(is_proxy=True), wanted)
|
|
|