算法暴露接口(xhs、dy、ks、wx、hnw)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

152 lines
4.8 KiB

# coding:utf-8
import argparse
import os, sys
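# Add the project root (the path up to and including "api-py") to sys.path so the `utils` imports below resolve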
root_path = os.path.abspath(os.path.dirname(__file__)).split('api-py')[0] + "api-py"
sys.path.append(root_path)
from concurrent.futures.thread import ThreadPoolExecutor
from utils.MysqlData import MysqlPoolClient, CRAWLER_DB_CONF_DY
from utils.tool import download_q
from loguru import logger
class DouyinTtwid:
    """
    Generate a ``ttwid`` cookie from the Douyin home page.

    Flow (see :meth:`run`): request the home page to obtain ``__ac_nonce``,
    ask the local node service for the matching signature, then request the
    home page again with both values as cookies to obtain ``ttwid``.

    Successful results are accumulated in ``self.sql_list`` as ready-to-run
    INSERT statements.
    """

    _HOME_URL = "https://www.douyin.com/"
    _SIGNATURE_URL = "http://127.0.0.1:3000/douyin/get_signature"

    # Browser-like headers sent with every home-page request.
    _HEADERS = {
        "authority": "www.douyin.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "none",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
    }

    def __init__(self, is_proxy: bool):
        """
        :param is_proxy: forwarded unchanged to ``download_q`` for home-page requests
        """
        self.is_proxy = is_proxy
        self.sql_list = []
        # Last values fetched by get_cookie(); None until a request succeeds.
        self.nonce = None
        self.ttwid = None

    @staticmethod
    def _extract_cookie(response, name):
        """Return cookie ``name`` from ``response.cookies``, or None when it is unavailable."""
        # NOTE(review): download_q's failure contract is not visible here; a
        # response without a `.cookies` mapping is treated as "no cookie".
        jar = getattr(response, "cookies", None)
        if jar is None:
            return None
        try:
            return jar[name]
        except KeyError:
            return None

    @staticmethod
    def _build_insert_sql(ttwid):
        """Build the INSERT statement that stores ``ttwid`` in the cookie table."""
        # NOTE(review): the cookie value comes from a remote response and is
        # interpolated into the SQL text unescaped; switch to a parameterized
        # query if MysqlPoolClient offers one.
        cookie = f"ttwid={ttwid};"
        return (
            "INSERT INTO `douyin_cookie_video_ly_copy2_test`(`cookie`, `status`, `source`, `time`) "
            "VALUES('%s', '%s', '%s', NOW());" % (cookie, 2, 0)
        )

    def get_cookie(self, ck=None):
        """
        Request the Douyin home page and read one cookie from the response.

        Without ``ck`` the request carries no cookies and ``__ac_nonce`` is read
        (stored in ``self.nonce``); with ``ck`` those cookies are sent and
        ``ttwid`` is read (stored in ``self.ttwid``).

        :param ck: cookies to send with the request, or None/empty for the first step
        :return: the cookie value, or None when the response does not carry it
        """
        if ck:
            cookie_name, request_cookies = "ttwid", ck
        else:
            cookie_name, request_cookies = "__ac_nonce", {}

        # Pass a copy so the shared class-level header dict cannot be mutated
        # by the downloader while several threads are running.
        _status, response = download_q(
            self._HOME_URL, dict(self._HEADERS), cookies=request_cookies, is_proxy=self.is_proxy
        )
        key = self._extract_cookie(response, cookie_name)
        if not key:
            # Let the caller decide how to report the failure instead of
            # raising KeyError, so run()'s failure branches are reachable.
            return None

        logger.info(f"获得 {cookie_name} 成功:{key}")
        if ck:
            self.ttwid = key
        else:
            self.nonce = key
        return key

    def get_signature(self, nonce):
        """
        Ask the local node service for the signature matching ``nonce``.

        :param nonce: the ``__ac_nonce`` value returned by :meth:`get_cookie`
        :return: the response body text of the signature service
        """
        url = f"{self._SIGNATURE_URL}?nonce={nonce}"
        _status, response = download_q(url, {}, {})
        signature = response.text
        logger.info(f"成功获取signature:{signature}; nonce: {nonce}")
        return signature

    def run(self):
        """
        Entry point: run the nonce -> signature -> ttwid flow once.

        On success the INSERT statement for the new cookie is appended to
        ``self.sql_list``.

        :return: the ``ttwid`` value, or None when any step fails
        """
        # Locals deliberately avoid a leading double underscore: such names
        # are name-mangled inside a class body.
        try:
            nonce = self.get_cookie()
            if not nonce:
                logger.error("获得 nonce 失败 ")
                return None

            signature = self.get_signature(nonce)
            ck = {"__ac_nonce": nonce, "__ac_signature": signature, "__ac_referer": "__ac_blank"}
            ttwid = self.get_cookie(ck)
            if not ttwid:
                logger.error("获得 ttwid 失败 ")
                return None

            self.sql_list.append(self._build_insert_sql(ttwid))
            return ttwid
        except Exception as e:
            # Best-effort worker: one failed attempt must not abort the batch.
            logger.error(e)
        return None
def insert_data(sql_list):
    """
    Execute every SQL statement in ``sql_list`` against the crawler database.

    Each statement is run independently: a failure is logged together with
    its exception and the remaining statements are still attempted.

    :param sql_list: iterable of complete SQL statement strings
    :return: None
    """
    if not sql_list:
        # Nothing to store; avoid creating a database client for no work.
        return
    client = MysqlPoolClient(CRAWLER_DB_CONF_DY)
    for sql in sql_list:
        try:
            # NOTE(review): getOne() is used to run an INSERT; confirm that
            # MysqlPoolClient executes/commits writes through this method.
            client.getOne(sql)
        except Exception as e:
            logger.error(f"insert cookie failed -> {sql}; error: {e}")
        else:
            # Report success only after the statement actually ran.
            logger.success(f"insert cookie -> {sql}")
def write_file(l, path="ttwid.txt"):
    """
    Save the given lines to a text file, one entry per line.

    The file is overwritten and written as UTF-8 so the output does not
    depend on the platform's default encoding.

    :param l: list of strings to write
    :param path: output file path; defaults to ``ttwid.txt`` in the working directory
    :return: None
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(l))
    logger.info("文件保存成功")
def create_by_thread(douyin, count, max_workers=3):
    """
    Run ``douyin.run`` concurrently, then store the generated cookies.

    ``count * 2`` attempts are submitted, on the assumption that some of them
    fail. After all attempts finish, the statements collected in
    ``douyin.sql_list`` are passed to ``insert_data`` and a summary is logged.

    :param douyin: object exposing ``run()`` and a ``sql_list`` attribute
    :param count: number of cookies wanted
    :param max_workers: size of the thread pool (default 3)
    :return: None
    """
    attempts = count * 2
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for _ in range(attempts):
            pool.submit(douyin.run)
    # Leaving the with-block waits for every submitted task, so sql_list is
    # complete before it is inserted and counted.
    insert_data(douyin.sql_list)
    # write_file(douyin.sql_list)
    produced = len(douyin.sql_list)
    # Guard the ratio so count <= 0 does not raise ZeroDivisionError.
    rate = produced / attempts if attempts > 0 else 0.0
    logger.info(f"[sum] 并发任务 需要生成数量 {count}, 实际抓取数量 {attempts}, 实际生成数量 {produced}, 成功率 {rate}")
if __name__ == '__main__':
    # Script entry: build the generator, read the wanted cookie count from
    # the -c option (default 100), and start the concurrent run.
    dy = DouyinTtwid(is_proxy=True)
    cli = argparse.ArgumentParser(description='get douyin.com cookie')
    cli.add_argument('-c', type=int, default=100, help="needed cookie count;default count=100;")
    create_by_thread(dy, cli.parse_args().c)