暗网采集的部署
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

55 lines
2.2 KiB

import requests
# Crawl API endpoint reached through the public IP.
api_url = "http://124.243.188.109:8000/crawl"
# Alternative endpoint on the internal-network IP (disabled).
# api_url = "http://192.168.0.131:8000/crawl"

# Candidate target URLs that can be submitted to the crawl API.
url_list = [
    "http://darkzqtmbdeauwq5mzcmgeeuhet42fhfjj4p5wbak3ofx2yqgecoeqyd.onion",
    "http://rznvg5sjacavz5kpshrq4urm75xzruha6iiyuggidnioo5ztvwdfroyd.onion/blogs/where-to-buy-counterfeit-banknotes-how-t/hitman-internet-killers-hit-man-service.html",
    "http://dwltorbltw3tdjskxn23j2mwz2f4q25j4ninl5bdvttiy4xb6cqzikid.onion/",
    "https://onionsearchengine.com/search.php?q=search+engine",
    "http://xao2lxsmia2edq2n5zxg6uahx6xox2t7bfjw6b5vdzsxi7ezmqob6qid.onion/",
    "http://dwltorbltw3tdjskxn23j2mwz2f4q25j4ninl5bdvttiy4xb6cqzikid.onion/blog/dark-web-onion-links",
    "http://darkzqtmbdeauwq5mzcmgeeuhet42fhfjj4p5wbak3ofx2yqgecoeqyd.onion/search?q=dark&p=2",
]
# Note: the payload is sent as a JSON body (json=payload) and the call to the
# crawl API itself is an HTTP POST.
# `index` currently only feeds the output file name written at the end of the
# script, because the url_list[index] lookup in the payload is commented out.
index = 6
payload = {
    # "url": url_list[index],
    # "url": "http://zqktlwiuavvvqqt4ybvgvi7tyo4hjl5xgfuvpdf6otjiycgwqbym2qad.onion/wiki/index.php/Main_Page#Conferences",
    "url": "http://darkzqtmbdeauwq5mzcmgeeuhet42fhfjj4p5wbak3ofx2yqgecoeqyd.onion/search?q=a",
    # "is_dynamic": True,
    "method": "POST",  # tells the server to use POST underneath when connecting to the dark web
}

# (connect, read) timeouts in seconds. Without a timeout requests waits
# forever on a stalled connection; the read budget is generous on the
# assumption that the server-side crawl is slow -- tune as needed.
REQUEST_TIMEOUT = (10, 300)

resp = requests.post(api_url, json=payload, timeout=REQUEST_TIMEOUT)
if not resp.ok:
    # Keep the server's error body visible before aborting on the bad status.
    print(resp.text)
resp.raise_for_status()

data_json = resp.json()
print(data_json)

# Walk the reply defensively: a missing or null 'data' / 'content' field should
# produce a clear error here instead of an AttributeError on None, or a
# TypeError later when the HTML is written to disk.
data = data_json.get('data') if isinstance(data_json, dict) else None
html = data.get('content') if isinstance(data, dict) else None
if not isinstance(html, str):
    raise RuntimeError(
        f"crawl API returned no HTML content for {payload['url']!r}: {data_json!r}"
    )
# msg = data_json.get('msg')
# cont = 0
# for i,item in enumerate(url_list):
# payload = {
# "url": item,
# # "url": "http://dwltorbltw3tdjskxn23j2mwz2f4q25j4ninl5bdvttiy4xb6cqzikid.onion/blog/dark-web-onion-links ",
# "is_dynamic": True,
# "method": "POST", # 告诉服务器:底层用 POST 去连暗网
# }
# try:
# resp = requests.post(api_url, json=payload)
# data_json = resp.json()
# # print(data_json)
# html = data_json.get('data').get('content')
# msg = data_json.get('msg')
# if msg =="success":
# # print(html)
# cont += 1
# else:print(item)
# except:
# print(item)
# print(cont)
#
# Persist the fetched page as ./html<index>.html, encoded as UTF-8.
with open(f'./html{index}.html', mode='w', encoding='utf-8') as page_file:
    page_file.write(html)