暗网采集的部署
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

55 lines
2.2 KiB

  1. import requests
  # Client script: ask the crawl service for one page and save the returned HTML.

# Public-IP endpoint of the crawl service.
api_url = "http://124.243.188.109:8000/crawl"
# LAN endpoint (uncomment to use it instead of the public one).
# api_url = "http://192.168.0.131:8000/crawl"

# Sample targets; they can be batch-checked with count_successful(url_list).
url_list = [
    'http://darkzqtmbdeauwq5mzcmgeeuhet42fhfjj4p5wbak3ofx2yqgecoeqyd.onion',
    'http://rznvg5sjacavz5kpshrq4urm75xzruha6iiyuggidnioo5ztvwdfroyd.onion/blogs/where-to-buy-counterfeit-banknotes-how-t/hitman-internet-killers-hit-man-service.html',
    'http://dwltorbltw3tdjskxn23j2mwz2f4q25j4ninl5bdvttiy4xb6cqzikid.onion/',
    'https://onionsearchengine.com/search.php?q=search+engine',
    'http://xao2lxsmia2edq2n5zxg6uahx6xox2t7bfjw6b5vdzsxi7ezmqob6qid.onion/',
    'http://dwltorbltw3tdjskxn23j2mwz2f4q25j4ninl5bdvttiy4xb6cqzikid.onion/blog/dark-web-onion-links',
    'http://darkzqtmbdeauwq5mzcmgeeuhet42fhfjj4p5wbak3ofx2yqgecoeqyd.onion/search?q=dark&p=2',
]

# Only used to name the output file (./html<index>.html); the page that is
# actually requested is target_url below, not url_list[index].
index = 6

# Page requested by main(). Alternatives tried previously:
#   url_list[index]
#   "http://zqktlwiuavvvqqt4ybvgvi7tyo4hjl5xgfuvpdf6otjiycgwqbym2qad.onion/wiki/index.php/Main_Page#Conferences"
target_url = "http://darkzqtmbdeauwq5mzcmgeeuhet42fhfjj4p5wbak3ofx2yqgecoeqyd.onion/search?q=a"

# Seconds to wait for the crawl service before giving up. Deliberately generous
# because the service has to fetch the page itself before it can answer;
# without any timeout the script could block forever.
REQUEST_TIMEOUT = 300


def build_payload(url, method="POST", is_dynamic=None):
    """Build the JSON body sent to the crawl service.

    Args:
        url: Page the service should fetch.
        method: Value of the "method" field. Per the original author's note
            this tells the server which HTTP method to use for its own
            request to the target site.
        is_dynamic: When not None, sent as the "is_dynamic" field; omitted
            otherwise (the single-page run never sent it).

    Returns:
        A dict ready to be passed as ``json=`` to ``requests.post``.
    """
    payload = {"url": url, "method": method}
    if is_dynamic is not None:
        payload["is_dynamic"] = is_dynamic
    return payload


def crawl(payload):
    """POST *payload* to the crawl service and return the decoded JSON reply.

    Raises:
        requests.RequestException: on connection problems or timeout.
        ValueError: if the reply body is not valid JSON.
    """
    # The body must go out as JSON (json=payload), and the call to the
    # service itself is always a POST.
    resp = requests.post(api_url, json=payload, timeout=REQUEST_TIMEOUT)
    return resp.json()


def extract_content(data_json):
    """Return the HTML string stored at ``data_json["data"]["content"]``.

    Raises:
        ValueError: if the reply is not a dict, has no "data" dict, or its
            "content" is not a string. Raised before any file is opened so a
            failed crawl never truncates an earlier output file.
    """
    data = data_json.get("data") if isinstance(data_json, dict) else None
    content = data.get("content") if isinstance(data, dict) else None
    if not isinstance(content, str):
        msg = data_json.get("msg") if isinstance(data_json, dict) else None
        raise ValueError(f"crawl response contains no HTML content (msg={msg!r})")
    return content


def output_path(idx):
    """Return the file name the fetched HTML is written to for *idx*."""
    return f'./html{idx}.html'


def count_successful(urls, is_dynamic=True, fetch=None):
    """Crawl every URL and return how many replies report success.

    This is the batch check that used to live here as commented-out code.
    A URL counts as successful when the reply carries HTML content and its
    "msg" field equals "success"; every other URL is printed.

    Args:
        urls: Iterable of page URLs to check.
        is_dynamic: Forwarded to build_payload for each URL.
        fetch: Callable taking a payload dict and returning the decoded
            reply; defaults to crawl().
    """
    fetch = crawl if fetch is None else fetch
    succeeded = 0
    for url in urls:
        try:
            data_json = fetch(build_payload(url, is_dynamic=is_dynamic))
            extract_content(data_json)
        except (OSError, ValueError):
            # requests' exceptions derive from OSError (IOError); JSON
            # decoding failures and missing content surface as ValueError.
            print(url)
            continue
        if data_json.get("msg") == "success":
            succeeded += 1
        else:
            print(url)
    return succeeded


def main():
    """Fetch target_url through the crawl service and save its HTML."""
    data_json = crawl(build_payload(target_url))
    print(data_json)
    # Validate first: open(..., 'w') would otherwise create or truncate the
    # output file even when the service returned nothing to write.
    html = extract_content(data_json)
    with open(output_path(index), 'w', encoding='utf-8') as f:
        f.write(html)


if __name__ == "__main__":
    main()