知网采集
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

28 lines
912 B

6 months ago
  1. import ddddocr
  2. ocr = ddddocr.DdddOcr()
  3. import requests
  4. import sys
  5. headerCookie = sys.argv[1]
  6. headers = {
  7. 'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
  8. 'Accept-Language': 'zh-CN,zh;q=0.9',
  9. 'Connection': 'keep-alive',
  10. 'Cookie': headerCookie,
  11. 'Referer': 'https://ref.cnki.net/REF/AdvSearch',
  12. 'Sec-Fetch-Dest': 'image',
  13. 'Sec-Fetch-Mode': 'no-cors',
  14. 'Sec-Fetch-Site': 'same-origin',
  15. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36',
  16. 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
  17. 'sec-ch-ua-mobile': '?0',
  18. 'sec-ch-ua-platform': '"Windows"'
  19. }
  20. url_code = 'https://ie.cnki.net/kns/checkcode.aspx?t=0.15957984515339407'
  21. rep_code=requests.get(url_code,headers=headers)
  22. code_bytes=rep_code.content
  23. code = ocr.classification(code_bytes)
  24. print(code)