知网采集
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

28 lines
912 B

"""Download the CNKI check-code (captcha) image and print the recognised text.

Usage:
    python <this_script> "<Cookie header value>"

The first command-line argument is sent verbatim as the ``Cookie`` request
header. On success the OCR result is the only thing written to stdout.
On failure a message is written to stderr and the exit status is non-zero.
"""
import sys

import ddddocr
import requests

# Validate the command line before doing anything expensive (loading the OCR
# model, hitting the network) so a missing argument yields a usage message
# rather than an IndexError traceback.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} <cookie-header-value>')
headerCookie = sys.argv[1]

# Seconds to wait for the server; without a timeout requests.get() can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Browser-like headers for an image request carrying the caller's cookie.
# NOTE(review): Referer points at ref.cnki.net while the image is fetched from
# ie.cnki.net, yet Sec-Fetch-Site says 'same-origin' -- confirm this is intended.
headers = {
    'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': headerCookie,
    'Referer': 'https://ref.cnki.net/REF/AdvSearch',
    'Sec-Fetch-Dest': 'image',
    'Sec-Fetch-Mode': 'no-cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
url_code = 'https://ie.cnki.net/kns/checkcode.aspx?t=0.15957984515339407'

try:
    rep_code = requests.get(url_code, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of passing an error page to the OCR model.
    rep_code.raise_for_status()
except requests.RequestException as exc:
    sys.exit(f'failed to download check-code image: {exc}')

code_bytes = rep_code.content
if not code_bytes:
    sys.exit('check-code response body is empty')

# Instantiate the OCR engine only once there is an image to classify.
ocr = ddddocr.DdddOcr()
code = ocr.classification(code_bytes)
print(code)