From a5278554028ae689a51116a0618fd4eeeac3466f Mon Sep 17 00:00:00 2001 From: "steve.gao" Date: Wed, 12 Feb 2025 11:14:41 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 141 ++++++++++++++++++++++++++++++++----------------------- requirements.txt | 18 +++++++ 2 files changed, 101 insertions(+), 58 deletions(-) create mode 100644 requirements.txt diff --git a/main.py b/main.py index 79470b9..d630bb6 100644 --- a/main.py +++ b/main.py @@ -1,60 +1,85 @@ -# 导入 from DrissionPage import Chromium, ChromiumOptions -# 连接浏览器 -# browser = Chromium() -# 获取标签页对象 -# tab = browser.latest_tab -# 访问网页 -# tab.get('https://etax.chinatax.gov.cn') - - -def inner_page(page): - """ - - """ - a_tab = page.ele('xpath:/html/body/div[4]/div/div/div[3]/ul/li[3]/a') # 缴税记录 - a_tab.click() - table_list = page.eles('xpath:/html/body/div[4]/div/div/div[3]/div[3]/div/div/div/div[2]/div/table/tbody/tr') # 凭证 - for tr in table_list: - href = tr.ele('@tag()=a') # 跳转链接 证明pdf 需要下载 - href.click() # 跳转下载 - tab.wait.doc_loaded() # 等待文档加载完毕 - page.get_screenshot(path='tmp', name='pic.jpg', full_page=False) # 下载发票 - break - -co = ChromiumOptions() -co.set_argument("--remote-debugging-port", "9222") -browser = Chromium(co) # 创建浏览器对象 -browser.set.retry_times(10) # 设置整体运行参数 -tab = browser.latest_tab # 获取Tab对象 -tab.get("https://etax.chinatax.gov.cn") -# #app > div.header-user > div > div.navbar-container > ul > li:nth-child(3) > a -# #app > div.header-user > div > div.navbar-container > ul > li.active > a -# ele = tab.ele("#app > div.header-user > div > div.navbar-container > ul > li:nth-child(3) > a") -try: - ele = tab.ele('xpath://*[@id="app"]/div[1]/div/div[2]/ul/li[3]/a') # 我要查询 - ele.click() - tab.ele('xpath://*[@id="app"]/div[2]/div/div/div[1]/div[2]/a[1]').click() # 申报查询 - - text = tab.ele('xpath://*[@id="app"]/div[2]/div/div[2]/div[2]/div[1]/div[1]').text # 待缴税款 - print(text) - text1 = tab.ele('xpath://*[@id="app"]/div[2]/div/div[2]/div[2]/div[1]/div[2]').text # 可申请退税金额 - print(text1) - ele = tab.ele('xpath://*[@id="app"]/div[2]/div/div[2]/div[1]/label[2]/span') # 已完成标签 - ele.click() - - table_list = tab.eles('xpath://*[@id="app"]/div[2]/div/div[2]/div[3]/div/div[3]/table/tbody/tr') - # for tr in table_list: - # # td = tab.eles('@tag:td') - # print(tr.texts()) # 获取所有的文本 - # # //*[@id="app"]/div[2]/div/div[2]/div[3]/div/div[3]/table/tbody/tr[2]/td[6]/div/a[1] - # href = tr.ele('@tag()=a') # 跳转链接 - # href.click() - # inner_page(tab) - # break -except Exception as e: - print(f"e is => {e}") - browser.quit() # 关闭浏览器 - -browser.quit() # 关闭浏览器 + +class DPModel(): + def __init__(self, link): + self.login_qr = "" + self.login_link = link + self.browser = self.init_browser() + self.tab = None + + def init_browser(self): + co = ChromiumOptions() + co.set_argument("--remote-debugging-port", "9222") + browser = Chromium(co) # 创建浏览器对象 + browser.set.retry_times(10) # 设置整体运行参数 + return browser + + def get_login_qr(self): + try: + self.tab = self.browser.latest_tab # 获取Tab对象 + self.tab.get("https://etax.chinatax.gov.cn/webstatic/login") + qr_code = self.tab.ele('xpath://*[@id="app"]/div[2]/div/div/div[2]/div[3]/div[1]/img').attr("src") + self.login_qr = qr_code + except Exception as e: + print(f"e is => {e}") + self.browser.quit() # 关闭浏览器 + + return self.login_qr + + def get_content(self): + try: + self.tab.wait.eles_loaded('xpath://*[@id="app"]/div[1]/div/div[2]/ul/li[3]/a', timeout=30) + ele = self.tab.ele('xpath://*[@id="app"]/div[1]/div/div[2]/ul/li[3]/a') # 我要查询 + ele.click() + self.tab.ele('xpath://*[@id="app"]/div[2]/div/div/div[1]/div[2]/a[1]').click() # 申报查询 + + text = self.tab.ele('xpath://*[@id="app"]/div[2]/div/div[2]/div[2]/div[1]/div[1]').text # 待缴税款 + print(text) + text1 = self.tab.ele('xpath://*[@id="app"]/div[2]/div/div[2]/div[2]/div[1]/div[2]').text # 可申请退税金额 + print(text1) + ele = self.tab.ele('xpath://*[@id="app"]/div[2]/div/div[2]/div[1]/label[2]/span') # 已完成标签 + ele.click() + + table_list = self.tab.eles('xpath://*[@id="app"]/div[2]/div/div[2]/div[3]/div/div[3]/table/tbody/tr') + for tr in table_list: + # td = tab.eles('@tag:td') + print(tr.texts()) # 获取所有的文本 + # //*[@id="app"]/div[2]/div/div[2]/div[3]/div/div[3]/table/tbody/tr[2]/td[6]/div/a[1] + href = tr.ele('@tag()=a') # 跳转链接 + href.click() + self.tab.wait.load_start() # 等待 + self.inner_page(self.tab) + break + + except Exception as e: + print(f"e is => {e}") + self.browser.quit() # 关闭浏览器 + + def inner_page(self, page): + """ + + """ + a_tab = page.ele('xpath:/html/body/div[4]/div/div/div[3]/ul/li[3]/a') # 缴税记录 + a_tab.click() + table_list = page.eles( + 'xpath:/html/body/div[4]/div/div/div[3]/div[3]/div/div/div/div[2]/div/table/tbody/tr') # 凭证 + for tr in table_list: + href = tr.ele('@tag()=a') # 跳转链接 证明pdf 需要下载 + href.click() # 跳转下载 + a_tab.wait.doc_loaded() # 等待文档加载完毕 + page.get_screenshot(path='tmp', name='pic.jpg', full_page=False) # 下载发票 + break + + def __del__(self): + print("?????") + # self.browser.quit() # 关闭浏览器 + + + +if __name__ == '__main__': + url = "https://etax.chinatax.gov.cn/webstatic/login" + d = DPModel(url) + qr_code = d.get_login_qr() # 获取验证码 + print(qr_code) + pass \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e2ee714 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,18 @@ +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +cssselect==1.2.0 +DataRecorder==3.6.2 +DownloadKit==2.0.7 +DrissionPage==4.1.0.17 +et_xmlfile==2.0.0 +filelock==3.16.1 +idna==3.10 +lxml==5.3.1 +openpyxl==3.1.5 +psutil==6.1.1 +requests==2.32.3 +requests-file==2.1.0 +tldextract==5.1.2 +urllib3==2.2.3 +websocket-client==1.8.0