commit 86b867f54155800bb9111936a9b24acd7bb64837
Author: guanjz <1826473923@qq.com>
Date:   Mon May 19 10:41:53 2025 +0800

    本地部署

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..73f69e0
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000..e6b77de
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CompilerConfiguration">
+    <annotationProcessing>
+      <profile name="Maven default annotation processors profile" enabled="true">
+        <sourceOutputDir name="target/generated-sources/annotations" />
+        <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
+        <outputRelativeToContentRoot value="true" />
+        <module name="DaKaES" />
+      </profile>
+    </annotationProcessing>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
new file mode 100644
index 0000000..712ab9d
--- /dev/null
+++ b/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="RemoteRepositoriesConfiguration">
+    <remote-repository>
+      <option name="id" value="central" />
+      <option name="name" value="Central Repository" />
+      <option name="url" value="https://repo.maven.apache.org/maven2" />
+    </remote-repository>
+    <remote-repository>
+      <option name="id" value="central" />
+      <option name="name" value="Maven Central repository" />
+      <option name="url" value="https://repo1.maven.org/maven2" />
+    </remote-repository>
+    <remote-repository>
+      <option name="id" value="jboss.community" />
+      <option name="name" value="JBoss Community repository" />
+      <option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
+    </remote-repository>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..ae9c995
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ExternalStorageConfigurationManager" enabled="true" />
+  <component name="MavenProjectsManager">
+    <option name="originalFiles">
+      <list>
+        <option value="$PROJECT_DIR$/pom.xml" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="corretto-1.8" project-jdk-type="JavaSDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml
new file mode 100644
index 0000000..e96534f
--- /dev/null
+++ b/.idea/uiDesigner.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Palette2">
+    <group name="Swing">
+      <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
+      </item>
+      <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
+      </item>
+      <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
+      </item>
+      <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.png" removable="false" auto-create-binding="false" can-attach-label="true">
+        <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
+      </item>
+      <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
+        <initial-values>
+          <property name="text" value="Button" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
+        <initial-values>
+          <property name="text" value="RadioButton" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
+        <initial-values>
+          <property name="text" value="CheckBox" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
+        <initial-values>
+          <property name="text" value="Label" />
+        </initial-values>
+      </item>
+      <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
+          <preferred-size width="150" height="-1" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
+          <preferred-size width="150" height="-1" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
+          <preferred-size width="150" height="-1" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
+      </item>
+      <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
+          <preferred-size width="150" height="50" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
+          <preferred-size width="200" height="200" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
+          <preferred-size width="200" height="200" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.png" removable="false" auto-create-binding="true" can-attach-label="true">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
+      </item>
+      <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
+      </item>
+      <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
+      </item>
+      <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
+      </item>
+      <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
+          <preferred-size width="-1" height="20" />
+        </default-constraints>
+      </item>
+      <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.png" removable="false" auto-create-binding="false" can-attach-label="false">
+        <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
+      </item>
+      <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
+        <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
+      </item>
+    </group>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.project b/.project
new file mode 100644
index 0000000..a8309f9
--- /dev/null
+++ b/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>DaKaES</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>
diff --git a/NsantegouvListRe.jar b/NsantegouvListRe.jar
new file mode 100644
index 0000000..57433ef
Binary files /dev/null and b/NsantegouvListRe.jar differ
diff --git a/bin/.idea/.gitignore b/bin/.idea/.gitignore
new file mode 100644
index 0000000..73f69e0
--- /dev/null
+++ b/bin/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/bin/.idea/compiler.xml b/bin/.idea/compiler.xml
new file mode 100644
index 0000000..e6b77de
--- /dev/null
+++ b/bin/.idea/compiler.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CompilerConfiguration">
+    <annotationProcessing>
+      <profile name="Maven default annotation processors profile" enabled="true">
+        <sourceOutputDir name="target/generated-sources/annotations" />
+        <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
+        <outputRelativeToContentRoot value="true" />
+        <module name="DaKaES" />
+      </profile>
+    </annotationProcessing>
+  </component>
+</project>
\ No newline at end of file
diff --git a/bin/.idea/jarRepositories.xml b/bin/.idea/jarRepositories.xml
new file mode 100644
index 0000000..712ab9d
--- /dev/null
+++ b/bin/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="RemoteRepositoriesConfiguration">
+    <remote-repository>
+      <option name="id" value="central" />
+      <option name="name" value="Central Repository" />
+      <option name="url" value="https://repo.maven.apache.org/maven2" />
+    </remote-repository>
+    <remote-repository>
+      <option name="id" value="central" />
+      <option name="name" value="Maven Central repository" />
+      <option name="url" value="https://repo1.maven.org/maven2" />
+    </remote-repository>
+    <remote-repository>
+      <option name="id" value="jboss.community" />
+      <option name="name" value="JBoss Community repository" />
+      <option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
+    </remote-repository>
+  </component>
+</project>
\ No newline at end of file
diff --git a/bin/.idea/misc.xml b/bin/.idea/misc.xml
new file mode 100644
index 0000000..ae9c995
--- /dev/null
+++ b/bin/.idea/misc.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ExternalStorageConfigurationManager" enabled="true" />
+  <component name="MavenProjectsManager">
+    <option name="originalFiles">
+      <list>
+        <option value="$PROJECT_DIR$/pom.xml" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="corretto-1.8" project-jdk-type="JavaSDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/bin/.project b/bin/.project
new file mode 100644
index 0000000..a8309f9
--- /dev/null
+++ b/bin/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>DaKaES</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>
diff --git a/bin/hs_err_pid15760.log b/bin/hs_err_pid15760.log
new file mode 100644
index 0000000..3459d43
--- /dev/null
+++ b/bin/hs_err_pid15760.log
@@ -0,0 +1,167 @@
+#
+# There is insufficient memory for the Java Runtime Environment to continue.
+# Native memory allocation (malloc) failed to allocate 1048576 bytes for AllocateHeap
+# Possible reasons:
+#   The system is out of physical RAM or swap space
+#   The process is running with CompressedOops enabled, and the Java Heap may be blocking the growth of the native heap
+# Possible solutions:
+#   Reduce memory load on the system
+#   Increase physical memory or swap space
+#   Check if swap backing store is full
+#   Decrease Java heap size (-Xmx/-Xms)
+#   Decrease number of Java threads
+#   Decrease Java thread stack sizes (-Xss)
+#   Set larger code cache with -XX:ReservedCodeCacheSize=
+#   JVM is running with Zero Based Compressed Oops mode in which the Java heap is
+#     placed in the first 32GB address space. The Java Heap base address is the
+#     maximum limit for the native heap growth. Please use -XX:HeapBaseMinAddress
+#     to set the Java Heap base and to place the Java Heap above 32GB virtual address.
+# This output file may be truncated or incomplete.
+#
+#  Out of Memory Error (memory/allocation.inline.hpp:61), pid=15760, tid=0x0000000000003334
+#
+# JRE version:  (8.0_422-b05) (build )
+# Java VM: OpenJDK 64-Bit Server VM (25.422-b05 mixed mode windows-amd64 compressed oops)
+# Failed to write core dump. Minidumps are not enabled by default on client versions of Windows
+#
+
+---------------  T H R E A D  ---------------
+
+Current thread (0x00000271b7d7d800):  JavaThread "Unknown thread" [_thread_in_vm, id=13108, stack(0x00000082a1500000,0x00000082a1600000)]
+
+Stack: [0x00000082a1500000,0x00000082a1600000]
+[error occurred during error reporting (printing stack bounds), id 0xc0000005]
+
+Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
+
+
+---------------  P R O C E S S  ---------------
+
+Java Threads: ( => current thread )
+
+Other Threads:
+
+=>0x00000271b7d7d800 (exited) JavaThread "Unknown thread" [_thread_in_vm, id=13108, stack(0x00000082a1500000,0x00000082a1600000)]
+
+VM state:not at safepoint (normal execution)
+
+VM Mutex/Monitor currently owned by a thread: None
+
+heap address: 0x00000006c4000000, size: 4032 MB, Compressed Oops mode: Zero based, Oop shift amount: 3
+Narrow klass base: 0x0000000000000000, Narrow klass shift: 3
+Compressed class space size: 1073741824 Address: 0x00000007c0000000
+
+Heap:
+ PSYoungGen      total 75264K, used 1290K [0x000000076c000000, 0x0000000771400000, 0x00000007c0000000)
+  eden space 64512K, 2% used [0x000000076c000000,0x000000076c142900,0x000000076ff00000)
+  from space 10752K, 0% used [0x0000000770980000,0x0000000770980000,0x0000000771400000)
+  to   space 10752K, 0% used [0x000000076ff00000,0x000000076ff00000,0x0000000770980000)
+ ParOldGen       total 172032K, used 0K [0x00000006c4000000, 0x00000006ce800000, 0x000000076c000000)
+  object space 172032K, 0% used [0x00000006c4000000,0x00000006c4000000,0x00000006ce800000)
+ Metaspace       used 790K, capacity 4480K, committed 4480K, reserved 1056768K
+  class space    used 76K, capacity 384K, committed 384K, reserved 1048576K
+
+Card table byte_map: [0x00000271c8b70000,0x00000271c9360000] byte_map_base: 0x00000271c5550000
+
+Marking Bits: (ParMarkBitMap*) 0x00000000521f38d0
+ Begin Bits: [0x00000271c98a0000, 0x00000271cd7a0000)
+ End Bits:   [0x00000271cd7a0000, 0x00000271d16a0000)
+
+Polling page: 0x00000271b7eb0000
+
+CodeCache: size=245760Kb used=328Kb max_used=328Kb free=245431Kb
+ bounds [0x00000271b97b0000, 0x00000271b9a20000, 0x00000271c87b0000]
+ total_blobs=57 nmethods=0 adapters=38
+ compilation: enabled
+
+Compilation events (0 events):
+No events
+
+GC Heap History (0 events):
+No events
+
+Deoptimization events (0 events):
+No events
+
+Classes redefined (0 events):
+No events
+
+Internal exceptions (0 events):
+No events
+
+Events (10 events):
+Event: 0.012 loading class java/lang/Short
+Event: 0.013 loading class java/lang/Short done
+Event: 0.013 loading class java/lang/Integer
+Event: 0.013 loading class java/lang/Integer done
+Event: 0.013 loading class java/lang/Long
+Event: 0.013 loading class java/lang/Long done
+Event: 0.013 loading class java/lang/NullPointerException
+Event: 0.013 loading class java/lang/NullPointerException done
+Event: 0.013 loading class java/lang/ArithmeticException
+Event: 0.013 loading class java/lang/ArithmeticException done
+
+
+Dynamic libraries:
+0x00007ff7d7590000 - 0x00007ff7d75d6000 	C:\Users\18264\.jdks\corretto-1.8.0_422\bin\java.exe
+0x00007ffa1d0b0000 - 0x00007ffa1d2a8000 	C:\Windows\SYSTEM32\ntdll.dll
+0x00007ffa1ce90000 - 0x00007ffa1cf52000 	C:\Windows\System32\KERNEL32.DLL
+0x00007ffa1add0000 - 0x00007ffa1b0cf000 	C:\Windows\System32\KERNELBASE.dll
+0x00007ffa1c470000 - 0x00007ffa1c51f000 	C:\Windows\System32\ADVAPI32.dll
+0x00007ffa1cf60000 - 0x00007ffa1cffe000 	C:\Windows\System32\msvcrt.dll
+0x00007ffa1cdf0000 - 0x00007ffa1ce8f000 	C:\Windows\System32\sechost.dll
+0x00007ffa1c580000 - 0x00007ffa1c6a3000 	C:\Windows\System32\RPCRT4.dll
+0x00007ffa1ada0000 - 0x00007ffa1adc7000 	C:\Windows\System32\bcrypt.dll
+0x00007ffa1be50000 - 0x00007ffa1bfed000 	C:\Windows\System32\USER32.dll
+0x00007ffa1a7a0000 - 0x00007ffa1a7c2000 	C:\Windows\System32\win32u.dll
+0x00007ffa1bff0000 - 0x00007ffa1c01b000 	C:\Windows\System32\GDI32.dll
+0x00007ffa1ac80000 - 0x00007ffa1ad9a000 	C:\Windows\System32\gdi32full.dll
+0x00007ffa1aaa0000 - 0x00007ffa1ab3d000 	C:\Windows\System32\msvcp_win.dll
+0x00007ffa1a9a0000 - 0x00007ffa1aaa0000 	C:\Windows\System32\ucrtbase.dll
+0x00007ffa00e00000 - 0x00007ffa0109a000 	C:\Windows\WinSxS\amd64_microsoft.windows.common-controls_6595b64144ccf1df_6.0.19041.4355_none_60b8b9eb71f62e16\COMCTL32.dll
+0x00007ffa1c030000 - 0x00007ffa1c05f000 	C:\Windows\System32\IMM32.DLL
+0x00007ffa10f70000 - 0x00007ffa10f85000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\vcruntime140.dll
+0x00007ff9ceb10000 - 0x00007ff9cebab000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\msvcp140.dll
+0x0000000051a10000 - 0x000000005226c000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\server\jvm.dll
+0x00007ffa1c020000 - 0x00007ffa1c028000 	C:\Windows\System32\PSAPI.DLL
+0x00007ff9fac50000 - 0x00007ff9fac59000 	C:\Windows\SYSTEM32\WSOCK32.dll
+0x00007ffa0d800000 - 0x00007ffa0d827000 	C:\Windows\SYSTEM32\WINMM.dll
+0x00007ffa0ff90000 - 0x00007ffa0ff9a000 	C:\Windows\SYSTEM32\VERSION.dll
+0x00007ffa1c060000 - 0x00007ffa1c0cb000 	C:\Windows\System32\WS2_32.dll
+0x00007ffa18f70000 - 0x00007ffa18f82000 	C:\Windows\SYSTEM32\kernel.appcore.dll
+0x00007ffa10fc0000 - 0x00007ffa10fd0000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\verify.dll
+0x00007ffa0aec0000 - 0x00007ffa0aeeb000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\java.dll
+0x00007ff9ca260000 - 0x00007ff9ca296000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\jdwp.dll
+0x00007ffa0af80000 - 0x00007ffa0af89000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\npt.dll
+0x00007ff9c1ab0000 - 0x00007ff9c1ae2000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\instrument.dll
+0x00007ffa008e0000 - 0x00007ffa008f8000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\zip.dll
+
+VM Arguments:
+jvm_args: -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:56727,suspend=y,server=n -javaagent:C:\Users\18264\AppData\Local\JetBrains\IntelliJIdea2021.1\captureAgent\debugger-agent.jar -Dfile.encoding=UTF-8 
+java_command: com.example.saveInES
+java_class_path (initial): C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\charsets.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\access-bridge-64.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\cldrdata.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\dnsns.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\jaccess.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\jfxrt.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\localedata.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\nashorn.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunec.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunjce_provider.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunmscapi.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunpkcs11.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\zipfs.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jce.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jfr.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jfxswt.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jsse.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\management-agent.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\resources.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\rt.jar;F:\workTest\DaKaES\target\classes;C:\Users\18264\.m2\repository\org\elasticsearch\client\elasticsearch-rest-high-level-client\7.17.0\elasticsearch-rest-high-level-client-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch\7.17.0\elasticsearch-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch-core\7.17.0\elasticsearch-core-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch-secure-sm\7.17.0\elasticsearch-secure-sm-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch-x-content\7.17.0\elasticsearch-x-content-7.17.0.jar;C:\Users\18264\.m2\repository\org\yaml\snakeyaml\1.26\snakeyaml-1.26.jar;C:\Users\18264\.m2\repository\c
+Launcher Type: SUN_STANDARD
+
+Environment Variables:
+JAVA_HOME=E:\java
+PATH=C:\Program Files\Common Files\Oracle\Java\javapath;D:\vm\bin\;E:\app\18264\product\11.2.0\dbhome_1\bin;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;E:\java\bin;F:\mysql\mysql-5.7.37-winx64\mysql-5.7.37-winx64\bin;D:\matlab\Matlab R2022a\bin;C:\Program Files (x86)\dotnet\;C:\Program Files\dotnet\;D:\winscp\WinSCP\;F:\javaAbout\apache-maven-3.6.3\bin;C:\Program Files\Git\cmd;F:\tool\nvm\nvm;F:\tool\node;C:\Users\18264\AppData\Local\Programs\Python\Python311\Scripts\;C:\Users\18264\AppData\Local\Programs\Python\Python311\;C:\Users\18264\AppData\Local\Programs\Python\Python37\Scripts\;C:\Users\18264\AppData\Local\Programs\Python\Python37\;C:\Users\18264\AppData\Local\Programs\Python\Launcher\;C:\Users\18264\AppData\Local\Microsoft\WindowsApps;D:\Microsoft VS Code\bin;F:\idea\IntelliJ IDEA 2021.1.3\bin;;F:\tool\nvm\nvm;F:\tool\node
+USERNAME=18264
+OS=Windows_NT
+PROCESSOR_IDENTIFIER=Intel64 Family 6 Model 141 Stepping 1, GenuineIntel
+
+
+
+---------------  S Y S T E M  ---------------
+
+OS: Windows 10 , 64 bit Build 19041 (10.0.19041.5438)
+
+CPU:total 16 (initial active 16) (8 cores per cpu, 2 threads per core) family 6 model 141 stepping 1, cmov, cx8, fxsr, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, popcnt, avx, avx2, aes, clmul, erms, 3dnowpref, lzcnt, ht, tsc, tscinvbit, bmi1, bmi2, adx
+
+Memory: 4k page, physical 16509736k(919328k free), swap 36170532k(5620k free)
+
+vm_info: OpenJDK 64-Bit Server VM (25.422-b05) for windows-amd64 JRE (1.8.0_422-b05), built on Jul 11 2024 17:20:01 by "Administrator" with MS VC++ 15.9 (VS2017)
+
+time: Tue Mar  4 14:31:48 2025
+timezone: Intel64 Family 6 Model 141 Stepping 1, GenuineIntel
+elapsed time: 0.022707 seconds (0d 0h 0m 0s)
+
diff --git a/bin/keywords.txt b/bin/keywords.txt
new file mode 100644
index 0000000..2358ab6
--- /dev/null
+++ b/bin/keywords.txt
@@ -0,0 +1,6 @@
+Montpellier Institute of Virology, France
+Ontario Public Health Laboratory, Canada
+University of Texas Biosafety Laboratory, USA
+Korea National Institute of Infectious Diseases (KCDC)
+Israel Institute of Life Sciences
+Biosafety Laboratory, University of Basel, Switzerland
\ No newline at end of file
diff --git a/bin/pom.xml b/bin/pom.xml
new file mode 100644
index 0000000..730bf94
--- /dev/null
+++ b/bin/pom.xml
@@ -0,0 +1,138 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>es-crawler</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <!-- Elasticsearch High Level REST Client -->
+        <dependency>
+            <groupId>org.elasticsearch.client</groupId>
+            <artifactId>elasticsearch-rest-high-level-client</artifactId>
+            <version>7.17.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>co.elastic.clients</groupId>
+            <artifactId>elasticsearch-java</artifactId>
+            <version>7.17.15</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.15.0</version>
+        </dependency>
+
+        <!-- Jsoup HTML parser -->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+
+        <!-- OkHttp -->
+        <dependency>
+            <groupId>com.squareup.okhttp3</groupId>
+            <artifactId>okhttp</artifactId>
+            <version>4.9.3</version>
+        </dependency>
+
+        <!-- Logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.36</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.2.11</version>
+        </dependency>
+
+        <!-- Kafka client -->
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka-clients</artifactId>
+            <version>3.9.0</version>
+        </dependency>
+
+        <!-- Selenium Java -->
+        <dependency>
+            <groupId>org.seleniumhq.selenium</groupId>
+            <artifactId>selenium-java</artifactId>
+            <version>4.10.0</version>
+        </dependency>
+
+        <!-- WebDriver Manager -->
+        <dependency>
+            <groupId>io.github.bonigarcia</groupId>
+            <artifactId>webdrivermanager</artifactId>
+            <version>5.6.2</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.json</groupId>
+            <artifactId>json</artifactId>
+            <version>20230227</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.10.1</version>
+        </dependency>
+
+        <dependency>
+            <groupId>net.sourceforge.htmlunit</groupId>
+            <artifactId>htmlunit</artifactId>
+            <version>2.61.0</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Compiler plugin; keeps the Java 8 configuration -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>8</source>
+                    <target>8</target>
+                </configuration>
+            </plugin>
+            <!-- Assembly plugin; packages an executable JAR that includes its dependencies -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.projTopic</mainClass> <!-- Replace with the fully qualified name of your main class -->
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/bin/processed_urls.txt b/bin/processed_urls.txt
new file mode 100644
index 0000000..f862ec3
--- /dev/null
+++ b/bin/processed_urls.txt
@@ -0,0 +1,281 @@
+
+https://www.zyctd.com/zixun/201/1055143.html
+https://www.zyctd.com/zixun/201/861786.html
+https://www.zyctd.com/zixun/201/1053482.html
+https://www.zyctd.com/zixun/201/269419.html
+https://www.zyctd.com/zixun/201/1053149.html
+https://www.zyctd.com/zixun/201/1023926.html
+https://www.zyctd.com/zixun/201/435325.html
+https://www.zyctd.com/zixun/201/1050302.html
+https://www.zyctd.com/zixun/201/880441.html
+https://www.zyctd.com/zixun/201/1019635.html
+https://www.zyctd.com/zixun/201/970572.html
+https://www.zyctd.com/zixun/201/912277.html
+https://www.zyctd.com/zixun/201/372444.html
+https://www.zyctd.com/zixun/201/1073629.html
+https://www.zyctd.com/zixun/201/1069386.html
+https://www.zyctd.com/zixun/201/730410.html
+https://www.zyctd.com/zixun/201/953220.html
+https://www.zyctd.com/zixun/201/1074339.html
+https://www.zyctd.com/zixun/201/1072317.html
+https://www.zyctd.com/zixun/201/294794.html
+https://www.zyctd.com/zixun/201/267592.html
+https://www.zyctd.com/zixun/201/979665.html
+https://www.zyctd.com/zixun/201/869885.html
+https://www.zyctd.com/zixun/201/1054064.html
+https://www.zyctd.com/zixun/201/1049331.html
+https://www.zyctd.com/zixun/201/442647.html
+https://www.zyctd.com/zixun/201/285992.html
+https://www.zyctd.com/zixun/201/1037972.html
+https://www.zyctd.com/zixun/201/799801.html
+https://www.zyctd.com/zixun/201/916078.html
+https://www.zyctd.com/zixun/201/456647.html
+https://www.zyctd.com/zixun/201/812121.html
+https://www.zyctd.com/zixun/201/1042740.html
+https://www.zyctd.com/zixun/201/1042708.html
+https://www.zyctd.com/zixun/201/840450.html
+https://www.zyctd.com/zixun/201/320749.html
+https://www.zyctd.com/zixun/201/496106.html
+https://www.zyctd.com/zixun/201/850201.html
+https://www.zyctd.com/zixun/201/277145.html
+https://www.zyctd.com/zixun/201/299091.html
+https://www.zyctd.com/zixun/201/266080.html
+https://www.zyctd.com/zixun/201/1051925.html
+https://www.zyctd.com/zixun/201/898081.html
+https://www.zyctd.com/zixun/201/873280.html
+https://www.zyctd.com/zixun/201/703880.html
+https://www.zyctd.com/zixun/201/873126.html
+https://www.zyctd.com/zixun/201/887931.html
+https://www.zyctd.com/zixun/201/432742.html
+https://www.zyctd.com/zixun/201/1040431.html
+https://www.zyctd.com/zixun/201/1040223.html
+https://www.zyctd.com/zixun/201/858118.html
+https://www.zyctd.com/zixun/201/971286.html
+https://www.zyctd.com/zixun/201/458488.html
+https://www.zyctd.com/zixun/201/1079381.html
+https://www.zyctd.com/zixun/201/263578.html
+https://www.zyctd.com/zixun/201/553513.html
+https://www.zyctd.com/zixun/201/286229.html
+https://www.zyctd.com/zixun/201/285365.html
+https://www.zyctd.com/zixun/201/352921.html
+https://www.zyctd.com/zixun/201/503267.html
+https://www.zyctd.com/zixun/201/391337.html
+https://www.zyctd.com/zixun/201/813052.html
+https://www.zyctd.com/zixun/201/1053556.html
+https://www.zyctd.com/zixun/201/1041197.html
+https://www.zyctd.com/zixun/201/287420.html
+https://www.zyctd.com/zixun/201/291563.html
+https://www.zyctd.com/zixun/201/948250.html
+https://www.zyctd.com/zixun/201/289034.html
+https://www.zyctd.com/zixun/201/795965.html
+https://www.zyctd.com/zixun/201/292962.html
+https://www.zyctd.com/zixun/201/975850.html
+https://www.zyctd.com/zixun/201/275335.html
+https://www.zyctd.com/zixun/201/1031992.html
+https://www.zyctd.com/zixun/201/1033886.html
+https://www.zyctd.com/zixun/201/999510.html
+https://www.zyctd.com/zixun/201/270144.html
+https://www.zyctd.com/zixun/201/1055519.html
+https://www.zyctd.com/zixun/201/272205.html
+https://www.zyctd.com/zixun/201/526059.html
+https://www.zyctd.com/zixun/201/456640.html
+https://www.zyctd.com/zixun/201/267952.html
+https://www.zyctd.com/zixun/201/803469.html
+https://www.zyctd.com/zixun/201/270763.html
+https://www.zyctd.com/zixun/201/1072987.html
+https://www.zyctd.com/zixun/201/265176.html
+https://www.zyctd.com/zixun/201/1022141.html
+https://www.zyctd.com/zixun/201/290173.html
+https://www.zyctd.com/zixun/201/269175.html
+https://www.zyctd.com/zixun/201/744991.html
+https://www.zyctd.com/zixun/201/1019131.html
+https://www.zyctd.com/zixun/201/717054.html
+https://www.zyctd.com/zixun/201/517358.html
+https://www.zyctd.com/zixun/201/1058505.html
+https://www.zyctd.com/zixun/201/905515.html
+https://www.zyctd.com/zixun/201/287395.html
+https://www.zyctd.com/zixun/201/934873.html
+https://www.zyctd.com/zixun/201/1051317.html
+https://www.zyctd.com/zixun/201/926018.html
+https://www.zyctd.com/zixun/201/334511.html
+https://www.zyctd.com/zixun/201/845896.html
+https://www.zyctd.com/zixun/201/587785.html
+https://www.zyctd.com/zixun/201/288376.html
+https://www.zyctd.com/zixun/201/851405.html
+https://www.zyctd.com/zixun/201/941404.html
+https://www.zyctd.com/zixun/201/881855.html
+https://www.zyctd.com/zixun/201/602632.html
+https://www.zyctd.com/zixun/201/293601.html
+https://www.zyctd.com/zixun/201/541809.html
+https://www.zyctd.com/zixun/201/335120.html
+https://www.zyctd.com/zixun/201/1031137.html
+https://www.zyctd.com/zixun/201/960101.html
+https://www.zyctd.com/zixun/201/1077142.html
+https://www.zyctd.com/zixun/201/1063222.html
+https://www.zyctd.com/zixun/201/681466.html
+https://www.zyctd.com/zixun/201/1031130.html
+https://www.zyctd.com/zixun/201/1073734.html
+https://www.zyctd.com/zixun/201/1062186.html
+https://www.zyctd.com/zixun/201/1046628.html
+https://www.zyctd.com/zixun/201/358892.html
+https://www.zyctd.com/zixun/201/285361.html
+https://www.zyctd.com/zixun/201/1059889.html
+https://www.zyctd.com/zixun/201/297824.html
+https://www.zyctd.com/zixun/201/844307.html
+https://www.zyctd.com/zixun/201/900524.html
+https://www.zyctd.com/zixun/201/1057636.html
+https://www.zyctd.com/zixun/201/1010080.html
+https://www.zyctd.com/zixun/201/409152.html
+https://www.zyctd.com/zixun/201/402782.html
+https://www.zyctd.com/zixun/201/770296.html
+https://www.zyctd.com/zixun/201/1040602.html
+https://www.zyctd.com/zixun/201/606503.html
+https://www.zyctd.com/zixun/201/784471.html
+https://www.zyctd.com/zixun/201/466097.html
+https://www.zyctd.com/zixun/201/1071160.html
+https://www.zyctd.com/zixun/201/623226.html
+https://www.zyctd.com/zixun/201/948264.html
+https://www.zyctd.com/zixun/201/293462.html
+https://www.zyctd.com/zixun/201/829348.html
+https://www.zyctd.com/zixun/201/332369.html
+https://www.zyctd.com/zixun/201/907461.html
+https://www.zyctd.com/zixun/201/756555.html
+https://www.zyctd.com/zixun/201/717915.html
+https://www.zyctd.com/zixun/201/262203.html
+https://www.zyctd.com/zixun/201/1055787.html
+https://www.zyctd.com/zixun/201/432336.html
+https://www.zyctd.com/zixun/201/907489.html
+https://www.zyctd.com/zixun/201/1014686.html
+https://www.zyctd.com/zixun/201/1053320.html
+https://www.zyctd.com/zixun/201/480020.html
+https://www.zyctd.com/zixun/201/287423.html
+https://www.zyctd.com/zixun/201/385289.html
+https://www.zyctd.com/zixun/201/1030421.html
+https://www.zyctd.com/zixun/201/527648.html
+https://www.zyctd.com/zixun/201/972959.html
+https://www.zyctd.com/zixun/201/408767.html
+https://www.zyctd.com/zixun/201/724887.html
+https://www.zyctd.com/zixun/201/291480.html
+https://www.zyctd.com/zixun/201/472544.html
+https://www.zyctd.com/zixun/201/724873.html
+https://www.zyctd.com/zixun/201/281751.html
+https://www.zyctd.com/zixun/201/1049693.html
+https://www.zyctd.com/zixun/201/869619.html
+https://www.zyctd.com/zixun/201/355497.html
+https://www.zyctd.com/zixun/201/341623.html
+https://www.zyctd.com/zixun/201/450753.html
+https://www.zyctd.com/zixun/201/1065837.html
+https://www.zyctd.com/zixun/201/1031331.html
+https://www.zyctd.com/zixun/201/669727.html
+https://www.zyctd.com/zixun/201/1034010.html
+https://www.zyctd.com/zixun/201/1054058.html
+https://www.zyctd.com/zixun/201/954613.html
+https://www.zyctd.com/zixun/201/715584.html
+https://www.zyctd.com/zixun/201/1051110.html
+https://www.zyctd.com/zixun/201/269963.html
+https://www.zyctd.com/zixun/201/1048128.html
+https://www.zyctd.com/zixun/201/793207.html
+https://www.zyctd.com/zixun/201/284310.html
+https://www.zyctd.com/zixun/201/282639.html
+https://www.zyctd.com/zixun/201/1068138.html
+https://www.zyctd.com/zixun/201/340678.html
+https://www.zyctd.com/zixun/201/294371.html
+https://www.zyctd.com/zixun/201/324277.html
+https://www.zyctd.com/zixun/201/1048931.html
+https://www.zyctd.com/zixun/201/851398.html
+https://www.zyctd.com/zixun/201/263527.html
+https://www.zyctd.com/zixun/201/919480.html
+https://www.zyctd.com/zixun/201/685442.html
+https://www.zyctd.com/zixun/201/428325.html
+https://www.zyctd.com/zixun/201/1032698.html
+https://www.zyctd.com/zixun/201/1003367.html
+https://www.zyctd.com/zixun/201/852315.html
+https://www.zyctd.com/zixun/201/283156.html
+https://www.zyctd.com/zixun/201/262484.html
+https://www.zyctd.com/zixun/201/1065225.html
+https://www.zyctd.com/zixun/201/763331.html
+https://www.zyctd.com/zixun/201/1066158.html
+https://www.zyctd.com/zixun/201/1047744.html
+https://www.zyctd.com/zixun/201/842795.html
+https://www.zyctd.com/zixun/201/975374.html
+https://www.zyctd.com/zixun/201/1055865.html
+https://www.zyctd.com/zixun/201/1017367.html
+https://www.zyctd.com/zixun/201/1057711.html
+https://www.zyctd.com/zixun/201/1074295.html
+https://www.zyctd.com/zixun/201/283647.html
+https://www.zyctd.com/zixun/201/286896.html
+https://www.zyctd.com/zixun/201/1043393.html
+https://www.zyctd.com/zixun/201/305888.html
+https://www.zyctd.com/zixun/201/487258.html
+https://www.zyctd.com/zixun/201/1045652.html
+https://www.zyctd.com/zixun/201/1064905.html
+https://www.zyctd.com/zixun/201/515636.html
+https://www.zyctd.com/zixun/201/1038609.html
+https://www.zyctd.com/zixun/201/438083.html
+https://www.zyctd.com/zixun/201/297327.html
+https://www.zyctd.com/zixun/201/773537.html
+https://www.zyctd.com/zixun/201/1043589.html
+https://www.zyctd.com/zixun/201/815712.html
+https://www.zyctd.com/zixun/201/698595.html
+https://www.zyctd.com/zixun/201/269800.html
+https://www.zyctd.com/zixun/201/1030332.html
+https://www.zyctd.com/zixun/201/422676.html
+https://www.zyctd.com/zixun/201/290130.html
+https://www.zyctd.com/zixun/201/270359.html
+https://www.zyctd.com/zixun/201/995604.html
+https://www.zyctd.com/zixun/201/1074993.html
+https://www.zyctd.com/zixun/201/1054825.html
+https://www.zyctd.com/zixun/201/918577.html
+https://www.zyctd.com/zixun/201/686527.html
+https://www.zyctd.com/zixun/201/297509.html
+https://www.zyctd.com/zixun/201/622708.html
+https://www.zyctd.com/zixun/201/469870.html
+https://www.zyctd.com/zixun/201/844328.html
+https://www.zyctd.com/zixun/201/394508.html
+https://www.zyctd.com/zixun/201/271744.html
+https://www.zyctd.com/zixun/201/1054940.html
+https://www.zyctd.com/zixun/201/732818.html
+https://www.zyctd.com/zixun/201/1049547.html
+https://www.zyctd.com/zixun/201/1059684.html
+https://www.zyctd.com/zixun/201/1055301.html
+https://www.zyctd.com/zixun/201/962068.html
+https://www.zyctd.com/zixun/201/451355.html
+https://www.zyctd.com/zixun/201/1056174.html
+https://www.zyctd.com/zixun/201/930540.html
+https://www.zyctd.com/zixun/201/871656.html
+https://www.zyctd.com/zixun/201/363246.html
+https://www.zyctd.com/zixun/201/845672.html
+https://www.zyctd.com/zixun/201/452965.html
+https://www.zyctd.com/zixun/201/1065920.html
+https://www.zyctd.com/zixun/201/1058808.html
+https://www.zyctd.com/zixun/201/986868.html
+https://www.zyctd.com/zixun/201/489785.html
+https://www.zyctd.com/zixun/201/307946.html
+https://www.zyctd.com/zixun/201/833359.html
+https://www.zyctd.com/zixun/201/806969.html
+https://www.zyctd.com/zixun/201/1050812.html
+https://www.zyctd.com/zixun/201/1033696.html
+https://www.zyctd.com/zixun/201/501167.html
+https://www.zyctd.com/zixun/201/1078919.html
+https://www.zyctd.com/zixun/201/1036495.html
+https://www.zyctd.com/zixun/201/1008736.html
+https://www.zyctd.com/zixun/201/1054264.html
+https://www.zyctd.com/zixun/201/493152.html
+https://www.zyctd.com/zixun/201/685456.html
+https://www.zyctd.com/zixun/201/995597.html
+https://www.zyctd.com/zixun/201/905501.html
+https://www.zyctd.com/zixun/201/347573.html
+https://www.zyctd.com/zixun/201/1045494.html
+https://www.zyctd.com/zixun/201/549775.html
+https://www.zyctd.com/zixun/201/1037336.html
+https://www.zyctd.com/zixun/201/1034972.html
+https://www.zyctd.com/zixun/201/653046.html
+https://www.zyctd.com/zixun/201/316612.html
+https://www.zyctd.com/zixun/201/447064.html
+https://www.zyctd.com/zixun/201/307603.html
+https://www.zyctd.com/zixun/201/263437.html
+https://www.zyctd.com/zixun/201/894490.html
+https://www.zyctd.com/zixun/201/368629.html
+https://www.zyctd.com/zixun/201/273285.html
+https://www.zyctd.com/zixun/201/1059618.html
+https://www.zyctd.com/zixun/201/459237.html
diff --git a/bin/proxy.txt b/bin/proxy.txt
new file mode 100644
index 0000000..199a16c
--- /dev/null
+++ b/bin/proxy.txt
@@ -0,0 +1 @@
+127.0.0.1:7897
\ No newline at end of file
diff --git a/bin/src/main/java/com/example/Inka.class b/bin/src/main/java/com/example/Inka.class
new file mode 100644
index 0000000..ac137ee
Binary files /dev/null and b/bin/src/main/java/com/example/Inka.class differ
diff --git a/bin/src/main/java/com/example/NSFAwardCrawler.class b/bin/src/main/java/com/example/NSFAwardCrawler.class
new file mode 100644
index 0000000..eb1e050
Binary files /dev/null and b/bin/src/main/java/com/example/NSFAwardCrawler.class differ
diff --git a/bin/src/main/java/com/example/PatentscopeSeleniumCrawler.class b/bin/src/main/java/com/example/PatentscopeSeleniumCrawler.class
new file mode 100644
index 0000000..ee2edab
Binary files /dev/null and b/bin/src/main/java/com/example/PatentscopeSeleniumCrawler.class differ
diff --git a/bin/src/main/java/com/example/ProxyIPChecker.class b/bin/src/main/java/com/example/ProxyIPChecker.class
new file mode 100644
index 0000000..1b87f7c
Binary files /dev/null and b/bin/src/main/java/com/example/ProxyIPChecker.class differ
diff --git a/bin/src/main/java/com/example/StringFieldExtractor.class b/bin/src/main/java/com/example/StringFieldExtractor.class
new file mode 100644
index 0000000..d938b0b
Binary files /dev/null and b/bin/src/main/java/com/example/StringFieldExtractor.class differ
diff --git a/bin/src/main/java/com/example/getInKa.class b/bin/src/main/java/com/example/getInKa.class
new file mode 100644
index 0000000..a9baf48
Binary files /dev/null and b/bin/src/main/java/com/example/getInKa.class differ
diff --git a/bin/src/main/java/com/example/jsonGetOk.class b/bin/src/main/java/com/example/jsonGetOk.class
new file mode 100644
index 0000000..a9f68b9
Binary files /dev/null and b/bin/src/main/java/com/example/jsonGetOk.class differ
diff --git a/bin/src/main/java/com/example/ook.class b/bin/src/main/java/com/example/ook.class
new file mode 100644
index 0000000..8a7afb8
Binary files /dev/null and b/bin/src/main/java/com/example/ook.class differ
diff --git a/bin/src/main/java/com/example/oook.class b/bin/src/main/java/com/example/oook.class
new file mode 100644
index 0000000..a140aa5
Binary files /dev/null and b/bin/src/main/java/com/example/oook.class differ
diff --git a/bin/src/main/java/com/example/projTopic.class b/bin/src/main/java/com/example/projTopic.class
new file mode 100644
index 0000000..29af390
Binary files /dev/null and b/bin/src/main/java/com/example/projTopic.class differ
diff --git a/bin/src/main/java/com/example/saveInES.class b/bin/src/main/java/com/example/saveInES.class
new file mode 100644
index 0000000..1bf30e6
Binary files /dev/null and b/bin/src/main/java/com/example/saveInES.class differ
diff --git a/bin/src/main/java/com/example/test.class b/bin/src/main/java/com/example/test.class
new file mode 100644
index 0000000..95aa04c
Binary files /dev/null and b/bin/src/main/java/com/example/test.class differ
diff --git a/bin/src/main/java/com/example/test2.class b/bin/src/main/java/com/example/test2.class
new file mode 100644
index 0000000..6f03608
Binary files /dev/null and b/bin/src/main/java/com/example/test2.class differ
diff --git a/bin/src/main/java/com/example/testContent.class b/bin/src/main/java/com/example/testContent.class
new file mode 100644
index 0000000..16cc481
Binary files /dev/null and b/bin/src/main/java/com/example/testContent.class differ
diff --git a/bin/src/main/java/com/example/umlistTest.class b/bin/src/main/java/com/example/umlistTest.class
new file mode 100644
index 0000000..ccfdbcd
Binary files /dev/null and b/bin/src/main/java/com/example/umlistTest.class differ
diff --git a/bin/target/classes/META-INF/MANIFEST.MF b/bin/target/classes/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..38f1f7e
--- /dev/null
+++ b/bin/target/classes/META-INF/MANIFEST.MF
@@ -0,0 +1,4 @@
+Manifest-Version: 1.0
+Build-Jdk-Spec: 22
+Created-By: Maven Integration for Eclipse
+
diff --git a/bin/target/es-crawler-1.0-SNAPSHOT-jar-with-dependencies.jar b/bin/target/es-crawler-1.0-SNAPSHOT-jar-with-dependencies.jar
new file mode 100644
index 0000000..041697a
Binary files /dev/null and b/bin/target/es-crawler-1.0-SNAPSHOT-jar-with-dependencies.jar differ
diff --git a/bin/target/es-crawler-1.0-SNAPSHOT.jar b/bin/target/es-crawler-1.0-SNAPSHOT.jar
new file mode 100644
index 0000000..febbb6e
Binary files /dev/null and b/bin/target/es-crawler-1.0-SNAPSHOT.jar differ
diff --git a/bin/target/maven-archiver/pom.properties b/bin/target/maven-archiver/pom.properties
new file mode 100644
index 0000000..c35b816
--- /dev/null
+++ b/bin/target/maven-archiver/pom.properties
@@ -0,0 +1,5 @@
+#Generated by Maven
+#Fri Apr 18 18:29:46 CST 2025
+version=1.0-SNAPSHOT
+groupId=com.example
+artifactId=es-crawler
diff --git a/bin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/bin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..5f1323f
--- /dev/null
+++ b/bin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
@@ -0,0 +1 @@
+com\example\projTopic.class
diff --git a/bin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/bin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..90e26e0
--- /dev/null
+++ b/bin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1 @@
+F:\workTest\DaKaES\src\main\java\com\example\projTopic.java
diff --git a/bin/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst b/bin/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst
new file mode 100644
index 0000000..e69de29
diff --git a/bin/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst b/bin/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst
new file mode 100644
index 0000000..e69de29
diff --git a/hs_err_pid15760.log b/hs_err_pid15760.log
new file mode 100644
index 0000000..3459d43
--- /dev/null
+++ b/hs_err_pid15760.log
@@ -0,0 +1,167 @@
+#
+# There is insufficient memory for the Java Runtime Environment to continue.
+# Native memory allocation (malloc) failed to allocate 1048576 bytes for AllocateHeap
+# Possible reasons:
+#   The system is out of physical RAM or swap space
+#   The process is running with CompressedOops enabled, and the Java Heap may be blocking the growth of the native heap
+# Possible solutions:
+#   Reduce memory load on the system
+#   Increase physical memory or swap space
+#   Check if swap backing store is full
+#   Decrease Java heap size (-Xmx/-Xms)
+#   Decrease number of Java threads
+#   Decrease Java thread stack sizes (-Xss)
+#   Set larger code cache with -XX:ReservedCodeCacheSize=
+#   JVM is running with Zero Based Compressed Oops mode in which the Java heap is
+#     placed in the first 32GB address space. The Java Heap base address is the
+#     maximum limit for the native heap growth. Please use -XX:HeapBaseMinAddress
+#     to set the Java Heap base and to place the Java Heap above 32GB virtual address.
+# This output file may be truncated or incomplete.
+#
+#  Out of Memory Error (memory/allocation.inline.hpp:61), pid=15760, tid=0x0000000000003334
+#
+# JRE version:  (8.0_422-b05) (build )
+# Java VM: OpenJDK 64-Bit Server VM (25.422-b05 mixed mode windows-amd64 compressed oops)
+# Failed to write core dump. Minidumps are not enabled by default on client versions of Windows
+#
+
+---------------  T H R E A D  ---------------
+
+Current thread (0x00000271b7d7d800):  JavaThread "Unknown thread" [_thread_in_vm, id=13108, stack(0x00000082a1500000,0x00000082a1600000)]
+
+Stack: [0x00000082a1500000,0x00000082a1600000]
+[error occurred during error reporting (printing stack bounds), id 0xc0000005]
+
+Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
+
+
+---------------  P R O C E S S  ---------------
+
+Java Threads: ( => current thread )
+
+Other Threads:
+
+=>0x00000271b7d7d800 (exited) JavaThread "Unknown thread" [_thread_in_vm, id=13108, stack(0x00000082a1500000,0x00000082a1600000)]
+
+VM state:not at safepoint (normal execution)
+
+VM Mutex/Monitor currently owned by a thread: None
+
+heap address: 0x00000006c4000000, size: 4032 MB, Compressed Oops mode: Zero based, Oop shift amount: 3
+Narrow klass base: 0x0000000000000000, Narrow klass shift: 3
+Compressed class space size: 1073741824 Address: 0x00000007c0000000
+
+Heap:
+ PSYoungGen      total 75264K, used 1290K [0x000000076c000000, 0x0000000771400000, 0x00000007c0000000)
+  eden space 64512K, 2% used [0x000000076c000000,0x000000076c142900,0x000000076ff00000)
+  from space 10752K, 0% used [0x0000000770980000,0x0000000770980000,0x0000000771400000)
+  to   space 10752K, 0% used [0x000000076ff00000,0x000000076ff00000,0x0000000770980000)
+ ParOldGen       total 172032K, used 0K [0x00000006c4000000, 0x00000006ce800000, 0x000000076c000000)
+  object space 172032K, 0% used [0x00000006c4000000,0x00000006c4000000,0x00000006ce800000)
+ Metaspace       used 790K, capacity 4480K, committed 4480K, reserved 1056768K
+  class space    used 76K, capacity 384K, committed 384K, reserved 1048576K
+
+Card table byte_map: [0x00000271c8b70000,0x00000271c9360000] byte_map_base: 0x00000271c5550000
+
+Marking Bits: (ParMarkBitMap*) 0x00000000521f38d0
+ Begin Bits: [0x00000271c98a0000, 0x00000271cd7a0000)
+ End Bits:   [0x00000271cd7a0000, 0x00000271d16a0000)
+
+Polling page: 0x00000271b7eb0000
+
+CodeCache: size=245760Kb used=328Kb max_used=328Kb free=245431Kb
+ bounds [0x00000271b97b0000, 0x00000271b9a20000, 0x00000271c87b0000]
+ total_blobs=57 nmethods=0 adapters=38
+ compilation: enabled
+
+Compilation events (0 events):
+No events
+
+GC Heap History (0 events):
+No events
+
+Deoptimization events (0 events):
+No events
+
+Classes redefined (0 events):
+No events
+
+Internal exceptions (0 events):
+No events
+
+Events (10 events):
+Event: 0.012 loading class java/lang/Short
+Event: 0.013 loading class java/lang/Short done
+Event: 0.013 loading class java/lang/Integer
+Event: 0.013 loading class java/lang/Integer done
+Event: 0.013 loading class java/lang/Long
+Event: 0.013 loading class java/lang/Long done
+Event: 0.013 loading class java/lang/NullPointerException
+Event: 0.013 loading class java/lang/NullPointerException done
+Event: 0.013 loading class java/lang/ArithmeticException
+Event: 0.013 loading class java/lang/ArithmeticException done
+
+
+Dynamic libraries:
+0x00007ff7d7590000 - 0x00007ff7d75d6000 	C:\Users\18264\.jdks\corretto-1.8.0_422\bin\java.exe
+0x00007ffa1d0b0000 - 0x00007ffa1d2a8000 	C:\Windows\SYSTEM32\ntdll.dll
+0x00007ffa1ce90000 - 0x00007ffa1cf52000 	C:\Windows\System32\KERNEL32.DLL
+0x00007ffa1add0000 - 0x00007ffa1b0cf000 	C:\Windows\System32\KERNELBASE.dll
+0x00007ffa1c470000 - 0x00007ffa1c51f000 	C:\Windows\System32\ADVAPI32.dll
+0x00007ffa1cf60000 - 0x00007ffa1cffe000 	C:\Windows\System32\msvcrt.dll
+0x00007ffa1cdf0000 - 0x00007ffa1ce8f000 	C:\Windows\System32\sechost.dll
+0x00007ffa1c580000 - 0x00007ffa1c6a3000 	C:\Windows\System32\RPCRT4.dll
+0x00007ffa1ada0000 - 0x00007ffa1adc7000 	C:\Windows\System32\bcrypt.dll
+0x00007ffa1be50000 - 0x00007ffa1bfed000 	C:\Windows\System32\USER32.dll
+0x00007ffa1a7a0000 - 0x00007ffa1a7c2000 	C:\Windows\System32\win32u.dll
+0x00007ffa1bff0000 - 0x00007ffa1c01b000 	C:\Windows\System32\GDI32.dll
+0x00007ffa1ac80000 - 0x00007ffa1ad9a000 	C:\Windows\System32\gdi32full.dll
+0x00007ffa1aaa0000 - 0x00007ffa1ab3d000 	C:\Windows\System32\msvcp_win.dll
+0x00007ffa1a9a0000 - 0x00007ffa1aaa0000 	C:\Windows\System32\ucrtbase.dll
+0x00007ffa00e00000 - 0x00007ffa0109a000 	C:\Windows\WinSxS\amd64_microsoft.windows.common-controls_6595b64144ccf1df_6.0.19041.4355_none_60b8b9eb71f62e16\COMCTL32.dll
+0x00007ffa1c030000 - 0x00007ffa1c05f000 	C:\Windows\System32\IMM32.DLL
+0x00007ffa10f70000 - 0x00007ffa10f85000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\vcruntime140.dll
+0x00007ff9ceb10000 - 0x00007ff9cebab000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\msvcp140.dll
+0x0000000051a10000 - 0x000000005226c000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\server\jvm.dll
+0x00007ffa1c020000 - 0x00007ffa1c028000 	C:\Windows\System32\PSAPI.DLL
+0x00007ff9fac50000 - 0x00007ff9fac59000 	C:\Windows\SYSTEM32\WSOCK32.dll
+0x00007ffa0d800000 - 0x00007ffa0d827000 	C:\Windows\SYSTEM32\WINMM.dll
+0x00007ffa0ff90000 - 0x00007ffa0ff9a000 	C:\Windows\SYSTEM32\VERSION.dll
+0x00007ffa1c060000 - 0x00007ffa1c0cb000 	C:\Windows\System32\WS2_32.dll
+0x00007ffa18f70000 - 0x00007ffa18f82000 	C:\Windows\SYSTEM32\kernel.appcore.dll
+0x00007ffa10fc0000 - 0x00007ffa10fd0000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\verify.dll
+0x00007ffa0aec0000 - 0x00007ffa0aeeb000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\java.dll
+0x00007ff9ca260000 - 0x00007ff9ca296000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\jdwp.dll
+0x00007ffa0af80000 - 0x00007ffa0af89000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\npt.dll
+0x00007ff9c1ab0000 - 0x00007ff9c1ae2000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\instrument.dll
+0x00007ffa008e0000 - 0x00007ffa008f8000 	C:\Users\18264\.jdks\corretto-1.8.0_422\jre\bin\zip.dll
+
+VM Arguments:
+jvm_args: -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:56727,suspend=y,server=n -javaagent:C:\Users\18264\AppData\Local\JetBrains\IntelliJIdea2021.1\captureAgent\debugger-agent.jar -Dfile.encoding=UTF-8 
+java_command: com.example.saveInES
+java_class_path (initial): C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\charsets.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\access-bridge-64.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\cldrdata.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\dnsns.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\jaccess.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\jfxrt.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\localedata.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\nashorn.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunec.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunjce_provider.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunmscapi.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\sunpkcs11.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\ext\zipfs.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jce.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jfr.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jfxswt.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\jsse.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\management-agent.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\resources.jar;C:\Users\18264\.jdks\corretto-1.8.0_422\jre\lib\rt.jar;F:\workTest\DaKaES\target\classes;C:\Users\18264\.m2\repository\org\elasticsearch\client\elasticsearch-rest-high-level-client\7.17.0\elasticsearch-rest-high-level-client-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch\7.17.0\elasticsearch-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch-core\7.17.0\elasticsearch-core-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch-secure-sm\7.17.0\elasticsearch-secure-sm-7.17.0.jar;C:\Users\18264\.m2\repository\org\elasticsearch\elasticsearch-x-content\7.17.0\elasticsearch-x-content-7.17.0.jar;C:\Users\18264\.m2\repository\org\yaml\snakeyaml\1.26\snakeyaml-1.26.jar;C:\Users\18264\.m2\repository\c
+Launcher Type: SUN_STANDARD
+
+Environment Variables:
+JAVA_HOME=E:\java
+PATH=C:\Program Files\Common Files\Oracle\Java\javapath;D:\vm\bin\;E:\app\18264\product\11.2.0\dbhome_1\bin;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;E:\java\bin;F:\mysql\mysql-5.7.37-winx64\mysql-5.7.37-winx64\bin;D:\matlab\Matlab R2022a\bin;C:\Program Files (x86)\dotnet\;C:\Program Files\dotnet\;D:\winscp\WinSCP\;F:\javaAbout\apache-maven-3.6.3\bin;C:\Program Files\Git\cmd;F:\tool\nvm\nvm;F:\tool\node;C:\Users\18264\AppData\Local\Programs\Python\Python311\Scripts\;C:\Users\18264\AppData\Local\Programs\Python\Python311\;C:\Users\18264\AppData\Local\Programs\Python\Python37\Scripts\;C:\Users\18264\AppData\Local\Programs\Python\Python37\;C:\Users\18264\AppData\Local\Programs\Python\Launcher\;C:\Users\18264\AppData\Local\Microsoft\WindowsApps;D:\Microsoft VS Code\bin;F:\idea\IntelliJ IDEA 2021.1.3\bin;;F:\tool\nvm\nvm;F:\tool\node
+USERNAME=18264
+OS=Windows_NT
+PROCESSOR_IDENTIFIER=Intel64 Family 6 Model 141 Stepping 1, GenuineIntel
+
+
+
+---------------  S Y S T E M  ---------------
+
+OS: Windows 10 , 64 bit Build 19041 (10.0.19041.5438)
+
+CPU:total 16 (initial active 16) (8 cores per cpu, 2 threads per core) family 6 model 141 stepping 1, cmov, cx8, fxsr, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, popcnt, avx, avx2, aes, clmul, erms, 3dnowpref, lzcnt, ht, tsc, tscinvbit, bmi1, bmi2, adx
+
+Memory: 4k page, physical 16509736k(919328k free), swap 36170532k(5620k free)
+
+vm_info: OpenJDK 64-Bit Server VM (25.422-b05) for windows-amd64 JRE (1.8.0_422-b05), built on Jul 11 2024 17:20:01 by "Administrator" with MS VC++ 15.9 (VS2017)
+
+time: Tue Mar  4 14:31:48 2025
+timezone: Intel64 Family 6 Model 141 Stepping 1, GenuineIntel
+elapsed time: 0.022707 seconds (0d 0h 0m 0s)
+
diff --git a/keywords.txt b/keywords.txt
new file mode 100644
index 0000000..51fc8fc
--- /dev/null
+++ b/keywords.txt
@@ -0,0 +1,1045 @@
+Zoonotic disease
+pandemic
+Emerging and re-emerging diseases
+biosafet
+biosecurit
+biodefen
+biological defen
+bioweapon
+biologicalweapon
+bioterroris
+biological terroris
+biowarfare
+biological warfare
+biosurveillan
+biological surveillan
+biohazard
+biological hazard
+bioincident
+biological incident
+biothreat
+biological threat
+bioagent
+biologicalagent
+biological protect
+bioprotect
+biological risk
+Bacillus anthracis
+Bacillus cereus Biovaranthracis
+Brucella abortus
+Brucella melitensis
+Brucella neotomae
+Brucella suis
+Brucellamelitensis biovar suis
+Burkholderia mallei
+Pseudomonas mallei
+Burkholderia pseudomallei
+Acinetobacter mallei
+Glanders bacillus
+Bacillus mallei
+Actinobacillus mallei
+Pfeifferella mallei
+Malleomyces mallei
+Loefferella mallei
+Chlamydophila psittaci
+Chlamydia psittaci
+Clostridium botulinum
+Coxiella burnetii
+Escherichia coli O157
+Escherichia coli
+E coli O157-H7
+Escherichia coli O157:H7
+Francisella tularensis
+Legionella pneumophila
+Mycoplasma capricolum
+Mycoplasma mycoides
+Rickettsia prowazekii
+Rickettsia rickettsii
+Salmonella enterica
+Salmonella choleraesuis
+Vibrio cholerae
+Vibrio comma
+Yersinia pestis
+Bacille de la peste
+Bacterium pestis
+Pasteurella pestis
+African horse sickness virus
+AHSV
+African Swine Fever Virus
+ASFV
+Wart-Hog Disease Virus
+Wart Hog Disease Virus
+Avian influenza virus
+Bluetongue virus
+Bluetongue Viruses
+Blue Tongue Virus
+Ovine Catarrhal Fever Virus
+Chapare virus
+Chapare viruses
+Chaparemammarenavirus
+Chikungunya virus
+CHIKV
+Classical Swine Fever Virus
+Hog Cholera Virus
+Pestivirus C
+CSFV
+Crimean-Congohaemorrhagic fever virus
+Crimean Congohemorrhagic fever virus
+Congo Virus
+denguevirus
+DENV
+Dengue Viruses
+BreakboneFever Virus
+Breakbone Fever Viruses
+EasternEquine Encephalomyelitis Virus
+Eastern EquineEncephalitis virus
+EEE Virus
+EEEV
+Ebolavirus
+Ebolaviruses
+Ebola Virus
+Ebola Viruses
+Ebola-like Viruses
+Ebola likeViruses
+Ebola-like Virus
+Foot-and-MouthDisease Virus
+Foot and Mouth Disease Virus
+Foot-and-Mouth Disease Viruses
+FMDV
+Goatpox virus
+Goatpox viruses
+Goat PoxVirus
+Goat Pox Viruses
+Guanarito virus
+Guanarito viruses
+Guanarito mammarenavirus
+GTOV
+Hantaan virus
+Korean HemorrhagicFever Virus
+Hantaan orthohantavirus
+Hemorrhagic Nephroso-Nephritis Virus
+Hemorrhagic Nephroso Nephritis Virus
+Hemorrhagic Nephroso-Nephritis Viruses
+Epidemic Hemorrhagic Fever Virus
+HFRS Viruses
+Hemorrhagic Fever Renal Syndrome Virus
+HTNV
+Hendra Virus
+HendraViruses
+Equine Morbillivirus
+EquineMorbilliviruses
+MojV
+Japanese B EncephalitisVirus
+Japanese Encephalitis Virus
+JEV
+Junin virus
+Argentinian mammarenavirus
+JUNV
+Kyasanur Forest disease virus
+KFDV
+Lassa virus
+Lassa fever virus
+Lassamammarenavirus
+LASV
+Lujo virus
+Lujomammarenavirus
+LUJV
+Lumpy skin diseasevirus
+Neethling Virus
+Machupo virus
+Machupo mammarenavirus
+MACV
+Marburgvirus
+Marburgviruses
+Marburg Virus
+Marburg Viruses
+Marburg-like Viruses
+Marburg like Viruses
+Marburg-like Virus
+Frankfurt-Marburg Syndrome Virus
+FrankfurtMarburg Syndrome Virus
+Monkeypox virus
+Monkeypox viruses
+Monkeypoxvirus
+Monkeypoxviruses
+Monkey Pox Virus
+Monkey Pox Viruses
+Newcastle disease virus
+NDV
+Nipah virus
+Nipah henipavirus
+Nipah Viruses
+Omskhemorrhagic fever virus
+OHFV
+Omskhaemorrhagic fever virus
+Peste-des-petits-ruminants virus
+Peste des petitsruminants virus
+Rabies virus
+Rabies lyssavirus
+Reconstructed 1918 Influenza virus
+RiftValley fever virus
+Rift Valley fever phlebovirus
+RVFV
+Rinderpest virus
+Rinderpestmorbillivirus
+Sabia virus
+SARS Virus
+Severe Acute Respiratory Syndrome Virus
+SARS-Related Coronavirus
+SARS RelatedCoronavirus
+SARS-CoV
+SARS AssociatedCoronavirus
+SARS Coronavirus
+SARS-Associated Coronavirus
+Severe acuterespiratory syndrome related coronavirus
+Severeacute respiratory syndrome-related coronavirus
+Sheeppox virus
+Sheeppox viruses
+Sheep PoxVirus
+Sheep Pox Viruses
+Sin Nombre virus
+Muerto Canyon Virus
+Four Corners Virus
+Sin Nombre hantavirus
+Swine vesicular diseasevirus
+SVDV
+Tick-Borne Encephalitis Virus
+Tick Borne Encephalitis Virus
+Tick-BorneEncephalitis Viruses
+Tick Borne EncephalitisViruses
+TBEV
+Variola virus
+Variolaviruses
+Smallpox Virus
+Smallpox Viruses
+Poxvirus variolae
+Variola minor virus
+Variolamajor virus
+Alastrim
+Venezuelan equineencephalitis virus
+Venezuelan Equine EncephalitisViruses
+West Nile virus
+Egypt 101 virus
+Kunjin virus
+WNV
+WEE Virus
+WEEViruses
+Western Equine Encephalitis Viruses
+Western equine encephalitis virus
+WEEV
+Yellow fever virus
+Naegleria fowleri
+Naegleria fowlerus
+Fiji disease virus
+Ralstoniasolanacearum
+Rathayibacter toxicus
+Xanthomonas oryzae
+Erwinia amylovora
+Xanthomonas albilineans
+Dothistroma pini
+Dothistroma septosporum
+Scirrhia pini
+Tilletiaindica
+Coniothyrium glycines
+Phomaglycinicola
+Pyrenochaeta glycines
+Coccidioides immitis
+Histoplasma capsulatum
+Synchytrium endobioticum
+Colletotrichumcoffeanum
+Peronospora hyoscyami
+Peronosclerospora philippinensis
+Sclerophthorarayssiae
+Bacteriotoxins
+Botulinum toxins
+Clostridium perfringens toxins
+Staphylococcalenterotoxins
+Shigatoxins
+Anatoxins
+Ciguatoxins
+Saxitoxins
+Trichothecene toxins
+Abrins
+Ricin*
+recin
+Bungarotoxins
+Botulinum neurotoxin producing species of Clostridium
+Conotoxins
+T-2 toxin
+Tetrodotoxin
+Diacetoxyscirpeno
+SARS-COV-2
+COVID-19
+coronavirus disease 2019
+2019-nCov
+Alastrim virus
+Mpox virus
+Hypr virus
+Kumlinge virus
+Louping ill virus
+Hanzalova virus
+Omsk hemorrhagic fever virus
+St.Louis encephalitis virus
+Crimean-Congo hemorrhagic fever virus (Xinjiang hemorrhagic fever virus)
+Herpesvirus simiae
+Eastern equine encephalitis virus
+Venezuelan equine encephalitis virus
+Flexal virus
+Mopeia virus (and other Tacaribe viruses)
+Tacaribe virus
+Dabie bandavirus (SFTS Virus)
+Gordil virus
+Heartland bandavirus
+Itaituba virus
+Khasan virus
+Razdan virus
+Rift valley fever virus
+Garba virus
+Rabies virus (street virus)
+Rochambeau virus
+Inhangapi virus
+Middle East Respiratory Syndrome coronavirus (MERS-CoV)
+Severe acute respiratory syndrome coronavirus (SARS-CoV)
+Severe acute respiratory syndrome coronavirus 2,  (SARS-CoV-2)
+Hantaviruses causing pulmonary syndrome
+Hantaviruses causing hemorrhagic fever with renal syndrome
+Murray valley encephalitis virus
+Negishi virus
+Powassan virus
+Rocio virus
+Sepik virus
+Issyk-Kul virus
+Nairobi sheep disease virus
+Sapphire orthonairovirus  (Paramushir virus)
+Tamdy virus
+Human immunodeficiency virus (HIV) (Type 1 and 2 virus)
+Simian immunodeficiency virus (SIV)
+Everglades virus
+Kyzylagach virus
+Mayaro virus
+Middelburg virus
+Mucambo virus
+Ndumu virus
+Sagiyama virus
+Lymphocytic choriomeningitis (neurotropic) virus
+Polio virus
+Dhori virus
+High pathogenic avian influenza virus
+California encephalitis virus
+Germiston virus
+Inini virus (Simbu orthobunyavirus)
+Oropouche virus
+Sandfly fever virus
+Norovirus
+Sapovirus
+Flanders virus
+Hart Park virus
+Rabies virus (fixed virus)
+Vesicular stomatitis virus
+Buffalopox virus
+Camelpox virus
+Cowpox virus
+Molluscum contagiosum virus
+Orf virus
+Pseudocowpox virus (Milker‘s nodule virus)
+Rabbitpox virus
+Tanapox virus
+Vaccinia virus
+Polyoma virus
+Simian virus 40
+Metapneumovirus
+Respiratory syncytial virus
+Rubivirus (Rubella)
+Measles virus
+Mumps virus
+Parainfluenza virus
+Sendai virus (murine parainfluenza virus type 1)
+Coronavirus (low pathogenicity to human)
+Coltivirus
+Rotavirus
+Dengue virus
+Flaviviruses, other known non-highly pathogenic
+Hepatitis C virus
+Langat virus
+Saumarez reef virus
+Yellow fever virus, (vaccine strain, 17D)
+Zika Virus
+Hazara virus
+Human T-lymphotropic virus (HTLV)
+Lentivirus (Non highly pathogenic)
+Cytomegalovirus
+Epstein-Barr virus
+Herpes simplex virus
+Herpesvirus saimiri
+Human herpes virus-6
+Human herpes virus-7
+Human herpes virus-8
+Varicella-Zoster virus
+Alphaviruses, other known non-highly pathogenic
+Barmah forest virus
+Bebaru virus
+Getah virus
+O’nyong-nyong virus
+Ross river virus
+Semliki forest virus
+Sindbis virus
+Papillomavirus (human)
+Lymphocytic choriomeningitis  virus
+Hepatitis B virus
+Hepatitis D virus
+Hepatitis E virus
+Adeno-associated virus
+Bocavirus
+Parvovirus B19
+Adenovirus
+Cardiovirus
+Coxsackie virus
+ECHO virus
+Enterovirus
+Enterovirus A-71
+Hepatitis A virus
+Human Cosavirus
+Kobuvirus
+Parechovirus
+Rhinovirus
+Astrovirus
+Influenza virus
+Guaratuba virus
+La Crosse virus
+Tahyna orthobunyavirus
+Tensaw virus
+Turlock virus
+Hamster leukemia virus
+Mouse leukemia virus
+Mouse mammary tumor virus
+Rat leukemia virus
+Guinea pig herpes virus
+Bovine spongiform encephalopathy  (BSE)
+Creutzfeldt-Jakob disease (CJD)
+Fatal familial insomnia (FFI)
+Gerstmann-Sträussler-Scheinker syndrome (GSS)
+Kuru disease
+Variant Creutzfeldt-Jakob disease (vCJD)
+Scrapie
+Phagophilic cells without form
+Brucella genus
+Mycobacterium bovis
+Mycobacterium tuberculosis
+Rickettsia belongs to the spotted fever group
+Rickettsia Mori
+Przewalski's Rickettsia
+Eastern body of scrub typhus
+Lutheran rickettsia
+Siberian Rickettsia
+Tarasawich rickettsia
+Goat shapeless
+Acinetobacter baumannii
+Acinetobacter lwoffii
+Madura actinomycetes
+Bai Lejie Madura actinomycete
+Bovine actinomycete
+Actinomyces granulosus
+Yi's actinomycetes
+Nei's actinomycetes
+Other species of actinomycetes
+Aeromonas hydrophila
+Spotted Aeromonas
+Other species of Aeromonas genus
+Afipota genus
+Actinobacteria agglomerating bacteria
+Arachnia propionica
+Arcanobacterium equi
+Hemolytic Cryptococcus
+Bacillus cereus
+Fragile pseudomonas
+Rod like Bartonella
+Klebsiella pneumoniae
+Duoshi Bartonella
+Elizabethan Bartonella
+Guillain Barr é body
+bartonella henselae
+Kochia Bartonella
+5-Day Heat Bartonella Body
+Tribal Bartonella
+Wens Bartonella Wens subspecies
+Botrytis bronchiolitis
+Bordetella pertussis
+Borrelia burgdorferi
+Dashi sparse spiral body
+Returning to the heat sparse spiral body
+Fensenshu spirochete
+Short spiral bacteria genus
+Granuloma sheath bacteria
+Campylobacter coli
+Fetal Campylobacter
+Campylobacter jejuni
+Salivary Campylobacter
+Other species of Campylobacter genus
+Chlamydia pneumoniae
+Chlamydia parrot
+Chlamydia trachomatis
+Difficult Clostridium difficile
+Fusarium oxysporum
+Hemolytic Clostridium
+Clostridium novyi
+Clostridium perfringens
+Tetanus Clostridium
+Lactobacillus bovis
+Corynebacterium diphtheriae
+Corynebacterium minutissimum
+Fake Mycobacterium tuberculosis
+Corynebacterium striatum
+Acinetobacter canker
+Congo Pichia
+edwardsiella tarda
+Yifei Erich's body
+Eikenella corrodens
+Gas producing Escherichia coli
+Enterobacter cloacae
+Other species of Escherichia coli
+Adenothermic rickettsia
+Porcine red spot erysipelas fungus
+Dandelion fungus genus
+Burkholderia meningoseptica
+Bozeman's Legionella
+The new subspecies of the killer of the Tula Francisella fungus
+Fusobacterium necrophorum
+gardnerella vaginalis
+Hemophilus ducreyi
+Haemophilus influenzae
+Helicobacter pylori
+Kingella Kingae
+Klebsiella oxytoca
+Question mark Leptospira
+Listeria ivanovii
+Listeria monocytogenes
+Polymorphic small bacteria
+Morganella morganii
+African mycobacteria
+Goat mycobacteria
+Field mouse mycobacteria
+Mycobacterium asiaticum
+Mycobacterium avium
+Occasional mycobacteria
+Kansas mycobacteria
+Mycobacterium leprae
+Mycobacterium malmoense
+Mycobacterium avium subsp. paratuberculosis
+Mycobacterium scrotum
+Mycobacterium hominis
+Mycobacterium szulgai
+Ulcerative mycobacteria
+Other species of Mycobacterium genus
+mycoplasma pneumoniae
+neisseria gonorrhoeae
+Neisseria meningitidis
+Nocardia asteroides
+Nocardia brasiliensis
+Nocardia botulinum
+Nocardia pyogenes
+New Nocardia
+Nocardia in guinea pig ear inflammation
+Delancewanorca bacteria
+Clostridium sporogenes
+Pasteurella multocida
+Rodent bacteria invading the lungs
+Pathogenic Escherichia coli
+Other pathogenic Escherichia coli genera
+Anaerobic digestion streptococcus
+Plesiomonas shigelloides
+Prevotella genus
+Proteus mirabilis
+Proteus penneri
+Ordinary Proteobacteria
+Propionibacterium prolifera producing alkali
+Prevotella reinhardtii
+Pseudomonas aeruginosa
+Autotrophic false Nocardia
+Staphylococcus aureus
+Bongor Salmonella
+Serratia liquefaciens
+Fading Salmonella
+Shigella dysenteriae
+Shigella flexneri
+Shigella boydii
+Shigella Songnei
+Staphylococcus epidermidis
+Candida albicans
+Streptococcus pneumoniae
+Streptococcus pyogenes
+Streptococcus genus
+streptococcus suis
+Treponema carateum
+Treponema pallidum (syphilis)
+Treponema pertenue
+Wen's density spiral body
+Ureaplasma urealyticum
+Vibrio vulnificus
+Vibrio parahaemolyticus
+River Vibrio
+Vibrio alginolyticus
+Other species of Vibrio genus
+Yersinia enterocolitica
+Yersinia pseudotuberculosis
+mycoplasma genitalium
+Cronobacter genus
+Citrobacter genus
+Photobacterium damselae
+Shiwanju genus
+Seafood Deformable Fungi
+Defective autotrophic bacteria
+Carbon dioxide fiber eating bacteria genus
+Chromobacterium genus
+Golden rod genus
+Short chain Streptococcus genus
+Dermatitis budding bacteria
+Coarse ball spore fungus
+Posadas spore forming bacteria
+Capsular tissue cytoplasmic bacteria
+Histoplasma bacteria and other pathogenic diseases
+Brazilian Azospirillum
+Other pathogenic diseases of the genus Ascomycota
+Cladosporium genus
+Rhizopus genus
+Alternaria alternata
+Infecting Alternaria
+Other pathogenic diseases of the genus Alternaria
+Scale mold genus
+Genus Fusarium
+Arthrobacter genus
+Aspergillus flavus complex
+Aspergillus fumigatus complex group
+Aspergillus terreus
+Short stem mold genus
+Solid spore frog manure mold
+Frog manure mold belongs to other pathogenic diseases
+Beauveria genus
+candida dubliniensis
+Smooth Candida complex
+Ji Yemeng Candida complex
+Ximulong Candida complex
+Candida krusei
+Near smooth Candida complex
+Tropical Candida
+Candida auricula
+Candida and other pathogenic diseases
+Cephalosporin genus
+Genus Trichoderma
+Golden spore fungus genus
+Curly mold genus
+Botrytis cinerea
+Other pathogenic diseases of Aspergillus genus
+Saccharomyces genus
+Trichoderma genus
+Crown ear mold
+Conidiobolus incongruus
+Ear mold belongs to other pathogenic diseases
+False black powdery mildew genus
+Kashi cola rod mold
+Other pathogenic diseases of Colletotrichum genus
+Gert Cryptococcus complex
+Cryptococcus neoformans complex
+Cryptococcus and other pathogenic diseases
+Cunninghamella bertholletiae
+Xiaoke Yinhan mold belongs to other pathogenic diseases
+Hawaiian curved fungus
+Babendorf's curved fungus
+Suiform curved fungus
+Curvularia genus
+Corydalis genus
+Interstitial shell genus
+The genus Bifidobacterium
+Aemonas genus
+Trichophyton flocs
+Dermatitis external bottle mold
+Zhen's external bottle mold complex group
+Spinous external bottle mold
+External bottle mold belongs to other pathogenic diseases
+Magnum's navel mold
+Beaked navel mold
+Monofer coloring mold
+Pei's coloring mold
+Nubica coloring mold
+Other pathogenic diseases of the genus Trichoderma
+Fusarium oxysporum complex
+Fusarium complex of eggplant disease
+Other pathogenic diseases of Fusarium genus
+Geotrichum genus
+Genus Mucomycota
+Venetobacter baumannii
+Half new pillar top spore
+Lasiodiplodia theobromae
+Umbrella branch transverse stem mold
+Multi branch transverse stem mold
+Other pathogenic diseases of Streptomyces genus
+Polyphenophore Spore
+Gray Madura fungus
+Podomycosis Madura bacteria
+Madura bacteria and other pathogenic diseases
+Malassezia furfur
+Spherical Malassezia
+Malassezia and other pathogenic diseases
+Microsporidia canis
+Rust colored microsporidia
+Other pathogenic diseases of the genus Microsporidia
+Aspergillus genus
+Fusarium complex group
+Irregular mold
+Mucor racemosa
+Other pathogenic diseases of Mucor genus
+Gypsum Neisseria
+Neosatobacter genus
+New genus of balanoposthitis
+Black spore fungus genus
+Ochromycetes genus
+Xufang yeast genus
+Wan's Penicillium
+Dark colored Cladosporium genus
+Dark colored Cyclosporidium genus
+Pingge bacteria genus
+Single spore bottle mold genus
+American bottle mold
+Verrucous bottle mold
+Bottle mold belongs to other pathogenic diseases
+Stem point mold genus
+Hedermann nodule fungus
+The genus of crooked mouth shell
+Wickham without green algae
+Zufei no green algae
+No other pathogenic diseases of the green algae genus
+Conomycota genus
+Rhizopus microsporus
+rhizopus arrhizus
+Rhizopus and other pathogenic diseases
+Red yeast genus
+Broomycota genus
+Sharp tip Sedosporium
+Other pathogenic diseases of the genus Zygomycota
+Schizophyllum genus
+Short broom mold
+Other pathogenic diseases of the broom mold genus
+Spheroidal sporophytes
+Schenker Sporothrix fungus
+Sporothrix bacteria and other pathogenic diseases
+Copium genus
+Marlini's basket shaped bacteria
+Trichophyton complex
+Red Trichophyton complex
+Trichophyton schoenleinii
+Trichophyton interruptus
+Purple Trichophyton
+Trichophyton genus and other pathogenic diseases
+trichosporon asahii
+Other pathogenic diseases of the genus Trichosporum
+Wheat stalk mold genus
+Monogramma genus
+Viranthus genus
+Verrucosporium genus
+Ameba
+Wuchereria bancrofti
+Hydatid
+Whipworm
+Lung fluke
+Liver fluke
+Toxoplasma
+Hookworm
+Ascaris
+Giardia
+Scabies
+Pinworm
+Malaria
+Plasmodium
+Filarial worm
+Taenia
+Microsporidia
+Schistosome
+Cryptosporidium
+Porcine tapeworm
+Q hot
+Ebola hemorrhagic fever
+Bacillus subtilis
+Brucella bacteria
+Actinomycetes
+Eperythrocytic disease
+Para tuberculosis
+tox
+Leptospirosis
+Echinococcosis
+tuberculosis
+Old World spiral maggot disease
+Crimean Congo hemorrhagic fever
+foot-and-mouth disease
+rabies
+Pseudomallei
+Rift Valley fever
+Nipah's disease
+Japanese encephalitis
+Schmallenberg disease
+Vesicular stomatitis
+anthrax
+Pseudorabies
+Siniro fever
+Heart water disease
+New World spiral maggot disease
+Clostridium perfringens infections
+Infection with Trichinella spp
+Tularemia
+Trypanosoma Evansi infection
+Leishmaniasis
+Infection with epizootic haemorrhagic disease
+Filariasis
+Staphylococcosis
+Schistosomiasis
+Nipah virus Encephalitis
+Rotavirus infection
+Clostridium Perfringens
+Salmonella disease
+Listeriosis
+Hemolytic brucellosis
+Mycoplasma disease
+Chlamydia disease
+Eastern schistosomiasis
+Clonorchiasis sinensis
+Cysticercosis
+Fasciola hepatica
+Blood Spear Nematode Disease
+Cryptosporidiosis
+Akabane disease
+Hemorrhagic sepsis
+Ibaraki disease
+Bovine leukemia
+Bovine viral diarrhea
+Bovine infectious rhinotracheitis
+Bovine contagious pleuropneumonia
+malignant catarrhal fever
+Bovine spongiform encephalopathy
+Bovine nodular dermatitis
+Cattle popularity trend
+Bovine hypodermatid myiasis
+Bovine non plasma disease
+Bovine mucosal disease
+Zhongshan disease
+Infectious bovine rhinotracheitis/Infectious pustular vulvovaginitis
+Bovine genital campylobacteriosis
+Bovine viral diarrhoea/Mucosal disease
+Bovine babesiosis
+Theileriosis
+Trichomonosis
+Dermatophilosis
+Local epidemic bovine leukemia
+Bovine coronavirus infection
+Bovine pear shaped insect disease
+African horse plague
+Hendra's disease
+Ulcerative lymphangitis
+Equine glanders
+Equine disease toxic arteritis
+Equine infectious anemia
+Equine infectious uterine inflammation
+equine paratyphoid
+Horse mating disease
+Equine influenza
+Equine epidemic lymphangitis
+Horse gland disease
+Venezuelan equine encephalomyelitis
+Infection with equid herpesvirus-1
+Equine encephalomyelitis (Eastern and Western)
+Horse flu
+Equine nosed pneumonia
+equine piroplasmosis
+african swine fever
+Seneca virus disease
+Porcine infectious gastroenteritis
+porcine contagious pleuropneumonia
+Pig erysipelas
+Porcine Reproductive and Respiratory Syndrome
+Porcine paratyphoid fever
+Porcine Epidemic Diarrhea
+Swine influenza
+Swine dysentery
+Porcine vesicular disease
+Porcine Tetreovirus induced encephalomyelitis
+Atrophic rhinitis in pigs
+swine fever
+Mycoplasma hyopneumoniae pneumonia in pigs
+Porcine parvovirus infection
+Swine streptococcosis
+Porcine circovirus infection
+Glaesser’s disease (Haemophilus parasuis)
+Infection with Taenia solium(Porcine cysticercosis)
+Porcine deltacorona virus（PDCoV）
+Porcine brucellosis
+Porcine Circovirus Disease
+Glaser's disease
+swine flu
+Porcine Coronavirus Infection
+Porcine Seneca virus infection
+Piglet dysentery
+Porcine dysentery
+Porcine proliferative intestinal disease
+Infectious rhinitis
+Infectious bursal disease
+Low pathogenic avian influenza
+Highly pathogenic avian influenza
+turkey rhinotracheitis
+Chicken white diarrhea
+Chicken viral arthritis
+Chicken egg production decline syndrome
+Infectious laryngotracheitis in chickens
+Infectious bronchitis in chickens
+Marek's disease
+Avian leukemia
+Avian infectious encephalomyelitis
+Avian pox
+Avian paratyphoid fever
+Avian spirochete disease
+Avian typhoid fever
+Avian nephritis
+Avian reticuloendothelial hyperplasia
+avian chlamydiosis
+Avian mycoplasmosis
+Newcastle disease
+Duck viral hepatitis
+Leucocytozoonosis
+Goose parvovirus infection
+Duck virus enteritis
+Avian coccidiosis
+Riemerella anatipestifer infection
+Duck plague
+Gosling plague
+Avian Infectious Laryngotracheitis
+avian infectious bronchitis
+Marek’s Disease
+egg drop syndrome
+Duck serositis
+Avian reticuloendothelial tissue proliferation disease
+Chicken infectious rhinitis
+Infection with avian Tembusu virus
+Avian adenovirus infection
+Chicken infectious anemia
+Infection of avian influenza virus
+Chicken red mite disease
+necrotic enteritis
+Duck reovirus infection
+Boundary disease
+Infectious azoospermia
+Caseous lymphadenitis
+Blue tongue disease
+Medi Visna disease
+enzootic abortion of ewes
+Sheep pox and goat pox
+Nairobi sheep disease
+Contagious pleuropneumonia in goats
+Goat encephalitis
+Small ruminant plague
+Sheep infectious pustular dermatitis
+ovine pulmonary adenomatosis
+Itchy disease
+Caprine arthritis/encephalitis
+Salmonellosis(S.abortusovis)
+Sheep lung adenomatous disease
+Sheep pear shaped worm disease
+Sheep without plasma disease
+Crayfish plague
+Vitiligo syndrome
+Spotted catfish viral disease
+Viral hemorrhagic sepsis
+Viral neuronecrosis disease
+Infectious muscle necrosis disease
+Infectious subcutaneous and hematopoietic organ necrosis disease
+Infectious splenic and renal necrosis disease
+Infectious Hematopoietic Organ Necrosis
+Bacterial sepsis in freshwater fish
+Salmon infectious anemia
+Necrotizing liver pancreatitis
+Huangtou disease
+Catfish intestinal sepsis
+Acute liver and pancreas necrosis
+Koi herpesvirus disease
+Carp spring viremia
+Carp edema virus disease
+Epidemic Ulcer Syndrome
+epizootic haematopoietic necrosis
+Tilapia Lake Virus Disease
+White tail disease
+Taura syndrome
+Bacterial nephropathy
+Red snapper rainbow virus disease
+Infection with Gyrodactylus Salaris
+Infection with abalone herpesvirus
+Infection with Bonamia Ostreae
+Infection with Bonamia Exitiosa
+Infection with Marteilia Refringens
+Infection with Perkinsus Olseni
+Infection with Perkinsus Marinus
+Infection with Xenohaliotis Californiensis
+Infection with Batrachochytrium Dendrobatidis
+Infection with Ranavirus species
+Anisakiasis
+Cryptocaryoniasis
+Edwardsiellasis
+Fish streptococcosis
+Chryseobacterium meningsepticum of frog (Rana spp)
+Infection with salmonid alphavirus
+Infection with Batrachochytrium salamandrivorans
+Infection with Decapod iridescent virus 1
+Grass carp hemorrhagic disease
+Necrosis of hematopoietic organs in crucian carp
+Carp float disease
+Shrimp liver intestinal worm disease
+schistosomiasis japonica
+Infectious pancreatic necrosis disease
+Paralichthys olivaceus virus disease
+Fish Edwardellosis
+Streptococcal disease
+Salmon killing Aeromonas disease
+Small melon worm disease
+Myxosporidiosis
+Third generation insect disease
+Ringworm disease
+Crab snail pathogen disease
+Bao herpesvirus disease
+Oyster herpesvirus disease
+Beehive Beetle
+american foul brood
+Bee chalky disease
+Bee shield mite disease
+Honey bee bright heat mite disease
+Bee mite disease
+european foul brood
+Small hive beetle infestation(Aethina tumida)
+Nosemosis of honey bees
+Bombyx mori polyhedrosis
+Bright and hot mite disease
+chalkbrood
+white muscardine
+Silkworm microsporidia
+Rabbit hemorrhagic disease
+Rabbit myxomatosis
+Rabbit coccidiosis
+Rabbit brucellosis
+Feline panleukopenia
+Canine infectious hepatitis
+canine distemper
+Canine parvovirus infection
+Canine parvovirus disease
+Cat cupping virus infection
+Feline infectious peritonitis
+canine babesiosis
+Amphibian frog iridovirus disease
+Turtle parotitis disease
+Frog meningitis sepsis
+Monkey viral immunodeficiency syndrome
+Monkeypox
+Lymphocytic choroidal meningitis
+Chronic wasting disease
+Camel pox
+Marburg Hemorrhagic Fever
+Rat pox
+Mink Aleutian disease
+Mink viral enteritis
+Mouse hepatitis
+Cercopithecine Herpesvirus Type I(B virus）infectious diseases
+Sendai virus infectious disease
+Infectious subcutaneous and hematopoietic tissue necrosis disease
+Acute Hepatopancreatic Necrosis
diff --git a/original_captcha.png b/original_captcha.png
new file mode 100644
index 0000000..6a588a1
Binary files /dev/null and b/original_captcha.png differ
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..12f0a88
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,156 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <!-- Maven build for es-crawler: targets Java 8 and packages an executable jar-with-dependencies. -->
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>es-crawler</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+        <!-- Pin the encoding so builds do not depend on the platform default charset. -->
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+    </properties>
+
+    <dependencies>
+        <!-- Elasticsearch High Level REST Client -->
+        <!-- NOTE(review): pinned to 7.17.0 while elasticsearch-java below is 7.17.15; confirm the mismatch is intended. -->
+        <dependency>
+            <groupId>org.elasticsearch.client</groupId>
+            <artifactId>elasticsearch-rest-high-level-client</artifactId>
+            <version>7.17.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>co.elastic.clients</groupId>
+            <artifactId>elasticsearch-java</artifactId>
+            <version>7.17.15</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.15.0</version>
+        </dependency>
+
+        <!-- Jsoup HTML parser -->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+
+        <!-- OkHttp -->
+        <dependency>
+            <groupId>com.squareup.okhttp3</groupId>
+            <artifactId>okhttp</artifactId>
+            <version>4.9.3</version>
+        </dependency>
+
+        <!-- Logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.36</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.2.11</version>
+        </dependency>
+
+        <!-- Kafka client -->
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka-clients</artifactId>
+            <version>3.9.0</version>
+        </dependency>
+
+        <!-- Selenium Java -->
+        <dependency>
+            <groupId>org.seleniumhq.selenium</groupId>
+            <artifactId>selenium-java</artifactId>
+            <version>4.10.0</version>
+        </dependency>
+
+        <!-- WebDriver Manager -->
+        <dependency>
+            <groupId>io.github.bonigarcia</groupId>
+            <artifactId>webdrivermanager</artifactId>
+            <version>5.6.2</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.json</groupId>
+            <artifactId>json</artifactId>
+            <version>20230227</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.10.1</version>
+        </dependency>
+
+        <dependency>
+            <groupId>net.sourceforge.htmlunit</groupId>
+            <artifactId>htmlunit</artifactId>
+            <version>2.61.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>net.sourceforge.tess4j</groupId>
+            <artifactId>tess4j</artifactId>
+            <version>4.5.4</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.httpcomponents.client5</groupId>
+            <artifactId>httpclient5</artifactId>
+            <version>5.3.1</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Compiler plugin: keeps the Java 8 source/target settings -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>8</source>
+                    <target>8</target>
+                </configuration>
+            </plugin>
+            <!-- Assembly plugin: builds an executable JAR that bundles the dependencies -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.CtriScraper</mainClass> <!-- replace with the fully qualified name of your main class -->
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/preprocessed_captcha.png b/preprocessed_captcha.png
new file mode 100644
index 0000000..20329de
Binary files /dev/null and b/preprocessed_captcha.png differ
diff --git a/processed_urls.txt b/processed_urls.txt
new file mode 100644
index 0000000..f862ec3
--- /dev/null
+++ b/processed_urls.txt
@@ -0,0 +1,281 @@
+
+https://www.zyctd.com/zixun/201/1055143.html
+https://www.zyctd.com/zixun/201/861786.html
+https://www.zyctd.com/zixun/201/1053482.html
+https://www.zyctd.com/zixun/201/269419.html
+https://www.zyctd.com/zixun/201/1053149.html
+https://www.zyctd.com/zixun/201/1023926.html
+https://www.zyctd.com/zixun/201/435325.html
+https://www.zyctd.com/zixun/201/1050302.html
+https://www.zyctd.com/zixun/201/880441.html
+https://www.zyctd.com/zixun/201/1019635.html
+https://www.zyctd.com/zixun/201/970572.html
+https://www.zyctd.com/zixun/201/912277.html
+https://www.zyctd.com/zixun/201/372444.html
+https://www.zyctd.com/zixun/201/1073629.html
+https://www.zyctd.com/zixun/201/1069386.html
+https://www.zyctd.com/zixun/201/730410.html
+https://www.zyctd.com/zixun/201/953220.html
+https://www.zyctd.com/zixun/201/1074339.html
+https://www.zyctd.com/zixun/201/1072317.html
+https://www.zyctd.com/zixun/201/294794.html
+https://www.zyctd.com/zixun/201/267592.html
+https://www.zyctd.com/zixun/201/979665.html
+https://www.zyctd.com/zixun/201/869885.html
+https://www.zyctd.com/zixun/201/1054064.html
+https://www.zyctd.com/zixun/201/1049331.html
+https://www.zyctd.com/zixun/201/442647.html
+https://www.zyctd.com/zixun/201/285992.html
+https://www.zyctd.com/zixun/201/1037972.html
+https://www.zyctd.com/zixun/201/799801.html
+https://www.zyctd.com/zixun/201/916078.html
+https://www.zyctd.com/zixun/201/456647.html
+https://www.zyctd.com/zixun/201/812121.html
+https://www.zyctd.com/zixun/201/1042740.html
+https://www.zyctd.com/zixun/201/1042708.html
+https://www.zyctd.com/zixun/201/840450.html
+https://www.zyctd.com/zixun/201/320749.html
+https://www.zyctd.com/zixun/201/496106.html
+https://www.zyctd.com/zixun/201/850201.html
+https://www.zyctd.com/zixun/201/277145.html
+https://www.zyctd.com/zixun/201/299091.html
+https://www.zyctd.com/zixun/201/266080.html
+https://www.zyctd.com/zixun/201/1051925.html
+https://www.zyctd.com/zixun/201/898081.html
+https://www.zyctd.com/zixun/201/873280.html
+https://www.zyctd.com/zixun/201/703880.html
+https://www.zyctd.com/zixun/201/873126.html
+https://www.zyctd.com/zixun/201/887931.html
+https://www.zyctd.com/zixun/201/432742.html
+https://www.zyctd.com/zixun/201/1040431.html
+https://www.zyctd.com/zixun/201/1040223.html
+https://www.zyctd.com/zixun/201/858118.html
+https://www.zyctd.com/zixun/201/971286.html
+https://www.zyctd.com/zixun/201/458488.html
+https://www.zyctd.com/zixun/201/1079381.html
+https://www.zyctd.com/zixun/201/263578.html
+https://www.zyctd.com/zixun/201/553513.html
+https://www.zyctd.com/zixun/201/286229.html
+https://www.zyctd.com/zixun/201/285365.html
+https://www.zyctd.com/zixun/201/352921.html
+https://www.zyctd.com/zixun/201/503267.html
+https://www.zyctd.com/zixun/201/391337.html
+https://www.zyctd.com/zixun/201/813052.html
+https://www.zyctd.com/zixun/201/1053556.html
+https://www.zyctd.com/zixun/201/1041197.html
+https://www.zyctd.com/zixun/201/287420.html
+https://www.zyctd.com/zixun/201/291563.html
+https://www.zyctd.com/zixun/201/948250.html
+https://www.zyctd.com/zixun/201/289034.html
+https://www.zyctd.com/zixun/201/795965.html
+https://www.zyctd.com/zixun/201/292962.html
+https://www.zyctd.com/zixun/201/975850.html
+https://www.zyctd.com/zixun/201/275335.html
+https://www.zyctd.com/zixun/201/1031992.html
+https://www.zyctd.com/zixun/201/1033886.html
+https://www.zyctd.com/zixun/201/999510.html
+https://www.zyctd.com/zixun/201/270144.html
+https://www.zyctd.com/zixun/201/1055519.html
+https://www.zyctd.com/zixun/201/272205.html
+https://www.zyctd.com/zixun/201/526059.html
+https://www.zyctd.com/zixun/201/456640.html
+https://www.zyctd.com/zixun/201/267952.html
+https://www.zyctd.com/zixun/201/803469.html
+https://www.zyctd.com/zixun/201/270763.html
+https://www.zyctd.com/zixun/201/1072987.html
+https://www.zyctd.com/zixun/201/265176.html
+https://www.zyctd.com/zixun/201/1022141.html
+https://www.zyctd.com/zixun/201/290173.html
+https://www.zyctd.com/zixun/201/269175.html
+https://www.zyctd.com/zixun/201/744991.html
+https://www.zyctd.com/zixun/201/1019131.html
+https://www.zyctd.com/zixun/201/717054.html
+https://www.zyctd.com/zixun/201/517358.html
+https://www.zyctd.com/zixun/201/1058505.html
+https://www.zyctd.com/zixun/201/905515.html
+https://www.zyctd.com/zixun/201/287395.html
+https://www.zyctd.com/zixun/201/934873.html
+https://www.zyctd.com/zixun/201/1051317.html
+https://www.zyctd.com/zixun/201/926018.html
+https://www.zyctd.com/zixun/201/334511.html
+https://www.zyctd.com/zixun/201/845896.html
+https://www.zyctd.com/zixun/201/587785.html
+https://www.zyctd.com/zixun/201/288376.html
+https://www.zyctd.com/zixun/201/851405.html
+https://www.zyctd.com/zixun/201/941404.html
+https://www.zyctd.com/zixun/201/881855.html
+https://www.zyctd.com/zixun/201/602632.html
+https://www.zyctd.com/zixun/201/293601.html
+https://www.zyctd.com/zixun/201/541809.html
+https://www.zyctd.com/zixun/201/335120.html
+https://www.zyctd.com/zixun/201/1031137.html
+https://www.zyctd.com/zixun/201/960101.html
+https://www.zyctd.com/zixun/201/1077142.html
+https://www.zyctd.com/zixun/201/1063222.html
+https://www.zyctd.com/zixun/201/681466.html
+https://www.zyctd.com/zixun/201/1031130.html
+https://www.zyctd.com/zixun/201/1073734.html
+https://www.zyctd.com/zixun/201/1062186.html
+https://www.zyctd.com/zixun/201/1046628.html
+https://www.zyctd.com/zixun/201/358892.html
+https://www.zyctd.com/zixun/201/285361.html
+https://www.zyctd.com/zixun/201/1059889.html
+https://www.zyctd.com/zixun/201/297824.html
+https://www.zyctd.com/zixun/201/844307.html
+https://www.zyctd.com/zixun/201/900524.html
+https://www.zyctd.com/zixun/201/1057636.html
+https://www.zyctd.com/zixun/201/1010080.html
+https://www.zyctd.com/zixun/201/409152.html
+https://www.zyctd.com/zixun/201/402782.html
+https://www.zyctd.com/zixun/201/770296.html
+https://www.zyctd.com/zixun/201/1040602.html
+https://www.zyctd.com/zixun/201/606503.html
+https://www.zyctd.com/zixun/201/784471.html
+https://www.zyctd.com/zixun/201/466097.html
+https://www.zyctd.com/zixun/201/1071160.html
+https://www.zyctd.com/zixun/201/623226.html
+https://www.zyctd.com/zixun/201/948264.html
+https://www.zyctd.com/zixun/201/293462.html
+https://www.zyctd.com/zixun/201/829348.html
+https://www.zyctd.com/zixun/201/332369.html
+https://www.zyctd.com/zixun/201/907461.html
+https://www.zyctd.com/zixun/201/756555.html
+https://www.zyctd.com/zixun/201/717915.html
+https://www.zyctd.com/zixun/201/262203.html
+https://www.zyctd.com/zixun/201/1055787.html
+https://www.zyctd.com/zixun/201/432336.html
+https://www.zyctd.com/zixun/201/907489.html
+https://www.zyctd.com/zixun/201/1014686.html
+https://www.zyctd.com/zixun/201/1053320.html
+https://www.zyctd.com/zixun/201/480020.html
+https://www.zyctd.com/zixun/201/287423.html
+https://www.zyctd.com/zixun/201/385289.html
+https://www.zyctd.com/zixun/201/1030421.html
+https://www.zyctd.com/zixun/201/527648.html
+https://www.zyctd.com/zixun/201/972959.html
+https://www.zyctd.com/zixun/201/408767.html
+https://www.zyctd.com/zixun/201/724887.html
+https://www.zyctd.com/zixun/201/291480.html
+https://www.zyctd.com/zixun/201/472544.html
+https://www.zyctd.com/zixun/201/724873.html
+https://www.zyctd.com/zixun/201/281751.html
+https://www.zyctd.com/zixun/201/1049693.html
+https://www.zyctd.com/zixun/201/869619.html
+https://www.zyctd.com/zixun/201/355497.html
+https://www.zyctd.com/zixun/201/341623.html
+https://www.zyctd.com/zixun/201/450753.html
+https://www.zyctd.com/zixun/201/1065837.html
+https://www.zyctd.com/zixun/201/1031331.html
+https://www.zyctd.com/zixun/201/669727.html
+https://www.zyctd.com/zixun/201/1034010.html
+https://www.zyctd.com/zixun/201/1054058.html
+https://www.zyctd.com/zixun/201/954613.html
+https://www.zyctd.com/zixun/201/715584.html
+https://www.zyctd.com/zixun/201/1051110.html
+https://www.zyctd.com/zixun/201/269963.html
+https://www.zyctd.com/zixun/201/1048128.html
+https://www.zyctd.com/zixun/201/793207.html
+https://www.zyctd.com/zixun/201/284310.html
+https://www.zyctd.com/zixun/201/282639.html
+https://www.zyctd.com/zixun/201/1068138.html
+https://www.zyctd.com/zixun/201/340678.html
+https://www.zyctd.com/zixun/201/294371.html
+https://www.zyctd.com/zixun/201/324277.html
+https://www.zyctd.com/zixun/201/1048931.html
+https://www.zyctd.com/zixun/201/851398.html
+https://www.zyctd.com/zixun/201/263527.html
+https://www.zyctd.com/zixun/201/919480.html
+https://www.zyctd.com/zixun/201/685442.html
+https://www.zyctd.com/zixun/201/428325.html
+https://www.zyctd.com/zixun/201/1032698.html
+https://www.zyctd.com/zixun/201/1003367.html
+https://www.zyctd.com/zixun/201/852315.html
+https://www.zyctd.com/zixun/201/283156.html
+https://www.zyctd.com/zixun/201/262484.html
+https://www.zyctd.com/zixun/201/1065225.html
+https://www.zyctd.com/zixun/201/763331.html
+https://www.zyctd.com/zixun/201/1066158.html
+https://www.zyctd.com/zixun/201/1047744.html
+https://www.zyctd.com/zixun/201/842795.html
+https://www.zyctd.com/zixun/201/975374.html
+https://www.zyctd.com/zixun/201/1055865.html
+https://www.zyctd.com/zixun/201/1017367.html
+https://www.zyctd.com/zixun/201/1057711.html
+https://www.zyctd.com/zixun/201/1074295.html
+https://www.zyctd.com/zixun/201/283647.html
+https://www.zyctd.com/zixun/201/286896.html
+https://www.zyctd.com/zixun/201/1043393.html
+https://www.zyctd.com/zixun/201/305888.html
+https://www.zyctd.com/zixun/201/487258.html
+https://www.zyctd.com/zixun/201/1045652.html
+https://www.zyctd.com/zixun/201/1064905.html
+https://www.zyctd.com/zixun/201/515636.html
+https://www.zyctd.com/zixun/201/1038609.html
+https://www.zyctd.com/zixun/201/438083.html
+https://www.zyctd.com/zixun/201/297327.html
+https://www.zyctd.com/zixun/201/773537.html
+https://www.zyctd.com/zixun/201/1043589.html
+https://www.zyctd.com/zixun/201/815712.html
+https://www.zyctd.com/zixun/201/698595.html
+https://www.zyctd.com/zixun/201/269800.html
+https://www.zyctd.com/zixun/201/1030332.html
+https://www.zyctd.com/zixun/201/422676.html
+https://www.zyctd.com/zixun/201/290130.html
+https://www.zyctd.com/zixun/201/270359.html
+https://www.zyctd.com/zixun/201/995604.html
+https://www.zyctd.com/zixun/201/1074993.html
+https://www.zyctd.com/zixun/201/1054825.html
+https://www.zyctd.com/zixun/201/918577.html
+https://www.zyctd.com/zixun/201/686527.html
+https://www.zyctd.com/zixun/201/297509.html
+https://www.zyctd.com/zixun/201/622708.html
+https://www.zyctd.com/zixun/201/469870.html
+https://www.zyctd.com/zixun/201/844328.html
+https://www.zyctd.com/zixun/201/394508.html
+https://www.zyctd.com/zixun/201/271744.html
+https://www.zyctd.com/zixun/201/1054940.html
+https://www.zyctd.com/zixun/201/732818.html
+https://www.zyctd.com/zixun/201/1049547.html
+https://www.zyctd.com/zixun/201/1059684.html
+https://www.zyctd.com/zixun/201/1055301.html
+https://www.zyctd.com/zixun/201/962068.html
+https://www.zyctd.com/zixun/201/451355.html
+https://www.zyctd.com/zixun/201/1056174.html
+https://www.zyctd.com/zixun/201/930540.html
+https://www.zyctd.com/zixun/201/871656.html
+https://www.zyctd.com/zixun/201/363246.html
+https://www.zyctd.com/zixun/201/845672.html
+https://www.zyctd.com/zixun/201/452965.html
+https://www.zyctd.com/zixun/201/1065920.html
+https://www.zyctd.com/zixun/201/1058808.html
+https://www.zyctd.com/zixun/201/986868.html
+https://www.zyctd.com/zixun/201/489785.html
+https://www.zyctd.com/zixun/201/307946.html
+https://www.zyctd.com/zixun/201/833359.html
+https://www.zyctd.com/zixun/201/806969.html
+https://www.zyctd.com/zixun/201/1050812.html
+https://www.zyctd.com/zixun/201/1033696.html
+https://www.zyctd.com/zixun/201/501167.html
+https://www.zyctd.com/zixun/201/1078919.html
+https://www.zyctd.com/zixun/201/1036495.html
+https://www.zyctd.com/zixun/201/1008736.html
+https://www.zyctd.com/zixun/201/1054264.html
+https://www.zyctd.com/zixun/201/493152.html
+https://www.zyctd.com/zixun/201/685456.html
+https://www.zyctd.com/zixun/201/995597.html
+https://www.zyctd.com/zixun/201/905501.html
+https://www.zyctd.com/zixun/201/347573.html
+https://www.zyctd.com/zixun/201/1045494.html
+https://www.zyctd.com/zixun/201/549775.html
+https://www.zyctd.com/zixun/201/1037336.html
+https://www.zyctd.com/zixun/201/1034972.html
+https://www.zyctd.com/zixun/201/653046.html
+https://www.zyctd.com/zixun/201/316612.html
+https://www.zyctd.com/zixun/201/447064.html
+https://www.zyctd.com/zixun/201/307603.html
+https://www.zyctd.com/zixun/201/263437.html
+https://www.zyctd.com/zixun/201/894490.html
+https://www.zyctd.com/zixun/201/368629.html
+https://www.zyctd.com/zixun/201/273285.html
+https://www.zyctd.com/zixun/201/1059618.html
+https://www.zyctd.com/zixun/201/459237.html
diff --git a/proxy.txt b/proxy.txt
new file mode 100644
index 0000000..199a16c
--- /dev/null
+++ b/proxy.txt
@@ -0,0 +1 @@
+127.0.0.1:7897
\ No newline at end of file
diff --git a/src/main/java/com/example/AusContent.java b/src/main/java/com/example/AusContent.java
new file mode 100644
index 0000000..f71c2d8
--- /dev/null
+++ b/src/main/java/com/example/AusContent.java
@@ -0,0 +1,150 @@
+package com.example;
+
+import okhttp3.*;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Downloads one ANZCTR trial review page, reads its fields with CSS selectors
+ * and prints them as a map on standard output.
+ */
+public class AusContent {
+
+    /** Address of the trial review page downloaded by {@link #main(String[])}. */
+    private static final String TRIAL_URL =
+            "https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=389345&isReview=true";
+
+    /**
+     * Fetches the trial page, extracts its fields by element id and prints the result map.
+     *
+     * @param args ignored
+     * @throws IOException if the request fails, the status is not 2xx, or the response has no body
+     */
+    public static void main(String[] args) throws IOException {
+        OkHttpClient client = new OkHttpClient().newBuilder()
+                .build();
+        Request request = new Request.Builder()
+                .url(TRIAL_URL)
+                .get()
+                .build();
+        String html;
+        // try-with-resources closes the response even when reading the body throws.
+        try (Response response = client.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                throw new IOException("Unexpected HTTP status " + response.code() + " for " + TRIAL_URL);
+            }
+            ResponseBody responseBody = response.body();
+            if (responseBody == null) {
+                throw new IOException("Empty response body for " + TRIAL_URL);
+            }
+            html = responseBody.string();
+        }
+        Document parse = Jsoup.parse(html);
+        String title = parse.select("#ctl00_body_CXSTUDYTITLE").text();
+        String registNum = parse.select("#ctl00_body_CXACTRNUMBER").text();
+        String registTime = convertDate(parse.select("#ctl00_body_CXAPPROVALDATE").text());
+        String sponsor = parse.select("#ctl00_body_repeater_TXFUNDINGSOURCE_ctl00_CXTYPE").text();
+        String studyType = parse.select("#ctl00_body_CXSTUDYTYPE").text();
+        String phase = parse.select("#ctl00_body_CXPHASE").text();
+        String disease = parse.select("#ctl00_body_repeater_TXHEALTHCONDITION_ctl00_CXHEALTHCONDITION").text();
+        // Study-design values; SD8 reads the same element as phase, studyObjective the same as SD1.
+        String SD1 = parse.select("#ctl00_body_CXPURPOSE").text();
+        String SD2 = parse.select("#ctl00_body_CXALLOCATION").text();
+        String SD3 = parse.select("#ctl00_body_CXCONCEALMENT").text();
+        String SD4 = parse.select("#ctl00_body_CXSEQUENCE").text();
+        String SD5 = parse.select("#ctl00_body_CXMASKING").text();
+        String SD6 = parse.select("#ctl00_body_maskingdiv > div > div.review-element-content").text();
+        String SD7 = parse.select("#ctl00_body_CXASSIGNMENT").text();
+        String SD8 = parse.select("#ctl00_body_CXPHASE").text();
+        String SD9 = parse.select("#ctl00_body_CXENDPOINT").text();
+        String SD10 = parse.select("#ctl00_body_CXSTATISTICALMETHODS").text();
+        String SD11 = parse.select("#ctl00_body_interventional_div > div:nth-child(8) > div > div.review-element-content").text();
+        String studyObjective = parse.select("#ctl00_body_CXPURPOSE").text();
+        String inclusionCriteria = parse.select("#ctl00_body_CXINCLUSIVECRITERIA").text();
+        String exclusionCriteria = parse.select("#ctl00_body_CXEXCLUSIVECRITERIA").text();
+        String currentStatus = parse.select("#ctl00_body_CXRECRUITMENTSTATUS").text();
+        String enrollment = parse.select("#ctl00_body_CXSAMPLESIZE").text();
+        String country = parse.select("#ctl00_body_repeater_TXCOUNTRYOUTSIDEAUSTRALIA_ctl01_CXCOUNTRY").text();
+        String intervention = parse.select("#ctl00_body_trialDiv > div:nth-child(30) > div > div.review-element-content").text();
+        Map<String,Object> studyDesign = new HashMap<>();
+        studyDesign.put("Purpose of the study",SD1);
+        studyDesign.put("Allocation to intervention",SD2);
+        studyDesign.put("Procedure for enrolling a subject and allocating the treatment (allocation concealment procedures)",SD3);
+        studyDesign.put("Methods used to generate the sequence in which subjects will be randomised (sequence generation)",SD4);
+        studyDesign.put("Masking / blinding",SD5);
+        studyDesign.put("Who is / are masked / blinded?",SD6);
+        studyDesign.put("Intervention assignment",SD7);
+        studyDesign.put("Other design features",SD11);
+        studyDesign.put("Phase",SD8);
+        studyDesign.put("Type of endpoint/s",SD9);
+        studyDesign.put("Statistical methods / analysis",SD10);
+        Map<String,Object> resultData = new HashMap<>();
+        resultData.put("title",title);
+        resultData.put("registNum",registNum);
+        resultData.put("registTime",registTime);
+        resultData.put("registStatus","");
+        resultData.put("registTitle","");
+        resultData.put("fullTitle","");
+        resultData.put("sponsor",sponsor);
+        resultData.put("sponsorPart","");
+        resultData.put("studyType",studyType);
+        resultData.put("phase",phase);
+        resultData.put("disease",disease);
+        resultData.put("studyDesign",studyDesign);
+        resultData.put("studyObjective",studyObjective);
+        resultData.put("studyStartDate","");
+        resultData.put("inclusionCriteria",inclusionCriteria);
+        resultData.put("exclusionCriteria",exclusionCriteria);
+        resultData.put("currentStatus",currentStatus);
+        resultData.put("enrollment",enrollment);
+        resultData.put("country",country);
+        resultData.put("tagTime","");
+        resultData.put("intervention",intervention);
+        resultData.put("primaryOutcome","");
+        resultData.put("crawlTime",getCurrentTime());
+//        resultData.put("crawlUrl",url);
+        resultData.put("postTime",registTime);
+        resultData.put("content","content");
+        resultData.put("forwardcontent","forwardcontent");
+        System.out.println(resultData);
+    }
+
+    /**
+     * Converts a {@code d/MM/yyyy} date into {@code yyyy-MM-dd HH:mm:ss} (time part is midnight).
+     *
+     * @param inputDate date text such as {@code 5/03/2021}; surrounding whitespace is ignored
+     * @return the reformatted date, or the literal {@code "Invalid date format"} when the input is null or unparsable
+     */
+    public static String convertDate(String inputDate) {
+        if (inputDate == null) {
+            return "Invalid date format";
+        }
+        try {
+            SimpleDateFormat inputFormat = new SimpleDateFormat("d/MM/yyyy");
+            Date date = inputFormat.parse(inputDate.trim());
+            SimpleDateFormat outputFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+            return outputFormat.format(date);
+        } catch (ParseException e) {
+            return "Invalid date format";
+        }
+    }
+
+    /**
+     * Returns the current local date-time formatted as {@code yyyy-MM-dd HH:mm:ss}.
+     *
+     * @return the formatted current time
+     */
+    public static String getCurrentTime() {
+        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+        return LocalDateTime.now().format(formatter);
+    }
+}
diff --git a/src/main/java/com/example/AusList.java b/src/main/java/com/example/AusList.java
new file mode 100644
index 0000000..eaae8f9
--- /dev/null
+++ b/src/main/java/com/example/AusList.java
@@ -0,0 +1,296 @@
+package com.example;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Requests one result page of the ANZCTR trial search and prints the links found on it.
+ *
+ * <p>The flow is a GET of the search form (to collect cookies and the hidden state fields)
+ * followed by an AJAX-style POST that asks for the wanted page.
+ */
+public class AusList {
+
+    /** Search form address; used for the GET, the POST and as base for relative links. */
+    private static final String SEARCH_URL = "https://www.anzctr.org.au/TrialSearch.aspx";
+
+    /** Browser user agent sent with both requests. */
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36";
+
+    /** Finds the numeric {@code page} query parameter of a search URL. */
+    private static final Pattern PAGE_PARAM = Pattern.compile("[?&]page=(\\d+)");
+
+    /** Hidden form fields that have to be echoed back in the POST. */
+    private static final String[] STATE_FIELDS = {"__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"};
+
+    /**
+     * Loads the search form, posts the paging request and prints every result link.
+     *
+     * @param args ignored
+     * @throws Exception if a request fails, returns a non-200 status, or the form lacks its state fields
+     */
+    public static void main(String[] args) throws Exception {
+        String targetUrl = "https://www.anzctr.org.au/TrialSearch.aspx?page=20";
+        int page = parsePageNumber(targetUrl);
+        System.out.println("Page Number: " + page);
+
+        // Cookies issued by the GET, kept in arrival order so they can be replayed on the POST.
+        Set<String> cookieSet = new LinkedHashSet<>();
+
+        // Step 1: GET the form to obtain cookies and the hidden state fields.
+        String formHtml;
+        HttpURLConnection initialConn = (HttpURLConnection) new URL(SEARCH_URL).openConnection();
+        try {
+            initialConn.setRequestMethod("GET");
+            initialConn.setRequestProperty("User-Agent", USER_AGENT);
+            initialConn.setRequestProperty("Accept",
+                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+            initialConn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9,th;q=0.8,en;q=0.7");
+            initialConn.setRequestProperty("Cache-Control", "no-cache");
+            initialConn.setRequestProperty("Pragma", "no-cache");
+            initialConn.setRequestProperty("Upgrade-Insecure-Requests", "1");
+            initialConn.setRequestProperty("Sec-Fetch-Dest", "document");
+            initialConn.setRequestProperty("Sec-Fetch-Mode", "navigate");
+            initialConn.setRequestProperty("Sec-Fetch-Site", "same-origin");
+            initialConn.setRequestProperty("Sec-Fetch-User", "?1");
+            initialConn.setRequestProperty("Sec-CH-UA",
+                    "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"");
+            initialConn.setRequestProperty("Sec-CH-UA-Mobile", "?0");
+            initialConn.setRequestProperty("Sec-CH-UA-Platform", "\"Windows\"");
+            initialConn.setInstanceFollowRedirects(false);
+            initialConn.setConnectTimeout(10000);
+            initialConn.setReadTimeout(10000);
+
+            requireOk(initialConn);
+            updateCookies(initialConn, cookieSet);
+            formHtml = readBody(initialConn);
+        } finally {
+            initialConn.disconnect();
+        }
+
+        Map<String, String> viewStateData = extractViewStateData(formHtml);
+        String viewState = viewStateData.get("__VIEWSTATE");
+        String viewStateGen = viewStateData.get("__VIEWSTATEGENERATOR");
+        String eventValidation = viewStateData.get("__EVENTVALIDATION");
+        if (viewState == null || viewStateGen == null || eventValidation == null) {
+            // The POST body cannot be built without these fields, so stop instead of posting nothing.
+            throw new IllegalStateException("Search form is missing ASP.NET state fields; cannot request page " + page);
+        }
+        String payload = buildPostData(viewState, eventValidation, viewStateGen, page);
+
+        // Step 2: POST the paging request with browser-like AJAX headers.
+        String html;
+        HttpURLConnection conn = (HttpURLConnection) new URL(SEARCH_URL).openConnection();
+        try {
+            conn.setRequestMethod("POST");
+            conn.setDoOutput(true);
+            conn.setInstanceFollowRedirects(false);
+            conn.setConnectTimeout(10000);
+            conn.setReadTimeout(10000);
+            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+            conn.setRequestProperty("User-Agent", USER_AGENT);
+            conn.setRequestProperty("Accept", "*/*");
+            conn.setRequestProperty("X-Requested-With", "XMLHttpRequest");
+            conn.setRequestProperty("X-MicrosoftAjax", "Delta=true");
+            conn.setRequestProperty("Referer", "https://www.anzctr.org.au/TrialSearch.aspx");
+            conn.setRequestProperty("Origin", "https://www.anzctr.org.au");
+            if (!cookieSet.isEmpty()) {
+                // Replay the cookies captured in step 1.
+                conn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+            }
+
+            try (OutputStream os = conn.getOutputStream()) {
+                os.write(payload.getBytes(StandardCharsets.UTF_8));
+            }
+            requireOk(conn);
+            html = readBody(conn);
+        } finally {
+            conn.disconnect();
+        }
+
+        // Parse with a base URI so relative hrefs resolve to absolute links.
+        Document parse = Jsoup.parse(html, SEARCH_URL);
+        Elements elements = parse.select(".results-header-tools a");
+        for (Element element : elements) {
+            String link = element.absUrl("href");
+            if (!link.isEmpty()) {
+                System.out.println(link);
+            }
+        }
+    }
+
+    /**
+     * Reads the {@code page} query parameter of a search URL.
+     *
+     * @param targetUrl search URL, possibly carrying a {@code page} parameter
+     * @return the page number, or 1 when the URL has no numeric {@code page} parameter
+     */
+    private static int parsePageNumber(String targetUrl) {
+        Matcher matcher = PAGE_PARAM.matcher(targetUrl);
+        return matcher.find() ? Integer.parseInt(matcher.group(1)) : 1;
+    }
+
+    /**
+     * Fails unless the connection answered with HTTP 200.
+     *
+     * @param conn connection whose response status is checked
+     * @throws IOException if the status is not 200 or the request itself fails
+     */
+    private static void requireOk(HttpURLConnection conn) throws IOException {
+        int status = conn.getResponseCode();
+        if (status != HttpURLConnection.HTTP_OK) {
+            throw new IOException("Unexpected HTTP status " + status + " from " + conn.getURL());
+        }
+    }
+
+    /**
+     * Reads the whole response body as UTF-8 text.
+     *
+     * @param conn connection to read from
+     * @return the body, with each line terminated by {@code \n}
+     * @throws IOException if reading fails
+     */
+    private static String readBody(HttpURLConnection conn) throws IOException {
+        StringBuilder body = new StringBuilder();
+        try (BufferedReader reader = new BufferedReader(
+                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                // Keep a separator so markup split across lines is not glued together.
+                body.append(line).append('\n');
+            }
+        }
+        return body.toString();
+    }
+
+    /**
+     * Adds the cookies of the response to {@code cookieSet}.
+     *
+     * @param conn      connection whose response headers are read
+     * @param cookieSet receives every {@code name=value} pair found
+     * @return the last {@code ASP.NET_SessionId} or {@code csfcfc} pair seen, or {@code null} if there is none
+     */
+    private static String updateCookies(HttpURLConnection conn, Set<String> cookieSet) {
+        String sessionId = null;
+        for (Map.Entry<String, List<String>> header : conn.getHeaderFields().entrySet()) {
+            // Header names are case-insensitive; a null key does not match and is skipped.
+            if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
+                continue;
+            }
+            for (String cookie : header.getValue()) {
+                String cookieValue = cookie.split(";")[0].trim();
+                cookieSet.add(cookieValue);
+                if (cookieValue.startsWith("ASP.NET_SessionId=") || cookieValue.startsWith("csfcfc=")) {
+                    sessionId = cookieValue;
+                }
+            }
+        }
+        return sessionId;
+    }
+
+    /**
+     * Collects the hidden state fields of the search form.
+     *
+     * @param html markup of the search form page
+     * @return map from field name to value; fields that are absent or empty are left out and reported on stderr
+     */
+    private static Map<String, String> extractViewStateData(String html) {
+        Map<String, String> stateMap = new HashMap<>();
+        Document form = Jsoup.parse(html);
+        for (String fieldName : STATE_FIELDS) {
+            extractHiddenField(form, fieldName, stateMap);
+            if (!stateMap.containsKey(fieldName)) {
+                System.err.println("Failed to extract " + fieldName + " from HTML");
+            }
+        }
+        return stateMap;
+    }
+
+    /**
+     * Stores the value of the {@code <input>} named {@code fieldName}, if it has a non-empty one.
+     *
+     * @param form      parsed form page
+     * @param fieldName name attribute of the input to look up
+     * @param map       receives the value under {@code fieldName}
+     */
+    private static void extractHiddenField(Document form, String fieldName, Map<String, String> map) {
+        // Selecting by attribute works regardless of attribute order and decodes HTML entities.
+        String value = form.select("input[name=" + fieldName + "]").attr("value");
+        if (!value.isEmpty()) {
+            map.put(fieldName, value);
+        }
+    }
+
+    /**
+     * Builds the URL-encoded body of the paging POST; the field order is kept as it was.
+     *
+     * @param viewState       value of {@code __VIEWSTATE}
+     * @param eventValidation value of {@code __EVENTVALIDATION}
+     * @param viewStateGen    value of {@code __VIEWSTATEGENERATOR}
+     * @param page            result page to request
+     * @return the encoded form body
+     */
+    private static String buildPostData(String viewState, String eventValidation, String viewStateGen, int page) {
+        String scriptManager = "ctl00$body$tsmAJAXScriptManager";
+        String searchArguments = "conditionCode=&dateOfRegistrationFrom=&interventionDescription=&interventionCodeOperator=OR&primarySponsorType=&gender=&distance=&postcode=&pageSize=20&ageGroup=&recruitmentCountryOperator=OR&recruitmentRegion=&ethicsReview=&countryOfRecruitment=&registry=&searchTxt=&studyType=&allocationToIntervention=&dateOfRegistrationTo=&recruitmentStatus=&interventionCode=&healthCondition=&healthyVolunteers=&page=" + page + "&conditionCategory=&fundingSource=&trialStartDateTo=&trialStartDateFrom=&phase=";
+        StringBuilder payload = new StringBuilder();
+        appendField(payload, scriptManager, scriptManager + "|" + scriptManager);
+        appendField(payload, "ctl00_body_tsmAJAXScriptManager_HiddenField", "");
+        appendField(payload, "__EVENTTARGET", scriptManager);
+        appendField(payload, "__EVENTARGUMENT", searchArguments);
+        appendField(payload, "__LASTFOCUS", "");
+        appendField(payload, "__VIEWSTATE", viewState);
+        appendField(payload, "__VIEWSTATEGENERATOR", viewStateGen);
+        appendField(payload, "__SCROLLPOSITIONX", "0");
+        appendField(payload, "__SCROLLPOSITIONY", "0");
+        appendField(payload, "__EVENTVALIDATION", eventValidation);
+        appendField(payload, "__ASYNCPOST", "true");
+        return payload.toString();
+    }
+
+    /**
+     * Appends one {@code name=value} pair, URL-encoding both parts and separating pairs with {@code &}.
+     *
+     * @param payload form body being built
+     * @param name    field name
+     * @param value   field value, may be empty
+     */
+    private static void appendField(StringBuilder payload, String name, String value) {
+        if (payload.length() > 0) {
+            payload.append('&');
+        }
+        payload.append(encode(name)).append('=').append(encode(value));
+    }
+
+    /**
+     * URL-encodes {@code text} as UTF-8.
+     *
+     * @param text text to encode
+     * @return the encoded text
+     */
+    private static String encode(String text) {
+        try {
+            return URLEncoder.encode(text, StandardCharsets.UTF_8.name());
+        } catch (UnsupportedEncodingException e) {
+            // Every Java platform supports UTF-8, so this is not expected to happen.
+            throw new IllegalStateException("UTF-8 is not supported", e);
+        }
+    }
+}
diff --git a/src/main/java/com/example/CaptchaOCR.java b/src/main/java/com/example/CaptchaOCR.java
new file mode 100644
index 0000000..f9f6c53
--- /dev/null
+++ b/src/main/java/com/example/CaptchaOCR.java
@@ -0,0 +1,173 @@
+package com.example;
+
+import java.awt.image.BufferedImage;
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import javax.imageio.ImageIO;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
+
+// ... 其他必要的导入 ...
+
+public class CaptchaOCR {
+
+    // Tesseract data path: the directory that holds the tessdata files.
+    // Windows example: "C:\\Program Files\\Tesseract-OCR\\tessdata"
+    // Linux/macOS: per the original author's note, a path is usually unnecessary because
+    // Tess4J locates tessdata on its own -- TODO confirm for the Tess4J version in use.
+    // The value below is machine-specific; recognizeCaptcha passes it to Tesseract.setDatapath.
+    private static final String TESSDATA_PATH = "F:\\tool\\Tesseract-OCR\\tessdata"; // adjust to your local installation path
+
+    /**
+     * Downloads a CAPTCHA image over HTTP and decodes it.
+     *
+     * <p>The response body is buffered completely in memory before decoding, so ImageIO
+     * never reads from a half-delivered network stream. The connection is bounded by
+     * connect/read timeouts and is always disconnected, whether the call succeeds or fails.
+     *
+     * @param imageUrl absolute URL of the image
+     * @return the decoded image, never {@code null}
+     * @throws IOException if the URL is malformed, the connection fails or times out, the
+     *                     server answers with anything other than HTTP 200, or the payload
+     *                     cannot be decoded as an image
+     */
+    public static BufferedImage downloadImage(String imageUrl) throws IOException {
+        HttpURLConnection conn = (HttpURLConnection) new URL(imageUrl).openConnection();
+        try {
+            conn.setRequestMethod("GET");
+            // Without timeouts an unresponsive server would block the caller indefinitely.
+            conn.setConnectTimeout(15_000);
+            conn.setReadTimeout(30_000);
+            // Browser-like User-Agent so the request looks like an ordinary page visit.
+            conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+            int responseCode = conn.getResponseCode();
+            if (responseCode != HttpURLConnection.HTTP_OK) {
+                throw new IOException("Failed to download image. HTTP error code: " + responseCode);
+            }
+
+            try (InputStream is = conn.getInputStream()) {
+                // Copy the whole body into memory first, then decode from the byte array.
+                ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                byte[] buffer = new byte[4096];
+                int bytesRead;
+                while ((bytesRead = is.read(buffer)) != -1) {
+                    baos.write(buffer, 0, bytesRead);
+                }
+
+                // ImageIO.read returns null when no registered reader understands the data.
+                BufferedImage image = ImageIO.read(new ByteArrayInputStream(baos.toByteArray()));
+                if (image == null) {
+                    throw new IOException("Failed to read image from stream. Check image format.");
+                }
+                return image;
+            }
+        } finally {
+            // Release the underlying socket on every exit path.
+            conn.disconnect();
+        }
+    }
+
+    /**
+     * Basic CAPTCHA preprocessing: converts the image to grayscale and then binarizes it
+     * with a fixed threshold.
+     *
+     * <p>Pixels whose gray level is below 128 become 0 (black) and all others become 1
+     * (white) in a {@code TYPE_BYTE_BINARY} image. As a debugging aid the result is also
+     * written to {@code preprocessed_captcha.png} in the working directory; a failure to
+     * write that file is reported but does not affect the returned image.
+     *
+     * <p>This is only a starting point. Noise and line removal, character segmentation,
+     * deskewing or contrast adjustment may be needed depending on the CAPTCHA style.
+     *
+     * @param originalImage the image to process
+     * @return a new binary image with the same dimensions as the input
+     */
+    public static BufferedImage preprocessImage(BufferedImage originalImage) {
+        int width = originalImage.getWidth();
+        int height = originalImage.getHeight();
+
+        // Step 1: grayscale, by drawing the source onto a gray-typed canvas.
+        BufferedImage grayImage = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
+        java.awt.Graphics graphics = grayImage.getGraphics();
+        try {
+            graphics.drawImage(originalImage, 0, 0, null);
+        } finally {
+            // Graphics contexts hold native resources and must be released explicitly.
+            graphics.dispose();
+        }
+
+        // Step 2: fixed-threshold binarization. The threshold (0-255) may need tuning.
+        BufferedImage binaryImage = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
+        int threshold = 128;
+        // Fetch the rasters once instead of once per pixel.
+        java.awt.image.WritableRaster grayRaster = grayImage.getRaster();
+        java.awt.image.WritableRaster binaryRaster = binaryImage.getRaster();
+        for (int y = 0; y < height; y++) {
+            for (int x = 0; x < width; x++) {
+                int gray = grayRaster.getSample(x, y, 0);
+                binaryRaster.setSample(x, y, 0, gray < threshold ? 0 : 1); // 0 = black, 1 = white
+            }
+        }
+
+        // Debug aid: keep a copy of the processed image on disk for visual inspection.
+        try {
+            File outputfile = new File("preprocessed_captcha.png");
+            ImageIO.write(binaryImage, "png", outputfile);
+            System.out.println("Preprocessed image saved to " + outputfile.getAbsolutePath());
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+        return binaryImage;
+    }
+
+    /**
+     * Runs Tess4J OCR on an image and returns the recognized characters.
+     *
+     * <p>The engine is configured for English. Everything except ASCII letters and digits
+     * is stripped from the raw OCR output before it is returned.
+     *
+     * @param image the image to recognize (preferably already preprocessed)
+     * @return the cleaned OCR text, or {@code null} if Tesseract reports an error
+     */
+    public static String recognizeCaptcha(BufferedImage image) {
+        Tesseract ocrEngine = new Tesseract();
+
+        // Point the engine at the configured tessdata directory when one is set;
+        // otherwise leave discovery to Tess4J.
+        boolean dataPathMissing = TESSDATA_PATH == null || TESSDATA_PATH.isEmpty();
+        if (dataPathMissing) {
+            System.out.println("TESSDATA_PATH not set. Tess4J will try to find tessdata automatically.");
+        } else {
+            ocrEngine.setDatapath(TESSDATA_PATH);
+        }
+
+        ocrEngine.setLanguage("eng");
+        // For digit-only CAPTCHAs a character whitelist could be set here, e.g.
+        // ocrEngine.setTessVariable("tessedit_char_whitelist", "0123456789");
+
+        String rawText;
+        try {
+            rawText = ocrEngine.doOCR(image);
+        } catch (TesseractException e) {
+            System.err.println("Error during OCR: " + e.getMessage());
+            return null;
+        }
+
+        // Drop whitespace, line breaks and any other non-alphanumeric noise.
+        String cleaned = rawText.trim().replaceAll("[^0-9a-zA-Z]", "");
+        System.out.println("OCR Result: " + cleaned);
+        return cleaned;
+    }
+
+    /**
+     * Demonstrates the download, preprocess and recognize pipeline on a single CAPTCHA URL.
+     * The URL below is a placeholder that must be replaced with one parsed from the page.
+     */
+    public static void main(String[] args) {
+        final String captchaImageUrl = "YOUR_CAPTCHA_IMAGE_URL";
+
+        try {
+            // 1. Download
+            BufferedImage downloaded = downloadImage(captchaImageUrl);
+            System.out.println("Image downloaded.");
+
+            // 2. Preprocess
+            BufferedImage cleaned = preprocessImage(downloaded);
+            System.out.println("Image preprocessed.");
+
+            // 3. Recognize
+            String code = recognizeCaptcha(cleaned);
+            boolean recognized = code != null && !code.isEmpty();
+            if (!recognized) {
+                // A retry or a log entry would go here.
+                System.out.println("Failed to recognize CAPTCHA.");
+                return;
+            }
+
+            System.out.println("Recognized CAPTCHA: " + code);
+            // 4. Next step (not implemented): URL-encode the code, add it to the form's
+            //    POST data under the CAPTCHA input field name, and submit the request.
+        } catch (IOException e) {
+            System.err.println("Error downloading or processing image: " + e.getMessage());
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/CsAirScraper.java b/src/main/java/com/example/CsAirScraper.java
new file mode 100644
index 0000000..b47cf8d
--- /dev/null
+++ b/src/main/java/com/example/CsAirScraper.java
@@ -0,0 +1,81 @@
+package com.example;
+
+import io.github.bonigarcia.wdm.WebDriverManager;
+import org.apache.hc.client5.http.classic.methods.HttpPost;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.core5.http.io.entity.StringEntity;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.openqa.selenium.Cookie;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.chrome.ChromeDriver;
+
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class CsAirScraper {
+
+    /**
+     * Opens the China Southern flight-query page in Chrome to collect the cookies set by
+     * the page's JavaScript, then replays a flight search as a plain HTTP POST carrying
+     * those cookies and prints the status code and response body.
+     *
+     * @param args unused
+     * @throws Exception if the browser cannot be started, the wait is interrupted, or the
+     *                   HTTP request fails
+     */
+    public static void main(String[] args) throws Exception {
+        // 1. Start Selenium and open the query page.
+        WebDriverManager.chromedriver().setup();
+        WebDriver driver = new ChromeDriver();
+
+        String cookieHeader;
+        try {
+            driver.get("https://b2c.csair.com/portal/main/flight/direct/query");
+
+            // Give the page's scripts time to set their cookies; tune to the page's speed.
+            Thread.sleep(5000);
+
+            // 2. Flatten all browser cookies into a single Cookie header value.
+            Set<Cookie> seleniumCookies = driver.manage().getCookies();
+            cookieHeader = seleniumCookies.stream()
+                    .map(c -> c.getName() + "=" + c.getValue())
+                    .collect(Collectors.joining("; "));
+        } finally {
+            // Always shut the browser down, even if navigation or cookie access failed;
+            // otherwise the Chrome and chromedriver processes are left running.
+            driver.quit();
+        }
+
+        System.out.println("获取到 Cookie: " + cookieHeader);
+
+        // 3. Replay the search with HttpClient, sending the captured cookies.
+        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+            HttpPost post = new HttpPost("https://b2c.csair.com/portal/main/flight/direct/query");
+
+            post.setHeader("Content-Type", "application/json");
+            post.setHeader("Cookie", cookieHeader);
+            post.setHeader("User-Agent", "Mozilla/5.0");
+
+            // JSON request body describing the flight search.
+            String json = "{"
+                    + "\"action\": \"0\","
+                    + "\"adultNum\": \"1\","
+                    + "\"airLine\": 1,"
+                    + "\"arrCity\": \"PKX\","
+                    + "\"businessType\": \"COMMON\","
+                    + "\"cabinOrder\": \"0\","
+                    + "\"cache\": 0,"
+                    + "\"childNum\": \"0\","
+                    + "\"depCity\": \"CAN\","
+                    + "\"flightDate\": \"20250514\","
+                    + "\"flyType\": 0,"
+                    + "\"infantNum\": \"0\","
+                    + "\"international\": \"0\","
+                    + "\"isMember\": \"\","
+                    + "\"isMultipass\": 1,"
+                    + "\"language\": \"zh\","
+                    + "\"preUrl\": \"\","
+                    + "\"segType\": \"1\","
+                    + "\"tariffRules\": []"
+                    + "}";
+
+            post.setEntity(new StringEntity(json));
+
+            // 4. Send the request and print the outcome.
+            try (CloseableHttpResponse response = httpClient.execute(post)) {
+                int code = response.getCode();
+                String result = EntityUtils.toString(response.getEntity());
+                System.out.println("状态码: " + code);
+                System.out.println("响应: " + result);
+            }
+        }
+    }
+}
diff --git a/src/main/java/com/example/CtriScraper.java b/src/main/java/com/example/CtriScraper.java
new file mode 100644
index 0000000..3ff578f
--- /dev/null
+++ b/src/main/java/com/example/CtriScraper.java
@@ -0,0 +1,404 @@
+package com.example;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import okhttp3.*;
+import org.apache.hc.client5.http.cookie.BasicCookieStore;
+import org.apache.hc.client5.http.cookie.CookieStore;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.classic.methods.HttpPost;
+import org.apache.hc.client5.http.entity.UrlEncodedFormEntity;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.client5.http.protocol.HttpClientContext;
+import org.apache.hc.core5.http.HttpEntity;
+import org.apache.hc.core5.http.NameValuePair;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.apache.hc.core5.http.message.BasicNameValuePair;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.Month;
+import java.time.Year;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.nio.charset.StandardCharsets;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class CtriScraper {
+
+    // Advanced-search form page; getlink fetches it first to read the hidden
+    // csrf_token and __ncforminfo inputs.
+    private static final String SEARCH_FORM_URL = "https://ctri.nic.in/Clinicaltrials/advancesearchmain.php";
+
+    // Endpoint the search form is POSTed to.
+    private static final String SEARCH_SUBMIT_URL = "https://ctri.nic.in/Clinicaltrials/advsearch.php";
+
+    // Captures the text between the first pair of single quotes in a result anchor's href.
+    private static final Pattern LINK_REGEX_PATTERN = Pattern.compile("'([^']*)'");
+
+    // Kafka topic that main publishes to, and the broker address used by the static initializer.
+    private static final String TOPIC_NAME = "cliniTopic";
+    private static final String BOOTSTRAP_SERVERS = "node-01:19092";
+    // Shared producer, created in the static initializer and closed at the end of main.
+    private static KafkaProducer<String, String> producer;
+    // Serializes each scraped result map to JSON in main.
+    private static ObjectMapper objectMapper = new ObjectMapper();
+    // Source of the randomized pause between detail-page requests in main.
+    private static final Random random = new Random();
+
+    // Creates the shared Kafka producer when the class is loaded.
+    static {
+        Properties kafkaConfig = new Properties();
+        // Delivery guarantees: wait for all replicas to acknowledge, retry up to 3 times.
+        kafkaConfig.put(ProducerConfig.ACKS_CONFIG, "all");
+        kafkaConfig.put(ProducerConfig.RETRIES_CONFIG, 3);
+        // Keys and values are both sent as plain strings.
+        kafkaConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+        kafkaConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+        kafkaConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BOOTSTRAP_SERVERS);
+
+        producer = new KafkaProducer<>(kafkaConfig);
+    }
+    /**
+     * Collects the detail-page URLs of the CTRI trials returned by the advanced search for
+     * the given year and month.
+     *
+     * <p>Flow: GET the search form, read the hidden {@code csrf_token} and
+     * {@code __ncforminfo} inputs, POST the search with those values in the same cookie
+     * session, then extract the quoted target from every anchor inside a table row.
+     *
+     * @param year  registration year to search for
+     * @param month registration month (1-12); sent as a two-digit value
+     * @return the extracted absolute links (possibly empty), or {@code null} if any step
+     *         failed (non-200 response, missing entity, missing token, or an exception)
+     */
+    public static List<String> getlink(Integer year, Integer month) {
+        // The client and the request context share one cookie store, so cookies set by the
+        // form page are sent back with the POST.
+        CookieStore cookieStore = new BasicCookieStore();
+        HttpClientContext context = HttpClientContext.create();
+        context.setCookieStore(cookieStore);
+
+        try (CloseableHttpClient httpClient = HttpClients.custom()
+                .setDefaultCookieStore(cookieStore)
+                .build()) {
+
+            String formPageHtml = fetchSearchFormHtml(httpClient, context);
+            if (formPageHtml == null) {
+                return null;
+            }
+
+            // The base URI lets Jsoup resolve relative paths in the form page.
+            Document formDoc = Jsoup.parse(formPageHtml, SEARCH_FORM_URL);
+
+            Element csrfTokenInput = formDoc.selectFirst("input[name=csrf_token][type=hidden]");
+            if (csrfTokenInput == null) {
+                System.err.println("Warning: Could not find csrf_token input field.");
+                return null;
+            }
+
+            Element ncFormInfoInput = formDoc.selectFirst("input[name=__ncforminfo][type=hidden]");
+            if (ncFormInfoInput == null) {
+                System.err.println("Warning: Could not find __ncforminfo input field.");
+                return null;
+            }
+
+            String resultsHtml = submitSearchForm(httpClient, context, year, month,
+                    csrfTokenInput.val(), ncFormInfoInput.val());
+            if (resultsHtml == null) {
+                return null;
+            }
+
+            return extractTrialLinks(resultsHtml);
+
+        } catch (IOException e) {
+            // Network-level failures.
+            System.err.println("Network or IO error during scraping:");
+            e.printStackTrace();
+            return null;
+        } catch (Exception e) {
+            // Anything else, e.g. a response that cannot be parsed.
+            System.err.println("An unexpected error occurred during scraping:");
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    /** Fetches the search form page; returns its HTML, or null on a non-200 status or missing body. */
+    private static String fetchSearchFormHtml(CloseableHttpClient httpClient, HttpClientContext context)
+            throws IOException, org.apache.hc.core5.http.ParseException {
+        HttpGet getRequest = new HttpGet(SEARCH_FORM_URL);
+        // Browser-like headers.
+        getRequest.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
+        getRequest.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
+        getRequest.setHeader("Accept-Language", "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7");
+
+        try (CloseableHttpResponse response = httpClient.execute(getRequest, context)) {
+            int statusCode = response.getCode();
+            if (statusCode != 200) {
+                System.err.println("Error: GET request to form page failed with status code: " + statusCode);
+                EntityUtils.consume(response.getEntity()); // drain so the connection can be reused
+                return null;
+            }
+
+            HttpEntity entity = response.getEntity();
+            if (entity == null) {
+                System.err.println("Error: Failed to get form page entity.");
+                return null;
+            }
+            // toString reads the entity to the end, so no separate consume call is needed.
+            return EntityUtils.toString(entity, StandardCharsets.UTF_8);
+        }
+    }
+
+    /** POSTs the search form with the given tokens; returns the result HTML, or null on a non-200 status or missing body. */
+    private static String submitSearchForm(CloseableHttpClient httpClient, HttpClientContext context,
+                                           Integer year, Integer month,
+                                           String csrfToken, String ncFormInfo)
+            throws IOException, org.apache.hc.core5.http.ParseException {
+        HttpPost postRequest = new HttpPost(SEARCH_SUBMIT_URL);
+        // Browser-like headers for a form submission; Referer and Origin point at the form's site.
+        postRequest.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36");
+        postRequest.setHeader("Referer", SEARCH_FORM_URL);
+        postRequest.setHeader("Origin", "https://ctri.nic.in");
+        postRequest.setHeader("Content-Type", "application/x-www-form-urlencoded");
+        postRequest.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
+        postRequest.setHeader("Accept-Language", "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7");
+        postRequest.setHeader("Pragma", "no-cache");
+
+        // Form fields in the order of the captured browser payload; only the tokens,
+        // month and year vary between requests.
+        List<NameValuePair> params = new ArrayList<>();
+        params.add(new BasicNameValuePair("stid", "1")); // the captured payload had two stid fields; 1 is used
+        params.add(new BasicNameValuePair("csrf_token", csrfToken));
+        params.add(new BasicNameValuePair("pros", "1"));
+        params.add(new BasicNameValuePair("month", String.format("%02d", month))); // two-digit month
+        params.add(new BasicNameValuePair("year", String.valueOf(year)));
+        params.add(new BasicNameValuePair("study", "0"));
+        params.add(new BasicNameValuePair("sdid", "0"));
+        params.add(new BasicNameValuePair("phaseid", "0"));
+        params.add(new BasicNameValuePair("psponsor", "0"));
+        params.add(new BasicNameValuePair("recid", "0"));
+        params.add(new BasicNameValuePair("state", "0"));
+        params.add(new BasicNameValuePair("district", "0"));
+        params.add(new BasicNameValuePair("searchword", ""));
+        params.add(new BasicNameValuePair("T4", "anyvalue")); // per the original note, T4 is not validated
+        params.add(new BasicNameValuePair("btt", "Search"));
+        params.add(new BasicNameValuePair("__ncforminfo", ncFormInfo));
+        postRequest.setEntity(new UrlEncodedFormEntity(params, StandardCharsets.UTF_8));
+
+        try (CloseableHttpResponse postResponse = httpClient.execute(postRequest, context)) {
+            int postStatusCode = postResponse.getCode();
+            if (postStatusCode != 200) {
+                System.err.println("Error: POST request to submit form failed with status code: " + postStatusCode);
+                EntityUtils.consume(postResponse.getEntity()); // drain so the connection can be reused
+                return null;
+            }
+
+            HttpEntity postEntity = postResponse.getEntity();
+            if (postEntity == null) {
+                System.err.println("Error: Failed to get search results entity.");
+                return null;
+            }
+            return EntityUtils.toString(postEntity, StandardCharsets.UTF_8);
+        }
+    }
+
+    /** Extracts the single-quoted target of every anchor inside a table row and turns it into an absolute URL. */
+    private static List<String> extractTrialLinks(String searchResultsHtml) {
+        List<String> linkList = new ArrayList<>();
+        Document resultsDoc = Jsoup.parse(searchResultsHtml, SEARCH_SUBMIT_URL);
+
+        for (Element linkElement : resultsDoc.select("tr a")) {
+            String rawLink = linkElement.attr("href");
+            Matcher matcher = LINK_REGEX_PATTERN.matcher(rawLink);
+            if (matcher.find()) {
+                // group(1) is the text between the single quotes.
+                linkList.add("https://ctri.nic.in/Clinicaltrials/" + matcher.group(1));
+            } else {
+                // Anchors that do not follow the expected pattern are reported and skipped.
+                System.err.println("Warning: Link does not match expected pattern: " + rawLink);
+            }
+        }
+        return linkList;
+    }
+    /**
+     * Crawls CTRI month by month, from the current month back to January 2024, and
+     * publishes one JSON record per trial to Kafka, keyed by registration number.
+     *
+     * <p>A failure on one trial or one month is logged and the crawl continues. If the
+     * thread is interrupted while pausing between requests, the interrupt flag is
+     * restored and the crawl stops. The producer is closed on every exit path so that
+     * buffered records are flushed.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        // Take "now" once so year and month stay consistent for the whole run.
+        final LocalDate today = LocalDate.now();
+        final int currentYear = today.getYear();
+
+        try {
+            for (int year = currentYear; year >= 2024; year--) {
+                // The current year starts at the current month; earlier years start at December.
+                int monthStart = (year == currentYear) ? today.getMonthValue() : 12;
+
+                for (int month = monthStart; month >= 1; month--) {
+                    try {
+                        List<String> links = getlink(year, month);
+                        if (links == null) {
+                            System.out.println("年份 " + year + " 月份 " + month + " 抓取失败！");
+                            continue;
+                        }
+
+                        if (links.isEmpty()) {
+                            System.out.println("年份 " + year + " 月份 " + month + " 无数据！");
+                            continue;
+                        }
+
+                        // Pause of 3000-4000 ms between requests, drawn once per month.
+                        int sleepTime = random.nextInt(1001) + 3000;
+                        int count = 0;
+
+                        for (String url : links) {
+                            try {
+                                Map<String, Object> result = reslutData(url);
+                                result.put("crawlUrl", url);
+
+                                String registNum = String.valueOf(result.get("registNum"));
+                                String jsonValue = objectMapper.writeValueAsString(result);
+
+                                ProducerRecord<String, String> record = new ProducerRecord<>(TOPIC_NAME, registNum, jsonValue);
+
+                                // The send is asynchronous; the callback reports the final outcome.
+                                producer.send(record, (metadata, exception) -> {
+                                    if (exception == null) {
+                                        System.out.println("✅ 成功发送到 Kafka: " + registNum + " | Offset: " + metadata.offset() + " | " + url);
+                                    } else {
+                                        System.err.println("❌ Kafka 发送失败: " + exception.getMessage());
+                                    }
+                                });
+                                // Count the record once it has been handed to the producer, so an
+                                // interrupted pause cannot make the total under-report.
+                                count++;
+
+                                Thread.sleep(sleepTime); // throttle requests
+                            } catch (InterruptedException e) {
+                                // Restore the flag and stop instead of carrying on after a cancel request.
+                                Thread.currentThread().interrupt();
+                                System.err.println("Interrupted while crawling; stopping.");
+                                return;
+                            } catch (Exception e) {
+                                System.err.println("抓取或发送失败: " + url);
+                                e.printStackTrace();
+                            }
+                        }
+
+                        System.out.println("📦 年份 " + year + " 月份 " + month + " 已完成，共上传 " + count + " 条数据。");
+
+                    } catch (Exception e) {
+                        System.err.println("处理年份 " + year + " 月份 " + month + " 失败: " + e.getMessage());
+                        e.printStackTrace();
+                    }
+                }
+            }
+        } finally {
+            // close() waits for outstanding sends, so it must run even on an early exit.
+            producer.close();
+        }
+    }
+
+    // One client for all detail-page requests, so the connection pool is reused across calls.
+    private static final OkHttpClient DETAIL_HTTP_CLIENT = new OkHttpClient();
+
+    /**
+     * Downloads one CTRI trial detail page and extracts its fields into a flat record.
+     *
+     * <p>All fields are read by position (fixed row/column selectors) from the 11th child
+     * table of the page layout, so a missing row yields an empty string, not an error.
+     * Fields with no source on this site are emitted as empty strings or fixed values.
+     *
+     * @param url absolute URL of the trial detail page
+     * @return a mutable map holding the scraped and constant fields
+     * @throws IOException if the request fails or the server answers with a non-2xx status
+     */
+    public static Map<String,Object> reslutData(String url) throws IOException {
+        Request request = new Request.Builder()
+                .url(url)
+                .get()
+                .build();
+
+        String html;
+        // try-with-resources releases the response on every path.
+        try (Response response = DETAIL_HTTP_CLIENT.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                // Do not parse an error page into a record of empty fields.
+                throw new IOException("Unexpected HTTP status " + response.code() + " for " + url);
+            }
+            html = response.body().string();
+        }
+
+        Document parse = Jsoup.parse(html);
+        // Every field lives in a row of this table body; only the row/cell suffix differs.
+        final String rows = "body > table > tbody > tr > td > table:nth-child(11) > tbody";
+
+        String title = parse.select(rows + " > tr:nth-child(7) > td:nth-child(2)").text();
+        String registNum = parse.select(rows + " > tr:nth-child(1) > td:nth-child(2) > b").text();
+        // The same cell as registNum also carries the "[Registered on: dd/MM/yyyy]" note.
+        String registTime = extractAndConvertDate(parse.select(rows + " > tr:nth-child(1) > td:nth-child(2)").text());
+
+        Map<String,Object> sponsor = new HashMap<>();
+        String SMMS = parse.select(rows + " > tr:nth-child(14) > td:nth-child(2) > table > tbody > tr > td").text();
+        String primarySponsor = parse.select(rows + " > tr:nth-child(15) > td:nth-child(2) > table > tbody").text();
+        sponsor.put("Source of Monetary or Material Support",SMMS);
+        sponsor.put("Primary Sponsor",primarySponsor);
+
+        String studyType = parse.select(rows + " > tr:nth-child(4) > td:nth-child(2)").text();
+        String phase = parse.select(rows + " > tr:nth-child(31) > td:nth-child(2)").text();
+
+        Map<String,Object> disease = new HashMap<>();
+        String healthType = parse.select(rows + " > tr:nth-child(21) > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(1)").text();
+        String condition = parse.select(rows + " > tr:nth-child(21) > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(2)").text();
+        disease.put("healthType",healthType);
+        disease.put("condition",condition);
+
+        String studyDesign = parse.select(rows + " > tr:nth-child(6) > td:nth-child(2)").text();
+        String inclusionCriteria = parse.select(rows + " > tr:nth-child(23) > td:nth-child(2) > table > tbody").text();
+        String exclusionCriteria = parse.select(rows + " > tr:nth-child(24) > td:nth-child(2) > table > tbody > tr > td:nth-child(2)").text();
+        String enrollment = parse.select(rows + " > tr:nth-child(30) > td:nth-child(2)").text();
+        String country = parse.select(rows + " > tr:nth-child(17) > td:nth-child(2)").text();
+        String intervention = parse.select(rows + " > tr:nth-child(22) > td:nth-child(2) > table").text();
+
+        Map<String,Object> primaryOutcome = new HashMap<>();
+        String firstOutcome = parse.select(rows + " > tr:nth-child(28) > td:nth-child(2) > table > tbody").text();
+        String secondOutcome = parse.select(rows + " > tr:nth-child(29) > td:nth-child(2) > table > tbody").text();
+        primaryOutcome.put("firstOutcome",firstOutcome);
+        primaryOutcome.put("secondOutcome",secondOutcome);
+
+        Map<String,Object> resultData = new HashMap<>();
+        // Scraped fields.
+        resultData.put("disease",disease);
+        resultData.put("primaryOutcome",primaryOutcome);
+        resultData.put("intervention",intervention);
+        resultData.put("country",country);
+        resultData.put("enrollment",enrollment);
+        resultData.put("exclusionCriteria",exclusionCriteria);
+        resultData.put("inclusionCriteria",inclusionCriteria);
+        resultData.put("studyDesign",studyDesign);
+        resultData.put("sponsor",sponsor);
+        resultData.put("title",title);
+        resultData.put("registNum",registNum);
+        resultData.put("registTime",registTime);
+        resultData.put("studyType",studyType);
+        resultData.put("phase",phase);
+        // Fields that are not extracted from this page; emitted empty.
+        resultData.put("registStatus","");
+        resultData.put("registTitle","");
+        resultData.put("fullTitle","");
+        resultData.put("sponsorPart","");
+        resultData.put("studyObjective","");
+        resultData.put("studyStartDate","");
+        resultData.put("currentStatus","");
+        resultData.put("tagTime","");
+        // Crawl metadata and fixed values.
+        resultData.put("crawlTime",getCurrentTime());
+        resultData.put("crawlUrl",url);
+        resultData.put("postTime",registTime);
+        resultData.put("content","content");
+        resultData.put("forwardcontent","forwardcontent");
+        resultData.put("cid","Nctrinicin");
+        return resultData;
+    }
+    /**
+     * Returns the current local date-time formatted as {@code yyyy-MM-dd HH:mm:ss}.
+     *
+     * @return the formatted timestamp
+     */
+    public static String getCurrentTime() {
+        return LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+    }
+    // Matches "[Registered on: dd/MM/yyyy]" (or the Chinese label) and captures the date.
+    // Compiled once instead of on every call.
+    private static final Pattern REGISTERED_ON_PATTERN =
+            Pattern.compile("\\[(?:Registered on|注册于):\\s*(\\d{2}/\\d{2}/\\d{4})\\]");
+
+    /**
+     * Finds the first bracketed registration note in the text and converts its date from
+     * {@code dd/MM/yyyy} to {@code yyyy-MM-dd 00:00:00}.
+     *
+     * <p>Parsing is strict: an impossible calendar date such as {@code 31/02/2024} is
+     * rejected and not rolled over into the following month.
+     *
+     * @param input text that may contain a registration note; may be {@code null}
+     * @return the converted date string, or {@code null} if the input is {@code null},
+     *         contains no registration note, or the date is not a valid calendar date
+     */
+    public static String extractAndConvertDate(String input) {
+        if (input == null) {
+            return null;
+        }
+
+        Matcher matcher = REGISTERED_ON_PATTERN.matcher(input);
+        if (!matcher.find()) {
+            return null;
+        }
+
+        String dateStr = matcher.group(1);
+        try {
+            SimpleDateFormat inputFormat = new SimpleDateFormat("dd/MM/yyyy");
+            // Lenient parsing would silently turn 31/02 into 02/03.
+            inputFormat.setLenient(false);
+            Date date = inputFormat.parse(dateStr);
+
+            // The time part is a fixed literal; only the date is taken from the page.
+            SimpleDateFormat outputFormat = new SimpleDateFormat("yyyy-MM-dd '00:00:00'");
+            return outputFormat.format(date);
+        } catch (ParseException e) {
+            System.err.println("Unparseable registration date: " + dateStr);
+            return null;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/CtriScraperContent.java b/src/main/java/com/example/CtriScraperContent.java
new file mode 100644
index 0000000..9840b40
--- /dev/null
+++ b/src/main/java/com/example/CtriScraperContent.java
@@ -0,0 +1,121 @@
+package com.example;
+
+import okhttp3.*;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** One-off scraper: fetches a hard-coded CTRI trial detail page, extracts its fields and prints them. */
+public class CtriScraperContent {
+    /** Entry point; throws IOException when the request fails or the server answers with a non-2xx status. */
+    public static void main(String[] args) throws IOException {
+        Map<String,Object> resultData = new HashMap<>();
+        String url = "https://ctri.nic.in/Clinicaltrials/pmaindet2.php?EncHid=MjQ3MjM=&Enc=&userName=";
+        OkHttpClient client = new OkHttpClient().newBuilder().build();
+        Request request = new Request.Builder()
+                .url(url)
+                .get()
+                .build();
+        String html;
+        try (Response response = client.newCall(request).execute()) {
+            if (!response.isSuccessful()) { // do not scrape an error page as if it were a trial record
+                throw new IOException("Unexpected HTTP " + response.code() + " for " + url);
+            }
+            html = response.body().string();
+        }
+        Document parse = Jsoup.parse(html);
+        // Each field is addressed by its fixed row/cell position inside the table:nth-child(11) element.
+        String title = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(7) > td:nth-child(2)").text();
+        String registNum = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(1) > td:nth-child(2) > b").text();
+        String registTime = extractAndConvertDate(parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(1) > td:nth-child(2)").text());
+        Map<String,Object> sponsor = new HashMap<>();
+        String SMMS = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(14) > td:nth-child(2) > table > tbody > tr > td").text();
+        String primarySponsor = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(15) > td:nth-child(2) > table > tbody").text();
+        sponsor.put("Source of Monetary or Material Support",SMMS);
+        sponsor.put("Primary Sponsor",primarySponsor);
+        String studyType = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(4) > td:nth-child(2)").text();
+        String phase = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(31) > td:nth-child(2)").text();
+        Map<String,Object> disease = new HashMap<>();
+        String healthType = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(21) > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(1)").text();
+        String condition = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(21) > td:nth-child(2) > table > tbody > tr:nth-child(2) > td:nth-child(2)").text();
+        disease.put("healthType",healthType);
+        disease.put("condition",condition);
+        String studyDesign = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(6) > td:nth-child(2)").text();
+        String inclusionCriteria = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(23) > td:nth-child(2) > table > tbody").text();
+        String exclusionCriteria = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(24) > td:nth-child(2) > table > tbody > tr > td:nth-child(2)").text();
+        String enrollment = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(30) > td:nth-child(2)").text();
+        String country = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(17) > td:nth-child(2)").text();
+        String intervention = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(22) > td:nth-child(2) > table").text();
+        Map<String,Object> primaryOutcome = new HashMap<>();
+        String firstOutcome = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(28) > td:nth-child(2) > table > tbody").text();
+        String secondOutcome = parse.select("body > table > tbody > tr > td > table:nth-child(11) > tbody > tr:nth-child(29) > td:nth-child(2) > table > tbody").text();
+        primaryOutcome.put("firstOutcome",firstOutcome);
+        primaryOutcome.put("secondOutcome",secondOutcome);
+        // Assemble the output record; keys with no value scraped above are stored as empty strings.
+        resultData.put("disease",disease);
+        resultData.put("primaryOutcome",primaryOutcome);
+        resultData.put("intervention",intervention);
+        resultData.put("country",country);
+        resultData.put("enrollment",enrollment);
+        resultData.put("exclusionCriteria",exclusionCriteria);
+        resultData.put("inclusionCriteria",inclusionCriteria);
+        resultData.put("studyDesign",studyDesign);
+        resultData.put("sponsor",sponsor);
+        resultData.put("title",title);
+        resultData.put("registNum",registNum);
+        resultData.put("registTime",registTime);
+        resultData.put("studyType",studyType);
+        resultData.put("phase",phase);
+        resultData.put("registStatus","");
+        resultData.put("registTitle","");
+        resultData.put("fullTitle","");
+        resultData.put("sponsorPart","");
+        resultData.put("studyObjective","");
+        resultData.put("studyStartDate","");
+        resultData.put("currentStatus","");
+        resultData.put("tagTime","");
+        resultData.put("crawlTime",getCurrentTime());
+        resultData.put("crawlUrl",url);
+        resultData.put("postTime",registTime);
+        resultData.put("content","content"); // NOTE(review): stores the literal text "content" - looks like a placeholder, confirm
+        resultData.put("forwardcontent","forwardcontent"); // NOTE(review): literal placeholder as well, confirm
+        System.out.println(resultData);
+    }
+    /** Returns the current system-local time formatted as "yyyy-MM-dd HH:mm:ss". */
+    public static String getCurrentTime() {
+        return LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+    }
+    /**
+     * Finds a "[Registered on: dd/MM/yyyy]" (or "[注册于: dd/MM/yyyy]") marker in the text
+     * and converts its date to "yyyy-MM-dd 00:00:00".
+     * @param input text to search; may be null
+     * @return the converted date, or null if input is null, no marker is found, or the date is not a real calendar date
+     */
+    public static String extractAndConvertDate(String input) {
+        if (input == null) {
+            return null; // Pattern.matcher(null) would otherwise throw NullPointerException
+        }
+        Matcher matcher = Pattern.compile("\\[(?:Registered on|注册于):\\s*(\\d{2}/\\d{2}/\\d{4})\\]").matcher(input);
+        if (!matcher.find()) {
+            return null;
+        }
+        try {
+            SimpleDateFormat inputFormat = new SimpleDateFormat("dd/MM/yyyy");
+            inputFormat.setLenient(false); // lenient parsing would silently roll 31/02/2024 over into March
+            return new SimpleDateFormat("yyyy-MM-dd '00:00:00'").format(inputFormat.parse(matcher.group(1)));
+        } catch (ParseException e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+}
diff --git a/src/main/java/com/example/Inka.java b/src/main/java/com/example/Inka.java
new file mode 100644
index 0000000..c60f147
--- /dev/null
+++ b/src/main/java/com/example/Inka.java
@@ -0,0 +1,113 @@
+package com.example;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import okhttp3.*;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class Inka { // Probe class: sends one hard-coded JSF partial/ajax search POST to WIPO Patentscope and prints the reply.
+//    private static final String TOPIC_NAME = "patentTopic";
+//    private static final String BOOTSTRAP_SERVERS = "localhost:9092";
+//    private static KafkaProducer<String, String> producer;
+//    private static ObjectMapper objectMapper = new ObjectMapper();
+//    private static final Random random = new Random();
+    private static List<String> proxyList = new ArrayList<>();  // proxy pool of "host:port" entries; nothing fills it while the static block below stays commented out
+    private static int currentProxyIndex = 0;  // index of the proxy currently in use
+//    static {
+//        Properties props = new Properties();
+//        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BOOTSTRAP_SERVERS);
+//        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+//        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+//        props.put(ProducerConfig.ACKS_CONFIG, "all"); // wait for all replicas to acknowledge
+//        props.put(ProducerConfig.RETRIES_CONFIG, 3); // number of retries
+//        producer = new KafkaProducer<>(props);
+//        try {
+//            proxyList = Files.readAllLines(Paths.get("proxy.txt"));
+//            if (proxyList.isEmpty()) {
+//                System.out.println("警告: proxy.txt 为空，未加载任何代理");
+//            } else {
+//                System.out.println("成功加载 " + proxyList.size() + " 个代理");
+//            }
+//        } catch (IOException e) {
+//            System.err.println("读取 proxy.txt 失败: " + e.getMessage());
+//        }
+//    }
+    public static void main(String[] args) throws IOException, InterruptedException {
+        String load = "javax.faces.partial.ajax=true&javax.faces.source=advancedSearchForm%3AadvancedSearchInput%3Aj_idt1225&javax.faces.partial.execute=advancedSearchForm%3AadvancedSearchInput%3Aj_idt1225+advancedSearchForm&javax.faces.partial.render=advancedSearchForm+results-container+j_idt1272&advancedSearchForm%3AadvancedSearchInput%3Aj_idt1225=advancedSearchForm%3AadvancedSearchInput%3Aj_idt1225&advancedSearchForm=advancedSearchForm&advancedSearchForm%3AadvancedSearchAssistant=on&advancedSearchForm%3AadvancedSearchInput%3Ainput=rance10&javax.faces.ViewState=-3602994148230912322%3A-6313250694718303467";
+        // NOTE(review): the javax.faces.ViewState in the payload and the _vid in the URL are hard-coded - presumably session-bound and short-lived; confirm.
+        OkHttpClient client = createClientWithProxy();
+
+        MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded; charset=UTF-8");
+        RequestBody body = RequestBody.create(mediaType, load);
+
+        // Build the request
+        Request request = new Request.Builder()
+                .url("https://patentscope.wipo.int/search/zh/result.jsf?_vid=P21-M9APK2-00815") // updated to the Patentscope URL
+                .method("POST", body)
+                .addHeader("Accept", "application/xml, text/xml, */*; q=0.01")
+                .addHeader("Accept-Language", "zh-CN,zh;q=0.9,th;q=0.8,en;q=0.7")
+                .addHeader("Cache-Control", "no-cache")
+                .addHeader("Connection", "keep-alive")
+                .addHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
+//                .addHeader("Cookie", "JSESSIONID=F253B7B0920FFACB89354339F51E325C.wapp2nB; ABIW=balancer.cms41; _ga=GA1.1.33840258.1744249893; Hm_lvt_95e64d347633bfd0a2462e25c93606d6=1744249893; Hm_lpvt_95e64d347633bfd0a2462e25c93606d6=1744249893; HMACCOUNT=0388A9D4AC1C33F5; _pk_id.14.ec75=5aa7b2d46edf6083.1744249894.; cebs=1; _ce.clock_data=-923%2C212.87.194.3%2C1%2C33d0f257a817d1ca4c4381b87f8ad83f%2CChrome%2CJP; cebsp_=1; _pk_uid=0%3DNWFhN2IyZDQ2ZWRmNjA4Mw%3D%3D; _gcl_au=1.1.1245117354.1744249928; wipo-visitor-uunid=28f5a645185bc7b; _pk_ref.9.ec75=%5B%22%22%2C%22%22%2C1744249929%2C%22https%3A%2F%2Fwww.wipo.int%2F%22%5D; _pk_id.9.ec75=957af9d7ac871adb.1744249929.; _ga_15TSHJ0HWP=GS1.1.1744249893.1.1.1744250058.58.0.0; _ce.s=v~274adfa655dbaad3ae6a47724ee5bf89d205d10f~lcw~1744250058720~vir~new~lva~1744249893962~vpv~0~v11.cs~411929~v11.s~559ada70-15ae-11f0-a979-459b55a048ba~v11.sla~1744250058728~gtrk.la~m9apg5tj~v11.send~1744250058720~lcw~1744250058728; _pk_id.5.ec75=ab8529a634a38653.1744250080.; wipo_language=zh; _pk_ses.5.ec75=1")
+                .addHeader("Faces-Request", "partial/ajax")
+                .addHeader("Host", "patentscope.wipo.int")
+                .addHeader("Origin", "https://patentscope.wipo.int")
+                .addHeader("Pragma", "no-cache")
+                .addHeader("Referer", "https://patentscope.wipo.int/search/zh/result.jsf?_vid=P21-M9APK2-00815")
+                .addHeader("Sec-Ch-Ua", "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"")
+                .addHeader("Sec-Ch-Ua-Mobile", "?0")
+                .addHeader("Sec-Ch-Ua-Platform", "\"Windows\"")
+                .addHeader("Sec-Fetch-Dest", "empty")
+                .addHeader("Sec-Fetch-Mode", "cors")
+                .addHeader("Sec-Fetch-Site", "same-origin")
+                .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36")
+                .addHeader("X-Requested-With", "XMLHttpRequest")
+                .build();
+
+        // Execute the request and print the response
+        try (Response response = client.newCall(request).execute()) {
+            if (response.isSuccessful()) {
+                System.out.println("Response: " + response.body().string()+response.code()); // body text with the status code appended directly after it
+            } else {
+                System.out.println("Error: " + response.code() + " - " + response.message());
+                System.out.println("Response Body: " + response.body().string());
+            }
+        }
+    }
+    /** Builds an OkHttpClient with 30 s connect/read/write timeouts; routes it through proxyList[currentProxyIndex] when that entry splits into exactly "host:port". */
+    private static OkHttpClient createClientWithProxy() {
+        OkHttpClient.Builder builder = new OkHttpClient().newBuilder()
+                .connectTimeout(30, TimeUnit.SECONDS)
+                .readTimeout(30, TimeUnit.SECONDS)
+                .writeTimeout(30, TimeUnit.SECONDS);
+
+        if (!proxyList.isEmpty() && currentProxyIndex < proxyList.size()) {
+            String proxy = proxyList.get(currentProxyIndex);
+            String[] proxyParts = proxy.split(":");
+            if (proxyParts.length == 2) {
+                String proxyHost = proxyParts[0];
+                int proxyPort = Integer.parseInt(proxyParts[1]); // NOTE(review): throws NumberFormatException on a non-numeric port
+                builder.proxy(new java.net.Proxy(java.net.Proxy.Type.HTTP,
+                        new java.net.InetSocketAddress(proxyHost, proxyPort)));
+                System.out.println("使用代理: " + proxy);
+            }
+        }
+        return builder.build();
+    }
+}
diff --git a/src/main/java/com/example/NSFAwardCrawler.java b/src/main/java/com/example/NSFAwardCrawler.java
new file mode 100644
index 0000000..bc3100e
--- /dev/null
+++ b/src/main/java/com/example/NSFAwardCrawler.java
@@ -0,0 +1,111 @@
+package com.example;
+
+import org.openqa.selenium.By;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.WebElement;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.openqa.selenium.support.ui.ExpectedConditions;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import org.openqa.selenium.NoSuchElementException;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+
+public class NSFAwardCrawler {
+    private static final int PAGE_SIZE = 30; // number of results expected on a full page
+    /** Opens the NSF award search for "ebola" in Chrome, walks the result pages and prints every distinct award id found. */
+    public static void main(String[] args) {
+        // Set the ChromeDriver path (absolute, machine-specific path)
+        System.setProperty("webdriver.chrome.driver",
+                "F:\\tool\\EasySpider_0.6.2_Windows_x64\\EasySpider_windows_x64\\EasySpider\\resources\\app\\chrome_win64\\chromedriver_win64.exe");
+
+        ChromeOptions options = new ChromeOptions();
+        WebDriver driver = new ChromeDriver(options);
+
+        try {
+            String url = "https://www.nsf.gov/awardsearch/simpleSearchResult?queryText=ebola&ActiveAwards=true";
+            driver.get(url);
+
+            WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
+            List<String> allAwardIds = new ArrayList<>();
+            int pageNumber = 1;
+
+            while (true) {
+                System.out.println("Processing page " + pageNumber);
+
+                // Wait (up to 10 s) until at least one result item is present in the page
+                wait.until(ExpectedConditions.presenceOfElementLocated(By.className("listview-item")));
+
+                // Collect the result items on the current page
+                List<WebElement> resultItems = driver.findElements(By.className("listview-item"));
+                int currentPageSize = resultItems.size();
+                System.out.println("Found " + currentPageSize + " items on page " + pageNumber);
+
+                // Stop if the current page has no results (NOTE(review): the wait above presumably throws first in that case - confirm)
+                if (currentPageSize == 0) {
+                    System.out.println("No items found on page " + pageNumber + ", stopping...");
+                    break;
+                }
+
+                // Extract the award IDs on the current page, skipping ones already collected
+                for (WebElement item : resultItems) {
+                    try {
+                        String awardId = item.getAttribute("id");
+                        if (awardId != null && !awardId.isEmpty() && !allAwardIds.contains(awardId)) {
+                            allAwardIds.add(awardId);
+                        }
+                    } catch (Exception e) {
+                        System.out.println("Error processing item: " + e.getMessage());
+                    }
+                }
+
+                // Decide whether to paginate: a page with fewer than PAGE_SIZE (30) items is treated as the last page
+                if (currentPageSize < PAGE_SIZE) {
+                    System.out.println("Page " + pageNumber + " has less than " + PAGE_SIZE + " items (" + currentPageSize + "), assuming last page, stopping...");
+                    break;
+                }
+
+                // Look up the next-page button
+                try {
+                    WebElement nextButton = driver.findElement(By.name("NEXT"));
+                    boolean isEnabled = nextButton.isEnabled();
+                    System.out.println("Next button enabled: " + isEnabled);
+
+                    if (!isEnabled) {
+                        System.out.println("Next button is disabled, stopping...");
+                        break;
+                    }
+
+                    // Click through to the next page
+                    nextButton.click();
+                    Thread.sleep(2000); // fixed pause to let the next page load
+                    pageNumber++;
+                } catch (NoSuchElementException e) {
+                    System.out.println("Next button not found, stopping...");
+                    break;
+                } catch (Exception e) {
+                    System.out.println("Error clicking next button: " + e.getMessage());
+                    break;
+                }
+            }
+
+            // Print all collected results
+            System.out.println("Found " + allAwardIds.size() + " award IDs across all pages:");
+            for (int i = 0; i < allAwardIds.size(); i++) {
+                System.out.println((i + 1) + ". " + allAwardIds.get(i));
+            }
+
+        } catch (Exception e) {
+            System.out.println("An error occurred: " + e.getMessage());
+        } finally {
+            try {
+                Thread.sleep(2000);
+            } catch (InterruptedException e) {
+                e.printStackTrace();
+            }
+            driver.quit();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/PatentscopeSeleniumCrawler.java b/src/main/java/com/example/PatentscopeSeleniumCrawler.java
new file mode 100644
index 0000000..4edb842
--- /dev/null
+++ b/src/main/java/com/example/PatentscopeSeleniumCrawler.java
@@ -0,0 +1,130 @@
+package com.example;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.openqa.selenium.By;
+import org.openqa.selenium.Keys;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.WebElement;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.openqa.selenium.support.ui.ExpectedConditions;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.Random;
+
+public class PatentscopeSeleniumCrawler {
+    private static final Logger LOGGER = LoggerFactory.getLogger(PatentscopeSeleniumCrawler.class);
+    private static final String SEARCH_URL = "https://patentscope.wipo.int/search/en/search.jsf";
+    private static final String SEARCH_INPUT_ID = "simpleSearchForm:fpSearch:input";
+    private static final String SEARCH_BUTTON_ID = "simpleSearchForm:fpSearch:j_idt1319"; // NOTE(review): j_idt… looks like an auto-generated JSF id that may change - confirm
+    private static final Random RANDOM = new Random();
+    /** Drives Chrome through a Patentscope search for "FP:(fever)" and logs title and patent id of every result row, page by page. */
+    public static void main(String[] args) {
+        // Configure ChromeDriver
+        System.setProperty("webdriver.chrome.driver", "F:\\tool\\EasySpider_0.6.2_Windows_x64\\EasySpider_windows_x64\\EasySpider\\resources\\app\\chrome_win64\\chromedriver_win64.exe");
+        ChromeOptions options = new ChromeOptions();
+        options.addArguments("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
+        options.addArguments("--disable-blink-features=AutomationControlled");
+        // Non-headless mode, to make debugging easier
+        WebDriver driver = null;
+
+        try {
+            driver = new ChromeDriver(options);
+            WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(15));
+
+            // Step 1: open the search page
+            LOGGER.info("Navigating to {}", SEARCH_URL);
+            driver.get(SEARCH_URL);
+            Thread.sleep(2000 + RANDOM.nextInt(2000)); // wait for the page to load (randomised 2-4 s pause)
+
+            // Step 2: enter the search query
+            LOGGER.info("Entering search query: FP:(fever)");
+            WebElement searchInput = wait.until(ExpectedConditions.elementToBeClickable(By.id(SEARCH_INPUT_ID)));
+            searchInput.clear();
+            searchInput.sendKeys("FP:(fever)");
+            Thread.sleep(500 + RANDOM.nextInt(1000)); // wait for the input to take effect
+
+            // Step 3: trigger the search
+            LOGGER.info("Attempting to trigger search...");
+            try {
+                // Option 1: click the search button
+                WebElement searchButton = wait.until(ExpectedConditions.elementToBeClickable(By.id(SEARCH_BUTTON_ID)));
+                LOGGER.info("Clicking search button");
+                searchButton.click();
+                Thread.sleep(3000 + RANDOM.nextInt(2000)); // wait for the AJAX call and the redirect
+            } catch (Exception e) {
+                LOGGER.warn("Button click failed, trying Enter key: {}", e.getMessage());
+                // Option 2: simulate pressing Enter
+                searchInput.sendKeys(Keys.ENTER);
+                Thread.sleep(3000 + RANDOM.nextInt(2000));
+            }
+
+            // Step 4: verify the redirect
+            String currentUrl = driver.getCurrentUrl();
+            LOGGER.info("Current URL: {}", currentUrl);
+            if (!currentUrl.contains("result.jsf")) {
+                LOGGER.error("Failed to redirect to result.jsf, trying advanced search...");
+                // Fall back to the advanced search form
+                driver.get("https://patentscope.wipo.int/search/en/search.jsf?advancedSearch=true");
+                searchInput = wait.until(ExpectedConditions.elementToBeClickable(By.id("advancedSearchForm:advancedSearchInput:input")));
+                searchInput.clear();
+                searchInput.sendKeys("FP:(fever)");
+                WebElement advSearchButton = wait.until(ExpectedConditions.elementToBeClickable(By.id("advancedSearchForm:advancedSearchInput:j_idt1208")));
+                advSearchButton.click();
+                Thread.sleep(3000 + RANDOM.nextInt(2000));
+                currentUrl = driver.getCurrentUrl();
+                LOGGER.info("Advanced search URL: {}", currentUrl);
+            }
+
+            // Step 5: parse the result page
+            if (currentUrl.contains("result.jsf")) {
+                LOGGER.info("Successfully reached result page");
+                while (true) {
+                    Document doc = Jsoup.parse(driver.getPageSource());
+                    Elements results = doc.select("div.result-row"); // selector still needs to be confirmed
+                    if (results.isEmpty()) {
+                        LOGGER.warn("No results found, verify selector or query");
+                    }
+
+                    for (Element item : results) {
+                        String title = item.select("a.result-title__text").text(); // needs to be confirmed
+                        String patentId = item.select("div.result__number").text(); // needs to be confirmed
+                        LOGGER.info("Title: {}", title.isEmpty() ? "N/A" : title);
+                        LOGGER.info("Patent ID: {}", patentId.isEmpty() ? "N/A" : patentId);
+                    }
+
+                    // Pagination: use the first visible, non-disabled "next" link; stop when there is none
+                    WebElement nextPage = driver.findElements(By.cssSelector("a.paginator__button--next:not(.is-disabled)"))
+                            .stream()
+                            .filter(WebElement::isDisplayed)
+                            .findFirst()
+                            .orElse(null);
+                    if (nextPage == null) {
+                        LOGGER.info("No more pages");
+                        break;
+                    }
+
+                    LOGGER.info("Navigating to next page");
+                    nextPage.click();
+                    Thread.sleep(3000 + RANDOM.nextInt(2000));
+                }
+            } else {
+                LOGGER.error("Still not on result page, check query or network");
+            }
+
+        } catch (Exception e) {
+            LOGGER.error("Error during crawling: {}", e.getMessage(), e);
+        } finally {
+            if (driver != null) {
+                driver.quit();
+                LOGGER.info("WebDriver closed");
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/ProxyIPChecker.java b/src/main/java/com/example/ProxyIPChecker.java
new file mode 100644
index 0000000..8d027f2
--- /dev/null
+++ b/src/main/java/com/example/ProxyIPChecker.java
@@ -0,0 +1,25 @@
+package com.example;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+public class ProxyIPChecker {
+    /** Prints the caller's public IP as reported by httpbin.org; any network failure propagates as an exception. */
+    public static void main(String[] args) throws Exception {
+        HttpURLConnection conn = (HttpURLConnection) new URL("http://httpbin.org/ip").openConnection();
+        conn.setRequestMethod("GET");
+        StringBuilder response = new StringBuilder();
+        // try-with-resources closes the reader even if readLine() throws (it was only closed on the success path before)
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
+            String inputLine;
+            while ((inputLine = in.readLine()) != null) {
+                response.append(inputLine);
+            }
+        } finally {
+            conn.disconnect(); // no further requests follow, so let the connection go
+        }
+        System.out.println("当前公网 IP 信息：");
+        System.out.println(response.toString());
+    }
+}
diff --git a/src/main/java/com/example/ScraperWithCaptcha.java b/src/main/java/com/example/ScraperWithCaptcha.java
new file mode 100644
index 0000000..f171732
--- /dev/null
+++ b/src/main/java/com/example/ScraperWithCaptcha.java
@@ -0,0 +1,496 @@
+package com.example;// 修改为你的包名
+
+import java.awt.image.BufferedImage;
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.imageio.ImageIO;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
+
+public class ScraperWithCaptcha {
+
+    // --- Constants that must be adapted to the target website ---
+    private static final String BASE_URL = "https://ctri.nic.in/Clinicaltrials/advancesearchmain.php"; // URL of the page that contains the form and the CAPTCHA
+    private static final String FORM_SUBMIT_URL = BASE_URL; // URL the form is posted to; here it is the form page itself
+    private static final String CAPTCHA_IMAGE_SRC_SUBSTRING = "captchasecurityimages.php"; // substring matched against the CAPTCHA <img> src attribute
+    private static final String CAPTCHA_INPUT_SELECTOR = "input[name=T4]"; // CSS selector of the CAPTCHA answer input
+    private static final String TARGET_FORM_SELECTOR = "form"; // selector of the form to submit; use something more specific (e.g. "#myFormId") if the page has several forms
+
+    // --- Image pre-processing threshold; needs tuning for the CAPTCHA style ---
+    private static final int BINARY_THRESHOLD = 128; // binarization threshold (0-255)
+
+    // --- Tesseract configuration (adjust to the local installation) ---
+    // Path of the Tesseract tessdata folder
+    private static final String TESSDATA_PATH = "F:\\tool\\Tesseract-OCR\\tessdata"; // machine-specific path; must be changed to the actual location
+
+    // --- Other general configuration ---
+    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36";
+    private Set<String> cookies = new HashSet<>(); // "name=value" cookie pairs collected from responses
+
+    public static void main(String[] args) {
+        ScraperWithCaptcha scraper = new ScraperWithCaptcha();
+        try {
+            // 1. 获取包含表单和验证码的页面
+            PageInfo pageInfo = scraper.fetchPage(BASE_URL, null, null, false); // 第一次 GET 不需要 Cookies 和 POST Data, 也不是 AJAX
+
+            if (pageInfo.htmlContent == null || pageInfo.statusCode != HttpURLConnection.HTTP_OK) {
+                System.err.println("Failed to fetch the initial page. Status code: " + pageInfo.statusCode);
+                return;
+            }
+
+            // 解析页面提取验证码信息和所有表单字段
+            Document doc = Jsoup.parse(pageInfo.htmlContent, BASE_URL);
+
+            // 提取验证码图片 URL
+            Element captchaImg = doc.selectFirst("img[src*=" + CAPTCHA_IMAGE_SRC_SUBSTRING + "]");
+            String captchaImageUrl = null;
+            if (captchaImg != null) {
+                captchaImageUrl = captchaImg.absUrl("src"); // 获取绝对 URL
+                System.out.println("Found CAPTCHA image URL: " + captchaImageUrl);
+            } else {
+                System.err.println("CAPTCHA image not found using selector: img[src*=" + CAPTCHA_IMAGE_SRC_SUBSTRING + "]");
+                // 如果找不到验证码，可能无法继续
+                return;
+            }
+
+            // 提取验证码输入框的 name
+            Element captchaInput = doc.selectFirst(CAPTCHA_INPUT_SELECTOR);
+            String captchaInputName = null;
+            if (captchaInput != null) {
+                captchaInputName = captchaInput.attr("name");
+                System.out.println("Found CAPTCHA input field name: " + captchaInputName);
+            } else {
+                System.err.println("CAPTCHA input field not found using selector: " + CAPTCHA_INPUT_SELECTOR);
+                // 如果找不到输入框，也无法提交
+                return;
+            }
+
+            // 2. 下载验证码图片
+            BufferedImage originalCaptchaImage = scraper.downloadImage(captchaImageUrl);
+            System.out.println("Captcha image downloaded.");
+
+            // 3. 预处理图片
+            BufferedImage preprocessedImage = scraper.preprocessImage(originalCaptchaImage);
+            System.out.println("Image preprocessed (saved as preprocessed_captcha.png).");
+
+            // 4. 识别验证码
+            String captchaCode = scraper.recognizeCaptcha(preprocessedImage);
+
+            if (captchaCode != null && !captchaCode.isEmpty()) {
+                System.out.println("Recognized CAPTCHA: " + captchaCode);
+
+                // 5. 构建包含验证码的 POST 数据
+                // 从页面表单中提取所有字段，并设置其值
+                Map<String, String> formData = scraper.buildFormDataMap(doc, captchaInputName, captchaCode);
+
+                String postData = scraper.buildPostData(formData);
+                System.out.println("Built POST data: " + postData);
+
+                // 6. 提交表单
+                // 通常是标准的 POST 请求
+                PageInfo postResponseInfo = scraper.fetchPage(FORM_SUBMIT_URL, postData, scraper.getCookieHeader(), false); // 非 AJAX POST
+
+                System.out.println("Form submitted. Response status code: " + postResponseInfo.statusCode);
+                System.out.println("POST Response Body (partial): " + (postResponseInfo.htmlContent != null && postResponseInfo.htmlContent.length() > 500 ? postResponseInfo.htmlContent.substring(0, 500) + "..." : postResponseInfo.htmlContent)); // 打印部分响应查看
+
+                // 7. 检查响应判断是否成功
+                // 对于标准表单提交，成功通常是重定向 (302) 或返回新的页面
+                if (postResponseInfo.statusCode == HttpURLConnection.HTTP_MOVED_TEMP || postResponseInfo.statusCode == HttpURLConnection.HTTP_SEE_OTHER || postResponseInfo.statusCode == HttpURLConnection.HTTP_MOVED_PERM) {
+                    String redirectUrl = postResponseInfo.redirectUrl;
+                    System.out.println("POST resulted in redirect. Location: " + redirectUrl);
+                    // TODO: 如果重定向到成功页面，可以继续爬取该页面
+                    // 如果重定向回原页面或错误页，说明提交失败 (验证码错误或其他原因)
+                    if (redirectUrl != null && redirectUrl.equals(BASE_URL)) { // <-- 检查是否重定向回原页面，需根据实际情况判断
+                        System.err.println("Submission failed, redirected back to the form page.");
+                        // TODO: 实现重试逻辑 (需要重新获取页面和验证码)
+                    }
+
+                } else if (postResponseInfo.statusCode == HttpURLConnection.HTTP_OK) {
+                    System.out.println("POST returned OK (200). Analyzing response content...");
+                    // TODO: 解析 postResponseInfo.htmlContent 来判断是否成功（例如查找成功标志，或检查是否有验证码错误提示）
+                    if (postResponseInfo.htmlContent != null && postResponseInfo.htmlContent.contains("成功标志字符串")) { // <-- *** 根据实际成功响应的特征修改 ***
+                        System.out.println("Form submission appears successful based on content.");
+                        // TODO: 从 postResponseInfo.htmlContent 中提取你想要的数据
+                    } else if (postResponseInfo.htmlContent != null && postResponseInfo.htmlContent.contains("验证码错误提示字符串")) { // <-- *** 根据实际验证码错误提示修改 ***
+                        System.err.println("CAPTCHA appears incorrect. Need to retry.");
+                        // TODO: 实现重试逻辑 (可能需要重新获取页面，因为验证码会刷新)
+                    } else {
+                        System.out.println("POST returned 200, but content not clearly indicating success or failure.");
+                        // 需要更详细地检查响应内容
+                    }
+                }
+                else {
+                    System.err.println("POST request failed with status code: " + postResponseInfo.statusCode);
+                }
+
+
+            } else {
+                System.err.println("CAPTCHA recognition failed. Cannot submit form.");
+                // TODO: 实现识别失败的重试逻辑
+            }
+
+
+        } catch (IOException e) {
+            e.printStackTrace();
+            System.err.println("An I/O error occurred: " + e.getMessage());
+        } catch (TesseractException e) {
+            e.printStackTrace();
+            System.err.println("A Tesseract OCR error occurred: " + e.getMessage());
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.err.println("An unexpected error occurred: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Issues an HTTP request (GET, or POST when {@code postData} is non-null), records
+     * the cookies the server sets and returns the status, redirect target and body.
+     * Redirects are not followed automatically; the caller inspects them.
+     *
+     * @param urlString    request URL
+     * @param postData     URL-encoded request body, or {@code null} for a GET
+     * @param cookieHeader value for the {@code Cookie} request header; {@code null} or empty sends none
+     * @param isAjaxPost   whether AJAX-specific headers are added to a POST
+     * @return the response status code, the {@code Location} header on a 301/302/303,
+     *         and the body (empty when none was read)
+     * @throws IOException if the connection fails or the request cannot be sent
+     */
+    private PageInfo fetchPage(String urlString, String postData, String cookieHeader, boolean isAjaxPost) throws IOException {
+        URL url = new URL(urlString);
+        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        try {
+            boolean isPost = postData != null;
+            conn.setRequestMethod(isPost ? "POST" : "GET");
+            conn.setInstanceFollowRedirects(false);
+            conn.setConnectTimeout(10000);
+            conn.setReadTimeout(20000);
+
+            conn.setRequestProperty("User-Agent", USER_AGENT);
+            // Skip the header entirely rather than sending an empty "Cookie:" line.
+            if (cookieHeader != null && !cookieHeader.isEmpty()) {
+                conn.setRequestProperty("Cookie", cookieHeader);
+            }
+            conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8");
+
+            if (isPost) {
+                conn.setDoOutput(true); // allow writing a request body
+                conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+                if (isAjaxPost) {
+                    conn.setRequestProperty("X-Requested-With", "XMLHttpRequest");
+                    conn.setRequestProperty("X-MicrosoftAjax", "Delta=true");
+                }
+                // Reuse the already parsed URL instead of re-parsing it and hiding failures.
+                String origin = url.getProtocol() + "://" + url.getHost();
+                conn.setRequestProperty("Referer", origin + url.getPath());
+                conn.setRequestProperty("Origin", origin);
+
+                // The body has to be written before the response is requested.
+                try (OutputStream os = conn.getOutputStream()) {
+                    os.write(postData.getBytes(StandardCharsets.UTF_8));
+                }
+            }
+
+            int statusCode = conn.getResponseCode();
+            String redirectUrl = null;
+            if (statusCode == HttpURLConnection.HTTP_MOVED_TEMP
+                    || statusCode == HttpURLConnection.HTTP_SEE_OTHER
+                    || statusCode == HttpURLConnection.HTTP_MOVED_PERM) {
+                redirectUrl = conn.getHeaderField("Location");
+            }
+
+            // Header names are case-insensitive in HTTP, but the map keys are not, so
+            // compare explicitly (the status line is stored under a null key).
+            for (Map.Entry<String, List<String>> header : conn.getHeaderFields().entrySet()) {
+                if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
+                    continue;
+                }
+                for (String cookie : header.getValue()) {
+                    // Keep only the leading "name=value" pair; limit 2 guarantees one element.
+                    String pair = cookie.split(";", 2)[0].trim();
+                    if (pair.isEmpty()) {
+                        continue;
+                    }
+                    int eq = pair.indexOf('=');
+                    if (eq > 0) {
+                        // A re-issued cookie replaces its previous value instead of being sent twice.
+                        String namePrefix = pair.substring(0, eq + 1);
+                        this.cookies.removeIf(existing -> existing.startsWith(namePrefix));
+                    }
+                    this.cookies.add(pair);
+                }
+            }
+
+            StringBuilder content = new StringBuilder();
+            boolean success = statusCode >= 200 && statusCode < 300;
+            boolean clientError = statusCode >= 400 && statusCode < 500;
+            // Only 2xx and 4xx bodies are read. Reading is best effort: a failure is
+            // reported but the status and headers are still returned.
+            if (success || clientError) {
+                try {
+                    // The error stream is null when the server sent no error body.
+                    InputStream is = success ? conn.getInputStream() : conn.getErrorStream();
+                    if (is != null) {
+                        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+                            String line;
+                            while ((line = reader.readLine()) != null) {
+                                content.append(line).append("\n");
+                            }
+                        }
+                    }
+                } catch (IOException e) {
+                    System.err.println("Error reading response body for status " + statusCode + ": " + e.getMessage());
+                }
+            }
+
+            PageInfo pageInfo = new PageInfo();
+            pageInfo.statusCode = statusCode;
+            pageInfo.redirectUrl = redirectUrl;
+            pageInfo.htmlContent = content.toString();
+            return pageInfo;
+        } finally {
+            // Release the connection on every path, including exceptions.
+            conn.disconnect();
+        }
+    }
+
+    /**
+     * Collects the submittable fields of the target form and fills the CAPTCHA field
+     * with the recognized code.
+     *
+     * <p>Controls without a {@code name}, disabled controls and button-like inputs
+     * (submit, image, reset, button, file) are skipped. An {@code input} without a
+     * {@code type} attribute is treated as a text input, and type names are compared
+     * case-insensitively.
+     *
+     * @param doc              parsed page containing the form
+     * @param captchaInputName {@code name} attribute of the CAPTCHA input
+     * @param captchaCode      recognized CAPTCHA text
+     * @return field names mapped to values in document order; empty when the form is not found
+     */
+    private Map<String, String> buildFormDataMap(Document doc, String captchaInputName, String captchaCode) {
+        // Insertion-ordered so the POST body follows the document order of the fields.
+        Map<String, String> formData = new java.util.LinkedHashMap<>();
+        Element form = doc.selectFirst(TARGET_FORM_SELECTOR);
+
+        if (form == null) {
+            System.err.println("Target form not found using selector: " + TARGET_FORM_SELECTOR);
+            return formData;
+        }
+
+        for (Element element : form.select("input, select, textarea")) {
+            String name = element.attr("name");
+            // attr() yields "" for a missing attribute; unnamed and disabled controls are not submitted.
+            if (name.isEmpty() || element.hasAttr("disabled")) {
+                continue;
+            }
+
+            String tag = element.tagName().toLowerCase(java.util.Locale.ROOT);
+            if ("select".equals(tag)) {
+                // Use the selected option, falling back to the first one.
+                Element option = element.selectFirst("option[selected]");
+                if (option == null) {
+                    option = element.selectFirst("option");
+                }
+                if (option == null) {
+                    formData.put(name, "");
+                } else {
+                    // An option without a value attribute submits its text.
+                    formData.put(name, option.hasAttr("value") ? option.attr("value") : option.text());
+                }
+                continue;
+            }
+            if ("textarea".equals(tag)) {
+                formData.put(name, element.text());
+                continue;
+            }
+
+            // Remaining elements are <input>; a missing type means "text".
+            String type = element.attr("type").trim().toLowerCase(java.util.Locale.ROOT);
+            if (type.isEmpty()) {
+                type = "text";
+            }
+            switch (type) {
+                case "checkbox":
+                case "radio":
+                    // Only checked boxes/buttons are submitted; "on" is the default value.
+                    if (element.hasAttr("checked")) {
+                        formData.put(name, element.hasAttr("value") ? element.attr("value") : "on");
+                    }
+                    break;
+                case "submit":
+                case "image":
+                case "reset":
+                case "button":
+                case "file":
+                    // Buttons are only submitted when they are the one clicked; files need multipart.
+                    // TODO: add the name=value of the triggering button here if the site requires it.
+                    break;
+                default:
+                    // text, hidden, password and the other text-like types
+                    formData.put(name, name.equals(captchaInputName) ? captchaCode : element.attr("value"));
+                    break;
+            }
+        }
+
+        // TODO: fields added or modified by JavaScript are not present in the static
+        // HTML and must be added to formData manually.
+
+        return formData;
+    }
+
+
+    /**
+     * Downloads the CAPTCHA image, sending the cookies collected so far so the image
+     * belongs to the same session as the form page.
+     *
+     * @param imageUrl absolute URL of the image
+     * @return the decoded image
+     * @throws IOException if the server does not answer 200 or the payload is not a readable image
+     */
+    public BufferedImage downloadImage(String imageUrl) throws IOException {
+        URL url = new URL(imageUrl);
+        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        try {
+            conn.setRequestMethod("GET");
+            // Same timeouts as fetchPage so a stalled server cannot block forever.
+            conn.setConnectTimeout(10000);
+            conn.setReadTimeout(20000);
+            conn.setRequestProperty("User-Agent", USER_AGENT);
+            String cookieHeader = getCookieHeader();
+            if (!cookieHeader.isEmpty()) {
+                conn.setRequestProperty("Cookie", cookieHeader);
+            }
+
+            int responseCode = conn.getResponseCode();
+            if (responseCode != HttpURLConnection.HTTP_OK) {
+                throw new IOException("Failed to download image. HTTP error code: " + responseCode + " for URL: " + imageUrl);
+            }
+
+            try (InputStream is = conn.getInputStream()) {
+                // ImageIO decodes straight from the stream; it returns null for an unknown format.
+                BufferedImage image = ImageIO.read(is);
+                if (image == null) {
+                    throw new IOException("Failed to read image stream. Check image format or content for URL: " + imageUrl);
+                }
+                return image;
+            }
+        } finally {
+            conn.disconnect();
+        }
+    }
+
+    /**
+     * Prepares a CAPTCHA image for OCR by converting it to grayscale and then to a
+     * black/white image using {@code BINARY_THRESHOLD}. For debugging, the input is
+     * written to {@code original_captcha.png} and the result to
+     * {@code preprocessed_captcha.png} in the working directory; failures to write
+     * these files are logged and otherwise ignored.
+     *
+     * @param originalImage image to process
+     * @return a {@code TYPE_BYTE_BINARY} image of the same size
+     */
+    public BufferedImage preprocessImage(BufferedImage originalImage) {
+        // TODO: this is the part that needs tuning for the actual CAPTCHA style.
+        // Keep the untouched input for side-by-side comparison.
+        try {
+            File originalFile = new File("original_captcha.png");
+            ImageIO.write(originalImage, "png", originalFile);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+        int width = originalImage.getWidth();
+        int height = originalImage.getHeight();
+
+        // Grayscale conversion: draw the source onto a gray canvas.
+        BufferedImage grayImage = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
+        java.awt.Graphics2D graphics = grayImage.createGraphics();
+        try {
+            graphics.drawImage(originalImage, 0, 0, null);
+        } finally {
+            // Graphics contexts hold native resources and must be disposed explicitly.
+            graphics.dispose();
+        }
+
+        // Binarization: below the threshold becomes black (0), everything else white (1).
+        BufferedImage binaryImage = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
+        java.awt.image.WritableRaster grayRaster = grayImage.getRaster();
+        java.awt.image.WritableRaster binaryRaster = binaryImage.getRaster();
+        for (int y = 0; y < height; y++) {
+            for (int x = 0; x < width; x++) {
+                int gray = grayRaster.getSample(x, y, 0);
+                binaryRaster.setSample(x, y, 0, gray < BINARY_THRESHOLD ? 0 : 1);
+            }
+        }
+
+        // TODO: more advanced steps: noise and line removal, character segmentation,
+        // skew correction, cropping blank or noisy borders for digit-only CAPTCHAs.
+
+        // Save the result so the effect of the pre-processing can be inspected.
+        try {
+            File outputfile = new File("preprocessed_captcha.png");
+            ImageIO.write(binaryImage, "png", outputfile);
+            System.out.println("Preprocessed image saved to " + outputfile.getAbsolutePath());
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+
+        return binaryImage;
+    }
+
+    /**
+     * 使用 Tess4J 识别图片中的文字
+     * @param image 待识别的图片 (最好是预处理后的)
+     * @return 识别出的字符串 (如果失败返回 null 或空字符串)
+     */
+    public String recognizeCaptcha(BufferedImage image) throws TesseractException {
+        Tesseract tesseract = new Tesseract();
+
+        // 设置 tessdata 路径 (如果 TESSDATA_PATH 已正确设置且 Tesseract 安装正确，这行可能不是必需的，Tess4J 会自动查找)
+        if (TESSDATA_PATH != null && !TESSDATA_PATH.isEmpty()) {
+            tesseract.setDatapath(TESSDATA_PATH);
+        } else {
+            System.err.println("WARNING: TESSDATA_PATH not set. Tess4J will try to find tessdata automatically.");
+        }
+
+        tesseract.setLanguage("eng"); // 设置识别语言为英文 (通常包含数字)
+        // 如果验证码只有数字，可以尝试设置仅识别数字，这有助于提高准确率
+        // tesseract.setTessVariable("tessedit_char_whitelist", "0123456789"); // 方法名请查阅 Tess4J 文档确认
+
+        String result = tesseract.doOCR(image);
+        // 清理识别结果，去除空格或换行符等
+        result = result != null ? result.trim().replaceAll("[^0-9a-zA-Z]", "") : ""; // 根据验证码内容（数字、字母）调整清理规则
+
+        return result;
+    }
+
+    /**
+     * 构建用于 POST 提交的表单数据字符串
+     * @param formDataMap 包含所有表单字段名称和值的 Map
+     * @return URL 编码后的表单数据字符串
+     * @throws IOException
+     */
+    private String buildPostData(Map<String, String> formDataMap) throws IOException {
+        StringBuilder postDataBuilder = new StringBuilder();
+        boolean first = true;
+        // 遍历 Map 构建 POST 数据。如果需要特定顺序，使用 LinkedHashMap
+        for (Map.Entry<String, String> entry : formDataMap.entrySet()) {
+            if (!first) {
+                postDataBuilder.append("&");
+            }
+            postDataBuilder.append(URLEncoder.encode(entry.getKey(), StandardCharsets.UTF_8.name()))
+                    .append("=")
+                    .append(URLEncoder.encode(entry.getValue() != null ? entry.getValue() : "", StandardCharsets.UTF_8.name()));
+            first = false;
+        }
+        return postDataBuilder.toString();
+    }
+
+    /**
+     * Formats the stored cookies as the value of an HTTP {@code Cookie} request header.
+     *
+     * @return the stored entries joined with {@code "; "}; an empty string when none are stored
+     */
+    private String getCookieHeader() {
+        return String.join("; ", this.cookies);
+    }
+
+
+    // Helper class to hold information extracted from a page fetch
+    private static class PageInfo {
+        int statusCode; // HTTP status code of the response
+        String redirectUrl; // redirect target, set when a redirect occurred
+        String htmlContent; // response body of the page
+        // No ASP.NET-specific fields are kept here because this holder is generic
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/StringFieldExtractor.java b/src/main/java/com/example/StringFieldExtractor.java
new file mode 100644
index 0000000..9a36144
--- /dev/null
+++ b/src/main/java/com/example/StringFieldExtractor.java
@@ -0,0 +1,74 @@
+package com.example;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class StringFieldExtractor {
+    public static void main(String[] args) {
+        // 输入字符串
+        String input = "postTime:05-06-2024 00:00:00,title:PT/013/2024,content:澳門大學－N21科研大樓六樓智慧城市物聯網國家重點實驗室（澳門大學）建造工程 OBRAS DE CONSTRUÇÃO DO LABORATÓRIO DE REFERÊNCIA DO ESTADO DE INTERNET DAS COISAS PARA A CIDADE INTELIGENTE (UNIVERSIDADE DE MACAU), LOCALIZADO NO 6.º ANDAR DO EDIFÍCIO DE INVESTIGAÇÃO CIENTÍFICA N21 DA UNIVERSIDADE DE MACAU,fileList:[https://pct.admo.um.edu.mo/wp-content/uploads/2024/06/招標文件電子檔cover-CHI.pdf###pdf, https://pct.admo.um.edu.mo/wp-content/uploads/2024/06/招標文件電子檔cover-ENG-1.pdf###pdf, https://pct.admo.um.edu.mo/wp-content/uploads/2024/07/開標結果.pdf###pdf, https://pct.admo.um.edu.mo/wp-content/uploads/2024/11/判給結果-N21-6G.pdf###pdf]";
+
+        try {
+            // 存储提取结果
+            String postTime = null;
+            String title = null;
+            String content = null;
+            List<String> fileList = new ArrayList<>();
+
+            // Step 1: 分割 fileList（因为它包含方括号，可能干扰其他字段）
+            String fileListStr = null;
+            int fileListStart = input.indexOf("fileList:[");
+            if (fileListStart != -1) {
+                int fileListEnd = input.lastIndexOf("]");
+                if (fileListEnd != -1 && fileListEnd > fileListStart) {
+                    fileListStr = input.substring(fileListStart + 9, fileListEnd + 1); // 提取 [..]
+                    input = input.substring(0, fileListStart - 1); // 移除 fileList 部分
+                }
+            }
+
+            // Step 2: 解析其他字段（postTime, title, content）
+            String[] fields = input.split(",(?=\\w+:)", 3); // 按逗号分割，仅在键名前
+            for (String field : fields) {
+                String[] keyValue = field.split(":", 2); // 分割键值对
+                if (keyValue.length == 2) {
+                    String key = keyValue[0].trim();
+                    String value = keyValue[1].trim();
+                    switch (key) {
+                        case "postTime":
+                            postTime = value;
+                            break;
+                        case "title":
+                            title = value;
+                            break;
+                        case "content":
+                            content = value;
+                            break;
+                    }
+                }
+            }
+
+            // Step 3: 解析 fileList
+            if (fileListStr != null && fileListStr.startsWith("[") && fileListStr.endsWith("]")) {
+                String listContent = fileListStr.substring(1, fileListStr.length() - 1).trim();
+                if (!listContent.isEmpty()) {
+                    // 分割列表元素，注意 URL 内的逗号
+                    String[] urls = listContent.split(",\\s*(?=https)");
+                    for (String url : urls) {
+                        fileList.add(url.trim());
+                    }
+                }
+            }
+
+            // 输出结果
+            System.out.println("postTime: " + postTime);
+            System.out.println("title: " + title);
+            System.out.println("content: " + content);
+            System.out.println("fileList: " + fileList);
+
+        } catch (Exception e) {
+            System.err.println("Parsing error: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/src/main/java/com/example/WipoPatentsSelenium.java b/src/main/java/com/example/WipoPatentsSelenium.java
new file mode 100644
index 0000000..5f933a3
--- /dev/null
+++ b/src/main/java/com/example/WipoPatentsSelenium.java
@@ -0,0 +1,60 @@
+package com.example;
+
+import io.github.bonigarcia.wdm.WebDriverManager;
+import org.openqa.selenium.By;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.WebElement;
+import org.openqa.selenium.chrome.ChromeDriver;
+
+import java.util.List;
+
+/**
+ * Opens a WIPO PATENTSCOPE result list in Chrome and prints the publication number
+ * and title of each result for the first few result pages.
+ */
+public class WipoPatentsSelenium {
+
+    /**
+     * Runs the crawl. The browser is always closed, even when the crawl fails.
+     *
+     * @param args unused
+     * @throws InterruptedException if a wait between page loads is interrupted
+     */
+    public static void main(String[] args) throws InterruptedException {
+        // Let WebDriverManager set up the ChromeDriver binary.
+        WebDriverManager.chromedriver().setup();
+        WebDriver driver = new ChromeDriver();
+
+        try {
+            driver.get("https://patentscope.wipo.int/search/en/result.jsf?query=FP:(AI)");
+
+            // Crude fixed wait for the page to load.
+            Thread.sleep(3000);
+
+            int maxPages = 3;
+            int currentPage = 1;
+
+            while (currentPage <= maxPages) {
+                System.out.println("📄 当前第 " + currentPage + " 页：");
+
+                List<WebElement> results = driver.findElements(By.cssSelector(".resultitem"));
+                for (WebElement result : results) {
+                    // A result missing one of its parts prints an empty value instead of aborting the crawl.
+                    String title = textOrEmpty(result, ".resulttitle");
+                    String pubNum = textOrEmpty(result, ".pubNumber");
+                    System.out.println("🔹 " + pubNum + " - " + title);
+                }
+
+                // The last requested page has been printed; no need to load another one.
+                if (currentPage == maxPages) {
+                    break;
+                }
+
+                // findElements returns an empty list when nothing matches, so a missing
+                // button is told apart from a real WebDriver failure.
+                List<WebElement> nextButtons = driver.findElements(By.cssSelector("a[title='Next']"));
+                if (nextButtons.isEmpty()) {
+                    System.out.println("✅ 已到最后一页或按钮未找到");
+                    break;
+                }
+
+                WebElement nextButton = nextButtons.get(0);
+                if (!nextButton.isDisplayed()) {
+                    break;
+                }
+                nextButton.click();
+                currentPage++;
+                Thread.sleep(3000); // wait for the next page to load
+            }
+        } finally {
+            driver.quit();
+        }
+    }
+
+    /** Returns the text of the first descendant matching the CSS selector, or "" when there is none. */
+    private static String textOrEmpty(WebElement parent, String cssSelector) {
+        List<WebElement> matches = parent.findElements(By.cssSelector(cssSelector));
+        return matches.isEmpty() ? "" : matches.get(0).getText();
+    }
+}
diff --git a/src/main/java/com/example/cliniTopic.java b/src/main/java/com/example/cliniTopic.java
new file mode 100644
index 0000000..3142e1f
--- /dev/null
+++ b/src/main/java/com/example/cliniTopic.java
@@ -0,0 +1,594 @@
+package com.example;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import okhttp3.*;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class cliniTopic {
+    /** Kafka topic that receives the crawled records. */
+    private static final String TOPIC_NAME = "cliniTopic";
+    /** Kafka bootstrap address. */
+    private static final String BOOTSTRAP_SERVERS = "localhost:9092";
+    private static KafkaProducer<String, String> producer;
+    private static ObjectMapper objectMapper = new ObjectMapper();
+    private static final Random random = new Random();
+    private static List<String> proxyList = new ArrayList<>();  // proxy pool, one line of proxy.txt per entry
+    private static int currentProxyIndex = 0;  // index into proxyList of the proxy currently in use
+    static {
+        // Build the shared producer: string keys/values, acks from all replicas, 3 retries.
+        final String stringSerializer = "org.apache.kafka.common.serialization.StringSerializer";
+        Properties kafkaProps = new Properties();
+        kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BOOTSTRAP_SERVERS);
+        kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, stringSerializer);
+        kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, stringSerializer);
+        kafkaProps.put(ProducerConfig.ACKS_CONFIG, "all");
+        kafkaProps.put(ProducerConfig.RETRIES_CONFIG, 3);
+        producer = new KafkaProducer<>(kafkaProps);
+
+        // Load the proxy pool; a missing or unreadable file only logs and leaves the pool empty.
+        try {
+            proxyList = Files.readAllLines(Paths.get("proxy.txt"));
+            String report = proxyList.isEmpty()
+                    ? "警告: proxy.txt 为空，未加载任何代理"
+                    : "成功加载 " + proxyList.size() + " 个代理";
+            System.out.println(report);
+        } catch (IOException e) {
+            System.err.println("读取 proxy.txt 失败: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Entry point: reads search keywords from keywords.txt, crawls up to seven
+     * result pages per keyword on a pool of four worker threads and publishes
+     * every trial detail record to Kafka as JSON.
+     *
+     * @param args unused
+     * @throws IOException          if keywords.txt cannot be read
+     * @throws InterruptedException if interrupted while waiting for the workers
+     */
+    public static void main(String[] args) throws IOException, InterruptedException {
+        List<String> cleanedKeywords = new ArrayList<>();
+        for (String line : Files.readAllLines(Paths.get("keywords.txt"))) {
+            // Keep only the part before the first comma. indexOf is used instead of
+            // split(",")[0], which throws on a line that consists only of commas.
+            int comma = line.indexOf(',');
+            String cleaned = (comma >= 0 ? line.substring(0, comma) : line).trim();
+            // Blank lines would otherwise be crawled as an empty search.
+            if (!cleaned.isEmpty()) {
+                cleanedKeywords.add(cleaned);
+            }
+        }
+
+        ExecutorService executor = Executors.newFixedThreadPool(4); // 4 worker threads
+        try {
+            for (String keyword : cleanedKeywords) {
+                executor.submit(() -> crawlKeyword(keyword));
+            }
+            executor.shutdown();
+            if (!executor.awaitTermination(5, TimeUnit.HOURS)) {
+                // Stop workers that are still running before the producer goes away.
+                System.err.println("等待超时，强制停止剩余任务");
+                executor.shutdownNow();
+            }
+        } finally {
+            // Close the producer even when the wait above is interrupted.
+            producer.close();
+        }
+    }
+
+    /** Crawls up to seven result pages for one keyword and sends each trial record to Kafka. */
+    private static void crawlKeyword(String keyword) {
+        try {
+            // Pause of 30000-31000 ms, drawn once per keyword and applied after each fetch and each send.
+            int sleepTime = random.nextInt(1001) + 30000;
+            for (int page = 1; page <= 7; page++) {
+                final int pageNum = page;
+                Map<String, Object> pageResult = list(keyword, page);
+                // Guard against a null or incomplete result map.
+                Object rawUrls = pageResult == null ? null : pageResult.get("listUrl");
+                if (!(rawUrls instanceof List) || ((List<?>) rawUrls).isEmpty()) {
+                    System.out.println("没有关键词"+keyword+"检索结果");
+                    break;
+                }
+                @SuppressWarnings("unchecked")
+                List<String> urls = (List<String>) rawUrls;
+                int count = toInt(pageResult.get("count"), urls.size());
+                // -1 means "unknown"; then only the short-page check below ends the loop.
+                int totalPage = toInt(pageResult.get("totalPage"), -1);
+
+                for (String url : urls) {
+                    Map<String, Object> result = content(url);
+                    Thread.sleep(sleepTime);
+                    String registNum = String.valueOf(result.get("registNum"));
+                    String crawlUrl = String.valueOf(result.get("crawlUrl"));
+
+                    try {
+                        String jsonValue = objectMapper.writeValueAsString(result);
+                        ProducerRecord<String, String> record = new ProducerRecord<>(TOPIC_NAME, registNum, jsonValue);
+
+                        producer.send(record, (metadata, exception) -> {
+                            if (exception == null) {
+                                System.out.println("成功发送到Kafka - Partition: " + metadata.partition() +
+                                        ", Offset: " + metadata.offset() + ", "+crawlUrl + ", "+ keyword + " , " + pageNum );
+                            } else {
+                                System.err.println("发送到Kafka失败: " + exception.getMessage());
+                            }
+                        });
+                    } catch (Exception e) {
+                        System.err.println("序列化或发送Kafka消息失败: " + e.getMessage());
+                    }
+                    Thread.sleep(sleepTime);
+                }
+
+                // Primitive ints: comparing boxed Integers with == would compare references.
+                if (count < 10 || totalPage == page) {
+                    System.out.println("关键词"+keyword+"已检索完毕");
+                    break;
+                }
+            }
+        } catch (InterruptedException e) {
+            // Keep the interrupt flag set so the pool can shut the worker down.
+            Thread.currentThread().interrupt();
+            System.err.println("处理 " + keyword + " 被中断");
+        } catch (Exception e) {
+            System.err.println("处理 " + keyword + " 失败: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+
+    /** Converts a map value to int, returning fallback when it is missing or not numeric. */
+    private static int toInt(Object value, int fallback) {
+        if (value instanceof Number) {
+            return ((Number) value).intValue();
+        }
+        try {
+            return Integer.parseInt(String.valueOf(value).trim());
+        } catch (NumberFormatException e) {
+            return fallback;
+        }
+    }
+
+    private static  Map<String,Object> list(String keyword,Integer page) throws Exception{
+        Map<String,Object> map = new HashMap<>();
+        String baseUrl = "https://www.drks.de/search/de";
+        String hostUrl = "https://www.drks.de";
+        String cleanUrl = "https://www.drks.de/search/de/results";
+        System.out.println("Pure URL: " + cleanUrl);
+
+        System.out.println("Page Number: " + page);
+
+        // 存储 cookies
+        Set<String> cookieSet = new HashSet<>();
+        String sessionId = null;
+
+        // 第一步：初始 GET 请求，获取 cookies 和 ViewState
+        URL initialUrl = new URL(baseUrl);
+        HttpURLConnection initialConn = (HttpURLConnection) initialUrl.openConnection();
+        initialConn.setRequestMethod("GET");
+        initialConn.setInstanceFollowRedirects(false);
+        initialConn.setConnectTimeout(10000);
+        initialConn.setReadTimeout(10000);
+
+        // 捕获 cookies
+        sessionId = updateCookies(initialConn, cookieSet);
+        System.out.println("Initial Cookies: " + cookieSet);
+        System.out.println("Initial Session ID: " + sessionId);
+
+        // 读取响应内容以获取 ViewState
+        BufferedReader in = new BufferedReader(new InputStreamReader(initialConn.getInputStream()));
+        StringBuilder content = new StringBuilder();
+        String inputLine;
+        while ((inputLine = in.readLine()) != null) {
+            content.append(inputLine);
+        }
+        in.close();
+        initialConn.disconnect();
+
+        // 提取初始 ViewState
+        String initialViewState = extractViewState(content.toString());
+        System.out.println("Initial ViewState: " + initialViewState);
+
+        // 第二步：发送搜索 POST 请求
+        HttpURLConnection searchConn = (HttpURLConnection) new URL(baseUrl).openConnection();
+        searchConn.setRequestMethod("POST");
+        searchConn.setInstanceFollowRedirects(false);
+        searchConn.setDoOutput(true);
+        searchConn.setConnectTimeout(10000);
+        searchConn.setReadTimeout(10000);
+
+        // 设置搜索请求的请求头
+        searchConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+        searchConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        searchConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        searchConn.setRequestProperty("Origin", "https://www.drks.de");
+        searchConn.setRequestProperty("Referer", baseUrl);
+        searchConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+        // 构建搜索请求的 POST 数据
+        String searchPostData = buildSearchPostData(initialViewState,keyword);
+
+        // 发送搜索 POST 请求
+        try (OutputStream os = searchConn.getOutputStream()) {
+            byte[] input = searchPostData.getBytes(StandardCharsets.UTF_8);
+            os.write(input, 0, input.length);
+        }
+
+        // 更新 cookies
+        String searchSessionId = updateCookies(searchConn, cookieSet);
+        System.out.println("Search Cookies: " + cookieSet);
+        System.out.println("Search Session ID: " + searchSessionId);
+
+        // 处理搜索响应
+        int searchResponseCode = searchConn.getResponseCode();
+        System.out.println("Search Response Code: " + searchResponseCode);
+        String redirectUrl = searchConn.getHeaderField("Location");
+        searchConn.disconnect();
+
+        if (searchResponseCode != 302 || redirectUrl == null) {
+            System.err.println("Search request did not return expected 302 redirect. Response code: " + searchResponseCode);
+            return null;
+        }
+        System.out.println("Redirect URL (raw): " + redirectUrl);
+
+        // 解析相对 URL
+        if (!redirectUrl.startsWith("http")) {
+            redirectUrl = hostUrl + (redirectUrl.startsWith("/") ? redirectUrl : "/" + redirectUrl);
+        }
+        System.out.println("Resolved Redirect URL: " + redirectUrl);
+
+        // 第三步：跟随重定向（使用 GET 请求）
+        URL resultsUrl = new URL(redirectUrl);
+        HttpURLConnection resultsConn = (HttpURLConnection) resultsUrl.openConnection();
+        resultsConn.setRequestMethod("GET");
+        resultsConn.setInstanceFollowRedirects(false);
+        resultsConn.setConnectTimeout(10000);
+        resultsConn.setReadTimeout(10000);
+        resultsConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        resultsConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        resultsConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64ек; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+        // 更新 cookies
+        String resultsSessionId = updateCookies(resultsConn, cookieSet);
+        System.out.println("Results Cookies: " + cookieSet);
+        System.out.println("Results Session ID: " + resultsSessionId);
+
+        // 读取重定向后的结果页面内容
+        BufferedReader resultsReader = new BufferedReader(new InputStreamReader(resultsConn.getInputStream()));
+        StringBuilder resultsContent = new StringBuilder();
+        while ((inputLine = resultsReader.readLine()) != null) {
+            resultsContent.append(inputLine);
+        }
+        resultsReader.close();
+        resultsConn.disconnect();
+
+        // 提取页面中的 ViewState（状态信息，用于后续请求）
+        String viewState = extractViewState(resultsContent.toString());
+        System.out.println("Results ViewState: " + viewState);
+
+        // 检查 Session ID 是否一致，确保会话未被重置
+        if (sessionId != null && !sessionId.equals(resultsSessionId)) {
+            System.out.println("Warning: Session ID changed. Initial: " + sessionId + ", Results: " + resultsSessionId);
+        }
+
+        // Step 4: 第四步：发送分页请求（使用 POST）
+        HttpURLConnection postConn = (HttpURLConnection) new URL(cleanUrl).openConnection();
+        postConn.setRequestMethod("POST");
+        postConn.setInstanceFollowRedirects(false);
+        postConn.setDoOutput(true);
+        postConn.setConnectTimeout(10000);
+        postConn.setReadTimeout(10000);
+
+        // 设置分页请求的请求头（非 AJAX，模拟浏览器常规请求）
+        postConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+        postConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        postConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        postConn.setRequestProperty("Origin", "https://www.drks.de");
+        postConn.setRequestProperty("Referer", cleanUrl);
+        postConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+        postConn.setRequestProperty("Sec-Fetch-Dest", "document");
+        postConn.setRequestProperty("Sec-Fetch-Mode", "navigate");
+
+        // 构建分页请求的 POST 参数（包括页码和 ViewState 等）
+        String postData = buildPostData(viewState, page);
+        // 发送分页的 POST 请求
+        try (OutputStream os = postConn.getOutputStream()) {
+            byte[] input = postData.getBytes(StandardCharsets.UTF_8);
+            os.write(input, 0, input.length);
+        }
+
+        // 更新 cookies（分页响应可能返回新的 Set-Cookie）
+        String paginationSessionId = updateCookies(postConn, cookieSet);
+        System.out.println("Pagination Cookies: " + cookieSet);
+        System.out.println("Pagination Session ID: " + paginationSessionId);
+
+        // 处理分页响应
+        int responseCode = postConn.getResponseCode();
+        System.out.println("Pagination Response Code: " + responseCode);
+
+        // 读取分页响应的 HTML 内容
+        StringBuilder postContent = new StringBuilder();
+        try (BufferedReader postReader = new BufferedReader(
+                new InputStreamReader(
+                        responseCode >= 400 ? postConn.getErrorStream() : postConn.getInputStream()))) {
+            while ((inputLine = postReader.readLine()) != null) {
+                postContent.append(inputLine);
+            }
+        }
+        Document parse = null;
+        if (responseCode == HttpURLConnection.HTTP_MOVED_TEMP
+                || responseCode == HttpURLConnection.HTTP_MOVED_PERM
+                || responseCode == HttpURLConnection.HTTP_SEE_OTHER) {
+            String newUrl = postConn.getHeaderField("Location");
+            System.out.println("Pagination Redirecting to: " + newUrl);
+
+            //  解析重定向中的相对地址为完整 URL（如果是相对路径）
+            if (!newUrl.startsWith("http")) {
+                newUrl = hostUrl + (newUrl.startsWith("/") ? newUrl : "/" + newUrl);
+            }
+
+            // 重定向
+            URL redirectConn = new URL(newUrl);
+            HttpURLConnection followConn = (HttpURLConnection) redirectConn.openConnection();
+            followConn.setRequestMethod("GET");
+            followConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+            followConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+            followConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+            BufferedReader redirectReader = new BufferedReader(new InputStreamReader(followConn.getInputStream()));
+            StringBuilder redirectContent = new StringBuilder();
+            while ((inputLine = redirectReader.readLine()) != null) {
+                redirectContent.append(inputLine);
+            }
+            redirectReader.close();
+            followConn.disconnect();
+            parse = Jsoup.parse(String.valueOf(redirectContent));
+        } else if (responseCode == 200) {
+            parse = Jsoup.parse(String.valueOf(postContent));
+        }
+
+
+
+        Elements links = parse.select("div[data-label='Titel der Studie'] a");
+        List<String> listUrl = new ArrayList();
+        Integer count = 0;
+        for (Element link : links) {
+            String href = link.attr("href");
+            String trueUrl = "https://www.drks.de/"+href;
+            listUrl.add(trueUrl);
+            count++;
+        }
+        String text = parse.select("div.col-md-2.pt-3.ps-0.text-md-end").text();
+        // 使用正则表达式提取 "第" 和 "/" 之间的数字
+        String regex = "Seite\\s*(\\d+)\\s*/";
+        Matcher matcher = Pattern.compile(regex).matcher(text);
+        if (matcher.find()) {
+            map.put("totalPage",matcher.group(1));// 返回第一个捕获组，即数字 "1"
+        }
+        map.put("listUrl",listUrl);
+        map.put("count",count);
+        map.put("keyword",keyword);
+        postConn.disconnect();
+        return map;
+    }
+    // 更新并返回当前连接中的 Cookie，包含 JSESSIONID 的提取
+    private static String updateCookies(HttpURLConnection conn, Set<String> cookieSet) {
+        String sessionId = null;
+        Map<String, List<String>> headerFields = conn.getHeaderFields();
+        List<String> cookiesHeader = headerFields.get("Set-Cookie");
+        if (cookiesHeader != null) {
+            for (String cookie : cookiesHeader) {
+                String cookieValue = cookie.split(";")[0];
+                cookieSet.add(cookieValue);
+                if (cookieValue.startsWith("JSESSIONID=") || cookieValue.startsWith("csfcfc=")) {
+                    sessionId = cookieValue;
+                }
+            }
+        }
+        return sessionId;
+    }
+    /**
+     * Extracts the value of the hidden ViewState input from a page.
+     *
+     * @param html page source; may be null or empty
+     * @return the value attribute of the first input named
+     *         jakarta.faces.ViewState or javax.faces.ViewState, or "" when the
+     *         input is null/empty or no such field is found (an error is logged)
+     */
+    private static String extractViewState(String html) {
+        boolean hasContent = html != null && !html.isEmpty();
+        if (!hasContent) {
+            System.err.println("HTML content is empty or null");
+            return "";
+        }
+
+        // Accepts both the jakarta.* and the javax.* field name; the name attribute
+        // must appear before the value attribute inside the tag.
+        Matcher viewStateMatcher = Pattern
+                .compile("<input[^>]*name=[\"'](?:jakarta|javax)\\.faces\\.ViewState[\"'][^>]*value=[\"']([^\"']+)[\"']")
+                .matcher(html);
+
+        if (!viewStateMatcher.find()) {
+            System.err.println("Failed to extract ViewState from HTML");
+            return "";
+        }
+        return viewStateMatcher.group(1);
+    }
+
+    /**
+     * Downloads one trial detail page and extracts its fields with CSS selectors.
+     *
+     * @param url absolute URL of the detail page
+     * @return map of record fields; fields this crawler does not extract are
+     *         present with an empty string, and "crawlUrl" holds the given url
+     * @throws Exception if the HTTP request fails
+     */
+    private static Map<String,Object> content(String url)throws Exception{
+
+        OkHttpClient client = new OkHttpClient().newBuilder()
+                .build();
+        Request request = new Request.Builder()
+                .url(url)
+                .get()
+                .addHeader("Content-Type", "application/json")
+                .build();
+        String html;
+        // try-with-resources closes the response; an unclosed Response leaks its connection.
+        try (Response response = client.newCall(request).execute()) {
+            ResponseBody body = response.body();
+            html = body != null ? body.string() : "";
+        }
+        // The second argument of Jsoup.parse(String, String) is the base URI used to
+        // resolve relative links, not a charset name.
+        Document parse = Jsoup.parse(html, url);
+        String title = parse.select(".title-bold").text();
+        String registNum = parse.select(".card.trial-details-float.mb-4 .card-body dl dd:nth-child(2)").text();
+        String registTime = convertDate(parse.select(".card.trial-details-float.mb-4 .card-body dl dd:nth-child(6)").text());
+        // The selectors below are positional (nth-child) paths into the detail page.
+        String header = parse.select("body > main > div.card-body > div:nth-child(9) > div.card-body > div > div > div > div.card-header > h4").text();
+        String site = parse.select("body > main > div.card-body > div:nth-child(9) > div.card-body > div > div > div > div.card-body > dl > dd:nth-child(2) > div").text();
+        String telefon = parse.select("body > main > div.card-body > div:nth-child(9) > div.card-body > div > div > div > div.card-body > dl > dd:nth-child(4) > span").text();
+        String disease = parse.select("body > main > div.card-body > div:nth-child(6) > div.card-body > div > div:nth-child(2) > dl > dd:nth-child(2) > span").text();
+        String studyType = parse.select("body > main > div.card-body > div:nth-child(3) > div.card-body > dl").text();
+        String inclusionCriteria = parse.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(3) > div > div.card-body > div > div.col-12.mt-3 > dl > dd > span").text();
+        String exclusionCriteria = parse.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(4) > div > div.card-body > p > span").text();
+        String country = parse.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(1) > div > div.card-body > dl > dd:nth-child(2)").text();
+        String intervention = parse.select("body > main > div.card-body > div:nth-child(4) > div.card-body > dl").text();
+        String primaryOutcome = parse.select("body > main > div.card-body > div:nth-child(5) > div.card-body > div > div > dl").text();
+        String enrollment = parse.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(2) > div > div.card-body > div > div:nth-child(5) > dl > dd > span").text();
+
+        // Sponsor details are nested as their own object in the record.
+        Map<String,Object> sponsor = new HashMap<>();
+        sponsor.put("header",header);
+        sponsor.put("site",site);
+        sponsor.put("telefon",telefon);
+
+        Map<String,Object> resultData = new HashMap<>();
+        resultData.put("title", title);
+        resultData.put("registNum",registNum);
+        resultData.put("registTime",registTime);
+        resultData.put("registStatus","");
+        resultData.put("registTitle","");
+        resultData.put("fullTitle","");
+        resultData.put("sponsor",sponsor);
+        resultData.put("sponsorPart","");
+        resultData.put("studyType",studyType);
+        resultData.put("phase","");
+        resultData.put("disease",disease);
+        resultData.put("studyDesign","");
+        resultData.put("studyObjective","");
+        resultData.put("studyStartDate","");
+        resultData.put("inclusionCriteria",inclusionCriteria);
+        resultData.put("exclusionCriteria",exclusionCriteria);
+        resultData.put("currentStatus","");
+        resultData.put("enrollment",enrollment);
+        resultData.put("country",country);
+        resultData.put("tagTime","");
+        resultData.put("intervention",intervention);
+        resultData.put("primaryOutcome",primaryOutcome);
+        resultData.put("crawlTime",getCurrentTime());
+        resultData.put("crawlUrl",url);
+        resultData.put("postTime",registTime);
+        resultData.put("content","content");
+        resultData.put("forwardcontent","forwardcontent");
+        resultData.put("cid","Ndrks");
+        return resultData;
+    }
+    /**
+     * Builds the URL-encoded body of the search form POST.
+     *
+     * Only the free-text field (searchForm:j_idt80) and the ViewState carry a
+     * value; every extended-search field is submitted empty.
+     *
+     * @param viewState ViewState token taken from the search page
+     * @param keyword   search term; URL-encoded here so that spaces, '&amp;', '='
+     *                  or non-ASCII characters cannot corrupt the form body
+     * @return the form body, or "" if encoding fails (an error is logged)
+     */
+    private static String buildSearchPostData(String viewState,String keyword) {
+        try {
+            String charset = StandardCharsets.UTF_8.name();
+            return "searchForm=searchForm" +
+                    "&searchForm%3Aj_idt80=" + URLEncoder.encode(keyword, charset) +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AdrksId=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AsecondaryId=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AscientificSummary=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aoutcome=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AhealthOfCondition=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AhealthyVolunteers=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aaddresses=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt128=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AipdSharingPlan=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt135%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt135%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt146%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt146%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt157%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt157%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Agender=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AageInYears=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AinclusionCriteria=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AexclusionCriteria=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AtrialStatus=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3ArecrutingLocation=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Aj_idt213%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Aj_idt213%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3AtrialDesign%3Apurpose=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3AtrialDesign%3AstudyType=" +
+                    "&searchForm%3Aj_idt287=" +
+                    "&javax.faces.ViewState=" + URLEncoder.encode(viewState, charset);
+        } catch (Exception e) {
+            System.err.println("Error encoding search form data: " + e.getMessage());
+            return "";
+        }
+    }
+    // 生成分页请求的 POST 数据
+    private static String buildPostData(String viewState, int page) {
+        int adjustedPage = page - 1;
+        try {
+            return "resultForm=resultForm" +
+                    "&resultForm%3Asorting%3ArowsPerPage=10" +
+                    "&resultForm%3ApaginationTop%3Aj_idt156%3A"+ adjustedPage +"%3Aj_idt158=" + page +
+                    "&resultForm%3Asorting%3AsortingBy=SCORE" +
+                    "&resultForm%3Asorting%3Aj_idt141=true" +
+                    "&resultForm%3Aj_idt221%3Aj_idt223%3AdownloadConfirmation=resultForm%3Aj_idt221%3Aj_idt223%3AdownloadConfirmation" +
+                    "&selectedType=JSON" +
+                    "&javax.faces.ViewState=" + URLEncoder.encode(viewState, StandardCharsets.UTF_8.name());
+        } catch (Exception e) {
+            System.err.println("Error encoding pagination ViewState: " + e.getMessage());
+            return "";
+        }
+    }
+    /**
+     * Converts a date from dd.MM.yyyy to yyyy-MM-dd HH:mm:ss.
+     *
+     * @param inputDate date text such as "19.05.2025"; surrounding whitespace is ignored
+     * @return the reformatted date, or the literal "Invalid date format" when
+     *         the input is null, unparsable or not a real calendar date
+     */
+    public static String convertDate(String inputDate) {
+        if (inputDate == null) {
+            // SimpleDateFormat.parse(null) would throw a NullPointerException.
+            return "Invalid date format";
+        }
+        try {
+            // Input format: dd.MM.yyyy
+            SimpleDateFormat inputFormat = new SimpleDateFormat("dd.MM.yyyy");
+            // Non-lenient: reject impossible dates such as 31.02.2025 instead of rolling them over.
+            inputFormat.setLenient(false);
+            Date date = inputFormat.parse(inputDate.trim());
+            // Output format: yyyy-MM-dd HH:mm:ss
+            SimpleDateFormat outputFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+            return outputFormat.format(date);
+        } catch (ParseException e) {
+            return "Invalid date format";
+        }
+    }
+
+    /**
+     * Returns the current local date and time formatted as yyyy-MM-dd HH:mm:ss.
+     *
+     * @return the formatted timestamp
+     */
+    public static String getCurrentTime() {
+        return LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
+    }
+    private static Response executeWithRetry(OkHttpClient client, Request request, String keyword) throws IOException {
+        int maxRetries = proxyList.isEmpty() ? 1 : proxyList.size();  // 如果没有代理，只尝试一次
+        int attempt = 0;
+
+        while (attempt < maxRetries) {
+            Response response = client.newCall(request).execute();
+            if (response.code() == 403) {
+                System.out.println("收到 403 状态码，尝试切换代理重试...");
+                response.close();
+                switchProxy();
+                client = createClientWithProxy();  // 使用新代理重建客户端
+                attempt++;
+                if (attempt == maxRetries) {
+                    throw new IOException("所有代理尝试失败，仍然收到 403");
+                }
+                continue;
+            }
+            return response;  // 成功或非 403 状态码，直接返回
+        }
+        throw new IOException("无法执行请求，未获取响应");
+    }
+    /**
+     * Builds an OkHttpClient with 30-second connect/read/write timeouts that
+     * routes through the currently selected proxy.
+     *
+     * Proxy entries are expected as "host:port". A blank entry is ignored and a
+     * malformed one is logged; in both cases the client is built without a
+     * proxy rather than failing with an unchecked exception.
+     *
+     * @return the configured client
+     */
+    private static OkHttpClient createClientWithProxy() {
+        OkHttpClient.Builder builder = new OkHttpClient().newBuilder()
+                .connectTimeout(30, TimeUnit.SECONDS)
+                .readTimeout(30, TimeUnit.SECONDS)
+                .writeTimeout(30, TimeUnit.SECONDS);
+
+        if (!proxyList.isEmpty() && currentProxyIndex < proxyList.size()) {
+            String proxy = proxyList.get(currentProxyIndex);
+            // Lines read from a text file may carry stray whitespace.
+            String trimmed = proxy == null ? "" : proxy.trim();
+            String[] proxyParts = trimmed.split(":");
+            if (proxyParts.length == 2 && !proxyParts[0].trim().isEmpty()) {
+                try {
+                    String proxyHost = proxyParts[0].trim();
+                    int proxyPort = Integer.parseInt(proxyParts[1].trim());
+                    builder.proxy(new java.net.Proxy(java.net.Proxy.Type.HTTP,
+                            new java.net.InetSocketAddress(proxyHost, proxyPort)));
+                    System.out.println("使用代理: " + trimmed);
+                } catch (IllegalArgumentException e) {
+                    // Covers a non-numeric port (NumberFormatException) and a port out of range.
+                    System.err.println("代理格式无效，已忽略: " + trimmed);
+                }
+            } else if (!trimmed.isEmpty()) {
+                System.err.println("代理格式无效，已忽略: " + trimmed);
+            }
+        }
+        return builder.build();
+    }
+    /** Advances currentProxyIndex to the next proxy, wrapping around at the end of the pool; no-op when the pool is empty. */
+    private static synchronized void switchProxy() {
+        int poolSize = proxyList.size();
+        if (poolSize == 0) {
+            return;
+        }
+        currentProxyIndex = (currentProxyIndex + 1) % poolSize;
+        System.out.println("切换到新代理: " + proxyList.get(currentProxyIndex));
+    }
+    /**
+     * Adds 30 to the fourth-from-last field of a '|'-separated payload.
+     *
+     * The payload is split with String.split, so trailing empty fields are
+     * dropped both when counting fields and in the returned string.
+     *
+     * @param originalPayload payload with at least four fields
+     * @return the payload with the fourth-from-last field increased by 30
+     * @throws IllegalArgumentException if there are fewer than four fields or
+     *                                  the target field is not an integer
+     */
+    public static String increaseOffsetBy30(String originalPayload) {
+        String[] tokens = originalPayload.split("\\|");
+        if (tokens.length < 4) {
+            throw new IllegalArgumentException("载荷格式无效，元素不足");
+        }
+
+        // Position of the fourth field counted from the end.
+        final int slot = tokens.length - 4;
+        final int bumped;
+        try {
+            bumped = Integer.parseInt(tokens[slot]) + 30;
+        } catch (NumberFormatException e) {
+            throw new IllegalArgumentException("倒数第 4 个元素不是有效数字: " + tokens[slot]);
+        }
+
+        tokens[slot] = String.valueOf(bumped);
+        return String.join("|", tokens);
+    }
+}
diff --git a/src/main/java/com/example/drks.java b/src/main/java/com/example/drks.java
new file mode 100644
index 0000000..379d7f2
--- /dev/null
+++ b/src/main/java/com/example/drks.java
@@ -0,0 +1,438 @@
+package com.example;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class drks {
+    public static void main(String[] args) throws Exception { // one-shot walk: GET form, POST search, follow redirect, POST page, print links
+        String targetUrl = "https://www.drks.de/search/de/results?page=4";
+        String baseUrl = "https://www.drks.de/search/de";
+        String hostUrl = "https://www.drks.de";
+        String cleanUrl = targetUrl.split("\\?")[0];
+        System.out.println("Pure URL: " + cleanUrl);
+        // Page number = everything after "page=" (defaults to 1 when the URL has no "?page=").
+        // NOTE(review): any further query parameter after the page value would make parseInt throw.
+        String pageNumber = targetUrl.contains("?page=") ? targetUrl.split("page=")[1] : "1";
+        int page = Integer.parseInt(pageNumber);
+        System.out.println("Page Number: " + page);
+
+        // Cookie jar: "name=value" pairs collected from Set-Cookie headers
+        Set<String> cookieSet = new HashSet<>();
+        String sessionId = null;
+
+        // Step 1: initial GET request to obtain cookies and the ViewState
+        System.out.println("\n--- Step 1: Initial GET Request ---");
+        URL initialUrl = new URL(baseUrl);
+        HttpURLConnection initialConn = (HttpURLConnection) initialUrl.openConnection();
+        initialConn.setRequestMethod("GET");
+        initialConn.setInstanceFollowRedirects(false);
+        initialConn.setConnectTimeout(10000);
+        initialConn.setReadTimeout(10000);
+        initialConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+
+        // Capture cookies from the response headers
+        sessionId = updateCookies(initialConn, cookieSet);
+        System.out.println("Initial Cookies: " + cookieSet);
+        System.out.println("Initial Session ID: " + sessionId);
+
+        // Read the response body to get the ViewState (lines are concatenated without separators; platform-default charset)
+        BufferedReader in = new BufferedReader(new InputStreamReader(initialConn.getInputStream()));
+        StringBuilder content = new StringBuilder();
+        String inputLine;
+        while ((inputLine = in.readLine()) != null) {
+            content.append(inputLine);
+        }
+        in.close();
+        initialConn.disconnect();
+
+        // Extract the initial ViewState
+        String initialViewState = extractViewState(content.toString());
+        System.out.println("Initial ViewState: " + initialViewState);
+
+        // Step 2: send the search POST request
+        System.out.println("\n--- Step 2: Search POST Request ---");
+        HttpURLConnection searchConn = (HttpURLConnection) new URL(baseUrl).openConnection();
+        searchConn.setRequestMethod("POST");
+        searchConn.setInstanceFollowRedirects(false);
+        searchConn.setDoOutput(true);
+        searchConn.setConnectTimeout(10000);
+        searchConn.setReadTimeout(10000);
+
+        // Request headers for the search request
+        searchConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+        searchConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        searchConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        searchConn.setRequestProperty("Origin", "https://www.drks.de");
+        searchConn.setRequestProperty("Referer", baseUrl);
+        searchConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+        // Build the POST body of the search request
+        String searchPostData = buildSearchPostData(initialViewState);
+        System.out.println("Search POST Data: " + searchPostData);
+
+        // Send the search POST body
+        try (OutputStream os = searchConn.getOutputStream()) {
+            byte[] input = searchPostData.getBytes(StandardCharsets.UTF_8);
+            os.write(input, 0, input.length);
+        }
+
+        // Update cookies
+        String searchSessionId = updateCookies(searchConn, cookieSet);
+        System.out.println("Search Cookies: " + cookieSet);
+        System.out.println("Search Session ID: " + searchSessionId); // null when this response sets neither JSESSIONID nor csfcfc
+
+        // Handle the search response
+        int searchResponseCode = searchConn.getResponseCode();
+        System.out.println("Search Response Code: " + searchResponseCode);
+
+        if (searchResponseCode == 302) {
+            String redirectUrl = searchConn.getHeaderField("Location");
+            searchConn.disconnect();
+
+            if (redirectUrl == null) {
+                System.err.println("Search request returned 302 but no Location header found.");
+                return;
+            }
+            System.out.println("Redirect URL (raw): " + redirectUrl);
+
+            // Resolve a relative URL against the host
+            if (!redirectUrl.startsWith("http")) {
+                redirectUrl = hostUrl + (redirectUrl.startsWith("/") ? redirectUrl : "/" + redirectUrl);
+            }
+            System.out.println("Resolved Redirect URL: " + redirectUrl);
+
+            // Step 3: follow the redirect (GET request)
+            System.out.println("\n--- Step 3: Follow Redirect (GET Request) ---");
+            URL resultsUrl = new URL(redirectUrl);
+            HttpURLConnection resultsConn = (HttpURLConnection) resultsUrl.openConnection();
+            resultsConn.setRequestMethod("GET");
+            resultsConn.setInstanceFollowRedirects(false);
+            resultsConn.setConnectTimeout(10000);
+            resultsConn.setReadTimeout(10000);
+            resultsConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+            resultsConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+            resultsConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+            // Update cookies
+            String resultsSessionId = updateCookies(resultsConn, cookieSet);
+            System.out.println("Results Cookies: " + cookieSet);
+            System.out.println("Results Session ID: " + resultsSessionId);
+
+            // Read the results page reached via the redirect
+            BufferedReader resultsReader = new BufferedReader(new InputStreamReader(resultsConn.getInputStream()));
+            StringBuilder resultsContent = new StringBuilder();
+            while ((inputLine = resultsReader.readLine()) != null) {
+                resultsContent.append(inputLine);
+            }
+            resultsReader.close();
+            resultsConn.disconnect();
+
+            // Extract the page's ViewState (state token used by the following request)
+            String viewState = extractViewState(resultsContent.toString());
+            System.out.println("Results ViewState: " + viewState);
+
+            // Warn when the session id differs from the initial one; this also fires when this response set no session cookie (null)
+            if (sessionId != null && !sessionId.equals(resultsSessionId)) {
+                System.out.println("Warning: Session ID changed. Initial: " + sessionId + ", Results: " + resultsSessionId);
+            }
+
+            // Step 4: send the pagination request (POST)
+            System.out.println("\n--- Step 4: Pagination POST Request ---");
+            HttpURLConnection postConn = (HttpURLConnection) new URL(cleanUrl).openConnection();
+            postConn.setRequestMethod("POST");
+            postConn.setInstanceFollowRedirects(false);
+            postConn.setDoOutput(true);
+            postConn.setConnectTimeout(10000);
+            postConn.setReadTimeout(10000);
+
+            // Request headers for the pagination request (non-AJAX, mimicking a regular browser navigation)
+            postConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+            postConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+            postConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+            postConn.setRequestProperty("Origin", "https://www.drks.de");
+            postConn.setRequestProperty("Referer", cleanUrl);
+            postConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+            postConn.setRequestProperty("Sec-Fetch-Dest", "document");
+            postConn.setRequestProperty("Sec-Fetch-Mode", "navigate");
+
+            // Build the pagination POST parameters (page number, ViewState, ...)
+            String postData = buildPostData(viewState, page);
+            System.out.println("Pagination POST Data: " + postData);
+
+            // Send the pagination POST body
+            try (OutputStream os = postConn.getOutputStream()) {
+                byte[] input = postData.getBytes(StandardCharsets.UTF_8);
+                os.write(input, 0, input.length);
+            }
+
+            // Update cookies (the pagination response may carry new Set-Cookie headers)
+            String paginationSessionId = updateCookies(postConn, cookieSet);
+            System.out.println("Pagination Cookies: " + cookieSet);
+            System.out.println("Pagination Session ID: " + paginationSessionId);
+
+            // Handle the pagination response
+            int responseCode = postConn.getResponseCode();
+            System.out.println("Pagination Response Code: " + responseCode);
+
+            // Read the pagination response body (error stream for codes >= 400). NOTE(review): getErrorStream() may return null, which is not handled here
+            StringBuilder postContent = new StringBuilder();
+            try (BufferedReader postReader = new BufferedReader(
+                    new InputStreamReader(
+                            responseCode >= 400 ? postConn.getErrorStream() : postConn.getInputStream()))) {
+                while ((inputLine = postReader.readLine()) != null) {
+                    postContent.append(inputLine);
+                }
+            }
+
+            Document parse = null;
+            if (responseCode == HttpURLConnection.HTTP_MOVED_TEMP
+                    || responseCode == HttpURLConnection.HTTP_MOVED_PERM
+                    || responseCode == HttpURLConnection.HTTP_SEE_OTHER) {
+                String newUrl = postConn.getHeaderField("Location");
+                System.out.println("Pagination Redirecting to: " + newUrl);
+
+                // Resolve a relative redirect target to an absolute URL. NOTE(review): newUrl is not null-checked on this path
+                if (!newUrl.startsWith("http")) {
+                    newUrl = hostUrl + (newUrl.startsWith("/") ? newUrl : "/" + newUrl);
+                }
+
+                // Follow the redirect
+                URL redirectConnUrl = new URL(newUrl);
+                HttpURLConnection followConn = (HttpURLConnection) redirectConnUrl.openConnection();
+                followConn.setRequestMethod("GET");
+                followConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+                followConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+                followConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+                BufferedReader redirectReader = new BufferedReader(new InputStreamReader(followConn.getInputStream()));
+                StringBuilder redirectContent = new StringBuilder();
+                while ((inputLine = redirectReader.readLine()) != null) {
+                    redirectContent.append(inputLine);
+                }
+                redirectReader.close();
+                followConn.disconnect();
+
+                System.out.println("Redirect Response: " + redirectContent);
+                parse = Jsoup.parse(String.valueOf(redirectContent));
+            } else if (responseCode == 200) {
+                System.out.println("Pagination Response: " + postContent);
+                parse = Jsoup.parse(String.valueOf(postContent));
+            } else {
+                System.err.println("Unexpected Pagination Response Code: " + responseCode);
+                // Print the error stream for any other code. NOTE(review): for codes >= 400 it was already read and closed above
+                try (BufferedReader errorReader = new BufferedReader(new InputStreamReader(postConn.getErrorStream()))) {
+                    String errorLine;
+                    System.err.println("Error Stream:");
+                    while ((errorLine = errorReader.readLine()) != null) {
+                        System.err.println(errorLine);
+                    }
+                } catch (Exception e) {
+                    System.err.println("Could not read error stream: " + e.getMessage());
+                }
+                return; // Exit if pagination fails unexpectedly (postConn.disconnect() is not reached on this path)
+            }
+
+            Elements links = parse.select("div[data-label='Titel der Studie'] a");
+
+            for (Element link : links) {
+                String href = link.attr("href");
+                String text = link.text();
+
+                System.out.println("链接: " + href);
+                System.out.println("标题: " + text);
+            }
+            String text = parse.select("div.col-md-2.pt-3.ps-0.text-md-end").text();
+            // Regex-extract the number between "Seite" and "/". NOTE(review): the printed label means "total", but this looks like the current page - confirm
+            String regex = "Seite\\s*(\\d+)\\s*/";
+            Matcher matcher = Pattern.compile(regex).matcher(text);
+            if (matcher.find()) {
+                System.out.println("总共有"+matcher.group(1));// first capture group: the digits matched before "/"
+            }
+            postConn.disconnect();
+
+        } else if (searchResponseCode == 200) {
+            System.out.println("Search request returned 200 OK. Reading response body:");
+            // Read and print the response body for debugging
+            try (BufferedReader searchReader = new BufferedReader(new InputStreamReader(searchConn.getInputStream()))) {
+                String line;
+                StringBuilder searchResponseBody = new StringBuilder();
+                while ((line = searchReader.readLine()) != null) {
+                    searchResponseBody.append(line).append("\n");
+                }
+                System.out.println("Search Response Body:\n" + searchResponseBody.toString());
+            } catch (Exception e) {
+                System.err.println("Could not read search response body: " + e.getMessage());
+            } finally {
+                searchConn.disconnect();
+            }
+
+            System.err.println("Search request did not return expected 302 redirect. Response code: " + searchResponseCode);
+            System.err.println("The website's search mechanism may have changed.");
+
+        } else {
+            // Handle other unexpected response codes for the search request
+            System.err.println("Unexpected Search Response Code: " + searchResponseCode);
+            try (BufferedReader errorReader = new BufferedReader(new InputStreamReader(searchConn.getErrorStream()))) {
+                String errorLine;
+                System.err.println("Error Stream:");
+                while ((errorLine = errorReader.readLine()) != null) {
+                    System.err.println(errorLine);
+                }
+            } catch (Exception e) {
+                System.err.println("Could not read error stream for search response: " + e.getMessage());
+            }
+            searchConn.disconnect();
+        }
+    }
+
+    /**
+     * Merges conn's Set-Cookie headers into cookieSet as "name=value" pairs, replacing any stale
+     * value of the same cookie, and returns the JSESSIONID pair (else the csfcfc pair, else null).
+     */
+    private static String updateCookies(HttpURLConnection conn, Set<String> cookieSet) {
+        List<String> setCookieHeaders = conn.getHeaderFields().get("Set-Cookie");
+        if (setCookieHeaders == null) return null;
+        String sessionId = null;
+        for (String header : setCookieHeaders) {
+            String pair = header.split(";")[0]; // drop attributes such as Path / HttpOnly
+            String namePrefix = pair.substring(0, pair.indexOf('=') + 1); // "name=", or "" if there is no '='
+            if (!namePrefix.isEmpty()) cookieSet.removeIf(known -> known.startsWith(namePrefix)); // a re-issued cookie replaces its old value
+            cookieSet.add(pair);
+            if (pair.startsWith("JSESSIONID=") || (sessionId == null && pair.startsWith("csfcfc="))) {
+                sessionId = pair; // JSESSIONID always wins; csfcfc is used only while nothing else was chosen
+            }
+        }
+        return sessionId;
+    }
+
+    // 提取 __VIEWSTATE 隐藏字段的值
+    private static String extractViewState(String html) {
+        // Try regex first for jakarta.faces.ViewState
+        String regexJakarta = "name=\"jakarta\\.faces\\.ViewState\"[^>]*value=\"([^\"]+)\"";
+        Pattern patternJakarta = Pattern.compile(regexJakarta);
+        Matcher matcherJakarta = patternJakarta.matcher(html);
+
+        if (matcherJakarta.find()) {
+            return matcherJakarta.group(1);
+        }
+
+        // Fallback to regex for javax.faces.ViewState (older versions or other parts of site)
+        String regexJavax = "name=\"javax\\.faces\\.ViewState\"[^>]*value=\"([^\"]+)\"";
+        Pattern patternJavax = Pattern.compile(regexJavax);
+        Matcher matcherJavax = patternJavax.matcher(html);
+
+        if (matcherJavax.find()) {
+            return matcherJavax.group(1);
+        }
+
+        // Fallback to string search if regex fails (less reliable)
+        String searchStringJakarta = "jakarta.faces.ViewState";
+        int startIndexJakarta = html.indexOf(searchStringJakarta);
+        if (startIndexJakarta != -1) {
+            int valueStart = html.indexOf("value=\"", startIndexJakarta) + 7;
+            int valueEnd = html.indexOf("\"", valueStart);
+            if (valueStart != -1 && valueEnd != -1) {
+                return html.substring(valueStart, valueEnd);
+            }
+        }
+
+        String searchStringJavax = "javax.faces.ViewState";
+        int startIndexJavax = html.indexOf(searchStringJavax);
+        if (startIndexJavax != -1) {
+            int valueStart = html.indexOf("value=\"", startIndexJavax) + 7;
+            int valueEnd = html.indexOf("\"", valueStart);
+            if (valueStart != -1 && valueEnd != -1) {
+                return html.substring(valueStart, valueEnd);
+            }
+        }
+
+
+        System.err.println("Failed to extract ViewState from HTML");
+        return ""; // Return empty string if not found
+    }
+
+    /** Search form body for the default term "Midwifery"; see the two-argument overload. */
+    private static String buildSearchPostData(String viewState) {
+        return buildSearchPostData(viewState, "Midwifery");
+    }
+    /** URL-encoded body of the search form for searchTerm (extended-search fields left empty); "" if encoding fails. */
+    private static String buildSearchPostData(String viewState, String searchTerm) {
+        try {
+            return "searchForm=searchForm&searchForm%3Aj_idt80=" + URLEncoder.encode(searchTerm, StandardCharsets.UTF_8.name()) + // free-text term
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AdrksId=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AsecondaryId=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AscientificSummary=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aoutcome=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AhealthOfCondition=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AhealthyVolunteers=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aaddresses=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt128=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AipdSharingPlan=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt135%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt135%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt146%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt146%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt157%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt157%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Agender=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AageInYears=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AinclusionCriteria=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AexclusionCriteria=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AtrialStatus=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3ArecrutingLocation=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Aj_idt213%3Afrom=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Aj_idt213%3Ato=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3AtrialDesign%3Apurpose=" +
+                    "&searchForm%3AextendedSearch%3AextendedSearchTabs%3AtrialDesign%3AstudyType=" +
+                    "&searchForm%3Aj_idt287=" + // This parameter might be related to the search button click
+                    "&jakarta.faces.ViewState=" + URLEncoder.encode(viewState, StandardCharsets.UTF_8.name());
+        } catch (Exception e) {
+            System.err.println("Error encoding search ViewState: " + e.getMessage());
+            return "";
+        }
+    }
+
+    // 生成分页请求的 POST 数据
+    private static String buildPostData(String viewState, int page) {
+        // The page parameter in the POST data might be 0-indexed or 1-indexed
+        // Let's assume it's 0-indexed for the parameter name and 1-indexed for the value based on your original code
+        int parameterPage = page - 1;
+        int valuePage = page; // The value sent in the form might be the actual page number
+
+        try {
+            // URL-encode the ViewState
+            String encodedViewState = URLEncoder.encode(viewState, StandardCharsets.UTF_8.name());
+
+            return "resultForm=resultForm" +
+                    "&resultForm%3Asorting%3ArowsPerPage=10" +
+                    // The parameter name for pagination button might have changed
+                    // Check browser network traffic for the exact parameter name for page buttons
+                    "&resultForm%3ApaginationTop%3Aj_idt156%3A"+ parameterPage +"%3Aj_idt158=" + valuePage +
+                    "&resultForm%3Asorting%3AsortingBy=SCORE" +
+                    "&resultForm%3Asorting%3Aj_idt141=true" + // This might be for sorting direction
+                    "&resultForm%3Aj_idt221%3Aj_idt223%3AdownloadConfirmation=resultForm%3Aj_idt221%3Aj_idt223%3AdownloadConfirmation" +
+                    "&selectedType=JSON" + // This might be for download format, potentially not needed for pagination
+                    "&jakarta.faces.ViewState=" + encodedViewState; // Changed to jakarta.faces.ViewState
+        } catch (Exception e) {
+            System.err.println("Error encoding pagination ViewState: " + e.getMessage());
+            return "";
+        }
+    }
+}
diff --git a/src/main/java/com/example/getInKa.java b/src/main/java/com/example/getInKa.java
new file mode 100644
index 0000000..c18d4e5
--- /dev/null
+++ b/src/main/java/com/example/getInKa.java
@@ -0,0 +1,165 @@
+package com.example;
+
+import org.apache.kafka.clients.producer.*;
+import org.apache.kafka.common.serialization.StringSerializer;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.Response;
+
+import java.io.*;
+import java.util.*;
+import java.util.concurrent.Future;
+
+public class getInKa {
+    // 初始化 OkHttp 客户端，用于发送 HTTP 请求
+    private static final OkHttpClient httpClient = new OkHttpClient();
+    private static final String PROCESSED_URLS_FILE = "processed_urls.txt"; // 记录已处理的 URL 文件
+    public static void main(String[] args) {
+        try {
+            // 获取目标 URL 列表
+            System.out.println("Starting URL collection...");
+            List<String> urls = getUrls();
+            System.out.println("Collected " + urls.size() + " URLs.");
+
+            // 从 URL 中提取新闻数据并保存到 kafka
+            System.out.println("Starting news extraction...");
+            getNews(urls);
+            System.out.println("News extraction completed.");
+        } catch (IOException | InterruptedException e) {
+            System.out.println("Error in main: " + e.getMessage());
+        }
+    }
+    /**
+     * Walks listing pages 1..28 and collects the article links not yet in the processed-URL set.
+     * Each new link is also added to that set, which is written back to disk before returning.
+     * @return the newly discovered links, in discovery order
+     * @throws IOException          if a page request or the processed-URL file access fails
+     * @throws InterruptedException if the one-second pause after a page is interrupted
+     */
+    public static List<String> getUrls() throws IOException, InterruptedException {
+        final Set<String> seen = loadProcessedUrls();
+        final List<String> fresh = new ArrayList<>();
+        for (int page = 1; page <= 28; page++) {
+            final String listingUrl = "https://www.zyctd.com/zixun/201/pz102-" + page + ".html";
+            final Request listingRequest = new Request.Builder().url(listingUrl)
+                    .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0")
+                    .build();
+            System.out.println("Fetching page " + page + ": " + listingUrl);
+            try (Response response = httpClient.newCall(listingRequest).execute()) {
+                if (!response.isSuccessful() || response.body() == null) {
+                    System.out.println("Failed to fetch page " + page + ": Status code " + response.code());
+                } else {
+                    System.out.println("Successfully fetched page " + page);
+                    Elements anchors = Jsoup.parse(response.body().string()).select("div.zixun-list > div.zixun-item-box > div.zixun-item-title > p > a");
+                    List<String> hrefs = anchors.eachAttr("href");
+                    System.out.println("Found " + hrefs.size() + " URLs on page " + page);
+                    for (String href : hrefs) {
+                        if (seen.add(href)) { // add() returns true only for a link that was not in the set yet
+                            fresh.add(href);
+                        }
+                    }
+                }
+            }
+            Thread.sleep(1000);
+        }
+        saveProcessedUrls(seen);
+        return fresh;
+    }
+    /** Fetches each article, extracts title/date/content and passes complete records to saveData; per-URL
+     *  failures are logged and skipped. Pauses 5 s after every URL; an interrupt during the pause ends the run. */
+    public static void getNews(List<String> urls) throws IOException {
+        for (int i = 0; i < urls.size(); i++) {
+            String url = urls.get(i);
+            Request request = new Request.Builder().url(url)
+                    .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0").build();
+            System.out.println("Processing URL " + (i + 1) + "/" + urls.size() + ": " + url);
+            try (Response response = httpClient.newCall(request).execute()) {
+                if (response.isSuccessful() && response.body() != null) {
+                    System.out.println("Successfully fetched news from " + url);
+                    Document doc = Jsoup.parse(response.body().string());
+                    String title = doc.select("div.info-title.t-center > h1").text().trim();
+                    String date = doc.select("div.author.color-grey.art-info > span:nth-child(1)").text().trim();
+                    String content = String.join("\n", doc.select("div.info-content > div > p").eachText()).trim();
+                    if (content.isEmpty()) {
+                        content = String.join("\n", doc.select("div.info-content > p:nth-child(2)").eachText()).trim();
+                    }
+                    if (!title.isEmpty() && !date.isEmpty() && !content.isEmpty()) {
+                        Map<String, String> news = new HashMap<>();
+                        news.put("title", title);
+                        news.put("date", date);
+                        news.put("content", content);
+                        news.put("url", url);
+                        System.out.println("Extracted news: " + news.get("title"));
+                        saveData(news);
+                    } else {
+                        System.out.println("Failed to extract complete data from " + url);
+                    }
+                } else {
+                    System.out.println("Failed to fetch news from " + url + ": Status code " + response.code());
+                }
+            } catch (Exception e) {
+                System.out.println("An error occurred while fetching " + url + ": " + e.getMessage());
+            }
+            try {
+                Thread.sleep(5000); // pause 5 seconds before the next URL
+            } catch (InterruptedException e) {
+                System.out.println("Sleep interrupted: " + e.getMessage());
+                // Restore the flag the exception cleared and stop, rather than continuing as if nothing happened.
+                Thread.currentThread().interrupt();
+                return;
+            }
+        }
+    }
+    /** Sends one article to the Kafka topic "news-topic" (key = title, value = Map.toString()) and waits
+     *  for the send to complete; any failure is logged to stderr instead of being propagated. */
+    public static void saveData(Map<String, String> news) {
+        final Properties producerConfig = new Properties();
+        producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+        producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
+        producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
+        // A producer is created for this single record and closed again by try-with-resources.
+        try (Producer<String, String> producer = new KafkaProducer<>(producerConfig)) {
+            final ProducerRecord<String, String> outgoing =
+                    new ProducerRecord<>("news-topic", news.get("title"), news.toString());
+            final Callback onCompletion = (meta, failure) -> {
+                if (failure != null) {
+                    System.err.println("Failed to send data to Kafka: " + failure.getMessage());
+                    return;
+                }
+                System.out.println("Data sent successfully to Kafka: topic=" + meta.topic() +
+                        ", partition=" + meta.partition() + ", offset=" + meta.offset());
+            };
+            producer.send(outgoing, onCompletion).get(); // get() blocks until the send has completed
+        } catch (Exception e) {
+            System.err.println("Error while sending data to Kafka: " + e.getMessage());
+        }
+    }
+    // 加载已处理的 URL
+    private static Set<String> loadProcessedUrls() throws IOException {
+        Set<String> processedUrls = new HashSet<>();
+        File file = new File(PROCESSED_URLS_FILE);
+        if (file.exists()) {
+            try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    processedUrls.add(line.trim());
+                }
+            }
+        }
+        return processedUrls;
+    }
+
+    /** Overwrites PROCESSED_URLS_FILE with the given URLs, one per line, in the set's iteration order. */
+    private static void saveProcessedUrls(Set<String> processedUrls) throws IOException {
+        try (BufferedWriter out = new BufferedWriter(new FileWriter(PROCESSED_URLS_FILE))) {
+            for (String entry : processedUrls) {
+                out.write(entry);
+                out.newLine();
+            }
+        }
+    }
+}
diff --git a/src/main/java/com/example/jsonGetOk.java b/src/main/java/com/example/jsonGetOk.java
new file mode 100644
index 0000000..ced112b
--- /dev/null
+++ b/src/main/java/com/example/jsonGetOk.java
@@ -0,0 +1,47 @@
+package com.example;
+
+import okhttp3.*;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Scratch check for the dsscu.gov.mo page-detail JSON API: fetches one record and prints the
+ * fields extracted from it.
+ */
+public class jsonGetOk {
+
+    /** Page-detail endpoint (fixed {@code EntityId}) requested by {@link #main(String[])}. */
+    private static final String DETAIL_URL =
+            "https://www.dsscu.gov.mo/api/common/page_detail?PostType=page&EntityId=6654829e-8163-b801-0096-c02e09d690d1";
+
+    /**
+     * Downloads the page-detail JSON, extracts {@code data.onlineAt} and the {@code name},
+     * {@code summary} and {@code biddersFile} entries of {@code data.metas}, and prints them
+     * as a map together with the raw response.
+     *
+     * @param args unused
+     * @throws IOException if the request fails, the server answers with a non-2xx status,
+     *                     or the response carries no body
+     */
+    public static void main(String[] args) throws IOException {
+        OkHttpClient client = new OkHttpClient().newBuilder()
+                .build();
+        Request request = new Request.Builder()
+                .url(DETAIL_URL)
+                .get()
+                .build();
+
+        String responseBody;
+        // try-with-resources releases the connection even if reading the body fails.
+        try (Response response = client.newCall(request).execute()) {
+            ResponseBody payload = response.body();
+            if (!response.isSuccessful() || payload == null) {
+                throw new IOException("Request to " + DETAIL_URL + " failed: HTTP " + response.code());
+            }
+            responseBody = payload.string();
+        }
+
+        // Walk the JSON envelope: data -> onlineAt, data -> metas -> name/summary/biddersFile.
+        JSONObject jsonObject = new JSONObject(responseBody);
+        JSONObject data = jsonObject.getJSONObject("data");
+        String postTime = data.getString("onlineAt");
+        JSONObject metas = data.getJSONObject("metas");
+        String title = metas.getString("name");
+        String summary = metas.getString("summary");
+        // The summary is run through jsoup so only its visible text is kept.
+        Document parse = Jsoup.parse(summary);
+        String content = parse.text();
+        // The attachment reference is tagged with its type as "<reference>###pdf".
+        String fileList = metas.getString("biddersFile") + "###" + "pdf";
+
+        Map<String, Object> map = new HashMap<>();
+        map.put("postTime", postTime);
+        map.put("title", title);
+        map.put("content", content);
+        map.put("forwardcontent", responseBody);
+        map.put("fileList", fileList);
+        System.out.println(map);
+    }
+
+}
diff --git a/src/main/java/com/example/ook.java b/src/main/java/com/example/ook.java
new file mode 100644
index 0000000..2d67ed9
--- /dev/null
+++ b/src/main/java/com/example/ook.java
@@ -0,0 +1,256 @@
+package com.example;
+
+import okhttp3.*;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.Proxy;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class ook {
+
+
+    /**
+     * Fetches the press-release listing page through the HTTP proxy at 127.0.0.1:7897 and
+     * prints the {@code href} of every element matched by the {@code .title a} selector.
+     *
+     * @param args unused
+     * @throws Exception if the request cannot be executed, the server answers with a non-2xx
+     *                   status, or the response body cannot be read
+     */
+    public static void main(String[] args) throws Exception {
+        OkHttpClient client = new OkHttpClient().newBuilder()
+                .connectTimeout(30, TimeUnit.SECONDS)
+                .readTimeout(30, TimeUnit.SECONDS)
+                .writeTimeout(30, TimeUnit.SECONDS)
+                // All traffic goes through the local proxy listening on port 7897.
+                .proxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 7897)))
+                .build();
+
+        // Browser-like headers. Accept-Encoding is deliberately not set so that OkHttp
+        // negotiates and decodes response compression on its own.
+        Request request = new Request.Builder()
+                .url("https://wrair.health.mil/News-Media/Press-Releases/")
+                .get()
+                .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
+                .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
+                .addHeader("Accept-Language", "zh-CN,zh;q=0.9,th;q=0.8")
+                .addHeader("Cache-Control", "no-cache")
+                .addHeader("Pragma", "no-cache")
+                .addHeader("Referer", "https://wrair.health.mil/News-Media/Press-Releases/")
+                .addHeader("Cookie", "_ga=GA1.1.516170455.1740971326; .ASPXANONYMOUS=xUBztj4Ek1vHfBPe-1QqFJhd83I4bkB1k0_d-2QrQ7drfd7R7Y6eNsyyHVjSeffyIKzy_qm5tOKOCtbvst-s9ZGWThxifCGMdJE117EQlr1OZARa0; dnn_IsMobile=False; language=en-US; ARRAffinity=c30f7cdebcf208f7c5a996cb410451c36532afc64703669607f68f04a75f4b39; _ga_CSLL4ZEK4L=GS1.1.1742349582.4.1.1742350035.0.0.0")
+                .addHeader("Upgrade-Insecure-Requests", "1")
+                .addHeader("Sec-Fetch-Dest", "document")
+                .addHeader("Sec-Fetch-Mode", "navigate")
+                .addHeader("Sec-Fetch-Site", "same-origin")
+                .addHeader("Sec-Fetch-User", "?1")
+                .addHeader("Sec-Ch-Ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
+                .addHeader("Sec-Ch-Ua-Mobile", "?0")
+                .addHeader("Sec-Ch-Ua-Platform", "\"Windows\"")
+                .addHeader("Priority", "u=0, i")
+                .build();
+
+        String html;
+        // try-with-resources releases the connection even if reading the body fails.
+        try (Response response = client.newCall(request).execute()) {
+            ResponseBody payload = response.body();
+            if (!response.isSuccessful() || payload == null) {
+                throw new IOException("Request to " + request.url() + " failed: HTTP " + response.code());
+            }
+            html = payload.string();
+        }
+
+        // Print the target of every link found under a ".title" element.
+        Document parse = Jsoup.parse(html);
+        Elements elements = parse.select(".title a");
+        for (Element element : elements) {
+            System.out.println(element.attr("href"));
+        }
+    }
+    /**
+     * Creates an instance without doing any work.
+     *
+     * @throws IOException never thrown by this body; declared as part of the existing signature
+     */
+    public ook() throws IOException { }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 定义输入格式：dd MMMM , yyyy（例如 "28 February , 2025"）
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMMM dd, yyyy", Locale.ENGLISH);
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 定义输入格式：yyyy 年 MM 月 dd 日
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MM-dd-yyyy", Locale.CHINESE);
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+
+//        public static String convertToTimestamp(String dateStr) {
+//            try {
+//                // 定义输入格式
+//                DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy");
+//                // 定义输出格式
+//                DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//                // 解析输入字符串为 LocalDate
+//                LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//                // 转换为 LocalDateTime，设置时间为 00:00:00
+//                LocalDateTime dateTime = date.atStartOfDay();
+//                // 格式化为目标字符串
+//                return dateTime.format(outputFormatter);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//                return null; // 或者抛出异常，根据需求调整
+//            }
+//        }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 定义输入格式：MMMM d, yyyy（例如 "June 3, 2015"）
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d MMMM, yyyy", Locale.ENGLISH);
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+//    public static String convertToTimestamp(String input) {
+//        try {
+//            // 正则匹配 "d MMMM yyyy"
+//            Pattern pattern = Pattern.compile("\\d{1,2} [A-Za-z]+ \\d{4}");
+//            Matcher matcher = pattern.matcher(input);
+//            if (matcher.find()) {
+//                String dateStr = matcher.group();
+//                DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d MMMM yyyy", Locale.ENGLISH);
+//                DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//                LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//                return date.atStartOfDay().format(outputFormatter);
+//            } else {
+//                System.out.println("No date found in: " + input);
+//                return null;
+//            }
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null;
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // Parse the ISO 8601 date string (e.g., "2025-03-17T12:37:33.033Z")
+//            ZonedDateTime zdt = ZonedDateTime.parse(dateStr, DateTimeFormatter.ISO_DATE_TIME);
+//
+//            // Define the output format (yyyy-MM-dd hh:mm:ss)
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // Format the date to the desired output
+//            return zdt.format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // Or throw an exception, depending on your needs
+//        }
+//    }
+    /**
+     * Converts an English month-day-year date such as {@code "Jan. 9, 2025"} into a
+     * {@code yyyy-MM-dd HH:mm:ss} timestamp at midnight.
+     *
+     * <p>The month may be written as a full name ({@code "March"}) or in its short form, with
+     * or without a trailing dot ({@code "Jan."}, {@code "Jan"}). Leading and trailing
+     * whitespace is ignored.
+     *
+     * @param dateStr the date text to convert; may be {@code null}
+     * @return the formatted timestamp, or {@code null} when {@code dateStr} is {@code null},
+     *         blank, or not in a supported format
+     */
+    public static String convertToTimestamp(String dateStr) {
+        if (dateStr == null || dateStr.trim().isEmpty()) {
+            return null;
+        }
+        try {
+            // Optional sections: full month name, else short month name, then an optional dot.
+            // A dotted-only "MMM." pattern would reject "May 9, 2025" and "March 9, 2025".
+            DateTimeFormatter inputFormatter =
+                    DateTimeFormatter.ofPattern("[MMMM][MMM][.] d, yyyy", Locale.ENGLISH);
+            LocalDate date = LocalDate.parse(dateStr.trim(), inputFormatter);
+
+            // The input carries no time of day, so the result is pinned to 00:00:00.
+            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+            return date.atStartOfDay().format(outputFormatter);
+        } catch (DateTimeParseException e) {
+            System.err.println("Unparseable date \"" + dateStr + "\": " + e.getMessage());
+            return null;
+        }
+    }
+    // 调用本地代理服务获取代理地址
+    /**
+     * Issues a GET to the local service at {@code http://127.0.0.1:7897} and returns the
+     * response body as text (presumably a JSON document describing the proxy; confirm
+     * against the service).
+     *
+     * @return the response body of a successful call
+     * @throws Exception if the call fails or the service answers with a non-2xx status
+     */
+    private static String getProxyFromLocalService() throws Exception {
+        Request probe = new Request.Builder()
+                .url("http://127.0.0.1:7897")
+                .get()
+                .build();
+        OkHttpClient client = new OkHttpClient();
+
+        try (Response reply = client.newCall(probe).execute()) {
+            // Guard clause: anything other than a 2xx answer is reported with its status code.
+            if (!reply.isSuccessful()) {
+                throw new Exception("获取代理失败，状态码: " + reply.code());
+            }
+            return reply.body().string();
+        }
+    }
+}
+
diff --git a/src/main/java/com/example/oook.java b/src/main/java/com/example/oook.java
new file mode 100644
index 0000000..d8c24d9
--- /dev/null
+++ b/src/main/java/com/example/oook.java
@@ -0,0 +1,524 @@
+package com.example;
+
+import okhttp3.*;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.Proxy;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.*;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class oook {
+
+
+    /**
+     * Downloads one article page and prints the fields extracted from it: publish time
+     * (normalized by {@code convertIsoToTimestamp}), headline, body text, the HTML of the
+     * {@code .page} container, and the list of image URLs found inside that container.
+     *
+     * @param args unused
+     * @throws Exception if the request cannot be executed, the server answers with a non-2xx
+     *                   status, or the response body cannot be read
+     */
+    public static void main(String[] args) throws Exception {
+        OkHttpClient client = new OkHttpClient().newBuilder()
+                .connectTimeout(30, TimeUnit.SECONDS)
+                .readTimeout(30, TimeUnit.SECONDS)
+                .writeTimeout(30, TimeUnit.SECONDS)
+                .build();
+
+        String url = "https://www.iranintl.com/en/202504116060";
+        Request request = new Request.Builder()
+                .url(url)
+                .get()
+                .build();
+
+        String html;
+        // try-with-resources releases the connection even if reading the body fails.
+        try (Response response = client.newCall(request).execute()) {
+            ResponseBody payload = response.body();
+            if (!response.isSuccessful() || payload == null) {
+                throw new IOException("Request to " + url + " failed: HTTP " + response.code());
+            }
+            html = payload.string();
+        }
+
+        // The page URL is supplied as base URI so relative image links can be resolved below.
+        Document parse = Jsoup.parse(html, url);
+
+        String postTime = convertIsoToTimestamp(parse.select(".WrittenContentBlock-module__9pvVhW__timeAgo time").attr("datetime"));
+        String title = parse.select(".WrittenContentBlock-module__9pvVhW__headline").text();
+        String content = parse.select(".WrittenContentBlock-module__9pvVhW__body p").text();
+        String forwardcontent = parse.select(".page").html();
+
+        // Each image is recorded as "<absolute url>###avif".
+        List<String> imgList = new ArrayList<>();
+        for (Element img : parse.select(".page img")) {
+            String src = img.attr("src");
+            if (src.isEmpty()) {
+                continue;
+            }
+            // absUrl leaves absolute URLs (http or https) alone and resolves relative ones
+            // against the page URL; it yields "" when no URL can be built, in which case the
+            // raw attribute value is kept.
+            String fullUrl = img.absUrl("src");
+            if (fullUrl.isEmpty()) {
+                fullUrl = src;
+            }
+            imgList.add(fullUrl + "###" + "avif");
+        }
+
+        Map<String, Object> map = new HashMap<>();
+        map.put("postTime", postTime);
+        map.put("title", title);
+        map.put("content", content);
+        map.put("forwardcontent", forwardcontent);
+        map.put("imgList", imgList);
+        System.out.println(map);
+    }
+    /**
+     * Creates an instance without doing any work.
+     *
+     * @throws IOException never thrown by this body; declared as part of the existing signature
+     */
+    public oook() throws IOException { }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 定义输入格式：dd MMMM , yyyy（例如 "28 February , 2025"）
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMMM dd, yyyy", Locale.ENGLISH);
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//            dateStr = dateStr.replace("|", "").trim();
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 去掉 "Publié le" 前缀并清理多余字符
+//            dateStr = dateStr.replace("Publié le", "").trim();
+//
+//            // 定义输入格式：dd MMMM yyyy（例如 "25 mars 2025"）
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("dd MMMM yyyy", Locale.FRENCH);
+//
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 定义输入格式：yyyy 年 MM 月 dd 日
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MM-dd-yyyy", Locale.CHINESE);
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+
+    //        public static String convertToTimestamp(String dateStr) {
+//            try {
+//                // 定义输入格式
+//                DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy");
+//                // 定义输出格式
+//                DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//                // 解析输入字符串为 LocalDate
+//                LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//                // 转换为 LocalDateTime，设置时间为 00:00:00
+//                LocalDateTime dateTime = date.atStartOfDay();
+//                // 格式化为目标字符串
+//                return dateTime.format(outputFormatter);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//                return null; // 或者抛出异常，根据需求调整
+//            }
+//        }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 定义输入格式：MMMM d, yyyy（例如 "June 3, 2015"）
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d MMMM, yyyy", Locale.ENGLISH);
+//            // 定义输出格式：yyyy-MM-dd HH:mm:ss
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // 解析输入日期
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//            // 转换为带时间的格式，时间设为 00:00:00
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // 或抛出异常，根据需求调整
+//        }
+//    }
+//    public static String convertToTimestamp(String input) {
+//        try {
+//            // 正则匹配 "d MMMM yyyy"
+//            Pattern pattern = Pattern.compile("\\d{1,2} [A-Za-z]+ \\d{4}");
+//            Matcher matcher = pattern.matcher(input);
+//            if (matcher.find()) {
+//                String dateStr = matcher.group();
+//                DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d MMMM yyyy", Locale.ENGLISH);
+//                DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//                LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//                return date.atStartOfDay().format(outputFormatter);
+//            } else {
+//                System.out.println("No date found in: " + input);
+//                return null;
+//            }
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null;
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // Parse the ISO 8601 date string (e.g., "2025-03-17T12:37:33.033Z")
+//            ZonedDateTime zdt = ZonedDateTime.parse(dateStr, DateTimeFormatter.ISO_DATE_TIME);
+//
+//            // Define the output format (yyyy-MM-dd hh:mm:ss)
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//
+//            // Format the date to the desired output
+//            return zdt.format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null; // Or throw an exception, depending on your needs
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // Parse "Jan. 9, 2025" (abbreviated month, dot, comma-separated)
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMM d, yyyy", Locale.ENGLISH);
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//
+//            // Format to "yyyy-MM-dd HH:mm:ss" (defaulting time to 00:00:00)
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null;
+//        }
+//    }
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 从文本中提取修改日期
+//            String modifiedDateStr = extractModifiedDate(dateStr);
+//            if (modifiedDateStr == null) {
+//                throw new IllegalArgumentException("无法找到修改日期");
+//            }
+//
+//            // Parse "20/12/2024" (day/month/year format, Italian style)
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy", Locale.ITALIAN);
+//            LocalDate date = LocalDate.parse(modifiedDateStr, inputFormatter);
+//
+//            // Format to "yyyy-MM-dd HH:mm:ss" (defaulting time to 00:00:00)
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null;
+//        }
+//    }
+    public static String convertIsoToTimestamp(String dateStr) { // ISO-8601 instant text -> "yyyy-MM-dd HH:mm:ss"; returns null if anything fails
+        try {
+            // Parse the ISO-8601 text (as accepted by Instant.parse) into an Instant.
+            Instant instant = Instant.parse(dateStr);
+            // Render the instant at UTC; no conversion to the system default time zone happens here.
+            LocalDateTime localDateTime = LocalDateTime.ofInstant(instant, ZoneOffset.UTC);
+            // Output format.
+            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+            return localDateTime.format(outputFormatter);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+//    public static String convertToTimestamp(String dateStr) {
+//        try {
+//            // 创建捷克语的日期格式器，解析 "27. listopadu 2024"
+//            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d. MMMM yyyy", new Locale("cs", "CZ"));
+//            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+//
+//            // 转换为 "yyyy-MM-dd HH:mm:ss" 格式，默认时间为 00:00:00
+//            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+//            return date.atStartOfDay().format(outputFormatter);
+//        } catch (Exception e) {
+//            e.printStackTrace();
+//            return null;
+//        }
+//    }
+    // Returns the first dd/dd/dddd token found on a line containing "Modificato", or null if there is none.
+    private static String extractModifiedDate(String text) {
+        String[] lines = text.split("\n");
+        for (String line : lines) {
+            if (line.contains("Modificato")) {
+                // Scan the whitespace-separated tokens of this line for a date-shaped token.
+                String[] parts = line.split("\\s+");
+                for (String part : parts) {
+                    if (part.matches("\\d{2}/\\d{2}/\\d{4}")) {
+                        return part; // e.g. "20/12/2024"
+                    }
+                }
+            }
+        }
+        return null; // no matching date token on any "Modificato" line
+    }
+    // Issues a GET to the local service at 127.0.0.1:7897 and returns the response body as text.
+    private static String getProxyFromLocalService() throws Exception {
+        OkHttpClient client = new OkHttpClient();
+        Request request = new Request.Builder()
+                .url("http://127.0.0.1:7897")
+                .get()
+                .build();
+
+        try (Response response = client.newCall(request).execute()) {
+            if (response.isSuccessful()) {
+                return response.body().string(); // original note says this is a JSON string — TODO confirm
+            } else {
+                throw new Exception("获取代理失败，状态码: " + response.code());
+            }
+        }
+    }
+    public static String getNextPageUrl(String currentUrl) { // increments the "..._cur=N" page parameter; null if absent or unparsable
+        if (currentUrl == null || currentUrl.trim().isEmpty()) {
+            return null;
+        }
+
+//        // Define the base URL
+//        String baseUrl = "https://www.pasteur.dz/fr/espace-presse";
+//
+//        // If it is the base URL, treat it as page 1 and return the next page
+//        if (currentUrl.equals(baseUrl)) {
+//            return baseUrl + "?start=5";
+//        }
+
+        // Regex for the page parameter "..._cur=<digits>" (not a "?page=" parameter).
+        String regex = "_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_cur=(\\d+)";
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(currentUrl);
+
+        // Only URLs that already carry the page parameter can be advanced.
+        if (matcher.find()) {
+            // group(1) is the captured run of digits, i.e. the current page number.
+            String pageNumStr = matcher.group(1);
+            try {
+                int currentPage = Integer.parseInt(pageNumStr);
+                // Replace the first occurrence of the parameter with page + 1.
+                return matcher.replaceFirst("_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_cur=" + (currentPage + 1));
+            } catch (NumberFormatException e) {
+                return null; // digits did not fit in an int
+            }
+        }else {
+            return null;
+        }
+    }
+    public static String getPreviousYearUrl(String url) {
+        if (url == null || url.trim().isEmpty()) {
+            return null;
+        }
+
+        // 定义正则表达式匹配年份
+        String yearRegex = "_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_year=(\\d{4})";
+        Pattern yearPattern = Pattern.compile(yearRegex);
+        Matcher yearMatcher = yearPattern.matcher(url);
+
+        // 如果找到年份
+        if (yearMatcher.find()) {
+            String yearStr = yearMatcher.group(1);  // 提取年份
+            Integer currentYear = Integer.parseInt(yearStr);
+            Integer previousYear = currentYear - 1;  // 计算上一年
+
+            // 替换年份
+            url = yearMatcher.replaceFirst("_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_year=" + previousYear);
+        }
+
+        // 定义正则表达式匹配页码
+        String pageRegex = "_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_cur=(\\d+)";
+        Pattern pagePattern = Pattern.compile(pageRegex);
+        Matcher pageMatcher = pagePattern.matcher(url);
+
+        // 如果找到页码
+        if (pageMatcher.find()) {
+            // 替换页码为 1
+            return pageMatcher.replaceFirst("_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_cur=1");
+        } else {
+            // 如果没有找到页码，默认页码为 1
+            return url + "_com_liferay_asset_publisher_web_portlet_AssetPublisherPortlet_INSTANCE_gJ3hFqMQsykM_cur=1";
+        }
+    }
+}
+
diff --git a/src/main/java/com/example/projTopic.java b/src/main/java/com/example/projTopic.java
new file mode 100644
index 0000000..f2377a7
--- /dev/null
+++ b/src/main/java/com/example/projTopic.java
@@ -0,0 +1,403 @@
+package com.example;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import okhttp3.*;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class projTopic {
+    private static final String TOPIC_NAME = "projTopic";
+    private static final String BOOTSTRAP_SERVERS = "node-01:19092";
+    private static KafkaProducer<String, String> producer;
+    private static ObjectMapper objectMapper = new ObjectMapper();
+    private static final Random random = new Random();
+    private static List<String> proxyList = new ArrayList<>();  // proxy pool, one entry per line of proxy.txt
+    private static int currentProxyIndex = 0;  // index into proxyList of the proxy currently in use
+    static { // builds the shared Kafka producer and loads proxy.txt once, at class-initialization time
+        Properties props = new Properties();
+        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BOOTSTRAP_SERVERS);
+        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
+        props.put(ProducerConfig.ACKS_CONFIG, "all"); // acks=all (original note: wait for all replicas to acknowledge)
+        props.put(ProducerConfig.RETRIES_CONFIG, 3); // retry count
+        producer = new KafkaProducer<>(props);
+        try {
+            proxyList = Files.readAllLines(Paths.get("proxy.txt"));
+            if (proxyList.isEmpty()) {
+                System.out.println("警告: proxy.txt 为空，未加载任何代理");
+            } else {
+                System.out.println("成功加载 " + proxyList.size() + " 个代理");
+            }
+        } catch (IOException e) {
+            System.err.println("读取 proxy.txt 失败: " + e.getMessage()); // proxyList stays the empty list assigned above
+        }
+    }
+
+    public static void main(String[] args) throws IOException, InterruptedException { // per keyword in keywords.txt: page through award search results, scrape each award page, publish it to Kafka
+        List<String> keywords = Files.readAllLines(Paths.get("keywords.txt"));
+        List<String> cleanedKeywords = new ArrayList<>();
+        for (String keyword : keywords) {
+            String cleaned = keyword.split(",")[0].trim(); // text before the first comma, trimmed. NOTE(review): a line made only of commas splits to an empty array and [0] throws
+            cleaned = cleaned.replaceAll("\\s+", "+"); // replace each whitespace run with a single '+'
+            cleanedKeywords.add(cleaned);
+        }
+        ExecutorService executor = Executors.newFixedThreadPool(4); // 4 worker threads
+        for (String keyword : cleanedKeywords) {
+            executor.submit(() -> {
+                try {
+                    int sleepTime = random.nextInt(1001) + 30000; // 30000..31000 ms, chosen once per keyword
+                    String load = "5|0|20|https://www.nsf.gov/awardsearch/jsp/gwt/search/|57BE5CA45E781DC0159F727F8A8205EB|gov.nsf.research.awardsearch.gwt.client.SearchAwardService|getAwards|gov.nsf.research.awardsearch.gwt.bean.SearchRequestBean/3930579236|com.extjs.gxt.ui.client.data.PagingLoadConfig|java.util.HashMap/962170901|java.lang.String/2004016611|QueryText|" + keyword + "|ActiveAwards|true|com.extjs.gxt.ui.client.data.BasePagingLoadConfig/2011366567|com.extjs.gxt.ui.client.data.RpcMap/3441186752|sortField|sortDir|com.extjs.gxt.ui.client.Style$SortDir/640452531|offset|java.lang.Integer/3438268394|limit|1|2|3|4|2|5|6|5|7|2|8|9|8|10|8|11|8|12|13|0|1|14|4|15|0|16|17|0|18|19|0|20|19|30|";
+                    for(int i=0;;i++){ // one iteration per result page; left only via break/return/exception
+                        OkHttpClient client = createClientWithProxy();
+                        MediaType mediaType = MediaType.parse("text/x-gwt-rpc; charset=UTF-8");
+                        RequestBody body = RequestBody.create(mediaType, load);
+
+                        Request request = new Request.Builder()
+                                .url("https://www.nsf.gov/awardsearch/jsp/gwt/search/.searchaward")
+                                .method("POST", body)
+                                .addHeader("Content-Type", "text/x-gwt-rpc; charset=UTF-8")
+                                .addHeader("X-GWT-Module-Base", "https://www.nsf.gov/awardsearch/jsp/gwt/search/")
+                                .addHeader("X-GWT-Permutation", "368C3CF86AA4CD7DB2250B35B844C1C2")
+//                    .addHeader("cookie", "JSESSIONID=E9DCB88F6AD2241C9973AFEC03158ECB")
+                                .build();
+                        Response response = executeWithRetry(client, request, keyword);
+                        String content = response.body().string();
+
+                        Pattern pattern = Pattern.compile("\"awdNumber\",\"(\\d+)\"");
+                        Matcher matcher = pattern.matcher(content);
+
+                        List<String> numbers = new ArrayList<>();  // award numbers matched by the "awdNumber" pattern
+                        // additionalNumbers collects ids from both patterns; urls holds the detail-page links built from them
+                        List<String> additionalNumbers = new ArrayList<>();
+                        List<String> urls = new ArrayList<>();
+                        // Collect every "awdNumber" match in the response
+                        while (matcher.find()) {
+                            // group(1) is the captured run of digits
+                            numbers.add(matcher.group(1));
+                        }
+
+                        // Log when the first pattern found nothing; otherwise carry its matches forward
+                        if (numbers.isEmpty()) {
+                            System.out.println("没找到awdNumber，继续下一种查找");
+
+                        } else {
+                            for (String number : numbers) {
+                                additionalNumbers.add(number);
+                            }
+                        }
+
+                        Pattern additionalPattern = Pattern.compile("\"[^\"]+\",\"(?:\\d{2}/\\d{2}/\\d{4}|\\d+\\.\\d+)\"(?:,\"(?:\\d{2}/\\d{2}/\\d{4}|\\d+\\.\\d+)\")?,\"(\\d+)\"");
+                        Matcher additionalMatcher = additionalPattern.matcher(content);
+
+
+                        while (additionalMatcher.find()) {
+                            additionalNumbers.add(additionalMatcher.group(1));
+                        }
+                        if (additionalNumbers.isEmpty()) { // neither pattern matched: treat as end of results for this keyword
+                            System.out.println("没找到下一页内容链接");
+                            Thread.sleep(sleepTime);
+                            break;
+                        } else {
+                            for (String number : additionalNumbers) {
+                                String url = "https://www.nsf.gov/awardsearch/showAward?AWD_ID=" + number + "&HistoricalAwards=false";
+                                urls.add(url);
+                            }
+                        }
+                        if (!urls.isEmpty() && urls.get(0).equals("https://www.nsf.gov/awardsearch/showAward?AWD_ID=2446604&HistoricalAwards=false")) { // NOTE(review): hard-coded sentinel award id; presumably what the site returns for a query without real hits — confirm
+                            System.out.println("第一个 URL 是 AWD_ID=2446604，跳过关键词: " + keyword);
+                            Thread.sleep(sleepTime);
+                            return; // ends this keyword's task
+                        }
+                        for(String url:urls){
+                            OkHttpClient client2 = createClientWithProxy();
+                            MediaType mediaType2 = MediaType.parse("text/plain"); // NOTE(review): mediaType2/body2 are never used by the GET below
+                            RequestBody body2 = RequestBody.create(mediaType2, "");
+                            Request request2 = new Request.Builder()
+                                    .url(url)
+                                    .get()
+//                        .addHeader("Cookie", "JSESSIONID=E9DCB88F6AD2241C9973AFEC03158ECB")
+                                    .build();
+                            Response response2 = executeWithRetry(client2, request2, keyword);
+                            System.out.println(response2.code());
+                            String html = response2.body().string();
+                            Document parse = Jsoup.parse(html);
+                            String title = parse.select(".pageheadline").text();
+                            String projectNum = parse.select(".clear tr:nth-child(5) .tabletext2:nth-child(2)").text(); // fields below are read by fixed table-row position
+                            String projectLeader = parse.select(".clear tr:nth-child(13) .tabletext2:nth-child(2)").text();
+                            String projectStartTime = convertToTimestamp(parse.select(".clear tr:nth-child(8) .tabletext2:nth-child(2)").text());
+                            String projectEndTime = convertToTimestamp2(parse.select(".clear tr:nth-child(9) .tabletext2:nth-child(2)").text());
+                            String sponsorPart = parse.select(".clear tr:nth-child(2) .tabletext2:nth-child(2)").text();
+                            String country = "USA";
+                            String brief = parse.select(".clear.margintop25 span").text();
+                            String sponsor = parse.select(".clear tr:nth-child(1) .tabletext2:nth-child(2)").text();
+                            String projectFunding = parse.select(".clear tr:nth-child(12) .tabletext2:nth-child(2)").text();
+                            String relatedProject = parse.select(".clear tr:nth-child(20) .tabletext2:nth-child(2)").text();
+
+
+
+                            String awardInstrument = parse.select(".clear tr:nth-child(6) .tabletext2:nth-child(2)").text();
+                            String programManager = parse.select(".clear tr:nth-child(7) .tabletext2:nth-child(2)").text();
+                            String totalIntendedAwardAmount = parse.select(".clear tr:nth-child(10) .tabletext2:nth-child(2)").text();
+                            String totalAwardedAmountToDate = parse.select(".clear tr:nth-child(11) .tabletext2:nth-child(2)").text();
+                            String recipientSponsoredResearchOffice = parse.select(".clear tr:nth-child(14) .tabletext2:nth-child(2)").text();
+                            String sponsorCongressionalDistrict = parse.select(".clear tr:nth-child(15) .tabletext2:nth-child(2)").text();
+                            String primaryPlaceOfPerformance = parse.select(".clear tr:nth-child(16) .tabletext2:nth-child(2)").text();
+                            String primaryPlaceOfPerformanceCongressionalDistrict = parse.select(".clear tr:nth-child(17) .tabletext2:nth-child(2)").text();
+                            String uniqueEntityIdentifier = parse.select(".clear tr:nth-child(18) .tabletext2:nth-child(2)").text();
+                            String parentUEI = parse.select(".clear tr:nth-child(19) .tabletext2:nth-child(2)").text();
+                            String primaryProgramSource = parse.select(".clear tr:nth-child(21) .tabletext2:nth-child(2)").text();
+                            String programReferenceCode = parse.select(".clear tr:nth-child(22) .tabletext2:nth-child(2)").text();
+                            String programElementCode = parse.select(".clear tr:nth-child(23) .tabletext2:nth-child(2)").text();
+                            String awardAgencyCode = parse.select(".clear tr:nth-child(24) .tabletext2:nth-child(2)").text();
+                            String fundAgencyCode = parse.select(".clear tr:nth-child(25) .tabletext2:nth-child(2)").text();
+                            String assistanceListingNumber = parse.select(".clear tr:nth-child(26) .tabletext2:nth-child(2)").text();
+                            String initialAmendmentDate = convertToTimestamp(parse.select(".clear tr:nth-child(3) .tabletext2:nth-child(2)").text());
+                            String latestAmendmentDate = convertToTimestamp(parse.select(".clear tr:nth-child(4) .tabletext2:nth-child(2)").text());
+
+                            List<Map<String, Object>> citations = extractAllCitationInfo(html);
+                            Map<String,Object> data = new HashMap<>();
+                            data.put("title",title);
+                            data.put("projectNum",projectNum);
+                            data.put("projectLeader",projectLeader);
+                            data.put("projectStartTime",projectStartTime);
+                            data.put("projectEndTime",projectEndTime);
+                            data.put("sponsorPart",sponsorPart);
+                            data.put("country",country);
+                            data.put("brief",brief);
+                            data.put("sponsor",sponsor);
+                            data.put("projectFunding",projectFunding);
+                            data.put("relatedProject",relatedProject);
+                            data.put("awardInstrument",awardInstrument);
+                            data.put("programManager",programManager);
+                            data.put("totalIntendedAwardAmount",totalIntendedAwardAmount);
+                            data.put("totalAwardedAmountToDate",totalAwardedAmountToDate);
+                            data.put("recipientSponsoredResearchOffice",recipientSponsoredResearchOffice);
+                            data.put("sponsorCongressionalDistrict",sponsorCongressionalDistrict);
+                            data.put("primaryPlaceOfPerformance",primaryPlaceOfPerformance);
+                            data.put("primaryPlaceOfPerformanceCongressionalDistrict",primaryPlaceOfPerformanceCongressionalDistrict);
+                            data.put("uniqueEntityIdentifier",uniqueEntityIdentifier);
+                            data.put("parentUEI",parentUEI);
+                            data.put("primaryProgramSource",primaryProgramSource);
+                            data.put("programReferenceCode",programReferenceCode);
+                            data.put("programElementCode",programElementCode);
+                            data.put("awardAgencyCode",awardAgencyCode);
+                            data.put("fundAgencyCode",fundAgencyCode);
+                            data.put("assistanceListingNumber",assistanceListingNumber);
+                            data.put("publications",citations);
+                            data.put("initialAmendmentDate",initialAmendmentDate);
+                            data.put("latestAmendmentDate",latestAmendmentDate);
+                            data.put("crawlUrl",url);
+                            data.put("crawlTime",localDateTime());
+                            Map<String,Object> result = new HashMap<>();
+                            result.put("keyword",keyword);
+                            result.put("data",data);
+                            try {
+                                String jsonValue = objectMapper.writeValueAsString(result);
+                                ProducerRecord<String, String> record = new ProducerRecord<>(TOPIC_NAME, projectNum, jsonValue); // record key is the scraped projectNum
+
+                                producer.send(record, (metadata, exception) -> {
+                                    if (exception == null) {
+                                        System.out.println("成功发送到Kafka - Partition: " + metadata.partition() +
+                                                ", Offset: " + metadata.offset());
+                                    } else {
+                                        System.err.println("发送到Kafka失败: " + exception.getMessage());
+                                    }
+                                });
+                            } catch (Exception e) {
+                                System.err.println("序列化或发送Kafka消息失败: " + e.getMessage());
+
+                            }
+
+                            Thread.sleep(sleepTime);
+                        }
+                        load = increaseOffsetBy30(load); // build the payload for the next page
+                    }
+
+                } catch (Exception e) {
+                    System.err.println("处理 " + keyword + " 失败: " + e.getMessage());
+                    e.printStackTrace();
+                }
+            });
+        }
+        executor.shutdown();
+        executor.awaitTermination(5, TimeUnit.HOURS); // wait at most 5 hours; the result is ignored and the producer is closed either way
+        producer.close();
+    }
+
+    public static String convertToTimestamp(String dateStr) { // "MMMM d, yyyy" (English) -> "yyyy-MM-dd 00:00:00"; null on any failure
+        try {
+            // Parse a full English month name, day and year, e.g. "January 9, 2025" (an abbreviated "Jan. 9, 2025" does not match "MMMM")
+            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMMM d, yyyy", Locale.ENGLISH);
+            LocalDate date = LocalDate.parse(dateStr, inputFormatter);
+
+            // Format to "yyyy-MM-dd HH:mm:ss" (defaulting time to 00:00:00)
+            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+            return date.atStartOfDay().format(outputFormatter);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+
+    }
+    public static String convertToTimestamp2(String dateStr) { // like convertToTimestamp, but tolerates a " (Estimated)" marker in the input
+        try {
+            // Remove the " (Estimated)" marker, then trim
+            String cleanDateStr = dateStr.replace(" (Estimated)", "").trim();
+
+            // Parse "June 30, 2025" (full month, day, comma-separated year)
+            DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMMM d, yyyy", Locale.ENGLISH);
+            LocalDate date = LocalDate.parse(cleanDateStr, inputFormatter);
+
+            // Format to "yyyy-MM-dd HH:mm:ss" (defaulting time to 00:00:00)
+            DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+            return date.atStartOfDay().format(outputFormatter);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+    public static List<Map<String, Object>> extractAllCitationInfo(String html) { // one map per ".margintop15" element of the parsed html
+        Document doc = Jsoup.parse(html);
+        List<Map<String, Object>> citations = new ArrayList<>();
+
+        // Select every element with class "margintop15"
+        Elements marginDivs = doc.select(".margintop15");
+        Pattern urlPattern = Pattern.compile("javascript:popwin\\('(.*?)'\\)");
+
+        for (Element div : marginDivs) {
+            Map<String, Object> info = new HashMap<>();
+
+            // Direct-child spans: the first three are stored as authors, title, year (skipped when fewer than three exist)
+            Elements spans = div.select("> span");
+            if (spans.size() >= 3) {
+                info.put("authors", spans.get(0).text());
+                info.put("title", spans.get(1).text());
+                info.put("year", spans.get(2).text());
+            }
+
+            // Links: pull the target out of href="javascript:popwin('...')"
+            Elements links = div.select("a");
+            String doiUrl = "";
+            String citationUrl = "";
+            for (Element link : links) {
+                String href = link.attr("href");
+                Matcher matcher = urlPattern.matcher(href);
+                if (matcher.find()) {
+                    String url = matcher.group(1);
+                    if (link.text().contains("doi.org") && doiUrl.isEmpty()) { // only the first match of each kind is kept
+                        doiUrl = url;
+                    } else if (link.text().contains("引用详细信息") && citationUrl.isEmpty()) {
+                        citationUrl = url;
+                    }
+                }
+            }
+            info.put("doiUrl", doiUrl);
+            info.put("citationUrl", citationUrl);
+
+            // Append to the result list (also when no spans or links were found)
+            citations.add(info);
+        }
+
+        return citations;
+    }
+    public static String localDateTime(){ // current LocalDateTime.now() formatted as "yyyy-MM-dd HH:mm:ss"
+        LocalDateTime dateTime = LocalDateTime.now();
+
+        // Formatter defining the output layout
+        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+        // Format the timestamp with it
+        String formattedDateTime = dateTime.format(formatter);
+
+        return formattedDateTime;  // e.g. 2025-04-08 13:45:30
+    }
+    public static String increaseOffsetBy30(String originalPayload) {
+        // 以 "|" 分割载荷为数组
+        String[] parts = originalPayload.split("\\|");
+
+        // 检查数组长度，确保有足够元素
+        if (parts.length < 4) {
+            throw new IllegalArgumentException("载荷格式无效，元素不足");
+        }
+
+        // 找到倒数第 4 个元素的位置
+        int targetIndex = parts.length - 4;
+
+        try {
+            // 将倒数第 4 个数字解析为整数
+            int currentOffset = Integer.parseInt(parts[targetIndex]);
+            // 增加 30
+            int newOffset = currentOffset + 30;
+            // 将新值放回数组
+            parts[targetIndex] = String.valueOf(newOffset);
+
+            // 重新拼接载荷
+            return String.join("|", parts);
+        } catch (NumberFormatException e) {
+            throw new IllegalArgumentException("倒数第 4 个元素不是有效数字: " + parts[targetIndex]);
+        }
+    }
+    private static Response executeWithRetry(OkHttpClient client, Request request, String keyword) throws IOException { // retries on HTTP 403 with the next proxy; keyword is not used in the body
+        int maxRetries = proxyList.isEmpty() ? 1 : proxyList.size();  // with no proxies, try exactly once
+        int attempt = 0;
+
+        while (attempt < maxRetries) {
+            Response response = client.newCall(request).execute();
+            if (response.code() == 403) {
+                System.out.println("收到 403 状态码，尝试切换代理重试...");
+                response.close();
+                switchProxy();
+                client = createClientWithProxy();  // rebuild the client so it picks up the newly selected proxy
+                attempt++;
+                if (attempt == maxRetries) {
+                    throw new IOException("所有代理尝试失败，仍然收到 403");
+                }
+                continue;
+            }
+            return response;  // any non-403 status is returned to the caller as-is
+        }
+        throw new IOException("无法执行请求，未获取响应"); // not reached while maxRetries >= 1: the loop either returns or throws above
+    }
+    private static OkHttpClient createClientWithProxy() { // new client with 30 s timeouts, routed through proxyList[currentProxyIndex] when that entry is "host:port"
+        OkHttpClient.Builder builder = new OkHttpClient().newBuilder()
+                .connectTimeout(30, TimeUnit.SECONDS)
+                .readTimeout(30, TimeUnit.SECONDS)
+                .writeTimeout(30, TimeUnit.SECONDS);
+
+        if (!proxyList.isEmpty() && currentProxyIndex < proxyList.size()) {
+            String proxy = proxyList.get(currentProxyIndex);
+            String[] proxyParts = proxy.split(":"); // NOTE(review): the line is not trimmed; whitespace around the port makes parseInt below throw
+            if (proxyParts.length == 2) { // entries not shaped like "host:port" are silently ignored (no proxy is set)
+                String proxyHost = proxyParts[0];
+                int proxyPort = Integer.parseInt(proxyParts[1]);
+                builder.proxy(new java.net.Proxy(java.net.Proxy.Type.HTTP,
+                        new java.net.InetSocketAddress(proxyHost, proxyPort)));
+                System.out.println("使用代理: " + proxy);
+            }
+        }
+        return builder.build();
+    }
+    private static synchronized void switchProxy() { // advances currentProxyIndex to the next proxy, wrapping around at the end of the list
+        if (proxyList.isEmpty()) return;
+        currentProxyIndex = (currentProxyIndex + 1) % proxyList.size();
+        System.out.println("切换到新代理: " + proxyList.get(currentProxyIndex));
+    }
+}
diff --git a/src/main/java/com/example/saveInES.java b/src/main/java/com/example/saveInES.java
new file mode 100644
index 0000000..ce1b21d
--- /dev/null
+++ b/src/main/java/com/example/saveInES.java
@@ -0,0 +1,122 @@
+package com.example;
+
+import co.elastic.clients.elasticsearch.ElasticsearchClient;
+import co.elastic.clients.elasticsearch.core.IndexRequest;
+import co.elastic.clients.elasticsearch.core.IndexResponse;
+import co.elastic.clients.json.jackson.JacksonJsonpMapper;
+import co.elastic.clients.transport.ElasticsearchTransport;
+import co.elastic.clients.transport.rest_client.RestClientTransport;
+import org.apache.http.HttpHost;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.elasticsearch.client.RestClient;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+public class saveInES {
+    public static void main(String[] args) { // consumes "news-topic" forever and indexes each record value via saveToElasticsearch
+        ElasticsearchClient esClient = createElasticsearchClient();
+        Properties properties = new Properties();
+        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
+        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "news-consumer-group");
+        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // auto-commit off; offsets are committed manually below
+        // Create the Kafka consumer
+        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
+
+        // Subscribe to the topic
+        String topic = "news-topic"; // Kafka topic name
+        consumer.subscribe(Collections.singletonList(topic));
+
+        // Consume until an exception escapes the loop
+        try {
+            while (true) {
+                // Poll for records, waiting up to 1000 ms
+                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
+
+                // Handle each record of the batch
+                for (ConsumerRecord<String, String> record : records) {
+                    System.out.println("Received message: key=" + record.key() + ", value=" + record.value());
+
+                    // Store the record value in Elasticsearch
+                    saveToElasticsearch(esClient, record.value());
+                }
+                consumer.commitSync(); // runs after every poll; saveToElasticsearch catches its own errors, so failed saves are committed too
+            }
+        } finally {
+            // Close the consumer, then the Elasticsearch transport
+            consumer.close();
+            try {
+                esClient._transport().close();
+            } catch (IOException e) {
+                System.err.println("Error closing Elasticsearch client: " + e.getMessage());
+            }
+        }
+    }
+
+    /**
+     * Builds an Elasticsearch client for localhost:9200 using a Jackson JSON mapper.
+     */
+    private static ElasticsearchClient createElasticsearchClient() {
+        RestClient restClient = RestClient.builder(new HttpHost("localhost", 9200)).build();
+        ElasticsearchTransport transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
+        return new ElasticsearchClient(transport);
+    }
+
+    /**
+     * Parses a message and indexes it into the "news" index; failures are logged, not rethrown.
+     *
+     * @param esClient Elasticsearch client used for the index call
+     * @param message  raw message text, handed to parseMessageToMap
+     */
+    private static void saveToElasticsearch(ElasticsearchClient esClient, String message) {
+        try {
+            // Turn the message into a key/value map; its "url" entry becomes the document id
+            Map<String, String> news = parseMessageToMap(message);
+            String docId = news.get("url");
+            // Build the index request
+            IndexRequest<Map<String, String>> request = IndexRequest.of(b -> b
+                    .index("news") // index name
+                    .id(docId)
+                    .document(news) // document body
+            );
+
+            // Execute the index request
+            IndexResponse response = esClient.index(request);
+            System.out.println("Data saved to Elasticsearch: ID=" + response.id());
+        } catch (Exception e) {
+            System.err.println("Failed to save data to Elasticsearch: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Splits a message of the form {key=value, key=value} into a map.
+     *
+     * @param message raw message text
+     * @return map of trimmed keys to trimmed values; pairs without exactly one '=' are skipped
+     */
+    private static Map<String, String> parseMessageToMap(String message) {
+        // NOTE(review): the original comment assumed JSON input such as {"title":"...", "url":"..."},
+        // but this is not a JSON parser: braces are removed, the text is split on ',' and each piece on '='.
+        // JSON-style "key":"value" pieces contain no '=' and are dropped; a JSON library (e.g. Jackson) is needed for real JSON.
+        Map<String, String> map = new HashMap<>();
+        String[] pairs = message.replace("{", "").replace("}", "").split(",");
+        for (String pair : pairs) {
+            String[] keyValue = pair.split("=");
+            if (keyValue.length == 2) {
+                map.put(keyValue[0].trim(), keyValue[1].trim());
+            }
+        }
+        return map;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/test.java b/src/main/java/com/example/test.java
new file mode 100644
index 0000000..e136915
--- /dev/null
+++ b/src/main/java/com/example/test.java
@@ -0,0 +1,101 @@
+package com.example;// 注意：如果你使用手动设置路径，就不需要导入 WebDriverManager 了
+// import io.github.bonigarcia.wdm.WebDriverManager;
+
+import org.openqa.selenium.By;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.WebElement;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import org.openqa.selenium.support.ui.ExpectedConditions;
+import org.openqa.selenium.NoSuchElementException;
+import org.openqa.selenium.TimeoutException;
+
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * Scratch Selenium program: opens a PATENTSCOPE result page in Chrome and prints the title
+ * and link of every result item found with the example selectors below.
+ */
+public class test { // class name changed to tell it apart
+
+    /** Runs the scrape once; a started browser is closed again in the finally block. */
+    public static void main(String[] args) {
+        // Point Selenium at a manually installed ChromeDriver binary.
+        // *** Replace this path with the real location of your chromedriver.exe ***
+        System.setProperty("webdriver.chrome.driver", "F:\\tool\\EasySpider_0.6.2_Windows_x64\\EasySpider_windows_x64\\EasySpider\\resources\\app\\chrome_win64\\chromedriver_win64.exe");
+
+        // To use WebDriverManager instead, replace the System.setProperty call above with:
+        // import io.github.bonigarcia.wdm.WebDriverManager;
+        // WebDriverManager.chromedriver().setup();
+
+        WebDriver browser = null;
+
+        try {
+            // Chrome options (optional)
+            ChromeOptions chromeOptions = new ChromeOptions();
+            // chromeOptions.addArguments("--headless"); // enable headless mode
+            // chromeOptions.addArguments("--disable-gpu");
+
+            // Start the browser session
+            browser = new ChromeDriver(chromeOptions);
+
+            // Open the result page directly; the URL already carries the query parameters.
+            browser.get("https://patentscope.wipo.int/search/en/result.jsf?query=FP:(AI)");
+
+            // Explicit wait of up to 20 seconds
+            WebDriverWait pageWait = new WebDriverWait(browser, Duration.ofSeconds(20));
+
+            // The result page is opened directly, so there is no search box or button to wait for;
+            // wait for the result list container instead.
+            // *** Confirm this locator with the browser developer tools ***
+            // "div.ps-result-list" is only an example CSS selector and must be checked against the real page.
+            By resultListLocator = By.cssSelector("div.ps-result-list");
+            pageWait.until(ExpectedConditions.presenceOfElementLocated(resultListLocator));
+
+            // --- Extract the search results ---
+            // *** Confirm this locator with the browser developer tools ***
+            List<WebElement> hits = browser.findElements(By.cssSelector("div.ps-result-item")); // one element per result
+
+            System.out.println("Found " + hits.size() + " results:");
+
+            for (WebElement hit : hits) {
+                try {
+                    // Title (example selector)
+                    // *** Confirm this locator with the browser developer tools ***
+                    String title = hit.findElement(By.cssSelector("span.ps-field-value.ps-field-title")).getText().trim();
+
+                    // Link (example selector): href of an <a> element inside the item
+                    // *** Confirm this locator with the browser developer tools ***
+                    String patentLink = hit.findElement(By.tagName("a")).getAttribute("href");
+
+                    System.out.println("Title: " + title + ", Link: " + patentLink);
+
+                } catch (NoSuchElementException missing) {
+                    // An item without a title or link is reported and the loop moves on to the next one.
+                    System.out.println("Could not find title or link for a result item in this result item.");
+                }
+            }
+
+            // --- Pagination (if needed) ---
+            // Not implemented: find the locator of the "next page" button, then click and wait in a loop.
+            // Even though the result page is opened directly, multi-page results still need this to be complete.
+            // ...
+
+        } catch (TimeoutException e) {
+            System.err.println("等待元素超时，可能页面结构发生变化或加载缓慢: " + e.getMessage());
+        } catch (NoSuchElementException e) {
+            System.err.println("未能找到指定的元素，请检查元素定位器是否正确: " + e.getMessage());
+        } catch (Exception e) {
+            System.err.println("发生其他错误: " + e.getMessage());
+            e.printStackTrace();
+        } finally {
+            // Close the browser
+            if (browser != null) {
+                browser.quit();
+                System.out.println("Browser closed.");
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/testContent.java b/src/main/java/com/example/testContent.java
new file mode 100644
index 0000000..821211a
--- /dev/null
+++ b/src/main/java/com/example/testContent.java
@@ -0,0 +1,103 @@
+package com.example;
+
+import okhttp3.*;
+import org.joda.time.DateTime;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+public class testContent {
+    /** Fetches one DRKS trial detail page, scrapes its fields with CSS selectors and prints them as a map. */
+    public static void main(String[] args) throws IOException {
+        String url = "https://www.drks.de/search/de/trial/DRKS00036725/details";
+        OkHttpClient httpClient = new OkHttpClient().newBuilder().build();
+        MediaType mediaType = MediaType.parse("application/json"); // not used by the GET request below
+        Request request = new Request.Builder()
+                .url(url)
+                .get()
+//                .addHeader("Cookie", "JSESSIONID=F6B6320CBBC2A27482AEFC0EC641EBF8; JSESSIONID=D9A5D49C09D091E9791733727D8AF2F1")
+                .addHeader("Content-Type", "application/json")
+                .build();
+        String html = httpClient.newCall(request).execute().body().string();
+        Document page = Jsoup.parse(html);
+        // Title, registration number and registration date (the date is reformatted by convertDate).
+        String title = page.select(".title-bold").text();
+        String registNum = page.select(".card.trial-details-float.mb-4 .card-body dl dd:nth-child(2)").text();
+        String registTime = convertDate(page.select(".card.trial-details-float.mb-4 .card-body dl dd:nth-child(6)").text());
+        String header = page.select("body > main > div.card-body > div:nth-child(9) > div.card-body > div > div > div > div.card-header > h4").text();
+        String site = page.select("body > main > div.card-body > div:nth-child(9) > div.card-body > div > div > div > div.card-body > dl > dd:nth-child(2) > div").text();
+        String telefon = page.select("body > main > div.card-body > div:nth-child(9) > div.card-body > div > div > div > div.card-body > dl > dd:nth-child(4) > span").text();
+        Map<String,Object> sponsorInfo = new HashMap<>();
+        sponsorInfo.put("header",header);
+        sponsorInfo.put("site",site);
+        sponsorInfo.put("telefon",telefon);
+        String disease = page.select("body > main > div.card-body > div:nth-child(6) > div.card-body > div > div:nth-child(2) > dl > dd:nth-child(2) > span").text();
+        String studyType = page.select("body > main > div.card-body > div:nth-child(3) > div.card-body > dl").text();
+        String inclusionCriteria = page.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(3) > div > div.card-body > div > div.col-12.mt-3 > dl > dd > span").text();
+        String exclusionCriteria = page.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(4) > div > div.card-body > p > span").text();
+        String country = page.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(1) > div > div.card-body > dl > dd:nth-child(2)").text();
+        String intervention = page.select("body > main > div.card-body > div:nth-child(4) > div.card-body > dl").text();
+        String primaryOutcome = page.select("body > main > div.card-body > div:nth-child(5) > div.card-body > div > div > dl").text();
+        String enrollment = page.select("body > main > div.card-body > div:nth-child(7) > div.card-body > div:nth-child(2) > div:nth-child(2) > div > div.card-body > div > div:nth-child(5) > dl > dd > span").text();
+        Map<String,Object> record = new HashMap<>();
+        record.put("title", title);
+        record.put("registNum",registNum);
+        record.put("registTime",registTime);
+        record.put("registStatus","无");
+        record.put("registTitle","无");
+        record.put("fullTitle","无");
+        record.put("sponsor",sponsorInfo);
+        record.put("sponsorPart","无");
+        record.put("studyType",studyType);
+        record.put("phase","无");
+        record.put("disease",disease);
+        record.put("studyDesign","无");
+        record.put("studyObjective","无");
+        record.put("studyStartDate","无");
+        record.put("inclusionCriteria",inclusionCriteria);
+        record.put("exclusionCriteria",exclusionCriteria);
+        record.put("currentStatus","无");
+        record.put("enrollment",enrollment);
+        record.put("country",country);
+        record.put("tagTime","无");
+        record.put("intervention",intervention);
+        record.put("primaryOutcome",primaryOutcome);
+        record.put("crawlTime",getCurrentTime());
+        record.put("crawlUrl",url);
+        record.put("postTime",registTime);
+        record.put("content","content");
+        record.put("forwardcontent","forwardcontent");
+        System.out.println(record);
+    }
+    /**
+     * Converts a dd.MM.yyyy date into yyyy-MM-dd HH:mm:ss, e.g. 19.05.2025 becomes 2025-05-19 00:00:00.
+     * Returns "Invalid date format" for null, unparsable or impossible dates such as 31.02.2025.
+     */
+    public static String convertDate(String inputDate) {
+        SimpleDateFormat inputFormat = new SimpleDateFormat("dd.MM.yyyy");
+        // Strict parsing: the lenient default would silently roll 31.02.2025 over to 03.03.2025.
+        inputFormat.setLenient(false);
+        try {
+            Date date = inputFormat.parse(inputDate == null ? "" : inputDate.trim());
+            return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date);
+        } catch (ParseException e) {
+            return "Invalid date format";
+        }
+    }
+    /** Returns the current local date-time formatted as yyyy-MM-dd HH:mm:ss. */
+    public static String getCurrentTime() {
+        // Take the timestamp from LocalDateTime.now().
+        LocalDateTime timestamp = LocalDateTime.now();
+        // Render it with the fixed output pattern.
+        String pattern = "yyyy-MM-dd HH:mm:ss";
+        return DateTimeFormatter.ofPattern(pattern).format(timestamp);
+    }
+}
diff --git a/src/main/java/com/example/testList.java b/src/main/java/com/example/testList.java
new file mode 100644
index 0000000..b43bade
--- /dev/null
+++ b/src/main/java/com/example/testList.java
@@ -0,0 +1,340 @@
+package com.example;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class testList {
+    /** Walks the DRKS search flow (initial GET, search POST, redirect GET, pagination POST) and prints the study links of the requested page. */
+    public static void main(String[] args) throws Exception {
+        String targetUrl = "https://www.drks.de/search/de/results?page=4";
+        String baseUrl = "https://www.drks.de/search/de";
+        String hostUrl = "https://www.drks.de";
+        String cleanUrl = targetUrl.split("\\?")[0];
+        System.out.println("Pure URL: " + cleanUrl);
+
+        String pageNumber = targetUrl.contains("?page=") ? targetUrl.split("page=")[1] : "1";
+        int page = Integer.parseInt(pageNumber);
+        System.out.println("Page Number: " + page);
+
+        // Cookies collected across all requests
+        Set<String> cookieSet = new HashSet<>();
+        String sessionId = null;
+
+        // Step 1: initial GET request to obtain cookies and the ViewState
+        URL initialUrl = new URL(baseUrl);
+        HttpURLConnection initialConn = (HttpURLConnection) initialUrl.openConnection();
+        initialConn.setRequestMethod("GET");
+        initialConn.setInstanceFollowRedirects(false);
+        initialConn.setConnectTimeout(10000);
+        initialConn.setReadTimeout(10000);
+
+        // Capture cookies
+        sessionId = updateCookies(initialConn, cookieSet);
+        System.out.println("Initial Cookies: " + cookieSet);
+        System.out.println("Initial Session ID: " + sessionId);
+
+        // Read the response body to get the ViewState; decoded as UTF-8 instead of the platform default (assumes the site serves UTF-8)
+        BufferedReader in = new BufferedReader(new InputStreamReader(initialConn.getInputStream(), StandardCharsets.UTF_8));
+        StringBuilder content = new StringBuilder();
+        String inputLine;
+        while ((inputLine = in.readLine()) != null) {
+            content.append(inputLine);
+        }
+        in.close();
+        initialConn.disconnect();
+
+        // Extract the initial ViewState
+        String initialViewState = extractViewState(content.toString());
+        System.out.println("Initial ViewState: " + initialViewState);
+
+        // Step 2: send the search POST request
+        HttpURLConnection searchConn = (HttpURLConnection) new URL(baseUrl).openConnection();
+        searchConn.setRequestMethod("POST");
+        searchConn.setInstanceFollowRedirects(false);
+        searchConn.setDoOutput(true);
+        searchConn.setConnectTimeout(10000);
+        searchConn.setReadTimeout(10000);
+
+        // Request headers of the search request
+        searchConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+        searchConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        searchConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        searchConn.setRequestProperty("Origin", "https://www.drks.de");
+        searchConn.setRequestProperty("Referer", baseUrl);
+        searchConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+        // Build the POST data of the search request
+        String searchPostData = buildSearchPostData(initialViewState);
+        System.out.println("Search POST Data: " + searchPostData);
+
+        // Send the search POST request
+        try (OutputStream os = searchConn.getOutputStream()) {
+            byte[] input = searchPostData.getBytes(StandardCharsets.UTF_8);
+            os.write(input, 0, input.length);
+        }
+
+        // Update cookies
+        String searchSessionId = updateCookies(searchConn, cookieSet);
+        System.out.println("Search Cookies: " + cookieSet);
+        System.out.println("Search Session ID: " + searchSessionId);
+
+        // Handle the search response
+        int searchResponseCode = searchConn.getResponseCode();
+        System.out.println("Search Response Code: " + searchResponseCode);
+        String redirectUrl = searchConn.getHeaderField("Location");
+        searchConn.disconnect();
+
+        if (searchResponseCode != 302 || redirectUrl == null) {
+            System.err.println("Search request did not return expected 302 redirect. Response code: " + searchResponseCode);
+            return;
+        }
+        System.out.println("Redirect URL (raw): " + redirectUrl);
+
+        // Resolve a relative URL against the host
+        if (!redirectUrl.startsWith("http")) {
+            redirectUrl = hostUrl + (redirectUrl.startsWith("/") ? redirectUrl : "/" + redirectUrl);
+        }
+        System.out.println("Resolved Redirect URL: " + redirectUrl);
+
+        // Step 3: follow the redirect (with a GET request)
+        URL resultsUrl = new URL(redirectUrl);
+        HttpURLConnection resultsConn = (HttpURLConnection) resultsUrl.openConnection();
+        resultsConn.setRequestMethod("GET");
+        resultsConn.setInstanceFollowRedirects(false);
+        resultsConn.setConnectTimeout(10000);
+        resultsConn.setReadTimeout(10000);
+        resultsConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        resultsConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        resultsConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+        // Update cookies
+        String resultsSessionId = updateCookies(resultsConn, cookieSet);
+        System.out.println("Results Cookies: " + cookieSet);
+        System.out.println("Results Session ID: " + resultsSessionId);
+
+        // Read the result page the redirect points to
+        BufferedReader resultsReader = new BufferedReader(new InputStreamReader(resultsConn.getInputStream(), StandardCharsets.UTF_8));
+        StringBuilder resultsContent = new StringBuilder();
+        while ((inputLine = resultsReader.readLine()) != null) {
+            resultsContent.append(inputLine);
+        }
+        resultsReader.close();
+        resultsConn.disconnect();
+
+        // Extract the ViewState from the page (state needed by the following request)
+        String viewState = extractViewState(resultsContent.toString());
+        System.out.println("Results ViewState: " + viewState);
+
+        // Compare the session ids to notice a session that was reset
+        if (sessionId != null && !sessionId.equals(resultsSessionId)) {
+            System.out.println("Warning: Session ID changed. Initial: " + sessionId + ", Results: " + resultsSessionId);
+        }
+
+        // Step 4: send the pagination request (POST)
+        HttpURLConnection postConn = (HttpURLConnection) new URL(cleanUrl).openConnection();
+        postConn.setRequestMethod("POST");
+        postConn.setInstanceFollowRedirects(false);
+        postConn.setDoOutput(true);
+        postConn.setConnectTimeout(10000);
+        postConn.setReadTimeout(10000);
+
+        // Request headers of the pagination request (not AJAX; mimics a regular browser navigation)
+        postConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
+        postConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+        postConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+        postConn.setRequestProperty("Origin", "https://www.drks.de");
+        postConn.setRequestProperty("Referer", cleanUrl);
+        postConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+        postConn.setRequestProperty("Sec-Fetch-Dest", "document");
+        postConn.setRequestProperty("Sec-Fetch-Mode", "navigate");
+
+        // Build the POST parameters of the pagination request (page number, ViewState, ...)
+        String postData = buildPostData(viewState, page);
+        System.out.println("Pagination POST Data: " + postData);
+
+        // Send the pagination POST request
+        try (OutputStream os = postConn.getOutputStream()) {
+            byte[] input = postData.getBytes(StandardCharsets.UTF_8);
+            os.write(input, 0, input.length);
+        }
+
+        // Update cookies (the pagination response may carry new Set-Cookie headers)
+        String paginationSessionId = updateCookies(postConn, cookieSet);
+        System.out.println("Pagination Cookies: " + cookieSet);
+        System.out.println("Pagination Session ID: " + paginationSessionId);
+
+        // Handle the pagination response
+        int responseCode = postConn.getResponseCode();
+        System.out.println("Pagination Response Code: " + responseCode);
+
+        // Read the HTML body of the pagination response (the error stream for status codes >= 400)
+        StringBuilder postContent = new StringBuilder();
+        try (BufferedReader postReader = new BufferedReader(
+                new InputStreamReader(
+                        responseCode >= 400 ? postConn.getErrorStream() : postConn.getInputStream(), StandardCharsets.UTF_8))) {
+            while ((inputLine = postReader.readLine()) != null) {
+                postContent.append(inputLine);
+            }
+        }
+        Document parse = null;
+        if (responseCode == HttpURLConnection.HTTP_MOVED_TEMP
+                || responseCode == HttpURLConnection.HTTP_MOVED_PERM
+                || responseCode == HttpURLConnection.HTTP_SEE_OTHER) {
+            String newUrl = postConn.getHeaderField("Location");
+            System.out.println("Pagination Redirecting to: " + newUrl);
+
+            // Resolve a relative redirect target into a full URL
+            if (!newUrl.startsWith("http")) {
+                newUrl = hostUrl + (newUrl.startsWith("/") ? newUrl : "/" + newUrl);
+            }
+
+            // Follow the redirect
+            URL redirectConn = new URL(newUrl);
+            HttpURLConnection followConn = (HttpURLConnection) redirectConn.openConnection();
+            followConn.setRequestMethod("GET");
+            followConn.setRequestProperty("Cookie", String.join("; ", cookieSet));
+            followConn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+            followConn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36");
+
+            BufferedReader redirectReader = new BufferedReader(new InputStreamReader(followConn.getInputStream(), StandardCharsets.UTF_8));
+            StringBuilder redirectContent = new StringBuilder();
+            while ((inputLine = redirectReader.readLine()) != null) {
+                redirectContent.append(inputLine);
+            }
+            redirectReader.close();
+            followConn.disconnect();
+
+            System.out.println("Redirect Response: " + redirectContent);
+            parse = Jsoup.parse(String.valueOf(redirectContent));
+        } else if (responseCode == 200) {
+            System.out.println("Pagination Response: " + postContent);
+            parse = Jsoup.parse(String.valueOf(postContent));
+        } else {
+            throw new IllegalStateException("Unexpected pagination response code: " + responseCode);
+        }
+
+        Elements links = parse.select("div[data-label='Titel der Studie'] a");
+
+        for (Element link : links) {
+            String href = link.attr("href");
+            String text = link.text();
+
+            System.out.println("链接: " + href);
+            System.out.println("标题: " + text);
+        }
+        String text = parse.select("div.col-md-2.pt-3.ps-0.text-md-end").text();
+        // Extract the number between "Seite" and "/" with a regular expression
+        String regex = "Seite\\s*(\\d+)\\s*/";
+        Matcher matcher = Pattern.compile(regex).matcher(text);
+        if (matcher.find()) {
+            System.out.println("总共有"+matcher.group(1));// group(1) is the first capture group, i.e. the digits
+        }
+        postConn.disconnect();
+    }
+    // Merges the response's Set-Cookie name=value pairs into cookieSet; returns the last JSESSIONID/csfcfc pair seen, or null.
+    private static String updateCookies(HttpURLConnection conn, Set<String> cookieSet) {
+        String sessionId = null;
+        List<String> cookiesHeader = conn.getHeaderFields().get("Set-Cookie");
+        if (cookiesHeader != null) {
+            for (String cookie : cookiesHeader) {
+                String cookieValue = cookie.split(";")[0];
+                // A re-sent cookie replaces its old value, so the Cookie header never carries the same name twice.
+                cookieSet.removeIf(old -> old.startsWith(cookieValue.split("=", 2)[0] + "="));
+                cookieSet.add(cookieValue);
+                if (cookieValue.startsWith("JSESSIONID=") || cookieValue.startsWith("csfcfc=")) {
+                    sessionId = cookieValue;
+                }
+            }
+        }
+        return sessionId;
+    }
+    // Returns the value of the hidden JSF ViewState input found in html, or "" when html is null/empty or has no such field.
+    private static String extractViewState(String html) {
+        boolean hasContent = html != null && !html.isEmpty();
+        if (!hasContent) {
+            System.err.println("HTML content is empty or null");
+            return "";
+        }
+
+        // Accept both the jakarta.faces.ViewState and the javax.faces.ViewState field name.
+        Matcher viewStateField = Pattern
+                .compile("<input[^>]*name=[\"'](?:jakarta|javax)\\.faces\\.ViewState[\"'][^>]*value=[\"']([^\"']+)[\"']")
+                .matcher(html);
+        if (!viewStateField.find()) {
+            System.err.println("Failed to extract ViewState from HTML");
+            return "";
+        }
+
+        return viewStateField.group(1);
+    }
+
+    // Builds the form body of the search POST (search term fixed to "Midwifery"); returns "" if encoding viewState throws.
+    private static String buildSearchPostData(String viewState) {
+        String encodedViewState;
+        try {
+            encodedViewState = URLEncoder.encode(viewState, StandardCharsets.UTF_8.name());
+        } catch (Exception e) {
+            System.err.println("Error encoding search ViewState: " + e.getMessage());
+            return "";
+        }
+        return "searchForm=searchForm&searchForm%3Aj_idt80=Midwifery" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AdrksId=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AsecondaryId=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AscientificSummary=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aoutcome=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AhealthOfCondition=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AhealthyVolunteers=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aaddresses=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt128=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3AipdSharingPlan=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt135%3Afrom=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt135%3Ato=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt146%3Afrom=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt146%3Ato=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt157%3Afrom=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Acharacteristics%3Aj_idt157%3Ato=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Agender=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AageInYears=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AinclusionCriteria=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AexclusionCriteria=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3AtrialStatus=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3ArecrutingLocation=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Aj_idt213%3Afrom=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3Arecruitment%3Aj_idt213%3Ato=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3AtrialDesign%3Apurpose=" +
+                "&searchForm%3AextendedSearch%3AextendedSearchTabs%3AtrialDesign%3AstudyType=" +
+                "&searchForm%3Aj_idt287=&javax.faces.ViewState=" + encodedViewState;
+    }
+    // Builds the form body of the pagination POST for the given 1-based page; returns "" if encoding viewState throws.
+    private static String buildPostData(String viewState, int page) {
+        String encodedViewState;
+        try {
+            encodedViewState = URLEncoder.encode(viewState, StandardCharsets.UTF_8.name());
+        } catch (Exception e) {
+            System.err.println("Error encoding pagination ViewState: " + e.getMessage());
+            return "";
+        }
+        return "resultForm=resultForm&resultForm%3Asorting%3ArowsPerPage=10" +
+                "&resultForm%3ApaginationTop%3Aj_idt156%3A" + (page - 1) + "%3Aj_idt158=" + page +
+                "&resultForm%3Asorting%3AsortingBy=SCORE" +
+                "&resultForm%3Asorting%3Aj_idt141=true" +
+                "&resultForm%3Aj_idt221%3Aj_idt223%3AdownloadConfirmation=resultForm%3Aj_idt221%3Aj_idt223%3AdownloadConfirmation" +
+                "&selectedType=JSON" +
+                "&javax.faces.ViewState=" + encodedViewState;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/example/umlistTest.java b/src/main/java/com/example/umlistTest.java
new file mode 100644
index 0000000..3e9b959
--- /dev/null
+++ b/src/main/java/com/example/umlistTest.java
@@ -0,0 +1,22 @@
+package com.example;
+
+import okhttp3.*;
+
+import java.io.IOException;
+
+/** Scratch program: downloads one WHO publication page with OkHttp and prints the returned HTML. */
+public class umlistTest {
+    /** Sends one GET request and prints the response body; an IOException propagates to the caller. */
+    public static void main(String[] args) throws IOException {
+        OkHttpClient client = new OkHttpClient().newBuilder().build();
+        // The request is a plain GET, so no MediaType/RequestBody is built for it.
+        Request request = new Request.Builder()
+                .url("http://who.int/westernpacific/publications/m/item/bi-weekly-covid-19-situation-update--11-april-2025")
+                .get()
+//                .addHeader("Cookie", "_cfuvid=Y2mczEYT8OCAEN719Uv9vPTpARSDmHju6OjSUfxYbb4-1745207891947-0.0.1.1-604800000")
+                .build();
+        Response response = client.newCall(request).execute();
+        String html = response.body().string();
+        System.out.println(html);
+    }
+}
diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml
new file mode 100644
index 0000000..5f5a5d1
--- /dev/null
+++ b/src/main/resources/logback.xml
@@ -0,0 +1,12 @@
+<configuration> <!-- Logback setup: a single console appender, attached to the root logger. -->
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+    <!-- Root logger: INFO threshold; events are routed to the STDOUT appender defined above. -->
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+
+</configuration>
\ No newline at end of file
diff --git a/target/classes/com/example/AusContent.class b/target/classes/com/example/AusContent.class
new file mode 100644
index 0000000..048b882
Binary files /dev/null and b/target/classes/com/example/AusContent.class differ
diff --git a/target/classes/com/example/AusList.class b/target/classes/com/example/AusList.class
new file mode 100644
index 0000000..9ec0bcd
Binary files /dev/null and b/target/classes/com/example/AusList.class differ
diff --git a/target/classes/com/example/CaptchaOCR.class b/target/classes/com/example/CaptchaOCR.class
new file mode 100644
index 0000000..8cd5bc8
Binary files /dev/null and b/target/classes/com/example/CaptchaOCR.class differ
diff --git a/target/classes/com/example/CsAirScraper.class b/target/classes/com/example/CsAirScraper.class
new file mode 100644
index 0000000..b084982
Binary files /dev/null and b/target/classes/com/example/CsAirScraper.class differ
diff --git a/target/classes/com/example/CtriScraper.class b/target/classes/com/example/CtriScraper.class
new file mode 100644
index 0000000..27d35cb
Binary files /dev/null and b/target/classes/com/example/CtriScraper.class differ
diff --git a/target/classes/com/example/CtriScraperContent.class b/target/classes/com/example/CtriScraperContent.class
new file mode 100644
index 0000000..e80414c
Binary files /dev/null and b/target/classes/com/example/CtriScraperContent.class differ
diff --git a/target/classes/com/example/Inka.class b/target/classes/com/example/Inka.class
new file mode 100644
index 0000000..a10742c
Binary files /dev/null and b/target/classes/com/example/Inka.class differ
diff --git a/target/classes/com/example/NSFAwardCrawler.class b/target/classes/com/example/NSFAwardCrawler.class
new file mode 100644
index 0000000..5f730e8
Binary files /dev/null and b/target/classes/com/example/NSFAwardCrawler.class differ
diff --git a/target/classes/com/example/PatentscopeSeleniumCrawler.class b/target/classes/com/example/PatentscopeSeleniumCrawler.class
new file mode 100644
index 0000000..d4d8231
Binary files /dev/null and b/target/classes/com/example/PatentscopeSeleniumCrawler.class differ
diff --git a/target/classes/com/example/ProxyIPChecker.class b/target/classes/com/example/ProxyIPChecker.class
new file mode 100644
index 0000000..d1ebe4a
Binary files /dev/null and b/target/classes/com/example/ProxyIPChecker.class differ
diff --git a/target/classes/com/example/ScraperWithCaptcha$1.class b/target/classes/com/example/ScraperWithCaptcha$1.class
new file mode 100644
index 0000000..d5e54e3
Binary files /dev/null and b/target/classes/com/example/ScraperWithCaptcha$1.class differ
diff --git a/target/classes/com/example/ScraperWithCaptcha$PageInfo.class b/target/classes/com/example/ScraperWithCaptcha$PageInfo.class
new file mode 100644
index 0000000..066e29b
Binary files /dev/null and b/target/classes/com/example/ScraperWithCaptcha$PageInfo.class differ
diff --git a/target/classes/com/example/ScraperWithCaptcha.class b/target/classes/com/example/ScraperWithCaptcha.class
new file mode 100644
index 0000000..6152d09
Binary files /dev/null and b/target/classes/com/example/ScraperWithCaptcha.class differ
diff --git a/target/classes/com/example/StringFieldExtractor.class b/target/classes/com/example/StringFieldExtractor.class
new file mode 100644
index 0000000..44d216e
Binary files /dev/null and b/target/classes/com/example/StringFieldExtractor.class differ
diff --git a/target/classes/com/example/WipoPatentsSelenium.class b/target/classes/com/example/WipoPatentsSelenium.class
new file mode 100644
index 0000000..e8b6ee2
Binary files /dev/null and b/target/classes/com/example/WipoPatentsSelenium.class differ
diff --git a/target/classes/com/example/cliniTopic.class b/target/classes/com/example/cliniTopic.class
new file mode 100644
index 0000000..2160bde
Binary files /dev/null and b/target/classes/com/example/cliniTopic.class differ
diff --git a/target/classes/com/example/drks.class b/target/classes/com/example/drks.class
new file mode 100644
index 0000000..8c2a53c
Binary files /dev/null and b/target/classes/com/example/drks.class differ
diff --git a/target/classes/com/example/getInKa.class b/target/classes/com/example/getInKa.class
new file mode 100644
index 0000000..f7fa167
Binary files /dev/null and b/target/classes/com/example/getInKa.class differ
diff --git a/target/classes/com/example/jsonGetOk.class b/target/classes/com/example/jsonGetOk.class
new file mode 100644
index 0000000..6e97745
Binary files /dev/null and b/target/classes/com/example/jsonGetOk.class differ
diff --git a/target/classes/com/example/ook.class b/target/classes/com/example/ook.class
new file mode 100644
index 0000000..959dbb8
Binary files /dev/null and b/target/classes/com/example/ook.class differ
diff --git a/target/classes/com/example/oook.class b/target/classes/com/example/oook.class
new file mode 100644
index 0000000..9dc6ff1
Binary files /dev/null and b/target/classes/com/example/oook.class differ
diff --git a/target/classes/com/example/projTopic.class b/target/classes/com/example/projTopic.class
new file mode 100644
index 0000000..2f2adee
Binary files /dev/null and b/target/classes/com/example/projTopic.class differ
diff --git a/target/classes/com/example/saveInES.class b/target/classes/com/example/saveInES.class
new file mode 100644
index 0000000..07702c6
Binary files /dev/null and b/target/classes/com/example/saveInES.class differ
diff --git a/target/classes/com/example/test.class b/target/classes/com/example/test.class
new file mode 100644
index 0000000..927763f
Binary files /dev/null and b/target/classes/com/example/test.class differ
diff --git a/target/classes/com/example/testContent.class b/target/classes/com/example/testContent.class
new file mode 100644
index 0000000..b75d67b
Binary files /dev/null and b/target/classes/com/example/testContent.class differ
diff --git a/target/classes/com/example/testList.class b/target/classes/com/example/testList.class
new file mode 100644
index 0000000..066638e
Binary files /dev/null and b/target/classes/com/example/testList.class differ
diff --git a/target/classes/com/example/umlistTest.class b/target/classes/com/example/umlistTest.class
new file mode 100644
index 0000000..8c5f5a5
Binary files /dev/null and b/target/classes/com/example/umlistTest.class differ
diff --git a/target/classes/logback.xml b/target/classes/logback.xml
new file mode 100644
index 0000000..5f5a5d1
--- /dev/null
+++ b/target/classes/logback.xml
@@ -0,0 +1,12 @@
+<configuration> <!-- Logback setup: a single console appender, attached to the root logger. -->
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+    <!-- Root logger: INFO threshold; events are routed to the STDOUT appender defined above. -->
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+
+</configuration>
\ No newline at end of file
diff --git a/target/es-crawler-1.0-SNAPSHOT-jar-with-dependencies.jar b/target/es-crawler-1.0-SNAPSHOT-jar-with-dependencies.jar
new file mode 100644
index 0000000..9a161d3
Binary files /dev/null and b/target/es-crawler-1.0-SNAPSHOT-jar-with-dependencies.jar differ
diff --git a/target/es-crawler-1.0-SNAPSHOT.jar b/target/es-crawler-1.0-SNAPSHOT.jar
new file mode 100644
index 0000000..a26fac1
Binary files /dev/null and b/target/es-crawler-1.0-SNAPSHOT.jar differ
diff --git a/target/maven-archiver/pom.properties b/target/maven-archiver/pom.properties
new file mode 100644
index 0000000..17b9cce
--- /dev/null
+++ b/target/maven-archiver/pom.properties
@@ -0,0 +1,5 @@
+#Generated by Maven
+#Tue May 13 14:32:58 CST 2025
+version=1.0-SNAPSHOT
+groupId=com.example
+artifactId=es-crawler
diff --git a/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..39fff43
--- /dev/null
+++ b/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
@@ -0,0 +1 @@
+com\example\CtriScraper.class
diff --git a/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..b8c08cb
--- /dev/null
+++ b/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1 @@
+F:\workTest\DaKaES\src\main\java\com\example\CtriScraper.java
diff --git a/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst
new file mode 100644
index 0000000..e69de29
diff --git a/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst b/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst
new file mode 100644
index 0000000..e69de29