// [Web-viewer page residue, not part of the program] You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
//
// 256 lines
// 12 KiB

package com.example;
import okhttp3.*;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scratch scraper: downloads one press-release listing page through a local HTTP proxy
 * and prints the {@code href} of every link matched by the CSS selector {@code ".title a"}.
 *
 * <p>Also hosts {@link #convertToTimestamp(String)}, a small date-normalising helper.
 */
public class ook {

    /** Listing page fetched by {@link #main(String[])}; also sent as the {@code Referer} header. */
    private static final String LISTING_URL = "https://wrair.health.mil/News-Media/Press-Releases/";

    /** Host of the local HTTP proxy every request of {@link #main(String[])} is routed through. */
    private static final String PROXY_HOST = "127.0.0.1";

    /** Port of the local HTTP proxy. */
    private static final int PROXY_PORT = 7897;

    /** Connect / read / write timeout, in seconds. */
    private static final int TIMEOUT_SECONDS = 30;

    /**
     * Accepted input layout: month name (full or three-letter, optionally followed by a dot),
     * day of month, comma, four-digit year. The bracketed parts are optional sections.
     */
    private static final DateTimeFormatter INPUT_DATE_FORMAT =
            DateTimeFormatter.ofPattern("[MMMM][MMM][.] d, yyyy", Locale.ENGLISH);

    /** Output layout produced by {@link #convertToTimestamp(String)}. */
    private static final DateTimeFormatter OUTPUT_TIMESTAMP_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Matches a leading four-letter "Sept" abbreviation (but not "September"). */
    private static final Pattern SEPT_ABBREVIATION = Pattern.compile("^Sept\\b");

    /**
     * Fetches the listing page and prints the {@code href} attribute of each {@code ".title a"}
     * element, one per line.
     *
     * @param args ignored
     * @throws Exception if the request fails, the server answers with a non-2xx status,
     *                   or the response carries no body
     */
    public static void main(String[] args) throws Exception {
        OkHttpClient client = buildClient();
        String html = fetchHtml(client, buildListingRequest());

        Document page = Jsoup.parse(html);
        Elements anchors = page.select(".title a");
        for (Element anchor : anchors) {
            // Printed exactly as found in the markup (may be relative or absolute).
            System.out.println(anchor.attr("href"));
        }
    }

    /**
     * No-op constructor. The {@code throws IOException} clause is kept so that existing callers
     * which catch {@link IOException} around {@code new ook()} continue to compile.
     *
     * @throws IOException never thrown by the current implementation
     */
    public ook() throws IOException {
    }

    /**
     * Converts a date such as {@code "Jan. 9, 2025"} to {@code "2025-01-09 00:00:00"}.
     *
     * <p>The month may be a three-letter English abbreviation with or without a trailing dot
     * ({@code "Jan."}, {@code "Jan"}), the four-letter {@code "Sept"} / {@code "Sept."}, or the
     * full English month name ({@code "March"}, {@code "May"}). Leading and trailing whitespace
     * is ignored. The time-of-day is always midnight.
     *
     * <p>Earlier, commented-out revisions of this helper targeted other layouts
     * ({@code "MMMM dd, yyyy"}, {@code "MM-dd-yyyy"}, {@code "dd/MM/yyyy"}, {@code "d MMMM, yyyy"},
     * {@code "d MMMM yyyy"} extracted by regex, and ISO-8601 date-times); none of those are
     * handled here.
     *
     * @param dateStr the date text to convert; may be {@code null}
     * @return the timestamp formatted as {@code yyyy-MM-dd HH:mm:ss}, or {@code null} when
     *         {@code dateStr} is {@code null}, blank, or not in an accepted layout
     */
    public static String convertToTimestamp(String dateStr) {
        if (dateStr == null) {
            return null;
        }
        String text = dateStr.trim();
        if (text.isEmpty()) {
            return null;
        }
        // The formatter only knows the three-letter "Sep"; fold the four-letter variant into it.
        text = SEPT_ABBREVIATION.matcher(text).replaceFirst("Sep");
        try {
            LocalDate date = LocalDate.parse(text, INPUT_DATE_FORMAT);
            return date.atStartOfDay().format(OUTPUT_TIMESTAMP_FORMAT);
        } catch (DateTimeParseException e) {
            // Best-effort contract: report the problem and signal failure with null.
            System.err.println("Unparseable date \"" + dateStr + "\": " + e.getMessage());
            return null;
        }
    }

    /**
     * Asks the service at {@code http://127.0.0.1:7897} for a proxy description and returns the
     * raw response body. Not called by {@link #main(String[])}, which uses a fixed proxy address.
     *
     * @return the response body as a string (the original author's note says it is JSON)
     * @throws Exception if the call fails, the status is not 2xx, or the response has no body
     */
    private static String getProxyFromLocalService() throws Exception {
        OkHttpClient client = new OkHttpClient();
        Request request = new Request.Builder()
                .url("http://127.0.0.1:7897")
                .get()
                .build();
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("获取代理失败,状态码: " + response.code());
            }
            ResponseBody body = response.body();
            if (body == null) {
                throw new IOException("Empty response body from " + request.url());
            }
            return body.string();
        }
    }

    /** Builds the HTTP client: 30-second timeouts, all traffic sent through the local proxy. */
    private static OkHttpClient buildClient() {
        Proxy localProxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_HOST, PROXY_PORT));
        return new OkHttpClient().newBuilder()
                .connectTimeout(TIMEOUT_SECONDS, TimeUnit.SECONDS)
                .readTimeout(TIMEOUT_SECONDS, TimeUnit.SECONDS)
                .writeTimeout(TIMEOUT_SECONDS, TimeUnit.SECONDS)
                .proxy(localProxy)
                .build();
    }

    /** Builds the GET request for the listing page with browser-like headers. */
    private static Request buildListingRequest() {
        return new Request.Builder()
                .url(LISTING_URL)
                .get()
                .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
                .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
                // Accept-Encoding is intentionally not set here.
                // NOTE(review): OkHttp is documented to decompress gzip transparently only when
                // the caller does not supply this header itself - confirm before adding it.
                .addHeader("Accept-Language", "zh-CN,zh;q=0.9,th;q=0.8")
                .addHeader("Cache-Control", "no-cache")
                .addHeader("Pragma", "no-cache")
                .addHeader("Referer", LISTING_URL)
                // NOTE(review): hard-coded cookie captured from a browser session; it may expire.
                .addHeader("Cookie", "_ga=GA1.1.516170455.1740971326; .ASPXANONYMOUS=xUBztj4Ek1vHfBPe-1QqFJhd83I4bkB1k0_d-2QrQ7drfd7R7Y6eNsyyHVjSeffyIKzy_qm5tOKOCtbvst-s9ZGWThxifCGMdJE117EQlr1OZARa0; dnn_IsMobile=False; language=en-US; ARRAffinity=c30f7cdebcf208f7c5a996cb410451c36532afc64703669607f68f04a75f4b39; _ga_CSLL4ZEK4L=GS1.1.1742349582.4.1.1742350035.0.0.0")
                .addHeader("Upgrade-Insecure-Requests", "1")
                .addHeader("Sec-Fetch-Dest", "document")
                .addHeader("Sec-Fetch-Mode", "navigate")
                .addHeader("Sec-Fetch-Site", "same-origin")
                .addHeader("Sec-Fetch-User", "?1")
                .addHeader("Sec-Ch-Ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
                .addHeader("Sec-Ch-Ua-Mobile", "?0")
                .addHeader("Sec-Ch-Ua-Platform", "\"Windows\"")
                .addHeader("Priority", "u=0, i")
                .build();
    }

    /**
     * Executes {@code request} and returns the response body as text. The response is always
     * closed, so the underlying connection is released even when an error occurs.
     *
     * @throws IOException on transport failure, a non-2xx status, or a missing body
     */
    private static String fetchHtml(OkHttpClient client, Request request) throws IOException {
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Unexpected HTTP status " + response.code() + " for " + request.url());
            }
            ResponseBody body = response.body();
            if (body == null) {
                throw new IOException("Empty response body for " + request.url());
            }
            return body.string();
        }
    }
}