You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
256 lines
12 KiB
256 lines
12 KiB
package com.example;
|
|
|
|
import okhttp3.*;
|
|
import org.json.JSONObject;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
|
|
import java.io.IOException;
|
|
import java.net.InetSocketAddress;
|
|
import java.net.Proxy;
|
|
import java.text.ParseException;
|
|
import java.text.SimpleDateFormat;
|
|
import java.time.LocalDate;
|
|
import java.time.LocalDateTime;
|
|
import java.time.ZonedDateTime;
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.time.format.DateTimeParseException;
|
|
import java.util.Date;
|
|
import java.util.HashMap;
|
|
import java.util.Locale;
|
|
import java.util.Map;
|
|
import java.util.concurrent.TimeUnit;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
public class ook {
|
|
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
// 1. 获取代理地址
|
|
// String proxyJson = getProxyFromLocalService();
|
|
// JSONObject proxyData = new JSONObject(proxyJson);
|
|
// String httpProxy = proxyData.getString("http"); // 例如 "http://proxy1:port"
|
|
//
|
|
// // 2. 解析代理地址
|
|
// String[] proxyParts = httpProxy.replace("http://", "").split(":");
|
|
// String proxyHost = proxyParts[0]; // proxy1
|
|
// int proxyPort = Integer.parseInt(proxyParts[1]); // port
|
|
|
|
OkHttpClient client = new OkHttpClient().newBuilder()
|
|
.connectTimeout(30, TimeUnit.SECONDS)
|
|
.readTimeout(30, TimeUnit.SECONDS)
|
|
.writeTimeout(30, TimeUnit.SECONDS)
|
|
.proxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 7897))) // 直接使用 7897 端口
|
|
.build();
|
|
|
|
MediaType mediaType = MediaType.parse("text/plain");
|
|
Request request = new Request.Builder()
|
|
.url("https://wrair.health.mil/News-Media/Press-Releases/")
|
|
.get()
|
|
// 添加关键请求头
|
|
.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
|
|
.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
|
|
// .addHeader("Accept-Encoding", "gzip, deflate, br, zstd")
|
|
.addHeader("Accept-Language", "zh-CN,zh;q=0.9,th;q=0.8")
|
|
.addHeader("Cache-Control", "no-cache")
|
|
.addHeader("Pragma", "no-cache")
|
|
.addHeader("Referer", "https://wrair.health.mil/News-Media/Press-Releases/")
|
|
.addHeader("Cookie", "_ga=GA1.1.516170455.1740971326; .ASPXANONYMOUS=xUBztj4Ek1vHfBPe-1QqFJhd83I4bkB1k0_d-2QrQ7drfd7R7Y6eNsyyHVjSeffyIKzy_qm5tOKOCtbvst-s9ZGWThxifCGMdJE117EQlr1OZARa0; dnn_IsMobile=False; language=en-US; ARRAffinity=c30f7cdebcf208f7c5a996cb410451c36532afc64703669607f68f04a75f4b39; _ga_CSLL4ZEK4L=GS1.1.1742349582.4.1.1742350035.0.0.0")
|
|
.addHeader("Upgrade-Insecure-Requests", "1")
|
|
.addHeader("Sec-Fetch-Dest", "document")
|
|
.addHeader("Sec-Fetch-Mode", "navigate")
|
|
.addHeader("Sec-Fetch-Site", "same-origin")
|
|
.addHeader("Sec-Fetch-User", "?1")
|
|
.addHeader("Sec-Ch-Ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
|
|
.addHeader("Sec-Ch-Ua-Mobile", "?0")
|
|
.addHeader("Sec-Ch-Ua-Platform", "\"Windows\"")
|
|
.addHeader("Priority", "u=0, i")
|
|
.build();
|
|
Response response = client.newCall(request).execute();
|
|
String html = response.body().string();
|
|
Document parse = Jsoup.parse(html);
|
|
// String url = "https://www.uu.se/nyheter/alla?newsResearch=researchtopic11%3Bresearchtopic7%3Bresearchtopic22%3Bresearchtopic10%3Bresearchtopic2&start=20";
|
|
// // 定义正则表达式
|
|
// String regex = "start=(\\d+)";
|
|
// Pattern pattern = Pattern.compile(regex);
|
|
// Matcher matcher = pattern.matcher(url);
|
|
// Integer start = 0;
|
|
|
|
|
|
// String postTime = convertToTimestamp(parse.select(".mr10").text());
|
|
// String title = parse.select(".hdg01").text();
|
|
// String content = parse.select(".container01 p").text();
|
|
// String forwardcontent = parse.select("#main").html();
|
|
// Map<String,Object> map = new HashMap<>();
|
|
|
|
// if (matcher.find()) {
|
|
// start = Integer.parseInt(matcher.group(1));
|
|
// System.out.println("Start: " + start); // start = 12
|
|
// }
|
|
//
|
|
// Elements allLinks = new Elements();
|
|
// Elements links = parse.select(".search-result-hit-text-container a");
|
|
// allLinks.addAll(links);
|
|
//
|
|
// int totalLinks = allLinks.size();
|
|
// int startIndex = Math.max(0, totalLinks - 10);
|
|
// for (int i = startIndex; i < totalLinks; i++) {
|
|
// Map<String, Object> task = new HashMap<String, Object>(16);
|
|
// task.put("link","https://www.uu.se"+allLinks.get(i).attr("href"));
|
|
// task.put("linktype", "newscontent"); // 設置鏈接類型為 "newscontent"
|
|
//
|
|
// System.out.println(task);
|
|
// }
|
|
Elements elements = parse.select(".title a");
|
|
for (Element element : elements) {
|
|
String link = element.attr("href");
|
|
System.out.println(link);
|
|
}
|
|
|
|
|
|
// map.put("postTime",postTime);
|
|
// map.put("title",title);
|
|
// map.put("content",content);
|
|
// map.put("forwardcontent",forwardcontent);
|
|
// System.out.println(map);
|
|
|
|
}
|
|
/**
 * Creates an instance without performing any initialization; the constructor body is empty.
 *
 * @throws IOException declared in the signature, although nothing in the body throws it
 */
public ook() throws IOException {
    // No state to set up.
}
|
|
// public static String convertToTimestamp(String dateStr) {
|
|
// try {
|
|
//         // Input format: MMMM dd, yyyy (e.g. "February 28, 2025")
|
|
// DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMMM dd, yyyy", Locale.ENGLISH);
|
|
// // 定义输出格式:yyyy-MM-dd HH:mm:ss
|
|
// DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
//
|
|
// // 解析输入日期
|
|
// LocalDate date = LocalDate.parse(dateStr, inputFormatter);
|
|
// // 转换为带时间的格式,时间设为 00:00:00
|
|
// return date.atStartOfDay().format(outputFormatter);
|
|
// } catch (Exception e) {
|
|
// e.printStackTrace();
|
|
// return null; // 或抛出异常,根据需求调整
|
|
// }
|
|
// }
|
|
// public static String convertToTimestamp(String dateStr) {
|
|
// try {
|
|
//         // Input format: MM-dd-yyyy (e.g. "03-17-2025")
|
|
// DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MM-dd-yyyy", Locale.CHINESE);
|
|
// // 定义输出格式:yyyy-MM-dd HH:mm:ss
|
|
// DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
//
|
|
// // 解析输入日期
|
|
// LocalDate date = LocalDate.parse(dateStr, inputFormatter);
|
|
// // 转换为带时间的格式,时间设为 00:00:00
|
|
// return date.atStartOfDay().format(outputFormatter);
|
|
// } catch (Exception e) {
|
|
// e.printStackTrace();
|
|
// return null; // 或抛出异常,根据需求调整
|
|
// }
|
|
// }
|
|
|
|
// public static String convertToTimestamp(String dateStr) {
|
|
// try {
|
|
// // 定义输入格式
|
|
// DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("dd/MM/yyyy");
|
|
// // 定义输出格式
|
|
// DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
//
|
|
// // 解析输入字符串为 LocalDate
|
|
// LocalDate date = LocalDate.parse(dateStr, inputFormatter);
|
|
// // 转换为 LocalDateTime,设置时间为 00:00:00
|
|
// LocalDateTime dateTime = date.atStartOfDay();
|
|
// // 格式化为目标字符串
|
|
// return dateTime.format(outputFormatter);
|
|
// } catch (Exception e) {
|
|
// e.printStackTrace();
|
|
// return null; // 或者抛出异常,根据需求调整
|
|
// }
|
|
// }
|
|
// public static String convertToTimestamp(String dateStr) {
|
|
// try {
|
|
//         // Input format: d MMMM, yyyy (e.g. "3 June, 2015")
|
|
// DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d MMMM, yyyy", Locale.ENGLISH);
|
|
// // 定义输出格式:yyyy-MM-dd HH:mm:ss
|
|
// DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
//
|
|
// // 解析输入日期
|
|
// LocalDate date = LocalDate.parse(dateStr, inputFormatter);
|
|
// // 转换为带时间的格式,时间设为 00:00:00
|
|
// return date.atStartOfDay().format(outputFormatter);
|
|
// } catch (Exception e) {
|
|
// e.printStackTrace();
|
|
// return null; // 或抛出异常,根据需求调整
|
|
// }
|
|
// }
|
|
// public static String convertToTimestamp(String input) {
|
|
// try {
|
|
// // 正则匹配 "d MMMM yyyy"
|
|
// Pattern pattern = Pattern.compile("\\d{1,2} [A-Za-z]+ \\d{4}");
|
|
// Matcher matcher = pattern.matcher(input);
|
|
// if (matcher.find()) {
|
|
// String dateStr = matcher.group();
|
|
// DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("d MMMM yyyy", Locale.ENGLISH);
|
|
// DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
// LocalDate date = LocalDate.parse(dateStr, inputFormatter);
|
|
// return date.atStartOfDay().format(outputFormatter);
|
|
// } else {
|
|
// System.out.println("No date found in: " + input);
|
|
// return null;
|
|
// }
|
|
// } catch (Exception e) {
|
|
// e.printStackTrace();
|
|
// return null;
|
|
// }
|
|
// }
|
|
// public static String convertToTimestamp(String dateStr) {
|
|
// try {
|
|
// // Parse the ISO 8601 date string (e.g., "2025-03-17T12:37:33.033Z")
|
|
// ZonedDateTime zdt = ZonedDateTime.parse(dateStr, DateTimeFormatter.ISO_DATE_TIME);
|
|
//
|
|
//         // Define the output format (yyyy-MM-dd HH:mm:ss, 24-hour clock)
|
|
// DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
//
|
|
// // Format the date to the desired output
|
|
// return zdt.format(outputFormatter);
|
|
// } catch (Exception e) {
|
|
// e.printStackTrace();
|
|
// return null; // Or throw an exception, depending on your needs
|
|
// }
|
|
// }
|
|
public static String convertToTimestamp(String dateStr) {
|
|
try {
|
|
// Parse "Jan. 9, 2025" (abbreviated month, dot, comma-separated)
|
|
DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("MMM. d, yyyy", Locale.ENGLISH);
|
|
LocalDate date = LocalDate.parse(dateStr, inputFormatter);
|
|
|
|
// Format to "yyyy-MM-dd HH:mm:ss" (defaulting time to 00:00:00)
|
|
DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
return date.atStartOfDay().format(outputFormatter);
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
return null;
|
|
}
|
|
}
|
|
// 调用本地代理服务获取代理地址
|
|
private static String getProxyFromLocalService() throws Exception {
|
|
OkHttpClient client = new OkHttpClient();
|
|
Request request = new Request.Builder()
|
|
.url("http://127.0.0.1:7897")
|
|
.get()
|
|
.build();
|
|
|
|
try (Response response = client.newCall(request).execute()) {
|
|
if (response.isSuccessful()) {
|
|
return response.body().string(); // 返回 JSON 字符串
|
|
} else {
|
|
throw new Exception("获取代理失败,状态码: " + response.code());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|