Browse Source

文字翻译应用

master
55007 6 months ago
commit
41e9225020
  1. 40
      .classpath
  2. 23
      .project
  3. 4
      .settings/org.eclipse.core.resources.prefs
  4. 9
      .settings/org.eclipse.jdt.core.prefs
  5. 4
      .settings/org.eclipse.m2e.core.prefs
  6. 1
      README.md
  7. 198
      pom.xml
  8. 69
      src/main/java/com/bfd/crawl_translate/CrawlTranslateApplication.java
  9. 32
      src/main/java/com/bfd/crawl_translate/controller/ApiController.java
  10. 105
      src/main/java/com/bfd/crawl_translate/service/MainHandler.java
  11. 296
      src/main/java/com/bfd/crawl_translate/service/TranslateChatGptService.java
  12. 28
      src/main/java/com/bfd/crawl_translate/utils/Config.java
  13. 19
      src/main/java/com/bfd/crawl_translate/utils/Constants.java
  14. 13
      src/main/java/com/bfd/crawl_translate/utils/ContentException.java
  15. 57
      src/main/java/com/bfd/crawl_translate/utils/ESClientFactory.java
  16. 274
      src/main/java/com/bfd/crawl_translate/utils/HttpUtil.java
  17. 81
      src/main/java/com/bfd/crawl_translate/utils/KfkUtil.java
  18. 92
      src/main/java/com/bfd/crawl_translate/utils/PauseTool.java
  19. 166
      src/main/java/com/bfd/crawl_translate/utils/PercentTransalteUtil.java
  20. 165
      src/main/java/com/bfd/crawl_translate/utils/TranslateUtil.java
  21. 59
      src/main/resources/application.yml
  22. 38
      src/main/resources/logback-spring.xml

40
.classpath

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse JDT build path for this project: source folders, classpath containers and the default output folder. -->
<classpath>
    <!-- Main Java sources, compiled into target/classes. -->
    <classpathentry kind="src" output="target/classes" path="src/main/java">
        <attributes>
            <attribute name="optional" value="true"/>
            <attribute name="maven.pomderived" value="true"/>
        </attributes>
    </classpathentry>
    <!-- Main resources folder; the "**" exclusion keeps every file in it out of the JDT build. -->
    <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
        <attributes>
            <attribute name="maven.pomderived" value="true"/>
            <attribute name="optional" value="true"/>
        </attributes>
    </classpathentry>
    <!-- Test Java sources, compiled into target/test-classes. -->
    <classpathentry kind="src" output="target/test-classes" path="src/test/java">
        <attributes>
            <attribute name="optional" value="true"/>
            <attribute name="maven.pomderived" value="true"/>
            <attribute name="test" value="true"/>
        </attributes>
    </classpathentry>
    <!-- Test resources folder; the "**" exclusion keeps every file in it out of the JDT build. -->
    <classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
        <attributes>
            <attribute name="maven.pomderived" value="true"/>
            <attribute name="test" value="true"/>
            <attribute name="optional" value="true"/>
        </attributes>
    </classpathentry>
    <!-- JRE container pinned to the JavaSE-1.8 execution environment. -->
    <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
        <attributes>
            <attribute name="maven.pomderived" value="true"/>
        </attributes>
    </classpathentry>
    <!-- m2e classpath container (Maven dependencies). -->
    <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
        <attributes>
            <attribute name="maven.pomderived" value="true"/>
        </attributes>
    </classpathentry>
    <!-- Default output folder. -->
    <classpathentry kind="output" path="target/classes"/>
</classpath>

23
.project

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project descriptor: project name, builders and natures. -->
<projectDescription>
    <name>analyst_translate</name>
    <comment></comment>
    <projects>
    </projects>
    <!-- Builders run by Eclipse, in order: the JDT Java builder, then the m2e Maven builder. -->
    <buildSpec>
        <buildCommand>
            <name>org.eclipse.jdt.core.javabuilder</name>
            <arguments>
            </arguments>
        </buildCommand>
        <buildCommand>
            <name>org.eclipse.m2e.core.maven2Builder</name>
            <arguments>
            </arguments>
        </buildCommand>
    </buildSpec>
    <!-- Project natures: Java and Maven (m2e). -->
    <natures>
        <nature>org.eclipse.jdt.core.javanature</nature>
        <nature>org.eclipse.m2e.core.maven2Nature</nature>
    </natures>
</projectDescription>

4
.settings/org.eclipse.core.resources.prefs

@ -0,0 +1,4 @@
# Eclipse resource preferences: text encoding for the main source folders and for the project as a whole.
eclipse.preferences.version=1
# Every entry below sets UTF-8 for the path named in its key.
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding/<project>=UTF-8

9
.settings/org.eclipse.jdt.core.prefs

@ -0,0 +1,9 @@
# Eclipse JDT compiler preferences for this project.
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.methodParameters=generate
# Source level, compliance level and target platform are all set to 1.8.
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

4
.settings/org.eclipse.m2e.core.prefs

@ -0,0 +1,4 @@
# Eclipse m2e (Maven integration) preferences for this project.
# No Maven profiles are listed as active.
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

1
README.md

@ -0,0 +1 @@
bfd翻译应用

198
pom.xml

@ -0,0 +1,198 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the crawl_translate Spring Boot service.
  Packaging layout produced by the plugins below:
    target/<artifact>.jar  - application classes only (configuration files are excluded from the jar)
    target/lib/            - all dependency jars (copied by maven-dependency-plugin)
    target/config/         - *.properties / *.yml / *.yaml copied from src/main/resources
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.2.4.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.bfd</groupId>
    <artifactId>crawl_translate</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>crawl_translate</name>
    <description>crawl_translate</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Elasticsearch: the three artifacts below are pinned to the same version. -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>6.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-client</artifactId>
            <version>6.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
            <version>6.0.0</version>
            <exclusions>
                <!-- The low-level REST client is declared explicitly above. -->
                <exclusion>
                    <groupId>org.elasticsearch.client</groupId>
                    <artifactId>elasticsearch-rest-client</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>3.11.0</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.12</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.8.9</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.7.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.2.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/de.codecentric/spring-boot-admin-starter-client -->
        <dependency>
            <groupId>de.codecentric</groupId>
            <artifactId>spring-boot-admin-client</artifactId>
            <version>2.2.4</version>
        </dependency>
        <!-- Redis -->
        <dependency>
            <groupId>org.redisson</groupId>
            <artifactId>redisson-spring-boot-starter</artifactId>
            <version>3.13.6</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>
        <!-- ZooKeeper client (Curator) -->
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-framework</artifactId>
            <version>5.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-recipes</artifactId>
            <version>5.2.0</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <!-- File types / paths that are NOT packaged into the jar -->
                    <excludes>
                        <exclude>*.properties</exclude>
                        <exclude>*.yml</exclude>
                        <exclude>*.yaml</exclude>
                    </excludes>
                    <archive>
                        <manifest>
                            <!-- Main class to execute -->
                            <mainClass>com.bfd.crawl_translate.CrawlTranslateApplication</mainClass>
                            <!-- Add the third-party jars to the manifest Class-Path -->
                            <addClasspath>true</addClasspath>
                            <!-- Class-Path prefix: third-party jars are placed in the lib directory, so the prefix is lib/ -->
                            <classpathPrefix>lib/</classpathPrefix>
                            <!-- Do not record timestamped (unique) snapshot versions in MANIFEST.MF -->
                            <useUniqueVersions>false</useUniqueVersions>
                        </manifest>
                        <manifestEntries>
                            <!-- Extra Class-Path entries: the pauseTool jar and the external configuration directory -->
                            <Class-Path>lib/pauseTool-1.0.jar config/</Class-Path>
                        </manifestEntries>
                    </archive>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib/</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-resources</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-resources</goal>
                        </goals>
                        <configuration>
                            <resources>
                                <!-- Copy the configuration files (the ones excluded from the jar) to the config directory -->
                                <resource>
                                    <directory>src/main/resources/</directory>
                                    <includes>
                                        <include>*.properties</include>
                                        <include>*.yml</include>
                                        <!-- Was spelled <exclude> inside <includes>; an include list can only hold include patterns. -->
                                        <include>*.yaml</include>
                                    </includes>
                                </resource>
                            </resources>
                            <outputDirectory>${project.build.directory}/config</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

69
src/main/java/com/bfd/crawl_translate/CrawlTranslateApplication.java

@ -0,0 +1,69 @@
package com.bfd.crawl_translate;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import com.bfd.crawl_translate.service.TranslateChatGptService;
import com.bfd.crawl_translate.utils.Config;
import com.bfd.crawl_translate.utils.HttpUtil;
import com.bfd.crawl_translate.utils.PauseTool;
import cn.hutool.core.thread.ThreadFactoryBuilder;
import lombok.extern.slf4j.Slf4j;
/**
 * Spring Boot entry point of the translation service.
 *
 * <p>After the application context is up, {@link #start()} fetches the translation API token,
 * submits the {@link TranslateChatGptService} worker to a thread pool, loads the version cache
 * from Redis and registers the ZooKeeper node listener.</p>
 */
@SpringBootApplication
@Slf4j
@EnableScheduling
public class CrawlTranslateApplication {

    @Autowired
    TranslateChatGptService translateChatGptService;

    @Autowired
    private StringRedisTemplate stringRedisTemplate;

    @Value("${zookeeper.connection-string}")
    private String connectionString;

    @Value("${zookeeper.publish-node}")
    private String nodePath;

    /**
     * Boots the Spring context and then runs {@link #start()} on the application bean.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        ConfigurableApplicationContext applicationContext = SpringApplication.run(CrawlTranslateApplication.class, args);
        applicationContext.getBean(CrawlTranslateApplication.class).start();
    }

    /**
     * Performs the one-off start-up work: token fetch, worker submission, Redis cache load and
     * ZooKeeper listener registration.
     */
    public void start() {
        log.info("----------CrawlTranslateApplication start success----------");
        HttpUtil.getToken();
        // Define the worker thread pool.
        // Hutool's ThreadFactoryBuilder appends its own counter to the prefix (it is not a
        // String.format pattern), so the prefix must not contain "%d".
        ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNamePrefix("crawl-pool-").build();
        ExecutorService singleThreadPool = new ThreadPoolExecutor(
                10,
                20,
                100L,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<Runnable>(1024),
                namedThreadFactory,
                new ThreadPoolExecutor.AbortPolicy());
        singleThreadPool.execute(translateChatGptService);

        PauseTool pauseTool = new PauseTool();
        pauseTool.initializeRedisCache(stringRedisTemplate);
        pauseTool.setupZookeeperListener(connectionString, nodePath);
        // NOTE(review): this is logged when start-up has finished, not when the application stops;
        // the message text is kept unchanged in case log monitoring matches on it.
        log.info("----------CrawlTranslateApplication stop success----------");
    }

    /**
     * Periodically logs the number of queued translation tasks; the schedule comes from
     * {@code crawl.cron.size_cron}.
     */
    @Scheduled(cron = "${crawl.cron.size_cron}")
    public void getQueueSize() {
        log.info("--------->taskQueue length == " + Config.chatGptTranslateQueue.size());
    }
}

32
src/main/java/com/bfd/crawl_translate/controller/ApiController.java

@ -0,0 +1,32 @@
package com.bfd.crawl_translate.controller;
import com.alibaba.fastjson.JSON;
import com.bfd.crawl_translate.utils.Config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.*;
import java.util.Map;
/**
 * REST endpoint that accepts translation requests and puts them on the in-memory task queue.
 *
 * @author guowei
 */
@Slf4j
@RestController
@RequestMapping(value = "/chatGpt")
@CrossOrigin(origins = "*", maxAge = 3600)
public class ApiController {

    /**
     * Queues the raw request body on {@link Config#chatGptTranslateQueue}.
     *
     * @param RequestStr raw request body; it is queued as-is without being parsed here
     * @return a fixed acknowledgement string (also returned when queueing was interrupted)
     */
    @RequestMapping(value = "/translate", method = RequestMethod.POST, produces = "application/json")
    @ResponseBody
    public String getchannelitems(@RequestBody String RequestStr) {
        System.out.println("收到gpt翻译请求:"+RequestStr);
        log.info("收到gpt翻译请求");
        try {
            Config.chatGptTranslateQueue.put(RequestStr);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the servlet container thread can observe the interruption.
            Thread.currentThread().interrupt();
            log.error("推送队列失败",e);
        }
        return "TranslationAPi Successfully";
    }
}

105
src/main/java/com/bfd/crawl_translate/service/MainHandler.java

@ -0,0 +1,105 @@
package com.bfd.crawl_translate.service;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.file.FileWriter;
import com.bfd.crawl_translate.utils.Config;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
/**
* @author jian.mao
* @date 2023年11月3日
* @description
*/
@Slf4j
@Service
public class MainHandler implements ApplicationRunner {
@Value("${crawl.task.taskData}")
private String taskPath;
@Override
public void run(ApplicationArguments args) throws Exception {
//停止处理
waitDown();
//启动加载缓存任务
readTask(taskPath, Config.chatGptTranslateQueue);
}
public static void readTask(String path, LinkedBlockingQueue<String> queue){
File file = new File(path);
if(file.exists()){
List<String> tasks = null;
try {
tasks = FileUtils.readLines(file,"UTF-8");
} catch (IOException e) {
e.printStackTrace();
}
for (String taskStr : tasks) {
// Map<String, Object> task = JSONObject.parseObject(taskStr);
try {
System.out.println("读到缓存数据:"+taskStr);
queue.put(taskStr);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
file.delete();
}
}
/**
* 结束触发钩子
*/
public void waitDown() {
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
// 停止线程
Config.isStart = false;
log.info("stop-------");
writeTsskToFile();
}
});
}
/**
* 任务持久化到硬盘
*/
public void writeTsskToFile(){
System.out.println(taskPath);
File file = new File(taskPath);
FileWriter fileWriter = new FileWriter(file);
if (!file.exists()){
fileWriter = FileWriter.create(file);
}
while(true){
if(Config.chatGptTranslateQueue.size() > 0 ){
try {
String task = Config.chatGptTranslateQueue.take();
System.out.println("写入缓存数据:"+task);
fileWriter.write(task+"\r\n",true);
} catch (InterruptedException e) {
e.printStackTrace();
}
}else{
log.info("taskQueue write is file end");
break;
}
}
}
}

296
src/main/java/com/bfd/crawl_translate/service/TranslateChatGptService.java
File diff suppressed because it is too large
View File

28
src/main/java/com/bfd/crawl_translate/utils/Config.java

@ -0,0 +1,28 @@
package com.bfd.crawl_translate.utils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Process-wide shared state: API token, task queues and the run flag.
 *
 * @author guowei
 */
@Component
public class Config {

    /** Access token of the translation API; written by {@code HttpUtil.getToken()}. */
    public static String access_token;

    public static LinkedBlockingQueue<Map> taskQueue = new LinkedBlockingQueue<Map>();

    /**
     * Queue of pending chatGpt translation requests (raw request strings).
     */
    public static LinkedBlockingQueue<String> chatGptTranslateQueue = new LinkedBlockingQueue<String>();

    /**
     * Run flag; set to {@code false} by the shutdown hook in {@code MainHandler}.
     * Declared volatile so that a write from the shutdown-hook thread is visible to other threads.
     */
    public static volatile Boolean isStart = true;

    // NOTE(review): plain HashMap; not safe if accessed from several threads - confirm usage.
    public static Map stopCache = new HashMap<>();
}

19
src/main/java/com/bfd/crawl_translate/utils/Constants.java

@ -0,0 +1,19 @@
package com.bfd.crawl_translate.utils;
import org.springframework.stereotype.Component;
/**
 * String constants shared across the service.
 *
 * @author guowei
 */
@Component
public class Constants {

    /** Literal {@code "stop"}. */
    public static final String STOP = "stop";

    /** Literal {@code "scenes_id"}. */
    public static final String SCENES_ID = "scenes_id";

    /** Literal {@code "version"}. */
    public static final String VERSION = "version";

    /** Literal underscore, {@code "_"}. */
    public static final String UNDERLINE = "_";
}

13
src/main/java/com/bfd/crawl_translate/utils/ContentException.java

@ -0,0 +1,13 @@
package com.bfd.crawl_translate.utils;
/**
 * Checked exception type of this service, with an optional detail message.
 *
 * @author guowei
 */
public class ContentException extends Exception {

    /** Creates the exception without a detail message. */
    public ContentException() {
        super();
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param message detail message, available through {@link #getMessage()}
     */
    public ContentException(String message) {
        super(message);
    }
}

57
src/main/java/com/bfd/crawl_translate/utils/ESClientFactory.java

@ -0,0 +1,57 @@
package com.bfd.crawl_translate.utils;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestClientBuilder.HttpClientConfigCallback;
import org.elasticsearch.client.RestHighLevelClient;
/**
 * Factory for the Elasticsearch {@link RestHighLevelClient}.
 *
 * <p>Two optional customisations can be applied to the builder, each switched by a static flag:
 * request timeouts and connection-pool limits.</p>
 */
public class ESClientFactory {

    private static final int CONNECT_TIME_OUT = 1000;
    private static final int SOCKET_TIME_OUT = 30000;
    private static final int CONNECTION_REQUEST_TIME_OUT = 500;

    private static final int MAX_CONNECT_NUM = 100;
    private static final int MAX_CONNECT_PER_ROUTE = 100;

    /** When true, {@link #init()} applies the timeout settings. */
    private static boolean uniqueConnectTimeConfig = false;
    /** When true, {@link #init()} applies the connection-count settings. */
    private static boolean uniqueConnectNumConfig = true;

    /**
     * Builds a new high-level client for the fixed host, applying the customisations whose
     * flags are enabled.
     *
     * @return a newly created client
     */
    public static RestHighLevelClient init() {
        HttpHost esHost = new HttpHost("172.18.1.81", 9201, "http");
        RestClientBuilder clientBuilder = RestClient.builder(esHost);
        if (uniqueConnectTimeConfig) {
            setConnectTimeOutConfig(clientBuilder);
        }
        if (uniqueConnectNumConfig) {
            setMutiConnectConfig(clientBuilder);
        }
        return new RestHighLevelClient(clientBuilder);
    }

    /**
     * Timeout settings of the async HTTP client: connect, socket and connection-request timeouts.
     *
     * @param builder builder to customise
     */
    public static void setConnectTimeOutConfig(RestClientBuilder builder) {
        builder.setRequestConfigCallback(requestConfig -> requestConfig
                .setConnectTimeout(CONNECT_TIME_OUT)
                .setSocketTimeout(SOCKET_TIME_OUT)
                .setConnectionRequestTimeout(CONNECTION_REQUEST_TIME_OUT));
    }

    /**
     * Connection-count settings of the async HTTP client: total and per-route maximum.
     *
     * @param builder builder to customise
     */
    public static void setMutiConnectConfig(RestClientBuilder builder) {
        builder.setHttpClientConfigCallback(httpClient -> httpClient
                .setMaxConnTotal(MAX_CONNECT_NUM)
                .setMaxConnPerRoute(MAX_CONNECT_PER_ROUTE));
    }
}

274
src/main/java/com/bfd/crawl_translate/utils/HttpUtil.java

@ -0,0 +1,274 @@
package com.bfd.crawl_translate.utils;
import com.alibaba.fastjson2.JSON;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.ConnectException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
/**
 * HTTP helpers for the translation / language-detection services, plus a Kafka sender.
 *
 * <p>All methods are static. The access token obtained by {@link #getToken()} is stored in
 * {@link Config#access_token} and sent as a Bearer token by the other calls.</p>
 *
 * @author guowei
 */
@Slf4j
@Component
public class HttpUtil {

    /** Maximum number of attempts made for one remote call. */
    private static final int MAX_ATTEMPTS = 10;

    /**
     * Requests an access token and stores it in {@link Config#access_token}.
     * Invoked once at start-up and then on the {@code crawl.cron.token_cron} schedule.
     *
     * @return always the empty string
     * @throws RuntimeException wrapping the {@link IOException} when the HTTP call fails
     */
    @Scheduled(cron = "${crawl.cron.token_cron}")
    public static String getToken() {
        // NOTE(review): the timeouts are 6000 SECONDS each - presumably milliseconds were intended; confirm.
        OkHttpClient client = new OkHttpClient().newBuilder()
                .connectTimeout(6000, TimeUnit.SECONDS)
                .writeTimeout(6000, TimeUnit.SECONDS)
                .readTimeout(6000, TimeUnit.SECONDS)
                .build();
        MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
        // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
        RequestBody body = RequestBody.create(mediaType, "username=collectionSystem&password=1ade58c775ddc6203f444c60f9af680e&grant_type=password");
        Request request = new Request.Builder()
                .url("https://fanyi.percent.cn/api/SystemManager/oauth/token")
                .method("POST", body)
                .addHeader("Content-Type", "application/x-www-form-urlencoded")
                .build();
        try (Response response = client.newCall(request).execute()) {
            Map json = (Map) JSON.parse(response.body().string());
            if ("200".equals(json.get("code"))) {
                log.info("获取token成功,当前时间:{},token:{}",System.currentTimeMillis(),json.get("access_token"));
                Config.access_token = (String) json.get("access_token");
            } else {
                log.error("获取token失败,json:" + json);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return "";
    }

    /**
     * Translates {@code text} through the remote text-translation API, retrying up to
     * {@link #MAX_ATTEMPTS} times on connection failures, non-successful HTTP responses and
     * "translation timeout" (翻译超时) replies.
     *
     * @param fromLanguage source language code sent to the API
     * @param toLanguage   target language code sent to the API
     * @param text         text to translate; it is URL-encoded before being sent
     * @return a map with {@code isSuccess} (Boolean) and {@code result} (String) when the API
     *         answered definitively; an empty map when every attempt failed or an error occurred
     */
    public static Map getText(String fromLanguage, String toLanguage, String text) {
        String result = "";
        Map resultMap = new HashMap<>();
        try {
            String encode = URLEncoder.encode(text, "UTF-8");
            // NOTE(review): the timeouts are 30000 SECONDS each - presumably milliseconds were intended; confirm.
            OkHttpClient client = new OkHttpClient().newBuilder()
                    .connectTimeout(30000, TimeUnit.SECONDS)
                    .readTimeout(30000, TimeUnit.SECONDS)
                    .writeTimeout(30000, TimeUnit.SECONDS)
                    .build();
            MediaType mediaType = MediaType.parse("application/json");
            RequestBody body = RequestBody.create(mediaType, "{\"fromLanguage\":\""+fromLanguage+"\",\"toLanguage\":\""+toLanguage+"\",\"text\":\"" + encode + "\"}");
            Request request = new Request.Builder()
                    .url("https://fanyi.percent.cn/api/dt/tran/text")
                    .method("POST", body)
                    .addHeader("Authorization", "Bearer " + Config.access_token)
                    .addHeader("Content-Type", "application/json")
                    .build();
            for (int i = 0; i < MAX_ATTEMPTS; i++) {
                // try-with-resources closes every response, including unsuccessful ones that
                // are retried (those were previously left open).
                try (Response response = client.newCall(request).execute()) {
                    if (!response.isSuccessful()) {
                        continue;
                    }
                    Map json = (Map) JSON.parse(response.body().string());
                    int code = (int) json.get("code");
                    String message = (String) json.get("message");
                    if (code == 200 && message == null) {
                        Map data = (Map) json.get("data");
                        result = URLDecoder.decode((String) data.get("value"), "UTF-8");
                        resultMap.put("isSuccess", true);
                        resultMap.put("result", result);
                        break;
                    } else if (code == 500 && message != null && message.contains("翻译超时")) {
                        // Remote translation timed out: retry.
                        continue;
                    } else {
                        log.error("文本翻译失败,json:" + json);
                        resultMap.put("isSuccess", false);
                        resultMap.put("result", result);
                        break;
                    }
                } catch (ConnectException connectException) {
                    log.error("内容翻译失败"+i+"次",connectException);
                }
            }
        } catch (Throwable e) {
            log.error("实时文本接口翻译失败,text:"+text+",e:"+e);
        }
        return resultMap;
    }

    /**
     * Detects whether {@code text} is Chinese using the remote language-detection API.
     *
     * @param text text to classify
     * @return {@code true} only when the API reports language {@code "zh"}; {@code false}
     *         otherwise, including on any failure
     */
    public static Boolean getLanguage(String text) {
        Boolean isZh = false;
        try {
            OkHttpClient client = new OkHttpClient().newBuilder()
                    .connectTimeout(300, TimeUnit.SECONDS)
                    .readTimeout(60, TimeUnit.SECONDS)
                    .writeTimeout(30, TimeUnit.SECONDS)
                    .build();
            MediaType mediaType = MediaType.parse("application/json");
            // Serialize through the JSON library so quotes, backslashes and line breaks in the
            // text are escaped; plain concatenation produced invalid JSON for such input.
            Map<String, Object> payload = new HashMap<>();
            payload.put("text", text);
            RequestBody body = RequestBody.create(mediaType, JSON.toJSONString(payload));
            Request request = new Request.Builder()
                    .url("https://fanyi.percent.cn/api/dt/api/language/distinguish")
                    .method("POST", body)
                    .addHeader("Authorization", "Bearer " + Config.access_token)
                    .addHeader("Content-Type", "application/json")
                    .build();
            String responseBody = fetchBodyWithRetry(client, request);
            if (responseBody == null) {
                log.error("语种检测失败,text:" + text + ",e:no response received");
                return isZh;
            }
            Map json = (Map) JSON.parse(responseBody);
            if ((int) json.get("code") == 200) {
                Map data = (Map) json.get("data");
                isZh = "zh".equals(data.get("language"));
            } else {
                log.error("语种检测失败,json:" + json);
            }
        } catch (Throwable e) {
            log.error("语种检测失败,text:"+text+",e:"+e);
        }
        return isZh;
    }

    /**
     * Detects the language of {@code text} using the second language-detection service.
     *
     * @param text text to classify
     * @return the {@code lang} value reported by the service, or the empty string on any failure
     */
    public static String getLanguage2(String text) {
        String language = "";
        try {
            OkHttpClient client = new OkHttpClient().newBuilder()
                    .connectTimeout(300, TimeUnit.SECONDS)
                    .readTimeout(60, TimeUnit.SECONDS)
                    .writeTimeout(30, TimeUnit.SECONDS)
                    .build();
            MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
            // NOTE(review): the text is sent without URL-encoding, so '&', '+' or '%' in it will
            // alter the form body. Left unchanged because the service's expectations are not
            // visible here - confirm before encoding.
            RequestBody body = RequestBody.create(mediaType, "content="+text);
            Request request = new Request.Builder()
                    .url("http://distinguish.pontoaplus.com/translate/lang_detect")
                    .method("POST", body)
                    .build();
            String responseBody = fetchBodyWithRetry(client, request);
            if (responseBody == null) {
                log.error("语种检测失败,text:" + text + ",e:no response received");
                return language;
            }
            Map json = (Map) JSON.parse(responseBody);
            if ((int) json.get("code") == 200) {
                Map data = (Map) json.get("data");
                language = (String) data.get("lang");
            } else {
                log.error("语种检测失败,json:" + json);
            }
        } catch (Throwable e) {
            log.error("语种检测失败,text:" + text + ",e:" + e);
        }
        return language;
    }

    /**
     * Executes {@code request} up to {@link #MAX_ATTEMPTS} times, stopping at the first
     * successful HTTP response, and closes every response it opens.
     *
     * @return the body of the last response received (successful or not), or {@code null}
     *         when no attempt produced a response
     * @throws IOException on I/O failures other than {@link ConnectException}
     */
    private static String fetchBodyWithRetry(OkHttpClient client, Request request) throws IOException {
        String lastBody = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try (Response response = client.newCall(request).execute()) {
                lastBody = response.body().string();
                if (response.isSuccessful()) {
                    break;
                }
            } catch (ConnectException connectException) {
                log.error("connection to " + request.url() + " failed, attempt " + attempt, connectException);
            }
        }
        return lastBody;
    }

    /** Shared producer used by {@link #sendKafka(String, String)}; created when the class is loaded. */
    private static KafkaProducer<String, String> producer;

    static {
        // NOTE(review): the broker list is hard-coded here, while KfkUtil reads it from configuration.
        producer = getKafkaProdect("172.18.1.101:9092,172.18.1.102:9092,172.18.1.104:9092");
    }

    /**
     * Sends {@code resultData} to {@code topic} through the shared producer (asynchronous send).
     *
     * @param topic      target topic
     * @param resultData record value
     */
    public static void sendKafka(String topic, String resultData) {
        ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData);
        producer.send(se);
    }

    /**
     * Creates a String/String Kafka producer for the given brokers.
     *
     * @param brokerList comma-separated {@code host:port} list used as {@code bootstrap.servers}
     * @return a new producer
     */
    public static KafkaProducer<String, String> getKafkaProdect(String brokerList) {
        Properties props = new Properties();
        // Broker addresses.
        props.put("bootstrap.servers", brokerList);
        // A record counts as committed only after all in-sync replicas have acknowledged it.
        props.put("acks", "all");
        // Number of retries for a failed send.
        props.put("retries", 3);
        // Batch size in bytes; a full batch is sent immediately regardless of linger.ms.
        props.put("batch.size", 16384);
        // Wait up to 1 ms before sending so that more records can join the batch.
        props.put("linger.ms", 1);
        // Memory, in bytes, the producer may use to buffer records.
        props.put("buffer.memory", 33554432);
        props.put("key.serializer",
                StringSerializer.class.getName());
        props.put("value.serializer",
                StringSerializer.class.getName());
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
        return producer;
    }

    /**
     * Manual smoke test: sends one fixed translation request and prints the raw response.
     */
    public static void main(String[] args) throws IOException {
        OkHttpClient client = new OkHttpClient().newBuilder()
                .build();
        MediaType mediaType = MediaType.parse("application/json");
        RequestBody body = RequestBody.create(mediaType, "{\"fromLanguage\":\"auto\",\"toLanguage\":\"zh\",\"text\":\"hello\"}");
        // NOTE(review): hard-coded token and session id; these belong outside source control.
        Request request = new Request.Builder()
                .url("https://fanyi.percent.cn/api/dt/tran/text")
                .method("POST", body)
                // The scheme was misspelled "Beare"; getText() sends "Bearer ".
                .addHeader("Authorization", "Bearer fc5e8aa070b0e6c1eba8140c3462afb0")
                .addHeader("Content-Type", "application/json")
                .addHeader("Cookie", "dt_saas_token=fc5e8aa070b0e6c1eba8140c3462afb0; JSESSIONID=951aa17c-9d1d-4991-a327-d5d2dc2de860")
                .build();
        try (Response response = client.newCall(request).execute()) {
            String string = response.body().string();
            System.out.println(string);
        }
    }
}

81
src/main/java/com/bfd/crawl_translate/utils/KfkUtil.java

@ -0,0 +1,81 @@
package com.bfd.crawl_translate.utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.Properties;
/**
* @author guowei
* kfk工具类
*/
@Component
@Slf4j
public class KfkUtil {
private static String topic;
private static String brokerList;
@Value("${crawl.kafka.topic}")
public void setTopic(String topic) {
KfkUtil.topic = topic;
}
@Value("${crawl.kafka.brokers}")
public void setBrokerList(String brokerList) {
KfkUtil.brokerList = brokerList;
}
private static KafkaProducer<String, String> kafkaProducer;
public static int num = 0;
/**
* 获取KafkaProducer实例
*/
public static KafkaProducer<String, String> getProducer() {
if (kafkaProducer == null) {
Properties props = new Properties();
//xxx服务器ip
props.put("bootstrap.servers", brokerList);
//所有follower都响应了才认为消息提交成功"committed"
props.put("acks", "all");
//retries = MAX 无限重试直到你意识到出现了问题:)
props.put("retries", 3);
//producer将试图批处理消息记录以减少请求次数.默认的批量处理消息字节数
props.put("batch.size", 16384);
//batch.size当批量的数据大小达到设定值后就会立即发送不顾下面的linger.ms
//延迟1ms发送这项设置将通过增加小的延迟来完成--不是立即发送一条记录producer将会等待给定的延迟时间以允许其他消息记录发送这些消息记录可以批量处理
props.put("linger.ms", 1);
//producer可以用来缓存数据的内存大小
props.put("buffer.memory", 33554432);
props.put("key.serializer",
"org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer",
"org.apache.kafka.common.serialization.StringSerializer");
kafkaProducer = new KafkaProducer<String, String>(props);
}
return kafkaProducer;
}
/**
* 关闭KafkaProducer实例
*/
public static void closeProducer() {
if (kafkaProducer != null) {
log.info("----------close producer----------");
kafkaProducer.close();
kafkaProducer = null;
}
}
public static void sendKafka(String resultData) {
KafkaProducer<String, String> producer = getProducer();
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData);
producer.send(se);
log.info("发送kafka成功");
// num++;
}
}

92
src/main/java/com/bfd/crawl_translate/utils/PauseTool.java

@ -0,0 +1,92 @@
package com.bfd.crawl_translate.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.cache.NodeCache;
import org.apache.curator.framework.recipes.cache.NodeCacheListener;
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import java.util.HashMap;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
* @author:jinming
* @className:ZookeeperNodeMonitor
* @version:1.0
* @description: Zookeeper节点监听和Redis初始化工具类
* @Date:2024/7/2 14:20
*/
@Component
@Slf4j
public class PauseTool {
// 本地缓存
public static final HashMap<String, String> CACHE = new HashMap<>();
/**
* 初始化Redis中的version_*键并加载到本地缓存
*/
public void initializeRedisCache(StringRedisTemplate stringRedisTemplate) {
try {
Set<String> keys = stringRedisTemplate.keys("version_*");
if (keys != null) {
for (String key : keys) {
String value = stringRedisTemplate.opsForValue().get(key);
if (value != null) {
String sincesId = key.split("_")[1];
CACHE.put(sincesId.concat("_").concat(value), value);
}
}
}
log.info("当前缓存version信息:{}", JSON.toJSON(CACHE));
} catch (Exception e) {
log.error("Error initializing Redis cache", e);
}
}
public void setupZookeeperListener(String connectionString, String nodePath) {
CuratorFramework curatorFramework = CuratorFrameworkFactory.newClient(connectionString, new ExponentialBackoffRetry(1000, 3));
curatorFramework.start();
try {
// 创建节点监听器
NodeCache nodeCache = new NodeCache(curatorFramework, nodePath);
nodeCache.start();
log.info("数据监听已启动");
// 监听节点变化
nodeCache.getListenable().addListener(new NodeCacheListener() {
@Override
public void nodeChanged() throws Exception {
byte[] data = nodeCache.getCurrentData().getData();
try {
String nodeData = new String(data);
log.info("Node data changed: " + nodeData);
// 解析JSON数据
JSONObject jsonObject = JSON.parseObject(nodeData);
int scenesId = jsonObject.getIntValue("scenes_id");
int version = jsonObject.getIntValue("version");
String newKey = scenesId + "_" + version;
// 移除CACHE中所有以scenesId开头的key
CACHE.keySet().removeIf(key -> key.startsWith(scenesId + "_"));
// 将新的key放入CACHE
CACHE.put(newKey, String.valueOf(version));
log.info("当前缓存version信息:{}", JSON.toJSON(CACHE));
} catch (Exception e) {
e.printStackTrace();
}
}
});
} catch (Exception e) {
log.error("Error setting up Zookeeper listener", e);
}
}
}

166
src/main/java/com/bfd/crawl_translate/utils/PercentTransalteUtil.java

@ -0,0 +1,166 @@
package com.bfd.crawl_translate.utils;
import com.alibaba.fastjson2.JSON;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.ConnectException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* @author guowei
*/
@Component
@Slf4j
public class PercentTransalteUtil {
public static String token;
// @Scheduled(cron = "${crawl.cron.token_cron}")
public static String getToken() {
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
//isRememberMe为1表示获取长效token(7天内有效)不传或非1获取的token为12小时有效
RequestBody body = RequestBody.create(mediaType, "username=collectionSystem&password=67385d64fab46ab2a25f2ff2898e590d&grant_type=password");
Request request = new Request.Builder()
.url("https://fanyi.percent.cn/api/SystemManager/oauth/token")
.method("POST", body)
.addHeader("Content-Type", "application/x-www-form-urlencoded")
.build();
try {
Response response = client.newCall(request).execute();
Map json = (Map) JSON.parse(response.body().string());
if ("200".equals(json.get("code"))) {
log.info("获取token成功,当前时间:"+System.currentTimeMillis());
token = (String) json.get("access_token");
} else {
log.error("获取token失败,json:" + json);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return "";
}
public static Map getText(String text) {
String result = "";
Map resultMap = new HashMap<>();
try {
String encode = URLEncoder.encode(text, "UTF-8");
OkHttpClient client = new OkHttpClient().newBuilder()
.connectTimeout(300, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(30, TimeUnit.SECONDS)
.build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, "{\"fromLanguage\":\"auto\",\"toLanguage\":\"en\",\"text\":\"" + encode + "\"}");
Request request = new Request.Builder()
.url("https://fanyi.percent.cn/api/dt/tran/text")
.method("POST", body)
.addHeader("Authorization", "Bearer " + token)
.addHeader("Content-Type", "application/json")
.build();
Response response = null;
for (int i = 0; i < 10; i++) {
try {
response = client.newCall(request).execute();
if (response.isSuccessful()) {
break;
}
}catch (ConnectException connectException){
connectException.printStackTrace();
}
}
Map json = (Map) JSON.parse(response.body().string());
if ((int) json.get("code") == 200) {
Map data = (Map) json.get("data");
result = URLDecoder.decode((String) data.get("value"), "UTF-8");
resultMap.put("isSuccess", true);
resultMap.put("result", result);
} else {
log.error("文本翻译失败,json:" + json);
resultMap.put("isSuccess", false);
resultMap.put("result", result);
}
} catch (Throwable e) {
// e.
log.error("实时文本接口翻译失败,text:"+text+",e:"+e);
}
return resultMap;
}
/**
* 获取语种
* @param text
* @return
*/
public static Boolean getLanguage(String text) {
String result = "";
Boolean isZh = false;
try {
// String encode = URLEncoder.encode(text, "UTF-8");
OkHttpClient client = new OkHttpClient().newBuilder()
.connectTimeout(300, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(30, TimeUnit.SECONDS)
.build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, "{\"text\":\"" + text + "\"}");
Request request = new Request.Builder()
.url("https://fanyi.percent.cn/api/dt/api/language/distinguish")
.method("POST", body)
.addHeader("Authorization", "Bearer " + Config.access_token)
.addHeader("Content-Type", "application/json")
//.addHeader("Cookie", "dt_saas_token=d3c60514c65285836dc316b6d48d468e; JSESSIONID=2726ac68-ad92-470f-b1d5-c1c0255f1cb9")
.build();
Response response = null;
for (int i = 0; i < 10; i++) {
// System.out.println(i);
try {
response = client.newCall(request).execute();
if (response.isSuccessful()) {
break;
}
}catch (ConnectException connectException){
connectException.printStackTrace();
// continue;
}
}
Map json = (Map) JSON.parse(response.body().string());
if ((int) json.get("code") == 200) {
Map data = (Map) json.get("data");
// result = URLDecoder.decode((String) data.get("value"), "UTF-8");
String language = (String) data.get("language");
if (language.equals("zh")){
isZh = true;
}else {
isZh = false;
}
} else {
log.error("语种检测失败,json:" + json);
}
// System.out.println(response.code());
// System.out.println(response.body().string());
} catch (Throwable e) {
// e.
log.error("语种检测失败,text:"+text+",e:"+e);
}
return isZh;
}
public static void main(String[] args) {
getToken();
Map data = getText("你好");
System.out.println(data.get("result"));
}
}

165
src/main/java/com/bfd/crawl_translate/utils/TranslateUtil.java

@ -0,0 +1,165 @@
package com.bfd.crawl_translate.utils;
import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.Random;
import java.util.concurrent.TimeUnit;
/**
* @author guowei
*/
@Slf4j
@Component
public class TranslateUtil {
/**
* 模拟请求翻译
* @param sourceContent 原始内容
* @return 翻译后内容
*/
public static String doDown(String sourceContent) {
// System.out.println("sourceContent --"+sourceContent);
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded;charset=UTF-8");
String freq = "[[[\"MkEWBc\",\"[[\\\"#sourceContent#\\\",\\\"auto\\\",\\\"zh\\\",true],[null]]\",null,\"generic\"]]]&";
sourceContent = sourceContent.replace("\"", "\\\\\\\"").replace("\n", "%5C%5Cn");
freq = freq.replace("#sourceContent#", sourceContent);
String resData = null;
OkHttpClient client = new OkHttpClient().newBuilder().build();
try {
freq = URLEncoder.encode(freq, "UTF-8");
freq = freq.replace("%255C", "%5C").replace("+", "%20");
RequestBody body = RequestBody.create(mediaType, "f.req=" + freq + "&");
Request request = new Request.Builder()
.url("https://translate.google.com/_/TranslateWebserverUi/data/batchexecute")
.method("POST", body)
.addHeader("authority", "translate.google.cn")
.addHeader("accept", "*/*")
.addHeader("accept-language", "zh-CN,zh;q=0.9,en;q=0.8")
.addHeader("cache-control", "no-cache")
.addHeader("content-type", "application/x-www-form-urlencoded;charset=UTF-8")
.addHeader("cookie", "NID=511=voX-g_h3pvWIPMGq5T4ZaWq5jd6vRlZxBa6wqNTkEkdarBpallKXRxEOvfJu5TLDfbUxJXopAExNiqHJEW1wZU0MuvTIRmkVAAwBknQKHO_gu_xjtuXA00a56i8JL7RWSharKyQ5Ihoq0B-x21AANraC1Fhs9Q6q9eaSKZ3SwRw; _ga=GA1.3.1552263839.1660025840; _gid=GA1.3.1708261691.1660025840; OTZ=6628698_24_24__24_")
.addHeader("origin", "https://www..google.com")
.addHeader("pragma", "no-cache")
.addHeader("referer", "https://www.google.com/")
.addHeader("sec-ch-ua", "\"Chromium\";v=\"104\", \" Not A;Brand\";v=\"99\", \"Google Chrome\";v=\"104\"")
.addHeader("sec-ch-ua-arch", "\"x86\"")
.addHeader("sec-ch-ua-bitness", "\"64\"")
.addHeader("sec-ch-ua-full-version", "\"105.0.5195.127\"")
.addHeader("sec-ch-ua-full-version-list", "\".Not/A)Brand\";v=\"99.0.0.0\", \"Google Chrome\";v=\"105.0.5195.127\", \"Chromium\";v=\"105.0.5195.127\"")
.addHeader("sec-ch-ua-mobile", "?0")
.addHeader("sec-ch-ua-model", "")
.addHeader("sec-ch-ua-platform", "\"Windows\"")
.addHeader("sec-ch-ua-platform-version", "\"14.0.0\"")
.addHeader("sec-fetch-dest", "empty")
.addHeader("sec-fetch-mode", "cors")
.addHeader("sec-fetch-site", "same-origin")
.addHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36")
.addHeader("x-goog-batchexecute-bgr", "[\";QF64XhPQAAYEzRk0JM1fOJOg-gftA3smACkAIwj8RvJeTIev0D-SyoRKJ6uoX00-HGvJj52RYTiUM4uTOmtSjeDe8R8AAABETwAAAAN1AQcXADDPHkARfFVQcldCsJmHArK64X7Kwazlq5rjQUagjtthTLDL8Kzwcpyq2ZSnfkW-TgeEAkCdc1ZvDogdYwYPMOz1Wlm8kLxPrcyeDMlDZPwN-mxBRJ37LOpgblKfQbr4Uol0HOHebHB1MFSkLsj1GXbaAleVvEXINfpVoeY3ffPsALBLISUQo2-U9qxDn37JXqCrusVYhqzob2M7PEIb92UC8FT9NnDamVfEqaEjSFsmWH3-nfWCh4w65mczyn7qIhrktTvjIpNa7f19cW5hiPwrolcvqU7jjhELE2u-pJTtdmyuWW505lNZkFB3JDWIWbnF5fwlXW4h956fijzKow8rQ6VghrCdPmk5HKsDBvKKczaMM-j--yXw9UbLDtjwU3M_W_MMa3pxImT1161eWEmny3itLXPrN7F0ooYXFn8NMIcZ9124sRpN0JGQwOToAzJw_A5NxHNHHthCqsuZcmS1QWoDc52J7iVptnljhJj8wvin5DYCopcIraaZZdzKsyPL_cApUctaEC1mr5B6Bi5AVSNvkfyoZCx9tPdqAiDLjK0a0sx_vZYK5yARycrTsycOzN1aA9wCYvTdvtruv1f8JQwG6N9SsOAi4Ko0yHb8Uo9KhY6bO6sqcYqNOkWG8Zhl2-nvC6u2vLHofilKy8CAdzJQq5VlnrwhPPKhZupo1I_3XBYOxHYgy3Ly7fDaVhHUWam2H6xU6m4gaUbaTyx5MtD9mJqAOQRjrzeO-kUBshbAlCAEqpBd7IJ089Ph-RrST0NsRoUL7wuC5cxIs3jNcbBf3yietmn3vnDCWzRYI1gh7CtYSQ2Xk_kNLR41BjsoOK0\",null,null,380,7,null,null,0,\"2\"]")
.addHeader("x-same-domain", "1")
.build();
int i = 0;
while (i < 3) {
i++;
Response response = null;
try {
//requestPost 使用代理方式
resData = requestPost(request);
//使用本机ip
// response = client.newCall(request).execute();
// if (response.isSuccessful() && response.body() != null) {
// resData = response.body().string();
// }
// resData = response.body().string();
// System.out.println(resData);
} catch (Exception e) {
log.warn("Download Fail retry:{}. errorMsg:", i, e);
} finally {
// response.close();
}
}
} catch (Exception e) {
log.error("Download Fail sourceContent:"+sourceContent+",e:"+e);
}
String parse = doParse(resData);
return parse;
}
/**
* 解析请求内容
* @param resData
* @return
*/
public static String doParse(String resData){
String resTranslate = "";
try {
resData = resData.replaceAll("\\)\\]}'","");
resData = "{\"a\":"+resData+"}";
JSONObject jsonObject = JSONObject.parseObject(resData);
String a1 = jsonObject.getJSONArray("a").getJSONArray(0).get(2).toString();
String a2 = "{\"a\":"+a1+"}";
JSONObject objects = JSONObject.parseObject(a2);
int size = objects.getJSONArray("a").getJSONArray(1).getJSONArray(0).getJSONArray(0).getJSONArray(5).size();
for (int i=0;i<=size-1;i++){
String string = objects.getJSONArray("a").getJSONArray(1).getJSONArray(0).getJSONArray(0).getJSONArray(5).getJSONArray(i).get(0).toString();
if (!string.isEmpty() && !string.equals("null") ) {
resTranslate += objects.getJSONArray("a").getJSONArray(1).getJSONArray(0).getJSONArray(0).getJSONArray(5).getJSONArray(i).get(0).toString()+" ";
}
}
} catch (Exception e) {
e.printStackTrace();
log.error("Parse fail,e:"+e);
}
return resTranslate;
}
public static String requestPost(Request request) throws Exception {
OkHttpClient httpClient = Proxy();
String resData = null;
Random random = new Random();
int m = (int) (20 * (random.nextFloat() + 1));
Thread.sleep(m);
Response response = httpClient.newCall(request).execute();
if (response.isSuccessful() && response.body() != null) {
resData = response.body().string();
}
response.close();
return resData;
}
public static OkHttpClient Proxy() {
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("oversea_vpn.baifendian.com", 3128));
Authenticator proxyAuthenticator = new Authenticator() {
@Override
public Request authenticate(Route route, Response response) throws IOException {
//设置代理服务器账号密码
String credential = Credentials.basic("", "");
return response.request().newBuilder()
.header("Proxy-Authorization", credential)
.build();
}
};
OkHttpClient httpClientProxy = new OkHttpClient.Builder()
// //1000是1秒
.connectTimeout(200, TimeUnit.SECONDS)
//设置读取超时时间
.readTimeout(30, TimeUnit.SECONDS)
.proxy(proxy)
.proxyAuthenticator(proxyAuthenticator)
.build();
return httpClientProxy;
}
public static void main(String[] args) {
String doDown = doDown("British authorities withhold Chelsea proceeds from Roman Abramovich");
System.out.println(doDown);
}
}

59
src/main/resources/application.yml

@ -0,0 +1,59 @@
# Application settings: crawl job (Elasticsearch query window, cron schedules, Kafka, task file),
# HTTP server port, logging, Redis, Spring Boot Admin client, actuator endpoints and Zookeeper.
crawl:
es:
subjectId: cl_special_1.0_305041
cid: all
beginTime: 2000-01-01
endTime: 2025-01-01
cron:
token_cron: 0 0 0/10 * * ?
query_cron: 0 26 10 * * ?
size_cron: 0 0/30 * * * *
kafka:
topic: analyze
brokers: 172.26.28.30:9092
task:
taskData: ./data/taskData.txt
server:
port: 9999
# Log level
logging:
level:
com:
bfd: INFO
# Log directory
log:
path: ./logs
spring:
redis:
host: 172.24.12.126
port: 6379
timeout: 10000
database: 5
jedis:
pool:
max-active: 8 # maximum number of connections in the pool (a negative value means no limit)
max-wait: 800 # maximum time to block waiting for a pooled connection (a negative value means no limit)
max-idle: 8 # maximum number of idle connections in the pool
min-idle: 2 # minimum number of idle connections in the pool
boot:
admin:
client:
url: http://172.18.1.147:8001
instance:
service-base-url: http://172.18.1.147:9999
application:
name: translate
management:
endpoints:
web:
exposure:
include: "*"
endpoint:
health:
show-details: always
health:
elasticsearch:
enabled: false
zookeeper:
connection-string: 172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181
publish-node: /analyze

38
src/main/resources/logback-spring.xml

@ -0,0 +1,38 @@
<configuration>
  <!-- Values are read from the Spring environment (application.yml). The defaults keep the
       appender usable when a key is missing, instead of resolving to "*_IS_UNDEFINED". -->
  <springProperty scope="context" name="logging.path" source="logging.log.path" defaultValue="./logs"/>
  <springProperty scope="context" name="logging.level" source="logging.level.com.bfd" defaultValue="INFO"/>

  <!-- Default console output. Production normally runs in the background, so it is disabled. -->
  <!--
  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
    <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
      <pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</pattern>
    </encoder>
  </appender>
  -->

  <!-- Daily rolling file appender; events below ${logging.level} are filtered out. -->
  <appender name="GLMAPPER-LOGGERONE"
            class="ch.qos.logback.core.rolling.RollingFileAppender">
    <append>true</append>
    <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
      <level>${logging.level}</level>
    </filter>
    <!-- Alternative file name kept from the original: ${logging.path}/sendKafka.log -->
    <file>${logging.path}/crawlSchedule.log</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- Alternative pattern kept from the original: ${logging.path}/sendKafka.log.%d{yyyy-MM-dd} -->
      <fileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</fileNamePattern>
      <maxHistory>7</maxHistory>
    </rollingPolicy>
    <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
      <!-- NOTE(review): %line needs caller data, which Logback documents as relatively slow. -->
      <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern>
      <charset>UTF-8</charset>
    </encoder>
  </appender>

  <root level="info">
    <appender-ref ref="GLMAPPER-LOGGERONE"/>
    <!-- <appender-ref ref="STDOUT"/> -->
  </root>
</configuration>
Loading…
Cancel
Save