commit
41e9225020
22 changed files with 1773 additions and 0 deletions
-
40.classpath
-
23.project
-
4.settings/org.eclipse.core.resources.prefs
-
9.settings/org.eclipse.jdt.core.prefs
-
4.settings/org.eclipse.m2e.core.prefs
-
1README.md
-
198pom.xml
-
69src/main/java/com/bfd/crawl_translate/CrawlTranslateApplication.java
-
32src/main/java/com/bfd/crawl_translate/controller/ApiController.java
-
105src/main/java/com/bfd/crawl_translate/service/MainHandler.java
-
296src/main/java/com/bfd/crawl_translate/service/TranslateChatGptService.java
-
28src/main/java/com/bfd/crawl_translate/utils/Config.java
-
19src/main/java/com/bfd/crawl_translate/utils/Constants.java
-
13src/main/java/com/bfd/crawl_translate/utils/ContentException.java
-
57src/main/java/com/bfd/crawl_translate/utils/ESClientFactory.java
-
274src/main/java/com/bfd/crawl_translate/utils/HttpUtil.java
-
81src/main/java/com/bfd/crawl_translate/utils/KfkUtil.java
-
92src/main/java/com/bfd/crawl_translate/utils/PauseTool.java
-
166src/main/java/com/bfd/crawl_translate/utils/PercentTransalteUtil.java
-
165src/main/java/com/bfd/crawl_translate/utils/TranslateUtil.java
-
59src/main/resources/application.yml
-
38src/main/resources/logback-spring.xml
@ -0,0 +1,40 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<classpath> |
|||
<classpathentry kind="src" output="target/classes" path="src/main/java"> |
|||
<attributes> |
|||
<attribute name="optional" value="true"/> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
<attribute name="optional" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="src" output="target/test-classes" path="src/test/java"> |
|||
<attributes> |
|||
<attribute name="optional" value="true"/> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
<attribute name="test" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
<attribute name="test" value="true"/> |
|||
<attribute name="optional" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="output" path="target/classes"/> |
|||
</classpath> |
@ -0,0 +1,23 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<projectDescription> |
|||
<name>analyst_translate</name> |
|||
<comment></comment> |
|||
<projects> |
|||
</projects> |
|||
<buildSpec> |
|||
<buildCommand> |
|||
<name>org.eclipse.jdt.core.javabuilder</name> |
|||
<arguments> |
|||
</arguments> |
|||
</buildCommand> |
|||
<buildCommand> |
|||
<name>org.eclipse.m2e.core.maven2Builder</name> |
|||
<arguments> |
|||
</arguments> |
|||
</buildCommand> |
|||
</buildSpec> |
|||
<natures> |
|||
<nature>org.eclipse.jdt.core.javanature</nature> |
|||
<nature>org.eclipse.m2e.core.maven2Nature</nature> |
|||
</natures> |
|||
</projectDescription> |
@ -0,0 +1,4 @@ |
|||
eclipse.preferences.version=1 |
|||
encoding//src/main/java=UTF-8 |
|||
encoding//src/main/resources=UTF-8 |
|||
encoding/<project>=UTF-8 |
@ -0,0 +1,9 @@ |
|||
eclipse.preferences.version=1 |
|||
org.eclipse.jdt.core.compiler.codegen.methodParameters=generate |
|||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 |
|||
org.eclipse.jdt.core.compiler.compliance=1.8 |
|||
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled |
|||
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning |
|||
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore |
|||
org.eclipse.jdt.core.compiler.release=disabled |
|||
org.eclipse.jdt.core.compiler.source=1.8 |
@ -0,0 +1,4 @@ |
|||
activeProfiles= |
|||
eclipse.preferences.version=1 |
|||
resolveWorkspaceProjects=true |
|||
version=1 |
@ -0,0 +1 @@ |
|||
bfd翻译应用 |
@ -0,0 +1,198 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
<parent> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-starter-parent</artifactId> |
|||
<version>2.2.4.RELEASE</version> |
|||
<relativePath/> <!-- lookup parent from repository --> |
|||
</parent> |
|||
<groupId>com.bfd</groupId> |
|||
<artifactId>crawl_translate</artifactId> |
|||
<version>0.0.1-SNAPSHOT</version> |
|||
<name>crawl_translate</name> |
|||
<description>crawl_translate</description> |
|||
<properties> |
|||
<java.version>1.8</java.version> |
|||
</properties> |
|||
<dependencies> |
|||
<dependency> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-starter-web</artifactId> |
|||
</dependency> |
|||
|
|||
<dependency> |
|||
<groupId>mysql</groupId> |
|||
<artifactId>mysql-connector-java</artifactId> |
|||
<scope>runtime</scope> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.projectlombok</groupId> |
|||
<artifactId>lombok</artifactId> |
|||
<optional>true</optional> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-starter-test</artifactId> |
|||
<scope>test</scope> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.elasticsearch</groupId> |
|||
<artifactId>elasticsearch</artifactId> |
|||
<version>6.0.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.elasticsearch.client</groupId> |
|||
<artifactId>elasticsearch-rest-client</artifactId> |
|||
<version>6.0.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.elasticsearch.client</groupId> |
|||
<artifactId>elasticsearch-rest-high-level-client</artifactId> |
|||
<version>6.0.0</version> |
|||
<exclusions> |
|||
<exclusion> |
|||
<groupId>org.elasticsearch.client</groupId> |
|||
<artifactId>elasticsearch-rest-client</artifactId> |
|||
</exclusion> |
|||
</exclusions> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>com.squareup.okhttp3</groupId> |
|||
<artifactId>okhttp</artifactId> |
|||
<version>3.11.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>com.alibaba</groupId> |
|||
<artifactId>fastjson</artifactId> |
|||
<version>2.0.12</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>cn.hutool</groupId> |
|||
<artifactId>hutool-all</artifactId> |
|||
<version>5.8.9</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.kafka</groupId> |
|||
<artifactId>kafka-clients</artifactId> |
|||
<version>2.7.1</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.poi</groupId> |
|||
<artifactId>poi-ooxml</artifactId> |
|||
<version>5.2.2</version> |
|||
</dependency> |
|||
<!-- https://mvnrepository.com/artifact/de.codecentric/spring-boot-admin-starter-client --> |
|||
<dependency> |
|||
<groupId>de.codecentric</groupId> |
|||
<artifactId>spring-boot-admin-client</artifactId> |
|||
<version>2.2.4</version> |
|||
</dependency> |
|||
<!--redis --> |
|||
<dependency> |
|||
<groupId>org.redisson</groupId> |
|||
<artifactId>redisson-spring-boot-starter</artifactId> |
|||
<version>3.13.6</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-starter-data-redis</artifactId> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.curator</groupId> |
|||
<artifactId>curator-framework</artifactId> |
|||
<version>5.2.0</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.curator</groupId> |
|||
<artifactId>curator-recipes</artifactId> |
|||
<version>5.2.0</version> |
|||
</dependency> |
|||
</dependencies> |
|||
|
|||
<build> |
|||
<plugins> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-jar-plugin</artifactId> |
|||
<configuration> |
|||
<!--不打入jar包的文件类型或者路径--> |
|||
<excludes> |
|||
<exclude>*.properties</exclude> |
|||
<exclude>*.yml</exclude> |
|||
<exclude>*.yaml</exclude> |
|||
</excludes> |
|||
<archive> |
|||
<manifest> |
|||
<!-- 执行的主程序路径 --> |
|||
<mainClass>com.bfd.crawl_translate.CrawlTranslateApplication</mainClass> |
|||
<!--是否要把第三方jar放到manifest的classpath中--> |
|||
<addClasspath>true</addClasspath> |
|||
<!--生成的manifest中classpath的前缀,因为要把第三方jar放到lib目录下,所以classpath的前缀是lib/--> |
|||
<classpathPrefix>lib/</classpathPrefix> |
|||
<!-- 打包时 MANIFEST.MF 文件不记录的时间戳版本 --> |
|||
<useUniqueVersions>false</useUniqueVersions> |
|||
</manifest> |
|||
<manifestEntries> |
|||
<!-- 在 Class-Path 下添加配置文件的路径 --> |
|||
<Class-Path>lib/pauseTool-1.0.jar config/</Class-Path> |
|||
</manifestEntries> |
|||
</archive> |
|||
</configuration> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-dependency-plugin</artifactId> |
|||
<executions> |
|||
<execution> |
|||
<id>copy</id> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>copy-dependencies</goal> |
|||
</goals> |
|||
<configuration> |
|||
<outputDirectory>${project.build.directory}/lib/</outputDirectory> |
|||
</configuration> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
|
|||
<plugin> |
|||
<artifactId>maven-resources-plugin</artifactId> |
|||
<executions> |
|||
<execution> |
|||
<id>copy-resources</id> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>copy-resources</goal> |
|||
</goals> |
|||
<configuration> |
|||
<resources> |
|||
<!--把配置文件打包到指定路径--> |
|||
<!-- Copy the externalised configuration files (the ones maven-jar-plugin keeps out of the jar) into the config directory. -->
<resource>
    <directory>src/main/resources/</directory>
    <includes>
        <include>*.properties</include>
        <include>*.yml</include>
        <!-- This entry was tagged <exclude> although it sits inside <includes>;
             *.yaml is excluded from the jar above, so it has to be copied here as well. -->
        <include>*.yaml</include>
    </includes>
</resource>
|||
</resources> |
|||
<outputDirectory>${project.build.directory}/config</outputDirectory> |
|||
</configuration> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-compiler-plugin</artifactId> |
|||
<configuration> |
|||
<source>8</source> |
|||
<target>8</target> |
|||
</configuration> |
|||
</plugin> |
|||
</plugins> |
|||
</build> |
|||
|
|||
</project> |
@ -0,0 +1,69 @@ |
|||
package com.bfd.crawl_translate; |
|||
|
|||
import java.util.concurrent.ExecutorService; |
|||
import java.util.concurrent.LinkedBlockingQueue; |
|||
import java.util.concurrent.ThreadFactory; |
|||
import java.util.concurrent.ThreadPoolExecutor; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
import org.springframework.beans.factory.annotation.Autowired; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.boot.SpringApplication; |
|||
import org.springframework.boot.autoconfigure.SpringBootApplication; |
|||
import org.springframework.context.ConfigurableApplicationContext; |
|||
import org.springframework.data.redis.core.StringRedisTemplate; |
|||
import org.springframework.scheduling.annotation.EnableScheduling; |
|||
import org.springframework.scheduling.annotation.Scheduled; |
|||
|
|||
import com.bfd.crawl_translate.service.TranslateChatGptService; |
|||
import com.bfd.crawl_translate.utils.Config; |
|||
import com.bfd.crawl_translate.utils.HttpUtil; |
|||
import com.bfd.crawl_translate.utils.PauseTool; |
|||
|
|||
import cn.hutool.core.thread.ThreadFactoryBuilder; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
|
|||
@SpringBootApplication |
|||
@Slf4j |
|||
@EnableScheduling |
|||
public class CrawlTranslateApplication { |
|||
|
|||
@Autowired |
|||
TranslateChatGptService translateChatGptService; |
|||
@Autowired |
|||
private StringRedisTemplate stringRedisTemplate; |
|||
|
|||
@Value("${zookeeper.connection-string}") |
|||
private String connectionString; |
|||
@Value("${zookeeper.publish-node}") |
|||
private String nodePath; |
|||
|
|||
|
|||
public static void main(String[] args) { |
|||
|
|||
ConfigurableApplicationContext applicationContext = SpringApplication.run(CrawlTranslateApplication.class, args); |
|||
applicationContext.getBean(CrawlTranslateApplication.class).start(); |
|||
} |
|||
public void start(){ |
|||
log.info("----------CrawlTranslateApplication start success----------"); |
|||
HttpUtil.getToken(); |
|||
//定义线程池 |
|||
ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNamePrefix("crawl-pool-%d").build(); |
|||
ExecutorService singleThreadPool = new ThreadPoolExecutor(10, 20, 100L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(1024), namedThreadFactory, new ThreadPoolExecutor.AbortPolicy()); |
|||
|
|||
|
|||
singleThreadPool.execute(translateChatGptService); |
|||
|
|||
PauseTool pauseTool = new PauseTool(); |
|||
pauseTool.initializeRedisCache(stringRedisTemplate); |
|||
pauseTool.setupZookeeperListener(connectionString, nodePath); |
|||
|
|||
log.info("----------CrawlTranslateApplication stop success----------"); |
|||
} |
|||
|
|||
@Scheduled(cron = "${crawl.cron.size_cron}") |
|||
public void getQueueSize(){ |
|||
log.info("--------->taskQueue length == "+ Config.chatGptTranslateQueue.size()); |
|||
} |
|||
|
|||
} |
@ -0,0 +1,32 @@ |
|||
package com.bfd.crawl_translate.controller; |
|||
|
|||
import com.alibaba.fastjson.JSON; |
|||
import com.bfd.crawl_translate.utils.Config; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.web.bind.annotation.*; |
|||
|
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Slf4j |
|||
@RestController |
|||
@RequestMapping(value = "/chatGpt") |
|||
@CrossOrigin(origins = "*", maxAge = 3600) |
|||
public class ApiController { |
|||
@RequestMapping(value = "/translate", method = RequestMethod.POST, produces = "application/json") |
|||
@ResponseBody |
|||
|
|||
public String getchannelitems(@RequestBody String RequestStr) { |
|||
System.out.println("收到gpt翻译请求:"+RequestStr); |
|||
log.info("收到gpt翻译请求"); |
|||
// Map parse = (Map) JSON.parse(RequestStr); |
|||
try { |
|||
Config.chatGptTranslateQueue.put(RequestStr); |
|||
} catch (InterruptedException e) { |
|||
log.error("推送队列失败",e); |
|||
} |
|||
return "TranslationAPi Successfully"; |
|||
} |
|||
} |
@ -0,0 +1,105 @@ |
|||
package com.bfd.crawl_translate.service; |
|||
|
|||
import cn.hutool.core.io.FileUtil; |
|||
import cn.hutool.core.io.file.FileWriter; |
|||
import com.bfd.crawl_translate.utils.Config; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.apache.commons.io.FileUtils; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.boot.ApplicationArguments; |
|||
import org.springframework.boot.ApplicationRunner; |
|||
import org.springframework.stereotype.Service; |
|||
|
|||
import java.io.File; |
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
import java.util.concurrent.LinkedBlockingQueue; |
|||
|
|||
|
|||
/** |
|||
* @author jian.mao |
|||
* @date 2023年11月3日 |
|||
* @description |
|||
*/ |
|||
@Slf4j |
|||
@Service |
|||
public class MainHandler implements ApplicationRunner { |
|||
|
|||
@Value("${crawl.task.taskData}") |
|||
private String taskPath; |
|||
|
|||
@Override |
|||
public void run(ApplicationArguments args) throws Exception { |
|||
//停止处理 |
|||
waitDown(); |
|||
//启动加载缓存任务 |
|||
readTask(taskPath, Config.chatGptTranslateQueue); |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
public static void readTask(String path, LinkedBlockingQueue<String> queue){ |
|||
File file = new File(path); |
|||
if(file.exists()){ |
|||
List<String> tasks = null; |
|||
try { |
|||
tasks = FileUtils.readLines(file,"UTF-8"); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
for (String taskStr : tasks) { |
|||
// Map<String, Object> task = JSONObject.parseObject(taskStr); |
|||
try { |
|||
System.out.println("读到缓存数据:"+taskStr); |
|||
queue.put(taskStr); |
|||
} catch (InterruptedException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
file.delete(); |
|||
} |
|||
} |
|||
/** |
|||
* 结束触发钩子 |
|||
*/ |
|||
public void waitDown() { |
|||
Runtime.getRuntime().addShutdownHook(new Thread() { |
|||
@Override |
|||
public void run() { |
|||
// 停止线程 |
|||
Config.isStart = false; |
|||
log.info("stop-------"); |
|||
writeTsskToFile(); |
|||
} |
|||
}); |
|||
} |
|||
|
|||
|
|||
/** |
|||
* 任务持久化到硬盘 |
|||
*/ |
|||
public void writeTsskToFile(){ |
|||
System.out.println(taskPath); |
|||
File file = new File(taskPath); |
|||
FileWriter fileWriter = new FileWriter(file); |
|||
if (!file.exists()){ |
|||
fileWriter = FileWriter.create(file); |
|||
} |
|||
while(true){ |
|||
if(Config.chatGptTranslateQueue.size() > 0 ){ |
|||
try { |
|||
String task = Config.chatGptTranslateQueue.take(); |
|||
System.out.println("写入缓存数据:"+task); |
|||
fileWriter.write(task+"\r\n",true); |
|||
} catch (InterruptedException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
}else{ |
|||
log.info("taskQueue write is file end"); |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
296
src/main/java/com/bfd/crawl_translate/service/TranslateChatGptService.java
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,28 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
import java.util.concurrent.LinkedBlockingQueue; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
public class Config { |
|||
public static String access_token; |
|||
|
|||
public static LinkedBlockingQueue<Map> taskQueue = new LinkedBlockingQueue<Map>(); |
|||
|
|||
|
|||
/** |
|||
* chatGpt 翻译 |
|||
*/ |
|||
public static LinkedBlockingQueue<String> chatGptTranslateQueue = new LinkedBlockingQueue<String>(); |
|||
|
|||
public static Boolean isStart = true; |
|||
|
|||
public static Map stopCache = new HashMap<>(); |
|||
} |
@ -0,0 +1,19 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import org.springframework.stereotype.Component; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
public class Constants { |
|||
|
|||
public final static String STOP = "stop"; |
|||
|
|||
public final static String SCENES_ID = "scenes_id"; |
|||
|
|||
public final static String VERSION = "version"; |
|||
|
|||
public final static String UNDERLINE = "_"; |
|||
|
|||
} |
@ -0,0 +1,13 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
/**
 * Checked exception of the translation service, optionally carrying a message.
 *
 * @author guowei
 */
public class ContentException extends Exception {

    /** Creates the exception without a detail message. */
    public ContentException() {
        super();
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param message text later returned by {@link #getMessage()}
     */
    public ContentException(String message) {
        super(message);
    }
}
@ -0,0 +1,57 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import org.apache.http.HttpHost; |
|||
import org.apache.http.client.config.RequestConfig; |
|||
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; |
|||
import org.elasticsearch.client.RestClient; |
|||
import org.elasticsearch.client.RestClientBuilder; |
|||
import org.elasticsearch.client.RestClientBuilder.HttpClientConfigCallback; |
|||
import org.elasticsearch.client.RestHighLevelClient; |
|||
|
|||
public class ESClientFactory { |
|||
private static final int CONNECT_TIME_OUT = 1000; |
|||
private static final int SOCKET_TIME_OUT = 30000; |
|||
private static final int CONNECTION_REQUEST_TIME_OUT = 500; |
|||
private static final int MAX_CONNECT_NUM = 100; |
|||
private static final int MAX_CONNECT_PER_ROUTE = 100; |
|||
private static boolean uniqueConnectTimeConfig = false; |
|||
private static boolean uniqueConnectNumConfig = true; |
|||
|
|||
public static RestHighLevelClient init(){ |
|||
RestClientBuilder builder = RestClient.builder(new HttpHost("172.18.1.81",9201,"http")); |
|||
if(uniqueConnectTimeConfig){ |
|||
setConnectTimeOutConfig(builder); |
|||
} |
|||
if(uniqueConnectNumConfig){ |
|||
setMutiConnectConfig(builder); |
|||
} |
|||
RestHighLevelClient restHighLevelClient = new RestHighLevelClient(builder); |
|||
return restHighLevelClient; |
|||
} |
|||
|
|||
// 主要关于异步httpclient的连接延时配置 |
|||
|
|||
public static void setConnectTimeOutConfig(RestClientBuilder builder){ |
|||
builder.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() { |
|||
@Override |
|||
public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder requestConfigBuilder) { |
|||
requestConfigBuilder.setConnectTimeout(CONNECT_TIME_OUT); |
|||
requestConfigBuilder.setSocketTimeout(SOCKET_TIME_OUT); |
|||
requestConfigBuilder.setConnectionRequestTimeout(CONNECTION_REQUEST_TIME_OUT); |
|||
return requestConfigBuilder; |
|||
} |
|||
}); |
|||
} |
|||
|
|||
// 主要关于异步httpclient的连接数配置 |
|||
public static void setMutiConnectConfig(RestClientBuilder builder){ |
|||
builder.setHttpClientConfigCallback(new HttpClientConfigCallback() { |
|||
@Override |
|||
public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) { |
|||
httpClientBuilder.setMaxConnTotal(MAX_CONNECT_NUM); |
|||
httpClientBuilder.setMaxConnPerRoute(MAX_CONNECT_PER_ROUTE); |
|||
return httpClientBuilder; |
|||
} |
|||
}); |
|||
} |
|||
} |
@ -0,0 +1,274 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import com.alibaba.fastjson2.JSON; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import okhttp3.*; |
|||
import org.apache.kafka.clients.producer.KafkaProducer; |
|||
import org.apache.kafka.clients.producer.ProducerRecord; |
|||
import org.apache.kafka.common.serialization.StringSerializer; |
|||
import org.springframework.scheduling.annotation.Scheduled; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.io.IOException; |
|||
import java.net.ConnectException; |
|||
import java.net.URLDecoder; |
|||
import java.net.URLEncoder; |
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
import java.util.Properties; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Slf4j |
|||
@Component |
|||
public class HttpUtil { |
|||
|
|||
@Scheduled(cron = "${crawl.cron.token_cron}") |
|||
public static String getToken() { |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.readTimeout(6000,TimeUnit.SECONDS) |
|||
.connectTimeout(6000,TimeUnit.SECONDS) |
|||
.writeTimeout(6000,TimeUnit.SECONDS) |
|||
.readTimeout(6000,TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded"); |
|||
RequestBody body = RequestBody.create(mediaType, "username=collectionSystem&password=1ade58c775ddc6203f444c60f9af680e&grant_type=password"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/SystemManager/oauth/token") |
|||
.method("POST", body) |
|||
.addHeader("Content-Type", "application/x-www-form-urlencoded") |
|||
.build(); |
|||
try { |
|||
Response response = client.newCall(request).execute(); |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
if ("200".equals(json.get("code"))) { |
|||
log.info("获取token成功,当前时间:{},token:{}",System.currentTimeMillis(),json.get("access_token")); |
|||
Config.access_token = (String) json.get("access_token"); |
|||
} else { |
|||
log.error("获取token失败,json:" + json); |
|||
} |
|||
} catch (IOException e) { |
|||
throw new RuntimeException(e); |
|||
} |
|||
return ""; |
|||
} |
|||
|
|||
public static Map getText(String fromLanguage,String toLanguage,String text) { |
|||
StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); |
|||
String methodName = stackTrace[2].getMethodName(); |
|||
String className = stackTrace[2].getClassName(); |
|||
// System.out.println("谁调用了我:"+className+","+methodName); |
|||
String result = ""; |
|||
Map resultMap = new HashMap<>(); |
|||
try { |
|||
String encode = URLEncoder.encode(text, "UTF-8"); |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.connectTimeout(30000, TimeUnit.SECONDS) |
|||
.readTimeout(30000, TimeUnit.SECONDS) |
|||
.writeTimeout(30000, TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/json"); |
|||
RequestBody body = RequestBody.create(mediaType, "{\"fromLanguage\":\""+fromLanguage+"\",\"toLanguage\":\""+toLanguage+"\",\"text\":\"" + encode + "\"}"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/dt/tran/text") |
|||
.method("POST", body) |
|||
.addHeader("Authorization", "Bearer " + Config.access_token) |
|||
.addHeader("Content-Type", "application/json") |
|||
// .addHeader("Cookie", "dt_saas_token="+Config.access_token+"; JSESSIONID=951aa17c-9d1d-4991-a327-d5d2dc2de860") |
|||
.build(); |
|||
Response response = null; |
|||
for (int i = 0; i < 10; i++) { |
|||
// System.out.println(i); |
|||
try { |
|||
response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
// break; |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
int code = (int) json.get("code"); |
|||
String message = (String) json.get("message"); |
|||
if (code == 200 && message == null) { |
|||
Map data = (Map) json.get("data"); |
|||
result = URLDecoder.decode((String) data.get("value"), "UTF-8"); |
|||
resultMap.put("isSuccess", true); |
|||
resultMap.put("result", result); |
|||
break; |
|||
} else if (code == 500 && message.contains("翻译超时")){ |
|||
continue; |
|||
} else { |
|||
log.error("文本翻译失败,json:" + json); |
|||
resultMap.put("isSuccess", false); |
|||
resultMap.put("result", result); |
|||
break; |
|||
} |
|||
} |
|||
}catch (ConnectException connectException){ |
|||
connectException.printStackTrace(); |
|||
log.error("内容翻译失败"+i+"次",connectException); |
|||
} |
|||
} |
|||
|
|||
} catch (Throwable e) { |
|||
// e. |
|||
log.error("实时文本接口翻译失败,text:"+text+",e:"+e); |
|||
} |
|||
return resultMap; |
|||
} |
|||
|
|||
public static Boolean getLanguage(String text) { |
|||
String result = ""; |
|||
Boolean isZh = false; |
|||
try { |
|||
// String encode = URLEncoder.encode(text, "UTF-8"); |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.connectTimeout(300, TimeUnit.SECONDS) |
|||
.readTimeout(60, TimeUnit.SECONDS) |
|||
.writeTimeout(30, TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/json"); |
|||
RequestBody body = RequestBody.create(mediaType, "{\"text\":\"" + text + "\"}"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/dt/api/language/distinguish") |
|||
.method("POST", body) |
|||
.addHeader("Authorization", "Bearer " + Config.access_token) |
|||
.addHeader("Content-Type", "application/json") |
|||
// .addHeader("Cookie", "dt_saas_token=d3c60514c65285836dc316b6d48d468e; JSESSIONID=2726ac68-ad92-470f-b1d5-c1c0255f1cb9") |
|||
.build(); |
|||
Response response = null; |
|||
for (int i = 0; i < 10; i++) { |
|||
// System.out.println(i); |
|||
try { |
|||
response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
break; |
|||
} |
|||
}catch (ConnectException connectException){ |
|||
connectException.printStackTrace(); |
|||
// continue; |
|||
} |
|||
} |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
if ((int) json.get("code") == 200) { |
|||
Map data = (Map) json.get("data"); |
|||
// result = URLDecoder.decode((String) data.get("value"), "UTF-8"); |
|||
String language = (String) data.get("language"); |
|||
if (language.equals("zh")){ |
|||
isZh = true; |
|||
}else { |
|||
isZh = false; |
|||
} |
|||
} else { |
|||
log.error("语种检测失败,json:" + json); |
|||
|
|||
} |
|||
// System.out.println(response.code()); |
|||
// System.out.println(response.body().string()); |
|||
} catch (Throwable e) { |
|||
// e. |
|||
log.error("语种检测失败,text:"+text+",e:"+e); |
|||
} |
|||
return isZh; |
|||
} |
|||
|
|||
public static String getLanguage2(String text) { |
|||
// String result = ""; |
|||
String language = ""; |
|||
try { |
|||
// String encode = URLEncoder.encode(text, "UTF-8"); |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.connectTimeout(300, TimeUnit.SECONDS) |
|||
.readTimeout(60, TimeUnit.SECONDS) |
|||
.writeTimeout(30, TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded"); |
|||
RequestBody body = RequestBody.create(mediaType, "content="+text); |
|||
Request request = new Request.Builder() |
|||
.url("http://distinguish.pontoaplus.com/translate/lang_detect") |
|||
// .url("http://172.18.1.155:19999/translate/lang_detect") |
|||
.method("POST", body) |
|||
.build(); |
|||
Response response = null; |
|||
for (int i = 0; i < 10; i++) { |
|||
// System.out.println(i); |
|||
try { |
|||
response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
break; |
|||
} |
|||
} catch (ConnectException connectException) { |
|||
connectException.printStackTrace(); |
|||
// continue; |
|||
} |
|||
} |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
if ((int) json.get("code") == 200) { |
|||
Map data = (Map) json.get("data"); |
|||
// result = URLDecoder.decode((String) data.get("value"), "UTF-8"); |
|||
language = (String) data.get("lang"); |
|||
|
|||
} else { |
|||
log.error("语种检测失败,json:" + json); |
|||
|
|||
} |
|||
// System.out.println(response.code()); |
|||
// System.out.println(response.body().string()); |
|||
} catch (Throwable e) { |
|||
// e. |
|||
log.error("语种检测失败,text:" + text + ",e:" + e); |
|||
} |
|||
return language; |
|||
} |
|||
|
|||
private static KafkaProducer<String, String> producer; |
|||
|
|||
static { |
|||
producer = getKafkaProdect("172.18.1.101:9092,172.18.1.102:9092,172.18.1.104:9092"); |
|||
} |
|||
|
|||
public static void sendKafka(String topic, String resultData) { |
|||
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData); |
|||
// KafkaProducer<String, String> producer = getKafkaProdect(config.brokers); |
|||
producer.send(se); |
|||
} |
|||
|
|||
public static KafkaProducer<String, String> getKafkaProdect(String brokerList) { |
|||
Properties props = new Properties(); |
|||
props.put("bootstrap.servers", brokerList);//xxx服务器ip |
|||
// props.put("bootstrap.servers", "172.18.1.114:9992");//xxx服务器ip |
|||
props.put("acks", "all");//所有follower都响应了才认为消息提交成功,即"committed" |
|||
props.put("retries", 3);//retries = MAX 无限重试,直到你意识到出现了问题:) |
|||
props.put("batch.size", 16384);//producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 |
|||
//batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms |
|||
props.put("linger.ms", 1);//延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 |
|||
props.put("buffer.memory", 33554432);//producer可以用来缓存数据的内存大小。 |
|||
props.put("key.serializer", |
|||
StringSerializer.class.getName()); |
|||
props.put("value.serializer", |
|||
StringSerializer.class.getName()); |
|||
KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props); |
|||
return producer; |
|||
} |
|||
|
|||
public static void main(String[] args) throws IOException { |
|||
// getToken(); |
|||
// String access_token = Config.access_token; |
|||
|
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/json"); |
|||
RequestBody body = RequestBody.create(mediaType, "{\"fromLanguage\":\"auto\",\"toLanguage\":\"zh\",\"text\":\"hello\"}"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/dt/tran/text") |
|||
.method("POST", body) |
|||
.addHeader("Authorization", "Beare fc5e8aa070b0e6c1eba8140c3462afb0") |
|||
.addHeader("Content-Type", "application/json") |
|||
.addHeader("Cookie", "dt_saas_token=fc5e8aa070b0e6c1eba8140c3462afb0; JSESSIONID=951aa17c-9d1d-4991-a327-d5d2dc2de860") |
|||
.build(); |
|||
Response response = client.newCall(request).execute(); |
|||
String string = response.body().string(); |
|||
System.out.println(string); |
|||
// System.out.println(access_token); |
|||
// getText("hello"); |
|||
} |
|||
} |
@ -0,0 +1,81 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.apache.kafka.clients.producer.KafkaProducer; |
|||
import org.apache.kafka.clients.producer.ProducerRecord; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.util.Properties; |
|||
|
|||
/** |
|||
* @author guowei |
|||
* kfk工具类 |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class KfkUtil { |
|||
private static String topic; |
|||
|
|||
private static String brokerList; |
|||
|
|||
@Value("${crawl.kafka.topic}") |
|||
public void setTopic(String topic) { |
|||
KfkUtil.topic = topic; |
|||
} |
|||
|
|||
@Value("${crawl.kafka.brokers}") |
|||
public void setBrokerList(String brokerList) { |
|||
KfkUtil.brokerList = brokerList; |
|||
} |
|||
private static KafkaProducer<String, String> kafkaProducer; |
|||
|
|||
public static int num = 0; |
|||
|
|||
/** |
|||
* 获取KafkaProducer实例 |
|||
*/ |
|||
public static KafkaProducer<String, String> getProducer() { |
|||
if (kafkaProducer == null) { |
|||
Properties props = new Properties(); |
|||
//xxx服务器ip |
|||
props.put("bootstrap.servers", brokerList); |
|||
//所有follower都响应了才认为消息提交成功,即"committed" |
|||
props.put("acks", "all"); |
|||
//retries = MAX 无限重试,直到你意识到出现了问题:) |
|||
props.put("retries", 3); |
|||
//producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 |
|||
props.put("batch.size", 16384); |
|||
//batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms |
|||
//延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 |
|||
props.put("linger.ms", 1); |
|||
//producer可以用来缓存数据的内存大小。 |
|||
props.put("buffer.memory", 33554432); |
|||
props.put("key.serializer", |
|||
"org.apache.kafka.common.serialization.StringSerializer"); |
|||
props.put("value.serializer", |
|||
"org.apache.kafka.common.serialization.StringSerializer"); |
|||
kafkaProducer = new KafkaProducer<String, String>(props); |
|||
} |
|||
return kafkaProducer; |
|||
} |
|||
|
|||
/** |
|||
* 关闭KafkaProducer实例 |
|||
*/ |
|||
public static void closeProducer() { |
|||
if (kafkaProducer != null) { |
|||
log.info("----------close producer----------"); |
|||
kafkaProducer.close(); |
|||
kafkaProducer = null; |
|||
} |
|||
} |
|||
|
|||
public static void sendKafka(String resultData) { |
|||
KafkaProducer<String, String> producer = getProducer(); |
|||
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData); |
|||
producer.send(se); |
|||
log.info("发送kafka成功"); |
|||
// num++; |
|||
} |
|||
} |
@ -0,0 +1,92 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import com.alibaba.fastjson.JSON; |
|||
import com.alibaba.fastjson.JSONObject; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.apache.curator.framework.CuratorFramework; |
|||
import org.apache.curator.framework.CuratorFrameworkFactory; |
|||
import org.apache.curator.framework.recipes.cache.NodeCache; |
|||
import org.apache.curator.framework.recipes.cache.NodeCacheListener; |
|||
import org.apache.curator.retry.ExponentialBackoffRetry; |
|||
import org.springframework.beans.factory.annotation.Autowired; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.data.redis.core.StringRedisTemplate; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import javax.annotation.PostConstruct; |
|||
import javax.annotation.Resource; |
|||
import java.util.HashMap; |
|||
import java.util.Set; |
|||
import java.util.concurrent.ConcurrentHashMap; |
|||
|
|||
/** |
|||
* @author:jinming |
|||
* @className:ZookeeperNodeMonitor |
|||
* @version:1.0 |
|||
* @description: Zookeeper节点监听和Redis初始化工具类 |
|||
* @Date:2024/7/2 14:20 |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class PauseTool { |
|||
|
|||
// 本地缓存 |
|||
public static final HashMap<String, String> CACHE = new HashMap<>(); |
|||
|
|||
/** |
|||
* 初始化Redis中的version_*键并加载到本地缓存 |
|||
*/ |
|||
public void initializeRedisCache(StringRedisTemplate stringRedisTemplate) { |
|||
try { |
|||
Set<String> keys = stringRedisTemplate.keys("version_*"); |
|||
if (keys != null) { |
|||
for (String key : keys) { |
|||
String value = stringRedisTemplate.opsForValue().get(key); |
|||
if (value != null) { |
|||
String sincesId = key.split("_")[1]; |
|||
CACHE.put(sincesId.concat("_").concat(value), value); |
|||
} |
|||
} |
|||
} |
|||
log.info("当前缓存version信息:{}", JSON.toJSON(CACHE)); |
|||
} catch (Exception e) { |
|||
log.error("Error initializing Redis cache", e); |
|||
} |
|||
} |
|||
|
|||
public void setupZookeeperListener(String connectionString, String nodePath) { |
|||
CuratorFramework curatorFramework = CuratorFrameworkFactory.newClient(connectionString, new ExponentialBackoffRetry(1000, 3)); |
|||
curatorFramework.start(); |
|||
try { |
|||
// 创建节点监听器 |
|||
NodeCache nodeCache = new NodeCache(curatorFramework, nodePath); |
|||
nodeCache.start(); |
|||
log.info("数据监听已启动"); |
|||
// 监听节点变化 |
|||
nodeCache.getListenable().addListener(new NodeCacheListener() { |
|||
@Override |
|||
public void nodeChanged() throws Exception { |
|||
byte[] data = nodeCache.getCurrentData().getData(); |
|||
try { |
|||
String nodeData = new String(data); |
|||
log.info("Node data changed: " + nodeData); |
|||
// 解析JSON数据 |
|||
JSONObject jsonObject = JSON.parseObject(nodeData); |
|||
int scenesId = jsonObject.getIntValue("scenes_id"); |
|||
int version = jsonObject.getIntValue("version"); |
|||
String newKey = scenesId + "_" + version; |
|||
// 移除CACHE中所有以scenesId开头的key |
|||
CACHE.keySet().removeIf(key -> key.startsWith(scenesId + "_")); |
|||
// 将新的key放入CACHE |
|||
CACHE.put(newKey, String.valueOf(version)); |
|||
log.info("当前缓存version信息:{}", JSON.toJSON(CACHE)); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
}); |
|||
} catch (Exception e) { |
|||
log.error("Error setting up Zookeeper listener", e); |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,166 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import com.alibaba.fastjson2.JSON; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import okhttp3.*; |
|||
import org.springframework.scheduling.annotation.Scheduled; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.io.IOException; |
|||
import java.net.ConnectException; |
|||
import java.net.URLDecoder; |
|||
import java.net.URLEncoder; |
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class PercentTransalteUtil { |
|||
public static String token; |
|||
|
|||
// @Scheduled(cron = "${crawl.cron.token_cron}") |
|||
public static String getToken() { |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded"); |
|||
//isRememberMe为1表示获取长效token(7天内有效),不传或非1获取的token为12小时有效 |
|||
RequestBody body = RequestBody.create(mediaType, "username=collectionSystem&password=67385d64fab46ab2a25f2ff2898e590d&grant_type=password"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/SystemManager/oauth/token") |
|||
.method("POST", body) |
|||
.addHeader("Content-Type", "application/x-www-form-urlencoded") |
|||
.build(); |
|||
try { |
|||
Response response = client.newCall(request).execute(); |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
if ("200".equals(json.get("code"))) { |
|||
log.info("获取token成功,当前时间:"+System.currentTimeMillis()); |
|||
token = (String) json.get("access_token"); |
|||
} else { |
|||
log.error("获取token失败,json:" + json); |
|||
} |
|||
} catch (IOException e) { |
|||
throw new RuntimeException(e); |
|||
} |
|||
return ""; |
|||
} |
|||
|
|||
public static Map getText(String text) { |
|||
String result = ""; |
|||
Map resultMap = new HashMap<>(); |
|||
try { |
|||
String encode = URLEncoder.encode(text, "UTF-8"); |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.connectTimeout(300, TimeUnit.SECONDS) |
|||
.readTimeout(60, TimeUnit.SECONDS) |
|||
.writeTimeout(30, TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/json"); |
|||
RequestBody body = RequestBody.create(mediaType, "{\"fromLanguage\":\"auto\",\"toLanguage\":\"en\",\"text\":\"" + encode + "\"}"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/dt/tran/text") |
|||
.method("POST", body) |
|||
.addHeader("Authorization", "Bearer " + token) |
|||
.addHeader("Content-Type", "application/json") |
|||
.build(); |
|||
Response response = null; |
|||
for (int i = 0; i < 10; i++) { |
|||
try { |
|||
response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
break; |
|||
} |
|||
}catch (ConnectException connectException){ |
|||
connectException.printStackTrace(); |
|||
} |
|||
} |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
if ((int) json.get("code") == 200) { |
|||
Map data = (Map) json.get("data"); |
|||
result = URLDecoder.decode((String) data.get("value"), "UTF-8"); |
|||
resultMap.put("isSuccess", true); |
|||
resultMap.put("result", result); |
|||
} else { |
|||
log.error("文本翻译失败,json:" + json); |
|||
resultMap.put("isSuccess", false); |
|||
resultMap.put("result", result); |
|||
} |
|||
|
|||
} catch (Throwable e) { |
|||
// e. |
|||
log.error("实时文本接口翻译失败,text:"+text+",e:"+e); |
|||
} |
|||
return resultMap; |
|||
} |
|||
|
|||
/** |
|||
* 获取语种 |
|||
* @param text |
|||
* @return |
|||
*/ |
|||
public static Boolean getLanguage(String text) { |
|||
String result = ""; |
|||
Boolean isZh = false; |
|||
try { |
|||
// String encode = URLEncoder.encode(text, "UTF-8"); |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.connectTimeout(300, TimeUnit.SECONDS) |
|||
.readTimeout(60, TimeUnit.SECONDS) |
|||
.writeTimeout(30, TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("application/json"); |
|||
RequestBody body = RequestBody.create(mediaType, "{\"text\":\"" + text + "\"}"); |
|||
Request request = new Request.Builder() |
|||
.url("https://fanyi.percent.cn/api/dt/api/language/distinguish") |
|||
.method("POST", body) |
|||
.addHeader("Authorization", "Bearer " + Config.access_token) |
|||
.addHeader("Content-Type", "application/json") |
|||
//.addHeader("Cookie", "dt_saas_token=d3c60514c65285836dc316b6d48d468e; JSESSIONID=2726ac68-ad92-470f-b1d5-c1c0255f1cb9") |
|||
.build(); |
|||
Response response = null; |
|||
for (int i = 0; i < 10; i++) { |
|||
// System.out.println(i); |
|||
try { |
|||
response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
break; |
|||
} |
|||
}catch (ConnectException connectException){ |
|||
connectException.printStackTrace(); |
|||
// continue; |
|||
} |
|||
} |
|||
Map json = (Map) JSON.parse(response.body().string()); |
|||
if ((int) json.get("code") == 200) { |
|||
Map data = (Map) json.get("data"); |
|||
// result = URLDecoder.decode((String) data.get("value"), "UTF-8"); |
|||
String language = (String) data.get("language"); |
|||
if (language.equals("zh")){ |
|||
isZh = true; |
|||
}else { |
|||
isZh = false; |
|||
} |
|||
} else { |
|||
log.error("语种检测失败,json:" + json); |
|||
|
|||
} |
|||
// System.out.println(response.code()); |
|||
// System.out.println(response.body().string()); |
|||
} catch (Throwable e) { |
|||
// e. |
|||
log.error("语种检测失败,text:"+text+",e:"+e); |
|||
} |
|||
return isZh; |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
getToken(); |
|||
Map data = getText("你好"); |
|||
System.out.println(data.get("result")); |
|||
|
|||
} |
|||
} |
@ -0,0 +1,165 @@ |
|||
package com.bfd.crawl_translate.utils; |
|||
|
|||
import com.alibaba.fastjson.JSONObject; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import okhttp3.*; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.io.IOException; |
|||
import java.net.InetSocketAddress; |
|||
import java.net.Proxy; |
|||
import java.net.URLEncoder; |
|||
import java.util.Random; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Slf4j |
|||
@Component |
|||
public class TranslateUtil { |
|||
/** |
|||
* 模拟请求翻译 |
|||
* @param sourceContent 原始内容 |
|||
* @return 翻译后内容 |
|||
*/ |
|||
public static String doDown(String sourceContent) { |
|||
// System.out.println("sourceContent --"+sourceContent); |
|||
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded;charset=UTF-8"); |
|||
String freq = "[[[\"MkEWBc\",\"[[\\\"#sourceContent#\\\",\\\"auto\\\",\\\"zh\\\",true],[null]]\",null,\"generic\"]]]&"; |
|||
sourceContent = sourceContent.replace("\"", "\\\\\\\"").replace("\n", "%5C%5Cn"); |
|||
freq = freq.replace("#sourceContent#", sourceContent); |
|||
String resData = null; |
|||
OkHttpClient client = new OkHttpClient().newBuilder().build(); |
|||
try { |
|||
freq = URLEncoder.encode(freq, "UTF-8"); |
|||
freq = freq.replace("%255C", "%5C").replace("+", "%20"); |
|||
RequestBody body = RequestBody.create(mediaType, "f.req=" + freq + "&"); |
|||
Request request = new Request.Builder() |
|||
.url("https://translate.google.com/_/TranslateWebserverUi/data/batchexecute") |
|||
.method("POST", body) |
|||
.addHeader("authority", "translate.google.cn") |
|||
.addHeader("accept", "*/*") |
|||
.addHeader("accept-language", "zh-CN,zh;q=0.9,en;q=0.8") |
|||
.addHeader("cache-control", "no-cache") |
|||
.addHeader("content-type", "application/x-www-form-urlencoded;charset=UTF-8") |
|||
.addHeader("cookie", "NID=511=voX-g_h3pvWIPMGq5T4ZaWq5jd6vRlZxBa6wqNTkEkdarBpallKXRxEOvfJu5TLDfbUxJXopAExNiqHJEW1wZU0MuvTIRmkVAAwBknQKHO_gu_xjtuXA00a56i8JL7RWSharKyQ5Ihoq0B-x21AANraC1Fhs9Q6q9eaSKZ3SwRw; _ga=GA1.3.1552263839.1660025840; _gid=GA1.3.1708261691.1660025840; OTZ=6628698_24_24__24_") |
|||
.addHeader("origin", "https://www..google.com") |
|||
.addHeader("pragma", "no-cache") |
|||
.addHeader("referer", "https://www.google.com/") |
|||
.addHeader("sec-ch-ua", "\"Chromium\";v=\"104\", \" Not A;Brand\";v=\"99\", \"Google Chrome\";v=\"104\"") |
|||
.addHeader("sec-ch-ua-arch", "\"x86\"") |
|||
.addHeader("sec-ch-ua-bitness", "\"64\"") |
|||
.addHeader("sec-ch-ua-full-version", "\"105.0.5195.127\"") |
|||
.addHeader("sec-ch-ua-full-version-list", "\".Not/A)Brand\";v=\"99.0.0.0\", \"Google Chrome\";v=\"105.0.5195.127\", \"Chromium\";v=\"105.0.5195.127\"") |
|||
.addHeader("sec-ch-ua-mobile", "?0") |
|||
.addHeader("sec-ch-ua-model", "") |
|||
.addHeader("sec-ch-ua-platform", "\"Windows\"") |
|||
.addHeader("sec-ch-ua-platform-version", "\"14.0.0\"") |
|||
.addHeader("sec-fetch-dest", "empty") |
|||
.addHeader("sec-fetch-mode", "cors") |
|||
.addHeader("sec-fetch-site", "same-origin") |
|||
.addHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36") |
|||
.addHeader("x-goog-batchexecute-bgr", "[\";QF64XhPQAAYEzRk0JM1fOJOg-gftA3smACkAIwj8RvJeTIev0D-SyoRKJ6uoX00-HGvJj52RYTiUM4uTOmtSjeDe8R8AAABETwAAAAN1AQcXADDPHkARfFVQcldCsJmHArK64X7Kwazlq5rjQUagjtthTLDL8Kzwcpyq2ZSnfkW-TgeEAkCdc1ZvDogdYwYPMOz1Wlm8kLxPrcyeDMlDZPwN-mxBRJ37LOpgblKfQbr4Uol0HOHebHB1MFSkLsj1GXbaAleVvEXINfpVoeY3ffPsALBLISUQo2-U9qxDn37JXqCrusVYhqzob2M7PEIb92UC8FT9NnDamVfEqaEjSFsmWH3-nfWCh4w65mczyn7qIhrktTvjIpNa7f19cW5hiPwrolcvqU7jjhELE2u-pJTtdmyuWW505lNZkFB3JDWIWbnF5fwlXW4h956fijzKow8rQ6VghrCdPmk5HKsDBvKKczaMM-j--yXw9UbLDtjwU3M_W_MMa3pxImT1161eWEmny3itLXPrN7F0ooYXFn8NMIcZ9124sRpN0JGQwOToAzJw_A5NxHNHHthCqsuZcmS1QWoDc52J7iVptnljhJj8wvin5DYCopcIraaZZdzKsyPL_cApUctaEC1mr5B6Bi5AVSNvkfyoZCx9tPdqAiDLjK0a0sx_vZYK5yARycrTsycOzN1aA9wCYvTdvtruv1f8JQwG6N9SsOAi4Ko0yHb8Uo9KhY6bO6sqcYqNOkWG8Zhl2-nvC6u2vLHofilKy8CAdzJQq5VlnrwhPPKhZupo1I_3XBYOxHYgy3Ly7fDaVhHUWam2H6xU6m4gaUbaTyx5MtD9mJqAOQRjrzeO-kUBshbAlCAEqpBd7IJ089Ph-RrST0NsRoUL7wuC5cxIs3jNcbBf3yietmn3vnDCWzRYI1gh7CtYSQ2Xk_kNLR41BjsoOK0\",null,null,380,7,null,null,0,\"2\"]") |
|||
.addHeader("x-same-domain", "1") |
|||
.build(); |
|||
int i = 0; |
|||
while (i < 3) { |
|||
i++; |
|||
Response response = null; |
|||
try { |
|||
//requestPost 使用代理方式 |
|||
resData = requestPost(request); |
|||
//使用本机ip |
|||
// response = client.newCall(request).execute(); |
|||
// if (response.isSuccessful() && response.body() != null) { |
|||
// resData = response.body().string(); |
|||
// } |
|||
// resData = response.body().string(); |
|||
// System.out.println(resData); |
|||
} catch (Exception e) { |
|||
log.warn("Download Fail retry:{}. errorMsg:", i, e); |
|||
} finally { |
|||
// response.close(); |
|||
} |
|||
} |
|||
} catch (Exception e) { |
|||
log.error("Download Fail sourceContent:"+sourceContent+",e:"+e); |
|||
} |
|||
String parse = doParse(resData); |
|||
return parse; |
|||
} |
|||
|
|||
|
|||
|
|||
/** |
|||
* 解析请求内容 |
|||
* @param resData |
|||
* @return |
|||
*/ |
|||
public static String doParse(String resData){ |
|||
String resTranslate = ""; |
|||
try { |
|||
resData = resData.replaceAll("\\)\\]}'",""); |
|||
resData = "{\"a\":"+resData+"}"; |
|||
JSONObject jsonObject = JSONObject.parseObject(resData); |
|||
String a1 = jsonObject.getJSONArray("a").getJSONArray(0).get(2).toString(); |
|||
String a2 = "{\"a\":"+a1+"}"; |
|||
JSONObject objects = JSONObject.parseObject(a2); |
|||
int size = objects.getJSONArray("a").getJSONArray(1).getJSONArray(0).getJSONArray(0).getJSONArray(5).size(); |
|||
for (int i=0;i<=size-1;i++){ |
|||
String string = objects.getJSONArray("a").getJSONArray(1).getJSONArray(0).getJSONArray(0).getJSONArray(5).getJSONArray(i).get(0).toString(); |
|||
if (!string.isEmpty() && !string.equals("null") ) { |
|||
resTranslate += objects.getJSONArray("a").getJSONArray(1).getJSONArray(0).getJSONArray(0).getJSONArray(5).getJSONArray(i).get(0).toString()+" "; |
|||
} |
|||
} |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
log.error("Parse fail,e:"+e); |
|||
} |
|||
return resTranslate; |
|||
} |
|||
|
|||
public static String requestPost(Request request) throws Exception { |
|||
|
|||
OkHttpClient httpClient = Proxy(); |
|||
String resData = null; |
|||
Random random = new Random(); |
|||
int m = (int) (20 * (random.nextFloat() + 1)); |
|||
Thread.sleep(m); |
|||
Response response = httpClient.newCall(request).execute(); |
|||
if (response.isSuccessful() && response.body() != null) { |
|||
resData = response.body().string(); |
|||
} |
|||
response.close(); |
|||
return resData; |
|||
} |
|||
public static OkHttpClient Proxy() { |
|||
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("oversea_vpn.baifendian.com", 3128)); |
|||
Authenticator proxyAuthenticator = new Authenticator() { |
|||
|
|||
@Override |
|||
public Request authenticate(Route route, Response response) throws IOException { |
|||
//设置代理服务器账号密码 |
|||
String credential = Credentials.basic("", ""); |
|||
return response.request().newBuilder() |
|||
.header("Proxy-Authorization", credential) |
|||
.build(); |
|||
} |
|||
}; |
|||
OkHttpClient httpClientProxy = new OkHttpClient.Builder() |
|||
// //1000是1秒 |
|||
.connectTimeout(200, TimeUnit.SECONDS) |
|||
//设置读取超时时间 |
|||
.readTimeout(30, TimeUnit.SECONDS) |
|||
.proxy(proxy) |
|||
.proxyAuthenticator(proxyAuthenticator) |
|||
.build(); |
|||
return httpClientProxy; |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
String doDown = doDown("British authorities withhold Chelsea proceeds from Roman Abramovich"); |
|||
System.out.println(doDown); |
|||
} |
|||
} |
@ -0,0 +1,59 @@ |
|||
crawl: |
|||
es: |
|||
subjectId: cl_special_1.0_305041 |
|||
cid: all |
|||
beginTime: 2000-01-01 |
|||
endTime: 2025-01-01 |
|||
cron: |
|||
token_cron: 0 0 0/10 * * ? |
|||
query_cron: 0 26 10 * * ? |
|||
size_cron: 0 0/30 * * * * |
|||
kafka: |
|||
topic: analyze |
|||
brokers: 172.26.28.30:9092 |
|||
task: |
|||
taskData: ./data/taskData.txt |
|||
server: |
|||
port: 9999 |
|||
# Log level |
|||
logging: |
|||
level: |
|||
com: |
|||
bfd: INFO |
|||
# Log path |
|||
log: |
|||
path: ./logs |
|||
spring: |
|||
redis: |
|||
host: 172.24.12.126 |
|||
port: 6379 |
|||
timeout: 10000 |
|||
database: 5 |
|||
jedis: |
|||
pool: |
|||
max-active: 8 # 连接池最大连接数(使用负值表示没有限制) |
|||
max-wait: 800 # 连接池最大阻塞等待时间(使用负值表示没有限制) |
|||
max-idle: 8 # 连接池中的最大空闲连接 |
|||
min-idle: 2 # 连接池中的最小空闲连接 |
|||
boot: |
|||
admin: |
|||
client: |
|||
url: http://172.18.1.147:8001 |
|||
instance: |
|||
service-base-url: http://172.18.1.147:9999 |
|||
application: |
|||
name: translate |
|||
management: |
|||
endpoints: |
|||
web: |
|||
exposure: |
|||
include: "*" |
|||
endpoint: |
|||
health: |
|||
show-details: always |
|||
health: |
|||
elasticsearch: |
|||
enabled: false |
|||
zookeeper: |
|||
connection-string: 172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181 |
|||
publish-node: /analyze |
@ -0,0 +1,38 @@ |
|||
<configuration> |
|||
<!-- Properties: each value is read from the matching entry in the Spring configuration --> |
|||
<springProperty scope="context" name="logging.path" source="logging.log.path"/> |
|||
<springProperty scope="context" name="logging.level" source="logging.level.com.bfd"/> |
|||
<!-- Default console appender; production normally runs in the background, so it is of little use and is left disabled --> |
|||
<!-- <appender name="STDOUT" |
|||
class="ch.qos.logback.core.ConsoleAppender"> |
|||
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
|||
<Pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</Pattern> |
|||
</encoder> |
|||
</appender> --> |
|||
|
|||
<appender name="GLMAPPER-LOGGERONE" |
|||
class="ch.qos.logback.core.rolling.RollingFileAppender"> |
|||
<append>true</append> |
|||
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> |
|||
<level>${logging.level}</level> |
|||
</filter> |
|||
<file> |
|||
${logging.path}/crawlSchedule.log |
|||
<!-- ${logging.path}/sendKafka.log --> |
|||
</file> |
|||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
|||
<FileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</FileNamePattern> |
|||
<!-- <FileNamePattern>${logging.path}/sendKafka.log.%d{yyyy-MM-dd}</FileNamePattern> --> |
|||
<MaxHistory>7</MaxHistory> |
|||
</rollingPolicy> |
|||
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
|||
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
|||
<charset>UTF-8</charset> |
|||
</encoder> |
|||
</appender> |
|||
|
|||
<root level="info"> |
|||
<appender-ref ref="GLMAPPER-LOGGERONE"/> |
|||
<!-- <appender-ref ref="STDOUT"/> --> |
|||
</root> |
|||
</configuration> |
Write
Preview
Loading…
Cancel
Save
Reference in new issue