commit
5bed73d153
25 changed files with 7332 additions and 0 deletions
-
33.gitignore
-
1README.md
-
37logs/formHandler.log
-
5627logs/ocrHandler.log
-
143pom.xml
-
13src/main/java/com/bfd/crawl/formhandler/FormHanlerApplication.java
-
60src/main/java/com/bfd/crawl/formhandler/bean/ResponsePo.java
-
48src/main/java/com/bfd/crawl/formhandler/config/AsyncThreadConfiguration.java
-
27src/main/java/com/bfd/crawl/formhandler/config/Constant.java
-
52src/main/java/com/bfd/crawl/formhandler/controller/ExcelHandlerController.java
-
32src/main/java/com/bfd/crawl/formhandler/enums/ResponseCode.java
-
243src/main/java/com/bfd/crawl/formhandler/service/HandlerService.java
-
54src/main/java/com/bfd/crawl/formhandler/service/SendService.java
-
75src/main/java/com/bfd/crawl/formhandler/service/StartServcie.java
-
59src/main/java/com/bfd/crawl/formhandler/util/CsvUtil.java
-
60src/main/java/com/bfd/crawl/formhandler/util/DataUtil.java
-
318src/main/java/com/bfd/crawl/formhandler/util/ExcelUtils.java
-
117src/main/java/com/bfd/crawl/formhandler/util/FileDownloader.java
-
42src/main/java/com/bfd/crawl/formhandler/util/FileUtil.java
-
59src/main/java/com/bfd/crawl/formhandler/util/OcrUtil.java
-
23src/main/java/com/bfd/crawl/formhandler/util/OsUtil.java
-
19src/main/java/com/bfd/crawl/formhandler/util/QueueUtil.java
-
94src/main/java/com/bfd/crawl/formhandler/util/StringUtil.java
-
60src/main/resources/application.yml
-
36src/main/resources/logback-spring.xml
@ -0,0 +1,33 @@ |
|||||
|
HELP.md |
||||
|
target/ |
||||
|
!.mvn/wrapper/maven-wrapper.jar |
||||
|
!**/src/main/**/target/ |
||||
|
!**/src/test/**/target/ |
||||
|
|
||||
|
### STS ### |
||||
|
.apt_generated |
||||
|
.classpath |
||||
|
.factorypath |
||||
|
.project |
||||
|
.settings |
||||
|
.springBeans |
||||
|
.sts4-cache |
||||
|
|
||||
|
### IntelliJ IDEA ### |
||||
|
.idea |
||||
|
*.iws |
||||
|
*.iml |
||||
|
*.ipr |
||||
|
|
||||
|
### NetBeans ### |
||||
|
/nbproject/private/ |
||||
|
/nbbuild/ |
||||
|
/dist/ |
||||
|
/nbdist/ |
||||
|
/.nb-gradle/ |
||||
|
build/ |
||||
|
!**/src/main/**/build/ |
||||
|
!**/src/test/**/build/ |
||||
|
|
||||
|
### VS Code ### |
||||
|
.vscode/ |
@ -0,0 +1 @@ |
|||||
|
excel解析应用 |
@ -0,0 +1,37 @@ |
|||||
|
2024-03-25 16:03:08.049 [restartedMain] 55 INFO com.bfd.crawl.formhandler.FormHanlerApplication - Starting FormHanlerApplication on JinMing with PID 18732 (D:\git\formHanler\target\classes started by ming.jin in D:\git\formHanler) |
||||
|
2024-03-25 16:03:08.051 [restartedMain] 651 INFO com.bfd.crawl.formhandler.FormHanlerApplication - No active profile set, falling back to default profiles: default |
||||
|
2024-03-25 16:03:08.100 [restartedMain] 225 INFO o.s.b.d.env.DevToolsPropertyDefaultsPostProcessor - Devtools property defaults active! Set 'spring.devtools.add-properties' to 'false' to disable |
||||
|
2024-03-25 16:03:08.100 [restartedMain] 225 INFO o.s.b.d.env.DevToolsPropertyDefaultsPostProcessor - For additional web related logging consider setting the 'logging.level.web' property to 'DEBUG' |
||||
|
2024-03-25 16:03:08.680 [restartedMain] 92 INFO o.s.boot.web.embedded.tomcat.TomcatWebServer - Tomcat initialized with port(s): 7081 (http) |
||||
|
2024-03-25 16:03:08.685 [restartedMain] 173 INFO org.apache.coyote.http11.Http11NioProtocol - Initializing ProtocolHandler ["http-nio-7081"] |
||||
|
2024-03-25 16:03:08.685 [restartedMain] 173 INFO org.apache.catalina.core.StandardService - Starting service [Tomcat] |
||||
|
2024-03-25 16:03:08.685 [restartedMain] 173 INFO org.apache.catalina.core.StandardEngine - Starting Servlet engine: [Apache Tomcat/9.0.30] |
||||
|
2024-03-25 16:03:08.729 [restartedMain] 173 INFO o.a.c.core.ContainerBase.[Tomcat].[localhost].[/] - Initializing Spring embedded WebApplicationContext |
||||
|
2024-03-25 16:03:08.729 [restartedMain] 284 INFO org.springframework.web.context.ContextLoader - Root WebApplicationContext: initialization completed in 629 ms |
||||
|
2024-03-25 16:03:08.873 [restartedMain] 171 INFO o.s.scheduling.concurrent.ThreadPoolTaskExecutor - Initializing ExecutorService |
||||
|
2024-03-25 16:03:08.874 [restartedMain] 171 INFO o.s.scheduling.concurrent.ThreadPoolTaskExecutor - Initializing ExecutorService 'asyncExecutor' |
||||
|
2024-03-25 16:03:08.875 [restartedMain] 171 INFO o.s.scheduling.concurrent.ThreadPoolTaskExecutor - Initializing ExecutorService |
||||
|
2024-03-25 16:03:08.875 [restartedMain] 171 INFO o.s.scheduling.concurrent.ThreadPoolTaskExecutor - Initializing ExecutorService 'sendExecutor' |
||||
|
2024-03-25 16:03:10.682 [restartedMain] 171 INFO o.s.scheduling.concurrent.ThreadPoolTaskScheduler - Initializing ExecutorService |
||||
|
2024-03-25 16:03:10.727 [restartedMain] 58 INFO o.s.b.d.autoconfigure.OptionalLiveReloadServer - LiveReload server is running on port 35729 |
||||
|
2024-03-25 16:03:10.729 [restartedMain] 58 INFO o.s.b.actuate.endpoint.web.EndpointLinksResolver - Exposing 14 endpoint(s) beneath base path '/actuator' |
||||
|
2024-03-25 16:03:10.754 [restartedMain] 173 INFO org.apache.coyote.http11.Http11NioProtocol - Starting ProtocolHandler ["http-nio-7081"] |
||||
|
2024-03-25 16:03:10.768 [restartedMain] 204 INFO o.s.boot.web.embedded.tomcat.TomcatWebServer - Tomcat started on port(s): 7081 (http) with context path '' |
||||
|
2024-03-25 16:03:10.770 [restartedMain] 61 INFO com.bfd.crawl.formhandler.FormHanlerApplication - Started FormHanlerApplication in 2.951 seconds (JVM running for 5.365) |
||||
|
2024-03-25 16:03:10.772 [restartedMain] 35 INFO com.bfd.crawl.formhandler.service.StartServcie - 处理服务线程0已启动 |
||||
|
2024-03-25 16:03:10.774 [restartedMain] 39 INFO com.bfd.crawl.formhandler.service.StartServcie - 发送服务线程0已启动 |
||||
|
2024-03-25 16:03:10.775 [handlerData-1] 133 INFO com.bfd.crawl.formhandler.service.HandlerService - 任务队列为空,休眠3秒 |
||||
|
2024-03-25 16:03:10.775 [sendData-1] 45 INFO com.bfd.crawl.formhandler.service.SendService - 任务队列为空,休眠3秒 |
||||
|
2024-03-25 16:03:10.775 [Thread-12] 46 INFO com.bfd.crawl.formhandler.service.StartServcie - 任务队列长度为0 |
||||
|
2024-03-25 16:03:10.775 [Thread-12] 47 INFO com.bfd.crawl.formhandler.service.StartServcie - 发送队列长度为0 |
||||
|
2024-03-25 16:03:11.012 [registrationTask1] 84 INFO d.c.b.a.client.registration.ApplicationRegistrator - Application registered itself as b40b416ce444 |
||||
|
2024-03-25 16:03:13.785 [sendData-1] 45 INFO com.bfd.crawl.formhandler.service.SendService - 任务队列为空,休眠3秒 |
||||
|
2024-03-25 16:03:13.785 [handlerData-1] 133 INFO com.bfd.crawl.formhandler.service.HandlerService - 任务队列为空,休眠3秒 |
||||
|
2024-03-25 16:03:13.897 [RMI TCP Connection(3)-10.10.144.49] 173 INFO o.a.c.core.ContainerBase.[Tomcat].[localhost].[/] - Initializing Spring DispatcherServlet 'dispatcherServlet' |
||||
|
2024-03-25 16:03:13.897 [RMI TCP Connection(3)-10.10.144.49] 525 INFO org.springframework.web.servlet.DispatcherServlet - Initializing Servlet 'dispatcherServlet' |
||||
|
2024-03-25 16:03:13.900 [RMI TCP Connection(3)-10.10.144.49] 547 INFO org.springframework.web.servlet.DispatcherServlet - Completed initialization in 3 ms |
||||
|
2024-03-25 16:03:15.244 [SpringContextShutdownHook] 208 INFO o.s.scheduling.concurrent.ThreadPoolTaskScheduler - Shutting down ExecutorService |
||||
|
2024-03-25 16:03:15.245 [SpringContextShutdownHook] 208 INFO o.s.scheduling.concurrent.ThreadPoolTaskExecutor - Shutting down ExecutorService 'sendExecutor' |
||||
|
2024-03-25 16:03:15.245 [SpringContextShutdownHook] 208 INFO o.s.scheduling.concurrent.ThreadPoolTaskExecutor - Shutting down ExecutorService 'asyncExecutor' |
||||
|
2024-03-25 16:03:16.788 [handlerData-1] 133 INFO com.bfd.crawl.formhandler.service.HandlerService - 任务队列为空,休眠3秒 |
||||
|
2024-03-25 16:03:16.788 [sendData-1] 45 INFO com.bfd.crawl.formhandler.service.SendService - 任务队列为空,休眠3秒 |
5627
logs/ocrHandler.log
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,143 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<parent> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-parent</artifactId> |
||||
|
<version>2.2.4.RELEASE</version> |
||||
|
<relativePath/> <!-- lookup parent from repository --> |
||||
|
</parent> |
||||
|
<groupId>com.bfd.crawl</groupId> |
||||
|
<artifactId>formHanler</artifactId> |
||||
|
<version>0.0.1-SNAPSHOT</version> |
||||
|
<name>formHanler</name> |
||||
|
<description>formHanler</description> |
||||
|
<properties> |
||||
|
<java.version>8</java.version> |
||||
|
</properties> |
||||
|
|
||||
|
|
||||
|
<dependencies> |
||||
|
<!-- https://mvnrepository.com/artifact/de.codecentric/spring-boot-admin-starter-client --> |
||||
|
<dependency> |
||||
|
<groupId>de.codecentric</groupId> |
||||
|
<artifactId>spring-boot-admin-client</artifactId> |
||||
|
<version>2.2.4</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.commons</groupId> |
||||
|
<artifactId>commons-csv</artifactId> |
||||
|
<version>1.10.0</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.kafka</groupId> |
||||
|
<artifactId>spring-kafka</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-web</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-devtools</artifactId> |
||||
|
<scope>runtime</scope> |
||||
|
<optional>true</optional> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.projectlombok</groupId> |
||||
|
<artifactId>lombok</artifactId> |
||||
|
<optional>true</optional> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.pdfbox</groupId> |
||||
|
<artifactId>pdfbox</artifactId> |
||||
|
<version>2.0.28</version> |
||||
|
</dependency> |
||||
|
<!-- Apache POI --> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.poi</groupId> |
||||
|
<artifactId>poi-scratchpad</artifactId> |
||||
|
<version>5.2.5</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.poi</groupId> |
||||
|
<artifactId>poi</artifactId> |
||||
|
<version>5.2.5</version> |
||||
|
<exclusions> |
||||
|
<exclusion> |
||||
|
<groupId>org.apache.logging.log4j</groupId> |
||||
|
<artifactId>log4j-api</artifactId> |
||||
|
</exclusion> |
||||
|
</exclusions> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.logging.log4j</groupId> |
||||
|
<artifactId>log4j-api</artifactId> |
||||
|
<version>2.17.1</version> |
||||
|
</dependency> |
||||
|
<!-- Keep poi-ooxml on the same release as poi and poi-scratchpad (5.2.5):
     Apache POI does not support mixing jars from different releases. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.2.5</version>
</dependency>
||||
|
<!--JSON--> |
||||
|
<dependency> |
||||
|
<groupId>com.alibaba</groupId> |
||||
|
<artifactId>fastjson</artifactId> |
||||
|
<version>2.0.17</version> |
||||
|
</dependency> |
||||
|
<!--OKHTTP--> |
||||
|
<dependency> |
||||
|
<groupId>com.squareup.okhttp3</groupId> |
||||
|
<artifactId>okhttp</artifactId> |
||||
|
<version>3.9.1</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.google.code.gson</groupId> |
||||
|
<artifactId>gson</artifactId> |
||||
|
<version>2.8.8</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.kafka</groupId> |
||||
|
<artifactId>kafka-clients</artifactId> |
||||
|
<version>2.3.1</version> <!--根据您正在使用的Kafka版本选择合适的版本号--> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.kafka</groupId> |
||||
|
<artifactId>spring-kafka-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.bfd.util</groupId> |
||||
|
<artifactId>pauseTool</artifactId> |
||||
|
<version>1.0</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-maven-plugin</artifactId> |
||||
|
<configuration> |
||||
|
<excludes> |
||||
|
<exclude> |
||||
|
<groupId>org.projectlombok</groupId> |
||||
|
<artifactId>lombok</artifactId> |
||||
|
</exclude> |
||||
|
</excludes> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
|
||||
|
</project> |
@ -0,0 +1,13 @@ |
|||||
|
package com.bfd.crawl.formhandler; |
||||
|
|
||||
|
import org.springframework.boot.SpringApplication; |
||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication; |
||||
|
|
||||
|
@SpringBootApplication |
||||
|
public class FormHanlerApplication { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
SpringApplication.run(FormHanlerApplication.class, args); |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,60 @@ |
|||||
|
package com.bfd.crawl.formhandler.bean; |
||||
|
|
||||
|
|
||||
|
|
||||
|
import com.bfd.crawl.formhandler.enums.ResponseCode; |
||||
|
import lombok.AllArgsConstructor; |
||||
|
import lombok.Data; |
||||
|
import lombok.NoArgsConstructor; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:ResponsePo |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/4/3 17:23 |
||||
|
*/ |
||||
|
@Data |
||||
|
@NoArgsConstructor |
||||
|
@AllArgsConstructor |
||||
|
public class ResponsePo { |
||||
|
/** |
||||
|
* 响应码 |
||||
|
*/ |
||||
|
private int code; |
||||
|
|
||||
|
/** |
||||
|
* 正常放 返回数据 的JSON串 |
||||
|
*/ |
||||
|
private Object data; |
||||
|
|
||||
|
/** |
||||
|
* 提示消息 |
||||
|
*/ |
||||
|
private String message; |
||||
|
|
||||
|
public static ResponsePo success() { |
||||
|
return setStatus(ResponseCode.SUCCESS.getCode(), ResponseCode.SUCCESS.getMessage()); |
||||
|
} |
||||
|
|
||||
|
public static ResponsePo error() { |
||||
|
return setStatus(ResponseCode.FAILURE.getCode(), ResponseCode.FAILURE.getMessage()); |
||||
|
} |
||||
|
|
||||
|
public static ResponsePo setStatus(int code, String message) { |
||||
|
ResponsePo resultBean = new ResponsePo(); |
||||
|
resultBean.code = code; |
||||
|
resultBean.message = message; |
||||
|
return resultBean; |
||||
|
} |
||||
|
public ResponsePo(int code, String message) { |
||||
|
this.code = code; |
||||
|
this.message = message; |
||||
|
this.data = data; |
||||
|
} |
||||
|
public ResponsePo(ResponseCode responseCode){ |
||||
|
this.code = responseCode.getCode(); |
||||
|
this.message = responseCode.getMessage(); |
||||
|
this.data = data; |
||||
|
} |
||||
|
} |
@ -0,0 +1,48 @@ |
|||||
|
package com.bfd.crawl.formhandler.config; |
||||
|
|
||||
|
|
||||
|
import org.springframework.context.annotation.Bean; |
||||
|
import org.springframework.context.annotation.Configuration; |
||||
|
import org.springframework.scheduling.annotation.EnableAsync; |
||||
|
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; |
||||
|
|
||||
|
import java.util.concurrent.Executor; |
||||
|
|
||||
|
/** |
||||
|
* @author jinming |
||||
|
* @version 1.0 |
||||
|
* @className AsyncThreadConfiguration |
||||
|
* @Date 2022/2/17 18:37 |
||||
|
*/ |
||||
|
@Configuration |
||||
|
@EnableAsync |
||||
|
public class AsyncThreadConfiguration { |
||||
|
@Bean |
||||
|
public Executor asyncExecutor() { |
||||
|
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); |
||||
|
// 核心线程数 |
||||
|
executor.setCorePoolSize(500); |
||||
|
// 并发线程的数量限制为2 |
||||
|
executor.setMaxPoolSize(500); |
||||
|
// 线程队列 |
||||
|
executor.setQueueCapacity(500); |
||||
|
executor.setThreadNamePrefix("handlerData-"); |
||||
|
executor.initialize(); |
||||
|
executor.setWaitForTasksToCompleteOnShutdown(true); |
||||
|
return executor; |
||||
|
} |
||||
|
@Bean |
||||
|
public Executor sendExecutor() { |
||||
|
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); |
||||
|
// 核心线程数 |
||||
|
executor.setCorePoolSize(500); |
||||
|
// 并发线程的数量限制为2 |
||||
|
executor.setMaxPoolSize(500); |
||||
|
// 线程队列 |
||||
|
executor.setQueueCapacity(500); |
||||
|
executor.setThreadNamePrefix("sendData-"); |
||||
|
executor.initialize(); |
||||
|
executor.setWaitForTasksToCompleteOnShutdown(true); |
||||
|
return executor; |
||||
|
} |
||||
|
} |
@ -0,0 +1,27 @@ |
|||||
|
package com.bfd.crawl.formhandler.config; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:Constant |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/8/16 15:26 |
||||
|
*/ |
||||
|
public class Constant { |
||||
|
/** |
||||
|
* |
||||
|
*/ |
||||
|
public final static String IS_XLS = "xls"; |
||||
|
|
||||
|
/** |
||||
|
* |
||||
|
*/ |
||||
|
public final static String IS_CSV = "csv"; |
||||
|
|
||||
|
/** |
||||
|
* |
||||
|
*/ |
||||
|
public final static String ALL = "*"; |
||||
|
|
||||
|
|
||||
|
} |
@ -0,0 +1,52 @@ |
|||||
|
package com.bfd.crawl.formhandler.controller; |
||||
|
|
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
|
||||
|
|
||||
|
import com.bfd.crawl.formhandler.bean.ResponsePo; |
||||
|
import com.bfd.crawl.formhandler.enums.ResponseCode; |
||||
|
import com.bfd.crawl.formhandler.util.QueueUtil; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.web.bind.annotation.PostMapping; |
||||
|
import org.springframework.web.bind.annotation.RequestBody; |
||||
|
import org.springframework.web.bind.annotation.RequestMapping; |
||||
|
import org.springframework.web.bind.annotation.RestController; |
||||
|
|
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:DataFilterController |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/26 11:21 |
||||
|
*/ |
||||
|
@RestController |
||||
|
@RequestMapping("/handlerdata") |
||||
|
@Slf4j |
||||
|
public class ExcelHandlerController { |
||||
|
@PostMapping("/formhandler") |
||||
|
public ResponsePo documentFeedback(@RequestBody String dataJson) { |
||||
|
String trace = "trace"; |
||||
|
ResponsePo responsePo = ResponsePo.success(); |
||||
|
try { |
||||
|
Map parse = (Map) JSON.parse(dataJson); |
||||
|
log.info("新增任务:" + dataJson); |
||||
|
if (parse.containsKey(trace) && (Boolean) parse.get(trace) == true) { |
||||
|
log.info("测试流程,插入队首"); |
||||
|
QueueUtil.taskQueue.putFirst(dataJson); |
||||
|
}else { |
||||
|
QueueUtil.taskQueue.add(dataJson); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
log.error("请求格式发生异常" + e.getMessage()); |
||||
|
responsePo.setCode(ResponseCode.FAILURE.getCode()); |
||||
|
responsePo.setMessage(ResponseCode.FAILURE.getMessage()); |
||||
|
return responsePo; |
||||
|
} |
||||
|
|
||||
|
return responsePo; |
||||
|
} |
||||
|
} |
@ -0,0 +1,32 @@ |
|||||
|
package com.bfd.crawl.formhandler.enums; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:ResponseCodeEnum |
||||
|
* @version:1.0 |
||||
|
* @description:响应结果码枚举类 |
||||
|
* @Date:2023/2/28 11:40 |
||||
|
*/ |
||||
|
public enum ResponseCode { |
||||
|
//返回结果码枚举类 |
||||
|
SUCCESS(200, "操作成功"), |
||||
|
FAILURE(400, "参数错误"), |
||||
|
INTERNAL_SERVER_ERROR(500, "服务器内部错误"), |
||||
|
TYPE_NOT_SUPPORT(601,"文件类型不支持"); |
||||
|
|
||||
|
private int code; |
||||
|
private String message; |
||||
|
|
||||
|
ResponseCode(int code, String message) { |
||||
|
this.code = code; |
||||
|
this.message = message; |
||||
|
} |
||||
|
|
||||
|
public int getCode() { |
||||
|
return code; |
||||
|
} |
||||
|
|
||||
|
public String getMessage() { |
||||
|
return message; |
||||
|
} |
||||
|
} |
@ -0,0 +1,243 @@ |
|||||
|
package com.bfd.crawl.formhandler.service; |
||||
|
|
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import com.bfd.crawl.formhandler.config.Constant; |
||||
|
import com.bfd.crawl.formhandler.util.*; |
||||
|
import com.bfd.util.PauseTool; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.scheduling.annotation.Async; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.IOException; |
||||
|
import java.util.*; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:HandlerService |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/8/1 16:05 |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
public class HandlerService { |
||||
|
@Value("${file.path}") |
||||
|
private String downloadFilePath; |
||||
|
|
||||
|
@Async("asyncExecutor") |
||||
|
void run() { |
||||
|
while (true) { |
||||
|
try { |
||||
|
if (QueueUtil.taskQueue.size() > 0) { |
||||
|
log.info("当前工作线程开始获取数据"); |
||||
|
String dataJson = QueueUtil.taskQueue.poll(); |
||||
|
if (!StringUtil.hasValue(dataJson)) { |
||||
|
log.info("任务队列为空,休眠3秒"); |
||||
|
try { |
||||
|
Thread.sleep(3000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
continue; |
||||
|
} |
||||
|
Map parse = (Map) JSON.parse(dataJson); |
||||
|
Map dataMap = (Map) parse.get("data"); |
||||
|
int id = (int) parse.get("id"); |
||||
|
Map admin = (Map) parse.get("input"); |
||||
|
int scenesId = (int) parse.get("scenes_id"); |
||||
|
int version = (int) parse.get("version"); |
||||
|
String pauseKey = scenesId + "_" + version; |
||||
|
if (!PauseTool.CACHE.containsKey(pauseKey)) { |
||||
|
log.info("流程:{}的版本:{}已失效,任务跳过", scenesId, version); |
||||
|
continue; |
||||
|
} |
||||
|
String key = (String) admin.get("fileUrl"); |
||||
|
String fileUrl = (String) DataUtil.getValue(key, dataMap); |
||||
|
boolean windows = OsUtil.isWindows(); |
||||
|
Map output = (Map) parse.get("output"); |
||||
|
//todo:后续如果需要选sheet将当前行注释放开即可 |
||||
|
int allSheet = 0; |
||||
|
try { |
||||
|
allSheet = (int) admin.get("allSheet"); |
||||
|
} catch (Exception e) { |
||||
|
|
||||
|
} |
||||
|
Boolean isTrace = false; |
||||
|
if (parse.containsKey("trace") && (Boolean)parse.get("trace")==true) { |
||||
|
log.info("测试流程,只返回第一个sheet页的第一条数据"); |
||||
|
isTrace = true; |
||||
|
} |
||||
|
//fieldType:自定义输出字段: 0 关闭,1-开启,如果开启则拼接form到output里(如果关闭,则取默认的output拼接) |
||||
|
int fieldType = 0; |
||||
|
String fileType = StringUtil.getStrByPattern(fileUrl, "\\.[^.\\\\/:*?\"<>|\\r\\n]+$"); |
||||
|
log.info("任务:" + id + "的文件类型为" + fileType); |
||||
|
if (fileType.contains(Constant.IS_XLS)) { |
||||
|
String fileName = StringUtil.getMd5(fileUrl); |
||||
|
//定义xls的下载路径 |
||||
|
String xlsDir = downloadFilePath.concat(windows ? "\\xls\\" : "/xls/").concat(fileName).concat(fileType); |
||||
|
int dataRow = (int) admin.get("dataRow"); |
||||
|
Map<Integer, String> fromMap = new HashMap<>(32); |
||||
|
Set<String> set = output.keySet(); |
||||
|
for (String s : set) { |
||||
|
fromMap.put(ExcelUtils.getAlphabetPosition(s), output.get(s).toString()); |
||||
|
} |
||||
|
try { |
||||
|
FileDownloader.downloadFile(fileUrl, new File(xlsDir)); |
||||
|
} catch (IOException e) { |
||||
|
|
||||
|
Map result = new HashMap(32); |
||||
|
result.put("status", 2); |
||||
|
result.put("results", ""); |
||||
|
result.put("message", "文件下载失败"); |
||||
|
parse.put("result", result); |
||||
|
String message = JSON.toJSONString(parse); |
||||
|
try { |
||||
|
QueueUtil.sendQueue.put(message); |
||||
|
} catch (InterruptedException ex) { |
||||
|
ex.printStackTrace(); |
||||
|
} |
||||
|
continue; |
||||
|
} |
||||
|
Map<String, Object> xlsParse = null; |
||||
|
try { |
||||
|
xlsParse = ExcelUtils.parse(new File(xlsDir), dataRow, fromMap); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
Map result = new HashMap(32); |
||||
|
result.put("status", 2); |
||||
|
result.put("results", ""); |
||||
|
result.put("message", "Excel解析失败"); |
||||
|
parse.put("result", result); |
||||
|
String message = JSON.toJSONString(parse); |
||||
|
try { |
||||
|
QueueUtil.sendQueue.put(message); |
||||
|
} catch (InterruptedException ex) { |
||||
|
ex.printStackTrace(); |
||||
|
} |
||||
|
continue; |
||||
|
} |
||||
|
Set<String> xlsParseKeySet = xlsParse.keySet(); |
||||
|
int loopIndex = 1; |
||||
|
for (String xlsParseKey : xlsParseKeySet) { |
||||
|
int looploopIndex = 1; |
||||
|
List<Map> sheetListData = (List<Map>) xlsParse.get(xlsParseKey); |
||||
|
for (Map sheetListDatum : sheetListData) { |
||||
|
Map result = new HashMap(32); |
||||
|
Map resultsMap = new HashMap(32); |
||||
|
if (looploopIndex == sheetListData.size()) { |
||||
|
resultsMap.put("isLast", 1); |
||||
|
sheetListDatum.put("isLast", 1); |
||||
|
} else if (loopIndex == xlsParseKeySet.size() && looploopIndex == sheetListData.size()) { |
||||
|
resultsMap.put("isLast", 1); |
||||
|
sheetListDatum.put("isLast", 1); |
||||
|
} |
||||
|
if (fieldType != 0) { |
||||
|
resultsMap.remove("result"); |
||||
|
Set outputKeySet = output.keySet(); |
||||
|
for (Object outputKey : outputKeySet) { |
||||
|
String dataVlue = (String) sheetListDatum.get(outputKey); |
||||
|
if (StringUtil.hasValue(dataVlue)) { |
||||
|
resultsMap.put(outputKey, dataVlue); |
||||
|
} |
||||
|
} |
||||
|
if (resultsMap.isEmpty()) { |
||||
|
looploopIndex++; |
||||
|
continue; |
||||
|
} |
||||
|
resultsMap.put("sheetName", xlsParseKey); |
||||
|
String resultsMapJson = JSON.toJSONString(resultsMap); |
||||
|
result.put("results", resultsMapJson); |
||||
|
parse.put("result", result); |
||||
|
result.put("status", 1); |
||||
|
result.put("message", "成功"); |
||||
|
} else { |
||||
|
sheetListDatum.put("sheetName", xlsParseKey); |
||||
|
result.put("results", JSON.toJSONString(sheetListDatum)); |
||||
|
result.put("status", 1); |
||||
|
result.put("message", "成功"); |
||||
|
parse.put("result", result); |
||||
|
} |
||||
|
String message = JSON.toJSONString(parse); |
||||
|
QueueUtil.sendQueue.put(message); |
||||
|
looploopIndex++; |
||||
|
if (isTrace){ |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
loopIndex++; |
||||
|
if (isTrace){ |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} else if (fileType.contains(Constant.IS_CSV)) { |
||||
|
|
||||
|
List<Map> form = (List<Map>) admin.get("form"); |
||||
|
Set<Integer> outputLine = new HashSet<>(); |
||||
|
Map<Integer, String> fromMap = new HashMap<>(32); |
||||
|
for (Map map : form) { |
||||
|
String field = map.get("field").toString(); |
||||
|
int alphabetPosition = ExcelUtils.getAlphabetPosition(field); |
||||
|
fromMap.put(alphabetPosition, field); |
||||
|
} |
||||
|
|
||||
|
int loopIndex = 0; |
||||
|
String csvCharSet = (String) admin.get("csvCharSet"); |
||||
|
if (!StringUtil.hasValue(csvCharSet)) { |
||||
|
csvCharSet = "UTF-8"; |
||||
|
} |
||||
|
String fileName = StringUtil.getMd5(fileUrl); |
||||
|
//定义csv的下载路径 |
||||
|
String csvDir = downloadFilePath.concat(windows ? "\\csv\\" : "/csv/").concat(fileName).concat(".csv"); |
||||
|
FileDownloader.downloadFile(fileUrl, new File(csvDir)); |
||||
|
List<Map<String, String>> csvDataList = CsvUtil.parseCSV(csvDir, csvCharSet, fromMap); |
||||
|
for (Map<String, String> stringStringMap : csvDataList) { |
||||
|
Map result = new HashMap(32); |
||||
|
Map resultsMap = new HashMap(32); |
||||
|
if (loopIndex == csvDataList.size()) { |
||||
|
resultsMap.put("isLast", 1); |
||||
|
stringStringMap.put("isLast", String.valueOf(1)); |
||||
|
} |
||||
|
resultsMap.remove("result"); |
||||
|
Set outputKeySet = output.keySet(); |
||||
|
for (Object outputKey : outputKeySet) { |
||||
|
resultsMap.put(outputKey, stringStringMap.get(outputKey)); |
||||
|
} |
||||
|
resultsMap.put("isLast", 1); |
||||
|
String resultsMapJson = JSON.toJSONString(resultsMap); |
||||
|
result.put("results", resultsMapJson); |
||||
|
result.put("status", 1); |
||||
|
result.put("message", "成功"); |
||||
|
parse.put("result", result); |
||||
|
String empty = "{}"; |
||||
|
if (resultsMapJson.equals(empty)) { |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
String message = JSON.toJSONString(parse); |
||||
|
QueueUtil.sendQueue.put(message); |
||||
|
loopIndex++; |
||||
|
if (isTrace){ |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} else { |
||||
|
log.info("任务队列为空,休眠3秒"); |
||||
|
try { |
||||
|
Thread.sleep(3000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} catch (Throwable e) { |
||||
|
e.printStackTrace(); |
||||
|
log.error("工作线程发生异常" + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,54 @@ |
|||||
|
package com.bfd.crawl.formhandler.service; |
||||
|
|
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import com.bfd.crawl.formhandler.util.QueueUtil; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.kafka.core.KafkaTemplate; |
||||
|
import org.springframework.scheduling.annotation.Async; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:SendService |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/31 17:53 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
@Service |
||||
|
public class SendService { |
||||
|
@Value("${send.topic}") |
||||
|
private String topic; |
||||
|
|
||||
|
@Resource |
||||
|
private KafkaTemplate kafkaTemplate; |
||||
|
|
||||
|
@Async("sendExecutor") |
||||
|
void sendToKafka() { |
||||
|
while (true) { |
||||
|
if (QueueUtil.sendQueue.size() > 0) { |
||||
|
try { |
||||
|
String message = QueueUtil.sendQueue.take(); |
||||
|
Map parse = (Map) JSON.parse(message); |
||||
|
String id = parse.get("id").toString(); |
||||
|
log.info("ID:" + id + "\t" + "数据已发出"); |
||||
|
kafkaTemplate.send(topic, message); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} else { |
||||
|
log.info("任务队列为空,休眠3秒"); |
||||
|
try { |
||||
|
Thread.sleep(3000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
@ -0,0 +1,75 @@ |
|||||
|
package com.bfd.crawl.formhandler.service; |
||||
|
|
||||
|
|
||||
|
import com.bfd.crawl.formhandler.util.QueueUtil; |
||||
|
import com.bfd.util.PauseTool; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Autowired; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.boot.ApplicationArguments; |
||||
|
import org.springframework.boot.ApplicationRunner; |
||||
|
import org.springframework.data.redis.core.StringRedisTemplate; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:StartServcie |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/31 17:14 |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
public class StartServcie implements ApplicationRunner { |
||||
|
@Value("${thread.handler}") |
||||
|
private int handlerNumber; |
||||
|
@Value("${thread.send}") |
||||
|
private int sendNumber; |
||||
|
|
||||
|
@Autowired |
||||
|
private HandlerService handlerService; |
||||
|
@Autowired |
||||
|
private SendService sendService; |
||||
|
|
||||
|
@Value("${zookeeper.connection-string}") |
||||
|
private String connectionString; |
||||
|
@Value("${zookeeper.publish-node}") |
||||
|
private String nodePath; |
||||
|
@Resource |
||||
|
private StringRedisTemplate stringRedisTemplate; |
||||
|
|
||||
|
@Override |
||||
|
public void run(ApplicationArguments args) throws Exception { |
||||
|
PauseTool pauseTool = new PauseTool(); |
||||
|
pauseTool.initializeRedisCache(stringRedisTemplate); |
||||
|
pauseTool.setupZookeeperListener(connectionString, nodePath); |
||||
|
for (int i = 0; i < handlerNumber; i++) { |
||||
|
log.info("处理服务线程" + i + "已启动 "); |
||||
|
handlerService.run(); |
||||
|
} |
||||
|
for (int i = 0; i < sendNumber; i++) { |
||||
|
log.info("发送服务线程" + i + "已启动 "); |
||||
|
sendService.sendToKafka(); |
||||
|
} |
||||
|
Runnable myRunnable = new Runnable() { |
||||
|
@Override |
||||
|
public void run() { |
||||
|
while (true) { |
||||
|
log.info("任务队列长度为" + QueueUtil.taskQueue.size()); |
||||
|
log.info("发送队列长度为" + QueueUtil.sendQueue.size()); |
||||
|
try { |
||||
|
Thread.sleep(10000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
}; |
||||
|
// 创建一个新的线程,并将Runnable对象传递给Thread构造函数 |
||||
|
Thread myThread = new Thread(myRunnable); |
||||
|
// 启动线程 |
||||
|
myThread.start(); |
||||
|
} |
||||
|
} |
@ -0,0 +1,59 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import org.apache.commons.csv.CSVFormat; |
||||
|
import org.apache.commons.csv.CSVParser; |
||||
|
import org.apache.commons.csv.CSVRecord; |
||||
|
|
||||
|
import java.io.FileInputStream; |
||||
|
import java.io.InputStreamReader; |
||||
|
import java.io.Reader; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:CsvUtil |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2024/3/25 11:02 |
||||
|
*/ |
||||
|
public class CsvUtil { |
||||
|
public static List<Map<String, String>> parseCSV(String filePath, String csvCharSet, Map<Integer, String> fromMap) { |
||||
|
List<Map<String, String>> dataList = new ArrayList<>(); |
||||
|
try (Reader reader = new InputStreamReader(new FileInputStream(filePath), csvCharSet); |
||||
|
CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT.withFirstRecordAsHeader())) { |
||||
|
Map<String, String> haderDataMap = new HashMap<>(32); |
||||
|
for (Integer i : fromMap.keySet()) { |
||||
|
// System.out.println(csvParser.getHeaderNames().get(i)); |
||||
|
haderDataMap.put(fromMap.get(i), csvParser.getHeaderNames().get(i)); |
||||
|
System.out.println(JSON.toJSONString(haderDataMap)); |
||||
|
} |
||||
|
dataList.add(haderDataMap); |
||||
|
for (CSVRecord csvRecord : csvParser) { |
||||
|
Map<String, String> dataMap = new HashMap<>(32); |
||||
|
for (Integer i : fromMap.keySet()) { |
||||
|
// System.out.println(csvRecord.get(i)); |
||||
|
dataMap.put(fromMap.get(i), csvRecord.get(i)); |
||||
|
System.out.println(JSON.toJSONString(dataMap)); |
||||
|
} |
||||
|
dataList.add(dataMap); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return dataList; |
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
Map<Integer, String> fromMap = new HashMap<>(32); |
||||
|
fromMap.put(0, "A"); |
||||
|
fromMap.put(2, "C"); |
||||
|
List<Map<String, String>> gbk = parseCSV("C:\\Users\\10318\\Desktop\\评论.csv", "gbk", fromMap); |
||||
|
System.out.println(gbk); |
||||
|
System.out.println((gbk)); |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,60 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import com.alibaba.fastjson.JSONObject; |
||||
|
import com.alibaba.fastjson.JSONPath; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
|
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:DataUtil |
||||
|
* @version:1.0 |
||||
|
* @description: 获取dataValue的值 |
||||
|
* @Date:2023/11/1 9:54 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
public class DataUtil { |
||||
|
/** |
||||
|
* @param key 传入的key |
||||
|
* @param dataMap 数据map |
||||
|
* @return 根据传入的参数进行判断解析,返回正确的dataValue |
||||
|
*/ |
||||
|
public static Object getValue(String key, Map dataMap) { |
||||
|
try { |
||||
|
//公式为空直接就返回 |
||||
|
if (!StringUtil.hasValue(key)) { |
||||
|
return ""; |
||||
|
} |
||||
|
Object dataValue; |
||||
|
String isJson = "#json#"; |
||||
|
if (key.contains(isJson)) { |
||||
|
//进行第一次拆分,获取#json#前面的部分 |
||||
|
String[] keySplit = key.split(isJson); |
||||
|
String firstDataKey = keySplit[0]; |
||||
|
String[] firstDataKeySplit = firstDataKey.split(":"); |
||||
|
//取出前半部分对应的JSON数据并转换为JSONObject |
||||
|
String dataJson = (String) dataMap.get(firstDataKeySplit[0]); |
||||
|
JSONObject dataJsonObject = JSON.parseObject(dataJson); |
||||
|
//根据key的后半部分取出对应JSONObject中的值 |
||||
|
String firstDataKeyJson = (String) JSONPath.eval(dataJsonObject, firstDataKeySplit[1]); |
||||
|
String secDataKey = keySplit[1]; |
||||
|
JSONObject firstDataJsonObject = JSON.parseObject(firstDataKeyJson); |
||||
|
dataValue = JSONPath.eval(firstDataJsonObject, secDataKey); |
||||
|
return dataValue; |
||||
|
} |
||||
|
String[] keySplit = key.split(":"); |
||||
|
String jsonPath = keySplit[1]; |
||||
|
String dataJson = (String) dataMap.get(keySplit[0]); |
||||
|
JSONObject dataJsonObject = JSON.parseObject(dataJson); |
||||
|
dataValue = JSONPath.eval(dataJsonObject, jsonPath); |
||||
|
return dataValue; |
||||
|
} catch (Exception e) { |
||||
|
// TODO: handle exception |
||||
|
log.error("jsonpath公式取值异常,", e); |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
} |
@ -0,0 +1,318 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import org.apache.poi.ss.usermodel.*; |
||||
|
import org.apache.poi.ss.util.CellRangeAddress; |
||||
|
import org.apache.poi.util.IOUtils; |
||||
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileInputStream; |
||||
|
import java.io.FileOutputStream; |
||||
|
import java.io.IOException; |
||||
|
import java.nio.file.Files; |
||||
|
import java.nio.file.Path; |
||||
|
import java.nio.file.Paths; |
||||
|
import java.nio.file.StandardCopyOption; |
||||
|
import java.util.*; |
||||
|
|
||||
|
|
||||
|
/** |
||||
|
* @author jian.mao |
||||
|
* @date 2023年4月7日 |
||||
|
* @description excel解析工具类 |
||||
|
*/ |
||||
|
public class ExcelUtils { |
||||
|
/** |
||||
|
* 判断指定的单元格是否是合并单元格 |
||||
|
* |
||||
|
* @param sheet :excel工作簿 |
||||
|
* @param row :行下标 |
||||
|
* @param column :列下标 |
||||
|
* @return boolean |
||||
|
*/ |
||||
|
public static Map isMergedRegion(Sheet sheet, int row, int column) { |
||||
|
Map infoMap = new HashMap(32); |
||||
|
//合并单元格的数量 |
||||
|
int sheetMergeCount = sheet.getNumMergedRegions(); |
||||
|
//遍历所有合并单元格信息 |
||||
|
for (int i = 0; i < sheetMergeCount; i++) { |
||||
|
//获取第i个合并单元格的信息 |
||||
|
CellRangeAddress range = sheet.getMergedRegion(i); |
||||
|
//获取开始合并的列位置 |
||||
|
int startColumn = range.getFirstColumn(); |
||||
|
//获取结束合列的行位置 |
||||
|
int endColumn = range.getLastColumn(); |
||||
|
//获取开始合并的行位置 |
||||
|
int startRow = range.getFirstRow(); |
||||
|
//获取结束合并的行位置 |
||||
|
int endRow = range.getLastRow(); |
||||
|
/* |
||||
|
判断row,column是否在合并的单元格里。 |
||||
|
即row大于开始合并的行位置,小于合并结束的行位置, |
||||
|
并且column大于开始合并的列位置,小于合并结束的列位置 |
||||
|
*/ |
||||
|
if (row >= startRow && row <= endRow) { |
||||
|
if (column >= startColumn && column <= endColumn) { |
||||
|
infoMap.put("startRow", startRow); |
||||
|
infoMap.put("startColumn", startColumn); |
||||
|
infoMap.put("isMergedRegion", true); |
||||
|
return infoMap; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
infoMap.put("isMergedRegion", false); |
||||
|
return infoMap; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* excel解析 |
||||
|
* |
||||
|
* @param excel |
||||
|
* @return |
||||
|
*/ |
||||
|
public static Map<String, Object> parse(File excel, int dataStartRow, Map<Integer, String> form) { |
||||
|
// 设置更高的覆盖值,比如 200,000,000 |
||||
|
IOUtils.setByteArrayMaxOverride(200000000); |
||||
|
dataStartRow = dataStartRow - 1; |
||||
|
Map<String, Object> excelMap = new HashMap<String, Object>(16); |
||||
|
try { |
||||
|
FileInputStream file = new FileInputStream(excel); |
||||
|
|
||||
|
// 使用工厂模式创建工作簿对象 |
||||
|
Workbook workbook = WorkbookFactory.create(file); |
||||
|
// 获取工作簿中工作表的数量 |
||||
|
int numberOfSheets = workbook.getNumberOfSheets(); |
||||
|
DataFormatter dataFormatter = new DataFormatter(); |
||||
|
// 遍历所有工作表 |
||||
|
for (int i = 0; i < numberOfSheets; i++) { |
||||
|
Sheet sheet = workbook.getSheetAt(i); |
||||
|
String key = sheet.getSheetName(); |
||||
|
//行码 |
||||
|
int rowNum = 0; |
||||
|
List<Map<String, String>> data = new ArrayList<Map<String, String>>(); |
||||
|
// 遍历所有行 |
||||
|
for (Row row : sheet) { |
||||
|
//行内容存储 |
||||
|
Map<String, String> rowMap = new HashMap<String, String>(16); |
||||
|
// 遍历所有单元格 |
||||
|
if (rowNum < dataStartRow) { |
||||
|
rowNum++; |
||||
|
continue; |
||||
|
} else { |
||||
|
Set<Integer> integers = form.keySet(); |
||||
|
for (Integer j : integers) { |
||||
|
Map mergedRegion = isMergedRegion(sheet, rowNum, j); |
||||
|
boolean isMergedRegion = (boolean) mergedRegion.get("isMergedRegion"); |
||||
|
String cellValue = dataFormatter.formatCellValue(row.getCell(j)); |
||||
|
if (isMergedRegion) { |
||||
|
//获取开始合并的列位置 |
||||
|
int startColumn = (int) mergedRegion.get("startColumn"); |
||||
|
//获取开始合并的行位置 |
||||
|
int startRow = (int) mergedRegion.get("startRow"); |
||||
|
cellValue = dataFormatter.formatCellValue(sheet.getRow(startRow).getCell(startColumn)); |
||||
|
} |
||||
|
rowMap.put(form.get(j), cellValue); |
||||
|
} |
||||
|
} |
||||
|
rowNum++; |
||||
|
if (rowNum > 0) { |
||||
|
data.add(rowMap); |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
excelMap.put(key, data); |
||||
|
} |
||||
|
// 关闭文件输入流和工作簿对象 |
||||
|
file.close(); |
||||
|
workbook.close(); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return excelMap; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* excel解析 |
||||
|
* |
||||
|
* @param excel |
||||
|
* @return |
||||
|
*/ |
||||
|
public static Map<String, Object> parse(File excel) { |
||||
|
Map<String, Object> excelMap = new HashMap<String, Object>(16); |
||||
|
try { |
||||
|
FileInputStream file = new FileInputStream(excel); |
||||
|
// 使用工厂模式创建工作簿对象 |
||||
|
Workbook workbook = WorkbookFactory.create(file); |
||||
|
// 获取工作簿中工作表的数量 |
||||
|
int numberOfSheets = workbook.getNumberOfSheets(); |
||||
|
DataFormatter dataFormatter = new DataFormatter(); |
||||
|
// 遍历所有工作表 |
||||
|
for (int i = 0; i < numberOfSheets; i++) { |
||||
|
Sheet sheet = workbook.getSheetAt(i); |
||||
|
String key = sheet.getSheetName(); |
||||
|
//行码 |
||||
|
int rowNum = 0; |
||||
|
List<Map<String, String>> data = new ArrayList<Map<String, String>>(); |
||||
|
Map<Integer, String> titleHead = new HashMap<Integer, String>(16); |
||||
|
// 遍历所有行 |
||||
|
for (Row row : sheet) { |
||||
|
//单元格码 |
||||
|
int cellNum = 0; |
||||
|
//行内容存储 |
||||
|
Map<String, String> rowMap = new HashMap<String, String>(16); |
||||
|
// 遍历所有单元格 |
||||
|
if (rowNum == 0) { |
||||
|
for (Cell cell : row) { |
||||
|
String cellValue = dataFormatter.formatCellValue(cell); |
||||
|
titleHead.put(cellNum, cellValue); |
||||
|
cellNum++; |
||||
|
} |
||||
|
} else { |
||||
|
for (int j = 0; j < titleHead.size(); j++) { |
||||
|
Map mergedRegion = isMergedRegion(sheet, rowNum, cellNum); |
||||
|
boolean isMergedRegion = (boolean) mergedRegion.get("isMergedRegion"); |
||||
|
String cellValue = dataFormatter.formatCellValue(row.getCell(j)); |
||||
|
if (isMergedRegion) { |
||||
|
//获取开始合并的列位置 |
||||
|
int startColumn = (int) mergedRegion.get("startColumn"); |
||||
|
//获取开始合并的行位置 |
||||
|
int startRow = (int) mergedRegion.get("startRow"); |
||||
|
cellValue = dataFormatter.formatCellValue(sheet.getRow(startRow).getCell(startColumn)); |
||||
|
} |
||||
|
rowMap.put(titleHead.get(cellNum), cellValue); |
||||
|
cellNum++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (rowNum > 0) { |
||||
|
data.add(rowMap); |
||||
|
} |
||||
|
rowNum++; |
||||
|
} |
||||
|
excelMap.put(key, data); |
||||
|
} |
||||
|
// 关闭文件输入流和工作簿对象 |
||||
|
file.close(); |
||||
|
workbook.close(); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return excelMap; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 将List<Map<String, String>>写入Excel文件中 |
||||
|
* |
||||
|
* @param data 要写入Excel的数据,每个Map代表一行数据,Map的key为列名,value为单元格数据 |
||||
|
* @param excelFilePath Excel文件路径,包含文件名和扩展名 |
||||
|
* @param sheetName 工作表名称 |
||||
|
* @throws IOException 如果写入Excel文件时发生IO异常,则抛出该异常 |
||||
|
*/ |
||||
|
public static void write(List<Map<String, String>> data, String excelFilePath, String sheetName) throws IOException { |
||||
|
// 创建一个新的工作簿对象 |
||||
|
Workbook workbook = new XSSFWorkbook(); |
||||
|
// 创建一个新的工作表 |
||||
|
Sheet sheet = workbook.createSheet(sheetName); |
||||
|
// 行码 |
||||
|
int rowNum = 0; |
||||
|
// 写入列头 |
||||
|
Row headerRow = sheet.createRow(rowNum++); |
||||
|
int colNum = 0; |
||||
|
for (String key : data.get(0).keySet()) { |
||||
|
Cell cell = headerRow.createCell(colNum++); |
||||
|
cell.setCellValue(key); |
||||
|
} |
||||
|
// 写入数据 |
||||
|
for (Map<String, String> rowMap : data) { |
||||
|
Row row = sheet.createRow(rowNum++); |
||||
|
colNum = 0; |
||||
|
for (String key : rowMap.keySet()) { |
||||
|
Cell cell = row.createCell(colNum++); |
||||
|
try { |
||||
|
String s = rowMap.get(key); |
||||
|
if (s.length() > 30000) { |
||||
|
|
||||
|
cell.setCellValue(s.substring(0, 25000)); |
||||
|
} else { |
||||
|
cell.setCellValue(s); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.out.println(key); |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
// 将数据写入文件 |
||||
|
FileOutputStream outputStream = new FileOutputStream(excelFilePath); |
||||
|
workbook.write(outputStream); |
||||
|
workbook.close(); |
||||
|
outputStream.close(); |
||||
|
} |
||||
|
|
||||
|
// public static void copyFile(String sourceFloder, String targetFileName) { |
||||
|
// File sourceFile = new File(sourceFloder); |
||||
|
// byte[] buffer = new byte[(int) sourceFile.length()]; |
||||
|
// try (InputStream inputStream = new FileInputStream(sourceFile)) { |
||||
|
// inputStream.read(buffer); |
||||
|
// } catch (IOException e) { |
||||
|
// e.printStackTrace(); |
||||
|
// return; |
||||
|
// } |
||||
|
// // 写入目标文件 |
||||
|
// File targetFile = new File(targetFileName); |
||||
|
// targetFile.mkdirs(); |
||||
|
// try (OutputStream outputStream = new FileOutputStream(targetFile)) { |
||||
|
// outputStream.write(buffer); |
||||
|
// } catch (IOException e) { |
||||
|
// e.printStackTrace(); |
||||
|
// return; |
||||
|
// } |
||||
|
// } |
||||
|
|
||||
|
public static void copyFile(String sourceFilePath) { |
||||
|
// 源文件和目标文件的路径 |
||||
|
String targetDrive = "F:"; |
||||
|
|
||||
|
try { |
||||
|
// 获取源文件和目标文件的路径信息 |
||||
|
Path sourcePath = Paths.get(sourceFilePath); |
||||
|
Path targetPath = Paths.get(targetDrive + sourcePath.toString().substring(2)); |
||||
|
|
||||
|
// 如果目标文件的父目录不存在,则创建该目录 |
||||
|
if (!targetPath.getParent().toFile().exists()) { |
||||
|
targetPath.getParent().toFile().mkdirs(); |
||||
|
} |
||||
|
|
||||
|
// 进行文件复制 |
||||
|
Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING); |
||||
|
|
||||
|
System.out.println("Copied file: " + sourceFilePath + " -> " + targetPath); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static int getAlphabetPosition(String input) { |
||||
|
if (input == null || input.isEmpty()) { |
||||
|
return 0; |
||||
|
} |
||||
|
// 将输入字符串转换为大写以处理大小写 |
||||
|
input = input.toUpperCase(); |
||||
|
int position = 0; |
||||
|
for (int i = 0; i < input.length(); i++) { |
||||
|
char c = input.charAt(i); |
||||
|
if (c >= 'A' && c <= 'Z') { |
||||
|
position = position * 26 + (c - 'A' + 1); |
||||
|
} |
||||
|
} |
||||
|
return position - 1; |
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
System.out.println(getAlphabetPosition("xfd")); |
||||
|
Map from = new HashMap(32); |
||||
|
from.put(0, "人名"); |
||||
|
from.put(3, "站点名"); |
||||
|
System.out.println(parse(new File("D:\\fromHanler\\xls\\b51484b213ed8fea61f5b99cbdc1490e.xlsx"), 2, from)); |
||||
|
} |
||||
|
} |
@ -0,0 +1,117 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import okhttp3.*; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileOutputStream; |
||||
|
import java.io.IOException; |
||||
|
import java.io.InputStream; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:FileDownloader |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/8/1 16:31 |
||||
|
*/ |
||||
|
public class FileDownloader { |
||||
|
private static OkHttpClient okHttpClient; |
||||
|
|
||||
|
private static OkHttpClient getOkHttpClient() { |
||||
|
if (okHttpClient == null) { |
||||
|
okHttpClient = new OkHttpClient(); |
||||
|
} |
||||
|
return okHttpClient; |
||||
|
} |
||||
|
|
||||
|
public static void downloadFile(String url, File destination) throws IOException { |
||||
|
OkHttpClient client = getOkHttpClient(); |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.build(); |
||||
|
|
||||
|
try (Response response = client.newCall(request).execute()) { |
||||
|
if (!response.isSuccessful()) { |
||||
|
throw new IOException("Failed to download file: " + response); |
||||
|
} |
||||
|
|
||||
|
ResponseBody body = response.body(); |
||||
|
if (body == null) { |
||||
|
throw new IOException("Response body is null"); |
||||
|
} |
||||
|
if (!destination.getParentFile().exists()) { |
||||
|
|
||||
|
destination.getParentFile().mkdirs(); |
||||
|
} |
||||
|
try (InputStream inputStream = body.byteStream(); |
||||
|
FileOutputStream outputStream = new FileOutputStream(destination)) { |
||||
|
byte[] buffer = new byte[8192]; |
||||
|
int bytesRead; |
||||
|
while ((bytesRead = inputStream.read(buffer)) != -1) { |
||||
|
outputStream.write(buffer, 0, bytesRead); |
||||
|
} |
||||
|
outputStream.flush(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static Map<String, String> uploadFile(String url, String filePath) throws Exception { |
||||
|
File file = new File(filePath); |
||||
|
|
||||
|
Map returnMap = new HashMap(32); |
||||
|
OkHttpClient client = getOkHttpClient(); |
||||
|
// 设置文件上传的媒体类型 |
||||
|
MediaType mediaType = MediaType.parse("application/octet-stream"); |
||||
|
// 创建请求体,将文件添加到请求体中 |
||||
|
RequestBody requestBody = RequestBody.create(mediaType, file); |
||||
|
|
||||
|
// 创建多部分请求体,用于上传文件 |
||||
|
MultipartBody multipartBody = new MultipartBody.Builder() |
||||
|
.setType(MultipartBody.FORM) |
||||
|
.addFormDataPart("file", file.getName(), requestBody) |
||||
|
.build(); |
||||
|
// 创建上传文件的请求 |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.post(multipartBody) |
||||
|
.build(); |
||||
|
|
||||
|
try (Response response = client.newCall(request).execute()) { |
||||
|
if (!response.isSuccessful()) { |
||||
|
throw new IOException("Failed to upload file: " + response); |
||||
|
} |
||||
|
String html = response.body().string(); |
||||
|
|
||||
|
try { |
||||
|
Map parse = (Map) JSON.parse(html); |
||||
|
Map data = (Map) parse.get("data"); |
||||
|
String domain = (String) data.get("domain"); |
||||
|
String src = (String) data.get("src"); |
||||
|
String fileUrl = domain.concat(src); |
||||
|
returnMap.put("fileUrl", fileUrl); |
||||
|
} catch (Exception e) { |
||||
|
returnMap.put("fileUrl", html); |
||||
|
} |
||||
|
// 处理上传成功的响应 |
||||
|
System.out.println("File uploaded successfully!"); |
||||
|
} |
||||
|
return returnMap; |
||||
|
} |
||||
|
|
||||
|
public static void deleteFile(String url, String md5) throws Exception { |
||||
|
OkHttpClient client = getOkHttpClient(); |
||||
|
url = url.concat("delete?md5=").concat(md5); |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.get() |
||||
|
.build(); |
||||
|
try (Response response = client.newCall(request).execute()) { |
||||
|
if (!response.isSuccessful()) { |
||||
|
throw new IOException("Failed to upload file: " + response); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
@ -0,0 +1,42 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:FileUtil |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/8/2 10:57 |
||||
|
*/ |
||||
|
public class FileUtil { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
System.out.println(traverseAndReturnFilePath("D:\\\\ocr\\\\305ce27d7a05770456fdc09d0b3044f7\\\\")); |
||||
|
} |
||||
|
|
||||
|
public static List<String> traverseAndReturnFilePath(String folderPath) { |
||||
|
List<String> fileList = new ArrayList<>(); |
||||
|
File folder = new File(folderPath); |
||||
|
// 检查文件夹是否存在并且是一个文件夹 |
||||
|
if (folder.exists() && folder.isDirectory()) { |
||||
|
// 获取文件夹中的所有文件和子文件夹 |
||||
|
File[] files = folder.listFiles(); |
||||
|
if (files != null) { |
||||
|
for (File file : files) { |
||||
|
if (file.isFile()) { |
||||
|
// 如果是文件,则输出全路径 |
||||
|
fileList.add(file.getAbsolutePath()); |
||||
|
} else if (file.isDirectory()) { |
||||
|
|
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} else { |
||||
|
System.out.println("指定的路径不是一个文件夹或文件夹不存在。"); |
||||
|
} |
||||
|
return fileList; |
||||
|
} |
||||
|
} |
@ -0,0 +1,59 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import okhttp3.*; |
||||
|
|
||||
|
import java.util.Map; |
||||
|
import java.util.concurrent.TimeUnit; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:ocrUtil |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/8/1 16:38 |
||||
|
*/ |
||||
|
public class OcrUtil { |
||||
|
private static OkHttpClient okHttpClient; |
||||
|
|
||||
|
private static OkHttpClient getOkHttpClient() { |
||||
|
if (okHttpClient == null) { |
||||
|
okHttpClient = new OkHttpClient(); |
||||
|
} |
||||
|
return okHttpClient; |
||||
|
} |
||||
|
|
||||
|
public static String doOcr(String url,String ocrApi) { |
||||
|
String text = ""; |
||||
|
int reTryTimes = 3; |
||||
|
for (int i = 0; i < reTryTimes; i++) { |
||||
|
int okCode = 200; |
||||
|
OkHttpClient client = getOkHttpClient(); |
||||
|
OkHttpClient.Builder builder = client.newBuilder().writeTimeout(600, TimeUnit.SECONDS).connectTimeout(600, TimeUnit.SECONDS).readTimeout(600, TimeUnit.SECONDS); |
||||
|
client = builder.build(); |
||||
|
MediaType mediaType = MediaType.parse("application/json"); |
||||
|
RequestBody body = RequestBody.create(mediaType, "{\"id\":\"\",\"url\":\"" + url + "\"}"); |
||||
|
Request request = new Request.Builder() |
||||
|
.url(ocrApi) |
||||
|
.method("POST", body) |
||||
|
.addHeader("Content-Type", "application/json") |
||||
|
.build(); |
||||
|
try { |
||||
|
Response response = client.newCall(request).execute(); |
||||
|
String html = response.body().string(); |
||||
|
Map dataMap = (Map) JSON.parse(html); |
||||
|
int code = (int) dataMap.get("code"); |
||||
|
if (code == okCode) { |
||||
|
text = (String) dataMap.get("text"); |
||||
|
} |
||||
|
if (StringUtil.hasValue(text)) { |
||||
|
break; |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return text; |
||||
|
} |
||||
|
} |
@ -0,0 +1,23 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
|
||||
|
import java.util.Locale; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:OsUtil |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/4/23 9:40 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
public class OsUtil { |
||||
|
public static Boolean isWindows() { |
||||
|
String windows = "windows"; |
||||
|
String osName = System.getProperty("os.name").toLowerCase(Locale.ROOT); |
||||
|
log.info("osName = " + osName); |
||||
|
return osName.contains(windows); |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,19 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
import java.util.concurrent.LinkedBlockingDeque; |
||||
|
|
||||
|
/**
 * Holder of the process-wide, unbounded in-memory queues.
 *
 * @author jinming
 * @version 1.0
 * @date 2023/7/13 15:00
 */
public class QueueUtil {

    /** Queue of pending tasks (presumably consumed by the handler workers - verify against callers). */
    public static LinkedBlockingDeque<String> taskQueue = new LinkedBlockingDeque<>();

    /** Queue of messages waiting to be sent. */
    public static LinkedBlockingDeque<String> sendQueue = new LinkedBlockingDeque<>();

}
@ -0,0 +1,94 @@ |
|||||
|
package com.bfd.crawl.formhandler.util; |
||||
|
|
||||
|
|
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
|
||||
|
import java.security.MessageDigest; |
||||
|
import java.util.HashSet; |
||||
|
import java.util.Set; |
||||
|
import java.util.regex.Matcher; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
/** |
||||
|
* @author jinming |
||||
|
* @version 1.0 |
||||
|
* @className StringUtile |
||||
|
* @Date 2022/1/21 11:46 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
public class StringUtil { |
||||
|
public static boolean hasValue(String str) { |
||||
|
return str != null && !"".equals(str.trim()); |
||||
|
} |
||||
|
|
||||
|
public static String getRegexGroup(String regex, String str, int id) { |
||||
|
String resultStr = ""; |
||||
|
if (hasValue(str)) { |
||||
|
Pattern p = Pattern.compile(regex); |
||||
|
Matcher m = p.matcher(str); |
||||
|
if (m.find()) { |
||||
|
resultStr = m.group(id); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if ("".equals(resultStr)) { |
||||
|
} |
||||
|
|
||||
|
return resultStr; |
||||
|
} |
||||
|
|
||||
|
public static Set<String> getEmailAddress(String message) { |
||||
|
Set<String> emailList = new HashSet<>(); |
||||
|
Pattern pattern = Pattern.compile("\\w+\\.?\\w+\\@\\w+\\.\\w+"); |
||||
|
Matcher m = pattern.matcher(message); |
||||
|
while (m.find()) { |
||||
|
emailList.add(m.group(0)); |
||||
|
} |
||||
|
return emailList; |
||||
|
} |
||||
|
public static String getMd5(String string) { |
||||
|
try { |
||||
|
MessageDigest md5 = MessageDigest.getInstance("MD5"); |
||||
|
byte[] bs = md5.digest(string.getBytes("UTF-8")); |
||||
|
StringBuilder sb = new StringBuilder(40); |
||||
|
for (byte x : bs) { |
||||
|
if ((x & 0xff) >> 4 == 0) { |
||||
|
sb.append("0").append(Integer.toHexString(x & 0xff)); |
||||
|
} else { |
||||
|
sb.append(Integer.toHexString(x & 0xff)); |
||||
|
} |
||||
|
} |
||||
|
return sb.toString(); |
||||
|
} catch (Exception e) { |
||||
|
//LOG.error("获取md5异常", e); |
||||
|
return "nceaform" + System.currentTimeMillis(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static String removeAllHtmlTags(String str) { |
||||
|
return hasValue(str) ? str.replaceAll("<[^<>]+?>", "") : ""; |
||||
|
} |
||||
|
|
||||
|
public static String getRegexGroup(Pattern regex, String str, int id) { |
||||
|
String resultStr = ""; |
||||
|
if (hasValue(str)) { |
||||
|
Matcher m = regex.matcher(str); |
||||
|
if (m.find()) { |
||||
|
resultStr = m.group(id); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if ("".equals(resultStr)) { |
||||
|
log.error(regex + " parser error!"); |
||||
|
} |
||||
|
|
||||
|
return resultStr; |
||||
|
} |
||||
|
|
||||
|
public static String getStrByPattern(String str, String regex) { |
||||
|
Pattern pattern = Pattern.compile(regex); |
||||
|
Matcher m = pattern.matcher(str); |
||||
|
return m.find() ? m.group(0) : ""; |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,60 @@ |
|||||
|
server: |
||||
|
port: 7081 |
||||
|
spring: |
||||
|
application: |
||||
|
name: 表格处理 |
||||
|
boot: |
||||
|
admin: |
||||
|
client: |
||||
|
health: |
||||
|
timeout: 10s |
||||
|
url: http://172.16.12.55:8001 |
||||
|
instance: |
||||
|
service-base-url: http://172.16.12.56:7080 |
||||
|
kafka: |
||||
|
bootstrap-servers: 172.16.12.55:9092,172.16.12.56:9092,172.16.12.57:9092 |
||||
|
producer: |
||||
|
retries: 3 |
||||
|
acks: all |
||||
|
batch-size: 4096 |
||||
|
buffer-memory: 102476800 |
||||
|
key-serializer: org.apache.kafka.common.serialization.StringSerializer |
||||
|
value-serializer: org.apache.kafka.common.serialization.StringSerializer |
||||
|
redis: |
||||
|
host: 172.24.12.126 |
||||
|
port: 6379 |
||||
|
timeout: 10000 |
||||
|
database: 5 |
||||
|
jedis: |
||||
|
pool: |
||||
|
max-active: 8 # 连接池最大连接数(使用负值表示没有限制) |
||||
|
max-wait: 800 # 连接池最大阻塞等待时间(使用负值表示没有限制) |
||||
|
max-idle: 8 # 连接池中的最大空闲连接 |
||||
|
min-idle: 2 # 连接池中的最小空闲连接 |
||||
|
zookeeper: |
||||
|
connection-string: 172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181 |
||||
|
publish-node: /analyze |
||||
|
|
||||
|
|
||||
|
logging: |
||||
|
file: |
||||
|
path: ./logs |
||||
|
|
||||
|
management: |
||||
|
endpoints: |
||||
|
web: |
||||
|
exposure: |
||||
|
include: "*" |
||||
|
endpoint: |
||||
|
health: |
||||
|
show-details: always |
||||
|
|
||||
|
send: |
||||
|
topic: analyze |
||||
|
|
||||
|
file: |
||||
|
path: /opt/analyze/apps/formHandler/file/ |
||||
|
|
||||
|
thread: |
||||
|
handler: 1 |
||||
|
send: 1 |
@ -0,0 +1,36 @@ |
|||||
|
<configuration> |
||||
|
<!-- 属性文件:在properties文件中找到对应的配置项 --> |
||||
|
<springProperty scope="context" name="logging.file.path" source="logging.file.path"/> |
||||
|
<springProperty scope="context" name="logging.level" source="logging.level"/> |
||||
|
<!-- 默认的控制台日志输出,一般生产环境都是后台启动,这个没太大作用 --> |
||||
|
<appender name="STDOUT" |
||||
|
class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="GLMAPPER-LOGGERONE" |
||||
|
class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<append>true</append> |
||||
|
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> |
||||
|
<level>${logging.level}</level> |
||||
|
</filter> |
||||
|
<file> |
||||
|
${logging.file.path}/formHandler.log |
||||
|
</file> |
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<FileNamePattern>${logging.file.path}/formHandler.log.%d{yyyy-MM-dd}</FileNamePattern> |
||||
|
<MaxHistory>3</MaxHistory> |
||||
|
</rollingPolicy> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<root level="info"> |
||||
|
<appender-ref ref="GLMAPPER-LOGGERONE"/> |
||||
|
<appender-ref ref="STDOUT"/> |
||||
|
</root> |
||||
|
</configuration> |
Write
Preview
Loading…
Cancel
Save
Reference in new issue