Browse Source

pdf解析ocr版本代码控制

master
maojian 3 months ago
commit
88f57ab003
  1. 40
      .classpath
  2. 23
      .project
  3. 4
      .settings/org.eclipse.core.resources.prefs
  4. 9
      .settings/org.eclipse.jdt.core.prefs
  5. 4
      .settings/org.eclipse.m2e.core.prefs
  6. 265
      pom.xml
  7. 21
      src/main/java/com/bw/ocr/Application.java
  8. 35
      src/main/java/com/bw/ocr/cache/ConfigCache.java
  9. 39
      src/main/java/com/bw/ocr/controller/TaskReceiveController.java
  10. 64
      src/main/java/com/bw/ocr/entity/Constants.java
  11. 179
      src/main/java/com/bw/ocr/handler/MainHandler.java
  12. 19
      src/main/java/com/bw/ocr/service/OcrTaskService.java
  13. 17
      src/main/java/com/bw/ocr/service/TaskReceiveService.java
  14. 154
      src/main/java/com/bw/ocr/service/impl/OcrTaskServiceImpl.java
  15. 55
      src/main/java/com/bw/ocr/service/impl/TaskReceiveServiceImpl.java
  16. 48
      src/main/java/com/bw/ocr/utils/DataUtil.java
  17. 177
      src/main/java/com/bw/ocr/utils/DateUtil.java
  18. 1003
      src/main/java/com/bw/ocr/utils/DownLoadUtil.java
  19. 27
      src/main/java/com/bw/ocr/utils/EncryptionUtil.java
  20. 41
      src/main/java/com/bw/ocr/utils/FileUtil.java
  21. 53
      src/main/java/com/bw/ocr/utils/GPTResultParseUtil.java
  22. 32
      src/main/java/com/bw/ocr/utils/JsonUtil.java
  23. 33
      src/main/java/com/bw/ocr/utils/OtherUtils.java
  24. 18
      src/main/java/com/bw/ocr/utils/QueueUtil.java
  25. 46
      src/main/java/com/bw/ocr/utils/SpringBootKafka.java
  26. 23
      src/main/java/com/bw/ocr/utils/ThrowMessageUtil.java
  27. 101
      src/main/resources/application.yml
  28. 36
      src/main/resources/logback-spring.xml
  29. 38
      src/test/java/com/bw/baidu_dcoparse_ocr/AppTest.java

40
.classpath

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

23
.project

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>baidu_dcoparse_ocr</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

4
.settings/org.eclipse.core.resources.prefs

@ -0,0 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

9
.settings/org.eclipse.jdt.core.prefs

@ -0,0 +1,9 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.methodParameters=generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

4
.settings/org.eclipse.m2e.core.prefs

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

265
pom.xml

@ -0,0 +1,265 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.2.4.RELEASE</version>
</parent>
<groupId>com.bw</groupId>
<artifactId>baidu_dcoparse_ocr</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>baidu_dcoparse_ocr</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!--
https://mvnrepository.com/artifact/de.codecentric/spring-boot-admin-starter-client -->
<dependency>
<groupId>de.codecentric</groupId>
<artifactId>spring-boot-admin-starter-client</artifactId>
<version>2.2.4</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.8</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-test</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework/spring-test -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>5.0.10.RELEASE</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>2.0.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.mchange/c3p0 -->
<dependency>
<groupId>com.mchange</groupId>
<artifactId>c3p0</artifactId>
<version>0.9.5.5</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.29</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.9.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<!--
https://mvnrepository.com/artifact/org.jetbrains.kotlin/kotlin-reflect -->
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-reflect</artifactId>
<version>1.6.21</version>
<scope>runtime</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.1</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency>
<!-- <dependency> <groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency> -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.5</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/p6spy/p6spy -->
<dependency>
<groupId>p6spy</groupId>
<artifactId>p6spy</artifactId>
<version>3.9.0</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.0.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.0.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>transport-netty4-client</artifactId>
<version>6.0.0</version>
</dependency>
<dependency>
<groupId>com.auth0</groupId>
<artifactId>java-jwt</artifactId>
<version>4.2.2</version>
</dependency>
<dependency>
<groupId>com.bfd.util</groupId>
<artifactId>pauseTool</artifactId>
<version>1.0</version>
</dependency>
</dependencies>
<build>
<!-- <pluginManagement> --><!-- lock down plugins versions to avoid using Maven defaults (may be
moved
to parent pom) -->
<plugins>
<!-- clean lifecycle, see
https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see
https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see
https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- spring-boot-maven-plugin插件就是打包spring boot应用的 -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>com.bw.ocr.Application</mainClass>
<layout>ZIP</layout>
<includes>
<include>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
</include>
</includes>
</configuration>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<type>jar</type>
<includeTypes>jar</includeTypes>
<includeScope>runtime</includeScope>
<outputDirectory>${project.build.directory}/libs</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<!-- </pluginManagement> -->
</build>
</project>

21
src/main/java/com/bw/ocr/Application.java

@ -0,0 +1,21 @@
package com.bw.ocr;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.kafka.annotation.EnableKafka;
/**
 * Spring Boot entry point of the OCR service.
 *
 * <p>Besides the usual auto-configuration, Kafka listener support is switched on
 * through {@code @EnableKafka}.
 *
 * @author jian.mao
 * @date 2025-01-13
 */
@EnableKafka
@SpringBootApplication
public class Application {

    /**
     * Starts the Spring application context.
     *
     * @param args command-line arguments handed over to Spring Boot unchanged
     */
    public static void main(String[] args) {
        // Equivalent to the static SpringApplication.run(Application.class, args) shortcut.
        SpringApplication application = new SpringApplication(Application.class);
        application.run(args);
    }
}

35
src/main/java/com/bw/ocr/cache/ConfigCache.java

@ -0,0 +1,35 @@
package com.bw.ocr.cache;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
import java.util.concurrent.LinkedBlockingDeque;
/**
* @author jian.mao
* @date 2022年11月11日
* @description 静态变量类
*/
@Slf4j
public class ConfigCache {
/**启动条件**/
public static boolean isStart = true;
/*****任务队列*****/
public static LinkedBlockingDeque<Map<String, Object>> taskQueue = new LinkedBlockingDeque<Map<String,Object>>();
/**
* 队列录入任务
* @param queue
* @param task
*/
public static void putQueue(LinkedBlockingDeque<Map<String, Object>> queue,Map<String, Object> task){
//next app 写入队列准备调出
try {
queue.put(task);
} catch (InterruptedException e) {
log.error("队列写入data失败---");
}
}
}

39
src/main/java/com/bw/ocr/controller/TaskReceiveController.java

@ -0,0 +1,39 @@
package com.bw.ocr.controller;
import javax.annotation.Resource;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import com.bw.ocr.service.TaskReceiveService;
import lombok.extern.slf4j.Slf4j;
/**
 * HTTP endpoints under {@code /task} for receiving tasks.
 *
 * @author jian.mao
 * @date 2025-01-14
 */
@Slf4j
@RequestMapping("/task")
@Controller
public class TaskReceiveController {

    /** Service that validates and enqueues incoming tasks. */
    @Resource
    private TaskReceiveService taskReceiveService;

    /**
     * Accepts a task as a raw JSON request body and hands it to the service layer.
     *
     * @param dataJson request body, passed through unparsed
     * @return the service's response string, written directly to the HTTP response body
     */
    @ResponseBody
    @PostMapping("/put")
    public String put(@RequestBody String dataJson) {
        return taskReceiveService.put(dataJson);
    }

    /**
     * Simple GET probe endpoint; both parameters are accepted but ignored.
     *
     * @param param unused
     * @param token unused
     * @return the fixed string {@code "123"}
     */
    @ResponseBody
    @RequestMapping(method = RequestMethod.GET, value = "/hello")
    public String hello(String param, String token) {
        return "123";
    }
}

64
src/main/java/com/bw/ocr/entity/Constants.java

@ -0,0 +1,64 @@
package com.bw.ocr.entity;
/**
 * Shared string and numeric constants (mostly map/JSON key names).
 *
 * @author jian.mao
 * @date 2022-11-15
 */
public class Constants {

    // ---------------------------------------------------------------
    // Blueprint key names
    // ---------------------------------------------------------------
    public static final String ID = "id";
    public static final String SCENES_ID = "scenes_id";

    // ---------------------------------------------------------------
    // App key names
    // ---------------------------------------------------------------
    public static final String VERSION = "version";

    // ---------------------------------------------------------------
    // Miscellaneous
    // ---------------------------------------------------------------
    public static final String EMPTY = "";
    public static final String MAP_TYPE = "Map";
    public static final String LIST_TYPE = "List";
    public static final String STRING_TYPE = "String";
    public static final String RESULTS = "results";
    public static final String RESULT = "result";
    public static final String STATUS = "status";

    // ---------------------------------------------------------------
    // Redis
    // ---------------------------------------------------------------
    public static final String LOCK_KEY = "myLock";
    public static final long LOCK_EXPIRE_TIME = 300_000L;

    // ---------------------------------------------------------------
    // Application parameters
    // ---------------------------------------------------------------
    public static final String CODE = "code";
    public static final String MESSAGE = "message";
    public static final String INPUT = "input";
    public static final String OUTPUT = "output";
    public static final String FORM = "form";
    public static final String FIELD = "field";
    public static final String VALUE = "value";
    public static final String DATA = "data";
    public static final String COLON_EN = ":";

    // ---------------------------------------------------------------
    // Admin / OCR request and response keys
    // ---------------------------------------------------------------
    public static final String CONTENT = "content";
    public static final String GRANT_TYPE = "grant_type";
    public static final String CLIENT_CREDENTIALS = "client_credentials";
    public static final String CLIENT_SECRET = "client_secret";
    public static final String CLIENT_ID = "client_id";
    public static final String ACCESS_TOKEN = "access_token";
    public static final String APIKEY = "apiKey";
    public static final String SECRETKEY = "secretKey";
    public static final String TOKENURL = "tokenUrl";
    public static final String ISLAST = "isLast";
    public static final String TRACE = "trace";
    /** Message/content used when parsing fails (identifier spelling kept for compatibility). */
    public static final String PARSE_FIAL = "解析失败";
    public static final String FILEURL = "fileUrl";
    public static final String CREATEURL = "createUrl";
    public static final String PDF_FILE = "pdf_file";
    public static final String PDF_FILE_NUM = "pdf_file_num";
    public static final String PDF_FILE_SIZE = "pdf_file_size";
    public static final String SUCCESS = "success";
    public static final String FAILED = "failed";
    public static final String WORDS_RESULT = "words_result";
    public static final String WORDS = "words";
}

179
src/main/java/com/bw/ocr/handler/MainHandler.java

@ -0,0 +1,179 @@
package com.bw.ocr.handler;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import javax.annotation.Resource;
import org.apache.commons.io.FileUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.core.annotation.Order;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.bfd.util.PauseTool;
import com.bw.ocr.cache.ConfigCache;
import com.bw.ocr.service.OcrTaskService;
import com.bw.ocr.utils.DateUtil;
import com.bw.ocr.utils.FileUtil;
import lombok.extern.slf4j.Slf4j;
/**
* @author jian.mao
* @date 2025年1月13日
* @description
*/
@Component
@Order(value = 1)
@Slf4j
public class MainHandler implements ApplicationRunner {
@Value("${task.task-queue-path}")
private String taskPath;
@Value("${zookeeper.connection-string}")
private String connectionString;
@Value("${zookeeper.publish-node}")
private String nodePath;
@Resource
private StringRedisTemplate stringRedisTemplate;
@Autowired
private OcrTaskService ocrTaskService;
/***线程池参数***/
@Value("${threadPool.corePoolSize}")
private int corePoolSize;
@Value("${threadPool.maximumPoolSize}")
private int maximumPoolSize;
@Value("${threadPool.keepAliveTime}")
private long keepAliveTime;
@Value("${threadPool.queueSize}")
private int queueSize;
/**
*执行入口
*/
@Override
public void run(ApplicationArguments args) throws Exception {
PauseTool pauseTool = new PauseTool();
pauseTool.initializeRedisCache(stringRedisTemplate);
pauseTool.setupZookeeperListener(connectionString, nodePath);
//线程池方式
ThreadPoolExecutor executor = new ThreadPoolExecutor(
corePoolSize,
maximumPoolSize,
keepAliveTime,
TimeUnit.SECONDS,
new LinkedBlockingQueue<>(queueSize),
new ThreadPoolExecutor.CallerRunsPolicy()
);
//消费创建任务队列数据
Thread consumerThread = new Thread(() -> {
while (true) {
try {
// 从队列中获取任务
Map<String, Object> task = ConfigCache.taskQueue.take();
// 提交给线程池执行
executor.execute(() -> parse(task));
} catch (InterruptedException e) {
// 恢复中断状态
Thread.currentThread().interrupt();
log.error("任务消费线程被中断");
break;
}
}
});
consumerThread.start();
log.info("任务消费线程启动-----");
//启动加载缓存任务
readTask(taskPath, ConfigCache.taskQueue);
//停止处理
waitDown();
}
/**
* 创建任务执行方法
* @param task
*/
private void parse(Map<String, Object> task) {
ocrTaskService.parse(task);
}
/****************************************************************load******************************************************************************/
/**
* 加载文件中的任务
* @param path 文件地址
* @param queue 队列
*/
@SuppressWarnings("unchecked")
public static void readTask(String path, LinkedBlockingDeque<Map<String, Object>> queue) {
File file = new File(path);
if (file.exists()) {
List<String> tasks = null;
try {
tasks = FileUtils.readLines(file, "UTF-8");
} catch (IOException e) {
e.printStackTrace();
}
for (String taskStr : tasks) {
Map<String, Object> task = JSONObject.parseObject(taskStr);
try {
queue.put(task);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
file.delete();
}
}
/*******************************************************************stop************************************************************************/
/**
* 结束触发钩子
*/
public void waitDown() {
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
// 停止线程
ConfigCache.isStart = false;
log.info("stop-------");
writeTsskToFile();
}
});
}
/**
* 任务持久化到硬盘
*/
public void writeTsskToFile() {
while (true) {
if (ConfigCache.taskQueue.size() > 0) {
try {
Map<String, Object> task = ConfigCache.taskQueue.take();
FileUtil.writeFile(taskPath, JSONObject.toJSONString(task));
} catch (InterruptedException e) {
e.printStackTrace();
}
} else {
log.info("taskQueue write is file end");
break;
}
}
}
}

19
src/main/java/com/bw/ocr/service/OcrTaskService.java

@ -0,0 +1,19 @@
package com.bw.ocr.service;
import java.util.Map;
/**
* ocr识别处理接口
* @author jian.mao
* @date 2025年2月18日
* @description
*/
public interface OcrTaskService {
/**
* ocr远端任务
* @param task
*/
public void parse(Map<String, Object> task);
}

17
src/main/java/com/bw/ocr/service/TaskReceiveService.java

@ -0,0 +1,17 @@
package com.bw.ocr.service;
/**
* 任务接收服务层
* @author jian.mao
* @date 2025年1月14日
* @description
*/
public interface TaskReceiveService {
/**
* 任务新增
* @param dataJson
* @return
*/
public String put(String dataJson);
}

154
src/main/java/com/bw/ocr/service/impl/OcrTaskServiceImpl.java

@ -0,0 +1,154 @@
package com.bw.ocr.service.impl;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.util.PauseTool;
import com.bw.ocr.cache.ConfigCache;
import com.bw.ocr.entity.Constants;
import com.bw.ocr.service.OcrTaskService;
import com.bw.ocr.utils.DataUtil;
import com.bw.ocr.utils.DownLoadUtil;
import com.bw.ocr.utils.FileUtil;
import com.bw.ocr.utils.SpringBootKafka;
import lombok.extern.slf4j.Slf4j;
/**
 * OCR task implementation: downloads the source file, sends it page by page to the
 * remote OCR endpoint, concatenates the recognised words and publishes the outcome
 * (success or failure) to Kafka.
 *
 * @author jian.mao
 * @date 2025-02-18
 */
@Service
@Slf4j
public class OcrTaskServiceImpl implements OcrTaskService {

    /** Status value published with a successful result. */
    private static final int STATUS_SUCCESS = 1;

    /** Status value published with a failed result. */
    private static final int STATUS_FAILED = 2;

    /** Whitelist for the temp-file extension derived from the (untrusted) file URL. */
    private static final String SAFE_EXTENSION = "[A-Za-z0-9]{1,10}";

    /** Extension used when the URL does not yield a safe one. */
    private static final String FALLBACK_EXTENSION = "tmp";

    @Autowired
    private SpringBootKafka springBootKafka;

    @Value("${customize-kafka.producer.topic}")
    private String topic;

    @Value("${file.path-prefix}")
    private String downloadFilePathPrefix;

    /**
     * Runs OCR for one task and publishes the result to the configured Kafka topic.
     *
     * <p>The task is skipped (nothing is published) when its
     * {@code scenes_id + "_" + version} key is not present in {@code PauseTool.CACHE}.
     * Any exception during processing is logged and turned into a failure result.
     *
     * @param task task map; reads {@code data}, {@code input}, {@code scenes_id} and
     *             {@code version}, and receives the {@code result} entry
     */
    @Override
    @SuppressWarnings("unchecked") // task payload is untyped JSON; shape is defined by the producer
    public void parse(Map<String, Object> task) {
        Map<String, Object> data = (Map<String, Object>) task.get(Constants.DATA);
        Map<String, Object> input = (Map<String, Object>) task.get(Constants.INPUT);
        int scenesId = (int) task.get(Constants.SCENES_ID);
        int version = (int) task.get(Constants.VERSION);
        String pauseKey = scenesId + "_" + version;
        if (!PauseTool.CACHE.containsKey(pauseKey)) {
            log.info("流程:{}的版本:{}已失效,任务跳过", scenesId, version);
            return;
        }
        try {
            // Resolve the source file URL from the task data.
            String fileUrl = DataUtil.getValue(input.get(Constants.FILEURL).toString(), data).toString();
            // Download the file and load it as Base64; the temp file is always removed.
            String fileContent = downloadAsBase64(fileUrl);
            String token = getToken(input);
            String createUrl = (String) input.get(Constants.CREATEURL);
            String content = recognizePages(createUrl + token, fileContent, scenesId);
            publishResult(task, content, "成功", STATUS_SUCCESS);
        } catch (Exception e) {
            log.error("创建文档解析任务异常。e:",e);
            publishResult(task, Constants.PARSE_FIAL, Constants.PARSE_FIAL, STATUS_FAILED);
        }
    }

    /**
     * Downloads the file to a uniquely named temp file and returns its Base64 content.
     * The temp file is deleted in all cases, including download or read failures.
     *
     * @param fileUrl source URL
     * @return Base64-encoded file content
     * @throws Exception if downloading or reading the file fails
     */
    private String downloadAsBase64(String fileUrl) throws Exception {
        String fileName = UUID.randomUUID().toString() + "." + safeExtension(fileUrl);
        String downloadFilePath = downloadFilePathPrefix + fileName;
        try {
            DownLoadUtil.downloadFile(fileUrl, downloadFilePath);
            return encodeFileToBase64(downloadFilePath);
        } finally {
            try {
                Files.deleteIfExists(Paths.get(downloadFilePath));
            } catch (IOException e) {
                // Cleanup failure must not mask the primary outcome.
                log.warn("failed to delete temp file {}", downloadFilePath, e);
            }
        }
    }

    /**
     * Derives a file extension from the URL that is safe to use in a local file name.
     *
     * <p>The text after the last dot is used when it is purely alphanumeric. Otherwise
     * the query/fragment is stripped and the check is repeated; if that fails too, a
     * neutral fallback is returned. This keeps path separators and {@code ..} from an
     * untrusted URL out of the temp-file path.
     *
     * @param fileUrl source URL
     * @return an alphanumeric extension without the leading dot
     */
    private static String safeExtension(String fileUrl) {
        String extension = fileUrl.substring(fileUrl.lastIndexOf('.') + 1);
        if (!extension.matches(SAFE_EXTENSION)) {
            String withoutQuery = fileUrl.split("[?#]", 2)[0];
            extension = withoutQuery.substring(withoutQuery.lastIndexOf('.') + 1);
        }
        return extension.matches(SAFE_EXTENSION) ? extension : FALLBACK_EXTENSION;
    }

    /**
     * Sends the document to the OCR endpoint one page at a time and concatenates the
     * recognised words. The total page count is taken from each response, so the loop
     * bound is only known after the first request.
     *
     * @param url         OCR endpoint URL including the access token
     * @param fileContent Base64-encoded document
     * @param scenesId    scene id, used for logging only
     * @return concatenated recognised text of all pages
     * @throws IllegalStateException if a response lacks the page count or the word list
     */
    @SuppressWarnings("unchecked") // words_result is an untyped JSON array of objects
    private String recognizePages(String url, String fileContent, int scenesId) {
        Map<String, Object> param = new HashMap<String, Object>(16);
        param.put(Constants.PDF_FILE, fileContent);
        int maxPageNum = 1;
        StringBuilder content = new StringBuilder();
        for (int pageNum = 1; pageNum <= maxPageNum; pageNum++) {
            log.info("scenesId:{},第{}页解析",scenesId,pageNum);
            param.put(Constants.PDF_FILE_NUM, pageNum);
            String resStr = DownLoadUtil.doPostFrom(url, param);
            JSONObject res = JSONObject.parseObject(resStr);
            // getInteger accepts both numeric and string-encoded page counts.
            Integer pageCount = res == null ? null : res.getInteger(Constants.PDF_FILE_SIZE);
            List<Map<String, Object>> wordsResult =
                    res == null ? null : (List<Map<String, Object>>) res.get(Constants.WORDS_RESULT);
            if (pageCount == null || wordsResult == null) {
                throw new IllegalStateException(
                        "unexpected OCR response for page " + pageNum + ": " + resStr);
            }
            maxPageNum = pageCount;
            for (Map<String, Object> word : wordsResult) {
                content.append(word.get(Constants.WORDS));
            }
        }
        return content.toString();
    }

    /**
     * Attaches the result to the task and sends the task to Kafka.
     *
     * @param task    task that receives the {@code result} entry
     * @param content recognised text, or the failure text
     * @param message human-readable outcome message
     * @param status  outcome status value
     */
    private void publishResult(Map<String, Object> task, String content, String message, int status) {
        Map<String, Object> results = new HashMap<String, Object>(16);
        results.put(Constants.ISLAST, 1);
        results.put(Constants.CONTENT, content);
        results.put(Constants.ID, UUID.randomUUID().toString());
        Map<String, Object> result = new HashMap<String, Object>(16);
        result.put(Constants.RESULTS, JSONObject.toJSONString(results));
        result.put(Constants.MESSAGE, message);
        result.put(Constants.STATUS, status);
        task.put(Constants.RESULT, result);
        springBootKafka.send(topic, JSONObject.toJSONString(task));
        log.info("数据流转至下游-------");
    }

    /**
     * Requests an access token using the client-credentials parameters from the input.
     *
     * @param input task input; reads {@code apiKey}, {@code secretKey} and {@code tokenUrl}
     * @return the access token
     * @throws IllegalStateException if the token response contains no access token
     */
    private String getToken(Map<String, Object> input) {
        String apiKey = (String) input.get(Constants.APIKEY);
        String secretKey = (String) input.get(Constants.SECRETKEY);
        String tokenUrl = (String) input.get(Constants.TOKENURL);
        Map<String, Object> param = new HashMap<String, Object>(16);
        param.put(Constants.GRANT_TYPE, Constants.CLIENT_CREDENTIALS);
        param.put(Constants.CLIENT_ID, apiKey);
        param.put(Constants.CLIENT_SECRET, secretKey);
        String resStr = DownLoadUtil.doPostFrom(tokenUrl, param);
        JSONObject res = JSONObject.parseObject(resStr);
        String accessToken = res == null ? null : res.getString(Constants.ACCESS_TOKEN);
        if (accessToken == null || accessToken.isEmpty()) {
            // Fail here instead of silently building a URL that ends in "null".
            throw new IllegalStateException("token response contains no access_token");
        }
        return accessToken;
    }

    /**
     * Reads a file and returns its content Base64-encoded.
     *
     * @param filePath path of the file to read
     * @return Base64 string of the file bytes
     * @throws IOException if the file cannot be read
     */
    private String encodeFileToBase64(String filePath) throws IOException {
        byte[] fileContent = Files.readAllBytes(Paths.get(filePath));
        return Base64.getEncoder().encodeToString(fileContent);
    }
}

55
src/main/java/com/bw/ocr/service/impl/TaskReceiveServiceImpl.java

@ -0,0 +1,55 @@
package com.bw.ocr.service.impl;
import java.util.HashMap;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.bw.ocr.cache.ConfigCache;
import com.bw.ocr.entity.Constants;
import com.bw.ocr.service.TaskReceiveService;
import lombok.extern.slf4j.Slf4j;
/**
* 任务接收服务层实现类
* @author jian.mao
* @date 2025年1月14日
* @description
*/
@Service
@Slf4j
public class TaskReceiveServiceImpl implements TaskReceiveService {
@Override
public String put(String dataJson) {
Map<String, Object> response = new HashMap<>(16);
int code = 200;
String message = "success";
Map<String, Object> task = null;
try {
task = JSONObject.parseObject(dataJson);
} catch (Exception e) {
log.error("参数结构不合法,", e);
code = 100010;
message = "参数不合法";
}
// 写入队列
try {
if(task.containsKey(Constants.TRACE) && (boolean)task.get(Constants.TRACE)){
ConfigCache.taskQueue.putFirst(task);
}else{
ConfigCache.taskQueue.put(task);
}
} catch (InterruptedException e) {
log.error("任务写入队列异常,", e);
code = 100011;
message = "任务写入队列失败";
}
response.put(Constants.CODE, code);
response.put(Constants.MESSAGE, message);
return JSONObject.toJSONString(response);
}
}

48
src/main/java/com/bw/ocr/utils/DataUtil.java

@ -0,0 +1,48 @@
package com.bw.ocr.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.JSONPath;
import java.util.Map;
/**
 * Resolves values from a data map whose entries are JSON strings.
 *
 * @author jinming
 * @version 1.0
 * @date 2023/11/1 9:54
 */
public class DataUtil {

    /** Marker separating the first lookup from a JSON path applied to its (JSON string) result. */
    private static final String JSON_MARKER = "#json#";

    /** Separator between the data-map key and the JSON path. */
    private static final String KEY_SEPARATOR = ":";

    /**
     * Resolves a value described by {@code key}.
     *
     * <p>Two key forms are supported:
     * <ul>
     *   <li>{@code mapKey:jsonPath} - the map entry {@code mapKey} (a JSON string) is
     *       parsed and {@code jsonPath} is evaluated against it;</li>
     *   <li>{@code mapKey:jsonPath#json#innerPath} - as above, but the result is itself
     *       a JSON string that is parsed and {@code innerPath} is evaluated against it.</li>
     * </ul>
     *
     * @param key     lookup expression in one of the forms above
     * @param dataMap map from key to JSON string
     * @return the value found by the JSON path evaluation
     * @throws IllegalArgumentException if {@code key} does not match either form
     */
    public static Object getValue(String key, Map<?, ?> dataMap) {
        if (!key.contains(JSON_MARKER)) {
            return evalField(key, dataMap);
        }
        String[] keySplit = key.split(JSON_MARKER);
        if (keySplit.length < 2) {
            throw new IllegalArgumentException("missing path after " + JSON_MARKER + " in key: " + key);
        }
        // The first lookup yields a JSON string; parse it and apply the second path.
        String nestedJson = (String) evalField(keySplit[0], dataMap);
        JSONObject nestedObject = JSON.parseObject(nestedJson);
        return JSONPath.eval(nestedObject, keySplit[1]);
    }

    /**
     * Evaluates a {@code mapKey:jsonPath} expression against the data map.
     *
     * @param fieldKey expression of the form {@code mapKey:jsonPath}
     * @param dataMap  map from key to JSON string
     * @return result of evaluating the path on the parsed map entry
     * @throws IllegalArgumentException if the expression has no {@code :jsonPath} part
     */
    private static Object evalField(String fieldKey, Map<?, ?> dataMap) {
        String[] parts = fieldKey.split(KEY_SEPARATOR);
        if (parts.length < 2) {
            throw new IllegalArgumentException(
                    "key must have the form <mapKey>" + KEY_SEPARATOR + "<jsonPath>: " + fieldKey);
        }
        String dataJson = (String) dataMap.get(parts[0]);
        JSONObject dataJsonObject = JSON.parseObject(dataJson);
        return JSONPath.eval(dataJsonObject, parts[1]);
    }
}

177
src/main/java/com/bw/ocr/utils/DateUtil.java

@ -0,0 +1,177 @@
package com.bw.ocr.utils;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import lombok.extern.slf4j.Slf4j;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
/**
* 日期工具类
*
* @author jian.mao
* @date 2022年11月15日
* @description
*/
@Slf4j
public class DateUtil {
/**
* @return
*/
public static String getTimeStrForNow() {
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH");
return sdf.format(new Date());
}
public static String getTimeStrForDay(long time) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
return sdf.format(new Date(time * 1000));
}
public static String getTimeStrForDay() {
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
return sdf.format(new Date());
}
public static String getDateTime() {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
String time = sdf.format(new Date());
return time;
}
public static String getDateTime(Long timestap) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
String time = sdf.format(new Date(timestap));
return time;
}
public static String getDate(Long timestap) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
String time = sdf.format(new Date(timestap));
return time;
}
public static String getDateTimeForMonth() {
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMM");
String time = sdf.format(new Date());
return time;
}
/**
* 休眠
*
* @param millis 毫秒
*/
public static void sleep(long millis) {
try {
Thread.sleep(millis);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
/**
* 1. @Description:时间戳转时间
* 2. @Author: ying.zhao
* 3. @Date: 2023/3/28
*/
public static String timestampToDate(String time) {
int thirteen = 13;
int ten = 10;
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
// if (time.length() == thirteen) {
if (time.length() > ten) {
return sdf.format(new Date(Long.parseLong(time)));
} else {
return sdf.format(new Date(Integer.parseInt(time) * 1000L));
}
}
/**
 * Converts a JSON-encoded creation time into a {@code yyyy-MM-dd HH:mm:ss} string.
 *
 * <p>The input is read as a JSON object with a {@code "date"} object (keys {@code year},
 * {@code month}, {@code day}) and a {@code "time"} object (keys {@code hour},
 * {@code minute}, {@code second}). If anything goes wrong while parsing or building the
 * date, the failure is logged together with the offending input and the current time is
 * returned instead.
 *
 * @param jsonTime JSON text describing the creation time
 * @return the formatted creation time, or the current time when the input cannot be converted
 */
public static String parseCreated(String jsonTime){
    try {
        // Parse the JSON payload with fastjson.
        JSONObject jsonObject = JSON.parseObject(jsonTime);
        JSONObject dateObject = jsonObject.getJSONObject("date");
        JSONObject timeObject = jsonObject.getJSONObject("time");
        LocalDateTime dateTime = LocalDateTime.of(
                dateObject.getIntValue("year"),
                dateObject.getIntValue("month"),
                dateObject.getIntValue("day"),
                timeObject.getIntValue("hour"),
                timeObject.getIntValue("minute"),
                timeObject.getIntValue("second"));
        return dateTime.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
    } catch (Exception e) {
        // The placeholder is filled with the input; the trailing throwable is logged with
        // its stack trace by SLF4J.
        log.warn("日期转换失败:{}", jsonTime, e);
        return getDateTime();
    }
}
/**
 * Parses a date string with the given pattern.
 *
 * @param format a {@link SimpleDateFormat} pattern
 * @param date the text to parse; {@code null} or an empty string yields the current time
 * @return the parsed date, the current time for {@code null}/empty input, or {@code null}
 *         when the text does not match the pattern (the parse error is printed to stderr)
 */
public static Date strToDate(String format,String date){
    SimpleDateFormat parser = new SimpleDateFormat(format);
    boolean nothingToParse = date == null || date.isEmpty();
    if (nothingToParse) {
        return new Date();
    }
    try {
        return parser.parse(date);
    } catch (ParseException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Formats a date with the given pattern.
 *
 * @param format a {@link SimpleDateFormat} pattern
 * @param date the date to format; when {@code null} the current time is used
 * @return the formatted date string
 */
public static String FormatDate(String format,Date date){
    SimpleDateFormat formatter = new SimpleDateFormat(format);
    Date effective = (date != null) ? date : new Date();
    return formatter.format(effective);
}
/**
 * Manual smoke check: prints the formatted form of a sample millisecond timestamp.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.out.println(timestampToDate("955814400000"));
}
}

1003
src/main/java/com/bw/ocr/utils/DownLoadUtil.java
File diff suppressed because it is too large
View File

27
src/main/java/com/bw/ocr/utils/EncryptionUtil.java

@ -0,0 +1,27 @@
package com.bw.ocr.utils;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * Hashing helpers.
 *
 * @author jian.mao
 * @date 2023-03-10
 */
public class EncryptionUtil {
    /**
     * Computes the MD5 digest of a string and returns it as 32 lowercase hex characters.
     *
     * <p>The text is encoded as UTF-8 before hashing so the result does not depend on the
     * platform's default charset.
     *
     * @param text the text to hash; must not be {@code null}
     * @return the lowercase hexadecimal MD5 digest
     * @throws IllegalStateException if the JVM provides no MD5 implementation (every
     *         conforming Java platform is required to provide one)
     */
    public static String md5(String text) {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] bytes = md.digest(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            StringBuilder sb = new StringBuilder(bytes.length * 2);
            for (byte b : bytes) {
                sb.append(String.format("%02x", b & 0xff));
            }
            return sb.toString();
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 MessageDigest is not available", e);
        }
    }
}

41
src/main/java/com/bw/ocr/utils/FileUtil.java

@ -0,0 +1,41 @@
package com.bw.ocr.utils;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
/**
 * File helpers.
 *
 * @author jian.mao
 * @date 2023-07-14
 */
public class FileUtil {
    /**
     * Appends one line of text to a file, creating the file if it does not exist.
     *
     * <p>This is a best-effort operation: any failure is printed to stderr and otherwise
     * ignored, so no exception reaches the caller.
     *
     * @param path path of the file to append to
     * @param result the text to write; a {@code "\n"} is appended after it
     */
    public static void writeFile(String path,String result){
        // try-with-resources guarantees the writer is closed even when write() fails.
        try (FileWriter fw = new FileWriter(path,true)) {
            fw.write(result+"\n");
            fw.flush();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes the file at the given path, if possible.
     *
     * <p>Best effort: the outcome of the deletion is not reported, and any exception is
     * printed to stderr and otherwise ignored.
     *
     * @param path path of the file to delete
     */
    public static void delFile(String path) {
        try {
            File file = new File(path);
            file.delete();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

53
src/main/java/com/bw/ocr/utils/GPTResultParseUtil.java

@ -0,0 +1,53 @@
package com.bw.ocr.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.JSONException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the requested fields from the JSON text of a GPT response.
 *
 * @author jinming
 * @version 1.0
 * @date 2024/6/28 10:11
 */
public class GPTResultParseUtil {
    /** Greedy match from the first '{' to the last '}'; compiled once and reused. */
    private static final Pattern OUTER_BRACES = Pattern.compile("\\{.*\\}", Pattern.DOTALL);

    /**
     * Parses a GPT answer as JSON and keeps only the keys that are also present in
     * {@code output}.
     *
     * <p>The content is first stripped of Markdown code fences and newlines and parsed
     * directly. If that raises a {@link JSONException}, the text between the first
     * <code>{</code> and the last <code>}</code> is extracted and parsed instead.
     *
     * @param output map whose key set names the fields to extract (values are not read)
     * @param gptContent raw text returned by the model
     * @return the extracted fields (possibly empty), or {@code null} when no JSON object
     *         could be obtained from {@code gptContent}
     */
    public static Map<String, Object> parseGPTResult(Map<String, Object> output, String gptContent) {
        if (gptContent == null) {
            return null;
        }
        try {
            // Drop ```json / ``` fences and newlines before parsing.
            String jsonContent = gptContent.replace("```json", "").replace("```", "").replace("\n", "");
            return pickRequestedKeys(output, JSON.parseObject(jsonContent));
        } catch (JSONException e) {
            try {
                // Direct parsing failed: fall back to the outermost {...} span.
                Matcher matcher = OUTER_BRACES.matcher(gptContent.replace("\n", ""));
                if (!matcher.find()) {
                    return null;
                }
                return pickRequestedKeys(output, JSON.parseObject(matcher.group()));
            } catch (Exception ex) {
                ex.printStackTrace();
                return null;
            }
        }
    }

    /**
     * Copies the entries of {@code parsed} whose keys appear in {@code output}.
     * Returns {@code null} when {@code parsed} is {@code null} (the parser may yield no
     * object, e.g. for blank input).
     */
    private static Map<String, Object> pickRequestedKeys(Map<String, Object> output, JSONObject parsed) {
        if (parsed == null) {
            return null;
        }
        Map<String, Object> picked = new HashMap<>();
        for (String key : output.keySet()) {
            if (parsed.containsKey(key)) {
                picked.put(key, parsed.get(key));
            }
        }
        return picked;
    }
}

32
src/main/java/com/bw/ocr/utils/JsonUtil.java

@ -0,0 +1,32 @@
package com.bw.ocr.utils;
import com.alibaba.fastjson.JSONObject;
import com.bw.ocr.entity.Constants;
/**
 * JSON helpers.
 *
 * @author jian.mao
 * @date 2023-07-10
 */
public class JsonUtil {
    /**
     * Classifies a string as a JSON object, a JSON array, or plain text.
     *
     * <p>The object parser is tried first, then the array parser; the first one that
     * completes without throwing decides the result.
     *
     * @param jsonString the text to classify
     * @return {@code Constants.MAP_TYPE}, {@code Constants.LIST_TYPE} or
     *         {@code Constants.STRING_TYPE}
     */
    public static String checkJsonType(String jsonString) {
        if (parsesAsObject(jsonString)) {
            return Constants.MAP_TYPE;
        }
        if (parsesAsArray(jsonString)) {
            return Constants.LIST_TYPE;
        }
        return Constants.STRING_TYPE;
    }

    /** Reports whether the object parser accepts the text without throwing. */
    private static boolean parsesAsObject(String text) {
        try {
            JSONObject.parseObject(text);
            return true;
        } catch (Exception e) {
            return false;
        }
    }

    /** Reports whether the array parser accepts the text without throwing. */
    private static boolean parsesAsArray(String text) {
        try {
            JSONObject.parseArray(text);
            return true;
        } catch (Exception e) {
            return false;
        }
    }
}

33
src/main/java/com/bw/ocr/utils/OtherUtils.java

@ -0,0 +1,33 @@
package com.bw.ocr.utils;
import java.security.MessageDigest;
/**
 * Miscellaneous helpers.
 *
 * @author jian.mao
 * @date 2023-09-19
 */
public class OtherUtils {
    /**
     * Returns the MD5 digest of the UTF-8 bytes of a string as lowercase hex.
     *
     * <p>If hashing fails for any reason (including a {@code null} argument), the method
     * does not throw; it returns {@code "nceaform"} followed by the current time in
     * milliseconds.
     *
     * @param string the text to hash
     * @return the 32-character hex digest, or the time-based fallback value on failure
     */
    public static String getMd5(String string) {
        try {
            byte[] digest = MessageDigest.getInstance("MD5").digest(string.getBytes("UTF-8"));
            StringBuilder hex = new StringBuilder(40);
            for (int i = 0; i < digest.length; i++) {
                int unsigned = digest[i] & 0xff;
                // Emit both nibbles explicitly so values below 0x10 keep their leading zero.
                hex.append(Character.forDigit(unsigned >>> 4, 16));
                hex.append(Character.forDigit(unsigned & 0x0f, 16));
            }
            return hex.toString();
        } catch (Exception e) {
            return "nceaform" + System.currentTimeMillis();
        }
    }
}

18
src/main/java/com/bw/ocr/utils/QueueUtil.java

@ -0,0 +1,18 @@
package com.bw.ocr.utils;
import java.util.Map;
import java.util.concurrent.LinkedBlockingDeque;
/**
 * Holder for the application's shared in-memory queues.
 *
 * @author jinming
 * @version 1.0
 * @date 2023/7/13 15:00
 */
public class QueueUtil {
    /** Queue of tasks, each represented as a map of string keys to values. */
    public static LinkedBlockingDeque<Map<String, Object>> taskQueue = new LinkedBlockingDeque<>();

    /** Queue of string messages. */
    public static LinkedBlockingDeque<String> sendQueue = new LinkedBlockingDeque<>();
}

46
src/main/java/com/bw/ocr/utils/SpringBootKafka.java

@ -0,0 +1,46 @@
package com.bw.ocr.utils;
import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.support.SendResult;
import org.springframework.stereotype.Component;
import org.springframework.util.concurrent.ListenableFuture;
import org.springframework.util.concurrent.ListenableFutureCallback;
/**
* @PROJECT_NAME: companybusinesscrawl
* @DESCRIPTION:SpringBootKafka 工具类
* @AUTHOR: ying.zhao
* @DATE: 2023/4/6 11:09
*/
@Slf4j
@Component
public class SpringBootKafka {
@Autowired
private KafkaTemplate<String, Object> kafkaTemplate;
/**
* 自定义topicKafkaTemplate
*/
/**
* public static final String TOPIC = "companyBussTest";
**/
public void send(String topic, String message) {
//发送消息
ListenableFuture<SendResult<String, Object>> future = kafkaTemplate.send(topic, message);
future.addCallback(new ListenableFutureCallback<SendResult<String, Object>>() {
@Override
public void onFailure(Throwable throwable) {
//发送失败的处理
log.info(topic + " - 生产者 发送消息失败:" + throwable.getMessage());
}
@Override
public void onSuccess(SendResult<String, Object> stringObjectSendResult) {
//成功的处理
log.info(topic + " - 生产者 发送消息成功" );
}
});
}
}

23
src/main/java/com/bw/ocr/utils/ThrowMessageUtil.java

@ -0,0 +1,23 @@
package com.bw.ocr.utils;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Helpers for turning exceptions into text.
 *
 * @author jian.mao
 * @date 2023-03-22
 */
public class ThrowMessageUtil {
    /**
     * Renders a throwable's full stack trace as a string.
     *
     * @param t the throwable to render; must not be {@code null}
     * @return the same text {@code t.printStackTrace()} would print
     */
    public static String getErrmessage(Throwable t){
        StringWriter sink = new StringWriter();
        PrintWriter printer = new PrintWriter(sink, true);
        t.printStackTrace(printer);
        return sink.toString();
    }
}

101
src/main/resources/application.yml

@ -0,0 +1,101 @@
# Spring Boot configuration for the document OCR service.
logging:
level:
root: info
path: ../logs
server:
port: 8027
servlet:
context-path: /doc_ocr
tomcat:
uri-encoding: utf-8
max-connections: 20000
# NOTE(review): a bare 1 looks like a 1-byte limit if this is parsed as a data size - confirm whether -1 (no limit) was intended.
max-http-form-post-size: 1
max-threads: 1000
spring:
application:
name: 百度文档解析(ocr版本)
kafka:
bootstrap-servers: node-01:19092,node-02:19092,node-03:19092
producer:
retries: 0
# When several messages are bound for the same partition, the producer puts them into one batch. This sets the memory a batch may use, in bytes.
batch-size: 16384
# Size of the producer's in-memory buffer.
buffer-memory: 33554432
# Key serializer
key-serializer: org.apache.kafka.common.serialization.StringSerializer
# Value serializer
value-serializer: org.apache.kafka.common.serialization.StringSerializer
# acks=0 : the producer does not wait for any response from the server before considering the write successful.
# acks=1 : the producer receives a success response as soon as the partition leader has received the message.
# acks=all : the producer receives a success response only after all replicating nodes have received the message.
acks: 1
consumer:
# Auto-commit interval. In Spring Boot 2.x the value is a Duration and must follow its format, e.g. 1S, 1M, 2H, 5D
auto-commit-interval: 1S
# What the consumer does when reading a partition that has no offset, or whose offset is invalid:
# latest (default): start from the newest records (those produced after the consumer started)
# earliest: start reading the partition from the beginning
auto-offset-reset: earliest
# Whether offsets are committed automatically (default true). To avoid duplicated or lost data it can be set to false and offsets committed manually.
enable-auto-commit: true
# Key deserializer
key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
# Value deserializer
value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
# Consumer group
group-id: test4
# Number of concurrent consumer threads
# NOTE(review): nesting is not visible here; if this key and the next sit under spring.kafka.consumer they may not be bound by Spring Boot - confirm they are actually read.
concurrency: 4
# Timeout
max-poll-interval-ms: 60000
listener:
# Number of threads to run in the listener container.
#concurrency: 5
# The listener is responsible for the ack; every call commits immediately.
#ack-mode: manual_immediate
missing-topics-fatal: false
redis:
host: node-01
port: 6379
timeout: 10000
database: 5
jedis:
pool:
max-active: 8 # Maximum number of connections in the pool (a negative value means no limit)
max-wait: 800 # Maximum time to block waiting for a pooled connection (a negative value means no limit)
max-idle: 8 # Maximum number of idle connections in the pool
min-idle: 2 # Minimum number of idle connections in the pool
boot:
admin:
client:
url: http://192.168.0.44:8001
instance:
service-base-url: http://192.168.0.44:8019
management:
endpoints:
web:
exposure:
# NOTE(review): "*" exposes every actuator endpoint over HTTP - confirm the service is only reachable from a trusted network.
include: "*"
endpoint:
health:
show-details: always
health:
elasticsearch:
enabled: false
zookeeper:
connection-string: node-01:12181,node-02:12181,node-03:12181
publish-node: /analyze
customize-kafka:
producer:
topic: produce_analyze
task:
task-queue-path: ../data/taskQueue.txt
threadPool:
corePoolSize: 5
maximumPoolSize: 20
keepAliveTime: 60
queueSize: 100
file:
path-prefix: ../file/

36
src/main/resources/logback-spring.xml

@ -0,0 +1,36 @@
<configuration>
    <!-- Properties resolved from the Spring environment (application configuration). -->
    <springProperty scope="context" name="logging.path" source="logging.path"/>
    <!-- defaultValue keeps ${logging.level} defined when the source key is not configured;
         without it the threshold filter below would receive an undefined level.
         NOTE(review): the source key refers to package com.bfd while this project's code
         lives under com.bw - confirm which key is intended. -->
    <springProperty scope="context" name="logging.level" source="logging.level.com.bfd" defaultValue="info"/>
    <!-- Default console appender. Production normally runs in the background, so it is of little use and stays disabled. -->
    <!--<appender name="STDOUT"
    class="ch.qos.logback.core.ConsoleAppender">
    <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
    <Pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</Pattern>
    </encoder>
    </appender>-->
    <!-- Daily rolling file appender; keeps seven days of history. -->
    <appender name="GLMAPPER-LOGGERONE"
              class="ch.qos.logback.core.rolling.RollingFileAppender">
        <append>true</append>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>${logging.level}</level>
        </filter>
        <file>${logging.path}/baidu_ocrInfo.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <FileNamePattern>${logging.path}/baidu_ocrInfo.log.%d{yyyy-MM-dd}</FileNamePattern>
            <MaxHistory>7</MaxHistory>
        </rollingPolicy>
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>
    <root level="info">
        <appender-ref ref="GLMAPPER-LOGGERONE"/>
        <!--<appender-ref ref="STDOUT"/>-->
    </root>
</configuration>

38
src/test/java/com/bw/baidu_dcoparse_ocr/AppTest.java

@ -0,0 +1,38 @@
package com.bw.baidu_dcoparse_ocr;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * Placeholder JUnit 3 test case for the application.
 */
public class AppTest extends TestCase {

    /**
     * Creates the test case.
     *
     * @param testName name of the test case
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * Builds the suite containing every test declared in this class.
     *
     * @return the suite of tests being tested
     */
    public static Test suite() {
        return new TestSuite(AppTest.class);
    }

    /**
     * Trivial always-passing test.
     */
    public void testApp() {
        assertTrue(true);
    }
}
Loading…
Cancel
Save