commit
4d48a8ed53
22 changed files with 1294 additions and 0 deletions
-
40.classpath
-
3.gitignore
-
23.project
-
5.settings/org.eclipse.core.resources.prefs
-
8.settings/org.eclipse.jdt.core.prefs
-
4.settings/org.eclipse.m2e.core.prefs
-
218pom.xml
-
67src/main/java/com/bfd/docconversion/DocConversionApplication.java
-
40src/main/java/com/bfd/docconversion/controller/ApiController.java
-
13src/main/java/com/bfd/docconversion/service/ConversionToPdfService.java
-
46src/main/java/com/bfd/docconversion/service/ProcessService.java
-
97src/main/java/com/bfd/docconversion/service/impl/ConversionToPdfServiceImpl.java
-
37src/main/java/com/bfd/docconversion/util/AsyncConfig.java
-
32src/main/java/com/bfd/docconversion/util/Config.java
-
19src/main/java/com/bfd/docconversion/util/Constants.java
-
39src/main/java/com/bfd/docconversion/util/FileExtensionEnum.java
-
83src/main/java/com/bfd/docconversion/util/KfkUtil.java
-
104src/main/java/com/bfd/docconversion/util/MainHandler.java
-
325src/main/java/com/bfd/docconversion/util/Utils.java
-
40src/main/resources/application.yml
-
38src/main/resources/logback-spring.xml
-
13src/test/java/com/bfd/doc_conversion/DocConversionApplicationTests.java
@ -0,0 +1,40 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<classpath> |
|||
<classpathentry kind="src" output="target/classes" path="src/main/java"> |
|||
<attributes> |
|||
<attribute name="optional" value="true"/> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
<attribute name="optional" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="src" output="target/test-classes" path="src/test/java"> |
|||
<attributes> |
|||
<attribute name="optional" value="true"/> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
<attribute name="test" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
<attribute name="test" value="true"/> |
|||
<attribute name="optional" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> |
|||
<attributes> |
|||
<attribute name="maven.pomderived" value="true"/> |
|||
</attributes> |
|||
</classpathentry> |
|||
<classpathentry kind="output" path="target/classes"/> |
|||
</classpath> |
@ -0,0 +1,3 @@ |
|||
/target/ |
|||
/logs/ |
|||
/jarlib/ |
@ -0,0 +1,23 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<projectDescription> |
|||
<name>doc_conversion</name> |
|||
<comment></comment> |
|||
<projects> |
|||
</projects> |
|||
<buildSpec> |
|||
<buildCommand> |
|||
<name>org.eclipse.jdt.core.javabuilder</name> |
|||
<arguments> |
|||
</arguments> |
|||
</buildCommand> |
|||
<buildCommand> |
|||
<name>org.eclipse.m2e.core.maven2Builder</name> |
|||
<arguments> |
|||
</arguments> |
|||
</buildCommand> |
|||
</buildSpec> |
|||
<natures> |
|||
<nature>org.eclipse.jdt.core.javanature</nature> |
|||
<nature>org.eclipse.m2e.core.maven2Nature</nature> |
|||
</natures> |
|||
</projectDescription> |
@ -0,0 +1,5 @@ |
|||
eclipse.preferences.version=1 |
|||
encoding//src/main/java=UTF-8 |
|||
encoding//src/main/resources=UTF-8 |
|||
encoding//src/test/java=UTF-8 |
|||
encoding/<project>=UTF-8 |
@ -0,0 +1,8 @@ |
|||
eclipse.preferences.version=1 |
|||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 |
|||
org.eclipse.jdt.core.compiler.compliance=1.8 |
|||
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled |
|||
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning |
|||
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore |
|||
org.eclipse.jdt.core.compiler.release=disabled |
|||
org.eclipse.jdt.core.compiler.source=1.8 |
@ -0,0 +1,4 @@ |
|||
activeProfiles= |
|||
eclipse.preferences.version=1 |
|||
resolveWorkspaceProjects=true |
|||
version=1 |
@ -0,0 +1,218 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the doc_conversion service.
  Packaging layout produced by the plugins below:
    target/<artifact>.jar  thin jar, configuration files excluded
    target/lib/            all dependency jars (copied by maven-dependency-plugin)
    target/config/         *.properties / *.yml / *.yaml (copied by maven-resources-plugin)
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.bfd</groupId>
    <artifactId>doc_conversion</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>docconversion</name>
    <description>docconversion</description>

    <properties>
        <java.version>1.8</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <spring-boot.version>2.2.4.RELEASE</spring-boot.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- DocConversionApplication autowires StringRedisTemplate; system-scoped jars
             contribute no transitive dependencies, so Redis support is declared here.
             The version is managed by spring-boot-dependencies. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- Declared once; the original listed this dependency twice. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba.fastjson2</groupId>
            <artifactId>fastjson2</artifactId>
            <version>2.0.12</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.8.27</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.7.1</version>
        </dependency>
        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>3.11.0</version>
        </dependency>
        <dependency>
            <groupId>de.codecentric</groupId>
            <artifactId>spring-boot-admin-client</artifactId>
            <version>2.2.4</version>
        </dependency>

        <!--
          System-scoped jars. Paths are expressed relative to ${project.basedir} instead of the
          original absolute D:\eclipseWork\doc_conversion prefix so the build works on any machine.
          NOTE(review): aspose-cells resolves to <project>/jarlib while every other jar resolves to
          <project>/../jarlib; the locations are preserved as committed. Confirm which directory is
          intended (.gitignore lists /jarlib/ inside the project).
          NOTE(review): the coordinates reference "crack" builds of a commercial library; confirm
          licensing before shipping.
        -->
        <dependency>
            <groupId>aspose-cells-20.12-crack</groupId>
            <artifactId>aspose-cells-20.12-crack</artifactId>
            <version>20.12</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/jarlib/aspose-cells-20.12-crack.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>aspose-slides-20.12-crack</groupId>
            <artifactId>aspose-slides-20.12-crack</artifactId>
            <version>20.12</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/../jarlib/aspose-slides-20.12-crack.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>aspose-words-20.12-crack</groupId>
            <artifactId>aspose-words-20.12-crack</artifactId>
            <version>20.12</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/../jarlib/aspose-words-20.12-crack.jar</systemPath>
        </dependency>

        <dependency>
            <groupId>org.javassist</groupId>
            <artifactId>javassist</artifactId>
            <version>3.20.0-GA</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.aspose/aspose-pdf -->
        <dependency>
            <groupId>aspose-pdf-23.1</groupId>
            <artifactId>aspose-pdf-23.1</artifactId>
            <version>23.1</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/../jarlib/aspose-pdf-23.1.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-framework</artifactId>
            <version>5.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-recipes</artifactId>
            <version>5.2.0</version>
        </dependency>

        <dependency>
            <groupId>com.bfd.util</groupId>
            <artifactId>pauseTool</artifactId>
            <version>1.0</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/../jarlib/pauseTool-1.0.jar</systemPath>
        </dependency>
    </dependencies>

    <dependencyManagement>
        <dependencies>
            <!-- Imports Spring Boot's managed versions without using it as the parent POM. -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>${spring-boot.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <!-- File types / paths that must not be packed into the jar -->
                    <excludes>
                        <exclude>*.properties</exclude>
                        <exclude>*.yml</exclude>
                        <exclude>*.yaml</exclude>
                    </excludes>
                    <archive>
                        <manifest>
                            <!-- Main class to execute -->
                            <mainClass>com.bfd.docconversion.DocConversionApplication</mainClass>
                            <!-- Add third-party jars to the manifest Class-Path -->
                            <addClasspath>true</addClasspath>
                            <!-- Third-party jars live under lib/, so that is the Class-Path prefix -->
                            <classpathPrefix>lib/</classpathPrefix>
                            <!-- Do not record timestamped snapshot versions in MANIFEST.MF -->
                            <useUniqueVersions>false</useUniqueVersions>
                        </manifest>
                        <manifestEntries>
                            <!-- Extra Class-Path entries: the system-scoped jars and the external
                                 config/ directory. Kept on a single line so that no line break or
                                 indentation ends up inside the manifest value. -->
                            <Class-Path>lib/pauseTool-1.0.jar lib/aspose-pdf-23.1-23.1.jar lib/aspose-cells-20.12-crack-20.12.jar lib/aspose-slides-20.12-crack-20.12.jar lib/aspose-words-20.12-crack-20.12.jar config/</Class-Path>
                        </manifestEntries>
                    </archive>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib/</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-resources</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-resources</goal>
                        </goals>
                        <configuration>
                            <resources>
                                <!-- Copy the configuration files to the external config directory -->
                                <resource>
                                    <directory>src/main/resources/</directory>
                                    <includes>
                                        <include>*.properties</include>
                                        <include>*.yml</include>
                                        <!-- Was an <exclude> element nested inside <includes>; the jar
                                             plugin strips *.yaml from the jar, so it is copied here. -->
                                        <include>*.yaml</include>
                                    </includes>
                                </resource>
                            </resources>
                            <outputDirectory>${project.build.directory}/config</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Single source of truth for the language level -->
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>
@ -0,0 +1,67 @@ |
|||
package com.bfd.docconversion; |
|||
|
|||
import cn.hutool.core.thread.ThreadFactoryBuilder; |
|||
import com.bfd.docconversion.service.ProcessService; |
|||
import com.bfd.docconversion.util.Config; |
|||
import com.bfd.docconversion.util.KfkUtil; |
|||
import com.bfd.util.PauseTool; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.beans.factory.annotation.Autowired; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.boot.SpringApplication; |
|||
import org.springframework.boot.autoconfigure.SpringBootApplication; |
|||
import org.springframework.context.ConfigurableApplicationContext; |
|||
import org.springframework.data.redis.core.StringRedisTemplate; |
|||
import org.springframework.scheduling.annotation.EnableScheduling; |
|||
import org.springframework.scheduling.annotation.Scheduled; |
|||
|
|||
import javax.annotation.Resource; |
|||
import java.util.concurrent.*; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@SpringBootApplication |
|||
@EnableScheduling |
|||
@Slf4j |
|||
public class DocConversionApplication { |
|||
@Autowired |
|||
private StringRedisTemplate stringRedisTemplate; |
|||
|
|||
@Value("${zookeeper.connection-string}") |
|||
private String connectionString; |
|||
@Value("${zookeeper.publish-node}") |
|||
private String nodePath; |
|||
@Value("${crawl.threadNum}") |
|||
private int threadNum; |
|||
|
|||
@Resource |
|||
ProcessService processService; |
|||
public static void main(String[] args) { |
|||
ConfigurableApplicationContext applicationContext = SpringApplication.run(DocConversionApplication.class, args); |
|||
DocConversionApplication bean = applicationContext.getBean(DocConversionApplication.class); |
|||
System.setProperty("java.io.tmpdir","/opt/analyze/apps/doc_conversion/tmp"); |
|||
bean.start(); |
|||
} |
|||
|
|||
public void start(){ |
|||
|
|||
ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNamePrefix("crawl-pool-%d").build(); |
|||
ExecutorService singleThreadPool = new ThreadPoolExecutor(10, 20, 100L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(1024), namedThreadFactory, new ThreadPoolExecutor.AbortPolicy()); |
|||
for (int i=0;i<threadNum;i++){ |
|||
singleThreadPool.execute(processService); |
|||
} |
|||
KfkUtil.getProducer(); |
|||
|
|||
PauseTool pauseTool = new PauseTool(); |
|||
pauseTool.initializeRedisCache(stringRedisTemplate); |
|||
pauseTool.setupZookeeperListener(connectionString, nodePath); |
|||
|
|||
} |
|||
@Scheduled(cron = "0 0/5 * * * ?") |
|||
public void timeSize(){ |
|||
int size = Config.taskQueue.size(); |
|||
log.info("当前有 {} 条文档没有转换",size); |
|||
} |
|||
|
|||
} |
@ -0,0 +1,40 @@ |
|||
package com.bfd.docconversion.controller; |
|||
|
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import com.bfd.docconversion.util.Config; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.web.bind.annotation.*; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@RestController |
|||
@Slf4j |
|||
@RequestMapping(value = "/document") |
|||
@CrossOrigin(origins = "*", maxAge = 3600) |
|||
public class ApiController { |
|||
// @Resource |
|||
// conversionToPdfService conversionToPdfService; |
|||
/** |
|||
* 文档转换 Api |
|||
* @param jsonObject |
|||
* @return |
|||
*/ |
|||
@RequestMapping(value = "/conversion", method = RequestMethod.POST, produces = "application/json") |
|||
@ResponseBody |
|||
public String varAna(@RequestBody JSONObject jsonObject) { |
|||
log.info("文档转换参数:"+jsonObject); |
|||
// conversionToPdfService.conversion(jsonObject); |
|||
try { |
|||
if (jsonObject.containsKey(Config.TRACE) && jsonObject.getBoolean(Config.TRACE)==true){ |
|||
log.info("测试流程,插入队首"); |
|||
Config.taskQueue.putFirst(jsonObject); |
|||
}else { |
|||
Config.taskQueue.put(jsonObject); |
|||
} |
|||
} catch (InterruptedException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
return "success"; |
|||
} |
|||
} |
@ -0,0 +1,13 @@ |
|||
package com.bfd.docconversion.service; |
|||
|
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import org.springframework.stereotype.Service; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Service |
|||
public interface ConversionToPdfService { |
|||
|
|||
void conversion(JSONObject jsonObject); |
|||
} |
@ -0,0 +1,46 @@ |
|||
package com.bfd.docconversion.service; |
|||
|
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import com.aspose.pdf.MemoryCleaner; |
|||
import com.bfd.docconversion.util.Config; |
|||
import com.bfd.docconversion.util.Constants; |
|||
import com.bfd.util.PauseTool; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.stereotype.Service; |
|||
|
|||
import javax.annotation.Resource; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Service |
|||
@Slf4j |
|||
public class ProcessService implements Runnable { |
|||
@Resource |
|||
ConversionToPdfService conversionToPdfService; |
|||
|
|||
@Override |
|||
public void run() { |
|||
while (true) { |
|||
try { |
|||
if (Config.taskQueue.size() <= 0) { |
|||
Thread.sleep(1000 * 10); |
|||
//清除缓存 |
|||
MemoryCleaner.clearAllTempFiles(); |
|||
} else { |
|||
JSONObject take = Config.taskQueue.take(); |
|||
Integer scense_id = (Integer) take.get(Constants.SCENES_ID); |
|||
Integer version = (Integer) take.get(Constants.VERSION); |
|||
if (PauseTool.CACHE.containsKey(scense_id + Constants.UNDERLINE + version)) { |
|||
conversionToPdfService.conversion(take); |
|||
} else { |
|||
log.info("暂停任务:{}", JSONObject.toJSONString(take)); |
|||
} |
|||
} |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
log.info("异常,{}", e); |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,97 @@ |
|||
package com.bfd.docconversion.service.impl; |
|||
|
|||
import cn.hutool.core.util.IdUtil; |
|||
import com.alibaba.fastjson2.JSON; |
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import com.bfd.docconversion.service.ConversionToPdfService; |
|||
import com.bfd.docconversion.util.Config; |
|||
import com.bfd.docconversion.util.KfkUtil; |
|||
import com.bfd.docconversion.util.Utils; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.stereotype.Service; |
|||
|
|||
import java.io.ByteArrayOutputStream; |
|||
import java.io.InputStream; |
|||
import java.net.URL; |
|||
import java.nio.file.Files; |
|||
import java.nio.file.Path; |
|||
import java.nio.file.Paths; |
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Service |
|||
@Slf4j |
|||
public class ConversionToPdfServiceImpl implements ConversionToPdfService { |
|||
/** |
|||
* 转换 |
|||
* @param jsonObject |
|||
*/ |
|||
@Override |
|||
public void conversion(JSONObject jsonObject) { |
|||
//输入 |
|||
JSONObject input = jsonObject.getJSONObject("input"); |
|||
//输出 |
|||
JSONObject output = jsonObject.getJSONObject("output"); |
|||
//data |
|||
JSONObject data = jsonObject.getJSONObject("data"); |
|||
|
|||
System.out.println("queryData ---> input:" + JSON.toJSONString(input)); |
|||
System.out.println("queryData ---> output:" + JSON.toJSONString(output)); |
|||
System.out.println("queryData ---> data:" + JSON.toJSONString(data)); |
|||
Map resultMap = new HashMap<>(32); |
|||
Map results = new HashMap<>(32); |
|||
try { |
|||
//需修改 |
|||
// String gofastUrl = input.getString("filePath"); |
|||
String gofastUrl = (String) Utils.jsonParse(input.getString("filePath"), data); |
|||
log.info("开始下载文件, path:"+ gofastUrl); |
|||
InputStream source = Utils.gofastDownLoadFile(gofastUrl); |
|||
if (source == null) { |
|||
throw new NullPointerException(); |
|||
} |
|||
URL url = new URL(gofastUrl); |
|||
String newPath = url.getPath(); |
|||
Path path = Paths.get(newPath); |
|||
String extension = Utils.getExtension(path); |
|||
ByteArrayOutputStream target = new ByteArrayOutputStream(); |
|||
String filePath = ""; |
|||
if (extension.equals(Config.PDF)) { |
|||
log.info("文档转换开始: " + extension + " --> DOC"); |
|||
Utils.asposePdfTo(extension, source,target); |
|||
filePath = "./files/"+IdUtil.simpleUUID()+".docx"; |
|||
}else { |
|||
log.info("文档转换开始: " + extension + " --> PDF"); |
|||
Utils.asposeToPdf(extension, source,target); |
|||
filePath = "./files/"+IdUtil.simpleUUID()+".pdf"; |
|||
} |
|||
// InputStream source = Files.newInputStream(path); |
|||
Files.write(Paths.get(filePath), target.toByteArray()); |
|||
log.info("文档转换完成"); |
|||
log.info("文件开始上传 path:{}",filePath); |
|||
String upLoadFile = Utils.upLoadFile(filePath); |
|||
System.out.println(upLoadFile); |
|||
log.info("文件结束上传"); |
|||
JSONObject resultUpload = JSONObject.parseObject(upLoadFile); |
|||
resultMap.put("id", IdUtil.randomUUID()); |
|||
resultMap.put("conversionUrl", Config.resultGofast + resultUpload.getString("path")); |
|||
results.put("status", 1); |
|||
results.put("message", "成功"); |
|||
}catch (Exception e){ |
|||
e.printStackTrace(); |
|||
log.error("文档转换异常",e); |
|||
resultMap.put("conversionUrl", "失败"); |
|||
results.put("status", 2); |
|||
results.put("message", "失败"); |
|||
} |
|||
resultMap.put("isLast",1); |
|||
results.put("results", JSON.toJSONString(resultMap)); |
|||
|
|||
jsonObject.put("result", results); |
|||
KfkUtil.sendKafka(JSON.toJSONString(jsonObject)); |
|||
log.info("处理完成,result:" + JSON.toJSONString(results)); |
|||
|
|||
} |
|||
} |
@ -0,0 +1,37 @@ |
|||
package com.bfd.docconversion.util; |
|||
|
|||
import org.springframework.context.annotation.Configuration; |
|||
import org.springframework.scheduling.annotation.AsyncConfigurer; |
|||
import org.springframework.scheduling.annotation.EnableAsync; |
|||
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; |
|||
|
|||
import java.util.concurrent.Executor; |
|||
|
|||
|
|||
@Configuration |
|||
@EnableAsync //Java配置文件标注它,那么Spring就会开启异步可用 |
|||
/** |
|||
* @author guowei |
|||
* 异步任务线程池 |
|||
* 注解@EnableAsync代表开启Spring异步。这样就可以使用@Async驱动Spring使用异步, |
|||
* 但是异步需要提供可用线程池,所以这里的配置类还会实现AsyncConfigurer接口,然后覆盖getAsyncExecutor方法,这样就可以自定义一个线程池 |
|||
*/ |
|||
public class AsyncConfig implements AsyncConfigurer { |
|||
|
|||
@Override |
|||
public Executor getAsyncExecutor() { |
|||
//定义线程池 |
|||
ThreadPoolTaskExecutor threadPoolTaskExecutor = new ThreadPoolTaskExecutor(); |
|||
//核心线程数 |
|||
threadPoolTaskExecutor.setCorePoolSize(10); |
|||
//线程池最大线程数 |
|||
threadPoolTaskExecutor.setMaxPoolSize(50); |
|||
//线程队列最大线程数 |
|||
threadPoolTaskExecutor.setQueueCapacity(200); |
|||
//初始化 |
|||
threadPoolTaskExecutor.initialize(); |
|||
|
|||
return threadPoolTaskExecutor; |
|||
} |
|||
|
|||
} |
@ -0,0 +1,32 @@ |
|||
package com.bfd.docconversion.util; |
|||
|
|||
import com.alibaba.fastjson2.JSONObject; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
import java.util.concurrent.LinkedBlockingDeque; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
public class Config { |
|||
|
|||
public static String gofastUrl = "http://172.18.1.180:9980/upload"; |
|||
|
|||
// public static String resultGofast = "https://crawl-files.pontoaplus.com"; |
|||
|
|||
public static String resultGofast = "https://caiji.pontoaplus.com"; |
|||
|
|||
public static LinkedBlockingDeque<JSONObject> taskQueue = new LinkedBlockingDeque <JSONObject>(); |
|||
|
|||
public static Map stopCache = new HashMap<>(); |
|||
|
|||
public static final String PDF = "pdf"; |
|||
|
|||
public static final Integer NUM = 5; |
|||
|
|||
public static final String TRACE = "trace"; |
|||
|
|||
|
|||
|
|||
} |
@ -0,0 +1,19 @@ |
|||
package com.bfd.docconversion.util; |
|||
|
|||
import org.springframework.stereotype.Component; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
public class Constants { |
|||
|
|||
public final static String STOP = "stop"; |
|||
|
|||
public final static String SCENES_ID = "scenes_id"; |
|||
|
|||
public final static String VERSION = "version"; |
|||
|
|||
public final static String UNDERLINE = "_"; |
|||
|
|||
} |
@ -0,0 +1,39 @@ |
|||
package com.bfd.docconversion.util; |
|||
/**
 * File extensions known to the converter.
 *
 * @author guowei
 */
public enum FileExtensionEnum {
    /** The {@code doc} extension. */
    doc("doc"),
    /** The {@code docx} extension. */
    docx("docx"),
    /** The {@code xls} extension. */
    xls("xls"),
    /** The {@code xlsx} extension. */
    xlsx("xlsx"),
    /** The {@code ppt} extension. */
    ppt("ppt"),
    /** The {@code pptx} extension. */
    pptx("pptx"),
    /** The {@code pdf} extension. */
    pdf("pdf");

    /** Extension text without the leading dot. */
    private final String extension;

    FileExtensionEnum(String extension) {
        this.extension = extension;
    }

    /**
     * @return the extension text of this constant
     */
    public String getExtension() {
        return this.extension;
    }

    /**
     * Looks up the constant for an extension, ignoring case.
     *
     * @param extension extension text to resolve
     * @return the matching constant
     * @throws IllegalArgumentException if no constant matches (including a {@code null} argument)
     */
    public static FileExtensionEnum getByExtension(String extension) {
        final FileExtensionEnum[] candidates = values();
        int index = 0;
        while (index < candidates.length) {
            final FileExtensionEnum candidate = candidates[index];
            index++;
            if (!candidate.getExtension().equalsIgnoreCase(extension)) {
                continue;
            }
            return candidate;
        }
        throw new IllegalArgumentException("Unsupported file extension: " + extension);
    }
}
@ -0,0 +1,83 @@ |
|||
package com.bfd.docconversion.util; |
|||
|
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.apache.kafka.clients.producer.KafkaProducer; |
|||
import org.apache.kafka.clients.producer.ProducerRecord; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.util.Properties; |
|||
|
|||
/** |
|||
* @author guowei |
|||
* kfk工具类 |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class KfkUtil { |
|||
private static String topic; |
|||
|
|||
private static String brokerList; |
|||
|
|||
@Value("${crawl.kafka.topic}") |
|||
public void setTopic(String topic) { |
|||
KfkUtil.topic = topic; |
|||
} |
|||
|
|||
@Value("${crawl.kafka.brokers}") |
|||
public void setBrokerList(String brokerList) { |
|||
KfkUtil.brokerList = brokerList; |
|||
} |
|||
private static KafkaProducer<String, String> kafkaProducer; |
|||
|
|||
public static int num = 0; |
|||
|
|||
/** |
|||
* 获取KafkaProducer实例 |
|||
*/ |
|||
public static KafkaProducer<String, String> getProducer() { |
|||
// synchronized (kafkaProducer) { |
|||
if (kafkaProducer == null) { |
|||
Properties props = new Properties(); |
|||
//xxx服务器ip |
|||
props.put("bootstrap.servers", brokerList); |
|||
//所有follower都响应了才认为消息提交成功,即"committed" |
|||
props.put("acks", "all"); |
|||
//retries = MAX 无限重试,直到你意识到出现了问题:) |
|||
props.put("retries", 3); |
|||
//producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 |
|||
props.put("batch.size", 16384); |
|||
//batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms |
|||
//延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 |
|||
props.put("linger.ms", 1); |
|||
//producer可以用来缓存数据的内存大小。 |
|||
props.put("buffer.memory", 33554432); |
|||
props.put("key.serializer", |
|||
"org.apache.kafka.common.serialization.StringSerializer"); |
|||
props.put("value.serializer", |
|||
"org.apache.kafka.common.serialization.StringSerializer"); |
|||
kafkaProducer = new KafkaProducer<String, String>(props); |
|||
} |
|||
// } |
|||
return kafkaProducer; |
|||
} |
|||
|
|||
/** |
|||
* 关闭KafkaProducer实例 |
|||
*/ |
|||
public static void closeProducer() { |
|||
if (kafkaProducer != null) { |
|||
log.info("----------close producer----------"); |
|||
kafkaProducer.close(); |
|||
kafkaProducer = null; |
|||
} |
|||
} |
|||
|
|||
public static void sendKafka(String resultData) { |
|||
KafkaProducer<String, String> producer = getProducer(); |
|||
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData); |
|||
producer.send(se); |
|||
log.info("发送kafka成功"); |
|||
// num++; |
|||
} |
|||
} |
@ -0,0 +1,104 @@ |
|||
package com.bfd.docconversion.util; |
|||
|
|||
import cn.hutool.core.io.FileUtil; |
|||
import cn.hutool.core.io.file.FileWriter; |
|||
import com.alibaba.fastjson2.JSON; |
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.boot.ApplicationArguments; |
|||
import org.springframework.boot.ApplicationRunner; |
|||
import org.springframework.stereotype.Service; |
|||
|
|||
import java.io.File; |
|||
import java.util.List; |
|||
import java.util.concurrent.LinkedBlockingDeque; |
|||
import java.util.concurrent.LinkedBlockingQueue; |
|||
|
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Slf4j |
|||
@Service |
|||
public class MainHandler implements ApplicationRunner { |
|||
|
|||
@Value("${crawl.task.taskData}") |
|||
private String taskPath; |
|||
|
|||
@Override |
|||
public void run(ApplicationArguments args) throws Exception { |
|||
log.info("监测程序运行线程 start"); |
|||
//停止处理 |
|||
waitDown(); |
|||
//启动加载缓存任务 |
|||
readTask(taskPath, Config.taskQueue); |
|||
} |
|||
|
|||
|
|||
public static void readTask(String path, LinkedBlockingDeque queue) throws InterruptedException { |
|||
File file = new File(path); |
|||
if (file.exists()) { |
|||
List<String> tasks = null; |
|||
tasks = FileUtil.readLines(file, "UTF-8"); |
|||
log.info("缓存文件有 " + tasks.size() + " 条数据"); |
|||
for (String taskStr : tasks) { |
|||
log.info("读到缓存数据:" + taskStr); |
|||
System.out.println("读到缓存数据:" + taskStr); |
|||
JSONObject parse = JSONObject.parseObject(taskStr); |
|||
// JSONObject value = (JSONObject) parse.get("value"); |
|||
// if (value.containsKey("result")){ |
|||
// KfkUtil.sendKafka(JSON.toJSONString(value)); |
|||
// log.info("此数据已经组装好,直接推送kfk"); |
|||
// continue; |
|||
// } |
|||
queue.put(parse); |
|||
} |
|||
file.delete(); |
|||
} else { |
|||
log.info("未找到缓存任务文件"); |
|||
} |
|||
|
|||
} |
|||
|
|||
/** |
|||
* 结束触发钩子 |
|||
*/ |
|||
public void waitDown() { |
|||
Runtime.getRuntime().addShutdownHook(new Thread() { |
|||
@Override |
|||
public void run() { |
|||
// 停止线程 |
|||
// Config.isStart = false; |
|||
log.info("stop-------"); |
|||
try { |
|||
writeTsskToFile(); |
|||
} catch (InterruptedException e) { |
|||
log.error("写出缓存异常,{}", e); |
|||
} |
|||
} |
|||
}); |
|||
} |
|||
|
|||
|
|||
/** |
|||
* 任务持久化到硬盘 |
|||
*/ |
|||
public void writeTsskToFile() throws InterruptedException { |
|||
|
|||
System.out.println(taskPath); |
|||
File file = new File(taskPath); |
|||
FileWriter fileWriter = new FileWriter(file); |
|||
if (!file.exists()) { |
|||
fileWriter = FileWriter.create(file); |
|||
} |
|||
while (Config.taskQueue.size() > 0) { |
|||
JSONObject take = Config.taskQueue.take(); |
|||
String entryJson = JSON.toJSONString(take); |
|||
System.out.println("写入缓存数据:" + entryJson); |
|||
fileWriter.write(entryJson + "\r\n", true); |
|||
} |
|||
log.info("taskMap 缓存已输出"); |
|||
} |
|||
|
|||
} |
@ -0,0 +1,325 @@ |
|||
package com.bfd.docconversion.util; |
|||
|
|||
import cn.hutool.core.util.IdUtil; |
|||
import com.alibaba.fastjson2.JSON; |
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import com.alibaba.fastjson2.JSONPath; |
|||
|
|||
import com.aspose.cells.Workbook; |
|||
import com.aspose.slides.Presentation; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import okhttp3.*; |
|||
import org.springframework.stereotype.Component; |
|||
import com.aspose.pdf.Document; |
|||
import com.aspose.pdf.SaveFormat; |
|||
|
|||
import java.io.*; |
|||
import java.net.URL; |
|||
import java.nio.file.Files; |
|||
import java.nio.file.Path; |
|||
import java.nio.file.Paths; |
|||
import java.util.Map; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class Utils { |
|||
|
|||
/** |
|||
* 转换成pdf |
|||
* |
|||
* @param extension |
|||
* @param source |
|||
* @param target |
|||
* @throws Exception |
|||
*/ |
|||
public static void asposeToPdf(String extension, InputStream source, ByteArrayOutputStream target) throws Exception { |
|||
switch (FileExtensionEnum.getByExtension(extension)) { |
|||
case doc: |
|||
case docx: |
|||
com.aspose.words.Document doc = new com.aspose.words.Document(source); |
|||
doc.save(target, com.aspose.words.SaveFormat.PDF); |
|||
|
|||
break; |
|||
case xls: |
|||
case xlsx: |
|||
com.aspose.cells.Workbook excel = new com.aspose.cells.Workbook(source); |
|||
com.aspose.cells.PdfSaveOptions pdfSaveOptions = new com.aspose.cells.PdfSaveOptions(); |
|||
// 单页显示,防截断 防换行 |
|||
pdfSaveOptions.setOnePagePerSheet(true); |
|||
excel.save(target, pdfSaveOptions); |
|||
excel.dispose(); |
|||
break; |
|||
case ppt: |
|||
case pptx: |
|||
com.aspose.slides.Presentation ppt = new com.aspose.slides.Presentation(source); |
|||
ppt.save(target, com.aspose.slides.SaveFormat.Pdf); |
|||
ppt.dispose(); |
|||
break; |
|||
default: |
|||
System.out.println("不支持的文件转换类型"); |
|||
// throw new BaseException("不支持的文件转换类型"); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* pdf 转换 |
|||
* @param extension |
|||
* @param source |
|||
* @param target |
|||
* @throws Exception |
|||
*/ |
|||
public static void asposePdfTo(String extension, InputStream source, ByteArrayOutputStream target) throws Exception { |
|||
switch (FileExtensionEnum.getByExtension(extension)) { |
|||
case doc: |
|||
case docx: |
|||
case pdf: |
|||
// 设置字体替换 |
|||
// FontSettings fontSettings = new FontSettings(); |
|||
// FontSubstitutionSettings fontSubstitutionSettings = fontSettings.getSubstitutionSettings(); |
|||
// fontSubstitutionSettings.getDefaultFontSubstitution().setDefaultFontName("Arial"); |
|||
// |
|||
// // 加载系统字体 |
|||
// FontSourceBase[] fontSources = fontSettings.getFontsSources(); |
|||
// SystemFontSource systemFontSource = new SystemFontSource(); |
|||
// FontSourceBase[] updatedFontSources = new FontSourceBase[fontSources.length + 1]; |
|||
// System.arraycopy(fontSources, 0, updatedFontSources, 0, fontSources.length); |
|||
// updatedFontSources[fontSources.length] = systemFontSource; |
|||
// fontSettings.setFontsSources(updatedFontSources); |
|||
// |
|||
// // 指定加载选项,以确保正确处理字体 |
|||
// LoadOptions loadOptions = new LoadOptions(); |
|||
// loadOptions.setFontSettings(fontSettings); |
|||
|
|||
Document doc = new Document(source); |
|||
//全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换 |
|||
doc.save(target, SaveFormat.DocX); |
|||
doc.close(); |
|||
break; |
|||
// case xls: |
|||
// case xlsx: |
|||
// // Load PDF document |
|||
// Document excel = new Document(source); |
|||
// excel.save(target, SaveFormat.Excel); |
|||
// break; |
|||
// case ppt: |
|||
// case pptx: |
|||
// Document ppt = new Document(source); |
|||
// ppt.save(target, SaveFormat.Pptx); |
|||
// break; |
|||
default: |
|||
System.out.println("不支持的文件转换类型"); |
|||
// throw new BaseException("不支持的文件转换类型"); |
|||
} |
|||
} |
|||
|
|||
// public static void convertFile(String inputFilePath, String outputFilePath) throws Exception { |
|||
// String inputExtension = getFileExtension(inputFilePath).toLowerCase(); |
|||
// String outputExtension = getFileExtension(outputFilePath).toLowerCase(); |
|||
// |
|||
// switch (inputExtension) { |
|||
// case "doc": |
|||
// case "docx": |
|||
// convertWord(inputFilePath, outputFilePath, outputExtension); |
|||
// break; |
|||
// case "xls": |
|||
// case "xlsx": |
|||
// convertExcel(inputFilePath, outputFilePath, outputExtension); |
|||
// break; |
|||
// case "ppt": |
|||
// case "pptx": |
|||
// convertPPT(inputFilePath, outputFilePath, outputExtension); |
|||
// break; |
|||
// case "pdf": |
|||
// convertPDF(inputFilePath, outputFilePath, outputExtension); |
|||
// break; |
|||
// default: |
|||
// throw new IllegalArgumentException("Unsupported file format: " + inputExtension); |
|||
// } |
|||
// } |
|||
|
|||
private static void convertWord(String inputFilePath, String outputFilePath, String outputExtension) throws Exception { |
|||
com.aspose.words.Document doc = new com.aspose.words.Document(inputFilePath); |
|||
switch (outputExtension) { |
|||
case "pdf": |
|||
doc.save(outputFilePath, com.aspose.words.SaveFormat.PDF); |
|||
break; |
|||
default: |
|||
System.out.println("不支持的文件转换类型"); |
|||
} |
|||
} |
|||
|
|||
private static void convertExcel(String inputFilePath, String outputFilePath, String outputExtension) throws Exception { |
|||
Workbook workbook = new Workbook(inputFilePath); |
|||
switch (outputExtension) { |
|||
case "pdf": |
|||
workbook.save(outputFilePath, com.aspose.cells.SaveFormat.PDF); |
|||
break; |
|||
case "docx": |
|||
// Excel to Word conversion (Not directly supported) |
|||
ByteArrayOutputStream htmlStream = new ByteArrayOutputStream(); |
|||
workbook.save(htmlStream, com.aspose.cells.SaveFormat.HTML); |
|||
ByteArrayInputStream htmlInputStream = new ByteArrayInputStream(htmlStream.toByteArray()); |
|||
com.aspose.words.Document doc = new com.aspose.words.Document(htmlInputStream); |
|||
doc.save(outputFilePath, com.aspose.cells.SaveFormat.DOCX); |
|||
break; |
|||
case "xlsx": |
|||
workbook.save(outputFilePath, com.aspose.cells.SaveFormat.XLSX); |
|||
break; |
|||
case "pptx": |
|||
// Excel to PPTX conversion (Not directly supported) |
|||
ByteArrayOutputStream htmlStream2 = new ByteArrayOutputStream(); |
|||
workbook.save(htmlStream2, com.aspose.cells.SaveFormat.HTML); |
|||
ByteArrayInputStream htmlInputStream2 = new ByteArrayInputStream(htmlStream2.toByteArray()); |
|||
Presentation presentation = new Presentation(htmlInputStream2); |
|||
presentation.save(outputFilePath, com.aspose.slides.SaveFormat.Pptx); |
|||
break; |
|||
default: |
|||
throw new IllegalArgumentException("Unsupported conversion: Excel to " + outputExtension); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 获取文件扩展名 |
|||
* |
|||
* @param path 文件路径 |
|||
* @return 文件扩展名 |
|||
*/ |
|||
public static String getExtension(Path path) { |
|||
String fileName = path.getFileName().toString(); |
|||
int dotIndex = fileName.lastIndexOf('.'); |
|||
if (dotIndex == -1) { |
|||
throw new IllegalArgumentException("File without extension: " + fileName); |
|||
} |
|||
return fileName.substring(dotIndex + 1).toLowerCase(); |
|||
} |
|||
|
|||
public static Object jsonParse(String key, Map data) { |
|||
String[] keySplit = key.split(":"); |
|||
String jsonPath = keySplit[1]; |
|||
if (!data.containsKey(keySplit[0])) { |
|||
return ""; |
|||
} |
|||
String dataJson = (String) data.get(keySplit[0]); |
|||
JSONObject dataJsonObject = JSON.parseObject(dataJson); |
|||
Object dataValue = JSONPath.eval(dataJsonObject, jsonPath); |
|||
return dataValue; |
|||
} |
|||
|
|||
/** |
|||
* gofast 文件下载 |
|||
* |
|||
* @param url |
|||
* @return |
|||
* @throws IOException |
|||
*/ |
|||
public static InputStream gofastDownLoadFile(String url) { |
|||
OkHttpClient client = new OkHttpClient().newBuilder() |
|||
.readTimeout(60, TimeUnit.SECONDS) |
|||
.writeTimeout(60, TimeUnit.SECONDS) |
|||
.connectTimeout(60, TimeUnit.SECONDS) |
|||
.build(); |
|||
MediaType mediaType = MediaType.parse("text/plain"); |
|||
RequestBody body = RequestBody.create(mediaType, ""); |
|||
Request request = new Request.Builder() |
|||
.url(url) |
|||
.method("GET", null) |
|||
.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36") |
|||
.build(); |
|||
BufferedOutputStream out = null; |
|||
InputStream inputStream = null; |
|||
Response response = null; |
|||
try { |
|||
response = client.newCall(request).execute(); |
|||
for (int i = 0; i < Config.NUM; i++) { |
|||
if (response.isSuccessful()) { |
|||
break; |
|||
} else { |
|||
response = client.newCall(request).execute(); |
|||
System.out.println("gofast文件下载失败,file=" + url + ",第" + i + "次"); |
|||
log.error("gofast文件下载失败,file=" + url + ",第" + i + "次"); |
|||
Thread.sleep(3000); |
|||
i++; |
|||
} |
|||
} |
|||
inputStream = response.body().byteStream(); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
log.error("gofast文件下载异常", e); |
|||
} |
|||
return inputStream; |
|||
} |
|||
|
|||
public static String upLoadFile(String filePath) { |
|||
|
|||
File file = new File(filePath); |
|||
String realFilename = filePath.substring(filePath.lastIndexOf(File.separator) + 1); |
|||
MultipartBody.Builder builder = new MultipartBody.Builder().setType(MultipartBody.FORM); |
|||
builder.addPart(Headers.of("Content-Disposition", "form-data; name=\"file\";filename=\"" + realFilename + "\""), |
|||
RequestBody.create(MediaType.parse("image/png"), file) |
|||
|
|||
).addFormDataPart("output", "json").build(); |
|||
RequestBody body = builder.build(); |
|||
Request request = new Request.Builder().url(Config.gofastUrl).post(body).header("Expect", "100-continue").build(); |
|||
OkHttpClient.Builder okBuilder = new OkHttpClient.Builder(); |
|||
// 获得一个客户对象 |
|||
OkHttpClient client = okBuilder.build(); |
|||
Call call = client.newCall(request); |
|||
String html = ""; |
|||
Response response = null; |
|||
int retry = 0; |
|||
do { |
|||
try { |
|||
response = call.execute(); |
|||
html = response.body().string(); |
|||
break; |
|||
} catch (IOException e) { |
|||
log.error("文档上传异常,file:" + filePath + ",重试" + retry + "次"); |
|||
} finally { |
|||
response.close(); |
|||
} |
|||
} while (retry >= 5); |
|||
file.delete(); |
|||
|
|||
return html; |
|||
} |
|||
|
|||
public static void main(String[] args) throws Exception { |
|||
String filePath = "C:\\Users\\86150\\Desktop\\embed_watermark (1).pdf"; |
|||
// Path path = Paths.get(filePath); |
|||
//// String extension = getExtension(path); |
|||
// String extension = "docx"; |
|||
// System.out.println("文档转换: "+ extension + " --> PDF" ); |
|||
// ByteArrayOutputStream target = new ByteArrayOutputStream(); |
|||
// InputStream source = Files.newInputStream(path); |
|||
//// asposeToPdf(extension, source,target); |
|||
// asposePdfTo(extension,source,target); |
|||
// |
|||
// Files.write(Paths.get("C:\\Users\\86150\\Desktop\\embed_watermark (2).docx"), target.toByteArray()); |
|||
// String s = upLoadFile(filePath); |
|||
// System.out.println(s); |
|||
String gofastUrl = "http://172.18.1.180:9980/group17/default/20240812/16/40/3/971260fd6cce96624965c692f709660b.pdf"; |
|||
InputStream inputStream = gofastDownLoadFile(gofastUrl); |
|||
URL url = new URL(gofastUrl); |
|||
String newPath = url.getPath(); |
|||
Path path = Paths.get(newPath); |
|||
String extension = Utils.getExtension(path); |
|||
ByteArrayOutputStream target = new ByteArrayOutputStream(); |
|||
Utils.asposePdfTo(extension, inputStream,target); |
|||
filePath = "./files/"+ IdUtil.simpleUUID()+".docx"; |
|||
Files.write(Paths.get(filePath), target.toByteArray()); |
|||
} |
|||
|
|||
// public static void main(String[] args) { |
|||
// String pdfFilePath = "C:\\Users\\86150\\Desktop\\百分点\\考试\\百分点019期新员工特训营-文化篇(终版)20210512.pdf"; |
|||
// String wordFilePath = "C:\\Users\\86150\\Desktop\\百分点\\考试\\云学堂.docx"; |
|||
// |
|||
// pdf2doc(pdfFilePath); |
|||
// System.out.println("PDF successfully converted to Word document."); |
|||
// } |
|||
|
|||
|
|||
} |
@ -0,0 +1,40 @@ |
|||
server: |
|||
port: 9955 |
|||
crawl: |
|||
kafka: |
|||
topic: produce_analyze |
|||
brokers: 172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092 |
|||
task: |
|||
taskData: ./data/task.txt |
|||
threadNum: 3 |
|||
#日志级别 |
|||
logging: |
|||
level: |
|||
com: |
|||
bfd: INFO |
|||
#日志路径 |
|||
log: |
|||
path: ./logs |
|||
spring: |
|||
boot: |
|||
admin: |
|||
client: |
|||
url: http://172.18.1.147:8001 |
|||
instance: |
|||
service-base-url: http://172.18.1.147:9999 |
|||
application: |
|||
name: 文档转换 |
|||
management: |
|||
endpoints: |
|||
web: |
|||
exposure: |
|||
include: "*" |
|||
endpoint: |
|||
health: |
|||
show-details: always |
|||
health: |
|||
elasticsearch: |
|||
enabled: false |
|||
zookeeper: |
|||
connection-string: 172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181 |
|||
publish-node: /analyze |
@ -0,0 +1,38 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <!-- Properties resolved from the Spring environment (application.yml).
         The defaults apply only when the corresponding key is missing. -->
    <springProperty scope="context" name="logging.path" source="logging.log.path" defaultValue="./logs"/>
    <springProperty scope="context" name="logging.level" source="logging.level.com.bfd" defaultValue="INFO"/>

    <!-- Default console appender; of little use in production, where the service runs in the background. -->
    <!-- <appender name="STDOUT"
        class="ch.qos.logback.core.ConsoleAppender">
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <Pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</Pattern>
        </encoder>
    </appender> -->

    <!-- Daily rolling file appender; keeps 7 days of history. -->
    <appender name="GLMAPPER-LOGGERONE"
              class="ch.qos.logback.core.rolling.RollingFileAppender">
        <append>true</append>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>${logging.level}</level>
        </filter>
        <!-- Alternative file name used previously: ${logging.path}/sendKafka.log -->
        <!-- Kept on one line so no indentation whitespace ends up in the file path. -->
        <file>${logging.path}/crawlSchedule.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</fileNamePattern>
            <!-- <fileNamePattern>${logging.path}/sendKafka.log.%d{yyyy-MM-dd}</fileNamePattern> -->
            <maxHistory>7</maxHistory>
        </rollingPolicy>
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <root level="info">
        <appender-ref ref="GLMAPPER-LOGGERONE"/>
        <!-- <appender-ref ref="STDOUT"/> -->
    </root>
</configuration>
@ -0,0 +1,13 @@ |
|||
package com.bfd.doc_conversion; |
|||
|
|||
import org.junit.jupiter.api.Test; |
|||
import org.springframework.boot.test.context.SpringBootTest; |
|||
|
|||
@SpringBootTest |
|||
class DocConversionApplicationTests { |
|||
|
|||
@Test |
|||
void contextLoads() { |
|||
} |
|||
|
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue