commit
4d48a8ed53
22 changed files with 1294 additions and 0 deletions
-
40.classpath
-
3.gitignore
-
23.project
-
5.settings/org.eclipse.core.resources.prefs
-
8.settings/org.eclipse.jdt.core.prefs
-
4.settings/org.eclipse.m2e.core.prefs
-
218pom.xml
-
67src/main/java/com/bfd/docconversion/DocConversionApplication.java
-
40src/main/java/com/bfd/docconversion/controller/ApiController.java
-
13src/main/java/com/bfd/docconversion/service/ConversionToPdfService.java
-
46src/main/java/com/bfd/docconversion/service/ProcessService.java
-
97src/main/java/com/bfd/docconversion/service/impl/ConversionToPdfServiceImpl.java
-
37src/main/java/com/bfd/docconversion/util/AsyncConfig.java
-
32src/main/java/com/bfd/docconversion/util/Config.java
-
19src/main/java/com/bfd/docconversion/util/Constants.java
-
39src/main/java/com/bfd/docconversion/util/FileExtensionEnum.java
-
83src/main/java/com/bfd/docconversion/util/KfkUtil.java
-
104src/main/java/com/bfd/docconversion/util/MainHandler.java
-
325src/main/java/com/bfd/docconversion/util/Utils.java
-
40src/main/resources/application.yml
-
38src/main/resources/logback-spring.xml
-
13src/test/java/com/bfd/doc_conversion/DocConversionApplicationTests.java
@ -0,0 +1,40 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<classpath> |
||||
|
<classpathentry kind="src" output="target/classes" path="src/main/java"> |
||||
|
<attributes> |
||||
|
<attribute name="optional" value="true"/> |
||||
|
<attribute name="maven.pomderived" value="true"/> |
||||
|
</attributes> |
||||
|
</classpathentry> |
||||
|
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"> |
||||
|
<attributes> |
||||
|
<attribute name="maven.pomderived" value="true"/> |
||||
|
<attribute name="optional" value="true"/> |
||||
|
</attributes> |
||||
|
</classpathentry> |
||||
|
<classpathentry kind="src" output="target/test-classes" path="src/test/java"> |
||||
|
<attributes> |
||||
|
<attribute name="optional" value="true"/> |
||||
|
<attribute name="maven.pomderived" value="true"/> |
||||
|
<attribute name="test" value="true"/> |
||||
|
</attributes> |
||||
|
</classpathentry> |
||||
|
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"> |
||||
|
<attributes> |
||||
|
<attribute name="maven.pomderived" value="true"/> |
||||
|
<attribute name="test" value="true"/> |
||||
|
<attribute name="optional" value="true"/> |
||||
|
</attributes> |
||||
|
</classpathentry> |
||||
|
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"> |
||||
|
<attributes> |
||||
|
<attribute name="maven.pomderived" value="true"/> |
||||
|
</attributes> |
||||
|
</classpathentry> |
||||
|
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> |
||||
|
<attributes> |
||||
|
<attribute name="maven.pomderived" value="true"/> |
||||
|
</attributes> |
||||
|
</classpathentry> |
||||
|
<classpathentry kind="output" path="target/classes"/> |
||||
|
</classpath> |
@ -0,0 +1,3 @@ |
|||||
|
/target/ |
||||
|
/logs/ |
||||
|
/jarlib/ |
@ -0,0 +1,23 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<projectDescription> |
||||
|
<name>doc_conversion</name> |
||||
|
<comment></comment> |
||||
|
<projects> |
||||
|
</projects> |
||||
|
<buildSpec> |
||||
|
<buildCommand> |
||||
|
<name>org.eclipse.jdt.core.javabuilder</name> |
||||
|
<arguments> |
||||
|
</arguments> |
||||
|
</buildCommand> |
||||
|
<buildCommand> |
||||
|
<name>org.eclipse.m2e.core.maven2Builder</name> |
||||
|
<arguments> |
||||
|
</arguments> |
||||
|
</buildCommand> |
||||
|
</buildSpec> |
||||
|
<natures> |
||||
|
<nature>org.eclipse.jdt.core.javanature</nature> |
||||
|
<nature>org.eclipse.m2e.core.maven2Nature</nature> |
||||
|
</natures> |
||||
|
</projectDescription> |
@ -0,0 +1,5 @@ |
|||||
|
eclipse.preferences.version=1 |
||||
|
encoding//src/main/java=UTF-8 |
||||
|
encoding//src/main/resources=UTF-8 |
||||
|
encoding//src/test/java=UTF-8 |
||||
|
encoding/<project>=UTF-8 |
@ -0,0 +1,8 @@ |
|||||
|
eclipse.preferences.version=1 |
||||
|
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 |
||||
|
org.eclipse.jdt.core.compiler.compliance=1.8 |
||||
|
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled |
||||
|
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning |
||||
|
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore |
||||
|
org.eclipse.jdt.core.compiler.release=disabled |
||||
|
org.eclipse.jdt.core.compiler.source=1.8 |
@ -0,0 +1,4 @@ |
|||||
|
activeProfiles= |
||||
|
eclipse.preferences.version=1 |
||||
|
resolveWorkspaceProjects=true |
||||
|
version=1 |
@ -0,0 +1,218 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.bfd</groupId> |
||||
|
<artifactId>doc_conversion</artifactId> |
||||
|
<version>0.0.1-SNAPSHOT</version> |
||||
|
<name>docconversion</name> |
||||
|
<description>docconversion</description> |
||||
|
<properties> |
||||
|
<java.version>1.8</java.version> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> |
||||
|
<spring-boot.version>2.2.4.RELEASE</spring-boot.version> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-web</artifactId> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>org.projectlombok</groupId> |
||||
|
<artifactId>lombok</artifactId> |
||||
|
<optional>true</optional> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.alibaba.fastjson2</groupId> |
||||
|
<artifactId>fastjson2</artifactId> |
||||
|
<version>2.0.12</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>cn.hutool</groupId> |
||||
|
<artifactId>hutool-all</artifactId> |
||||
|
<version>5.8.27</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.kafka</groupId> |
||||
|
<artifactId>kafka-clients</artifactId> |
||||
|
<version>2.7.1</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.squareup.okhttp3</groupId> |
||||
|
<artifactId>okhttp</artifactId> |
||||
|
<version>3.11.0</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>de.codecentric</groupId> |
||||
|
<artifactId>spring-boot-admin-client</artifactId> |
||||
|
<version>2.2.4</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>aspose-cells-20.12-crack</groupId> |
||||
|
<artifactId>aspose-cells-20.12-crack</artifactId> |
||||
|
<version>20.12</version> |
||||
|
<scope>system</scope> |
||||
|
<systemPath>D:\eclipseWork\doc_conversion/./jarlib/aspose-cells-20.12-crack.jar</systemPath> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>aspose-slides-20.12-crack</groupId> |
||||
|
<artifactId>aspose-slides-20.12-crack</artifactId> |
||||
|
<version>20.12</version> |
||||
|
<scope>system</scope> |
||||
|
<systemPath>D:\eclipseWork\doc_conversion/../jarlib/aspose-slides-20.12-crack.jar</systemPath> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>aspose-words-20.12-crack</groupId> |
||||
|
<artifactId>aspose-words-20.12-crack</artifactId> |
||||
|
<version>20.12</version> |
||||
|
<scope>system</scope> |
||||
|
<systemPath>D:\eclipseWork\doc_conversion/../jarlib/aspose-words-20.12-crack.jar</systemPath> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>org.javassist</groupId> |
||||
|
<artifactId>javassist</artifactId> |
||||
|
<version>3.20.0-GA</version> |
||||
|
</dependency> |
||||
|
<!-- https://mvnrepository.com/artifact/com.aspose/aspose-pdf --> |
||||
|
<dependency> |
||||
|
<groupId>aspose-pdf-23.1</groupId> |
||||
|
<artifactId>aspose-pdf-23.1</artifactId> |
||||
|
<version>23.1</version> |
||||
|
<scope>system</scope> |
||||
|
<systemPath>D:\eclipseWork\doc_conversion/../jarlib/aspose-pdf-23.1.jar</systemPath> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.curator</groupId> |
||||
|
<artifactId>curator-framework</artifactId> |
||||
|
<version>5.2.0</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.curator</groupId> |
||||
|
<artifactId>curator-recipes</artifactId> |
||||
|
<version>5.2.0</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>com.bfd.util</groupId> |
||||
|
<artifactId>pauseTool</artifactId> |
||||
|
<version>1.0</version> |
||||
|
<scope>system</scope> |
||||
|
<systemPath>D:\eclipseWork\doc_conversion/../jarlib/pauseTool-1.0.jar</systemPath> |
||||
|
</dependency> |
||||
|
|
||||
|
</dependencies> |
||||
|
|
||||
|
|
||||
|
<dependencyManagement> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-dependencies</artifactId> |
||||
|
<version>${spring-boot.version}</version> |
||||
|
<type>pom</type> |
||||
|
<scope>import</scope> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
</dependencyManagement> |
||||
|
|
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-jar-plugin</artifactId> |
||||
|
<configuration> |
||||
|
<!--不打入jar包的文件类型或者路径--> |
||||
|
<excludes> |
||||
|
<exclude>*.properties</exclude> |
||||
|
<exclude>*.yml</exclude> |
||||
|
<exclude>*.yaml</exclude> |
||||
|
</excludes> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<!-- 执行的主程序路径 --> |
||||
|
<mainClass>com.bfd.docconversion.DocConversionApplication</mainClass> |
||||
|
<!--是否要把第三方jar放到manifest的classpath中--> |
||||
|
<addClasspath>true</addClasspath> |
||||
|
<!--生成的manifest中classpath的前缀,因为要把第三方jar放到lib目录下,所以classpath的前缀是lib/--> |
||||
|
<classpathPrefix>lib/</classpathPrefix> |
||||
|
<!-- 打包时 MANIFEST.MF 文件不记录的时间戳版本 --> |
||||
|
<useUniqueVersions>false</useUniqueVersions> |
||||
|
</manifest> |
||||
|
<manifestEntries> |
||||
|
<!-- 在 Class-Path 下添加配置文件的路径 --> |
||||
|
<Class-Path>lib/pauseTool-1.0.jar lib/aspose-pdf-23.1-23.1.jar lib/aspose-cells-20.12-crack-20.12.jar lib/aspose-slides-20.12-crack-20.12.jar |
||||
|
lib/aspose-words-20.12-crack-20.12.jar config/ |
||||
|
</Class-Path> |
||||
|
</manifestEntries> |
||||
|
</archive> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-dependency-plugin</artifactId> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>copy</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>copy-dependencies</goal> |
||||
|
</goals> |
||||
|
<configuration> |
||||
|
<outputDirectory>${project.build.directory}/lib/</outputDirectory> |
||||
|
</configuration> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
|
||||
|
<plugin> |
||||
|
<artifactId>maven-resources-plugin</artifactId> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>copy-resources</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>copy-resources</goal> |
||||
|
</goals> |
||||
|
<configuration> |
||||
|
<resources> |
||||
|
<!--把配置文件打包到指定路径--> |
||||
|
<resource> |
||||
|
<directory>src/main/resources/</directory> |
||||
|
<includes> |
||||
|
<include>*.properties</include> |
||||
|
<include>*.yml</include> |
||||
|
<exclude>*.yaml</exclude> |
||||
|
</includes> |
||||
|
</resource> |
||||
|
</resources> |
||||
|
<outputDirectory>${project.build.directory}/config</outputDirectory> |
||||
|
</configuration> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<configuration> |
||||
|
<source>8</source> |
||||
|
<target>8</target> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
|
||||
|
</project> |
@ -0,0 +1,67 @@ |
|||||
|
package com.bfd.docconversion; |
||||
|
|
||||
|
import cn.hutool.core.thread.ThreadFactoryBuilder; |
||||
|
import com.bfd.docconversion.service.ProcessService; |
||||
|
import com.bfd.docconversion.util.Config; |
||||
|
import com.bfd.docconversion.util.KfkUtil; |
||||
|
import com.bfd.util.PauseTool; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Autowired; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.boot.SpringApplication; |
||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication; |
||||
|
import org.springframework.context.ConfigurableApplicationContext; |
||||
|
import org.springframework.data.redis.core.StringRedisTemplate; |
||||
|
import org.springframework.scheduling.annotation.EnableScheduling; |
||||
|
import org.springframework.scheduling.annotation.Scheduled; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
import java.util.concurrent.*; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@SpringBootApplication |
||||
|
@EnableScheduling |
||||
|
@Slf4j |
||||
|
public class DocConversionApplication { |
||||
|
@Autowired |
||||
|
private StringRedisTemplate stringRedisTemplate; |
||||
|
|
||||
|
@Value("${zookeeper.connection-string}") |
||||
|
private String connectionString; |
||||
|
@Value("${zookeeper.publish-node}") |
||||
|
private String nodePath; |
||||
|
@Value("${crawl.threadNum}") |
||||
|
private int threadNum; |
||||
|
|
||||
|
@Resource |
||||
|
ProcessService processService; |
||||
|
public static void main(String[] args) { |
||||
|
ConfigurableApplicationContext applicationContext = SpringApplication.run(DocConversionApplication.class, args); |
||||
|
DocConversionApplication bean = applicationContext.getBean(DocConversionApplication.class); |
||||
|
System.setProperty("java.io.tmpdir","/opt/analyze/apps/doc_conversion/tmp"); |
||||
|
bean.start(); |
||||
|
} |
||||
|
|
||||
|
public void start(){ |
||||
|
|
||||
|
ThreadFactory namedThreadFactory = new ThreadFactoryBuilder().setNamePrefix("crawl-pool-%d").build(); |
||||
|
ExecutorService singleThreadPool = new ThreadPoolExecutor(10, 20, 100L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(1024), namedThreadFactory, new ThreadPoolExecutor.AbortPolicy()); |
||||
|
for (int i=0;i<threadNum;i++){ |
||||
|
singleThreadPool.execute(processService); |
||||
|
} |
||||
|
KfkUtil.getProducer(); |
||||
|
|
||||
|
PauseTool pauseTool = new PauseTool(); |
||||
|
pauseTool.initializeRedisCache(stringRedisTemplate); |
||||
|
pauseTool.setupZookeeperListener(connectionString, nodePath); |
||||
|
|
||||
|
} |
||||
|
@Scheduled(cron = "0 0/5 * * * ?") |
||||
|
public void timeSize(){ |
||||
|
int size = Config.taskQueue.size(); |
||||
|
log.info("当前有 {} 条文档没有转换",size); |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,40 @@ |
|||||
|
package com.bfd.docconversion.controller; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.bfd.docconversion.util.Config; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.web.bind.annotation.*; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@RestController |
||||
|
@Slf4j |
||||
|
@RequestMapping(value = "/document") |
||||
|
@CrossOrigin(origins = "*", maxAge = 3600) |
||||
|
public class ApiController { |
||||
|
// @Resource |
||||
|
// conversionToPdfService conversionToPdfService; |
||||
|
/** |
||||
|
* 文档转换 Api |
||||
|
* @param jsonObject |
||||
|
* @return |
||||
|
*/ |
||||
|
@RequestMapping(value = "/conversion", method = RequestMethod.POST, produces = "application/json") |
||||
|
@ResponseBody |
||||
|
public String varAna(@RequestBody JSONObject jsonObject) { |
||||
|
log.info("文档转换参数:"+jsonObject); |
||||
|
// conversionToPdfService.conversion(jsonObject); |
||||
|
try { |
||||
|
if (jsonObject.containsKey(Config.TRACE) && jsonObject.getBoolean(Config.TRACE)==true){ |
||||
|
log.info("测试流程,插入队首"); |
||||
|
Config.taskQueue.putFirst(jsonObject); |
||||
|
}else { |
||||
|
Config.taskQueue.put(jsonObject); |
||||
|
} |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return "success"; |
||||
|
} |
||||
|
} |
@ -0,0 +1,13 @@ |
|||||
|
package com.bfd.docconversion.service; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Service |
||||
|
public interface ConversionToPdfService { |
||||
|
|
||||
|
void conversion(JSONObject jsonObject); |
||||
|
} |
@ -0,0 +1,46 @@ |
|||||
|
package com.bfd.docconversion.service; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.aspose.pdf.MemoryCleaner; |
||||
|
import com.bfd.docconversion.util.Config; |
||||
|
import com.bfd.docconversion.util.Constants; |
||||
|
import com.bfd.util.PauseTool; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
public class ProcessService implements Runnable { |
||||
|
@Resource |
||||
|
ConversionToPdfService conversionToPdfService; |
||||
|
|
||||
|
@Override |
||||
|
public void run() { |
||||
|
while (true) { |
||||
|
try { |
||||
|
if (Config.taskQueue.size() <= 0) { |
||||
|
Thread.sleep(1000 * 10); |
||||
|
//清除缓存 |
||||
|
MemoryCleaner.clearAllTempFiles(); |
||||
|
} else { |
||||
|
JSONObject take = Config.taskQueue.take(); |
||||
|
Integer scense_id = (Integer) take.get(Constants.SCENES_ID); |
||||
|
Integer version = (Integer) take.get(Constants.VERSION); |
||||
|
if (PauseTool.CACHE.containsKey(scense_id + Constants.UNDERLINE + version)) { |
||||
|
conversionToPdfService.conversion(take); |
||||
|
} else { |
||||
|
log.info("暂停任务:{}", JSONObject.toJSONString(take)); |
||||
|
} |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
log.info("异常,{}", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
@ -0,0 +1,97 @@ |
|||||
|
package com.bfd.docconversion.service.impl; |
||||
|
|
||||
|
import cn.hutool.core.util.IdUtil; |
||||
|
import com.alibaba.fastjson2.JSON; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.bfd.docconversion.service.ConversionToPdfService; |
||||
|
import com.bfd.docconversion.util.Config; |
||||
|
import com.bfd.docconversion.util.KfkUtil; |
||||
|
import com.bfd.docconversion.util.Utils; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import java.io.ByteArrayOutputStream; |
||||
|
import java.io.InputStream; |
||||
|
import java.net.URL; |
||||
|
import java.nio.file.Files; |
||||
|
import java.nio.file.Path; |
||||
|
import java.nio.file.Paths; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
public class ConversionToPdfServiceImpl implements ConversionToPdfService { |
||||
|
/** |
||||
|
* 转换 |
||||
|
* @param jsonObject |
||||
|
*/ |
||||
|
@Override |
||||
|
public void conversion(JSONObject jsonObject) { |
||||
|
//输入 |
||||
|
JSONObject input = jsonObject.getJSONObject("input"); |
||||
|
//输出 |
||||
|
JSONObject output = jsonObject.getJSONObject("output"); |
||||
|
//data |
||||
|
JSONObject data = jsonObject.getJSONObject("data"); |
||||
|
|
||||
|
System.out.println("queryData ---> input:" + JSON.toJSONString(input)); |
||||
|
System.out.println("queryData ---> output:" + JSON.toJSONString(output)); |
||||
|
System.out.println("queryData ---> data:" + JSON.toJSONString(data)); |
||||
|
Map resultMap = new HashMap<>(32); |
||||
|
Map results = new HashMap<>(32); |
||||
|
try { |
||||
|
//需修改 |
||||
|
// String gofastUrl = input.getString("filePath"); |
||||
|
String gofastUrl = (String) Utils.jsonParse(input.getString("filePath"), data); |
||||
|
log.info("开始下载文件, path:"+ gofastUrl); |
||||
|
InputStream source = Utils.gofastDownLoadFile(gofastUrl); |
||||
|
if (source == null) { |
||||
|
throw new NullPointerException(); |
||||
|
} |
||||
|
URL url = new URL(gofastUrl); |
||||
|
String newPath = url.getPath(); |
||||
|
Path path = Paths.get(newPath); |
||||
|
String extension = Utils.getExtension(path); |
||||
|
ByteArrayOutputStream target = new ByteArrayOutputStream(); |
||||
|
String filePath = ""; |
||||
|
if (extension.equals(Config.PDF)) { |
||||
|
log.info("文档转换开始: " + extension + " --> DOC"); |
||||
|
Utils.asposePdfTo(extension, source,target); |
||||
|
filePath = "./files/"+IdUtil.simpleUUID()+".docx"; |
||||
|
}else { |
||||
|
log.info("文档转换开始: " + extension + " --> PDF"); |
||||
|
Utils.asposeToPdf(extension, source,target); |
||||
|
filePath = "./files/"+IdUtil.simpleUUID()+".pdf"; |
||||
|
} |
||||
|
// InputStream source = Files.newInputStream(path); |
||||
|
Files.write(Paths.get(filePath), target.toByteArray()); |
||||
|
log.info("文档转换完成"); |
||||
|
log.info("文件开始上传 path:{}",filePath); |
||||
|
String upLoadFile = Utils.upLoadFile(filePath); |
||||
|
System.out.println(upLoadFile); |
||||
|
log.info("文件结束上传"); |
||||
|
JSONObject resultUpload = JSONObject.parseObject(upLoadFile); |
||||
|
resultMap.put("id", IdUtil.randomUUID()); |
||||
|
resultMap.put("conversionUrl", Config.resultGofast + resultUpload.getString("path")); |
||||
|
results.put("status", 1); |
||||
|
results.put("message", "成功"); |
||||
|
}catch (Exception e){ |
||||
|
e.printStackTrace(); |
||||
|
log.error("文档转换异常",e); |
||||
|
resultMap.put("conversionUrl", "失败"); |
||||
|
results.put("status", 2); |
||||
|
results.put("message", "失败"); |
||||
|
} |
||||
|
resultMap.put("isLast",1); |
||||
|
results.put("results", JSON.toJSONString(resultMap)); |
||||
|
|
||||
|
jsonObject.put("result", results); |
||||
|
KfkUtil.sendKafka(JSON.toJSONString(jsonObject)); |
||||
|
log.info("处理完成,result:" + JSON.toJSONString(results)); |
||||
|
|
||||
|
} |
||||
|
} |
@ -0,0 +1,37 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
|
||||
|
import org.springframework.context.annotation.Configuration; |
||||
|
import org.springframework.scheduling.annotation.AsyncConfigurer; |
||||
|
import org.springframework.scheduling.annotation.EnableAsync; |
||||
|
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; |
||||
|
|
||||
|
import java.util.concurrent.Executor; |
||||
|
|
||||
|
|
||||
|
@Configuration |
||||
|
@EnableAsync //Java配置文件标注它,那么Spring就会开启异步可用 |
||||
|
/** |
||||
|
* @author guowei |
||||
|
* 异步任务线程池 |
||||
|
* 注解@EnableAsync代表开启Spring异步。这样就可以使用@Async驱动Spring使用异步, |
||||
|
* 但是异步需要提供可用线程池,所以这里的配置类还会实现AsyncConfigurer接口,然后覆盖getAsyncExecutor方法,这样就可以自定义一个线程池 |
||||
|
*/ |
||||
|
public class AsyncConfig implements AsyncConfigurer { |
||||
|
|
||||
|
@Override |
||||
|
public Executor getAsyncExecutor() { |
||||
|
//定义线程池 |
||||
|
ThreadPoolTaskExecutor threadPoolTaskExecutor = new ThreadPoolTaskExecutor(); |
||||
|
//核心线程数 |
||||
|
threadPoolTaskExecutor.setCorePoolSize(10); |
||||
|
//线程池最大线程数 |
||||
|
threadPoolTaskExecutor.setMaxPoolSize(50); |
||||
|
//线程队列最大线程数 |
||||
|
threadPoolTaskExecutor.setQueueCapacity(200); |
||||
|
//初始化 |
||||
|
threadPoolTaskExecutor.initialize(); |
||||
|
|
||||
|
return threadPoolTaskExecutor; |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,32 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
import java.util.concurrent.LinkedBlockingDeque; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
public class Config { |
||||
|
|
||||
|
public static String gofastUrl = "http://172.18.1.180:9980/upload"; |
||||
|
|
||||
|
// public static String resultGofast = "https://crawl-files.pontoaplus.com"; |
||||
|
|
||||
|
public static String resultGofast = "https://caiji.pontoaplus.com"; |
||||
|
|
||||
|
public static LinkedBlockingDeque<JSONObject> taskQueue = new LinkedBlockingDeque <JSONObject>(); |
||||
|
|
||||
|
public static Map stopCache = new HashMap<>(); |
||||
|
|
||||
|
public static final String PDF = "pdf"; |
||||
|
|
||||
|
public static final Integer NUM = 5; |
||||
|
|
||||
|
public static final String TRACE = "trace"; |
||||
|
|
||||
|
|
||||
|
|
||||
|
} |
@ -0,0 +1,19 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
|
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Component |
||||
|
public class Constants { |
||||
|
|
||||
|
public final static String STOP = "stop"; |
||||
|
|
||||
|
public final static String SCENES_ID = "scenes_id"; |
||||
|
|
||||
|
public final static String VERSION = "version"; |
||||
|
|
||||
|
public final static String UNDERLINE = "_"; |
||||
|
|
||||
|
} |
@ -0,0 +1,39 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
public enum FileExtensionEnum { |
||||
|
/**doc**/ |
||||
|
doc("doc"), |
||||
|
/**docx**/ |
||||
|
docx("docx"), |
||||
|
/**xls**/ |
||||
|
xls("xls"), |
||||
|
/**xlsx**/ |
||||
|
xlsx("xlsx"), |
||||
|
/**ppt**/ |
||||
|
ppt("ppt"), |
||||
|
/**pptx"**/ |
||||
|
pptx("pptx"), |
||||
|
/**pdf**/ |
||||
|
pdf("pdf"); |
||||
|
|
||||
|
private final String extension; |
||||
|
|
||||
|
FileExtensionEnum(String extension) { |
||||
|
this.extension = extension; |
||||
|
} |
||||
|
|
||||
|
public String getExtension() { |
||||
|
return extension; |
||||
|
} |
||||
|
|
||||
|
public static FileExtensionEnum getByExtension(String extension) { |
||||
|
for (FileExtensionEnum fileExtension : values()) { |
||||
|
if (fileExtension.getExtension().equalsIgnoreCase(extension)) { |
||||
|
return fileExtension; |
||||
|
} |
||||
|
} |
||||
|
throw new IllegalArgumentException("Unsupported file extension: " + extension); |
||||
|
} |
||||
|
} |
@ -0,0 +1,83 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
|
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.apache.kafka.clients.producer.KafkaProducer; |
||||
|
import org.apache.kafka.clients.producer.ProducerRecord; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
import java.util.Properties; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
* kfk工具类 |
||||
|
*/ |
||||
|
@Component |
||||
|
@Slf4j |
||||
|
public class KfkUtil { |
||||
|
private static String topic; |
||||
|
|
||||
|
private static String brokerList; |
||||
|
|
||||
|
@Value("${crawl.kafka.topic}") |
||||
|
public void setTopic(String topic) { |
||||
|
KfkUtil.topic = topic; |
||||
|
} |
||||
|
|
||||
|
@Value("${crawl.kafka.brokers}") |
||||
|
public void setBrokerList(String brokerList) { |
||||
|
KfkUtil.brokerList = brokerList; |
||||
|
} |
||||
|
private static KafkaProducer<String, String> kafkaProducer; |
||||
|
|
||||
|
public static int num = 0; |
||||
|
|
||||
|
/** |
||||
|
* 获取KafkaProducer实例 |
||||
|
*/ |
||||
|
public static KafkaProducer<String, String> getProducer() { |
||||
|
// synchronized (kafkaProducer) { |
||||
|
if (kafkaProducer == null) { |
||||
|
Properties props = new Properties(); |
||||
|
//xxx服务器ip |
||||
|
props.put("bootstrap.servers", brokerList); |
||||
|
//所有follower都响应了才认为消息提交成功,即"committed" |
||||
|
props.put("acks", "all"); |
||||
|
//retries = MAX 无限重试,直到你意识到出现了问题:) |
||||
|
props.put("retries", 3); |
||||
|
//producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 |
||||
|
props.put("batch.size", 16384); |
||||
|
//batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms |
||||
|
//延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 |
||||
|
props.put("linger.ms", 1); |
||||
|
//producer可以用来缓存数据的内存大小。 |
||||
|
props.put("buffer.memory", 33554432); |
||||
|
props.put("key.serializer", |
||||
|
"org.apache.kafka.common.serialization.StringSerializer"); |
||||
|
props.put("value.serializer", |
||||
|
"org.apache.kafka.common.serialization.StringSerializer"); |
||||
|
kafkaProducer = new KafkaProducer<String, String>(props); |
||||
|
} |
||||
|
// } |
||||
|
return kafkaProducer; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 关闭KafkaProducer实例 |
||||
|
*/ |
||||
|
public static void closeProducer() { |
||||
|
if (kafkaProducer != null) { |
||||
|
log.info("----------close producer----------"); |
||||
|
kafkaProducer.close(); |
||||
|
kafkaProducer = null; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static void sendKafka(String resultData) { |
||||
|
KafkaProducer<String, String> producer = getProducer(); |
||||
|
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData); |
||||
|
producer.send(se); |
||||
|
log.info("发送kafka成功"); |
||||
|
// num++; |
||||
|
} |
||||
|
} |
@ -0,0 +1,104 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
|
||||
|
import cn.hutool.core.io.FileUtil; |
||||
|
import cn.hutool.core.io.file.FileWriter; |
||||
|
import com.alibaba.fastjson2.JSON; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.boot.ApplicationArguments; |
||||
|
import org.springframework.boot.ApplicationRunner; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.util.List; |
||||
|
import java.util.concurrent.LinkedBlockingDeque; |
||||
|
import java.util.concurrent.LinkedBlockingQueue; |
||||
|
|
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
@Service |
||||
|
public class MainHandler implements ApplicationRunner { |
||||
|
|
||||
|
@Value("${crawl.task.taskData}") |
||||
|
private String taskPath; |
||||
|
|
||||
|
@Override |
||||
|
public void run(ApplicationArguments args) throws Exception { |
||||
|
log.info("监测程序运行线程 start"); |
||||
|
//停止处理 |
||||
|
waitDown(); |
||||
|
//启动加载缓存任务 |
||||
|
readTask(taskPath, Config.taskQueue); |
||||
|
} |
||||
|
|
||||
|
|
||||
|
public static void readTask(String path, LinkedBlockingDeque queue) throws InterruptedException { |
||||
|
File file = new File(path); |
||||
|
if (file.exists()) { |
||||
|
List<String> tasks = null; |
||||
|
tasks = FileUtil.readLines(file, "UTF-8"); |
||||
|
log.info("缓存文件有 " + tasks.size() + " 条数据"); |
||||
|
for (String taskStr : tasks) { |
||||
|
log.info("读到缓存数据:" + taskStr); |
||||
|
System.out.println("读到缓存数据:" + taskStr); |
||||
|
JSONObject parse = JSONObject.parseObject(taskStr); |
||||
|
// JSONObject value = (JSONObject) parse.get("value"); |
||||
|
// if (value.containsKey("result")){ |
||||
|
// KfkUtil.sendKafka(JSON.toJSONString(value)); |
||||
|
// log.info("此数据已经组装好,直接推送kfk"); |
||||
|
// continue; |
||||
|
// } |
||||
|
queue.put(parse); |
||||
|
} |
||||
|
file.delete(); |
||||
|
} else { |
||||
|
log.info("未找到缓存任务文件"); |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 结束触发钩子 |
||||
|
*/ |
||||
|
public void waitDown() { |
||||
|
Runtime.getRuntime().addShutdownHook(new Thread() { |
||||
|
@Override |
||||
|
public void run() { |
||||
|
// 停止线程 |
||||
|
// Config.isStart = false; |
||||
|
log.info("stop-------"); |
||||
|
try { |
||||
|
writeTsskToFile(); |
||||
|
} catch (InterruptedException e) { |
||||
|
log.error("写出缓存异常,{}", e); |
||||
|
} |
||||
|
} |
||||
|
}); |
||||
|
} |
||||
|
|
||||
|
|
||||
|
/** |
||||
|
* 任务持久化到硬盘 |
||||
|
*/ |
||||
|
public void writeTsskToFile() throws InterruptedException { |
||||
|
|
||||
|
System.out.println(taskPath); |
||||
|
File file = new File(taskPath); |
||||
|
FileWriter fileWriter = new FileWriter(file); |
||||
|
if (!file.exists()) { |
||||
|
fileWriter = FileWriter.create(file); |
||||
|
} |
||||
|
while (Config.taskQueue.size() > 0) { |
||||
|
JSONObject take = Config.taskQueue.take(); |
||||
|
String entryJson = JSON.toJSONString(take); |
||||
|
System.out.println("写入缓存数据:" + entryJson); |
||||
|
fileWriter.write(entryJson + "\r\n", true); |
||||
|
} |
||||
|
log.info("taskMap 缓存已输出"); |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,325 @@ |
|||||
|
package com.bfd.docconversion.util; |
||||
|
|
||||
|
import cn.hutool.core.util.IdUtil; |
||||
|
import com.alibaba.fastjson2.JSON; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.alibaba.fastjson2.JSONPath; |
||||
|
|
||||
|
import com.aspose.cells.Workbook; |
||||
|
import com.aspose.slides.Presentation; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import okhttp3.*; |
||||
|
import org.springframework.stereotype.Component; |
||||
|
import com.aspose.pdf.Document; |
||||
|
import com.aspose.pdf.SaveFormat; |
||||
|
|
||||
|
import java.io.*; |
||||
|
import java.net.URL; |
||||
|
import java.nio.file.Files; |
||||
|
import java.nio.file.Path; |
||||
|
import java.nio.file.Paths; |
||||
|
import java.util.Map; |
||||
|
import java.util.concurrent.TimeUnit; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Component |
||||
|
@Slf4j |
||||
|
public class Utils { |
||||
|
|
||||
|
/** |
||||
|
* 转换成pdf |
||||
|
* |
||||
|
* @param extension |
||||
|
* @param source |
||||
|
* @param target |
||||
|
* @throws Exception |
||||
|
*/ |
||||
|
public static void asposeToPdf(String extension, InputStream source, ByteArrayOutputStream target) throws Exception { |
||||
|
switch (FileExtensionEnum.getByExtension(extension)) { |
||||
|
case doc: |
||||
|
case docx: |
||||
|
com.aspose.words.Document doc = new com.aspose.words.Document(source); |
||||
|
doc.save(target, com.aspose.words.SaveFormat.PDF); |
||||
|
|
||||
|
break; |
||||
|
case xls: |
||||
|
case xlsx: |
||||
|
com.aspose.cells.Workbook excel = new com.aspose.cells.Workbook(source); |
||||
|
com.aspose.cells.PdfSaveOptions pdfSaveOptions = new com.aspose.cells.PdfSaveOptions(); |
||||
|
// 单页显示,防截断 防换行 |
||||
|
pdfSaveOptions.setOnePagePerSheet(true); |
||||
|
excel.save(target, pdfSaveOptions); |
||||
|
excel.dispose(); |
||||
|
break; |
||||
|
case ppt: |
||||
|
case pptx: |
||||
|
com.aspose.slides.Presentation ppt = new com.aspose.slides.Presentation(source); |
||||
|
ppt.save(target, com.aspose.slides.SaveFormat.Pdf); |
||||
|
ppt.dispose(); |
||||
|
break; |
||||
|
default: |
||||
|
System.out.println("不支持的文件转换类型"); |
||||
|
// throw new BaseException("不支持的文件转换类型"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* pdf 转换 |
||||
|
* @param extension |
||||
|
* @param source |
||||
|
* @param target |
||||
|
* @throws Exception |
||||
|
*/ |
||||
|
public static void asposePdfTo(String extension, InputStream source, ByteArrayOutputStream target) throws Exception { |
||||
|
switch (FileExtensionEnum.getByExtension(extension)) { |
||||
|
case doc: |
||||
|
case docx: |
||||
|
case pdf: |
||||
|
// 设置字体替换 |
||||
|
// FontSettings fontSettings = new FontSettings(); |
||||
|
// FontSubstitutionSettings fontSubstitutionSettings = fontSettings.getSubstitutionSettings(); |
||||
|
// fontSubstitutionSettings.getDefaultFontSubstitution().setDefaultFontName("Arial"); |
||||
|
// |
||||
|
// // 加载系统字体 |
||||
|
// FontSourceBase[] fontSources = fontSettings.getFontsSources(); |
||||
|
// SystemFontSource systemFontSource = new SystemFontSource(); |
||||
|
// FontSourceBase[] updatedFontSources = new FontSourceBase[fontSources.length + 1]; |
||||
|
// System.arraycopy(fontSources, 0, updatedFontSources, 0, fontSources.length); |
||||
|
// updatedFontSources[fontSources.length] = systemFontSource; |
||||
|
// fontSettings.setFontsSources(updatedFontSources); |
||||
|
// |
||||
|
// // 指定加载选项,以确保正确处理字体 |
||||
|
// LoadOptions loadOptions = new LoadOptions(); |
||||
|
// loadOptions.setFontSettings(fontSettings); |
||||
|
|
||||
|
Document doc = new Document(source); |
||||
|
//全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换 |
||||
|
doc.save(target, SaveFormat.DocX); |
||||
|
doc.close(); |
||||
|
break; |
||||
|
// case xls: |
||||
|
// case xlsx: |
||||
|
// // Load PDF document |
||||
|
// Document excel = new Document(source); |
||||
|
// excel.save(target, SaveFormat.Excel); |
||||
|
// break; |
||||
|
// case ppt: |
||||
|
// case pptx: |
||||
|
// Document ppt = new Document(source); |
||||
|
// ppt.save(target, SaveFormat.Pptx); |
||||
|
// break; |
||||
|
default: |
||||
|
System.out.println("不支持的文件转换类型"); |
||||
|
// throw new BaseException("不支持的文件转换类型"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// public static void convertFile(String inputFilePath, String outputFilePath) throws Exception { |
||||
|
// String inputExtension = getFileExtension(inputFilePath).toLowerCase(); |
||||
|
// String outputExtension = getFileExtension(outputFilePath).toLowerCase(); |
||||
|
// |
||||
|
// switch (inputExtension) { |
||||
|
// case "doc": |
||||
|
// case "docx": |
||||
|
// convertWord(inputFilePath, outputFilePath, outputExtension); |
||||
|
// break; |
||||
|
// case "xls": |
||||
|
// case "xlsx": |
||||
|
// convertExcel(inputFilePath, outputFilePath, outputExtension); |
||||
|
// break; |
||||
|
// case "ppt": |
||||
|
// case "pptx": |
||||
|
// convertPPT(inputFilePath, outputFilePath, outputExtension); |
||||
|
// break; |
||||
|
// case "pdf": |
||||
|
// convertPDF(inputFilePath, outputFilePath, outputExtension); |
||||
|
// break; |
||||
|
// default: |
||||
|
// throw new IllegalArgumentException("Unsupported file format: " + inputExtension); |
||||
|
// } |
||||
|
// } |
||||
|
|
||||
|
private static void convertWord(String inputFilePath, String outputFilePath, String outputExtension) throws Exception { |
||||
|
com.aspose.words.Document doc = new com.aspose.words.Document(inputFilePath); |
||||
|
switch (outputExtension) { |
||||
|
case "pdf": |
||||
|
doc.save(outputFilePath, com.aspose.words.SaveFormat.PDF); |
||||
|
break; |
||||
|
default: |
||||
|
System.out.println("不支持的文件转换类型"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void convertExcel(String inputFilePath, String outputFilePath, String outputExtension) throws Exception { |
||||
|
Workbook workbook = new Workbook(inputFilePath); |
||||
|
switch (outputExtension) { |
||||
|
case "pdf": |
||||
|
workbook.save(outputFilePath, com.aspose.cells.SaveFormat.PDF); |
||||
|
break; |
||||
|
case "docx": |
||||
|
// Excel to Word conversion (Not directly supported) |
||||
|
ByteArrayOutputStream htmlStream = new ByteArrayOutputStream(); |
||||
|
workbook.save(htmlStream, com.aspose.cells.SaveFormat.HTML); |
||||
|
ByteArrayInputStream htmlInputStream = new ByteArrayInputStream(htmlStream.toByteArray()); |
||||
|
com.aspose.words.Document doc = new com.aspose.words.Document(htmlInputStream); |
||||
|
doc.save(outputFilePath, com.aspose.cells.SaveFormat.DOCX); |
||||
|
break; |
||||
|
case "xlsx": |
||||
|
workbook.save(outputFilePath, com.aspose.cells.SaveFormat.XLSX); |
||||
|
break; |
||||
|
case "pptx": |
||||
|
// Excel to PPTX conversion (Not directly supported) |
||||
|
ByteArrayOutputStream htmlStream2 = new ByteArrayOutputStream(); |
||||
|
workbook.save(htmlStream2, com.aspose.cells.SaveFormat.HTML); |
||||
|
ByteArrayInputStream htmlInputStream2 = new ByteArrayInputStream(htmlStream2.toByteArray()); |
||||
|
Presentation presentation = new Presentation(htmlInputStream2); |
||||
|
presentation.save(outputFilePath, com.aspose.slides.SaveFormat.Pptx); |
||||
|
break; |
||||
|
default: |
||||
|
throw new IllegalArgumentException("Unsupported conversion: Excel to " + outputExtension); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取文件扩展名 |
||||
|
* |
||||
|
* @param path 文件路径 |
||||
|
* @return 文件扩展名 |
||||
|
*/ |
||||
|
public static String getExtension(Path path) { |
||||
|
String fileName = path.getFileName().toString(); |
||||
|
int dotIndex = fileName.lastIndexOf('.'); |
||||
|
if (dotIndex == -1) { |
||||
|
throw new IllegalArgumentException("File without extension: " + fileName); |
||||
|
} |
||||
|
return fileName.substring(dotIndex + 1).toLowerCase(); |
||||
|
} |
||||
|
|
||||
|
public static Object jsonParse(String key, Map data) { |
||||
|
String[] keySplit = key.split(":"); |
||||
|
String jsonPath = keySplit[1]; |
||||
|
if (!data.containsKey(keySplit[0])) { |
||||
|
return ""; |
||||
|
} |
||||
|
String dataJson = (String) data.get(keySplit[0]); |
||||
|
JSONObject dataJsonObject = JSON.parseObject(dataJson); |
||||
|
Object dataValue = JSONPath.eval(dataJsonObject, jsonPath); |
||||
|
return dataValue; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* gofast 文件下载 |
||||
|
* |
||||
|
* @param url |
||||
|
* @return |
||||
|
* @throws IOException |
||||
|
*/ |
||||
|
public static InputStream gofastDownLoadFile(String url) { |
||||
|
OkHttpClient client = new OkHttpClient().newBuilder() |
||||
|
.readTimeout(60, TimeUnit.SECONDS) |
||||
|
.writeTimeout(60, TimeUnit.SECONDS) |
||||
|
.connectTimeout(60, TimeUnit.SECONDS) |
||||
|
.build(); |
||||
|
MediaType mediaType = MediaType.parse("text/plain"); |
||||
|
RequestBody body = RequestBody.create(mediaType, ""); |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.method("GET", null) |
||||
|
.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36") |
||||
|
.build(); |
||||
|
BufferedOutputStream out = null; |
||||
|
InputStream inputStream = null; |
||||
|
Response response = null; |
||||
|
try { |
||||
|
response = client.newCall(request).execute(); |
||||
|
for (int i = 0; i < Config.NUM; i++) { |
||||
|
if (response.isSuccessful()) { |
||||
|
break; |
||||
|
} else { |
||||
|
response = client.newCall(request).execute(); |
||||
|
System.out.println("gofast文件下载失败,file=" + url + ",第" + i + "次"); |
||||
|
log.error("gofast文件下载失败,file=" + url + ",第" + i + "次"); |
||||
|
Thread.sleep(3000); |
||||
|
i++; |
||||
|
} |
||||
|
} |
||||
|
inputStream = response.body().byteStream(); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
log.error("gofast文件下载异常", e); |
||||
|
} |
||||
|
return inputStream; |
||||
|
} |
||||
|
|
||||
|
public static String upLoadFile(String filePath) { |
||||
|
|
||||
|
File file = new File(filePath); |
||||
|
String realFilename = filePath.substring(filePath.lastIndexOf(File.separator) + 1); |
||||
|
MultipartBody.Builder builder = new MultipartBody.Builder().setType(MultipartBody.FORM); |
||||
|
builder.addPart(Headers.of("Content-Disposition", "form-data; name=\"file\";filename=\"" + realFilename + "\""), |
||||
|
RequestBody.create(MediaType.parse("image/png"), file) |
||||
|
|
||||
|
).addFormDataPart("output", "json").build(); |
||||
|
RequestBody body = builder.build(); |
||||
|
Request request = new Request.Builder().url(Config.gofastUrl).post(body).header("Expect", "100-continue").build(); |
||||
|
OkHttpClient.Builder okBuilder = new OkHttpClient.Builder(); |
||||
|
// 获得一个客户对象 |
||||
|
OkHttpClient client = okBuilder.build(); |
||||
|
Call call = client.newCall(request); |
||||
|
String html = ""; |
||||
|
Response response = null; |
||||
|
int retry = 0; |
||||
|
do { |
||||
|
try { |
||||
|
response = call.execute(); |
||||
|
html = response.body().string(); |
||||
|
break; |
||||
|
} catch (IOException e) { |
||||
|
log.error("文档上传异常,file:" + filePath + ",重试" + retry + "次"); |
||||
|
} finally { |
||||
|
response.close(); |
||||
|
} |
||||
|
} while (retry >= 5); |
||||
|
file.delete(); |
||||
|
|
||||
|
return html; |
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) throws Exception { |
||||
|
String filePath = "C:\\Users\\86150\\Desktop\\embed_watermark (1).pdf"; |
||||
|
// Path path = Paths.get(filePath); |
||||
|
//// String extension = getExtension(path); |
||||
|
// String extension = "docx"; |
||||
|
// System.out.println("文档转换: "+ extension + " --> PDF" ); |
||||
|
// ByteArrayOutputStream target = new ByteArrayOutputStream(); |
||||
|
// InputStream source = Files.newInputStream(path); |
||||
|
//// asposeToPdf(extension, source,target); |
||||
|
// asposePdfTo(extension,source,target); |
||||
|
// |
||||
|
// Files.write(Paths.get("C:\\Users\\86150\\Desktop\\embed_watermark (2).docx"), target.toByteArray()); |
||||
|
// String s = upLoadFile(filePath); |
||||
|
// System.out.println(s); |
||||
|
String gofastUrl = "http://172.18.1.180:9980/group17/default/20240812/16/40/3/971260fd6cce96624965c692f709660b.pdf"; |
||||
|
InputStream inputStream = gofastDownLoadFile(gofastUrl); |
||||
|
URL url = new URL(gofastUrl); |
||||
|
String newPath = url.getPath(); |
||||
|
Path path = Paths.get(newPath); |
||||
|
String extension = Utils.getExtension(path); |
||||
|
ByteArrayOutputStream target = new ByteArrayOutputStream(); |
||||
|
Utils.asposePdfTo(extension, inputStream,target); |
||||
|
filePath = "./files/"+ IdUtil.simpleUUID()+".docx"; |
||||
|
Files.write(Paths.get(filePath), target.toByteArray()); |
||||
|
} |
||||
|
|
||||
|
// public static void main(String[] args) { |
||||
|
// String pdfFilePath = "C:\\Users\\86150\\Desktop\\百分点\\考试\\百分点019期新员工特训营-文化篇(终版)20210512.pdf"; |
||||
|
// String wordFilePath = "C:\\Users\\86150\\Desktop\\百分点\\考试\\云学堂.docx"; |
||||
|
// |
||||
|
// pdf2doc(pdfFilePath); |
||||
|
// System.out.println("PDF successfully converted to Word document."); |
||||
|
// } |
||||
|
|
||||
|
|
||||
|
} |
@ -0,0 +1,40 @@ |
|||||
|
server: |
||||
|
port: 9955 |
||||
|
crawl: |
||||
|
kafka: |
||||
|
topic: produce_analyze |
||||
|
brokers: 172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092 |
||||
|
task: |
||||
|
taskData: ./data/task.txt |
||||
|
threadNum: 3 |
||||
|
#日志级别 |
||||
|
logging: |
||||
|
level: |
||||
|
com: |
||||
|
bfd: INFO |
||||
|
#日志路径 |
||||
|
log: |
||||
|
path: ./logs |
||||
|
spring: |
||||
|
boot: |
||||
|
admin: |
||||
|
client: |
||||
|
url: http://172.18.1.147:8001 |
||||
|
instance: |
||||
|
service-base-url: http://172.18.1.147:9999 |
||||
|
application: |
||||
|
name: 文档转换 |
||||
|
management: |
||||
|
endpoints: |
||||
|
web: |
||||
|
exposure: |
||||
|
include: "*" |
||||
|
endpoint: |
||||
|
health: |
||||
|
show-details: always |
||||
|
health: |
||||
|
elasticsearch: |
||||
|
enabled: false |
||||
|
zookeeper: |
||||
|
connection-string: 172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181 |
||||
|
publish-node: /analyze |
@ -0,0 +1,38 @@ |
|||||
|
<configuration> |
||||
|
<!-- 属性文件:在properties文件中找到对应的配置项 --> |
||||
|
<springProperty scope="context" name="logging.path" source="logging.log.path"/> |
||||
|
<springProperty scope="context" name="logging.level" source="logging.level.com.bfd"/> |
||||
|
<!-- 默认的控制台日志输出,一般生产环境都是后台启动,这个没太大作用 --> |
||||
|
<!-- <appender name="STDOUT" |
||||
|
class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<Pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</Pattern> |
||||
|
</encoder> |
||||
|
</appender> --> |
||||
|
|
||||
|
<appender name="GLMAPPER-LOGGERONE" |
||||
|
class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<append>true</append> |
||||
|
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> |
||||
|
<level>${logging.level}</level> |
||||
|
</filter> |
||||
|
<file> |
||||
|
${logging.path}/crawlSchedule.log |
||||
|
<!-- ${logging.path}/sendKafka.log --> |
||||
|
</file> |
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<FileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</FileNamePattern> |
||||
|
<!-- <FileNamePattern>${logging.path}/sendKafka.log.%d{yyyy-MM-dd}</FileNamePattern> --> |
||||
|
<MaxHistory>7</MaxHistory> |
||||
|
</rollingPolicy> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<root level="info"> |
||||
|
<appender-ref ref="GLMAPPER-LOGGERONE"/> |
||||
|
<!-- <appender-ref ref="STDOUT"/> --> |
||||
|
</root> |
||||
|
</configuration> |
@ -0,0 +1,13 @@ |
|||||
|
package com.bfd.doc_conversion; |
||||
|
|
||||
|
import org.junit.jupiter.api.Test; |
||||
|
import org.springframework.boot.test.context.SpringBootTest; |
||||
|
|
||||
|
@SpringBootTest |
||||
|
class DocConversionApplicationTests { |
||||
|
|
||||
|
@Test |
||||
|
void contextLoads() { |
||||
|
} |
||||
|
|
||||
|
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue