diff --git a/document-convert-service/.classpath b/document-convert-service/.classpath new file mode 100644 index 0000000..f7e4a1d --- /dev/null +++ b/document-convert-service/.classpath @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/document-convert-service/.gitignore b/document-convert-service/.gitignore new file mode 100644 index 0000000..b83d222 --- /dev/null +++ b/document-convert-service/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/document-convert-service/.project b/document-convert-service/.project new file mode 100644 index 0000000..b7de7c2 --- /dev/null +++ b/document-convert-service/.project @@ -0,0 +1,23 @@ + + + document-convert-service + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/document-convert-service/.settings/org.eclipse.core.resources.prefs b/document-convert-service/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..839d647 --- /dev/null +++ b/document-convert-service/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,5 @@ +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 +encoding//src/main/resources=UTF-8 +encoding//src/test/java=UTF-8 +encoding/=UTF-8 diff --git a/document-convert-service/.settings/org.eclipse.jdt.core.prefs b/document-convert-service/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..71df522 --- /dev/null +++ b/document-convert-service/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,9 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.methodParameters=generate +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 
+org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/document-convert-service/.settings/org.eclipse.m2e.core.prefs b/document-convert-service/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..f897a7f --- /dev/null +++ b/document-convert-service/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/document-convert-service/pom.xml b/document-convert-service/pom.xml new file mode 100644 index 0000000..3c5dd8c --- /dev/null +++ b/document-convert-service/pom.xml @@ -0,0 +1,187 @@ + + + 4.0.0 + + com.bw + opai-service-center + 0.0.1-SNAPSHOT + + com.bw + document-convert-service + 0.0.1-SNAPSHOT + document-convert-service + http://maven.apache.org + + UTF-8 + + + + org.springframework.boot + spring-boot-starter-web + + + com.alibaba.cloud + spring-cloud-starter-alibaba-nacos-discovery + + + org.springframework.cloud + spring-cloud-starter-openfeign + + + com.alibaba.cloud + spring-cloud-starter-alibaba-nacos-config + + + org.springframework.boot + spring-boot-starter-actuator + + + org.projectlombok + lombok + + + com.alibaba + fastjson + 2.0.17 + + + org.apache.httpcomponents + httpclient + 4.5.3 + + + org.apache.httpcomponents + httpmime + 4.5.13 + + + commons-lang + commons-lang + 2.6 + + + + com.squareup.okhttp3 + okhttp + 4.9.3 + + + org.springframework.kafka + spring-kafka + + + + org.apache.poi + poi + 4.1.2 + + + org.apache.poi + poi-ooxml + 4.1.2 + + + + + org.apache.poi + ooxml-schemas + 1.4 + + + + + + + + maven-clean-plugin + 3.1.0 + + + + maven-resources-plugin + 3.0.2 + + + maven-compiler-plugin + 3.8.0 + + + maven-surefire-plugin + 2.22.1 + + + maven-jar-plugin + 3.0.2 + + + maven-install-plugin + 2.5.2 + + + maven-deploy-plugin + 2.8.2 + + + + maven-site-plugin + 3.7.1 + + + maven-project-info-reports-plugin + 
3.0.0 + + + + + org.springframework.boot + spring-boot-maven-plugin + + com.bw.convert.Application + ZIP + + + ${project.groupId} + ${project.artifactId} + + + + + + + repackage + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.1.1 + + + copy + package + + copy-dependencies + + + jar + jar + runtime + ${project.build.directory}/libs + + + + + + + + \ No newline at end of file diff --git a/document-convert-service/src/main/java/com/bw/convert/Application.java b/document-convert-service/src/main/java/com/bw/convert/Application.java new file mode 100644 index 0000000..d134360 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/Application.java @@ -0,0 +1,19 @@ +package com.bw.convert; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + + +/** + * 系统接口启动类 + * @author jian.mao + * @date 2025年12月30日 + * @description + */ +@SpringBootApplication +public class Application { + + public static void main(String[] args) { + SpringApplication.run(Application.class, args); + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/cache/ConfigCache.java b/document-convert-service/src/main/java/com/bw/convert/cache/ConfigCache.java new file mode 100644 index 0000000..026e1ad --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/cache/ConfigCache.java @@ -0,0 +1,37 @@ +package com.bw.convert.cache; + +import lombok.extern.slf4j.Slf4j; + +import java.util.Map; +import java.util.concurrent.LinkedBlockingDeque; + +/** + * @author jian.mao + * @date 2022年11月11日 + * @description 静态变量类 + */ +@Slf4j +public class ConfigCache { + + /**启动条件**/ + public static boolean isStart = true; + /*****任务队列*****/ + public static LinkedBlockingDeque> taskQueue = new LinkedBlockingDeque>(); + /****结果队列****/ + public static LinkedBlockingDeque> resultQueue = new LinkedBlockingDeque>(); + + + /** + * 队列录入任务 + * @param queue + * @param task + */ + public static 
void putQueue(LinkedBlockingDeque> queue,Map task){ + //next app 写入队列准备调出 + try { + queue.put(task); + } catch (InterruptedException e) { + log.error("队列写入data失败---"); + } + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/controller/TaskReceiveController.java b/document-convert-service/src/main/java/com/bw/convert/controller/TaskReceiveController.java new file mode 100644 index 0000000..7431d78 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/controller/TaskReceiveController.java @@ -0,0 +1,39 @@ +package com.bw.convert.controller; + +import javax.annotation.Resource; + +import org.springframework.stereotype.Controller; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.ResponseBody; + +import com.bw.convert.service.TaskReceiveService; + +import lombok.extern.slf4j.Slf4j; + +/** + * 任务接收控制层 + * @author jian.mao + * @date 2025年1月14日 + * @description + */ +@Controller +@RequestMapping("/task") +@Slf4j +public class TaskReceiveController { + @Resource + private TaskReceiveService taskReceiveService; + @PostMapping("/put") + @ResponseBody + public String put(@RequestBody String param){ + String response = taskReceiveService.put(param); + return response; + } + @RequestMapping(value = "/hello", method = RequestMethod.GET) + @ResponseBody + public String hello(String param, String token) { + return "123"; + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/entity/AppResultDoc.java b/document-convert-service/src/main/java/com/bw/convert/entity/AppResultDoc.java new file mode 100644 index 0000000..e60641e --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/entity/AppResultDoc.java @@ -0,0 +1,39 @@ +package com.bw.convert.entity; + + +import 
java.io.Serializable; +import java.util.Map; + + +import lombok.Data; + +/** + * ES 索引:opai_app_result + * 应用执行结果文档 + * + * @author jian.mao + */ +@Data +public class AppResultDoc implements Serializable { + + private static final long serialVersionUID = 1L; + + /** 任务ID */ + private String taskId; + + /** 应用id */ + private Integer appId; + + /** 状态 0 进行中,1成功,2失败 */ + private Integer status; + + /** 创建时间(毫秒时间戳) */ + private Long createTime; + + /** 执行结果(可索引) */ + private Map result; + + /** 逻辑删除标识:0-未删除 1-已删除 */ + private Integer del; + +} diff --git a/document-convert-service/src/main/java/com/bw/convert/entity/Constants.java b/document-convert-service/src/main/java/com/bw/convert/entity/Constants.java new file mode 100644 index 0000000..ac780ff --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/entity/Constants.java @@ -0,0 +1,81 @@ +package com.bw.convert.entity; + + +/** + * 常量实体类 + * @author jian.mao + * @date 2022年11月15日 + * @description + */ +public class Constants { + + + /** + * 空字符串常量 + */ + public static final String EMPTY = ""; + + /************************应用参数*************************************/ + public static final String CODE = "code"; + public static final String ID = "id"; + public static final String MESSAGE = "message"; + /******************************api使用*******************************/ + public static final String ERROR = "error"; + public static final String TRACE = "trace"; + public static final String PARSE_FIAL = "解析失败"; + public static final String FILEURL = "fileUrl"; + public static final String CREATEURL = "createUrl"; + public static final String JOBURL = "jobUrl"; + public static final String QUERYURL = "queryUrl"; + public static final String FILECONTENTS = "FileContents"; + public static final String FILENAME = "fileName"; + public static final String F_ILENAME = "FileName"; + public static final String OPENPASSWORD = "OpenPassword"; + public static final String OWNERPASSWORD = "OwnerPassword"; + public 
static final String LOCATIONPATH = "LocationPath"; + public static final String SUCCESS = "success"; + public static final String FAILED = "failed"; + public static final String WORDS_RESULT = "words_result"; + public static final String WORDS = "words"; + public static final String JOBID = "jobId"; + public static final String PROGRESS = "Progress"; + public static final String JOBDOCUMENTS = "JobDocuments"; + public static final String OutputDocuments = "OutputDocuments"; + public static final String FILES = "Files"; + public static final String IDRENAME = "#id"; + + /** + * 任务id + */ + public static final String TASKID = "taskId"; + + /** + * 文件格式 + */ + public static final String FORMAT = "format"; + + /** + * 文件类型 + */ + public static final String DOC_TYPE = "docType"; + + /** + * word + */ + public static final String WORD = "Word"; + + /** + * html + */ + public static final String HTML = "HTML"; + + /** + * url格式上替换标识 + */ + public static final String FORMAT_REPLACE_CHARACTER = "{format}"; + + /** + * 路径 + */ + public static final String PATH = "path"; +} diff --git a/document-convert-service/src/main/java/com/bw/convert/handler/MainHandler.java b/document-convert-service/src/main/java/com/bw/convert/handler/MainHandler.java new file mode 100644 index 0000000..46ee7d8 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/handler/MainHandler.java @@ -0,0 +1,207 @@ +package com.bw.convert.handler; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.FileUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.ApplicationArguments; +import 
org.springframework.boot.ApplicationRunner; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.alibaba.fastjson.JSONObject; +import com.bw.convert.cache.ConfigCache; +import com.bw.convert.service.ConvertTaskService; +import com.bw.convert.utils.DateUtil; +import com.bw.convert.utils.FileUtil; + +import lombok.extern.slf4j.Slf4j; + + +/** + * @author jian.mao + * @date 2025年1月13日 + * @description + */ +@Component +@Order(value = 1) +@Slf4j +public class MainHandler implements ApplicationRunner { + + @Value("${task.task-queue-path}") + private String taskPath; + @Value("${task.result-task-queue-path}") + private String resultTaskPath; + @Autowired + private ConvertTaskService convertTaskService; + /***线程池参数***/ + @Value("${threadPool.corePoolSize}") + private int corePoolSize; + @Value("${threadPool.maximumPoolSize}") + private int maximumPoolSize; + @Value("${threadPool.keepAliveTime}") + private long keepAliveTime; + @Value("${threadPool.queueSize}") + private int queueSize; + + /** + *执行入口 + */ + @Override + public void run(ApplicationArguments args) throws Exception { + //线程池方式 + ThreadPoolExecutor executor = new ThreadPoolExecutor( + corePoolSize, + maximumPoolSize, + keepAliveTime, + TimeUnit.SECONDS, + new LinkedBlockingQueue<>(queueSize), + new ThreadPoolExecutor.CallerRunsPolicy() + ); + //消费创建任务队列数据 + Thread consumerThread = new Thread(() -> { + while (true) { + try { + // 从队列中获取任务 + Map task = ConfigCache.taskQueue.take(); + log.info("创建任务----:{}",JSONObject.toJSONString(task)); + // 提交给线程池执行 + executor.execute(() -> createTask(task)); + } catch (InterruptedException e) { + // 恢复中断状态 + Thread.currentThread().interrupt(); + log.error("任务消费线程被中断"); + break; + } + } + }); + consumerThread.start(); + log.info("创建任务消费线程启动-----"); + + + //消费结果任务队列数据 + Thread resultConsumerThread = new Thread(() -> { + while (true) { + try { + // 从队列中获取任务 + Map task = ConfigCache.resultQueue.take(); + 
log.info("获取结果任务----:{}",JSONObject.toJSONString(task)); + // 提交给线程池执行 + executor.execute(() -> getResult(task)); + } catch (InterruptedException e) { + // 恢复中断状态 + Thread.currentThread().interrupt(); + log.error("任务消费线程被中断"); + break; + } + DateUtil.sleep(3000); + } + }); + resultConsumerThread.start(); + log.info("结果任务消费线程启动-----"); + //启动加载缓存任务 + readTask(taskPath, ConfigCache.taskQueue); + readTask(resultTaskPath, ConfigCache.resultQueue); + //停止处理 + waitDown(); + } + + /** + * 创建任务执行方法 + * @param task + */ + private void createTask(Map task) { + convertTaskService.create(task); + } + + private void getResult(Map task) { + convertTaskService.parse(task); + } + + + /****************************************************************load******************************************************************************/ + /** + * 加载文件中的任务 + * @param path 文件地址 + * @param queue 队列 + */ + @SuppressWarnings("unchecked") + public static void readTask(String path, LinkedBlockingDeque> queue) { + File file = new File(path); + if (file.exists()) { + List tasks = null; + try { + tasks = FileUtils.readLines(file, "UTF-8"); + } catch (IOException e) { + e.printStackTrace(); + } + for (String taskStr : tasks) { + Map task = JSONObject.parseObject(taskStr); + try { + queue.put(task); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + file.delete(); + } + } + + /*******************************************************************stop************************************************************************/ + + /** + * 结束触发钩子 + */ + public void waitDown() { + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + // 停止线程 + ConfigCache.isStart = false; + log.info("stop-------"); + writeTsskToFile(); + } + }); + } + + + /** + * 任务持久化到硬盘 + */ + public void writeTsskToFile() { + while (true) { + if (ConfigCache.taskQueue.size() > 0) { + try { + Map task = ConfigCache.taskQueue.take(); + FileUtil.writeFile(taskPath, 
JSONObject.toJSONString(task)); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } else { + log.info("taskQueue write is file end"); + break; + } + } + while (true) { + if (ConfigCache.resultQueue.size() > 0) { + try { + Map task = ConfigCache.resultQueue.take(); + FileUtil.writeFile(resultTaskPath, JSONObject.toJSONString(task)); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } else { + log.info("taskQueue write is file end"); + break; + } + } + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/service/ConvertTaskService.java b/document-convert-service/src/main/java/com/bw/convert/service/ConvertTaskService.java new file mode 100644 index 0000000..3b8340c --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/service/ConvertTaskService.java @@ -0,0 +1,24 @@ +package com.bw.convert.service; + +import java.util.Map; + +/** + * ocr识别处理接口 + * @author jian.mao + * @date 2025年2月18日 + * @description + */ +public interface ConvertTaskService { + + /** + * ocr远端任务 + * @param task + */ + public void create(Map task); + + /** + * 解析结果 + * @param task + */ + public void parse(Map task); +} diff --git a/document-convert-service/src/main/java/com/bw/convert/service/TaskReceiveService.java b/document-convert-service/src/main/java/com/bw/convert/service/TaskReceiveService.java new file mode 100644 index 0000000..53265e5 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/service/TaskReceiveService.java @@ -0,0 +1,17 @@ +package com.bw.convert.service; + +/** + * 任务接收服务层 + * @author jian.mao + * @date 2025年1月14日 + * @description + */ +public interface TaskReceiveService { + + /** + * 任务新增 + * @param dataJson + * @return + */ + public String put(String dataJson); +} diff --git a/document-convert-service/src/main/java/com/bw/convert/service/impl/ConvertTaskServiceImpl.java b/document-convert-service/src/main/java/com/bw/convert/service/impl/ConvertTaskServiceImpl.java new file mode 
100644 index 0000000..f73a3bb --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/service/impl/ConvertTaskServiceImpl.java @@ -0,0 +1,439 @@ +package com.bw.convert.service.impl; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import org.apache.poi.xwpf.extractor.XWPFWordExtractor; +import org.apache.poi.xwpf.usermodel.XWPFDocument; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.cloud.context.config.annotation.RefreshScope; +import org.springframework.stereotype.Service; + +import com.alibaba.fastjson.JSONObject; +import com.bw.convert.cache.ConfigCache; +import com.bw.convert.entity.AppResultDoc; +import com.bw.convert.entity.Constants; +import com.bw.convert.service.ConvertTaskService; +import com.bw.convert.utils.DownLoadUtil; +import com.bw.convert.utils.FileUtil; +import com.bw.convert.utils.ZipUtil; + +import lombok.extern.slf4j.Slf4j; + +/** + * ocr执行实现类 + * @author jian.mao + * @date 2025年2月18日 + * @description + */ +@Service +@Slf4j +@RefreshScope + +public class ConvertTaskServiceImpl implements ConvertTaskService { + + + @Value("${file.path-prefix}") + private String downloadFilePathPrefix; + + @Value("${file.convert-path-prefix}") + private String convertFilePathPrefix; + + @Value("${api.create-url}") + private String createUrl; + + @Value("${api.job-url}") + private String jobUrl; + @Value("${api.query-url}") + private String queryUrl; + + @Value("${api.save-url}") + private String saveUrl; + + @Value("${gofast.upload-url}") + private String gofastUrl; + + @Value("${gofast.access-prefix}") + private String gofastAccessPrefix; + + + @Override + public void create(Map task) { + 
// TODO Auto-generated method stub + try { + //源文件链接 + String fileUrl =task.get(Constants.FILEURL).toString(); + //获取源文件文件格式 + String srcFileformat = fileUrl.replaceAll(".*\\.", Constants.EMPTY); + //目标文件格式 +// String format = (String) task.get(Constants.FORMAT); + //文件类型 + String fileType = (String) task.get(Constants.DOC_TYPE); + String fileName = UUID.randomUUID().toString() + "." + srcFileformat; + String downloadFilePath = downloadFilePathPrefix + fileName; + DownLoadUtil.downloadFile(fileUrl, downloadFilePath); + //加载文件以base64编码 + String fileContent = encodeFileToBase64(downloadFilePath); + //删除文件 + FileUtil.delFile(downloadFilePath); + + Map param = new HashMap(16); + param.put(Constants.FILECONTENTS, fileContent); + param.put(Constants.FILENAME, fileName); + param.put(Constants.OPENPASSWORD, Constants.EMPTY); + param.put(Constants.OWNERPASSWORD, Constants.EMPTY); + param.put(Constants.LOCATIONPATH, Constants.EMPTY); + //create url 根据类型来访问指定的链接 + String lastCreateUrl = createUrl; + if(!fileType.contains(Constants.WORD)) { + lastCreateUrl = lastCreateUrl.replace(Constants.FORMAT_REPLACE_CHARACTER, "-"+fileType); + }else { + lastCreateUrl = lastCreateUrl.replace(Constants.FORMAT_REPLACE_CHARACTER, Constants.EMPTY); + } + String jobId = DownLoadUtil.doPost(lastCreateUrl, JSONObject.toJSONString(param)); + log.info("任务url:{},任务创建id:{}",lastCreateUrl,jobId); + task.put(Constants.JOBID, URLEncoder.encode(jobId.replace("\"", ""), "UTF-8")); + //任务创建成功,放到监控结果队列中 + ConfigCache.resultQueue.put(task); + } catch (Throwable e) { + log.error("创建文档解析任务异常。e:",e); + //失败直接发送结果 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.ERROR, "识别任务创建异常"); + entity.setResult(result); + entity.setStatus(2); + entity.setDel(0); + //回传给api服务保存 + 
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + } + + + } + + + /** + * 读取文件 base64格式 + * @param filePath 文件地址 + * @return + * @throws IOException + */ + private String encodeFileToBase64(String filePath) throws IOException { + byte[] fileContent = Files.readAllBytes(Paths.get(filePath)); + return Base64.getEncoder().encodeToString(fileContent); + } + + + @Override + public void parse(Map task) { + + // TODO Auto-generated method stub + try { + String jobId = (String) task.get(Constants.JOBID); + String lastJobUrl = jobUrl; + lastJobUrl = lastJobUrl.replace(Constants.IDRENAME,jobId); + log.info("jobUrl:{}",lastJobUrl); + String resStr = DownLoadUtil.doGet(lastJobUrl); + JSONObject res = JSONObject.parseObject(resStr); + log.info("获取解析结果-taskId{},result:{}",task.get(Constants.TASKID),res);; + int progress = (int) res.get(Constants.PROGRESS); + if (progress == 0) { + //识别中 -- 放回队列 + ConfigCache.resultQueue.put(task); + }else if (progress == 100) { + //文件类型 + String fileType = (String) task.get(Constants.DOC_TYPE); + //识别成功 -- 获取请求结果 + String lastQueryUrl = queryUrl; + lastQueryUrl = lastQueryUrl.replace(Constants.IDRENAME,jobId); + log.info("queryUrl:{}",queryUrl); + String queryResStr = DownLoadUtil.doGet(lastQueryUrl); + Map queryRes = JSONObject.parseObject(queryResStr); + List> jobDocuments = (List>) queryRes.get(Constants.JOBDOCUMENTS); + List> outputDocuments = (List>) jobDocuments.get(0).get(Constants.OutputDocuments); + List> files = (List>) outputDocuments.get(0).get(Constants.FILES); + for (int i =0;i map = files.get(i); + String fileContents = (String) map.get(Constants.FILECONTENTS); + String fileName = (String) map.get(Constants.F_ILENAME); + if(fileType.contains(Constants.HTML)) { + /*******html********/ + //拆分文件名,一部分是文件夹名称,一部分是文件名 + String[] arr = fileName.split("\\\\"); + String lastConvertFilePathPrefix = convertFilePathPrefix + arr[0]; + boolean isInputFile = base64ToFile(fileContents, lastConvertFilePathPrefix, arr[1]); + if 
(isInputFile) { + log.info("html相关文件转换成功,fileName:{}",arr[1]); + if(i == files.size() -1 ) { + log.info("最后的文件转换成功,准备压缩文件夹,path={}", lastConvertFilePathPrefix); + try { + // 生成 zip 文件名 + String zipFileName = UUID.randomUUID().toString().replace("-", "") + ".zip"; + String zipFilePath = convertFilePathPrefix + zipFileName; + + // 压缩文件夹 + ZipUtil.zipDirectory(lastConvertFilePathPrefix, zipFilePath); + + log.info("文件夹压缩成功,zipPath={}", zipFilePath); + + // === 后续你如果要:上传 / 回传结果,可继续写 === + String responseBody = DownLoadUtil.upLoadFile(zipFilePath, gofastUrl); + //删除目录 + FileUtil.deleteDir(new File(lastConvertFilePathPrefix)); + //删除压缩包 + FileUtil.delFile(zipFilePath); + // 解析返回 JSON + JSONObject json = JSONObject.parseObject(responseBody); + // ⚠️ 根据你 GoFast 实际返回字段调整 + String path = json.getString(Constants.PATH); + String url = gofastAccessPrefix + path; + //成功 发送结果 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.FILEURL, url); + result.put(Constants.FILENAME, zipFileName); + entity.setResult(result); + entity.setStatus(1); + entity.setDel(0); + //回传给api服务保存 + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + } catch (Exception e) { + log.error("HTML 文件夹压缩失败", e); + + // 压缩失败,按失败处理 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String) task.get(Constants.TASKID)); + entity.setAppId((Integer) task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + + Map result = new HashMap<>(16); + result.put(Constants.ERROR, "HTML 文件夹压缩失败"); + entity.setResult(result); + entity.setStatus(2); + entity.setDel(0); + + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + } + } + }else { + //识别异常 + log.error("html文档转换异常:{}",resStr); + //发送失败结果 + AppResultDoc entity = new 
AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.ERROR, "转换失败"); + entity.setResult(result); + entity.setStatus(2); + entity.setDel(0); + //回传给api服务保存 + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + break; + } + + }else { + /******其他类型**********/ + //输出文件 + boolean isInputFile = base64ToFile(fileContents, convertFilePathPrefix, fileName); + if (isInputFile) { + //转换成功 --上传文件 + String fileAddr = convertFilePathPrefix+fileName; + //上传 + String responseBody = DownLoadUtil.upLoadFile(fileAddr, gofastUrl); + //删除源文件 + FileUtil.delFile(fileAddr); + // 解析返回 JSON + JSONObject json = JSONObject.parseObject(responseBody); + // ⚠️ 根据你 GoFast 实际返回字段调整 + String path = json.getString(Constants.PATH); + String url = gofastAccessPrefix + path; + //成功 发送结果 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.FILEURL, url); + result.put(Constants.FILENAME, fileName); + entity.setResult(result); + entity.setStatus(1); + entity.setDel(0); + //回传给api服务保存 + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + }else { + //识别异常 + log.error("文档转换异常:{}",resStr); + //发送失败结果 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.ERROR, "转换失败"); + entity.setResult(result); + entity.setStatus(2); + entity.setDel(0); + //回传给api服务保存 + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + } + } + } + }else { + //识别异常 + 
log.error("文档转换异常:{}",resStr); + //发送失败结果 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.ERROR, "转换失败"); + entity.setResult(result); + entity.setStatus(2); + entity.setDel(0); + //回传给api服务保存 + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + } + + + } catch (Throwable e) { + // TODO: handle exception + log.error("创建文档解析任务异常。e:",e); + //发送失败结果 + AppResultDoc entity = new AppResultDoc(); + entity.setTaskId((String)task.get(Constants.TASKID)); + entity.setAppId((Integer)task.get(Constants.ID)); + long now = System.currentTimeMillis(); + entity.setCreateTime(now); + Map result = new HashMap(16); + result.put(Constants.ERROR, "源文件解析异常"); + entity.setResult(result); + entity.setStatus(2); + entity.setDel(0); + //回传给api服务保存 + DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); + } + } + + /** + * 将Base64编码的Word文档读取为文本 + * @param base64Word Base64字符串 + * @return Word文本内容 + * @throws Exception + */ + public String readWordFromBase64(String base64Word) throws Exception { + byte[] bytes = Base64.getDecoder().decode(base64Word); + try (InputStream is = new ByteArrayInputStream(bytes)) { + // 尝试读取为docx格式 + try { + XWPFDocument docx = new XWPFDocument(is); + XWPFWordExtractor extractor = new XWPFWordExtractor(docx); + return extractor.getText(); + } catch (Exception e) { + e.printStackTrace(); + return null; + } + } + } + + /** + * Base64 字符串写入指定路径文件 + * + * @param base64byte Base64 内容(支持 data:image/png;base64,xxx) + * @param fileType 文件类型,如 png/jpg/pdf + * @param filePath 保存目录路径,如 /data/upload/ + * @param fileName 文件名称 + * @return true 成功,false 失败 + */ + public boolean base64ToFile(String base64byte, String filePath,String fileName) { + try { + if (base64byte == null || base64byte.isEmpty()) { + return false; + } + + // 处理 
base64 头 + if (base64byte.contains(",")) { + base64byte = base64byte.substring(base64byte.indexOf(",") + 1); + } + + byte[] bytes = Base64.getDecoder().decode(base64byte); + + // 创建目录 + File dir = new File(filePath); + if (!dir.exists() && !dir.mkdirs()) { + return false; + } + + // 生成文件名 + File file = new File(dir, fileName); + + try (FileOutputStream fos = new FileOutputStream(file)) { + fos.write(bytes); + fos.flush(); + } + + return true; + } catch (Exception e) { + // 生产中建议打日志 + log.error("Base64 写文件失败", e); + return false; + } + } + + + public static void main(String[] args) throws Exception { +// OcrTaskServiceImpl ocrTaskServiceImpl = new OcrTaskServiceImpl(); +// String base64Word = "77u/VGFibGUgMS4gRGlzdHJpYnV0aW9uIG9mIGNvbmZpcm1lZCBhbnRocmF4IGNhc2VzIGJ5IGNvdW50cnkgYW5kIHllYXIsIEVVL0VFQSwgMjAxNS0yMDE5DQpDb3VudHJ5CTIwMTUJMjAxNgkyMDE3CTIwMTgJMjAxOQ0KCU51bWJlcglOdW1iZXIJTnVtYmVyCU51bWJlcglOdW1iZXINCkF1c3RyaWEJMAkwCTAJMAkwDQpCZWxnaXVtCTAJMAkwCTAJMA0KQnVsZ2FyaWEJMgkwCTEJMAkwDQpDcm9hdGlhCTAJMAkwCTAJMA0KQ3lwcnVzCTAJMAkwCTAJMA0KQ3plY2hpYQkwCTAJMAkwCTANCkRlbm1hcmsJMAkwCTAJMAkwDQpFc3RvbmlhCTAJMAkwCTAJMA0KRmlubGFuZAkwCTAJMAkwCTANCkZyYW5jZQkwCTAJMAkwCTANCkdlcm1hbnkJMAkwCTAJMAkwDQpHcmVlY2UJMAkwCTAJMAkwDQpIdW5nYXJ5CTAJMAkwCTAJMQ0KSWNlbGFuZAkwCTAJMAkwCTANCklyZWxhbmQJMAkwCTAJMAkwDQpJdGFseQkwCTAJMAkwCTANCkxhdHZpYQkwCTAJMAkwCTANClVlY2h0ZW5zdGVpbgnigKIJ4oCiCeKAognigKIJ4oCiDQpVdGh1YW5pYQkwCTAJMAkwCTANCkx1eGVtYm91cmcJMAkwCTAJMAkwDQpNYWx0YQkwCTAJMAkwCTANCk5ldGhlcmxhbmRzCTAJMAkwCTEJMA=="; +// boolean isscuccess = ocrTaskServiceImpl.base64ToFile(base64Word, "csv", "C:\\Users\\55007\\Desktop\\"); +// System.out.println(isscuccess); +// String text = ocrTaskServiceImpl.readWordFromBase64(base64Word); +// System.out.println(text); +// String queryResStr = DownLoadUtil.doGet("https://frs.wefile.com:4431/FineReaderServer14/api/jobs/{92648AEE-3C07-4554-91F1-EA4F1F31C97B}"); +// System.out.println(queryResStr); +// OcrTaskServiceImpl ocrTaskServiceImpl = new OcrTaskServiceImpl(); +// 
String fileContent = ocrTaskServiceImpl.encodeFileToBase64("C:\\Users\\55007\\Desktop\\0e8a49b8879687bfb21e66c0efe64070.png");
+//        System.out.println(fileContent);
+
+//        String fileName = "6bcafab08f1db336317904fab314ff66.png";
+//        Map param = new HashMap(16);
+//        param.put(Constants.FILECONTENTS, fileContent);
+//        param.put(Constants.FILENAME, fileName);
+//        param.put(Constants.OPENPASSWORD, Constants.EMPTY);
+//        param.put(Constants.OWNERPASSWORD, Constants.EMPTY);
+//        param.put(Constants.LOCATIONPATH, Constants.EMPTY);
+//        String createUrl = "https://frs.wefile.com:4431/FineReaderServer14//api/workflows/Workflow-Macao/input/file";
+//        String jobId = DownLoadUtil.doPost(createUrl,JSONObject.toJSONString(param));
+//        System.out.println(jobId);
+        String fileName = "11960d3fe6f0c0a4a4c71c1388b10c21\\11960d3fe6f0c0a4a4c71c1388b10c21-8.0001.jpg";
+
+        String[] arr = fileName.split("\\\\");
+
+        System.out.println(JSONObject.toJSONString(arr));
+    }
+}
diff --git a/document-convert-service/src/main/java/com/bw/convert/service/impl/TaskReceiveServiceImpl.java b/document-convert-service/src/main/java/com/bw/convert/service/impl/TaskReceiveServiceImpl.java
new file mode 100644
index 0000000..c215141
--- /dev/null
+++ b/document-convert-service/src/main/java/com/bw/convert/service/impl/TaskReceiveServiceImpl.java
@@ -0,0 +1,55 @@
+package com.bw.convert.service.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.springframework.stereotype.Service;
+
+import com.alibaba.fastjson.JSONObject;
+import com.bw.convert.cache.ConfigCache;
+import com.bw.convert.entity.Constants;
+import com.bw.convert.service.TaskReceiveService;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Service-layer implementation that receives tasks and places them on the
+ * shared task queue.
+ *
+ * @author jian.mao
+ * @date 2025-01-14
+ */
+@Service
+@Slf4j
+public class TaskReceiveServiceImpl implements TaskReceiveService {
+
+    /** Response code for a task that was queued. */
+    private static final int CODE_SUCCESS = 200;
+    /** Response code for a payload that is not a usable JSON object. */
+    private static final int CODE_INVALID_PARAM = 100010;
+    /** Response code for an interrupted queue write. */
+    private static final int CODE_QUEUE_FAILURE = 100011;
+
+    /**
+     * Parses {@code dataJson} into a task and writes it to
+     * {@code ConfigCache.taskQueue}. A task whose {@code Constants.TRACE}
+     * entry is {@code Boolean.TRUE} goes to the head of the queue; any other
+     * task goes to the tail.
+     *
+     * @param dataJson the task as a JSON object string
+     * @return a JSON string holding {@code Constants.CODE} and
+     *         {@code Constants.MESSAGE}: 200 on success, 100010 when the
+     *         payload cannot be used, 100011 when the queue write is
+     *         interrupted
+     */
+    @Override
+    public String put(String dataJson) {
+        Map<String, Object> task;
+        try {
+            task = JSONObject.parseObject(dataJson);
+        } catch (Exception e) {
+            log.error("参数结构不合法,", e);
+            // Stop here: continuing would dereference a null task.
+            return buildResponse(CODE_INVALID_PARAM, "参数不合法");
+        }
+        if (task == null) {
+            // Guard for parseObject handing back null without throwing
+            // (presumably for null/empty input - confirm against the
+            // fastjson version in use).
+            log.error("参数结构不合法,dataJson={}", dataJson);
+            return buildResponse(CODE_INVALID_PARAM, "参数不合法");
+        }
+        // Write to the queue.
+        try {
+            // Boolean.TRUE.equals tolerates a missing, null or non-Boolean
+            // TRACE value, where a (boolean) cast would throw.
+            if (Boolean.TRUE.equals(task.get(Constants.TRACE))) {
+                ConfigCache.taskQueue.putFirst(task);
+            } else {
+                ConfigCache.taskQueue.put(task);
+            }
+        } catch (InterruptedException e) {
+            // Restore the flag so the calling thread can still observe the interrupt.
+            Thread.currentThread().interrupt();
+            log.error("任务写入队列异常,", e);
+            return buildResponse(CODE_QUEUE_FAILURE, "任务写入队列失败");
+        }
+        return buildResponse(CODE_SUCCESS, "success");
+    }
+
+    /** Serializes a code/message pair into the JSON response string. */
+    private static String buildResponse(int code, String message) {
+        Map<String, Object> response = new HashMap<>(16);
+        response.put(Constants.CODE, code);
+        response.put(Constants.MESSAGE, message);
+        return JSONObject.toJSONString(response);
+    }
+
+}
diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/DataUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/DataUtil.java
new file mode 100644
index 0000000..fb7ba2a
--- /dev/null
+++ b/document-convert-service/src/main/java/com/bw/convert/utils/DataUtil.java
@@ -0,0 +1,48 @@
+package com.bw.convert.utils;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.JSONPath;
+
+import java.util.Map;
+
+/**
+ * @author:jinming
+ * @className:DataUtil
+ * @version:1.0
+ * @description: 获取dataValue的值
+ * @Date:2023/11/1 9:54
+ */
+public class DataUtil {
+    /**
+     *
+     * @param key 传入的key
+     * @param dataMap 数据map
+     * @return 根据传入的参数进行判断解析,返回正确的dataValue
+     */
+    public static Object getValue(String key, Map dataMap) {
+        Object dataValue;
+        String isJson = "#json#";
+        if (key.contains(isJson)) {
+            //进行第一次拆分,获取#json#前面的部分
+            String[] keySplit = key.split(isJson);
+            String firstDataKey = keySplit[0];
+            String[] firstDataKeySplit = firstDataKey.split(":");
+            //取出前半部分对应的JSON数据并转换为JSONObject
+            String dataJson = (String) dataMap.get(firstDataKeySplit[0]);
+            JSONObject dataJsonObject = JSON.parseObject(dataJson);
+            //根据key的后半部分取出对应JSONObject中的值
+            String firstDataKeyJson = (String) JSONPath.eval(dataJsonObject, firstDataKeySplit[1]);
+            String secDataKey = keySplit[1];
+            JSONObject firstDataJsonObject
= JSON.parseObject(firstDataKeyJson);
+            dataValue = JSONPath.eval(firstDataJsonObject, secDataKey);
+            return dataValue;
+        }
+        String[] keySplit = key.split(":");
+        String jsonPath = keySplit[1];
+        String dataJson = (String) dataMap.get(keySplit[0]);
+        JSONObject dataJsonObject = JSON.parseObject(dataJson);
+        dataValue = JSONPath.eval(dataJsonObject, jsonPath);
+        return dataValue;
+    }
+}
\ No newline at end of file
diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/DateUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/DateUtil.java
new file mode 100644
index 0000000..82653ed
--- /dev/null
+++ b/document-convert-service/src/main/java/com/bw/convert/utils/DateUtil.java
@@ -0,0 +1,177 @@
+package com.bw.convert.utils;
+
+
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+
+import lombok.extern.slf4j.Slf4j;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONObject;
+
+/**
+ * Date and time helpers: formatting the current time or a timestamp,
+ * parsing date strings, and an interrupt-aware sleep.
+ *
+ * @author jian.mao
+ * @date 2022-11-15
+ */
+@Slf4j
+public class DateUtil {
+
+    /** Pattern used by every "full date-time" method in this class. */
+    private static final String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";
+
+    /** A timestamp string longer than this is read as milliseconds, otherwise as seconds. */
+    private static final int MAX_SECONDS_TIMESTAMP_LENGTH = 10;
+
+    /** Shared formatter for {@link #parseCreated(String)}; DateTimeFormatter is immutable. */
+    private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern(DATE_TIME_PATTERN);
+
+    /**
+     * @return the current time formatted as {@code yyyyMMddHH}
+     */
+    public static String getTimeStrForNow() {
+        return formatWith("yyyyMMddHH", new Date());
+    }
+
+    /**
+     * @param time a timestamp that is multiplied by 1000 before formatting
+     *             (i.e. it is treated as seconds)
+     * @return that instant formatted as {@code yyyyMMdd}
+     */
+    public static String getTimeStrForDay(long time) {
+        return formatWith("yyyyMMdd", new Date(time * 1000));
+    }
+
+    /**
+     * @return the current date formatted as {@code yyyyMMdd}
+     */
+    public static String getTimeStrForDay() {
+        return formatWith("yyyyMMdd", new Date());
+    }
+
+    /**
+     * @return the current time formatted as {@code yyyy-MM-dd HH:mm:ss}
+     */
+    public static String getDateTime() {
+        return formatWith(DATE_TIME_PATTERN, new Date());
+    }
+
+    /**
+     * @param timestap timestamp in milliseconds (passed straight to {@code new Date(long)})
+     * @return that instant formatted as {@code yyyy-MM-dd HH:mm:ss}
+     */
+    public static String getDateTime(Long timestap) {
+        return formatWith(DATE_TIME_PATTERN, new Date(timestap));
+    }
+
+    /**
+     * @param timestap timestamp in milliseconds (passed straight to {@code new Date(long)})
+     * @return that instant formatted as {@code yyyy-MM-dd}
+     */
+    public static String getDate(Long timestap) {
+        return formatWith("yyyy-MM-dd", new Date(timestap));
+    }
+
+    /**
+     * @return the current month formatted as {@code yyyyMM}
+     */
+    public static String getDateTimeForMonth() {
+        return formatWith("yyyyMM", new Date());
+    }
+
+    /**
+     * Sleeps the current thread. If the sleep is interrupted the method
+     * returns early and leaves the thread's interrupt flag set.
+     *
+     * @param millis time to sleep, in milliseconds
+     */
+    public static void sleep(long millis) {
+        try {
+            Thread.sleep(millis);
+        } catch (InterruptedException e) {
+            // Re-assert the flag: catching InterruptedException clears it,
+            // and callers up the stack may need to see the interrupt.
+            Thread.currentThread().interrupt();
+            log.warn("sleep of {} ms was interrupted", millis, e);
+        }
+    }
+
+    /**
+     * Converts a numeric timestamp string to {@code yyyy-MM-dd HH:mm:ss}.
+     * Strings longer than 10 characters are read as milliseconds, shorter
+     * ones as seconds.
+     *
+     * @author ying.zhao
+     * @date 2023/3/28
+     * @param time the timestamp as a decimal string
+     * @return the formatted date-time
+     * @throws NumberFormatException if {@code time} is not a valid number
+     */
+    public static String timestampToDate(String time) {
+        boolean isMillis = time.length() > MAX_SECONDS_TIMESTAMP_LENGTH;
+        // Parse as long in both branches: a 10-digit seconds value can
+        // exceed Integer.MAX_VALUE (2147483647).
+        long value = Long.parseLong(time);
+        long millis = isMillis ? value : value * 1000L;
+        return formatWith(DATE_TIME_PATTERN, new Date(millis));
+    }
+
+    /**
+     * Builds a {@code yyyy-MM-dd HH:mm:ss} string from a JSON object holding
+     * {@code date} ({@code year}/{@code month}/{@code day}) and {@code time}
+     * ({@code hour}/{@code minute}/{@code second}) sub-objects.
+     *
+     * @param jsonTime the JSON text to read
+     * @return the formatted date-time, or the current time if the input
+     *         cannot be converted
+     */
+    public static String parseCreated(String jsonTime) {
+        // Taken up front so the fallback value is fixed before any parsing.
+        String fallback = getDateTime();
+        try {
+            JSONObject root = JSON.parseObject(jsonTime);
+
+            JSONObject dateObject = root.getJSONObject("date");
+            int day = dateObject.getIntValue("day");
+            int month = dateObject.getIntValue("month");
+            int year = dateObject.getIntValue("year");
+
+            JSONObject timeObject = root.getJSONObject("time");
+            int hour = timeObject.getIntValue("hour");
+            int minute = timeObject.getIntValue("minute");
+            int second = timeObject.getIntValue("second");
+
+            return LocalDateTime.of(year, month, day, hour, minute, second).format(DATE_TIME_FORMATTER);
+        } catch (Exception e) {
+            // The placeholder takes the offending input; the trailing
+            // throwable is logged with its stack trace.
+            log.warn("日期转换失败:{}", jsonTime, e);
+            return fallback;
+        }
+    }
+
+    /**
+     * Parses a date string.
+     *
+     * @param format the {@link SimpleDateFormat} pattern
+     * @param date   the text to parse; null or empty yields the current time
+     * @return the parsed date, the current time for null/empty input, or
+     *         {@code null} when {@code date} does not match {@code format}
+     */
+    public static Date strToDate(String format, String date) {
+        SimpleDateFormat sdf = new SimpleDateFormat(format);
+        if (date == null || date.isEmpty()) {
+            return new Date();
+        }
+        try {
+            return sdf.parse(date);
+        } catch (ParseException e) {
+            log.error("failed to parse date '{}' with format '{}'", date, format, e);
+            // Kept as null so existing callers see the same result as before.
+            return null;
+        }
+    }
+
+    /**
+     * Formats a date.
+     *
+     * @param format the {@link SimpleDateFormat} pattern
+     * @param date   the date to format; {@code null} means the current time
+     * @return the formatted text
+     */
+    public static String FormatDate(String format, Date date) {
+        return formatWith(format, date == null ? new Date() : date);
+    }
+
+    /** Formats {@code date} with a fresh SimpleDateFormat, which is not safe to share between threads. */
+    private static String formatWith(String pattern, Date date) {
+        return new SimpleDateFormat(pattern).format(date);
+    }
+
+    public static void main(String[] args) {
+        String time = timestampToDate("955814400000");
+        System.out.println(time);
+    }
+}
diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/DownLoadUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/DownLoadUtil.java
new file mode 100644
index 0000000..32df1f5
--- /dev/null
+++ b/document-convert-service/src/main/java/com/bw/convert/utils/DownLoadUtil.java
@@ -0,0 +1,1004 @@
+package com.bw.convert.utils;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.security.KeyManagementException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.NameValuePair;
+import
org.apache.http.StatusLine; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.AuthCache; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.HttpClient; +import org.apache.http.client.HttpRequestRetryHandler; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.config.SocketConfig; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.socket.LayeredConnectionSocketFactory; +import org.apache.http.conn.socket.PlainConnectionSocketFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.auth.BasicScheme; +import org.apache.http.impl.client.BasicAuthCache; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.client.LaxRedirectStrategy; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.bw.convert.entity.Constants; + +import okhttp3.Call; +import okhttp3.Headers; +import okhttp3.MediaType; +import okhttp3.MultipartBody; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import 
okhttp3.Response;
+
+
+
+
+
+
+
+
+
+/**
+ * Download utility class: static helpers that issue HTTP(S) requests and
+ * return the response body as a string.
+ *
+ * @author jian.mao
+ * @date 2023-09-19
+ */
+public class DownLoadUtil {
+
+    private static String ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36";
+    private final static Logger log = LoggerFactory.getLogger(DownLoadUtil.class);
+    /** Proxy server (vendor site: www.16yun.cn). */
+    final static String PROXYHOST = "u270.40.tp.16yun.cn";
+    final static Integer PROXYPORT = 6448;
+    /**
+     * Proxy authentication.
+     * NOTE(review): these credentials are committed in source; they should be
+     * rotated and loaded from configuration or a secret store instead.
+     */
+    final static String PROXYUSER = "16HFBVJC";
+    final static String PROXYPASS = "897944";
+
+    // Built in the static initializer below.
+    private static PoolingHttpClientConnectionManager cm = null;
+    // Never assigned in the static initializer below.
+    private static HttpRequestRetryHandler httpRequestRetryHandler = null;
+    private static HttpHost proxy = null;
+
+    private static CredentialsProvider credsProvider = null;
+    private static RequestConfig reqConfig = null;
+
+    // One-time setup of the shared pool, proxy host, credentials and request config.
+    static {
+        ConnectionSocketFactory plainsf = PlainConnectionSocketFactory
+                .getSocketFactory();
+        LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory
+                .getSocketFactory();
+
+        Registry registry = RegistryBuilder.create().register("http", plainsf)
+                .register("https", sslsf).build();
+
+        // Pool limits: 20 connections in total, 5 per route.
+        cm = new PoolingHttpClientConnectionManager(registry);
+        cm.setMaxTotal(20);
+        cm.setDefaultMaxPerRoute(5);
+
+        // NOTE(review): this HttpHost is created with scheme "https", while the
+        // one passed to setProxy(...) below uses the two-argument constructor
+        // (no explicit scheme). Confirm the two are meant to differ.
+        proxy = new HttpHost(PROXYHOST, PROXYPORT, "https");
+
+        // AuthScope.ANY offers the proxy credentials for every host and realm.
+        credsProvider = new BasicCredentialsProvider();
+        credsProvider.setCredentials(AuthScope.ANY,
+                new UsernamePasswordCredentials(PROXYUSER, PROXYPASS));
+
+        // 5000 ms for connection-request, connect and socket timeouts; routed via the proxy.
+        reqConfig = RequestConfig.custom().setConnectionRequestTimeout(5000)
+                .setConnectTimeout(5000).setSocketTimeout(5000)
+                .setExpectContinueEnabled(false)
+                .setProxy(new HttpHost(PROXYHOST, PROXYPORT)).build();
+    }
+
+    /**
+     * Simulates a client GET request.
+     *
+     * @param url
+     *            the URL to request
+     * @param headers
+     *            request headers; may be omitted
+     * @return
+     */
+    @SafeVarargs
+    public static String proxyDoGet(String url, Map...
headers) { + // 设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); + AuthCache authCache = new BasicAuthCache(); + authCache.put(proxy, new BasicScheme()); + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + CloseableHttpClient httpClient = httpBuilder + .setDefaultSocketConfig(socketConfig) + .setDefaultRequestConfig(config) + .setDefaultCredentialsProvider(credsProvider).build(); + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(reqConfig); + if (headers != null && headers.length > 0) { + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key, tempHeaders.get(key).toString()); + } + } else { + httpGet.setHeader("Accept", + "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + } + CloseableHttpResponse response = null; + String html = ""; + int notFundCode = 404; + int successCode = 200; + try { + response = httpClient.execute(httpGet, localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if (statusLine.getStatusCode() == successCode) { + if (responseEntity != null) { + html = EntityUtils.toString(responseEntity, "utf-8"); + System.out.println("响应内容长度为:" + + responseEntity.getContentLength()); + // 下载结果为空不正常 + if (html.equals(Constants.EMPTY)) { + html = "Download failed error is:reslut is null"; + } + } + } else if (statusLine.getStatusCode() == notFundCode) { + 
html = "

页面404,正常结束请求即可

"; + } else { + throw new Exception("请求错误,code码为:" + statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:reslut is null"; + }finally{ + try { + response.close(); + httpClient.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + return html; + + } + + + public static String httpsslProxyGet(String url, Map... headers) throws Exception { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + connManager.setMaxTotal(50); + connManager.setDefaultMaxPerRoute(10); + HttpClients.custom().setConnectionManager(connManager); + // 设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); + AuthCache authCache = new BasicAuthCache(); + authCache.put(proxy, new BasicScheme()); + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + CloseableHttpClient httpClient = httpBuilder + .setConnectionManager(connManager) + .setDefaultSocketConfig(socketConfig) + .setDefaultRequestConfig(config) + .setDefaultCredentialsProvider(credsProvider).build(); + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(reqConfig); + if (headers != null && headers.length > 0) { + Map tempHeaders = headers[0]; + for (String 
key : tempHeaders.keySet()) { + httpGet.setHeader(key, tempHeaders.get(key).toString()); + } + } else { + httpGet.setHeader("Accept", + "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + } + CloseableHttpResponse response = null; + String html = ""; + int notFundCode = 404; + int successCode = 200; + try { + response = httpClient.execute(httpGet, localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if (statusLine.getStatusCode() == successCode) { + if (responseEntity != null) { + html = EntityUtils.toString(responseEntity, "utf-8"); + System.out.println("响应内容长度为:" + + responseEntity.getContentLength()); + // 下载结果为空不正常 + if (html.equals(Constants.EMPTY)) { + html = "Download failed error is:reslut is null"; + } + } + } else if (statusLine.getStatusCode() == notFundCode) { + html = "

页面404,正常结束请求即可

"; + } else { + throw new Exception("请求错误,code码为:" + statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:reslut is null"; + }finally{ + try { + response.close(); + httpClient.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + return html; + + } + + + /** + * json参数方式POST提交 + * @param url + * @param params + * @return + */ + public static String doPost(String url, String params, Map... headers){ + String strResult = ""; + //设置超时时间 + int timeout = 60; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); +// AuthCache authCache = new BasicAuthCache(); +// authCache.put(proxy, new BasicScheme()); +// HttpClientContext localContext = HttpClientContext.create(); +// localContext.setAuthCache(authCache); + // 1. 获取默认的client实例 + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); + HttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); +// HttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).setConnectionManager(cm) +// .setDefaultCredentialsProvider(credsProvider).build(); + // 2. 
创建httppost实例
+        HttpPost httpPost = new HttpPost(url);
+//        httpPost.setConfig(reqConfig);
+        if (headers != null && headers.length > 0) {
+            Map tempHeaders = headers[0];
+            for (String key : tempHeaders.keySet()) {
+                httpPost.setHeader(key, tempHeaders.get(key).toString());
+            }
+        } else {
+            httpPost.addHeader("Content-Type", "application/json;charset=utf-8");
+        }
+        HttpResponse resp = null;
+        try {
+            httpPost.setEntity(new StringEntity(params,"utf-8"));
+            resp = client.execute(httpPost);
+//            resp = client.execute(httpPost,localContext);
+            StatusLine statusLine = resp.getStatusLine();
+            System.out.println("响应状态为:" + resp.getStatusLine());
+            int notFundCode = 300;
+            int successCode = 200;
+            if(statusLine.getStatusCode() >= successCode && statusLine.getStatusCode() < notFundCode){
+                // 7. 获取响应entity
+                HttpEntity respEntity = resp.getEntity();
+                strResult = EntityUtils.toString(respEntity, "UTF-8");
+                if(strResult.equals(Constants.EMPTY)){
+                    strResult = "Download failed error is:reslut is null";
+                }
+            }else{
+                throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            strResult = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
+        }
+        return strResult;
+    }
+
+    /**
+     * POSTs {@code params} via {@code doPost}, retrying while the result
+     * carries the failure marker. At most 5 attempts are made, 5 seconds
+     * apart.
+     *
+     * @param url    the target URL
+     * @param params the request body
+     * @return the response body, or the last failure string if every attempt failed
+     */
+    public static String httpPost(String url,String params) {
+        final int maxAttempts = 5;
+        String html = doPost(url,params);
+        int attempt = 1;
+        while (html.contains("Download failed error is:")) {
+            log.error("DownLoadUtil------------->download is failure,url is:"+url);
+            if (attempt >= maxAttempts) {
+                // Out of attempts: return now instead of sleeping first.
+                break;
+            }
+            DateUtil.sleep(5000);
+            html = doPost(url,params);
+            attempt++;
+        }
+        return html;
+    }
+
+    /**
+     * Creates an SSLContext whose trust manager accepts every certificate.
+     *
+     * NOTE(review): this disables server authentication entirely, so any
+     * connection built on it is open to interception. Use it only where that
+     * is an accepted risk.
+     *
+     * @return an initialized SSLContext that performs no certificate checks
+     * @throws NoSuchAlgorithmException if no provider supports the requested protocol
+     * @throws KeyManagementException   if the context cannot be initialized
+     */
+    public static SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
+        // "TLS" rather than "SSLv3": SSLv3 is disabled by default in current
+        // JDKs, so a context limited to it cannot complete a handshake.
+        SSLContext sc = SSLContext.getInstance("TLS");
+
+        // Trust manager whose check methods are empty on purpose, which is
+        // what bypasses verification.
+        X509TrustManager trustManager = new X509TrustManager() {
+            @Override
+            public void checkClientTrusted(
+                    java.security.cert.X509Certificate[] paramArrayOfX509Certificate,
+                    String paramString) throws CertificateException {
+            }
+
+            @Override
+            public void checkServerTrusted(
+                    java.security.cert.X509Certificate[] paramArrayOfX509Certificate,
+                    String paramString) throws CertificateException {
+            }
+
+            @Override
+            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
+                // The interface contract asks for a non-null array.
+                return new java.security.cert.X509Certificate[0];
+            }
+        };
+
+        sc.init(null, new TrustManager[] { trustManager }, null);
+        return sc;
+    }
+    /**
+     * 模拟请求
+     *
+     * @param url 资源地址
+     * @param map 参数列表
+     * @param encoding 编码
+     * @return
+     * @throws NoSuchAlgorithmException
+     * @throws KeyManagementException
+     * @throws IOException
+     * @throws ClientProtocolException
+     */
+    public static String httpsslGet(String url,Map ... headers) {
+        String html="";
+        CloseableHttpClient client = null;
+        HttpEntity responseEntity = null;
+        CloseableHttpResponse response = null;
+        try {
+            log.debug("DownLoadUtil------------->设置下载相关信息, start....");
+            //采用绕过验证的方式处理https请求
+            SSLContext sslcontext = createIgnoreVerifySSL();
+
+            // 设置协议http和https对应的处理socket链接工厂的对象
+            Registry socketFactoryRegistry = RegistryBuilder.create()
+                    .register("http", PlainConnectionSocketFactory.INSTANCE)
+                    .register("https", new SSLConnectionSocketFactory(sslcontext))
+                    .build();
+            PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
+            connManager.setMaxTotal(50);
+            connManager.setDefaultMaxPerRoute(10);
+            HttpClients.custom().setConnectionManager(connManager);
+            //设置超时时间
+            int timeout = 30;
+            RequestConfig config = RequestConfig.custom().
+                    setConnectTimeout(timeout * 1000).
+                    setConnectionRequestTimeout(timeout * 1000).
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + // 设置重定向策略 + LaxRedirectStrategy redirectStrategy = new LaxRedirectStrategy(); + //创建自定义的httpclient对象 + client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setRedirectStrategy(redirectStrategy).setDefaultSocketConfig(socketConfig).setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36").build(); +// CloseableHttpClient client = HttpClients.createDefault(); + + HttpGet httpGet = new HttpGet(url); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + log.debug("DownLoadUtil------------->设置下载相关信息, end...."); + try { + int notFundCode = 404; + int successCode = 200; + log.debug("DownLoadUtil------------->下载执行,start...."); + httpGet.setConfig(config); + response = client.execute(httpGet); + log.debug("DownLoadUtil------------->下载执行,end...."); + // 从响应模型中获取响应实体 + StatusLine statusLine = response.getStatusLine(); + log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is start ...."); + System.out.println("响应状态为:" + response.getStatusLine()); + responseEntity = response.getEntity(); + log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is end ...."); + if(statusLine.getStatusCode() == successCode){ + if 
(responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + System.out.println("响应内容长度为:" + responseEntity.getContentLength()); + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + }finally{ + try { + responseEntity.getContent().close(); + response.close(); + client.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + + return html; + } + + public static String httpSSLGet(String url,Map ... headers) { + String html=""; + html = httpsslGet(url,headers); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = httpsslGet(url,headers); + } + return html; + } + public static String doPostFrom(String url,Map param,Map ... headers){ + //设置超时时间 + int timeout = 15; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). 
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); +// AuthCache authCache = new BasicAuthCache(); +// authCache.put(proxy, new BasicScheme()); +// HttpClientContext localContext = HttpClientContext.create(); +// localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); +// HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).setConnectionManager(cm) +// .setDefaultCredentialsProvider(credsProvider).build(); + HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"); + httpPost.addHeader("accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.addHeader("content-type", "application/x-www-form-urlencoded"); + httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"); +// httpPost.addHeader("Referer", "http://www.neeq.com.cn/rule/Business_rules.html"); + } + // 创建请求参数 + List list = new LinkedList<>(); + for (String key : param.keySet()) { + BasicNameValuePair param1 = new BasicNameValuePair(key,param.get(key).toString()); + list.add(param1); + } + // 使用URL实体转换工具 + String html=""; + try { + UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8"); + httpPost.setEntity(entityParam); + HttpResponse 
response = httpClient.execute(httpPost);
+//            HttpResponse response = httpClient.execute(httpPost,localContext);
+            // 从响应模型中获取响应实体
+            HttpEntity responseEntity = response.getEntity();
+            StatusLine statusLine = response.getStatusLine();
+            System.out.println("响应状态为:" + response.getStatusLine());
+            int notFundCode = 404;
+            int successCode = 200;
+            if(statusLine.getStatusCode() == successCode){
+                if (responseEntity != null) {
+                    html=EntityUtils.toString(responseEntity,"utf-8");
+                }
+            }else{
+                throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
+            }
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
+        }
+
+        return html;
+
+    }
+
+    /**
+     * Submits a form POST via {@code doPostFrom}, retrying while the result
+     * carries the failure marker. At most 5 attempts are made, 5 seconds
+     * apart, and every attempt sends the caller's headers.
+     *
+     * @param url     the target URL
+     * @param params  the form fields
+     * @param headers optional request headers; only the first map is used by {@code doPostFrom}
+     * @return the response body, or the last failure string if every attempt failed
+     */
+    public static String httpPostForm(String url,Map params,Map ... headers) {
+        final int maxAttempts = 5;
+        // Pass the headers on the first attempt too, not only on retries.
+        String html = doPostFrom(url,params,headers);
+        int attempt = 1;
+        while (html.contains("Download failed error is:")) {
+            log.error("DownLoadUtil------------->download is failure,url is:"+url);
+            if (attempt >= maxAttempts) {
+                // Out of attempts: return now instead of sleeping first.
+                break;
+            }
+            DateUtil.sleep(5000);
+            html = doPostFrom(url,params,headers);
+            attempt++;
+        }
+        return html;
+    }
+
+    public static String dosslPost(String url,String params,Map ... headers) {
+        String html="";
+        CloseableHttpClient client = null;
+        HttpEntity responseEntity = null;
+        CloseableHttpResponse response = null;
+        try {
+            //采用绕过验证的方式处理https请求
+            SSLContext sslcontext = createIgnoreVerifySSL();
+            // 设置协议http和https对应的处理socket链接工厂的对象
+            Registry socketFactoryRegistry = RegistryBuilder.create()
+                    .register("http", PlainConnectionSocketFactory.INSTANCE)
+                    .register("https", new SSLConnectionSocketFactory(sslcontext))
+                    .build();
+            PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
+            HttpClients.custom().setConnectionManager(connManager);
+            //设置超时时间
+            int timeout = 5;
+            RequestConfig config = RequestConfig.custom().
+                    setConnectTimeout(timeout * 1000).
+                    setConnectionRequestTimeout(timeout * 1000).
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + //创建自定义的httpclient对象 + client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build(); +// CloseableHttpClient client = HttpClients.createDefault(); + // 2. 创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + httpPost.addHeader("Content-Type", "application/json;charset=utf-8"); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + + try { + httpPost.setEntity(new StringEntity(params,"utf-8")); + response = client.execute(httpPost); + int notFundCode = 404; + int successCode = 200; + // 从响应模型中获取响应实体 + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + responseEntity = response.getEntity(); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + System.out.println("响应内容长度为:" + responseEntity.getContentLength()); + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + }finally{ + try { + responseEntity.getContent().close(); + response.close(); + client.close(); + } catch (UnsupportedOperationException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + } + + + return html; + } + public static String dosslPostForm(String url,Map param,Map ... headers) { + String html=""; + try { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 5; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + //创建自定义的httpclient对象 + CloseableHttpClient client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build(); +// CloseableHttpClient client = HttpClients.createDefault(); + // 2. 
创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.addHeader("content-type", "application/x-www-form-urlencoded"); + httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + + // 创建请求参数 + List list = new LinkedList<>(); + for (String key : param.keySet()) { + BasicNameValuePair param1 = new BasicNameValuePair(key,param.get(key).toString()); + list.add(param1); + } + // 使用URL实体转换工具 + try { + UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8"); + httpPost.setEntity(entityParam); + HttpResponse response = client.execute(httpPost); +// HttpResponse response = httpClient.execute(httpPost,localContext); + // 从响应模型中获取响应实体 + int notFundCode = 404; + int successCode = 200; + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + + + return html; + } + public static String httpSSLPostForm(String url,Map params,Map ...headers) { + String html=""; + try { + html = dosslPostForm(url,params,headers); + } 
catch (Exception e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + try { + html = dosslPostForm(url,params,headers); + } catch (Exception e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + } + return html; + } + public static String httpSSLPost(String url,String params,Map ...headers) { + String html=""; + try { + html = dosslPost(url,params,headers); + } catch (Throwable e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + try { + html = dosslPost(url,params,headers); + } catch (Throwable e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + } + return html; + } + + /** + * 模拟客户端get请求 + * @param url 模拟请求得url + * @param headers 头部信息,没有可以不传 + * @return + */ + public static String doGet(String url,Map ... headers){ + //设置超时时间 + int timeout = 15; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). 
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); + HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); + HttpGet httpGet = new HttpGet(url); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + } + String html=""; + try { + HttpResponse response = httpClient.execute(httpGet); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + int notFundCode = 404; + int successCode = 200; + int successCodeMax = 300; + if(statusLine.getStatusCode() >= successCode && statusLine.getStatusCode() < successCodeMax){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + if(html.equals("")){ + html = "Download failed error is:reslut is null"; + } + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + return html; + + } + + /** + * 文件下载 + * @param fileURL 文件链接 + * @param destinationFilePath 文件存储地址 + * @throws IOException + */ + public static void downloadFile(String fileURL, String destinationFilePath) throws IOException { + // 设置连接超时和读取超时 + RequestConfig config = RequestConfig.custom() + // 设置连接超时为10秒 + .setConnectTimeout(10000) + // 设置读取超时为30秒 + .setSocketTimeout(30000) + .build(); + + // 创建 HttpClient 实例 + try (CloseableHttpClient httpClient = HttpClients.custom() + .setDefaultRequestConfig(config) + .build()) { + + // 创建 HttpGet 请求 + HttpGet request = new HttpGet(URI.create(fileURL)); + + // 执行请求 + try (CloseableHttpResponse response = httpClient.execute(request)) { + // 获取响应的输入流 + InputStream inputStream = response.getEntity().getContent(); + try (FileOutputStream outputStream = new FileOutputStream(destinationFilePath)) { + byte[] buffer = new byte[4096]; + int bytesRead; + while ((bytesRead = inputStream.read(buffer)) != -1) { + outputStream.write(buffer, 0, bytesRead); + } + } + log.info("文件下载成功---{}" , destinationFilePath); + } + } + } + + /** + * 文件上传 + * @param filePath + * @param gofastUrl + * @return + */ + public static String upLoadFile(String filePath,String gofastUrl) { + File file = new File(filePath); + String realFilename = filePath.substring(filePath.lastIndexOf(File.separator) + 1); + MultipartBody.Builder builder = new MultipartBody.Builder().setType(MultipartBody.FORM); + builder.addPart(Headers.of("Content-Disposition", "form-data; name=\"file\";filename=\"" + realFilename + "\""), + RequestBody.create(MediaType.parse("image/png"), file) + + ).addFormDataPart("output", "json").build(); + RequestBody body = builder.build(); + Request request = new Request.Builder().url(gofastUrl).post(body).header("Expect", "100-continue").build(); + 
OkHttpClient.Builder okBuilder = new OkHttpClient.Builder(); + OkHttpClient client = okBuilder.connectTimeout(600, TimeUnit.MILLISECONDS) + .readTimeout(600, TimeUnit.SECONDS).build(); + Call call = client.newCall(request); + String html = ""; + Response response = null; + try { + response = call.execute(); + html = response.body().string(); + } catch (IOException e) { + log.info("upload fail:{}", filePath); + e.printStackTrace(); + } finally { + response.close(); + } + return html; + } + public static void main(String[] args) throws Exception { + + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/EncryptionUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/EncryptionUtil.java new file mode 100644 index 0000000..39bc2e0 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/EncryptionUtil.java @@ -0,0 +1,27 @@ +package com.bw.convert.utils; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/** + * @author jian.mao + * @date 2023年3月10日 + * @description + */ +public class EncryptionUtil { + public static String md5(String text) { + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(text.getBytes()); + byte[] bytes = md.digest(); + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b & 0xff)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + e.printStackTrace(); + return null; + } + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/FileUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/FileUtil.java new file mode 100644 index 0000000..3b27184 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/FileUtil.java @@ -0,0 +1,59 @@ +package com.bw.convert.utils; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +/** + * 文件工具类 + * @author jian.mao + * @date 
2023年7月14日 + * @description + */ +public class FileUtil { + + /** + * 数据写入文件 + * @param Path 文件路径 + * @param result 数据 + * @throws IOException + */ + public static void writeFile(String path,String result){ + try { + FileWriter fw = new FileWriter(path,true); + fw.write(result+"\n"); + fw.flush(); + fw.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public static void delFile(String path) { + try { + File file = new File(path); + file.delete(); + } catch (Exception e) { + // TODO: handle exception + e.printStackTrace(); + } + } + + public static boolean deleteDir(File dir) { + if (dir == null || !dir.exists()) { + return true; + } + + if (dir.isDirectory()) { + File[] files = dir.listFiles(); + if (files != null) { + for (File file : files) { + deleteDir(file); + } + } + } + + return dir.delete(); + } + +} diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/GPTResultParseUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/GPTResultParseUtil.java new file mode 100644 index 0000000..8c6364d --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/GPTResultParseUtil.java @@ -0,0 +1,53 @@ +package com.bw.convert.utils; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.JSONException; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * @author:jinming + * @className:GPTResultParseUtil + * @version:1.0 + * @description: + * @Date:2024/6/28 10:11 + */ +public class GPTResultParseUtil { + public static Map parseGPTResult(Map output, String gptContent) { + Map jsonResult = new HashMap<>(); + try { + // 替换```json, ``` 和 \n + String jsonContent = gptContent.replace("```json", "").replace("```", "").replace("\n", ""); + JSONObject jsonGPT = JSON.parseObject(jsonContent); + + for (String key : output.keySet()) { + if (jsonGPT.containsKey(key)) { + jsonResult.put(key, 
jsonGPT.get(key)); + } + } + return jsonResult; + } catch (JSONException e) { + try { + // 直接解析失败,使用正则表达式匹配外层的 {} + Pattern pattern = Pattern.compile("\\{.*\\}", Pattern.DOTALL); + Matcher matcher = pattern.matcher(gptContent.replace("\n", "")); + if (matcher.find()) { + JSONObject jsonGPT = JSON.parseObject(matcher.group()); + for (String key : output.keySet()) { + if (jsonGPT.containsKey(key)) { + jsonResult.put(key, jsonGPT.get(key)); + } + } + return jsonResult; + } else { + return null; + } + } catch (Exception ex) { + ex.printStackTrace(); + return null; + } + } + } +} \ No newline at end of file diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/OtherUtils.java b/document-convert-service/src/main/java/com/bw/convert/utils/OtherUtils.java new file mode 100644 index 0000000..0cb89c5 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/OtherUtils.java @@ -0,0 +1,33 @@ +package com.bw.convert.utils; + +import java.security.MessageDigest; + +/** + * 其他工具类 + * @author jian.mao + * @date 2023年9月19日 + * @description + */ +public class OtherUtils { + + + + public static String getMd5(String string) { + try { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + byte[] bs = md5.digest(string.getBytes("UTF-8")); + StringBuilder sb = new StringBuilder(40); + for (byte x : bs) { + if ((x & 0xff) >> 4 == 0) { + sb.append("0").append(Integer.toHexString(x & 0xff)); + } else { + sb.append(Integer.toHexString(x & 0xff)); + } + } + return sb.toString(); + } catch (Exception e) { + + return "nceaform" + System.currentTimeMillis(); + } + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/QueueUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/QueueUtil.java new file mode 100644 index 0000000..a953b44 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/QueueUtil.java @@ -0,0 +1,18 @@ +package com.bw.convert.utils; + +import java.util.Map; +import 
java.util.concurrent.LinkedBlockingDeque; + +/** + * @author:jinming + * @className:QueueUtil + * @version:1.0 + * @description: + * @Date:2023/7/13 15:00 + */ +public class QueueUtil { + + public static LinkedBlockingDeque> taskQueue = new LinkedBlockingDeque>(); + + public static LinkedBlockingDeque sendQueue = new LinkedBlockingDeque(); +} \ No newline at end of file diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/SpringBootKafka.java b/document-convert-service/src/main/java/com/bw/convert/utils/SpringBootKafka.java new file mode 100644 index 0000000..4dcb806 --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/SpringBootKafka.java @@ -0,0 +1,45 @@ +package com.bw.convert.utils; + +import com.alibaba.fastjson.JSONObject; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.kafka.core.KafkaTemplate; +import org.springframework.kafka.support.SendResult; +import org.springframework.stereotype.Component; +import org.springframework.util.concurrent.ListenableFuture; +import org.springframework.util.concurrent.ListenableFutureCallback; + +/** + * @PROJECT_NAME: companybusinesscrawl + * @DESCRIPTION:SpringBootKafka 工具类 + * @DATE: 2023/4/6 11:09 + */ +@Slf4j +@Component +public class SpringBootKafka { + @Autowired + private KafkaTemplate kafkaTemplate; + /** + * 自定义topicKafkaTemplate + */ + /** + * public static final String TOPIC = "companyBussTest"; + **/ + public void send(String topic, String message) { + //发送消息 + ListenableFuture> future = kafkaTemplate.send(topic, message); + future.addCallback(new ListenableFutureCallback>() { + @Override + public void onFailure(Throwable throwable) { + //发送失败的处理 + log.info(topic + " - 生产者 发送消息失败:" + throwable.getMessage()); + } + + @Override + public void onSuccess(SendResult stringObjectSendResult) { + //成功的处理 + log.info(topic + " - 生产者 发送消息成功" ); + } + }); + } +} diff --git 
a/document-convert-service/src/main/java/com/bw/convert/utils/ThrowMessageUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/ThrowMessageUtil.java new file mode 100644 index 0000000..f74798e --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/ThrowMessageUtil.java @@ -0,0 +1,23 @@ +package com.bw.convert.utils; + +import java.io.PrintWriter; +import java.io.StringWriter; + +/** + * @author jian.mao + * @date 2023年3月22日 + * @description + */ +public class ThrowMessageUtil { + + /** + * 获取异常信息 + * @param t + * @return + */ + public static String getErrmessage(Throwable t){ + StringWriter stringWriter=new StringWriter(); + t.printStackTrace(new PrintWriter(stringWriter,true)); + return stringWriter.getBuffer().toString(); + } +} diff --git a/document-convert-service/src/main/java/com/bw/convert/utils/ZipUtil.java b/document-convert-service/src/main/java/com/bw/convert/utils/ZipUtil.java new file mode 100644 index 0000000..06ec57d --- /dev/null +++ b/document-convert-service/src/main/java/com/bw/convert/utils/ZipUtil.java @@ -0,0 +1,58 @@ +package com.bw.convert.utils; + +import java.io.*; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +/** + * 压缩工具类 + * @author jian.mao + * @date 2026年1月21日 + * @description + */ +public class ZipUtil { + + public static void zipDirectory(String sourceDirPath, String zipFilePath) throws IOException { + File sourceDir = new File(sourceDirPath); + if (!sourceDir.exists() || !sourceDir.isDirectory()) { + throw new IllegalArgumentException("待压缩路径不是文件夹:" + sourceDirPath); + } + + try (FileOutputStream fos = new FileOutputStream(zipFilePath); + ZipOutputStream zos = new ZipOutputStream(fos)) { + zipFile(sourceDir, sourceDir.getName(), zos); + } + } + + private static void zipFile(File fileToZip, String fileName, ZipOutputStream zos) throws IOException { + if (fileToZip.isHidden()) { + return; + } + + if (fileToZip.isDirectory()) { + if (!fileName.endsWith("/")) { + 
zos.putNextEntry(new ZipEntry(fileName + "/")); + zos.closeEntry(); + } + + File[] children = fileToZip.listFiles(); + if (children != null) { + for (File childFile : children) { + zipFile(childFile, fileName + "/" + childFile.getName(), zos); + } + } + return; + } + + try (FileInputStream fis = new FileInputStream(fileToZip)) { + ZipEntry zipEntry = new ZipEntry(fileName); + zos.putNextEntry(zipEntry); + + byte[] bytes = new byte[8192]; + int length; + while ((length = fis.read(bytes)) >= 0) { + zos.write(bytes, 0, length); + } + } + } +} diff --git a/document-convert-service/src/main/resources/bootstrap.yml b/document-convert-service/src/main/resources/bootstrap.yml new file mode 100644 index 0000000..4ce47b4 --- /dev/null +++ b/document-convert-service/src/main/resources/bootstrap.yml @@ -0,0 +1,52 @@ +# ==================== 必须文件:bootstrap.yml ==================== +# 这个文件用于配置Nacos客户端,优先级最高 +spring: + application: + name: document-convert-service # 服务名,对应Nacos中的Data ID + + cloud: + nacos: + # ======== 配置中心 ======== + config: + server-addr: 127.0.0.1:8848 # Nacos地址 + username: nacos # 用户名 + password: nacos # 密码 + group: public_dev # 分组 + namespace: opai # 命名空间(默认public) + file-extension: yaml # 配置文件格式 + timeout: 5000 # 超时时间(ms) + + # 核心配置:开启动态刷新 + refresh-enabled: true # 必须为true! 
+ + # 主配置文件(从Nacos加载) + data-id: ${spring.application.name}.${spring.cloud.nacos.config.file-extension} + + # 共享配置文件(可选) + shared-configs[0]: + data-id: application.yaml # 公共配置 + group: public_dev # 公共分组 + namespace: opai + refresh: true # 公共配置也要刷新 + + # 扩展配置(可选) + # extension-configs[0]: + # data-id: datasource.yaml + # group: dev + # refresh: true + + # ======== 服务发现 ======== + discovery: + server-addr: ${spring.cloud.nacos.config.server-addr} + username: ${spring.cloud.nacos.config.username} + password: ${spring.cloud.nacos.config.password} + group: ${spring.cloud.nacos.config.group} + namespace: ${spring.cloud.nacos.config.namespace} + +logging: + level: + root: info + com.alibaba.nacos.client.config.impl: WARN + file: + path: ../logs + \ No newline at end of file diff --git a/document-convert-service/src/main/resources/logback-spring.xml b/document-convert-service/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..0e6ea0c --- /dev/null +++ b/document-convert-service/src/main/resources/logback-spring.xml @@ -0,0 +1,36 @@ + + + + + + + + + true + + ${logging.level} + + + ${log-path}/document-convert-service.log + + + ${log-path}/document-convert-service.log.%d{yyyy-MM-dd} + 7 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n + UTF-8 + + + + + + + + diff --git a/pom.xml b/pom.xml index 869be74..85ff50b 100644 --- a/pom.xml +++ b/pom.xml @@ -11,6 +11,7 @@ asr-service translate-service ai-service + document-convert-service