From 70dd6c9fdbca6ce708dc94f4d583ef45723b495a Mon Sep 17 00:00:00 2001 From: 55007 <55007@maojian> Date: Tue, 7 Jan 2025 16:26:14 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0=E5=BA=94?= =?UTF-8?q?=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .classpath | 40 + .gitignore | 3 + .project | 23 + .settings/org.eclipse.core.resources.prefs | 5 + .settings/org.eclipse.jdt.core.prefs | 9 + .settings/org.eclipse.m2e.core.prefs | 4 + README.md | 1 + pom.xml | 262 +++++ src/main/java/com/bfd/upload/Application.java | 25 + .../java/com/bfd/upload/cache/ConfigCache.java | 35 + .../bfd/upload/controller/FileExecController.java | 38 + src/main/java/com/bfd/upload/entity/Constants.java | 177 ++++ .../java/com/bfd/upload/handler/MainHandler.java | 111 +++ .../java/com/bfd/upload/model/FilesEntity.java | 29 + .../com/bfd/upload/process/FileTaskProcess.java | 358 +++++++ .../com/bfd/upload/service/FileExecService.java | 15 + .../upload/service/impl/FileExecServiceImpl.java | 55 ++ src/main/java/com/bfd/upload/utils/DataUtil.java | 63 ++ src/main/java/com/bfd/upload/utils/DateUtil.java | 177 ++++ .../java/com/bfd/upload/utils/DownLoadUtil.java | 1007 ++++++++++++++++++++ .../java/com/bfd/upload/utils/EncryptionUtil.java | 27 + src/main/java/com/bfd/upload/utils/ExcelUtils.java | 184 ++++ src/main/java/com/bfd/upload/utils/FileUtil.java | 36 + src/main/java/com/bfd/upload/utils/JsonUtil.java | 32 + src/main/java/com/bfd/upload/utils/OcrUtil.java | 61 ++ src/main/java/com/bfd/upload/utils/OtherUtils.java | 33 + src/main/java/com/bfd/upload/utils/PptUtil.java | 93 ++ src/main/java/com/bfd/upload/utils/QueueUtil.java | 18 + .../java/com/bfd/upload/utils/SpringBootKafka.java | 46 + .../com/bfd/upload/utils/ThrowMessageUtil.java | 23 + src/main/resources/application.yml | 95 ++ src/main/resources/logback-spring.xml | 36 + src/test/java/com/bfd/AppTest.java | 53 ++ 33 files changed, 3174 insertions(+) create 
mode 100644 .classpath create mode 100644 .gitignore create mode 100644 .project create mode 100644 .settings/org.eclipse.core.resources.prefs create mode 100644 .settings/org.eclipse.jdt.core.prefs create mode 100644 .settings/org.eclipse.m2e.core.prefs create mode 100644 README.md create mode 100644 pom.xml create mode 100644 src/main/java/com/bfd/upload/Application.java create mode 100644 src/main/java/com/bfd/upload/cache/ConfigCache.java create mode 100644 src/main/java/com/bfd/upload/controller/FileExecController.java create mode 100644 src/main/java/com/bfd/upload/entity/Constants.java create mode 100644 src/main/java/com/bfd/upload/handler/MainHandler.java create mode 100644 src/main/java/com/bfd/upload/model/FilesEntity.java create mode 100644 src/main/java/com/bfd/upload/process/FileTaskProcess.java create mode 100644 src/main/java/com/bfd/upload/service/FileExecService.java create mode 100644 src/main/java/com/bfd/upload/service/impl/FileExecServiceImpl.java create mode 100644 src/main/java/com/bfd/upload/utils/DataUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/DateUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/DownLoadUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/EncryptionUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/ExcelUtils.java create mode 100644 src/main/java/com/bfd/upload/utils/FileUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/JsonUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/OcrUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/OtherUtils.java create mode 100644 src/main/java/com/bfd/upload/utils/PptUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/QueueUtil.java create mode 100644 src/main/java/com/bfd/upload/utils/SpringBootKafka.java create mode 100644 src/main/java/com/bfd/upload/utils/ThrowMessageUtil.java create mode 100644 src/main/resources/application.yml create mode 100644 
src/main/resources/logback-spring.xml create mode 100644 src/test/java/com/bfd/AppTest.java diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..deb4b51 --- /dev/null +++ b/.classpath @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bf08e64 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/logs/ +/target/ +/file/ \ No newline at end of file diff --git a/.project b/.project new file mode 100644 index 0000000..16d0160 --- /dev/null +++ b/.project @@ -0,0 +1,23 @@ + + + appendix_upload + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..365bbd6 --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,5 @@ +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 +encoding//src/main/resources=UTF-8 +encoding//src/test/java=UTF-8 +encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..0ada971 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,9 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.methodParameters=generate +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 
0000000..14b697b --- /dev/null +++ b/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6aa6d18 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +文件上传应用 diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..bd90b84 --- /dev/null +++ b/pom.xml @@ -0,0 +1,262 @@ + + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 2.2.4.RELEASE + + com.bfd + appendix_upload + 0.0.1-SNAPSHOT + + appendix_upload + + http://www.example.com + + + UTF-8 + 1.8 + 1.8 + + + + + junit + junit + 4.11 + test + + + org.springframework.boot + spring-boot-starter-web + + + + de.codecentric + spring-boot-admin-starter-client + 2.2.4 + + + com.google.code.gson + gson + 2.8.8 + + + org.springframework.boot + spring-boot-test + + + + org.springframework + spring-test + 5.0.10.RELEASE + test + + + commons-io + commons-io + 2.11.0 + + + + com.alibaba + fastjson + 2.0.17 + + + + com.mchange + c3p0 + 0.9.5.5 + + + + com.squareup.okhttp3 + okhttp + 4.9.3 + + + org.apache.httpcomponents + httpclient + 4.5.3 + + + commons-lang + commons-lang + 2.6 + + + + org.jetbrains.kotlin + kotlin-reflect + 1.6.21 + runtime + + + + org.jsoup + jsoup + 1.8.1 + + + org.apache.pdfbox + pdfbox + 2.0.28 + + + org.apache.poi + poi-scratchpad + 4.0.1 + + + org.apache.poi + poi + 4.0.1 + + + org.apache.poi + poi-ooxml + 4.0.1 + + + + org.apache.logging.log4j + log4j-api + 2.14.1 + + + org.apache.logging.log4j + log4j-core + 2.14.1 + + + + org.projectlombok + lombok + + + + + + + + org.springframework.kafka + spring-kafka + + + cn.hutool + hutool-all + 5.8.5 + + + junit + junit + + + + p6spy + p6spy + 3.9.0 + + + + commons-collections + commons-collections + 3.2.2 + + + + + + + + + + maven-clean-plugin + 3.1.0 + + + + maven-resources-plugin + 3.0.2 + + + maven-compiler-plugin + 3.8.0 + + + maven-surefire-plugin + 2.22.1 + + + 
maven-jar-plugin + 3.0.2 + + + maven-install-plugin + 2.5.2 + + + maven-deploy-plugin + 2.8.2 + + + + maven-site-plugin + 3.7.1 + + + maven-project-info-reports-plugin + 3.0.0 + + + + + org.springframework.boot + spring-boot-maven-plugin + + com.bfd.upload.Application + ZIP + + + ${project.groupId} + ${project.artifactId} + + + + + + + repackage + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.1.1 + + + copy + package + + copy-dependencies + + + jar + jar + runtime + ${project.build.directory}/libs + + + + + + + + \ No newline at end of file
diff --git a/src/main/java/com/bfd/upload/Application.java b/src/main/java/com/bfd/upload/Application.java
new file mode 100644
index 0000000..8826147
--- /dev/null
+++ b/src/main/java/com/bfd/upload/Application.java
@@ -0,0 +1,25 @@
+package com.bfd.upload;
+
+
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.kafka.annotation.EnableKafka;
+import org.springframework.scheduling.annotation.EnableScheduling;
+
+/**
+ * 主入口
+ *
+ * @author jian.mao
+ * @date 2023年7月4日
+ * @description
+ */
+@SpringBootApplication
+@EnableScheduling
+@EnableKafka
+public class Application {
+
+    public static void main(String[] args) {
+        SpringApplication.run(Application.class, args);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/bfd/upload/cache/ConfigCache.java b/src/main/java/com/bfd/upload/cache/ConfigCache.java
new file mode 100644
index 0000000..5324282
--- /dev/null
+++ b/src/main/java/com/bfd/upload/cache/ConfigCache.java
@@ -0,0 +1,39 @@
+package com.bfd.upload.cache;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.Map;
+import java.util.concurrent.LinkedBlockingDeque;
+
+/**
+ * Process-wide shared state: the worker run flag and the in-memory task queue.
+ *
+ * @author jian.mao
+ * @date 2022-11-11
+ */
+@Slf4j
+public class ConfigCache {
+
+    /** Run flag of the worker loop; volatile because the shutdown hook clears it from another thread. */
+    public static volatile boolean isStart = true;
+    /** Tasks waiting to be processed. */
+    public static LinkedBlockingDeque<Map<String, Object>> taskQueue = new LinkedBlockingDeque<>();
+
+    /**
+     * Adds a task to the given queue, blocking while the queue is full.
+     * If the calling thread is interrupted the task is not queued; the failure is
+     * logged and the interrupt flag is restored for the caller.
+     *
+     * @param queue queue that receives the task
+     * @param task  task to enqueue
+     */
+    public static void putQueue(LinkedBlockingDeque<Map<String, Object>> queue, Map<String, Object> task) {
+        try {
+            queue.put(task);
+        } catch (InterruptedException e) {
+            // Keep the interrupt visible to the caller instead of swallowing it.
+            Thread.currentThread().interrupt();
+            log.error("队列写入data失败---", e);
+        }
+    }
+}
diff --git a/src/main/java/com/bfd/upload/controller/FileExecController.java b/src/main/java/com/bfd/upload/controller/FileExecController.java
new file mode 100644
index 0000000..1e64b16
--- /dev/null
+++ b/src/main/java/com/bfd/upload/controller/FileExecController.java
@@ -0,0 +1,38 @@
+package com.bfd.upload.controller;
+
+import javax.annotation.Resource;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.springframework.stereotype.Controller;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestMethod;
+import org.springframework.web.bind.annotation.ResponseBody;
+
+import com.bfd.upload.service.FileExecService;
+
+
+
+@Controller
+@RequestMapping("/file")
+@Slf4j
+public class FileExecController {
+
+    @Resource
+    private FileExecService fileExecService;
+    @PostMapping("/add")
+    @ResponseBody
+    public String add(@RequestBody String dataJson){
+        String response = fileExecService.add(dataJson);
+        return response;
+    }
+
+
+    @RequestMapping(value = "/hello", method = RequestMethod.GET)
+    @ResponseBody
+    public String hello(String param, String token) {
+        return "123";
+    }
+}
diff --git a/src/main/java/com/bfd/upload/entity/Constants.java b/src/main/java/com/bfd/upload/entity/Constants.java
new file mode 100644
index 0000000..8525477
--- /dev/null
+++ b/src/main/java/com/bfd/upload/entity/Constants.java
@@ -0,0 +1,177 @@
+package com.bfd.upload.entity;
+
+
+/**
+ * 常量实体类
+ * @author jian.mao
+ * @date 2022年11月15日
+ * @description
+ */
+public class Constants {
+
+
/************************* Blueprint constant key names *********************************/
+    public final static String SCHEDULING = "scheduling";
+    public final static String TYPE = "type";
+    public final static String INTERVAL = "interval";
+    public final static String CREATED = "created";
+    public final static String LAST_EDIT = "last_edit";
+    public final static String BLUEPRINT_ID = "blueprint_id";
+    public final static String BLUEPRINTID = "blueprintId";
+    public final static String BLUEPRINT_NAME = "name";
+    public final static String SCENARIO = "scenario";
+    public final static String AUTOCOMMITTRIGGERLAST = "autoCommitTriggerLast";
+    public final static String FRESHVARIABLES = "freshVariables";
+    public final static String AUTOCOMMIT = "autoCommit";
+    public final static String MAXERRORS = "maxErrors";
+    public final static String DATALOSS = "dataloss";
+    public final static String POSITION = "position";
+    public final static String SCENES_ID = "scenes_id";
+    public final static String SCENESID = "scenesId";
+    public final static String MULTI_BRANCH = "multi_branch";
+
+    public final static String SINGLE = "single";
+    /** Number of retries already performed **/
+    public final static String ERROR_TIME = "error_time";
+    public final static String PREVIOUS_RESULT = "previous_result";
+
+    /**** Data id *****/
+    public final static String BUSINESSKEY = "businessKey";
+
+
+    /************************* Metadata constant key names *********************************/
+    public final static String LABEL_COL = "label_col";
+    public final static String LABEL = "label";
+    public final static String USER = "user";
+    public final static String ADMIN = "admin";
+    public final static String ADDRESS = "address";
+    public final static String DATASOURCE = "datasource";
+    public final static String INDEX = "index";
+
+    /************************* App constant key names *********************************/
+    public final static String APPS = "apps";
+    public final static String TRANSFER_ID = "transfer_id";
+    public final static String MODULE = "module";
+    public final static String VERSION = "version";
+    public final static String METADATA = "metadata";
+    public final static String APP_NAME = "name"; // same value as BLUEPRINT_NAME
+    public final static String DESCRIBE = "describe";
+    public final static String NEXT_APP_ID = "next_app_id";
+    public final static String EDGE_ID = "edge_id";
+    public final static String START_ID = "start_id";
+    public final static String END_ID = "end_id";
+
+    public final static String WAIT_CONDITION = "wait_condition";
+    public final static String START_TAG = "start_tag";
+
+    /************************* Module types *********************************/
+    public final static String FILE = "file";
+    public final static String OCR = "OCR";
+    public final static String FILTER = "Filter";
+    public final static String CHATGPT = "ChatGPT";
+    public final static String MYSQL = "mysql";
+
+    /************************* Other constants *********************************/
+    public final static String UNDERLINE = "_";
+    public final static String RESULT_TOPIC = null; // NOTE(review): declared as null — confirm no caller uses it as a topic name
+    public static final String EMPTY = "";
+    public static final String HTTP = "http";
+    public static final String REQUEST_ERROR_MESSAGE = "Download failed error is";
+    public static final String REQUEST_RESULT = "result";
+    public static final String REQUEST_RESULT_RESULTS = "results";
+    public static final String MAP_TYPE = "Map";
+    public static final String LIST_TYPE = "List";
+    public static final String STRING_TYPE = "String";
+    public static final String DOCUMENT_TYPE = "doc";
+    public static final String FILTER_ZH = "过滤器";
+
+    public static final String JSON_SELE_SYMBOL = "$.";
+    public static final String LEFT_BRACKETS = "[";
+    public static final String RIGTH_BRACKETS = "]";
+    public static final String TASKTYPE = "taskType";
+    public static final Integer USER_TYPE = 1;
+    public static final Integer KEYWORD_TYPE = 0;
+    public static final Integer DETAIL_TYPE = 2;
+    public static final String CID = "cid";
+    public static final String SITETYPE = "siteType";
+    public static final Integer DEFULT_SUBJECTID = 304864;
+    public static final Integer DEFULT_CRAWLCYCLICITYTIME = 1440;
+    public static final String CRAWLENDTIME = "crawlEndTime";
+    public static final String CRAWLSTARTTIME = "crawlStartTime";
+    public static final String CRAWLPAGETYPES = "crawlPageTypes";
+    public static final String APPID = "113ic";
+    public static final String APP_ID = "appId";
+    public final static String ID = "id";
+    public static final Integer DEFULT_CRAWLPERIODHOUR = 24;
+    public static final String CREATEUSERID = "662015832180933762"; // NOTE(review): hard-coded id — presumably environment-specific; confirm
+    public static final String CRAWL_ADD_URL = "https://caiji.percent.cn/api/crawl/remote/task/save";
+    public static final String CRAWLKEYWORD = "crawlKeyword";
+    public static final String ATTACHTAG = "attachTag";
+    public static final String ATTACHTAG_VALUE = "analyze";
+    public static final String KEYWORD = "keyword";
+    public static final String SITEID = "siteId";
+    public static final String RESULTS = "results"; // same value as REQUEST_RESULT_RESULTS
+    public static final String RESULT = "result"; // same value as REQUEST_RESULT
+    public static final String CRAWLDATAFLAG = "crawlDataFlag";
+    public static final String CRAWLDATAFLAG_PREFIX = "\"crawlDataFlag\":\"keyword:";
+    public static final String TID = "tid";
+    public static final Long TIME_OUT = 1800000L;
+    public static final String ATTR = "attr";
+    public static final String HASVIDEO = "hasVideo";
+    public static final String CRAWL_END_MARK = "crawl_end_mark";
+    public static final String CRAWL_END_MESSAGE = "crawl_end_message";
+    public static final String CRAWL_END_MESSAGE_VALUE = "数据采集完成";
+    public static final String SUBJECTID = "subjectId";
+    public static final String TASKID = "taskId";
+    public static final int SUCCESS_CODE = 200;
+    public static final String WEB_URL_SUFFIX = "/api/aogeo/api/cda/caiji/status";
+    public static final String STATUS = "status";
+    /************************ Redis *************************************/
+    public static final String LOCK_KEY = "myLock";
+    public static final long LOCK_EXPIRE_TIME = 300000;
+
+    /************************ Application parameters *************************************/
+    public static final String CODE = "code";
+    public static final String MESSAGE = "message";
+    public static final String INPUT = "input";
+    public static final String OUTPUT = "output";
+    public static final String FORM = "form";
+    public static final String FIELD = "field";
+    public static final String VALUE = "value";
+    public static final String DATA = "data";
+    public static final String COLON_EN = ":";
+    public static final String DATABASE = "database";
+    public static final String TABLE = "table";
+    public static final String USERNAME = "username";
+    public static final String PASSWORD = "password";
+    public static final String PORT = "port";
+    public static final String HOSTNAME = "hostname";
+    public static final String DATATYPE = "dataType";
+    public static final String RULES = "rules";
+    public static final String GENID = "genId";
+    public static final String KEY = "key";
+    public static final String DATAID = "dataId";
+    public static final String APP_CODE = "app_code";
+    public static final String TRACE = "trace";
+    /*************** File-related parameter constants ***********************/
+    public static final String READCONTENT = "readContent";
+    public static final String CONTENT = "content";
+    public static final String JPG = "jpg";
+    public static final String PNG = "png";
+    public static final String JPEG = "jpeg";
+    public static final String XLS = "xls";
+    public static final String XLSX = "xlsx";
+    public static final String PDF = "pdf";
+    public static final String DOC = "doc";
+    public static final String DOCX = "docx";
+    public static final String PPT = "ppt";
+    public static final String PPTX = "pptx";
+    public static final String MP4 = "mp4";
+    public static final String EXT = "ext";
+    public static final String FILES = "files";
+    public static final String PATH = "path";
+    public static final String TITLE = "title";
+    public static final String DATAPROCESSID = "dataProcessId";
+    public static
final String ISLAST = "isLast";
+    public static final String DOMAIN_NAME = "http://8.152.196.157:8081";
+    public static final String CLUSTER_ADDRESS = "http://192.168.0.41:8081";
+}
diff --git a/src/main/java/com/bfd/upload/handler/MainHandler.java b/src/main/java/com/bfd/upload/handler/MainHandler.java
new file mode 100644
index 0000000..d42ba9a
--- /dev/null
+++ b/src/main/java/com/bfd/upload/handler/MainHandler.java
@@ -0,0 +1,123 @@
+package com.bfd.upload.handler;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.LinkedBlockingDeque;
+
+import javax.annotation.Resource;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.commons.io.FileUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+
+import com.alibaba.fastjson.JSONObject;
+import com.bfd.upload.cache.ConfigCache;
+import com.bfd.upload.process.FileTaskProcess;
+import com.bfd.upload.utils.FileUtil;
+
+/**
+ * Startup entry point: launches the upload worker thread, registers the shutdown
+ * hook and reloads the tasks a previous run persisted to disk.
+ *
+ * @author jian.mao
+ * @date 2023-11-03
+ */
+@Component
+@Order(value = 1)
+@Slf4j
+public class MainHandler implements ApplicationRunner {
+
+    @Value("${task.task-queue-path}")
+    private String taskPath;
+    @Resource
+    private FileTaskProcess fileTaskProcess;
+
+    /**
+     * Starts the worker thread, installs the shutdown hook, then restores persisted tasks.
+     *
+     * @param args application arguments (not used)
+     */
+    @Override
+    public void run(ApplicationArguments args) throws Exception {
+        new Thread(fileTaskProcess).start();
+        log.info("开启文件上传执行线程-----");
+        // Install the shutdown hook.
+        waitDown();
+        // Reload tasks cached on disk by the previous run.
+        readTask(taskPath, ConfigCache.taskQueue);
+    }
+
+    /**
+     * Loads tasks, one JSON object per line, from {@code path} into {@code queue} and
+     * deletes the file once every line has been queued.
+     * <p>
+     * When the file cannot be read, or the thread is interrupted while queuing, the file is
+     * kept so that no task is lost (tasks already queued may be loaded again on the next start).
+     *
+     * @param path  file holding the persisted tasks
+     * @param queue queue that receives the restored tasks
+     */
+    @SuppressWarnings("unchecked")
+    public static void readTask(String path, LinkedBlockingDeque<Map<String, Object>> queue) {
+        File file = new File(path);
+        if (!file.exists()) {
+            return;
+        }
+        List<String> tasks;
+        try {
+            tasks = FileUtils.readLines(file, "UTF-8");
+        } catch (IOException e) {
+            // The old code only printed the trace and then hit a NullPointerException on the null list.
+            log.error("read task file failed, path:{}", path, e);
+            return;
+        }
+        for (String taskStr : tasks) {
+            Map<String, Object> task = JSONObject.parseObject(taskStr);
+            if (task == null) {
+                // The queue rejects null elements; skip lines that do not yield an object.
+                continue;
+            }
+            try {
+                queue.put(task);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                log.error("interrupted while restoring tasks, path:{}", path, e);
+                return;
+            }
+        }
+        if (!file.delete()) {
+            log.warn("task file could not be deleted, path:{}", path);
+        }
+    }
+
+    /**
+     * Registers a JVM shutdown hook that clears the worker run flag and persists the queued tasks.
+     */
+    public void waitDown() {
+        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+            ConfigCache.isStart = false;
+            log.info("stop-------");
+            writeTsskToFile();
+        }));
+    }
+
+    /**
+     * Drains the task queue and writes each remaining task as JSON to the task file.
+     */
+    public void writeTsskToFile() {
+        Object task;
+        // poll() never blocks; the former size()/take() pair could hang the shutdown hook
+        // when the worker thread took the last task between the two calls.
+        while ((task = ConfigCache.taskQueue.poll()) != null) {
+            FileUtil.writeFile(taskPath, JSONObject.toJSONString(task));
+        }
+        log.info("taskQueue write is file end");
+    }
+}
diff --git a/src/main/java/com/bfd/upload/model/FilesEntity.java b/src/main/java/com/bfd/upload/model/FilesEntity.java
new file mode 100644
index 0000000..5b382b6
--- /dev/null
+++ b/src/main/java/com/bfd/upload/model/FilesEntity.java
@@ -0,0 +1,29 @@
+package com.bfd.upload.model;
+
+import java.time.LocalDateTime;
+
+import lombok.Data;
+
+/**
+ * @author jian.mao
+ * @date 2024年2月4日
+ * @description
+ */
+@Data
+public class FilesEntity {
+
+    private Integer id;
+    private String createUserId;
+    private String createUser;
+    private String updateUserId;
+    private String updateUser;
+    private LocalDateTime createTime;
+    private LocalDateTime updateTime;
+    private Integer del;
+    private String appCode;
+    private String title;
+    private String path;
+    private String ext;
+    private Integer type;
+
+}
diff --git a/src/main/java/com/bfd/upload/process/FileTaskProcess.java b/src/main/java/com/bfd/upload/process/FileTaskProcess.java
new file mode 100644
index 0000000..a725bf7
--- /dev/null
+++ b/src/main/java/com/bfd/upload/process/FileTaskProcess.java
@@ -0,0 +1,358 @@
+package com.bfd.upload.process;
+
+import java.awt.image.BufferedImage;
+import
java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URLEncoder;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.UUID;
+
+import javax.imageio.ImageIO;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import com.alibaba.fastjson.JSONObject;
+import com.bfd.upload.cache.ConfigCache;
+import com.bfd.upload.entity.Constants;
+import com.bfd.upload.utils.DownLoadUtil;
+import com.bfd.upload.utils.EncryptionUtil;
+import com.bfd.upload.utils.ExcelUtils;
+import com.bfd.upload.utils.OcrUtil;
+import com.bfd.upload.utils.PptUtil;
+import com.bfd.upload.utils.SpringBootKafka;
+
+/**
+ * @author jian.mao
+ * @date 2024年2月4日
+ * @description
+ */
+@Component
+@Slf4j
+public class FileTaskProcess implements Runnable{
+    @Autowired
+    private SpringBootKafka springBootKafka;
+    @Value("${customize-kafka.producer.topic}")
+    private String topic;
+    @Value("${gofast.profix.host}")
+    private String host;
+    @Value("${file.download.dir}")
+    private String saveDir;
+    @Value("${file.ocrApi}")
+    private String ocrApi;
+    @Value("${file.uploadUrl}")
+    private String uploadUrl;
+    /**
+     * Worker loop: takes tasks from the shared queue until {@code ConfigCache.isStart} is
+     * cleared or the thread is interrupted, and handles every file entry of each task.
+     */
+    @Override
+    public void run() {
+        while (ConfigCache.isStart) {
+            Map<String, Object> task = null;
+            try {
+                task = ConfigCache.taskQueue.take();
+                log.info("任务:{}", JSONObject.toJSONString(task));
+                log.info("任务队列长度:{}", ConfigCache.taskQueue.size());
+                // Fields collected for the downstream message.
+                Map<String, Object> results = new HashMap<>(16);
+                Map<String, Object> input = (Map<String, Object>) task.get(Constants.INPUT);
+                // Only keys present in the output section are copied from a file entry.
+                Map<String, Object> output = (Map<String, Object>) task.get(Constants.OUTPUT);
+                List<Map<String, Object>> files = (List<Map<String, Object>>) input.get(Constants.FILES);
+                // 0: forward the metadata only; any other value: read the file content as well.
+                Integer readContent = (Integer) input.get(Constants.READCONTENT);
+                int fileSize = files.size();
+                for (Map<String, Object> map : files) {
+                    fileSize--;
+                    for (String key : map.keySet()) {
+                        if (output.containsKey(key)) {
+                            if (key.equals(Constants.PATH)) {
+                                // Make the path a full URL: replace the cluster address with the domain name, or prepend the host.
+                                String path = (String) map.get(key);
+                                if (path.contains(Constants.HTTP)) {
+                                    results.put(key, path.replace(Constants.CLUSTER_ADDRESS, Constants.DOMAIN_NAME));
+                                } else {
+                                    results.put(key, host + path);
+                                }
+                            } else {
+                                results.put(key, map.get(key));
+                            }
+                        }
+                    }
+                    // Expose dataProcessId at the top level of the task for the consuming application.
+                    if (map.containsKey(Constants.DATAPROCESSID)) {
+                        task.put(Constants.DATAPROCESSID, map.get(Constants.DATAPROCESSID));
+                    } else {
+                        task.put(Constants.DATAPROCESSID, UUID.randomUUID().toString());
+                    }
+                    if (readContent == 0) {
+                        Map<String, Object> result = new HashMap<>(16);
+                        if (fileSize == 0) {
+                            results.put(Constants.ISLAST, 1);
+                        }
+                        result.put(Constants.RESULTS, JSONObject.toJSONString(results));
+                        result.put(Constants.STATUS, 1);
+                        result.put(Constants.MESSAGE, "成功");
+                        task.put(Constants.RESULT, result);
+                        springBootKafka.send(topic, JSONObject.toJSONString(task));
+                        log.info("数据流转至下游-------");
+                    } else {
+                        // URL-encode the title inside the path ('+' produced by the encoder becomes %20).
+                        String title = (String) map.get(Constants.TITLE);
+                        String gofastUrl = ((String) map.get(Constants.PATH)).replace(title, URLEncoder.encode(title, "utf-8").replaceAll("\\+", "%20"));
+                        if (!gofastUrl.contains(Constants.HTTP)) {
+                            gofastUrl = host + gofastUrl;
+                        }
+                        readFileToSend((String) map.get(Constants.EXT), gofastUrl, saveDir + title, task, results);
+                    }
+                }
+            } catch (InterruptedException e) {
+                // take() was interrupted, so no task is held: restore the flag and stop the worker.
+                Thread.currentThread().interrupt();
+                log.info("file task worker interrupted, exiting");
+                break;
+            } catch (Exception e) {
+                log.error("结果组装异常,", e);
+                // Report the failure downstream with status 2.
+                Map<String, Object> result = new HashMap<>(16);
+                Map<String, Object> results = new HashMap<>(16);
+                result.put(Constants.MESSAGE, "异常");
+                result.put(Constants.STATUS, 2);
+                results.put(Constants.ISLAST, 1);
+                results.put(Constants.VALUE, e.getMessage());
+                result.put(Constants.RESULTS, JSONObject.toJSONString(results));
+                task.put(Constants.RESULT, result);
+                springBootKafka.send(topic, JSONObject.toJSONString(task));
+                log.info("数据流转至下游-------");
+            }
+        }
+    }
+
+    /**
+     * 根据文件类型读取内容
+     * @param fileType 文件类型
+     * @param path 文件路径
+     * @return
+     * @throws IOException
+     */
+    private void readFileToSend(String fileType,String path,String saveFilePath,Map task,Map results){
+        try {
+            log.info("文件类型:{},文件云端地址:{}",fileType,path);
+            //下载文件
+            DownLoadUtil.downloadFile(path, saveFilePath);
+            if(fileType.equals(Constants.PNG)||fileType.equals(Constants.JPG)||fileType.equals(Constants.JPEG)){
+                //图片类型判断
+                String content = OcrUtil.doOcr(path, ocrApi);
+                results.put(Constants.CONTENT, content);
+                //结果集
+                Map result = new HashMap(16);
+                //遍历入库返回结果,拼接响应内容
+                result.put(Constants.RESULTS, JSONObject.toJSONString(results));
+                task.put(Constants.RESULT, result);
+                //发送kafka
+                springBootKafka.send(topic,JSONObject.toJSONString(task));
+                log.info("数据流转至下游-------");
+            }else if(fileType.equals(Constants.MP4)){
+                //视频asr读取 --暂时不接
+
+
+            }else if(fileType.equals(Constants.PPTX)||fileType.equals(Constants.PPT)){
+                //ppt读取
+                String content = PptUtil.parse(saveFilePath);
+                results.put(Constants.CONTENT, content);
+                //结果集
+                Map result = new HashMap(16);
+                //遍历入库返回结果,拼接响应内容
+                result.put(Constants.RESULTS, JSONObject.toJSONString(results));
+                task.put(Constants.RESULT, result);
+                //发送kafka
+                springBootKafka.send(topic,JSONObject.toJSONString(task));
+                log.info("数据流转至下游-------");
+            }else if(fileType.equals(Constants.DOCX)||fileType.equals(Constants.DOC)){
+                //doc读取
+                String content = readWordFile(saveFilePath);
+                results.put(Constants.CONTENT, content);
+                log.info("文件内容读取:{}",content);
+                //结果集
+ Map result = new HashMap(16); + //遍历入库返回结果,拼接响应内容 + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic,JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + }else if(fileType.equals(Constants.PDF)){ + //pdf读取 + String pdfId = EncryptionUtil.md5(saveFilePath); + String outputFolder = saveDir +"pdf/" + pdfId + "/"; + int page = converterPdfToImg(saveFilePath,outputFolder); + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < page; i++) { + String imageName = "page_" + (i + 1) + ".png"; + String imgFilePath = outputFolder+imageName; + Map imgUploadMap = DownLoadUtil.uploadFile(uploadUrl, imgFilePath); + String uploadFileUrl = imgUploadMap.get("fileUrl"); + String dataText = OcrUtil.doOcr(uploadFileUrl, ocrApi); + sb.append(dataText); + //图片删除 + delFile(imgFilePath); + } + results.put(Constants.CONTENT, sb.toString()); + //结果集 + Map result = new HashMap(16); + //遍历入库返回结果,拼接响应内容 + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic,JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + + }else if(fileType.equals(Constants.XLSX)||fileType.equals(Constants.XLS)){ + //excel读取 + Map parseResult = ExcelUtils.parse(saveFilePath); + for (Entry entry : parseResult.entrySet()) { + String key = entry.getKey(); + List> data = (List>) entry.getValue(); + for (Map map : data) { + results.put(Constants.CONTENT, JSONObject.toJSONString(map)); + //结果集 + Map result = new HashMap(16); + //遍历入库返回结果,拼接响应内容 + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic,JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + } + } + }else{ + //其他类型文件,直接读取 + StringBuffer sb = new StringBuffer(); + File file = new File(saveFilePath); + List lines = FileUtils.readLines(file); + for (String line 
: lines) { + sb.append(line); + } + results.put(Constants.CONTENT, sb.toString()); + //结果集 + Map result = new HashMap(16); + //遍历入库返回结果,拼接响应内容 + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic,JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + } + } catch (Exception e) { + // TODO: handle exception + log.error("内容抽取异常,",e); + Map result = new HashMap(16); + //遍历入库返回结果,拼接响应内容 + result.put(Constants.RESULTS, e.getMessage()); + result.put(Constants.MESSAGE, "异常"); + result.put(Constants.STATUS, 2); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic, JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + }finally{ + //删除文件 + delFile(saveFilePath); + } + + } + /** + * PDF to png + * @param fileName 文件位置 + * @param outputFolder 图片输出位置 + */ + private int converterPdfToImg(String fileName, String outputFolder) { + int page = 0; + try { + PDDocument document = PDDocument.load(new File(fileName)); + PDFRenderer pdfRenderer = new PDFRenderer(document); + for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) { + // 设置 DPI(分辨率) + BufferedImage bim = pdfRenderer.renderImageWithDPI(pageIndex, 300); + // 图片文件名 + String imageName = "page_" + (pageIndex + 1) + ".png"; + // 完整的图片文件路径 + String imagePath = outputFolder + imageName; + File file = new File(imagePath); + + if (!file.getParentFile().exists()) { + file.getParentFile().mkdirs(); + } + + ImageIO.write(bim, "png", file); + page++; + } + document.close(); + log.info("PDF 已成功拆分为图片!"); + } catch (Exception e) { +// e.printStackTrace(); + log.error("拆分 PDF 为图片时出现错误:" ,e); + } + return page; + } + /** + * 解析word文档 + * @param filePath 文件路径 + * @return + * @throws IOException + */ + private String readWordFile(String filePath) throws IOException { + InputStream inputStream = new FileInputStream(filePath); + String fileTypeDoc = "doc"; + String fileTypeDocx = "docx"; 
+ if (filePath.endsWith(fileTypeDoc)) { + try (HWPFDocument document = new HWPFDocument(inputStream)) { + WordExtractor extractor = new WordExtractor(document); + return extractor.getText(); + } + } else if (filePath.endsWith(fileTypeDocx)) { + try (XWPFDocument document = new XWPFDocument(inputStream)) { + XWPFWordExtractor extractor = new XWPFWordExtractor(document); + return extractor.getText(); + } + } else { + log.error("Unsupported file format"); + throw new IllegalArgumentException("Unsupported file format"); + } + } + + /** + * @param filePath + */ + private void delFile(String filePath) { + // 创建 File 对象 + File file = new File(filePath); + // 检查文件是否存在 + if (file.exists()) { + // 尝试删除文件 + if (file.delete()) { + log.info("文件删除成功: " + filePath); + } else { + log.error("无法删除文件: " + filePath); + } + } else { + log.warn("文件不存在: " + filePath); + } + } +} diff --git a/src/main/java/com/bfd/upload/service/FileExecService.java b/src/main/java/com/bfd/upload/service/FileExecService.java new file mode 100644 index 0000000..94447e7 --- /dev/null +++ b/src/main/java/com/bfd/upload/service/FileExecService.java @@ -0,0 +1,15 @@ +package com.bfd.upload.service; + +/** + * @author jian.mao + * @date 2024年2月4日 + * @description + */ +public interface FileExecService { + + /** + * @param dataJson + * @return + */ + String add(String dataJson); +} diff --git a/src/main/java/com/bfd/upload/service/impl/FileExecServiceImpl.java b/src/main/java/com/bfd/upload/service/impl/FileExecServiceImpl.java new file mode 100644 index 0000000..f03cc7c --- /dev/null +++ b/src/main/java/com/bfd/upload/service/impl/FileExecServiceImpl.java @@ -0,0 +1,55 @@ +package com.bfd.upload.service.impl; + +import java.util.HashMap; +import java.util.Map; + +import lombok.extern.slf4j.Slf4j; + +import org.springframework.stereotype.Service; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.upload.cache.ConfigCache; +import com.bfd.upload.entity.Constants; +import 
com.bfd.upload.service.FileExecService; + +/** + * @author jian.mao + * @date 2024年2月4日 + * @description + */ +@Service +@Slf4j +public class FileExecServiceImpl implements FileExecService { + + @Override + public String add(String dataJson) { + // TODO Auto-generated method stub + Map response = new HashMap<>(16); + int code = 200; + String message = "success"; + Map task = null; + try { + task = JSONObject.parseObject(dataJson); + } catch (Exception e) { + log.error("参数结构不合法,",e); + code = 100010; + message = "参数不合法"; + } + // 写入队列 + try { + if(task.containsKey(Constants.TRACE) && (boolean)task.get(Constants.TRACE)){ + ConfigCache.taskQueue.putFirst(task); + }else{ + ConfigCache.taskQueue.put(task); + } + } catch (InterruptedException e) { + log.error("任务写入等待队列异常,",e); + code = 100011; + message = "任务写入等待队列失败"; + } + response.put(Constants.CODE,code); + response.put(Constants.MESSAGE,message); + return JSONObject.toJSONString(response); + } + +} diff --git a/src/main/java/com/bfd/upload/utils/DataUtil.java b/src/main/java/com/bfd/upload/utils/DataUtil.java new file mode 100644 index 0000000..54d6e2c --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/DataUtil.java @@ -0,0 +1,63 @@ +package com.bfd.upload.utils; + +import java.util.Map; + +import lombok.extern.slf4j.Slf4j; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.JSONPath; +import com.bfd.upload.entity.Constants; + +/** + * @author:jinming + * @className:DataUtil + * @version:1.0 + * @description: 获取dataValue的值 + * @Date:2023/11/1 9:54 + */ +@Slf4j +public class DataUtil { + /** + * + * @param key 传入的key + * @param dataMap 数据map + * @return 根据传入的参数进行判断解析,返回正确的dataValue + */ + public static Object getValue(String key, Map dataMap) { + try { + //公式为空直接就返回 + if(key.equals(Constants.EMPTY)){ + return Constants.EMPTY; + } + Object dataValue; + String isJson = "#json#"; + if (key.contains(isJson)) { + //进行第一次拆分,获取#json#前面的部分 + String[] keySplit = 
key.split(isJson); + String firstDataKey = keySplit[0]; + String[] firstDataKeySplit = firstDataKey.split(":"); + //取出前半部分对应的JSON数据并转换为JSONObject + String dataJson = (String) dataMap.get(firstDataKeySplit[0]); + JSONObject dataJsonObject = JSON.parseObject(dataJson); + //根据key的后半部分取出对应JSONObject中的值 + String firstDataKeyJson = (String) JSONPath.eval(dataJsonObject, firstDataKeySplit[1]); + String secDataKey = keySplit[1]; + JSONObject firstDataJsonObject = JSON.parseObject(firstDataKeyJson); + dataValue = JSONPath.eval(firstDataJsonObject, secDataKey); + return dataValue; + } + String[] keySplit = key.split(":"); + String jsonPath = keySplit[1]; + String dataJson = (String) dataMap.get(keySplit[0]); + JSONObject dataJsonObject = JSON.parseObject(dataJson); + dataValue = JSONPath.eval(dataJsonObject, jsonPath); + return dataValue; + } catch (Exception e) { + // TODO: handle exception + log.error("jsonpath公式取值异常,",e); + return null; + } + + } +} \ No newline at end of file diff --git a/src/main/java/com/bfd/upload/utils/DateUtil.java b/src/main/java/com/bfd/upload/utils/DateUtil.java new file mode 100644 index 0000000..702da31 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/DateUtil.java @@ -0,0 +1,177 @@ +package com.bfd.upload.utils; + + +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Date; + +import lombok.extern.slf4j.Slf4j; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; + +/** + * 日期工具类 + * + * @author jian.mao + * @date 2022年11月15日 + * @description + */ +@Slf4j +public class DateUtil { + + /** + * @return + */ + public static String getTimeStrForNow() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH"); + return sdf.format(new Date()); + } + + + public static String getTimeStrForDay(long 
time) { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + + return sdf.format(new Date(time * 1000)); + } + + public static String getTimeStrForDay() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + + return sdf.format(new Date()); + } + + + public static String getDateTime() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + String time = sdf.format(new Date()); + return time; + } + + public static String getDateTime(Long timestap) { + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + String time = sdf.format(new Date(timestap)); + return time; + } + + public static String getDate(Long timestap) { + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + String time = sdf.format(new Date(timestap)); + return time; + } + + public static String getDateTimeForMonth() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMM"); + String time = sdf.format(new Date()); + return time; + } + + /** + * 休眠 + * + * @param millis 毫秒 + */ + public static void sleep(long millis) { + try { + Thread.sleep(millis); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + /** + * 1. @Description:时间戳转时间 + * 2. @Author: ying.zhao + * 3. 
@Date: 2023/3/28 + */ + + public static String timestampToDate(String time) { + int thirteen = 13; + int ten = 10; + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); +// if (time.length() == thirteen) { + if (time.length() > ten) { + return sdf.format(new Date(Long.parseLong(time))); + } else { + return sdf.format(new Date(Integer.parseInt(time) * 1000L)); + } + } + + public static String parseCreated(String jsonTime){ + String formattedDateTime = getDateTime(); + try { + // 使用fastjson解析JSON数据 + JSONObject jsonObject = JSON.parseObject(jsonTime); + // 获取日期和时间的值 + JSONObject dateObject = jsonObject.getJSONObject("date"); + int day = dateObject.getIntValue("day"); + int month = dateObject.getIntValue("month"); + int year = dateObject.getIntValue("year"); + + JSONObject timeObject = jsonObject.getJSONObject("time"); + int hour = timeObject.getIntValue("hour"); + int minute = timeObject.getIntValue("minute"); + int second = timeObject.getIntValue("second"); + + // 创建LocalDateTime对象 + LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute, second); + + // 定义日期时间格式化器 + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + // 格式化日期时间 + formattedDateTime = dateTime.format(formatter); + } catch (Exception e) { + log.info("日期转换失败:{}",e); + } + return formattedDateTime; + } + + /** + * 字符串转换日期 + * @param format + * @param date + * @return + */ + public static Date strToDate(String format,String date){ + SimpleDateFormat sdf = new SimpleDateFormat(format); + if (date == null || date.equals("")){ + return new Date(); + }else{ + Date ru = null; + try { + ru = sdf.parse(date); + } catch (ParseException e) { + e.printStackTrace(); + } + return ru; + } + } + /** + * 日期格式话 + * @param format 日期格式 + * @param dater 要转换的日期,默认当前时间 + * @return + */ + public static String FormatDate(String format,Date date){ + String fromatDate = null; + SimpleDateFormat sdf = new SimpleDateFormat(format); + if (date == null){ + 
fromatDate = sdf.format(new Date()); + }else{ + fromatDate = sdf.format(date); + } + return fromatDate; + } + public static void main(String[] args) { + String time = timestampToDate("955814400000"); + System.out.println(time); + } +} diff --git a/src/main/java/com/bfd/upload/utils/DownLoadUtil.java b/src/main/java/com/bfd/upload/utils/DownLoadUtil.java new file mode 100644 index 0000000..f72c2bc --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/DownLoadUtil.java @@ -0,0 +1,1007 @@ +package com.bfd.upload.utils; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.URLEncoder; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; + +import okhttp3.MediaType; +import okhttp3.MultipartBody; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.apache.http.HttpResponse; +import org.apache.http.NameValuePair; +import org.apache.http.StatusLine; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.AuthCache; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.HttpClient; +import org.apache.http.client.HttpRequestRetryHandler; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import 
org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.config.SocketConfig; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.socket.LayeredConnectionSocketFactory; +import org.apache.http.conn.socket.PlainConnectionSocketFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.auth.BasicScheme; +import org.apache.http.impl.client.BasicAuthCache; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.client.LaxRedirectStrategy; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.alibaba.fastjson.JSON; +import com.bfd.upload.entity.Constants; + + + + + + + + +/** + * 下载工具类 + * @author jian.mao + * @date 2023年9月19日 + * @description + */ +public class DownLoadUtil { + + private static String ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"; + private final static Logger log = LoggerFactory.getLogger(DownLoadUtil.class); + /** 代理服务器(产品官网 www.16yun.cn) **/ + final static String PROXYHOST = "u270.40.tp.16yun.cn"; + final static Integer PROXYPORT = 6448; + /** 代理验证信息 **/ + final static String PROXYUSER = "16HFBVJC"; + final static String PROXYPASS = "897944"; + + private static PoolingHttpClientConnectionManager cm = null; + private static HttpRequestRetryHandler httpRequestRetryHandler = null; + private static HttpHost proxy = null; + + private static CredentialsProvider credsProvider = null; 
+ private static RequestConfig reqConfig = null; + private static OkHttpClient okHttpClient; + + private static OkHttpClient getOkHttpClient() { + if (okHttpClient == null) { + okHttpClient = new OkHttpClient(); + } + return okHttpClient; + } + static { + ConnectionSocketFactory plainsf = PlainConnectionSocketFactory + .getSocketFactory(); + LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory + .getSocketFactory(); + + Registry registry = RegistryBuilder.create().register("http", plainsf) + .register("https", sslsf).build(); + + cm = new PoolingHttpClientConnectionManager(registry); + cm.setMaxTotal(20); + cm.setDefaultMaxPerRoute(5); + + proxy = new HttpHost(PROXYHOST, PROXYPORT, "https"); + + credsProvider = new BasicCredentialsProvider(); + credsProvider.setCredentials(AuthScope.ANY, + new UsernamePasswordCredentials(PROXYUSER, PROXYPASS)); + + reqConfig = RequestConfig.custom().setConnectionRequestTimeout(5000) + .setConnectTimeout(5000).setSocketTimeout(5000) + .setExpectContinueEnabled(false) + .setProxy(new HttpHost(PROXYHOST, PROXYPORT)).build(); + } + + /** + * 模拟客户端get请求 + * + * @param url + * 模拟请求得url + * @param headers + * 头部信息,没有可以不传 + * @return + */ + @SafeVarargs + public static String proxyDoGet(String url, Map... 
headers) { + // 设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); + AuthCache authCache = new BasicAuthCache(); + authCache.put(proxy, new BasicScheme()); + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + CloseableHttpClient httpClient = httpBuilder + .setDefaultSocketConfig(socketConfig) + .setDefaultRequestConfig(config) + .setDefaultCredentialsProvider(credsProvider).build(); + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(reqConfig); + if (headers != null && headers.length > 0) { + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key, tempHeaders.get(key).toString()); + } + } else { + httpGet.setHeader("Accept", + "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + } + CloseableHttpResponse response = null; + String html = ""; + int notFundCode = 404; + int successCode = 200; + try { + response = httpClient.execute(httpGet, localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if (statusLine.getStatusCode() == successCode) { + if (responseEntity != null) { + html = EntityUtils.toString(responseEntity, "utf-8"); + System.out.println("响应内容长度为:" + + responseEntity.getContentLength()); + // 下载结果为空不正常 + if (html.equals(Constants.EMPTY)) { + html = "Download failed error is:reslut is null"; + } + } + } else if (statusLine.getStatusCode() == notFundCode) { + 
html = "

页面404,正常结束请求即可

"; + } else { + throw new Exception("请求错误,code码为:" + statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:reslut is null"; + }finally{ + try { + response.close(); + httpClient.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + return html; + + } + + + public static String httpsslProxyGet(String url, Map... headers) throws Exception { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + connManager.setMaxTotal(50); + connManager.setDefaultMaxPerRoute(10); + HttpClients.custom().setConnectionManager(connManager); + // 设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); + AuthCache authCache = new BasicAuthCache(); + authCache.put(proxy, new BasicScheme()); + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + CloseableHttpClient httpClient = httpBuilder + .setConnectionManager(connManager) + .setDefaultSocketConfig(socketConfig) + .setDefaultRequestConfig(config) + .setDefaultCredentialsProvider(credsProvider).build(); + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(reqConfig); + if (headers != null && headers.length > 0) { + Map tempHeaders = headers[0]; + for (String 
key : tempHeaders.keySet()) { + httpGet.setHeader(key, tempHeaders.get(key).toString()); + } + } else { + httpGet.setHeader("Accept", + "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + } + CloseableHttpResponse response = null; + String html = ""; + int notFundCode = 404; + int successCode = 200; + try { + response = httpClient.execute(httpGet, localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if (statusLine.getStatusCode() == successCode) { + if (responseEntity != null) { + html = EntityUtils.toString(responseEntity, "utf-8"); + System.out.println("响应内容长度为:" + + responseEntity.getContentLength()); + // 下载结果为空不正常 + if (html.equals(Constants.EMPTY)) { + html = "Download failed error is:reslut is null"; + } + } + } else if (statusLine.getStatusCode() == notFundCode) { + html = "

页面404,正常结束请求即可

"; + } else { + throw new Exception("请求错误,code码为:" + statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:reslut is null"; + }finally{ + try { + response.close(); + httpClient.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + return html; + + } + + + /** + * json参数方式POST提交 + * @param url + * @param params + * @return + */ + public static String doPost(String url, String params){ + String strResult = ""; + //设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); +// AuthCache authCache = new BasicAuthCache(); +// authCache.put(proxy, new BasicScheme()); +// HttpClientContext localContext = HttpClientContext.create(); +// localContext.setAuthCache(authCache); + // 1. 获取默认的client实例 + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); + HttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); +// HttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).setConnectionManager(cm) +// .setDefaultCredentialsProvider(credsProvider).build(); + // 2. 
创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + httpPost.addHeader("Content-Type", "application/json;charset=utf-8"); + HttpResponse resp = null; + try { + httpPost.setEntity(new StringEntity(params,"utf-8")); + resp = client.execute(httpPost); +// resp = client.execute(httpPost,localContext); + StatusLine statusLine = resp.getStatusLine(); + System.out.println("响应状态为:" + resp.getStatusLine()); + int notFundCode = 404; + int successCode = 200; + if(statusLine.getStatusCode() == successCode){ + // 7. 获取响应entity + HttpEntity respEntity = resp.getEntity(); + strResult = EntityUtils.toString(respEntity, "UTF-8"); + if(strResult.equals(Constants.EMPTY)){ + strResult = "Download failed error is:reslut is null"; + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + strResult = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + return strResult; + } + public static String httpPost(String url,String params) { + String html=""; + html = doPost(url,params); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(5000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = doPost(url,params); + } + return html; + } + /** + * 绕过验证 + * + * @return + * @throws NoSuchAlgorithmException + * @throws KeyManagementException + */ + public static SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException { + SSLContext sc = SSLContext.getInstance("SSLv3"); + + // 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法 + X509TrustManager trustManager = new X509TrustManager() { + @Override + public void checkClientTrusted( + java.security.cert.X509Certificate[] paramArrayOfX509Certificate, + String paramString) throws CertificateException { + } + + @Override + public void checkServerTrusted( + 
java.security.cert.X509Certificate[] paramArrayOfX509Certificate, + String paramString) throws CertificateException { + } + + @Override + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return null; + } + }; + + sc.init(null, new TrustManager[] { trustManager }, null); + return sc; + } + /** + * 模拟请求 + * + * @param url 资源地址 + * @param map 参数列表 + * @param encoding 编码 + * @return + * @throws NoSuchAlgorithmException + * @throws KeyManagementException + * @throws IOException + * @throws ClientProtocolException + */ + public static String httpsslGet(String url,Map ... headers) { + String html=""; + CloseableHttpClient client = null; + HttpEntity responseEntity = null; + CloseableHttpResponse response = null; + try { + log.debug("DownLoadUtil------------->设置下载相关信息, start...."); + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + connManager.setMaxTotal(50); + connManager.setDefaultMaxPerRoute(10); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). 
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + // 设置重定向策略 + LaxRedirectStrategy redirectStrategy = new LaxRedirectStrategy(); + //创建自定义的httpclient对象 + client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setRedirectStrategy(redirectStrategy).setDefaultSocketConfig(socketConfig).setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36").build(); +// CloseableHttpClient client = HttpClients.createDefault(); + + HttpGet httpGet = new HttpGet(url); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + log.debug("DownLoadUtil------------->设置下载相关信息, end...."); + try { + int notFundCode = 404; + int successCode = 200; + log.debug("DownLoadUtil------------->下载执行,start...."); + httpGet.setConfig(config); + response = client.execute(httpGet); + log.debug("DownLoadUtil------------->下载执行,end...."); + // 从响应模型中获取响应实体 + StatusLine statusLine = response.getStatusLine(); + log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is start ...."); + System.out.println("响应状态为:" + response.getStatusLine()); + responseEntity = response.getEntity(); + log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is end ...."); + if(statusLine.getStatusCode() == successCode){ + if 
(responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + System.out.println("响应内容长度为:" + responseEntity.getContentLength()); + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + }finally{ + try { + responseEntity.getContent().close(); + response.close(); + client.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + + return html; + } + + public static String httpSSLGet(String url,Map ... headers) { + String html=""; + html = httpsslGet(url,headers); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = httpsslGet(url,headers); + } + return html; + } + public static String doPostFrom(String url,Map param,Map ... headers){ + //设置超时时间 + int timeout = 15; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). 
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); +// AuthCache authCache = new BasicAuthCache(); +// authCache.put(proxy, new BasicScheme()); +// HttpClientContext localContext = HttpClientContext.create(); +// localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); +// HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).setConnectionManager(cm) +// .setDefaultCredentialsProvider(credsProvider).build(); + HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"); + httpPost.addHeader("accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.addHeader("content-type", "application/x-www-form-urlencoded"); + httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"); +// httpPost.addHeader("Referer", "http://www.neeq.com.cn/rule/Business_rules.html"); + } + // 创建请求参数 + List list = new LinkedList<>(); + for (String key : param.keySet()) { + BasicNameValuePair param1 = new BasicNameValuePair(key,param.get(key).toString()); + list.add(param1); + } + // 使用URL实体转换工具 + String html=""; + try { + UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8"); + httpPost.setEntity(entityParam); + HttpResponse 
response = httpClient.execute(httpPost); +// HttpResponse response = httpClient.execute(httpPost,localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + int notFundCode = 404; + int successCode = 200; + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + + return html; + + } + public static String httpPostForm(String url,Map params,Map ... headers) { + String html=""; + html = doPostFrom(url,params); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(5000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = doPostFrom(url,params,headers); + } + return html; + } + + public static String dosslPost(String url,String params,Map ... headers) { + String html=""; + CloseableHttpClient client = null; + HttpEntity responseEntity = null; + CloseableHttpResponse response = null; + try { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 5; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). 
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + //创建自定义的httpclient对象 + client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build(); +// CloseableHttpClient client = HttpClients.createDefault(); + // 2. 创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + httpPost.addHeader("Content-Type", "application/json;charset=utf-8"); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + + try { + httpPost.setEntity(new StringEntity(params,"utf-8")); + response = client.execute(httpPost); + int notFundCode = 404; + int successCode = 200; + // 从响应模型中获取响应实体 + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + responseEntity = response.getEntity(); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + System.out.println("响应内容长度为:" + responseEntity.getContentLength()); + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + }finally{ + try { + responseEntity.getContent().close(); + response.close(); + client.close(); + } catch (UnsupportedOperationException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + } + + + return html; + } + public static String dosslPostForm(String url,Map param,Map ... headers) { + String html=""; + try { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 5; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + //创建自定义的httpclient对象 + CloseableHttpClient client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build(); +// CloseableHttpClient client = HttpClients.createDefault(); + // 2. 
创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.addHeader("content-type", "application/x-www-form-urlencoded"); + httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + + // 创建请求参数 + List list = new LinkedList<>(); + for (String key : param.keySet()) { + BasicNameValuePair param1 = new BasicNameValuePair(key,param.get(key).toString()); + list.add(param1); + } + // 使用URL实体转换工具 + try { + UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8"); + httpPost.setEntity(entityParam); + HttpResponse response = client.execute(httpPost); +// HttpResponse response = httpClient.execute(httpPost,localContext); + // 从响应模型中获取响应实体 + int notFundCode = 404; + int successCode = 200; + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + + + return html; + } + public static String httpSSLPostForm(String url,Map params,Map ...headers) { + String html=""; + try { + html = dosslPostForm(url,params,headers); + } 
catch (Exception e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + try { + html = dosslPostForm(url,params,headers); + } catch (Exception e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + } + return html; + } + public static String httpSSLPost(String url,String params,Map ...headers) { + String html=""; + try { + html = dosslPost(url,params,headers); + } catch (Throwable e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + try { + html = dosslPost(url,params,headers); + } catch (Throwable e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + } + return html; + } + + /** + * 模拟客户端get请求 + * @param url 模拟请求得url + * @param headers 头部信息,没有可以不传 + * @return + */ + public static String doGet(String url,Map ... headers){ + //设置超时时间 + int timeout = 15; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). 
+ setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); + HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); + HttpGet httpGet = new HttpGet(url); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + } + String html=""; + try { + int notFundCode = 404; + int successCode = 200; + HttpResponse response = httpClient.execute(httpGet); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + if(html.equals("")){ + html = "Download failed error is:reslut is null"; + } + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + return html; + + } + + /** + * 文件下载 + * @param fileURL + * @param saveDir + * @param fileName + * @throws IOException + */ + public static void downloadFile(String fileURL, String saveFilePath) throws IOException { + CloseableHttpClient httpClient = HttpClients.createDefault(); + HttpGet httpGet = new HttpGet(fileURL); + + CloseableHttpResponse response = httpClient.execute(httpGet); + + try { + int successCode = 200; + if (response.getStatusLine().getStatusCode() == successCode) { + // 设置文件保存路径 + + // 将响应实体写入文件 + FileOutputStream outputStream = new FileOutputStream(saveFilePath); + response.getEntity().writeTo(outputStream); + + // 关闭流 + outputStream.close(); + } else { + log.error("下载失败. HTTP 响应码: {}", response.getStatusLine().getStatusCode()); + } + } finally { + response.close(); + httpClient.close(); + } + } + + public static Map uploadFile(String url, String filePath) throws Exception { + File file = new File(filePath); + + Map returnMap = new HashMap(32); + OkHttpClient client = getOkHttpClient(); + // 设置文件上传的媒体类型 + MediaType mediaType = MediaType.parse("application/octet-stream"); + // 创建请求体,将文件添加到请求体中 + RequestBody requestBody = RequestBody.create(mediaType, file); + + // 创建多部分请求体,用于上传文件 + MultipartBody multipartBody = new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("file", file.getName(), requestBody) + .build(); + // 创建上传文件的请求 + Request request = new Request.Builder() + .url(url) + .post(multipartBody) + .build(); + + try (Response response = client.newCall(request).execute()) { + if (!response.isSuccessful()) { + throw new IOException("Failed to upload file: " + response); + } + String html = response.body().string(); + + try { + Map parse = (Map) JSON.parse(html); + Map data = (Map) parse.get("data"); + String domain 
= (String) data.get("domain"); + String src = (String) data.get("src"); + String fileUrl = domain.concat(src); + returnMap.put("fileUrl", fileUrl); + } catch (Exception e) { + returnMap.put("fileUrl", html); + } + // 处理上传成功的响应 + System.out.println("File uploaded successfully!"); + } + return returnMap; + } + public static void main(String[] args) throws Exception { + String fileURL = "http://172.18.1.146:8080/group1/default/20240305/09/59/5/China’s Concern About Nuclear Wastewater May Be More About Politics Than Science.docx"; + String saveFilePath = "D:\\工作使用\\analyze\\1010data\\政策\\China’s Concern About Nuclear Wastewater May Be More About Politics Than Science.docx"; + String encodedUrl = URLEncoder.encode(fileURL, "utf-8"); + System.out.println(encodedUrl); + downloadFile(encodedUrl, saveFilePath); + } +} diff --git a/src/main/java/com/bfd/upload/utils/EncryptionUtil.java b/src/main/java/com/bfd/upload/utils/EncryptionUtil.java new file mode 100644 index 0000000..bfd62c3 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/EncryptionUtil.java @@ -0,0 +1,27 @@ +package com.bfd.upload.utils; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +/** + * @author jian.mao + * @date 2023年3月10日 + * @description + */ +public class EncryptionUtil { + public static String md5(String text) { + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(text.getBytes()); + byte[] bytes = md.digest(); + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b & 0xff)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + e.printStackTrace(); + return null; + } + } +} diff --git a/src/main/java/com/bfd/upload/utils/ExcelUtils.java b/src/main/java/com/bfd/upload/utils/ExcelUtils.java new file mode 100644 index 0000000..bfededd --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/ExcelUtils.java @@ -0,0 +1,184 @@ +package com.bfd.upload.utils; + +import okhttp3.*; 
+import org.apache.poi.ss.usermodel.*; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.Proxy; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + + +/** + * @author jian.mao + * @date 2023年4月7日 + * @description excel解析工具类 + */ +public class ExcelUtils { + /** + * excel解析 + * + * @param excel + * @return + */ + public static Map parse(String filePath) { + Map excelMap = new HashMap(16); + try { + File excel = new File(filePath); + FileInputStream file = new FileInputStream(excel); + // 使用工厂模式创建工作簿对象 + Workbook workbook = WorkbookFactory.create(file); + // 获取工作簿中工作表的数量 + int numberOfSheets = workbook.getNumberOfSheets(); + DataFormatter dataFormatter = new DataFormatter(); + // 遍历所有工作表 + for (int i = 0; i < numberOfSheets; i++) { + Sheet sheet = workbook.getSheetAt(i); + String key = sheet.getSheetName(); + //行码 + int rowNum = 0; + List> data = new ArrayList>(); + Map titleHead = new HashMap(16); + // 遍历所有行 + for (Row row : sheet) { + //单元格码 + int cellNum = 0; + //行内容存储 + Map rowMap = new HashMap(16); + // 遍历所有单元格 + if (rowNum == 0) { + for (Cell cell : row) { + String cellValue = dataFormatter.formatCellValue(cell); + titleHead.put(cellNum, cellValue); + cellNum++; + } + } else { + for (int j = 0; j < titleHead.size(); j++) { + String cellValue = dataFormatter.formatCellValue(row.getCell(j)); + rowMap.put(titleHead.get(cellNum), cellValue); + cellNum++; + } + } + + if (rowNum > 0) { + data.add(rowMap); + } + rowNum++; + } + excelMap.put(key, data); + } + // 关闭文件输入流和工作簿对象 + file.close(); + workbook.close(); + } catch (IOException e) { + e.printStackTrace(); + } + return 
excelMap; + } + + /** + * 将List>写入Excel文件中 + * + * @param data 要写入Excel的数据,每个Map代表一行数据,Map的key为列名,value为单元格数据 + * @param excelFilePath Excel文件路径,包含文件名和扩展名 + * @param sheetName 工作表名称 + * @throws IOException 如果写入Excel文件时发生IO异常,则抛出该异常 + */ + public static void write(List> data, String excelFilePath, String sheetName) throws IOException { + // 创建一个新的工作簿对象 + Workbook workbook = new XSSFWorkbook(); + // 创建一个新的工作表 + Sheet sheet = workbook.createSheet(sheetName); + // 行码 + int rowNum = 0; + // 写入列头 + Row headerRow = sheet.createRow(rowNum++); + int colNum = 0; + for (String key : data.get(0).keySet()) { + Cell cell = headerRow.createCell(colNum++); + cell.setCellValue(key); + } + // 写入数据 + for (Map rowMap : data) { + Row row = sheet.createRow(rowNum++); + colNum = 0; + for (String key : rowMap.keySet()) { + Cell cell = row.createCell(colNum++); + try { + String s = rowMap.get(key); + if (s.length() > 30000) { + + cell.setCellValue(s.substring(0, 25000)); + } else { + cell.setCellValue(s); + } + } catch (Exception e) { + System.out.println(key); + e.printStackTrace(); + } + } + } + // 将数据写入文件 + FileOutputStream outputStream = new FileOutputStream(excelFilePath); + workbook.write(outputStream); + workbook.close(); + outputStream.close(); + } + +// public static void copyFile(String sourceFloder, String targetFileName) { +// File sourceFile = new File(sourceFloder); +// byte[] buffer = new byte[(int) sourceFile.length()]; +// try (InputStream inputStream = new FileInputStream(sourceFile)) { +// inputStream.read(buffer); +// } catch (IOException e) { +// e.printStackTrace(); +// return; +// } +// // 写入目标文件 +// File targetFile = new File(targetFileName); +// targetFile.mkdirs(); +// try (OutputStream outputStream = new FileOutputStream(targetFile)) { +// outputStream.write(buffer); +// } catch (IOException e) { +// e.printStackTrace(); +// return; +// } +// } + + public static void copyFile(String sourceFilePath) { + // 源文件和目标文件的路径 + String targetDrive = "F:"; + + try { + // 
获取源文件和目标文件的路径信息 + Path sourcePath = Paths.get(sourceFilePath); + Path targetPath = Paths.get(targetDrive + sourcePath.toString().substring(2)); + + // 如果目标文件的父目录不存在,则创建该目录 + if (!targetPath.getParent().toFile().exists()) { + targetPath.getParent().toFile().mkdirs(); + } + + // 进行文件复制 + Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING); + + System.out.println("Copied file: " + sourceFilePath + " -> " + targetPath); + } catch (IOException e) { + e.printStackTrace(); + } + } + + +} diff --git a/src/main/java/com/bfd/upload/utils/FileUtil.java b/src/main/java/com/bfd/upload/utils/FileUtil.java new file mode 100644 index 0000000..f61c422 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/FileUtil.java @@ -0,0 +1,36 @@ +package com.bfd.upload.utils; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * 文件工具类 + * @author jian.mao + * @date 2023年7月14日 + * @description + */ +public class FileUtil { + + /** + * 数据写入文件 + * @param Path 文件路径 + * @param result 数据 + * @throws IOException + */ + public static void writeFile(String path,String result){ + try { + FileWriter fw = new FileWriter(path,true); + fw.write(result+"\n"); + fw.flush(); + fw.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/src/main/java/com/bfd/upload/utils/JsonUtil.java b/src/main/java/com/bfd/upload/utils/JsonUtil.java new file mode 100644 index 0000000..c64e18a --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/JsonUtil.java @@ -0,0 +1,32 @@ +package com.bfd.upload.utils; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.upload.entity.Constants; + +/** + * json工具 + * @author jian.mao + * @date 2023年7月10日 + * @description + */ +public class JsonUtil { + + /** + * 校验字符串是list/map/str + * @param jsonString + * @return + */ + public static String checkJsonType(String 
jsonString) { + try { + JSONObject.parseObject(jsonString); + return Constants.MAP_TYPE; + } catch (Exception e) { + try { + JSONObject.parseArray(jsonString); + return Constants.LIST_TYPE; + } catch (Exception ex) { + return Constants.STRING_TYPE; + } + } + } +} diff --git a/src/main/java/com/bfd/upload/utils/OcrUtil.java b/src/main/java/com/bfd/upload/utils/OcrUtil.java new file mode 100644 index 0000000..f2a64f2 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/OcrUtil.java @@ -0,0 +1,61 @@ +package com.bfd.upload.utils; + +import com.alibaba.fastjson.JSON; +import com.bfd.upload.entity.Constants; + +import okhttp3.*; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * @author:jinming + * @className:ocrUtil + * @version:1.0 + * @description: + * @Date:2023/8/1 16:38 + */ +public class OcrUtil { + private static OkHttpClient okHttpClient; + + private static OkHttpClient getOkHttpClient() { + if (okHttpClient == null) { + okHttpClient = new OkHttpClient(); + } + return okHttpClient; + } + + public static String doOcr(String url,String ocrApi) { + String text = ""; + int reTryTimes = 3; + for (int i = 0; i < reTryTimes; i++) { + int okCode = 200; + OkHttpClient client = getOkHttpClient(); + OkHttpClient.Builder builder = client.newBuilder().writeTimeout(600, TimeUnit.SECONDS).connectTimeout(600, TimeUnit.SECONDS).readTimeout(600, TimeUnit.SECONDS); + client = builder.build(); + MediaType mediaType = MediaType.parse("application/json"); + RequestBody body = RequestBody.create(mediaType, "{\"id\":\"\",\"url\":\"" + url + "\"}"); + Request request = new Request.Builder() + .url(ocrApi) + .method("POST", body) + .addHeader("Content-Type", "application/json") + .build(); + try { + Response response = client.newCall(request).execute(); + String html = response.body().string(); + Map dataMap = (Map) JSON.parse(html); + int code = (int) dataMap.get("code"); + if (code == okCode) { + text = (String) dataMap.get("text"); + } + if 
(text.equals(Constants.EMPTY)) { + break; + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + return text; + } +} \ No newline at end of file diff --git a/src/main/java/com/bfd/upload/utils/OtherUtils.java b/src/main/java/com/bfd/upload/utils/OtherUtils.java new file mode 100644 index 0000000..e2d97e9 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/OtherUtils.java @@ -0,0 +1,33 @@ +package com.bfd.upload.utils; + +import java.security.MessageDigest; + +/** + * 其他工具类 + * @author jian.mao + * @date 2023年9月19日 + * @description + */ +public class OtherUtils { + + + + public static String getMd5(String string) { + try { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + byte[] bs = md5.digest(string.getBytes("UTF-8")); + StringBuilder sb = new StringBuilder(40); + for (byte x : bs) { + if ((x & 0xff) >> 4 == 0) { + sb.append("0").append(Integer.toHexString(x & 0xff)); + } else { + sb.append(Integer.toHexString(x & 0xff)); + } + } + return sb.toString(); + } catch (Exception e) { + + return "nceaform" + System.currentTimeMillis(); + } + } +} diff --git a/src/main/java/com/bfd/upload/utils/PptUtil.java b/src/main/java/com/bfd/upload/utils/PptUtil.java new file mode 100644 index 0000000..7fdbf06 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/PptUtil.java @@ -0,0 +1,93 @@ +package com.bfd.upload.utils; + +import org.apache.poi.xslf.usermodel.*; + +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * @author:jinming + * @className:PptUtil + * @version:1.0 + * @description: + * @Date:2024/3/25 16:14 + */ +public class PptUtil { + + + public static String parse(String filePath) { + StringBuilder dataStringsb = new StringBuilder(); + XMLSlideShow ppt = null; + try { + // PPT类 + ppt = new XMLSlideShow(new FileInputStream(filePath)); + // 获取PPT中的所有幻灯片 +// List slides = ppt.getSlides(); + // 遍历幻灯片 + for (int i = 0; i < ppt.getSlides().size(); i++) { + + //拿到第i页的PPT + 
XSLFSlide slides = ppt.getSlides().get(i); + System.out.println("第" + (i + 1) + "页"); + //注释的for循环是获取所以PPT的内容 +// for (XSLFSlide slide : slides) { + // 获取幻灯片中的所有图形 + List shapes = slides.getShapes(); + // 遍历PPT的图形 + for (XSLFShape shape : shapes) { + // 判断该图形类是否是文本框类 + if (shape instanceof XSLFTextShape) { + // 将图像类强制装换成文本框类 + XSLFTextShape ts = (XSLFTextShape) shape; + // 获取文本框内的文字 + String str = ts.getText(); + dataStringsb.append(str); + } + // 判断该图形类是否是表格类 + if (shape instanceof XSLFTable) { + // 将图像类强制装换成表格类 + XSLFTable table = (XSLFTable) shape; + // 获取表格中的所有行 + List rows = table.getRows(); + for (XSLFTableRow tr : rows) { + // 获取行中的所有单元格 + List cells = tr.getCells(); + for (XSLFTableCell tc : cells) { + // 获取单元格内的文字 + String str = tc.getText(); + dataStringsb.append(str); + } + } + } + // 判断该图形类是否是图片框类 + if (shape instanceof XSLFPictureShape) { + // 将图像类强制装换成图片框类 + XSLFPictureShape ps = (XSLFPictureShape) shape; + // 获取图片的字节码数据(可以利用输出流将该图片保存到硬盘里) + byte[] pictureData = ps.getPictureData().getData(); +// System.out.println("图片信息:" + pictureData); + } + } + + } +// } + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (ppt != null) { + + try { + // 保存完之后要对PPT进行关闭操作 + ppt.close(); + } catch (IOException e) { + e.printStackTrace(); + } + + } + + } + return dataStringsb.toString(); + } +} \ No newline at end of file diff --git a/src/main/java/com/bfd/upload/utils/QueueUtil.java b/src/main/java/com/bfd/upload/utils/QueueUtil.java new file mode 100644 index 0000000..9fc3aef --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/QueueUtil.java @@ -0,0 +1,18 @@ +package com.bfd.upload.utils; + +import java.util.Map; +import java.util.concurrent.LinkedBlockingDeque; + +/** + * @author:jinming + * @className:QueueUtil + * @version:1.0 + * @description: + * @Date:2023/7/13 15:00 + */ +public class QueueUtil { + + public static LinkedBlockingDeque> taskQueue = new LinkedBlockingDeque>(); + + public static LinkedBlockingDeque sendQueue = new 
LinkedBlockingDeque(); +} \ No newline at end of file diff --git a/src/main/java/com/bfd/upload/utils/SpringBootKafka.java b/src/main/java/com/bfd/upload/utils/SpringBootKafka.java new file mode 100644 index 0000000..8191c5e --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/SpringBootKafka.java @@ -0,0 +1,46 @@ +package com.bfd.upload.utils; + +import com.alibaba.fastjson.JSONObject; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.kafka.core.KafkaTemplate; +import org.springframework.kafka.support.SendResult; +import org.springframework.stereotype.Component; +import org.springframework.util.concurrent.ListenableFuture; +import org.springframework.util.concurrent.ListenableFutureCallback; + +/** + * @PROJECT_NAME: companybusinesscrawl + * @DESCRIPTION:SpringBootKafka 工具类 + * @AUTHOR: ying.zhao + * @DATE: 2023/4/6 11:09 + */ +@Slf4j +@Component +public class SpringBootKafka { + @Autowired + private KafkaTemplate kafkaTemplate; + /** + * 自定义topicKafkaTemplate + */ + /** + * public static final String TOPIC = "companyBussTest"; + **/ + public void send(String topic, String message) { + //发送消息 + ListenableFuture> future = kafkaTemplate.send(topic, message); + future.addCallback(new ListenableFutureCallback>() { + @Override + public void onFailure(Throwable throwable) { + //发送失败的处理 + log.info(topic + " - 生产者 发送消息失败:" + throwable.getMessage()); + } + + @Override + public void onSuccess(SendResult stringObjectSendResult) { + //成功的处理 + log.info("{} - 生产者 发送消息成功:",topic); + } + }); + } +} diff --git a/src/main/java/com/bfd/upload/utils/ThrowMessageUtil.java b/src/main/java/com/bfd/upload/utils/ThrowMessageUtil.java new file mode 100644 index 0000000..1205890 --- /dev/null +++ b/src/main/java/com/bfd/upload/utils/ThrowMessageUtil.java @@ -0,0 +1,23 @@ +package com.bfd.upload.utils; + +import java.io.PrintWriter; +import java.io.StringWriter; + +/** + * @author jian.mao + * @date 2023年3月22日 + * 
@description Renders a Throwable's stack trace as text.
+ */
+public class ThrowMessageUtil {
+    /**
+     * Returns the full stack trace of {@code t} as text, or "" when {@code t} is null.
+     * @param t throwable to render; may be null
+     */
+    public static String getErrmessage(Throwable t) {
+        StringWriter trace = new StringWriter();
+        if (t != null) {
+            t.printStackTrace(new PrintWriter(trace, true));
+        }
+        return trace.toString();
+    }
+}
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..72a0082 --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,95 @@ +logging: + level: + root: info + path: ./logs +server: + port: 8014 + servlet: + context-path: /appendix_upload + tomcat: + uri-encoding: utf-8 + max-connections: 20000 + max-http-form-post-size: 1 + max-threads: 1000 +spring: + application: + name: 文件上传 + datasource: + url: jdbc:mysql://172.24.12.126:3306/cda_db?serverTimezone=UTC&useUnicode=true&characterEncoding=utf-8&useSSL=true + username: root + password: baifendian123 + driver-class-name: com.mysql.cj.jdbc.Driver + kafka: + bootstrap-servers: 172.16.12.55:9092,172.16.12.56:9092,172.16.12.57:9092 + producer: + retries: 0 + #当有多个消息需要被发送到同一个分区时,生产者会把它们放在同一个批次里。该参数指定了一个批次可以使用的内存大小,按照字节数计算。 + batch-size: 16384 + # 设置生产者内存缓冲区的大小。 + buffer-memory: 33554432 + # 键的序列化方式 + key-serializer: org.apache.kafka.common.serialization.StringSerializer + # 值的序列化方式 + value-serializer: org.apache.kafka.common.serialization.StringSerializer + # acks=0 : 生产者在成功写入消息之前不会等待任何来自服务器的响应。 + # acks=1 : 只要集群的首领节点收到消息,生产者就会收到一个来自服务器成功响应。 + # acks=all :只有当所有参与复制的节点全部收到消息时,生产者才会收到一个来自服务器的成功响应。 + acks: 1 + consumer: + # 自动提交的时间间隔 在spring boot 2.X 版本中这里采用的是值的类型为Duration 需要符合特定的格式,如1S,1M,2H,5D + auto-commit-interval: 1S + # 该属性指定了消费者在读取一个没有偏移量的分区或者偏移量无效的情况下该作何处理: + # latest(默认值)在偏移量无效的情况下,消费者将从最新的记录开始读取数据(在消费者启动之后生成的记录) + # earliest :在偏移量无效的情况下,消费者将从起始位置读取分区的记录 + auto-offset-reset: earliest + # 是否自动提交偏移量,默认值是true,为了避免出现重复数据和数据丢失,可以把它设置为false,然后手动提交偏移量 + enable-auto-commit: true + # 键的反序列化方式 + key-deserializer: 
org.apache.kafka.common.serialization.StringDeserializer + # 值的反序列化方式 + value-deserializer: org.apache.kafka.common.serialization.StringDeserializer + #消费组 + group-id: test4 + #消费者并发线程数 + concurrency: 4 + #超时时间 + max-poll-interval-ms: 60000 + #listener: + # 在侦听器容器中运行的线程数。 + #concurrency: 5 + #listner负责ack,每调用一次,就立即commit + #ack-mode: manual_immediate + #missing-topics-fatal: false + boot: + admin: + client: + url: http://172.16.12.55:8001 + instance: + service-base-url: http://10.10.143.85:8010 + +management: + endpoints: + web: + exposure: + include: "*" + endpoint: + health: + show-details: always + health: + elasticsearch: + enabled: false + +customize-kafka: + bootstrap-servers: 172.18.1.119:9992 + producer: + topic: analyze +task: + task-queue-path: ../data/taskQueue.txt +gofast: + profix: + host: http://172.18.1.146:8080 +file: + download: + dir: ./file/ + ocrApi: http://10.0.32.238:10004/ocr/arm + uploadUrl: http://172.18.1.130:9985/group33/upload diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..a9fb765 --- /dev/null +++ b/src/main/resources/logback-spring.xml @@ -0,0 +1,36 @@ + + + + + + + + + true + + ${logging.level} + + + ${logging.path}/appendix_uploadInfo.log + + + ${logging.path}/appendix_uploadInfo.log.%d{yyyy-MM-dd} + 7 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n + UTF-8 + + + + + + + + diff --git a/src/test/java/com/bfd/AppTest.java b/src/test/java/com/bfd/AppTest.java new file mode 100644 index 0000000..683d5d6 --- /dev/null +++ b/src/test/java/com/bfd/AppTest.java @@ -0,0 +1,53 @@ +//package com.bfd; +// +//import java.io.File; +//import java.io.IOException; +//import java.util.HashMap; +//import java.util.List; +//import java.util.Map; +//import java.util.concurrent.ConcurrentHashMap; +// +//import org.apache.commons.io.FileUtils; +// +//import com.alibaba.fastjson.JSONObject; +//import com.bfd.upload.utils.ExcelUtils; +// +// +///** 
+// * Unit test for simple App. +// */ +//public class AppTest { +// +// public static void main(String[] args) throws IOException { +// Map json = ExcelUtils.parse("C:\\Users\\毛健\\Downloads\\results.xlsx"); +// System.out.println(com.alibaba.fastjson.JSONObject.toJSONString(json)); +// List> list = (List>) json.get("Sheet"); +// for (int i = 0; i < list.size(); i++) { +// Map map = list.get(i); +// String caption = (String) map.get("外挂字幕文件名称"); +// String audio = (String) map.get("音频文件名称"); +// String video = (String) map.get("视频文件名称"); +// /////////////////////////////////////////////////// +// String captionUrl = (String) map.get("字幕文件地址"); +// String audioUrl = (String) map.get("音频wav文件地址"); +// String videoUrl = (String) map.get("视频文件地址"); +// if (captionUrl.contains(caption)&&videoUrl.contains(video)){ +// +// }else{ +// System.out.println(i); +// } +// } +//// File file = new File("C:\\Users\\毛健\\Downloads\\count.txt"); +//// List list = FileUtils.readLines(file); +//// ConcurrentHashMap map = new ConcurrentHashMap(); +//// for (String string : list) { +//// int i = 1; +//// if(map.containsKey(string.replaceAll("\\..*", ""))){ +//// i = map.get(string.replaceAll("\\..*", "")); +//// i++; +//// } +//// map.put(string.replaceAll("\\..*", ""), i); +//// } +//// System.out.println(JSONObject.toJSONString(map)); +// } +//}