commit 43506f85b62c6335ba63d181221cff5db8bd4b33 Author: 55007 <55007@maojian> Date: Tue Jan 7 17:09:39 2025 +0800 数据加工应用 diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..1a0a8d2 --- /dev/null +++ b/.classpath @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69f78ae --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/logs/ +/target/ \ No newline at end of file diff --git a/.project b/.project new file mode 100644 index 0000000..2184d36 --- /dev/null +++ b/.project @@ -0,0 +1,23 @@ + + + data_forge + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..839d647 --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,5 @@ +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 +encoding//src/main/resources=UTF-8 +encoding//src/test/java=UTF-8 +encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..71df522 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,9 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.methodParameters=generate +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..f897a7f --- /dev/null +++ b/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/README.md b/README.md new file mode 100644 index 0000000..09047c2 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +数据锻造应用,主要对字段进行加工处理 diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..04c063d --- /dev/null +++ b/pom.xml @@ -0,0 +1,232 @@ + + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 2.2.4.RELEASE + + com.bfd + data_forge + 0.0.1-SNAPSHOT + + data_forge + + http://www.example.com + + + UTF-8 + 1.8 + 1.8 + + + + + junit + junit + 4.11 + test + + + org.springframework.boot + spring-boot-starter-web + + + com.google.code.gson + gson + 2.8.8 + + + org.springframework.boot + spring-boot-test + + + + org.springframework + spring-test + 5.0.10.RELEASE + test + + + commons-io + commons-io + 1.4 + + + com.alibaba + fastjson + 2.0.17 + + + + com.mchange + c3p0 + 0.9.5.5 + + + mysql + mysql-connector-java + 8.0.29 + + + + com.squareup.okhttp3 + okhttp + 4.9.3 + + + org.apache.httpcomponents + httpclient + 4.5.3 + + + commons-lang + commons-lang + 2.6 + + + + org.jetbrains.kotlin + kotlin-reflect + 1.6.21 + runtime + + + + org.jsoup + jsoup + 1.8.1 + + + + de.codecentric + spring-boot-admin-starter-client + 2.2.4 + + + org.projectlombok + lombok + + + org.springframework.kafka + spring-kafka + + + + + cn.hutool + hutool-all + 5.8.5 + + + junit + junit + + + + p6spy + p6spy + 3.9.0 + + + + commons-collections + commons-collections + 3.2.2 + + + + + + + + + + maven-clean-plugin + 3.1.0 + + + + maven-resources-plugin + 3.0.2 + + + maven-compiler-plugin + 3.8.0 + + + maven-surefire-plugin + 2.22.1 + + + maven-jar-plugin + 3.0.2 + + + maven-install-plugin + 2.5.2 + + + maven-deploy-plugin + 2.8.2 + + + + maven-site-plugin + 3.7.1 + + + maven-project-info-reports-plugin + 3.0.0 + + + + + org.springframework.boot + spring-boot-maven-plugin + + com.bfd.function.Application + ZIP + + + ${project.groupId} + ${project.artifactId} + + + + + + + repackage + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.1.1 + + + copy + package + + copy-dependencies + + + jar + jar + runtime + ${project.build.directory}/libs + + + + + + + + diff --git a/src/main/java/com/bfd/function/Application.java b/src/main/java/com/bfd/function/Application.java new file mode 100644 index 0000000..51bd591 --- /dev/null +++ b/src/main/java/com/bfd/function/Application.java @@ -0,0 +1,25 @@ +package com.bfd.function; + + + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.kafka.annotation.EnableKafka; +import org.springframework.scheduling.annotation.EnableScheduling; + +/** + * 主入口 + * + * @author jian.mao + * @date 2023年7月4日 + * @description + */ +@SpringBootApplication +@EnableScheduling +@EnableKafka +public class Application { + + public static void main(String[] args) { + SpringApplication.run(Application.class, args); + } +} \ No newline at end of file diff --git a/src/main/java/com/bfd/function/cache/ConfigCache.java b/src/main/java/com/bfd/function/cache/ConfigCache.java new file mode 100644 index 0000000..6488b3c --- /dev/null +++ b/src/main/java/com/bfd/function/cache/ConfigCache.java @@ -0,0 +1,37 @@ +package com.bfd.function.cache; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.LinkedBlockingDeque; + +import lombok.extern.slf4j.Slf4j; + +/** + * @author jian.mao + * @date 2022年11月11日 + * @description 静态变量类 + */ +@Slf4j +public class ConfigCache { + + /**启动条件**/ + public static boolean isStart = true; + /*****任务队列*****/ + public static LinkedBlockingDeque> taskQueue = new LinkedBlockingDeque>(); + /****数据合并容器****/ + public static Map mergeDataMap = new HashMap(16); + + /** + * 队列录入任务 + * @param queue + * @param task + */ + public static void putQueue(LinkedBlockingDeque> queue,Map task){ + //next app 写入队列准备调出 + try { + queue.put(task); + } catch (InterruptedException e) { + log.error("队列写入data失败---"); + } + } +} diff --git a/src/main/java/com/bfd/function/controller/FunctionContrller.java b/src/main/java/com/bfd/function/controller/FunctionContrller.java new file mode 100644 index 0000000..24ee14e --- /dev/null +++ b/src/main/java/com/bfd/function/controller/FunctionContrller.java @@ -0,0 +1,41 @@ +package com.bfd.function.controller; + +import javax.annotation.Resource; + +import lombok.extern.slf4j.Slf4j; + +import org.springframework.stereotype.Controller; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.ResponseBody; + +import com.bfd.function.service.FuncrionService; + + +/** + * @author jian.mao + * @date 2023年11月9日 + * @description + */ +@Controller +@RequestMapping("/function") +@Slf4j +public class FunctionContrller { + @Resource + private FuncrionService funcrionService; + @PostMapping("/achieve") + @ResponseBody + public String achieve(@RequestBody String dataJson){ + String response = funcrionService.achieve(dataJson); + return response; + } + + + @RequestMapping(value = "/hello", method = RequestMethod.GET) + @ResponseBody + public String hello(String param, String token) { + return "123"; + } +} diff --git a/src/main/java/com/bfd/function/entity/Constants.java b/src/main/java/com/bfd/function/entity/Constants.java new file mode 100644 index 0000000..16be4d9 --- /dev/null +++ b/src/main/java/com/bfd/function/entity/Constants.java @@ -0,0 +1,193 @@ +package com.bfd.function.entity; + + +/** + * 常量实体类 + * + * @author jian.mao + * @date 2022年11月15日 + * @description + */ +public class Constants { + + /*************************蓝图常量key名称*********************************/ + public final static String SCHEDULING = "scheduling"; + public final static String TYPE = "type"; + public final static String INTERVAL = "interval"; + public final static String CREATED = "created"; + public final static String LAST_EDIT = "last_edit"; + public final static String BLUEPRINT_ID = "blueprint_id"; + public final static String BLUEPRINTID = "blueprintId"; + public final static String BLUEPRINT_NAME = "name"; + public final static String SCENARIO = "scenario"; + public final static String AUTOCOMMITTRIGGERLAST = "autoCommitTriggerLast"; + public final static String FRESHVARIABLES = "freshVariables"; + public final static String AUTOCOMMIT = "autoCommit"; + public final static String MAXERRORS = "maxErrors"; + public final static String DATALOSS = "dataloss"; + public final static String POSITION = "position"; + public final static String SCENES_ID = "scenes_id"; + public final static String SCENESID = "scenesId"; + public final static String MULTI_BRANCH = "multi_branch"; + + public final static String SINGLE = "single"; + /** + * 已重试次数 + **/ + public final static String ERROR_TIME = "error_time"; + public final static String PREVIOUS_RESULT = "previous_result"; + + /****数据id*****/ + public final static String BUSINESSKEY = "businessKey"; + + + /*************************metadata常量key名称*********************************/ + public final static String LABEL_COL = "label_col"; + public final static String LABEL = "label"; + public final static String USER = "user"; + public final static String ADMIN = "admin"; + public final static String ADDRESS = "address"; + public final static String DATASOURCE = "datasource"; + public final static String INDEX = "index"; + + /*************************app常量key名称*********************************/ + public final static String APPS = "apps"; + public final static String TRANSFER_ID = "transfer_id"; + public final static String MODULE = "module"; + public final static String VERSION = "version"; + public final static String METADATA = "metadata"; + public final static String APP_NAME = "name"; + public final static String DESCRIBE = "describe"; + public final static String NEXT_APP_ID = "next_app_id"; + public final static String EDGE_ID = "edge_id"; + public final static String START_ID = "start_id"; + public final static String END_ID = "end_id"; + + public final static String WAIT_CONDITION = "wait_condition"; + public final static String START_TAG = "start_tag"; + + /*************************module类型*********************************/ + public final static String FILE = "file"; + public final static String OCR = "OCR"; + public final static String FILTER = "Filter"; + public final static String CHATGPT = "ChatGPT"; + public final static String MYSQL = "mysql"; + + /*************************other类型*********************************/ + public final static String UNDERLINE = "_"; + public final static String RESULT_TOPIC = null; + public static final String EMPTY = ""; + public static final String HTTP = "http"; + public static final String REQUEST_ERROR_MESSAGE = "Download failed error is"; + public static final String REQUEST_RESULT = "result"; + public static final String REQUEST_RESULT_RESULTS = "results"; + public static final String MAP_TYPE = "Map"; + public static final String LIST_TYPE = "List"; + public static final String STRING_TYPE = "String"; + public static final String DOCUMENT_TYPE = "doc"; + public static final String FILTER_ZH = "过滤器"; + + public static final String JSON_SELE_SYMBOL = "$."; + public static final String LEFT_BRACKETS = "["; + public static final String RIGTH_BRACKETS = "]"; + public static final String TASKTYPE = "taskType"; + public static final Integer USER_TYPE = 1; + public static final Integer KEYWORD_TYPE = 0; + public static final Integer DETAIL_TYPE = 2; + public static final String CID = "cid"; + public static final String SITETYPE = "siteType"; + public static final Integer DEFULT_SUBJECTID = 304864; + public static final Integer DEFULT_CRAWLCYCLICITYTIME = 1440; + public static final String CRAWLENDTIME = "crawlEndTime"; + public static final String CRAWLSTARTTIME = "crawlStartTime"; + public static final String CRAWLPAGETYPES = "crawlPageTypes"; + public static final String APPID = "113ic"; + public static final String APP_ID = "appId"; + public final static String ID = "id"; + public static final Integer DEFULT_CRAWLPERIODHOUR = 24; + public static final String CREATEUSERID = "662015832180933762"; + public static final String CRAWL_ADD_URL = "https://caiji.percent.cn/api/crawl/remote/task/save"; + public static final String CRAWLKEYWORD = "crawlKeyword"; + public static final String ATTACHTAG = "attachTag"; + public static final String ATTACHTAG_VALUE = "analyze"; + public static final String KEYWORD = "keyword"; + public static final String SITEID = "siteId"; + public static final String RESULTS = "results"; + public static final String RESULT = "result"; + public static final String CRAWLDATAFLAG = "crawlDataFlag"; + public static final String CRAWLDATAFLAG_PREFIX = "\"crawlDataFlag\":\"keyword:"; + public static final String TID = "tid"; + public static final Long TIME_OUT = 1800000L; + public static final String ATTR = "attr"; + public static final String HASVIDEO = "hasVideo"; + public static final String CRAWL_END_MARK = "crawl_end_mark"; + public static final String CRAWL_END_MESSAGE = "crawl_end_message"; + public static final String CRAWL_END_MESSAGE_VALUE = "数据采集完成"; + public static final String SUBJECTID = "subjectId"; + public static final String TASKID = "taskId"; + public static final int SUCCESS_CODE = 200; + public static final String WEB_URL_SUFFIX = "/api/aogeo/api/cda/caiji/status"; + public static final String STATUS = "status"; + /************************redis*************************************/ + public static final String LOCK_KEY = "myLock"; + public static final long LOCK_EXPIRE_TIME = 300000; + + /************************应用参数*************************************/ + public static final String CODE = "code"; + public static final String MESSAGE = "message"; + public static final String INPUT = "input"; + public static final String OUTPUT = "output"; + public static final String FORM = "form"; + public static final String FIELD = "field"; + public static final String VALUE = "value"; + public static final String DATA = "data"; + public static final String COLON_EN = ":"; + public static final String DATABASE = "database"; + public static final String TABLE = "table"; + public static final String USERNAME = "username"; + public static final String PASSWORD = "password"; + public static final String PORT = "port"; + public static final String HOSTNAME = "hostname"; + public static final String DATATYPE = "dataType"; + public static final String RULES = "rules"; + public static final String GENID = "genId"; + public static final String KEY = "key"; + public static final String DATAID = "dataId"; + public static final String CLASSIFY = "classify"; + public static final String SOURCE_DATA_ID = "source_data_id"; + public static final String APP_CODE = "app_code"; + public static final String SUBJECT_ID = "subject_id"; + public static final String PARAM = "param"; + public static final String PROCESSING = "processing"; + public static final String REGEX = "regex"; + public static final String SPLIT = "split"; + public static final String NEW_CHAR = "new_char"; + public static final String OLD_CHAR = "old_char"; + public static final String SPLIC = "splic"; + public static final String SIZE = "size"; + public static final String CONTENT = "content"; + public static final String ISSEND = "isSend"; + public static final String ISSPLIT = "isSplit"; + public static final String MERGETIMEMILLIS = "mergeTimeMillis"; + public static final String TASK = "task"; + public static final String ISTIMEOUT = "isTimeOut"; + public static final String TIMEOUT_MERGE_RESULT = "timeoutMergeResult"; + public static final String BULID_TYPE = "bulid_type"; + public static final String GENERICS = "generics"; + public static final String PARSE_TYPE = "parse_type"; + public static final String KEYS = "keys"; + public static final String ISJSONSTRING = "isJsonString"; + /** + * 不需要DataUtil解析的Key + */ + public static final String NOT_KEY = ":$"; + public static final String ENCRYPT_TYPE = "encrypt_type"; + public static final String ENCRYPT_MODE = "mode"; + public static final String FIELD_MAPPING = "field_mapping"; + public static final String BUILD_DATA = "build_data"; + public static final String OUTPUT_TYPE = "output_type"; + + public static final String TRACE = "trace"; + public static final String ISLAST = "isLast"; + public static final String IS_DIFFUSION = "is_diffusion"; +} diff --git a/src/main/java/com/bfd/function/handler/MainHandler.java b/src/main/java/com/bfd/function/handler/MainHandler.java new file mode 100644 index 0000000..b44b3ca --- /dev/null +++ b/src/main/java/com/bfd/function/handler/MainHandler.java @@ -0,0 +1,151 @@ +package com.bfd.function.handler; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.LinkedBlockingDeque; + +import javax.annotation.Resource; + +import lombok.extern.slf4j.Slf4j; + +import org.apache.commons.io.FileUtils; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.ApplicationArguments; +import org.springframework.boot.ApplicationRunner; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.function.cache.ConfigCache; +import com.bfd.function.process.FunctionAchievePorcess; +import com.bfd.function.process.MonitorMergeDataProcess; +import com.bfd.function.utils.FileUtil; + + +/** + * 启动处理入口 + * @author jian.mao + * @date 2023年11月3日 + * @description + */ + +@Component +@Order(value = 1) +@Slf4j +public class MainHandler implements ApplicationRunner { + + @Value("${task.task-queue-path}") + private String taskPath; + @Value("${merge.data-path}") + private String mergeDataPath; + @Resource + private FunctionAchievePorcess functionAchievePorcess; + @Resource + private MonitorMergeDataProcess monitorMergeDataProcess; + @Override + public void run(ApplicationArguments args) throws Exception { + //读取合并数据容器 + readMergeDataMap(mergeDataPath,ConfigCache.mergeDataMap); + log.info("开启数据处理线程-----"); + new Thread(functionAchievePorcess).start(); + log.info("开启合并超时处理线程----"); + new Thread(monitorMergeDataProcess).start(); + //停止处理 + waitDown(); + //启动加载缓存任务 + readTask(taskPath,ConfigCache.taskQueue); + + } + + + + + @SuppressWarnings("unchecked") + public static void readMergeDataMap(String path,Map map){ + File file = new File(path); + if(file.exists()){ + List tasks = null; + try { + tasks = FileUtils.readLines(file,"UTF-8"); + } catch (IOException e) { + e.printStackTrace(); + } + for (String task : tasks) { + Map loadCacheMap = JSONObject.parseObject(task); + for (Entry entry : loadCacheMap.entrySet()) { + map.put(entry.getKey(), entry.getValue()); + } + } + file.delete(); + } + } + + @SuppressWarnings("unchecked") + public static void readTask(String path,LinkedBlockingDeque> queue){ + File file = new File(path); + if(file.exists()){ + List tasks = null; + try { + tasks = FileUtils.readLines(file,"UTF-8"); + } catch (IOException e) { + e.printStackTrace(); + } + for (String taskStr : tasks) { + Map task = JSONObject.parseObject(taskStr); + try { + queue.put(task); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + file.delete(); + } + } + /** + * 结束触发钩子 + */ + public void waitDown() { + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + // 停止线程 + ConfigCache.isStart = false; + log.info("stop-------"); + writeTsskToFile(); + + } + }); + } + + + /** + * 任务持久化到硬盘 + */ + public void writeTsskToFile(){ + while(true){ + if(ConfigCache.taskQueue.size() > 0 ){ + try { + Map task = ConfigCache.taskQueue.take(); + FileUtil.writeFile(taskPath, JSONObject.toJSONString(task)); + } catch (InterruptedException e) { + e.printStackTrace(); + } + }else{ + log.info("taskQueue write is file end"); + break; + } + } + } + /** + * 任务持久化到硬盘 + */ + public void writeMergeMapToFile(){ + if(!ConfigCache.mergeDataMap.isEmpty()){ + FileUtil.writeFile(mergeDataPath, JSONObject.toJSONString(ConfigCache.mergeDataMap)); + } + log.info("mergeMapp write is file end"); + } +} diff --git a/src/main/java/com/bfd/function/process/FunctionAchievePorcess.java b/src/main/java/com/bfd/function/process/FunctionAchievePorcess.java new file mode 100644 index 0000000..a2c5710 --- /dev/null +++ b/src/main/java/com/bfd/function/process/FunctionAchievePorcess.java @@ -0,0 +1,437 @@ +package com.bfd.function.process; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.bfd.function.utils.*; + +import lombok.extern.slf4j.Slf4j; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.function.cache.ConfigCache; +import com.bfd.function.entity.Constants; + +/** + * @author jian.mao + * @date 2024年2月22日 + * @description + */ +@Component +@Slf4j +public class FunctionAchievePorcess implements Runnable { + @Autowired + private SpringBootKafka springBootKafka; + @Value("${customize-kafka.producer.topic}") + private String topic; + + private static final int REGULAR_MATCH_TYPE = 1; + private static final int SPLIT_TYPE = 2; + private static final int REGULAR_REPLACE_TYPE = 3; + private static final int STRING_REPLACE_TYPE = 4; + private static final int STRING_SPLIC_TYPE = 5; + private static final int DATA_BUILD_TYPE = 6; + private static final int DATA_PARSE_TYPE = 7; + private static final int BUILDORPARSE_LIST_TYPE = 1; + private static final int BUILDORPARSE_MAP_TYPE = 2; + private static final int IS_JSON_STRING = 1; + private static final int GENERICS_MAP_TYPE = 1; + private static final int GENERICS_STRING_TYPE = 2; + private static final int DATA_ENCRYPT_TYPE = 8; + private static final int ENCRYPT_MD5_TYPE = 1; + private static final int ENCRYPT_BASE64_TYPE = 2; + private static final int ENCRYPT_WHOLE_TYPE = 1; + private static final int ENCRYPT_ELEMENT_TYPE = 2; + private static final int PARSE_JSON_TYPE = 9; + private static final int BUILD_JSON_TYPE = 10; + private static final int ARRAY_ITERATE_TYPE = 11; + private static final Object LOCK = new Object(); + + @Override + public void run() { + while (ConfigCache.isStart) { + Map task = null; + try { + task = ConfigCache.taskQueue.take(); + log.info("任务队列长度:{}", ConfigCache.taskQueue.size()); + //输入字段 + Map input = (Map) task.get(Constants.INPUT); + //输出字段 + Map output = (Map) task.get(Constants.OUTPUT); + //data + Map data = (Map) task.get(Constants.DATA); + //businessKey + String businessKey = (String) task.get(Constants.BUSINESSKEY); + //businessKey + String appCode = (String) task.get(Constants.APP_CODE); + //获取字段value + Object field = null; + if (input.containsKey(Constants.FIELD)) { + field = DataUtil.getValue((String) input.get(Constants.FIELD), data); + } + //获取处理过程 + List> processing = (List>) input.get(Constants.PROCESSING); + //字段处理 + String mergeId = businessKey + appCode; + Map processResult = null; + if (task.containsKey(Constants.TIMEOUT_MERGE_RESULT + appCode)) { + //合并超时 生产 + processResult = (Map) task.get(Constants.TIMEOUT_MERGE_RESULT + appCode); + } else { + processResult = fieldHandling(field, processing, mergeId, task); + } + if (!(boolean) processResult.get(Constants.ISSEND)) { + //合并类型 没完成不进行发送 + continue; + } + //结果集 + Map result = new HashMap(16); + Map results = new HashMap(16); + //分两种发送 + if (processResult.get(Constants.DATATYPE).equals(Constants.LIST_TYPE)) { + //切割类型逐条发送 + List fieldValues = (List) processResult.get(Constants.VALUE); + for (int i = 0; i < fieldValues.size(); i++) { + Object value = fieldValues.get(i); + //保证总调度的es结果显示不被消重,需要每条记录重置dataid + task.put(Constants.DATAID, UUID.randomUUID().toString()); + //遍历入库返回结果,拼接响应内容 + for (String key : processResult.keySet()) { + if (output.containsKey(key)) { + if (key.equals(Constants.VALUE)) { + results.put(key, value); + } else { + results.put(key, processResult.get(key)); + } + } + } + //id生成 + processResult.put(Constants.ID, UUID.randomUUID().toString()); + //最后一条数据体加标识 + if (i == (fieldValues.size()-1)){ + results.put(Constants.ISLAST, 1); + } + //遍历入库返回结果,拼接响应内容 + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + task.put(Constants.RESULT, result); + //扩散类型标识 + task.put(Constants.IS_DIFFUSION, true); + //先生成对象,避免线程不安全的问题 + String response = JSONObject.toJSONString(task); + //发送kafka + springBootKafka.send(topic, response); + log.info("数据流转至下游-------"); + if(task.containsKey(Constants.TRACE) && (boolean)task.get(Constants.TRACE)){ + log.info("调试模式任务,发送一条跳出----"); + break; + } + } + } else { + //其他直接发送即可 + //遍历入库返回结果,拼接响应内容 + for (String key : processResult.keySet()) { + if (output.containsKey(key)) { + results.put(key, processResult.get(key)); + } + } + //遍历入库返回结果,拼接响应内容 + results.put(Constants.ISLAST, 1); + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + result.put(Constants.MESSAGE, "成功"); + result.put(Constants.STATUS, 1); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic, JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + } + } catch (Exception e) { + log.error("结果组装异常,", e); + //结果集 + Map result = new HashMap(16); + Map results = new HashMap(16); + //遍历入库返回结果,拼接响应内容 + results.put(Constants.CONTENT, e.getMessage()); + results.put(Constants.ISLAST, 1); + result.put(Constants.RESULTS, JSONObject.toJSONString(results)); + result.put(Constants.MESSAGE, "异常"); + result.put(Constants.STATUS, 2); + task.put(Constants.RESULT, result); + //发送kafka + springBootKafka.send(topic, JSONObject.toJSONString(task)); + log.info("数据流转至下游-------"); + } + + } + } + + /** + * @param field 被处理的字段 + * @param processing 处理过程(可多个) + * @param mergeId 合并id + * @return + */ + private Map fieldHandling(Object field, List> processing, String mergeId, Map task) { + Map processResult = new HashMap(16); + processResult.put(Constants.DATATYPE, Constants.STRING_TYPE); + //结果发送标识 + processResult.put(Constants.ISSEND, true); + Object newField = field; + for (Map map : processing) { + int type = (int) map.get(Constants.TYPE); + if (type == REGULAR_MATCH_TYPE) { + //正则匹配 + String regex = (String) map.get(Constants.REGEX); + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(newField.toString()); + if (matcher.find()) { + newField = matcher.group(); + } else { + newField = Constants.EMPTY; + } + } else if (type == SPLIT_TYPE) { + //split 切割 + String split = (String) map.get(Constants.SPLIT); + List list = Arrays.asList(newField.toString().split(split)); + newField = list; + break; + } else if (type == REGULAR_REPLACE_TYPE) { + //正则替换 + String regex = (String) map.get(Constants.REGEX); + String new_char = (String) map.get(Constants.NEW_CHAR); + newField = newField.toString().replaceAll(regex, new_char); + } else if (type == STRING_REPLACE_TYPE) { + //普通字符串替换 + String old_char = (String) map.get(Constants.OLD_CHAR); +// log.info("原始内容:" + old_char); + String new_char = (String) map.get(Constants.NEW_CHAR); +// log.info("被替换的内容:" + new_char); +// log.info("替换前的字符串:" + newField); + newField = newField.toString().replace(old_char, new_char); +// log.info("被替换后的字符串:" + newField); + } else if (type == STRING_SPLIC_TYPE) { + synchronized (LOCK) { + //内容合并 + Map mergeTask = null; + String splic = (String) map.get(Constants.SPLIC); + int size = Integer.valueOf(map.get(Constants.SIZE).toString()); + StringBuffer mergeContent = null; + if (ConfigCache.mergeDataMap.containsKey(mergeId)) { + mergeTask = (Map) ConfigCache.mergeDataMap.get(mergeId); + size = (int) mergeTask.get(Constants.SIZE); + log.info("累加数据合并,当前合并条数:{},合并id:{}", size, mergeId); + mergeContent = (StringBuffer) mergeTask.get(Constants.CONTENT); + mergeContent.append(splic); + mergeContent.append(newField); + } else { + log.info("数据合并首条:{}", mergeId); + mergeTask = new HashMap(16); + mergeContent = new StringBuffer(); + mergeContent.append(newField); + } + if (--size <= 0) { + //拼接结束 + log.info("数据合并结束:{}", mergeId); + newField = mergeTask.get(Constants.CONTENT).toString(); + ConfigCache.mergeDataMap.remove(mergeId); + } else { + mergeTask.put(Constants.SIZE, size); + mergeTask.put(Constants.CONTENT, mergeContent); + mergeTask.put(Constants.MERGETIMEMILLIS, System.currentTimeMillis()); + mergeTask.put(Constants.TASK, task); + ConfigCache.mergeDataMap.put(mergeId, mergeTask); + processResult.put(Constants.ISSEND, false); + } + } + } else if (type == DATA_BUILD_TYPE) { + //data + Map dataSource = (Map) task.get(Constants.DATA); + //数据组装 + int bulidType = (int) map.get(Constants.BULID_TYPE); + if (bulidType == BUILDORPARSE_LIST_TYPE) { + //组装为list + int generics = (int) map.get(Constants.GENERICS); + if (generics == GENERICS_MAP_TYPE) { + //泛型 是map + List data = (List) map.get(Constants.DATA); + for (Map item : data) { + for (Object key : item.keySet()) { + Object beforeValue = item.get(key); + Object value = DataUtil.getValue(beforeValue.toString(), dataSource); + item.put(key, value); + } + } + newField = data; + } else if (generics == GENERICS_STRING_TYPE) { + //泛型 是string + List data = (List) map.get(Constants.DATA); + List results = new ArrayList(); + for (String item : data) { + Object value = DataUtil.getValue(item, dataSource); + results.add(value.toString()); + } + newField = results; + } + } else if (bulidType == BUILDORPARSE_MAP_TYPE) { + Map data = (Map) map.get(Constants.DATA); + for (Object key : data.keySet()) { + Object beforeValue = data.get(key); + Object value = DataUtil.getValue(beforeValue.toString(), dataSource); + data.put(key, value); + } + newField = data; + } + } else if (type == DATA_PARSE_TYPE) { + //数据组装 + int parseType = (int) map.get(Constants.PARSE_TYPE); + if (parseType == BUILDORPARSE_LIST_TYPE) { + //组装为list + int generics = (int) map.get(Constants.GENERICS); + processResult.put(Constants.DATATYPE, Constants.LIST_TYPE); + int isJsonString = (int) map.get(Constants.ISJSONSTRING); + if (generics == GENERICS_MAP_TYPE) { + List> results = new ArrayList>(); + //泛型map + List keys = (List) map.get(Constants.KEYS); + if (isJsonString == IS_JSON_STRING) { + newField = JSON.parse((String) newField); + } + for (Map item : (List>) newField) { + Map parseResult = new HashMap(16); + for (String key : keys) { + if (item.containsKey(key)) { + parseResult.put(key, item.get(key)); + } + } + results.add(parseResult); + } + + newField = results; + + } else if (generics == GENERICS_STRING_TYPE) { + List results = new ArrayList(); + //泛型 String + for (String item : (List) newField) { + results.add(item); + } + newField = results; + } + + } else if (parseType == BUILDORPARSE_MAP_TYPE) { + List keys = (List) map.get(Constants.KEYS); + Map parseResult = new HashMap(16); + for (String key : keys) { + if (((Map) newField).containsKey(key)) { + parseResult.put(key, ((Map) newField).get(key)); + } + } + newField = parseResult; + } + } else if (type == DATA_ENCRYPT_TYPE) { + int mode = (int) map.get(Constants.ENCRYPT_MODE); + //字段加密 + int encrypTtype = (int) map.get(Constants.ENCRYPT_TYPE); + if (mode == ENCRYPT_WHOLE_TYPE) { + if (ENCRYPT_MD5_TYPE == encrypTtype) { + newField = EncryptionUtil.md5(newField.toString()); + } else if (ENCRYPT_BASE64_TYPE == encrypTtype) { + newField = EncryptionUtil.base64(newField.toString()); + } + } else { + if (newField instanceof List) { + List resultList = new ArrayList(); + for (Object o : (List) newField) { + if (ENCRYPT_MD5_TYPE == encrypTtype) { + resultList.add(EncryptionUtil.md5(o.toString())); + } else if (ENCRYPT_BASE64_TYPE == encrypTtype) { + resultList.add(EncryptionUtil.base64(o.toString())); + } + } + newField = resultList; + } else if (newField instanceof String) { + String s = JsonUtil.checkJsonType((String) newField); + if (!s.equals(Constants.LIST_TYPE)) { + newField = "数据类型或格式错误"; + } else { + List resultList = new ArrayList(); + JSONArray objects = JSON.parseArray((String) newField); + for (Object o : objects) { + if (ENCRYPT_MD5_TYPE == encrypTtype) { + resultList.add(EncryptionUtil.md5(o.toString())); + } else if (ENCRYPT_BASE64_TYPE == encrypTtype) { + resultList.add(EncryptionUtil.base64(o.toString())); + } + } + newField = resultList; + } + + } else { + newField = "数据类型或格式错误"; + } + } + }else if (type == PARSE_JSON_TYPE){ + //json解析 + if (newField instanceof String){ + //字符串 先转对象 + Map filedMap = JSONObject.parseObject((String)newField); + processResult.putAll(filedMap); + }else{ + //本身是对象无需转换 + Map filedMap = (Map) newField; + processResult.putAll(filedMap); + } + String id = UUID.randomUUID().toString(); + processResult.put(Constants.ID, id); + return processResult; + }else if(type == BUILD_JSON_TYPE){ + //data + Map dataSource = (Map) task.get(Constants.DATA); + Map buildData = (Map) map.get(Constants.BUILD_DATA); + Map result = new HashMap(16); + for (Entry entry: buildData.entrySet()) { + Object value = DataUtil.getValue(entry.getValue(),dataSource); + result.put(entry.getKey(), value); + } + int output_type = (int) map.get(Constants.OUTPUT_TYPE); + if(output_type == 1){ + newField = result; + }else if(output_type == 3){ + newField = JSONObject.toJSONString(result); + }else{ + List> resultList = new ArrayList>(); + for (Entry entry : result.entrySet()) { + Map item = new HashMap(16); + item.put(Constants.KEY, entry.getKey()); + item.put(Constants.VALUE, entry.getValue()); + resultList.add(item); + } + newField = resultList; + } + }else if(type == ARRAY_ITERATE_TYPE){ + if (newField instanceof String){ + //字符串 先转对象 + newField = JSONObject.parseArray((String)newField, Object.class); + } + //赋值迭代类型 + processResult.put(Constants.DATATYPE, Constants.LIST_TYPE); + } + } + String id = UUID.randomUUID().toString(); + processResult.put(Constants.ID, id); + processResult.put(Constants.VALUE, newField); + return processResult; + } + + +} \ No newline at end of file diff --git a/src/main/java/com/bfd/function/process/MonitorMergeDataProcess.java b/src/main/java/com/bfd/function/process/MonitorMergeDataProcess.java new file mode 100644 index 0000000..f50f2d9 --- /dev/null +++ b/src/main/java/com/bfd/function/process/MonitorMergeDataProcess.java @@ -0,0 +1,74 @@ +package com.bfd.function.process; + +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.Map.Entry; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.bfd.function.cache.ConfigCache; +import com.bfd.function.entity.Constants; +import com.bfd.function.utils.DateUtil; + +import lombok.extern.slf4j.Slf4j; + + +/** + * 监控数据合并容器 + * @author jian.mao + * @date 2024年3月21日 + * @description + */ +@Slf4j +@Component +public class MonitorMergeDataProcess implements Runnable{ + @Value("${merge.time}") + private Long timeOut; + @Override + public void run() { + while(ConfigCache.isStart){ + for (Entry entry : ConfigCache.mergeDataMap.entrySet()) { + String key = entry.getKey(); + Map value = (Map) entry.getValue(); + Long mergeTimeMillis = (Long) value.get(Constants.MERGETIMEMILLIS); + Long currentTimeMillis = System.currentTimeMillis(); + if(currentTimeMillis - mergeTimeMillis >= timeOut){ + log.info("合并数据等待超时,发送未统计完成的结果。合并标识:{}",key); + //超过过期时间了释放合并的数据 + Map task = biuldTask((Map) value.get(Constants.TASK)); + String appCode = task.get(Constants.APP_CODE).toString(); + Map processResult = new HashMap(16); + processResult.put(Constants.DATATYPE, Constants.STRING_TYPE); + String id = UUID.randomUUID().toString(); + processResult.put(Constants.ID, id); + processResult.put(Constants.VALUE, value.get(Constants.CONTENT).toString()); + //结果发送标识 + processResult.put(Constants.ISSEND, true); + task.put(Constants.TIMEOUT_MERGE_RESULT+appCode, processResult); + try { + ConfigCache.taskQueue.put(task); + } catch (InterruptedException e) { + e.printStackTrace(); + } + ConfigCache.mergeDataMap.remove(key); + } + } + DateUtil.sleep(30*1000); + } + } + + /** + * 任务重新构建,避免引用对象是同一个导致线程不安全 + * @param task + * @return + */ + private Map biuldTask(Map task){ + Map newTask = new HashMap(16); + for (Entry entry : task.entrySet()) { + newTask.put(entry.getKey(), entry.getValue()); + } + return newTask; + } +} diff --git a/src/main/java/com/bfd/function/service/FuncrionService.java b/src/main/java/com/bfd/function/service/FuncrionService.java new file mode 100644 index 0000000..eb1160b --- /dev/null +++ b/src/main/java/com/bfd/function/service/FuncrionService.java @@ -0,0 +1,17 @@ +package com.bfd.function.service; + +/** + * 数据聚合服务层接口 + * @author jian.mao + * @date 2024年1月16日 + * @description + */ +public interface FuncrionService { + + /** + * 聚合方法 + * @param dataJson + * @return + */ + public String achieve(String dataJson); +} diff --git a/src/main/java/com/bfd/function/service/impl/FuncrionServiceImpl.java b/src/main/java/com/bfd/function/service/impl/FuncrionServiceImpl.java new file mode 100644 index 0000000..dbdeb31 --- /dev/null +++ b/src/main/java/com/bfd/function/service/impl/FuncrionServiceImpl.java @@ -0,0 +1,55 @@ +package com.bfd.function.service.impl; + +import java.util.HashMap; +import java.util.Map; + +import lombok.extern.slf4j.Slf4j; + +import org.springframework.stereotype.Service; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.function.cache.ConfigCache; +import com.bfd.function.entity.Constants; +import com.bfd.function.service.FuncrionService; + +/** + * @author jian.mao + * @date 2024年2月22日 + * @description + */ +@Service +@Slf4j +public class FuncrionServiceImpl implements FuncrionService { + + @Override + public String achieve(String dataJson) { + // TODO Auto-generated method stub + Map response = new HashMap<>(16); + int code = 200; + String message = "success"; + Map task = null; + try { + task = JSONObject.parseObject(dataJson); + } catch (Exception e) { + log.error("参数结构不合法,",e); + code = 100010; + message = "参数不合法"; + } + //写入队列 + try { + if(task.containsKey(Constants.TRACE) && (boolean)task.get(Constants.TRACE)){ + ConfigCache.taskQueue.putFirst(task); + }else{ + ConfigCache.taskQueue.put(task); + } + } catch (InterruptedException e) { + log.error("任务写入等待队列异常,",e); + code = 100011; + message = "任务写入等待队列失败"; + } + response.put(Constants.CODE,code); + response.put(Constants.MESSAGE,message); + return JSONObject.toJSONString(response); + } + +} diff --git a/src/main/java/com/bfd/function/utils/DataUtil.java b/src/main/java/com/bfd/function/utils/DataUtil.java new file mode 100644 index 0000000..2e42e6d --- /dev/null +++ b/src/main/java/com/bfd/function/utils/DataUtil.java @@ -0,0 +1,65 @@ +package com.bfd.function.utils; + +import java.util.Map; + +import lombok.extern.slf4j.Slf4j; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.JSONPath; +import com.bfd.function.entity.Constants; + +/** + * @author:jinming + * @className:DataUtil + * @version:1.0 + * @description: 获取dataValue的值 + * @Date:2023/11/1 9:54 + */ +@Slf4j +public class DataUtil { + /** + * @param key 传入的key + * @param dataMap 数据map + * @return 根据传入的参数进行判断解析,返回正确的dataValue + */ + public static Object getValue(String key, Map dataMap) { + try { + //公式为空直接就返回 + if (key.equals(Constants.EMPTY)) { + return Constants.EMPTY; + } + if (!key.contains(Constants.NOT_KEY)) { + return key; + } + Object dataValue; + String isJson = "#json#"; + if (key.contains(isJson)) { + //进行第一次拆分,获取#json#前面的部分 + String[] keySplit = key.split(isJson); + String firstDataKey = keySplit[0]; + String[] firstDataKeySplit = firstDataKey.split(":"); + //取出前半部分对应的JSON数据并转换为JSONObject + String dataJson = (String) dataMap.get(firstDataKeySplit[0]); + JSONObject dataJsonObject = JSON.parseObject(dataJson); + //根据key的后半部分取出对应JSONObject中的值 + String firstDataKeyJson = (String) JSONPath.eval(dataJsonObject, firstDataKeySplit[1]); + String secDataKey = keySplit[1]; + JSONObject firstDataJsonObject = JSON.parseObject(firstDataKeyJson); + dataValue = JSONPath.eval(firstDataJsonObject, secDataKey); + return dataValue; + } + String[] keySplit = key.split(":"); + String jsonPath = keySplit[1]; + String dataJson = (String) dataMap.get(keySplit[0]); + JSONObject dataJsonObject = JSON.parseObject(dataJson); + dataValue = JSONPath.eval(dataJsonObject, jsonPath); + return dataValue; + } catch (Exception e) { + // TODO: handle exception + log.error("jsonpath公式取值异常,", e); + return null; + } + + } +} \ No newline at end of file diff --git a/src/main/java/com/bfd/function/utils/DateUtil.java b/src/main/java/com/bfd/function/utils/DateUtil.java new file mode 100644 index 0000000..cbc78f2 --- /dev/null +++ b/src/main/java/com/bfd/function/utils/DateUtil.java @@ -0,0 +1,177 @@ +package com.bfd.function.utils; + + +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Date; + +import lombok.extern.slf4j.Slf4j; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; + +/** + * 日期工具类 + * + * @author jian.mao + * @date 2022年11月15日 + * @description + */ +@Slf4j +public class DateUtil { + + /** + * @return + */ + public static String getTimeStrForNow() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH"); + return sdf.format(new Date()); + } + + + public static String getTimeStrForDay(long time) { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + + return sdf.format(new Date(time * 1000)); + } + + public static String getTimeStrForDay() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + + return sdf.format(new Date()); + } + + + public static String getDateTime() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + String time = sdf.format(new Date()); + return time; + } + + public static String getDateTime(Long timestap) { + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + String time = sdf.format(new Date(timestap)); + return time; + } + + public static String getDate(Long timestap) { + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + String time = sdf.format(new Date(timestap)); + return time; + } + + public static String getDateTimeForMonth() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMM"); + String time = sdf.format(new Date()); + return time; + } + + /** + * 休眠 + * + * @param millis 毫秒 + */ + public static void sleep(long millis) { + try { + Thread.sleep(millis); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + /** + * 1. @Description:时间戳转时间 + * 2. @Author: ying.zhao + * 3. @Date: 2023/3/28 + */ + + public static String timestampToDate(String time) { + int thirteen = 13; + int ten = 10; + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); +// if (time.length() == thirteen) { + if (time.length() > ten) { + return sdf.format(new Date(Long.parseLong(time))); + } else { + return sdf.format(new Date(Integer.parseInt(time) * 1000L)); + } + } + + public static String parseCreated(String jsonTime){ + String formattedDateTime = getDateTime(); + try { + // 使用fastjson解析JSON数据 + JSONObject jsonObject = JSON.parseObject(jsonTime); + // 获取日期和时间的值 + JSONObject dateObject = jsonObject.getJSONObject("date"); + int day = dateObject.getIntValue("day"); + int month = dateObject.getIntValue("month"); + int year = dateObject.getIntValue("year"); + + JSONObject timeObject = jsonObject.getJSONObject("time"); + int hour = timeObject.getIntValue("hour"); + int minute = timeObject.getIntValue("minute"); + int second = timeObject.getIntValue("second"); + + // 创建LocalDateTime对象 + LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute, second); + + // 定义日期时间格式化器 + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + // 格式化日期时间 + formattedDateTime = dateTime.format(formatter); + } catch (Exception e) { + log.info("日期转换失败:{}",e); + } + return formattedDateTime; + } + + /** + * 字符串转换日期 + * @param format + * @param date + * @return + */ + public static Date strToDate(String format,String date){ + SimpleDateFormat sdf = new SimpleDateFormat(format); + if (date == null || date.equals("")){ + return new Date(); + }else{ + Date ru = null; + try { + ru = sdf.parse(date); + } catch (ParseException e) { + e.printStackTrace(); + } + return ru; + } + } + /** + * 日期格式话 + * @param format 日期格式 + * @param dater 要转换的日期,默认当前时间 + * @return + */ + public static String FormatDate(String format,Date date){ + String fromatDate = null; + SimpleDateFormat sdf = new SimpleDateFormat(format); + if (date == null){ + fromatDate = sdf.format(new Date()); + }else{ + fromatDate = sdf.format(date); + } + return fromatDate; + } + public static void main(String[] args) { + String time = timestampToDate("955814400000"); + System.out.println(time); + } +} diff --git a/src/main/java/com/bfd/function/utils/DownLoadUtil.java b/src/main/java/com/bfd/function/utils/DownLoadUtil.java new file mode 100644 index 0000000..bfcadf5 --- /dev/null +++ b/src/main/java/com/bfd/function/utils/DownLoadUtil.java @@ -0,0 +1,907 @@ +package com.bfd.function.utils; + +import java.io.IOException; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.apache.http.HttpResponse; +import org.apache.http.NameValuePair; +import org.apache.http.StatusLine; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.AuthCache; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.HttpClient; +import org.apache.http.client.HttpRequestRetryHandler; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.config.SocketConfig; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.socket.LayeredConnectionSocketFactory; +import org.apache.http.conn.socket.PlainConnectionSocketFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.auth.BasicScheme; +import org.apache.http.impl.client.BasicAuthCache; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.client.LaxRedirectStrategy; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.bfd.function.entity.Constants; + + + + + + + +/** + * 下载工具类 + * @author jian.mao + * @date 2023年9月19日 + * @description + */ +public class DownLoadUtil { + + private static String ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"; + private final static Logger log = LoggerFactory.getLogger(DownLoadUtil.class); + /** 代理服务器(产品官网 www.16yun.cn) **/ + final static String PROXYHOST = "u270.40.tp.16yun.cn"; + final static Integer PROXYPORT = 6448; + /** 代理验证信息 **/ + final static String PROXYUSER = "16HFBVJC"; + final static String PROXYPASS = "897944"; + + private static PoolingHttpClientConnectionManager cm = null; + private static HttpRequestRetryHandler httpRequestRetryHandler = null; + private static HttpHost proxy = null; + + private static CredentialsProvider credsProvider = null; + private static RequestConfig reqConfig = null; + + static { + ConnectionSocketFactory plainsf = PlainConnectionSocketFactory + .getSocketFactory(); + LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory + .getSocketFactory(); + + Registry registry = RegistryBuilder.create().register("http", plainsf) + .register("https", sslsf).build(); + + cm = new PoolingHttpClientConnectionManager(registry); + cm.setMaxTotal(20); + cm.setDefaultMaxPerRoute(5); + + proxy = new HttpHost(PROXYHOST, PROXYPORT, "https"); + + credsProvider = new BasicCredentialsProvider(); + credsProvider.setCredentials(AuthScope.ANY, + new UsernamePasswordCredentials(PROXYUSER, PROXYPASS)); + + reqConfig = RequestConfig.custom().setConnectionRequestTimeout(5000) + .setConnectTimeout(5000).setSocketTimeout(5000) + .setExpectContinueEnabled(false) + .setProxy(new HttpHost(PROXYHOST, PROXYPORT)).build(); + } + + /** + * 模拟客户端get请求 + * + * @param url + * 模拟请求得url + * @param headers + * 头部信息,没有可以不传 + * @return + */ + @SafeVarargs + public static String proxyDoGet(String url, Map... headers) { + // 设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); + AuthCache authCache = new BasicAuthCache(); + authCache.put(proxy, new BasicScheme()); + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + CloseableHttpClient httpClient = httpBuilder + .setDefaultSocketConfig(socketConfig) + .setDefaultRequestConfig(config) + .setDefaultCredentialsProvider(credsProvider).build(); + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(reqConfig); + if (headers != null && headers.length > 0) { + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key, tempHeaders.get(key).toString()); + } + } else { + httpGet.setHeader("Accept", + "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + } + CloseableHttpResponse response = null; + String html = ""; + int notFundCode = 404; + int successCode = 200; + try { + response = httpClient.execute(httpGet, localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if (statusLine.getStatusCode() == successCode) { + if (responseEntity != null) { + html = EntityUtils.toString(responseEntity, "utf-8"); + System.out.println("响应内容长度为:" + + responseEntity.getContentLength()); + // 下载结果为空不正常 + if (html.equals(Constants.EMPTY)) { + html = "Download failed error is:reslut is null"; + } + } + } else if (statusLine.getStatusCode() == notFundCode) { + html = "

页面404,正常结束请求即可

"; + } else { + throw new Exception("请求错误,code码为:" + statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:reslut is null"; + }finally{ + try { + response.close(); + httpClient.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + return html; + + } + + + public static String httpsslProxyGet(String url, Map... headers) throws Exception { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + connManager.setMaxTotal(50); + connManager.setDefaultMaxPerRoute(10); + HttpClients.custom().setConnectionManager(connManager); + // 设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); + AuthCache authCache = new BasicAuthCache(); + authCache.put(proxy, new BasicScheme()); + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + CloseableHttpClient httpClient = httpBuilder + .setConnectionManager(connManager) + .setDefaultSocketConfig(socketConfig) + .setDefaultRequestConfig(config) + .setDefaultCredentialsProvider(credsProvider).build(); + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(reqConfig); + if (headers != null && headers.length > 0) { + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key, tempHeaders.get(key).toString()); + } + } else { + httpGet.setHeader("Accept", + "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + } + CloseableHttpResponse response = null; + String html = ""; + int notFundCode = 404; + int successCode = 200; + try { + response = httpClient.execute(httpGet, localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if (statusLine.getStatusCode() == successCode) { + if (responseEntity != null) { + html = EntityUtils.toString(responseEntity, "utf-8"); + System.out.println("响应内容长度为:" + + responseEntity.getContentLength()); + // 下载结果为空不正常 + if (html.equals(Constants.EMPTY)) { + html = "Download failed error is:reslut is null"; + } + } + } else if (statusLine.getStatusCode() == notFundCode) { + html = "

页面404,正常结束请求即可

"; + } else { + throw new Exception("请求错误,code码为:" + statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:reslut is null"; + }finally{ + try { + response.close(); + httpClient.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + return html; + + } + + + /** + * json参数方式POST提交 + * @param url + * @param params + * @return + */ + public static String doPost(String url, String params){ + String strResult = ""; + //设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(timeout * 1000) + .setTcpNoDelay(true).build(); +// AuthCache authCache = new BasicAuthCache(); +// authCache.put(proxy, new BasicScheme()); +// HttpClientContext localContext = HttpClientContext.create(); +// localContext.setAuthCache(authCache); + // 1. 获取默认的client实例 + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); + HttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); +// HttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).setConnectionManager(cm) +// .setDefaultCredentialsProvider(credsProvider).build(); + // 2. 创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + httpPost.addHeader("Content-Type", "application/json;charset=utf-8"); + HttpResponse resp = null; + try { + httpPost.setEntity(new StringEntity(params,"utf-8")); + resp = client.execute(httpPost); +// resp = client.execute(httpPost,localContext); + StatusLine statusLine = resp.getStatusLine(); + System.out.println("响应状态为:" + resp.getStatusLine()); + int notFundCode = 404; + int successCode = 200; + if(statusLine.getStatusCode() == successCode){ + // 7. 获取响应entity + HttpEntity respEntity = resp.getEntity(); + strResult = EntityUtils.toString(respEntity, "UTF-8"); + if(strResult.equals(Constants.EMPTY)){ + strResult = "Download failed error is:reslut is null"; + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + strResult = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + return strResult; + } + public static String httpPost(String url,String params) { + String html=""; + html = doPost(url,params); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(5000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = doPost(url,params); + } + return html; + } + /** + * 绕过验证 + * + * @return + * @throws NoSuchAlgorithmException + * @throws KeyManagementException + */ + public static SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException { + SSLContext sc = SSLContext.getInstance("SSLv3"); + + // 实现一个X509TrustManager接口,用于绕过验证,不用修改里面的方法 + X509TrustManager trustManager = new X509TrustManager() { + @Override + public void checkClientTrusted( + java.security.cert.X509Certificate[] paramArrayOfX509Certificate, + String paramString) throws CertificateException { + } + + @Override + public void checkServerTrusted( + java.security.cert.X509Certificate[] paramArrayOfX509Certificate, + String paramString) throws CertificateException { + } + + @Override + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return null; + } + }; + + sc.init(null, new TrustManager[] { trustManager }, null); + return sc; + } + /** + * 模拟请求 + * + * @param url 资源地址 + * @param map 参数列表 + * @param encoding 编码 + * @return + * @throws NoSuchAlgorithmException + * @throws KeyManagementException + * @throws IOException + * @throws ClientProtocolException + */ + public static String httpsslGet(String url,Map ... headers) { + String html=""; + CloseableHttpClient client = null; + HttpEntity responseEntity = null; + CloseableHttpResponse response = null; + try { + log.debug("DownLoadUtil------------->设置下载相关信息, start...."); + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + connManager.setMaxTotal(50); + connManager.setDefaultMaxPerRoute(10); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 30; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + // 设置重定向策略 + LaxRedirectStrategy redirectStrategy = new LaxRedirectStrategy(); + //创建自定义的httpclient对象 + client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setRedirectStrategy(redirectStrategy).setDefaultSocketConfig(socketConfig).setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36").build(); +// CloseableHttpClient client = HttpClients.createDefault(); + + HttpGet httpGet = new HttpGet(url); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + log.debug("DownLoadUtil------------->设置下载相关信息, end...."); + try { + int notFundCode = 404; + int successCode = 200; + log.debug("DownLoadUtil------------->下载执行,start...."); + httpGet.setConfig(config); + response = client.execute(httpGet); + log.debug("DownLoadUtil------------->下载执行,end...."); + // 从响应模型中获取响应实体 + StatusLine statusLine = response.getStatusLine(); + log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is start ...."); + System.out.println("响应状态为:" + response.getStatusLine()); + responseEntity = response.getEntity(); + log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is end ...."); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + System.out.println("响应内容长度为:" + responseEntity.getContentLength()); + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + }finally{ + try { + responseEntity.getContent().close(); + response.close(); + client.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + + return html; + } + + public static String httpSSLGet(String url,Map ... headers) { + String html=""; + html = httpsslGet(url,headers); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = httpsslGet(url,headers); + } + return html; + } + public static String doPostFrom(String url,Map param,Map ... headers){ + //设置超时时间 + int timeout = 15; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); +// AuthCache authCache = new BasicAuthCache(); +// authCache.put(proxy, new BasicScheme()); +// HttpClientContext localContext = HttpClientContext.create(); +// localContext.setAuthCache(authCache); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); +// HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).setConnectionManager(cm) +// .setDefaultCredentialsProvider(credsProvider).build(); + HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"); + httpPost.addHeader("accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.addHeader("content-type", "application/x-www-form-urlencoded"); + httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"); +// httpPost.addHeader("Referer", "http://www.neeq.com.cn/rule/Business_rules.html"); + } + // 创建请求参数 + List list = new LinkedList<>(); + for (String key : param.keySet()) { + BasicNameValuePair param1 = new BasicNameValuePair(key,param.get(key).toString()); + list.add(param1); + } + // 使用URL实体转换工具 + String html=""; + try { + UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8"); + httpPost.setEntity(entityParam); + HttpResponse response = httpClient.execute(httpPost); +// HttpResponse response = httpClient.execute(httpPost,localContext); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + int notFundCode = 404; + int successCode = 200; + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + + return html; + + } + public static String httpPostForm(String url,Map params,Map ... headers) { + String html=""; + html = doPostFrom(url,params); + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(5000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + html = doPostFrom(url,params,headers); + } + return html; + } + + public static String dosslPost(String url,String params,Map ... headers) { + String html=""; + CloseableHttpClient client = null; + HttpEntity responseEntity = null; + CloseableHttpResponse response = null; + try { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 5; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + //创建自定义的httpclient对象 + client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build(); +// CloseableHttpClient client = HttpClients.createDefault(); + // 2. 创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + httpPost.addHeader("Content-Type", "application/json;charset=utf-8"); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + + try { + httpPost.setEntity(new StringEntity(params,"utf-8")); + response = client.execute(httpPost); + int notFundCode = 404; + int successCode = 200; + // 从响应模型中获取响应实体 + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + responseEntity = response.getEntity(); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + System.out.println("响应内容长度为:" + responseEntity.getContentLength()); + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + }finally{ + try { + responseEntity.getContent().close(); + response.close(); + client.close(); + } catch (UnsupportedOperationException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + } + + + return html; + } + public static String dosslPostForm(String url,Map param,Map ... headers) { + String html=""; + try { + //采用绕过验证的方式处理https请求 + SSLContext sslcontext = createIgnoreVerifySSL(); + + // 设置协议http和https对应的处理socket链接工厂的对象 + Registry socketFactoryRegistry = RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.INSTANCE) + .register("https", new SSLConnectionSocketFactory(sslcontext)) + .build(); + PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); + HttpClients.custom().setConnectionManager(connManager); + //设置超时时间 + int timeout = 5; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + //创建自定义的httpclient对象 + CloseableHttpClient client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build(); +// CloseableHttpClient client = HttpClients.createDefault(); + // 2. 创建httppost实例 + HttpPost httpPost = new HttpPost(url); +// httpPost.setConfig(reqConfig); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpPost.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + httpPost.addHeader("content-type", "application/x-www-form-urlencoded"); + httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"); + } + + // 创建请求参数 + List list = new LinkedList<>(); + for (String key : param.keySet()) { + BasicNameValuePair param1 = new BasicNameValuePair(key,param.get(key).toString()); + list.add(param1); + } + // 使用URL实体转换工具 + try { + UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8"); + httpPost.setEntity(entityParam); + HttpResponse response = client.execute(httpPost); +// HttpResponse response = httpClient.execute(httpPost,localContext); + // 从响应模型中获取响应实体 + int notFundCode = 404; + int successCode = 200; + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + } + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + + + return html; + } + public static String httpSSLPostForm(String url,Map params,Map ...headers) { + String html=""; + try { + html = dosslPostForm(url,params,headers); + } catch (Exception e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + try { + html = dosslPostForm(url,params,headers); + } catch (Exception e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + } + return html; + } + public static String httpSSLPost(String url,String params,Map ...headers) { + String html=""; + try { + html = dosslPost(url,params,headers); + } catch (Throwable e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + int i = 1; + while(true){ + if(html.contains("Download failed error is:")){ + log.error("DownLoadUtil------------->download is failure,url is:"+url); + DateUtil.sleep(30000); + i++; + }else{ + break; + } + if(i > 5){ + break; + } + try { + html = dosslPost(url,params,headers); + } catch (Throwable e) { + e.printStackTrace(); + // TODO: handle exception + html = "Download failed error is:Exception!"; + } + } + return html; + } + + /** + * 模拟客户端get请求 + * @param url 模拟请求得url + * @param headers 头部信息,没有可以不传 + * @return + */ + public static String doGet(String url,Map ... headers){ + //设置超时时间 + int timeout = 15; + RequestConfig config = RequestConfig.custom(). + setConnectTimeout(timeout * 1000). + setConnectionRequestTimeout(timeout * 1000). + setSocketTimeout(timeout * 1000).build(); + SocketConfig socketConfig = SocketConfig.custom() + .setSoKeepAlive(false) + .setSoLinger(1) + .setSoReuseAddress(true) + .setSoTimeout(10000) + .setTcpNoDelay(true).build(); + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + httpBuilder.setUserAgent(ua); + HttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build(); + HttpGet httpGet = new HttpGet(url); + if(headers != null && headers.length > 0){ + Map tempHeaders = headers[0]; + for (String key : tempHeaders.keySet()) { + httpGet.setHeader(key,tempHeaders.get(key).toString()); + } + }else{ + httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01"); + httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8"); + } + String html=""; + try { + int notFundCode = 404; + int successCode = 200; + HttpResponse response = httpClient.execute(httpGet); + // 从响应模型中获取响应实体 + HttpEntity responseEntity = response.getEntity(); + StatusLine statusLine = response.getStatusLine(); + System.out.println("响应状态为:" + response.getStatusLine()); + if(statusLine.getStatusCode() == successCode){ + if (responseEntity != null) { + html=EntityUtils.toString(responseEntity,"utf-8"); + if(html.equals("")){ + html = "Download failed error is:reslut is null"; + } + } + }else if(statusLine.getStatusCode() == notFundCode){ + html = "

页面404,正常结束请求即可

"; + }else{ + throw new Exception("请求错误,code码为:"+statusLine.getStatusCode()); + } + } catch (Exception e) { + e.printStackTrace(); + html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e); + } + return html; + + } + public static void main(String[] args) throws Exception { + + } +} diff --git a/src/main/java/com/bfd/function/utils/EncryptionUtil.java b/src/main/java/com/bfd/function/utils/EncryptionUtil.java new file mode 100644 index 0000000..819392e --- /dev/null +++ b/src/main/java/com/bfd/function/utils/EncryptionUtil.java @@ -0,0 +1,41 @@ +package com.bfd.function.utils; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Base64; + +/** + * @author jian.mao + * @date 2023年3月10日 + * @description + */ +public class EncryptionUtil { + /**MD5加密 + * @param text + * @return + */ + public static String md5(String text) { + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(text.getBytes()); + byte[] bytes = md.digest(); + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b & 0xff)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + e.printStackTrace(); + return null; + } + } + /**base64加密 + * @param text + * @return + */ + public static String base64(String text) { + // 使用 Base64 编码器对字符串进行编码 + byte[] encodedBytes = Base64.getEncoder().encode(text.getBytes()); + return new String(encodedBytes); + } +} diff --git a/src/main/java/com/bfd/function/utils/FileUtil.java b/src/main/java/com/bfd/function/utils/FileUtil.java new file mode 100644 index 0000000..1d29bed --- /dev/null +++ b/src/main/java/com/bfd/function/utils/FileUtil.java @@ -0,0 +1,36 @@ +package com.bfd.function.utils; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * 文件工具类 + * @author jian.mao + * @date 2023年7月14日 + * @description + */ +public class FileUtil { + + /** + * 数据写入文件 + * @param Path 文件路径 + * @param result 数据 + * @throws IOException + */ + public static void writeFile(String path,String result){ + try { + FileWriter fw = new FileWriter(path,true); + fw.write(result+"\n"); + fw.flush(); + fw.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/src/main/java/com/bfd/function/utils/JsonUtil.java b/src/main/java/com/bfd/function/utils/JsonUtil.java new file mode 100644 index 0000000..eff00ce --- /dev/null +++ b/src/main/java/com/bfd/function/utils/JsonUtil.java @@ -0,0 +1,32 @@ +package com.bfd.function.utils; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.function.entity.Constants; + +/** + * json工具 + * @author jian.mao + * @date 2023年7月10日 + * @description + */ +public class JsonUtil { + + /** + * 校验字符串是list/map/str + * @param jsonString + * @return + */ + public static String checkJsonType(String jsonString) { + try { + JSONObject.parseObject(jsonString); + return Constants.MAP_TYPE; + } catch (Exception e) { + try { + JSONObject.parseArray(jsonString); + return Constants.LIST_TYPE; + } catch (Exception ex) { + return Constants.STRING_TYPE; + } + } + } +} diff --git a/src/main/java/com/bfd/function/utils/OtherUtils.java b/src/main/java/com/bfd/function/utils/OtherUtils.java new file mode 100644 index 0000000..3352a2d --- /dev/null +++ b/src/main/java/com/bfd/function/utils/OtherUtils.java @@ -0,0 +1,61 @@ +package com.bfd.function.utils; + +import java.security.MessageDigest; + +/** + * 其他工具类 + * @author jian.mao + * @date 2023年9月19日 + * @description + */ +public class OtherUtils { + + + + public static String getMd5(String string) { + try { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + byte[] bs = md5.digest(string.getBytes("UTF-8")); + StringBuilder sb = new StringBuilder(40); + for (byte x : bs) { + if ((x & 0xff) >> 4 == 0) { + sb.append("0").append(Integer.toHexString(x & 0xff)); + } else { + sb.append(Integer.toHexString(x & 0xff)); + } + } + return sb.toString(); + } catch (Exception e) { + + return "nceaform" + System.currentTimeMillis(); + } + } + public static boolean isEmpty(Object obj) { + if (obj == null) { + return true; + } + + // 检查字符串 + if (obj instanceof String) { + return ((String) obj).isEmpty(); + } + + // 检查集合(包括Set和List) + if (obj instanceof java.util.Collection) { + return ((java.util.Collection) obj).isEmpty(); + } + + // 检查Map + if (obj instanceof java.util.Map) { + return ((java.util.Map) obj).isEmpty(); + } + + // 检查数组 + if (obj.getClass().isArray()) { + return java.lang.reflect.Array.getLength(obj) == 0; + } + + // 其他类型非null即视为非空 + return false; + } +} diff --git a/src/main/java/com/bfd/function/utils/QueueUtil.java b/src/main/java/com/bfd/function/utils/QueueUtil.java new file mode 100644 index 0000000..6659d49 --- /dev/null +++ b/src/main/java/com/bfd/function/utils/QueueUtil.java @@ -0,0 +1,18 @@ +package com.bfd.function.utils; + +import java.util.Map; +import java.util.concurrent.LinkedBlockingDeque; + +/** + * @author:jinming + * @className:QueueUtil + * @version:1.0 + * @description: + * @Date:2023/7/13 15:00 + */ +public class QueueUtil { + + public static LinkedBlockingDeque> taskQueue = new LinkedBlockingDeque>(); + + public static LinkedBlockingDeque sendQueue = new LinkedBlockingDeque(); +} \ No newline at end of file diff --git a/src/main/java/com/bfd/function/utils/SpringBootKafka.java b/src/main/java/com/bfd/function/utils/SpringBootKafka.java new file mode 100644 index 0000000..ec0877e --- /dev/null +++ b/src/main/java/com/bfd/function/utils/SpringBootKafka.java @@ -0,0 +1,46 @@ +package com.bfd.function.utils; + +import com.alibaba.fastjson.JSONObject; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.kafka.core.KafkaTemplate; +import org.springframework.kafka.support.SendResult; +import org.springframework.stereotype.Component; +import org.springframework.util.concurrent.ListenableFuture; +import org.springframework.util.concurrent.ListenableFutureCallback; + +/** + * @PROJECT_NAME: companybusinesscrawl + * @DESCRIPTION:SpringBootKafka 工具类 + * @AUTHOR: ying.zhao + * @DATE: 2023/4/6 11:09 + */ +@Slf4j +@Component +public class SpringBootKafka { + @Autowired + private KafkaTemplate kafkaTemplate; + /** + * 自定义topicKafkaTemplate + */ + /** + * public static final String TOPIC = "companyBussTest"; + **/ + public void send(String topic, String message) { + //发送消息 + ListenableFuture> future = kafkaTemplate.send(topic, message); + future.addCallback(new ListenableFutureCallback>() { + @Override + public void onFailure(Throwable throwable) { + //发送失败的处理 + log.info(topic + " - 生产者 发送消息失败:" + throwable.getMessage()); + } + + @Override + public void onSuccess(SendResult stringObjectSendResult) { + //成功的处理 + log.info("{} - 生产者 发送消息成功:",topic); + } + }); + } +} diff --git a/src/main/java/com/bfd/function/utils/ThrowMessageUtil.java b/src/main/java/com/bfd/function/utils/ThrowMessageUtil.java new file mode 100644 index 0000000..afd0630 --- /dev/null +++ b/src/main/java/com/bfd/function/utils/ThrowMessageUtil.java @@ -0,0 +1,23 @@ +package com.bfd.function.utils; + +import java.io.PrintWriter; +import java.io.StringWriter; + +/** + * @author jian.mao + * @date 2023年3月22日 + * @description + */ +public class ThrowMessageUtil { + + /** + * 获取异常信息 + * @param t + * @return + */ + public static String getErrmessage(Throwable t){ + StringWriter stringWriter=new StringWriter(); + t.printStackTrace(new PrintWriter(stringWriter,true)); + return stringWriter.getBuffer().toString(); + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..59bb8e6 --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,94 @@ +logging: + level: + root: info + path: ./logs +server: + port: 8015 + servlet: + context-path: /dataForge + tomcat: + uri-encoding: utf-8 + max-connections: 20000 + max-http-form-post-size: 1 + max-threads: 1000 +spring: + application: + name: dataForge + kafka: + bootstrap-servers: 172.16.12.55:9092,172.16.12.56:9092,172.16.12.57:9092 + producer: + retries: 0 + #当有多个消息需要被发送到同一个分区时,生产者会把它们放在同一个批次里。该参数指定了一个批次可以使用的内存大小,按照字节数计算。 + batch-size: 16384 + # 设置生产者内存缓冲区的大小。 + buffer-memory: 33554432 + # 键的序列化方式 + key-serializer: org.apache.kafka.common.serialization.StringSerializer + # 值的序列化方式 + value-serializer: org.apache.kafka.common.serialization.StringSerializer + # acks=0 : 生产者在成功写入消息之前不会等待任何来自服务器的响应。 + # acks=1 : 只要集群的首领节点收到消息,生产者就会收到一个来自服务器成功响应。 + # acks=all :只有当所有参与复制的节点全部收到消息时,生产者才会收到一个来自服务器的成功响应。 + acks: 1 + consumer: + # 自动提交的时间间隔 在spring boot 2.X 版本中这里采用的是值的类型为Duration 需要符合特定的格式,如1S,1M,2H,5D + auto-commit-interval: 1S + # 该属性指定了消费者在读取一个没有偏移量的分区或者偏移量无效的情况下该作何处理: + # latest(默认值)在偏移量无效的情况下,消费者将从最新的记录开始读取数据(在消费者启动之后生成的记录) + # earliest :在偏移量无效的情况下,消费者将从起始位置读取分区的记录 + auto-offset-reset: earliest + # 是否自动提交偏移量,默认值是true,为了避免出现重复数据和数据丢失,可以把它设置为false,然后手动提交偏移量 + enable-auto-commit: true + # 键的反序列化方式 + key-deserializer: org.apache.kafka.common.serialization.StringDeserializer + # 值的反序列化方式 + value-deserializer: org.apache.kafka.common.serialization.StringDeserializer + #消费组 + group-id: test4 + #消费者并发线程数 + concurrency: 4 + #超时时间 + max-poll-interval-ms: 60000 + #listener: + # 在侦听器容器中运行的线程数。 + #concurrency: 5 + #listner负责ack,每调用一次,就立即commit + #ack-mode: manual_immediate + #missing-topics-fatal: false + boot: + admin: + client: + #url: http://10.10.143.85:8000 + url: http://172.16.12.55:8001 + instance: + service-base-url: http://10.10.143.85:8013 + +management: + endpoints: + web: + exposure: + include: "*" + endpoint: + health: + show-details: always + health: + elasticsearch: + enabled: false + +customize-kafka: + bootstrap-servers: 172.18.1.119:9992 + producer: + topic: analyze +customize-elasticsearch: + indexName: cda_kg_relation + username: elastic + password: bfd123 + port: 9200 + hostname: 172.16.12.55 + +task: + task-queue-path: ../data/taskQueue.txt + +merge: + time: 300000 #单位毫秒级 + data-path: ../data/mergeDataMap.txt \ No newline at end of file diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..0922c81 --- /dev/null +++ b/src/main/resources/logback-spring.xml @@ -0,0 +1,36 @@ + + + + + + + + + true + + ${logging.level} + + + ${logging.path}/dataForgeInfo.log + + + ${logging.path}/dataForgeInfo.log.%d{yyyy-MM-dd} + 7 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n + UTF-8 + + + + + + + + diff --git a/src/test/java/com/learn/AppTest.java b/src/test/java/com/learn/AppTest.java new file mode 100644 index 0000000..5907f9e --- /dev/null +++ b/src/test/java/com/learn/AppTest.java @@ -0,0 +1,20 @@ +package com.learn; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/** + * Unit test for simple App. + */ +public class AppTest +{ + /** + * Rigorous Test :-) + */ + @Test + public void shouldAnswerWithTrue() + { + assertTrue( true ); + } +}