Browse Source

网站数据采集应用管理

master
55007 6 months ago
commit
354ac7fbc9
  1. 40
      .classpath
  2. 4
      .gitignore
  3. 23
      .project
  4. 5
      .settings/org.eclipse.core.resources.prefs
  5. 9
      .settings/org.eclipse.jdt.core.prefs
  6. 4
      .settings/org.eclipse.m2e.core.prefs
  7. 1
      README.md
  8. 244
      pom.xml
  9. 25
      src/main/java/com/bfd/task/Application.java
  10. 30
      src/main/java/com/bfd/task/cache/ConfigCache.java
  11. 46
      src/main/java/com/bfd/task/controller/TaskManagerController.java
  12. 159
      src/main/java/com/bfd/task/entity/Constants.java
  13. 62
      src/main/java/com/bfd/task/entity/ResponsePo.java
  14. 33
      src/main/java/com/bfd/task/enums/ResponseCode.java
  15. 116
      src/main/java/com/bfd/task/handler/MainHander.java
  16. 54
      src/main/java/com/bfd/task/model/AppsEntity.java
  17. 44
      src/main/java/com/bfd/task/model/BlueprintEntity.java
  18. 28
      src/main/java/com/bfd/task/model/ModulesEntity.java
  19. 32
      src/main/java/com/bfd/task/model/RelationsEntity.java
  20. 61
      src/main/java/com/bfd/task/model/TaskParam.java
  21. 19
      src/main/java/com/bfd/task/model/WebSite.java
  22. 81
      src/main/java/com/bfd/task/process/CacheMonitorProcess.java
  23. 145
      src/main/java/com/bfd/task/process/DataConsumptionProcess.java
  24. 228
      src/main/java/com/bfd/task/process/KafkaConsumerProcess.java
  25. 41
      src/main/java/com/bfd/task/process/SendResultProcess.java
  26. 19
      src/main/java/com/bfd/task/service/TaskManagerService.java
  27. 284
      src/main/java/com/bfd/task/service/impl/TaskManagerServiceImpl.java
  28. 177
      src/main/java/com/bfd/task/utils/DateUtil.java
  29. 906
      src/main/java/com/bfd/task/utils/DownLoadUtil.java
  30. 27
      src/main/java/com/bfd/task/utils/EncryptionUtil.java
  31. 36
      src/main/java/com/bfd/task/utils/FileUtil.java
  32. 32
      src/main/java/com/bfd/task/utils/JsonUtil.java
  33. 33
      src/main/java/com/bfd/task/utils/OtherUtils.java
  34. 18
      src/main/java/com/bfd/task/utils/QueueUtil.java
  35. 48
      src/main/java/com/bfd/task/utils/SpringBootKafka.java
  36. 23
      src/main/java/com/bfd/task/utils/ThrowMessageUtil.java
  37. 98
      src/main/resources/application.yml
  38. 36
      src/main/resources/logback-spring.xml
  39. 20
      src/test/java/com/bfd/AppTest.java

40
.classpath

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

4
.gitignore

@ -0,0 +1,4 @@
/target/
/logs/
/.idea/
/crawltaskmanager.iml

23
.project

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>crawltaskmanager</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

5
.settings/org.eclipse.core.resources.prefs

@ -0,0 +1,5 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

9
.settings/org.eclipse.jdt.core.prefs

@ -0,0 +1,9 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.methodParameters=generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

4
.settings/org.eclipse.m2e.core.prefs

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

1
README.md

@ -0,0 +1 @@
采集任务管理应用

244
pom.xml

@ -0,0 +1,244 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.2.4.RELEASE</version>
</parent>
<groupId>com.bfd.task</groupId>
<artifactId>crawltaskmanager</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>crawltaskmanager</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/de.codecentric/spring-boot-admin-starter-client -->
<dependency>
<groupId>de.codecentric</groupId>
<artifactId>spring-boot-admin-starter-client</artifactId>
<version>2.2.4</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.8</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-test</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework/spring-test -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>5.0.10.RELEASE</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>2.0.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.mchange/c3p0 -->
<dependency>
<groupId>com.mchange</groupId>
<artifactId>c3p0</artifactId>
<version>0.9.5.5</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.29</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.9.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jetbrains.kotlin/kotlin-reflect -->
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-reflect</artifactId>
<version>1.6.21</version>
<scope>runtime</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.1</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency>
<!-- <dependency> <groupId>org.springframework.kafka</groupId> <artifactId>spring-kafka</artifactId>
</dependency> -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.5</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/p6spy/p6spy -->
<dependency>
<groupId>p6spy</groupId>
<artifactId>p6spy</artifactId>
<version>3.9.0</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency>
<!--redis-->
<dependency>
<groupId>org.redisson</groupId>
<artifactId>redisson-spring-boot-starter</artifactId>
<version>3.13.6</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
</dependencies>
<build>
<!-- <pluginManagement> --><!-- lock down plugins versions to avoid using Maven defaults (may be moved
to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- spring-boot-maven-plugin插件就是打包spring boot应用的 -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>com.bfd.task.Application</mainClass>
<layout>ZIP</layout>
<includes>
<include>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
</include>
</includes>
</configuration>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<type>jar</type>
<includeTypes>jar</includeTypes>
<includeScope>runtime</includeScope>
<outputDirectory>${project.build.directory}/libs</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<!-- </pluginManagement> -->
</build>
</project>

25
src/main/java/com/bfd/task/Application.java

@ -0,0 +1,25 @@
package com.bfd.task;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Application entry point.
 *
 * @author jian.mao
 * @date 2023-07-04
 * @description
 */
@SpringBootApplication
public class Application {

    /**
     * Starts Spring Boot with this class as the primary configuration source.
     *
     * @param args command-line arguments, handed to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(Application.class);
        application.run(args);
    }
}

30
src/main/java/com/bfd/task/cache/ConfigCache.java

@ -0,0 +1,30 @@
package com.bfd.task.cache;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Holder for process-wide static state shared between the worker threads.
 *
 * @author jian.mao
 * @date 2022-11-11
 * @description static variable class
 */
public class ConfigCache {

    /**
     * Run flag. Worker loops poll it (e.g. {@code while (ConfigCache.isStart)}) and the
     * shutdown hook sets it to {@code false}. It is {@code volatile} so that a write made
     * on the shutdown-hook thread is guaranteed to become visible to the polling threads.
     */
    public static volatile boolean isStart = true;

    /**
     * Token cache, keyed by task id. The cache monitor compares each value against
     * {@code System.currentTimeMillis()}, i.e. it treats the value as an expiry timestamp.
     * Original note: a token expires after 30 minutes without activity.
     */
    public static ConcurrentHashMap<String, Object> tokenCache = new ConcurrentHashMap<String, Object>(16);

    /**
     * Task cache, keyed by task id.
     */
    public static ConcurrentHashMap<String, Object> taskCache = new ConcurrentHashMap<String, Object>(16);
}

46
src/main/java/com/bfd/task/controller/TaskManagerController.java

@ -0,0 +1,46 @@
package com.bfd.task.controller;
import javax.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.entity.ResponsePo;
import com.bfd.task.service.TaskManagerService;
/**
 * HTTP endpoints for task management, mounted under {@code /task}.
 *
 * @author jian.mao
 * @date 2023-09-19
 * @description
 */
@Controller
@RequestMapping("/task")
@Slf4j
public class TaskManagerController {

    @Resource
    TaskManagerService taskManagerService;

    /**
     * Accepts a task definition and forwards it to the task service.
     *
     * @param dataJson raw request body, passed to the service as-is
     * @return the service's {@link ResponsePo} serialized to a JSON string
     */
    @PostMapping("/add")
    @ResponseBody
    public String addTask(@RequestBody String dataJson) {
        log.info("请求参数:{}",dataJson);
        return JSONObject.toJSONString(taskManagerService.addTask(dataJson));
    }

    /**
     * Simple GET endpoint that always answers with a fixed string.
     *
     * @param param unused
     * @param token unused
     * @return the literal {@code "123"}
     */
    @RequestMapping(value = "/hello", method = RequestMethod.GET)
    @ResponseBody
    public String hello(String param, String token) {
        return "123";
    }
}

159
src/main/java/com/bfd/task/entity/Constants.java

@ -0,0 +1,159 @@
package com.bfd.task.entity;
/**
 * Central collection of constant keys and default values used across the application.
 *
 * @author jian.mao
 * @date 2022-11-15
 * @description
 */
public class Constants {

    // ------------------------------------------------------------------
    // Blueprint key names
    // ------------------------------------------------------------------
    public static final String SCHEDULING = "scheduling";
    public static final String TYPE = "type";
    public static final String INTERVAL = "interval";
    public static final String CREATED = "created";
    public static final String LAST_EDIT = "last_edit";
    public static final String BLUEPRINT_ID = "blueprint_id";
    public static final String BLUEPRINTID = "blueprintId";
    public static final String BLUEPRINT_NAME = "name";
    public static final String SCENARIO = "scenario";
    public static final String AUTOCOMMITTRIGGERLAST = "autoCommitTriggerLast";
    public static final String FRESHVARIABLES = "freshVariables";
    public static final String AUTOCOMMIT = "autoCommit";
    public static final String MAXERRORS = "maxErrors";
    public static final String DATALOSS = "dataloss";
    public static final String POSITION = "position";
    public static final String SCENES_ID = "scenes_id";
    public static final String SCENESID = "scenesId";
    public static final String MULTI_BRANCH = "multi_branch";
    public static final String SINGLE = "single";
    /** Number of retries already performed. */
    public static final String ERROR_TIME = "error_time";
    public static final String PREVIOUS_RESULT = "previous_result";
    /** Data id. */
    public static final String BUSINESSKEY = "businessKey";

    // ------------------------------------------------------------------
    // Metadata key names
    // ------------------------------------------------------------------
    public static final String OUTPUT = "output";
    public static final String LABEL_COL = "label_col";
    public static final String LABEL = "label";
    public static final String INPUT = "input";
    public static final String USER = "user";
    public static final String ADMIN = "admin";
    public static final String ADDRESS = "address";
    public static final String DATASOURCE = "datasource";
    public static final String INDEX = "index";

    // ------------------------------------------------------------------
    // App key names
    // ------------------------------------------------------------------
    public static final String APPS = "apps";
    public static final String TRANSFER_ID = "transfer_id";
    public static final String MODULE = "module";
    public static final String VERSION = "version";
    public static final String METADATA = "metadata";
    public static final String DATA = "data";
    public static final String APP_NAME = "name";
    public static final String DESCRIBE = "describe";
    public static final String NEXT_APP_ID = "next_app_id";
    public static final String EDGE_ID = "edge_id";
    public static final String START_ID = "start_id";
    public static final String END_ID = "end_id";
    public static final String WAIT_CONDITION = "wait_condition";
    public static final String START_TAG = "start_tag";

    // ------------------------------------------------------------------
    // Module types
    // ------------------------------------------------------------------
    public static final String FILE = "file";
    public static final String OCR = "OCR";
    public static final String FILTER = "Filter";
    public static final String CHATGPT = "ChatGPT";
    public static final String MYSQL = "mysql";

    // ------------------------------------------------------------------
    // Miscellaneous
    // ------------------------------------------------------------------
    public static final String UNDERLINE = "_";
    /** Declared as {@code null} in the original source; kept as-is. */
    public static final String RESULT_TOPIC = null;
    public static final String EMPTY = "";
    public static final String HTTP = "http";
    public static final String REQUEST_ERROR_MESSAGE = "Download failed error is";
    public static final String REQUEST_RESULT = "result";
    public static final String REQUEST_RESULT_RESULTS = "results";
    public static final String MAP_TYPE = "Map";
    public static final String LIST_TYPE = "List";
    public static final String STRING_TYPE = "String";
    public static final String DOCUMENT_TYPE = "doc";
    public static final String FILTER_ZH = "过滤器";
    public static final String JSON_SELE_SYMBOL = "$.";
    public static final String LEFT_BRACKETS = "[";
    public static final String RIGTH_BRACKETS = "]";
    public static final String TASKTYPE = "taskType";
    public static final Integer USER_TYPE = 1;
    public static final Integer KEYWORD_TYPE = 0;
    public static final Integer DETAIL_TYPE = 2;
    public static final String CID = "cid";
    public static final String SITETYPE = "siteType";
    public static final Integer DEFULT_SUBJECTID = 304864;
    public static final Integer DEFULT_CRAWLCYCLICITYTIME = 1440;
    public static final String CRAWLENDTIME = "crawlEndTime";
    public static final String CRAWLSTARTTIME = "crawlStartTime";
    public static final String CRAWLPAGETYPES = "crawlPageTypes";
    public static final String APPID = "113ic";
    public static final String APP_ID = "appId";
    public static final String ID = "id";
    public static final Integer DEFULT_CRAWLPERIODHOUR = 24;
    public static final String CREATEUSERID_ANALYZE = "662015832180933762";
    public static final String CREATEUSERID = "createUserId";
    public static final String CRAWL_ADD_URL = "https://caiji.percent.cn/api/crawl/remote/task/save";
    public static final String CRAWLKEYWORD = "crawlKeyword";
    public static final String ATTACHTAG = "attachTag";
    public static final String ATTACHTAG_VALUE = "analyze";
    public static final String KEYWORD = "keyword";
    public static final String SITEID = "siteId";
    public static final String RESULTS = "results";
    public static final String RESULT = "result";
    public static final String CRAWLDATAFLAG = "crawlDataFlag";
    public static final String CRAWLDATAFLAG_PREFIX = "\"crawlDataFlag\":\"keyword:";
    public static final String TID = "tid";
    public static final Long TIME_OUT = 1800000L;
    public static final String ATTR = "attr";
    public static final String HASVIDEO = "hasVideo";
    public static final String CRAWL_END_MARK = "crawl_end_mark";
    public static final String CRAWL_END_MESSAGE = "crawl_end_message";
    public static final String CRAWL_END_MESSAGE_VALUE = "数据采集完成";
    public static final String SUBJECTID = "subjectId";
    public static final String SUBJECT_ID = "subject_id";
    public static final String KAFKA_TOPIC = "kafka_topic";
    public static final String KAFKA_ADDR = "kafka_addr";
    public static final String TASK_ID = "task_id";
    public static final String TASKID = "taskId";
    public static final String CODE = "code";
    public static final int SUCCESS_CODE = 200;
    public static final String WEB_URL_SUFFIX = "/api/cda/caiji/status";
    public static final String STATUS = "status";
    /** crawlDataFlag prefix for keyword tasks. */
    public static final String KEYWORD_PREFIX = "keyword:";
    /** crawlDataFlag prefix for user (account) tasks. */
    public static final String ACCOUNT_PREFIX = "account:";
    /** crawlDataFlag prefix for URL tasks. */
    public static final String URL_PREFIX = "url:";

    // ------------------------------------------------------------------
    // Redis
    // ------------------------------------------------------------------
    public static final String LOCK_KEY = "myLock";
    public static final long LOCK_EXPIRE_TIME = 300000;
    public static final String APP_CODE = "app_code";
    public static final String APPCODE = "appCode";
    public static final String ISLAST = "isLast";
    public static final String REDISKEY_MIDDLE = "#####";

    // ------------------------------------------------------------------
    // Elasticsearch field key mapping
    // ------------------------------------------------------------------
    public static final String VIDEOPATH = "videoPath";
    public static final String SOURCE_DATA_ID = "source_data_id";
    public static final String MESSAGE = "message";
}

62
src/main/java/com/bfd/task/entity/ResponsePo.java

@ -0,0 +1,62 @@
package com.bfd.task.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import com.bfd.task.enums.ResponseCode;
/**
 * Generic response envelope returned by the HTTP API: a status code, an optional payload
 * and a human-readable message.
 *
 * @author jinming
 * @className ResponsePo
 * @version 1.0
 * @description
 * @Date 2023/4/3 17:23
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class ResponsePo {
    /**
     * Response code.
     */
    private int code;
    /**
     * Payload; per the original note, on success this holds the returned data as a JSON string.
     */
    private Object data;
    /**
     * Hint / status message.
     */
    private String message;

    /**
     * Builds a response carrying the code and message of {@link ResponseCode#SUCCESS}.
     *
     * @return a new response without payload
     */
    public static ResponsePo success() {
        return setStatus(ResponseCode.SUCCESS.getCode(), ResponseCode.SUCCESS.getMessage());
    }

    /**
     * Builds a response carrying the code and message of {@link ResponseCode#FAILURE}.
     *
     * @return a new response without payload
     */
    public static ResponsePo error() {
        return setStatus(ResponseCode.FAILURE.getCode(), ResponseCode.FAILURE.getMessage());
    }

    /**
     * Builds a response with an arbitrary code and message.
     *
     * @param code    response code
     * @param message response message
     * @return a new response without payload
     */
    public static ResponsePo setStatus(int code, String message) {
        ResponsePo resultBean = new ResponsePo();
        resultBean.code = code;
        resultBean.message = message;
        return resultBean;
    }

    /**
     * Creates a response without payload ({@code data} stays {@code null}).
     *
     * @param code    response code
     * @param message response message
     */
    public ResponsePo(int code, String message) {
        // The original also had "this.data = data;", a self-assignment with no effect; removed.
        this.code = code;
        this.message = message;
    }

    /**
     * Creates a response without payload from a predefined {@link ResponseCode}.
     *
     * @param responseCode source of the code and message; must not be {@code null}
     */
    public ResponsePo(ResponseCode responseCode) {
        this(responseCode.getCode(), responseCode.getMessage());
    }
}

33
src/main/java/com/bfd/task/enums/ResponseCode.java

@ -0,0 +1,33 @@
package com.bfd.task.enums;
/**
 * Enumeration of response result codes and their default messages.
 *
 * @author jinming
 * @className ResponseCodeEnum
 * @version 1.0
 * @description response result code enum
 * @Date 2023/2/28 11:40
 */
public enum ResponseCode {
    // NOTE(review): FAILCIRCULATION and TYPE_NOT_SUPPORT both use code 601. Left unchanged
    // because clients may depend on the numeric values; confirm whether this is intended.
    SUCCESS(200, "操作成功"),
    FAILURE(400, "参数错误"),
    FAILCIRCULATION(601, "数据消费错误"),
    FAILADDTASK(1001,"任务下发失败"),
    INTERNAL_SERVER_ERROR(500, "服务器内部错误"),
    TYPE_NOT_SUPPORT(601,"文件类型不支持");

    /** Numeric result code; immutable. */
    private final int code;
    /** Default message for this code; immutable. */
    private final String message;

    ResponseCode(int code, String message) {
        this.code = code;
        this.message = message;
    }

    /**
     * @return the numeric result code
     */
    public int getCode() {
        return code;
    }

    /**
     * @return the default message for this code
     */
    public String getMessage() {
        return message;
    }
}

116
src/main/java/com/bfd/task/handler/MainHander.java

@ -0,0 +1,116 @@
package com.bfd.task.handler;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.cache.ConfigCache;
import com.bfd.task.process.CacheMonitorProcess;
import com.bfd.task.process.DataConsumptionProcess;
import com.bfd.task.process.SendResultProcess;
import com.bfd.task.utils.FileUtil;
/**
* 执行入口
* @author jian.mao
* @date 2023年9月15日
* @description
*/
@Component
@Order(value = 1)
@Slf4j
public class MainHander implements ApplicationRunner{
@Value("${task.task-queue-path}")
private String taskPath;
@Value("${task.token-queue-path}")
private String tokenPath;
@Autowired
SendResultProcess sendResultProcess;
@Autowired
CacheMonitorProcess cacheMonitorProcess;
@Autowired
DataConsumptionProcess dataConsumptionProcess;
@Override
public void run(ApplicationArguments args) throws Exception {
//启动加载缓存任务
readTask(taskPath,ConfigCache.taskCache);
readTask(tokenPath,ConfigCache.tokenCache);
//启动任务流转线程
log.info("sendResultProcess is start");
new Thread(sendResultProcess).start();
//开启数据消费线程
log.info("dataConsumptionProcess is start");
new Thread(dataConsumptionProcess).start();
//开启缓存监控线程
log.info("cacheMonitorProcess is start");
new Thread(cacheMonitorProcess).start();
//停止处理
waitDown();
}
@SuppressWarnings("unchecked")
public static void readTask(String path,Map<String, Object> map){
File file = new File(path);
if(file.exists()){
List<String> tasks = null;
try {
tasks = FileUtils.readLines(file,"UTF-8");
} catch (IOException e) {
e.printStackTrace();
}
for (String task : tasks) {
Map<String, Object> loadCacheMap = JSONObject.parseObject(task);
for (Entry<String, Object> entry : loadCacheMap.entrySet()) {
map.put(entry.getKey(), entry.getValue());
}
}
file.delete();
}
}
/**
* 结束触发钩子
*/
public void waitDown() {
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
// 停止线程
ConfigCache.isStart = false;
log.info("stop-------");
writeTsskToFile();
}
});
}
/**
* 任务持久化到硬盘
*/
public void writeTsskToFile(){
if(!ConfigCache.taskCache.isEmpty()){
FileUtil.writeFile(taskPath, JSONObject.toJSONString(ConfigCache.taskCache));
}
if(!ConfigCache.tokenCache.isEmpty()){
FileUtil.writeFile(tokenPath, JSONObject.toJSONString(ConfigCache.tokenCache));
}
log.info("cache write is file end");
}
}

54
src/main/java/com/bfd/task/model/AppsEntity.java

@ -0,0 +1,54 @@
package com.bfd.task.model;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import lombok.Data;
/**
 * Entity describing an app node.
 *
 * @author jian.mao
 * @date 2023-07-05
 * @description
 */
@Data
public class AppsEntity {
/*** Blueprint id ***/
private Integer blueprintId;
/*** Transfer id ***/
private Integer transferId;
/*** App id ***/
private Integer appId;
/*** Module id ***/
private Integer moduleId;
/*** Name of the node operation ***/
private String name;
/*** Node description ***/
private String describe;
/*** Wait condition (node(s) to wait for) ***/
private String waitCondition;
/*** Whether this is the initial node ***/
private Integer startTag;
/*** Coordinates ***/
private String position;
/*** Version ***/
private Integer version;
/*** Output ***/
private String output;
/*** Input ***/
private String input;
/*** User input ***/
private String user;
/*** Administrator input ***/
private String admin;
/*** Address information of the module service ***/
private String address;
/*** Data result ***/
private String data;
/*** Creation time ***/
private LocalDateTime created;
/*** Last modification time ***/
private Timestamp lastEdit;
}

44
src/main/java/com/bfd/task/model/BlueprintEntity.java

@ -0,0 +1,44 @@
package com.bfd.task.model;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import lombok.Data;
/**
 * Entity describing a blueprint.
 *
 * @author jian.mao
 * @date 2023-07-05
 * @description
 */
@Data
public class BlueprintEntity {
/*** Blueprint id ***/
private Integer blueprintId;
/*** Scene id ***/
private Integer scenesId;
/*** Blueprint name ***/
private String name;
/*** Scheduling type ***/
private String schedulingType;
/*** Scheduling interval (period) ***/
private Integer schedulingInterval;
/*** Whether the blueprint is committed last ***/
private Integer autoCommitTriggerLast;
/*** Blueprint status: success / failure ***/
private Integer dataloss;
/*** Number of retries ***/
private Integer maxErrors;
/*** Auto commit ***/
private Integer autoCommit;
/*** (undocumented in the original) ***/
private Integer freshVariables;
/*** Creation time ***/
private LocalDateTime created;
/*** Last modification time ***/
private Timestamp lastEdit;
/*** (undocumented in the original) ***/
private String user;
/*** Single / multi branch flag. NOTE(review): the only public field in this class; confirm whether intentional. ***/
public Integer multiBranch;
}

28
src/main/java/com/bfd/task/model/ModulesEntity.java

@ -0,0 +1,28 @@
package com.bfd.task.model;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import lombok.Data;
/**
 * Entity describing a module.
 *
 * @author jian.mao
 * @date 2023-07-05
 * @description
 */
@Data
public class ModulesEntity {
/*** Module id ***/
private Integer moduleId;
/*** Module name ***/
private String module;
/*** Address of the module logo ***/
private String logo;
/*** Creation time ***/
private LocalDateTime created;
/*** Last modification time ***/
private Timestamp lastEdit;
}

32
src/main/java/com/bfd/task/model/RelationsEntity.java

@ -0,0 +1,32 @@
package com.bfd.task.model;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import lombok.Data;
/**
 * Entity describing a relation (edge) between two nodes of a blueprint.
 *
 * @author jian.mao
 * @date 2023-07-05
 * @description
 */
@Data
public class RelationsEntity {
/*** Relation id ***/
private Integer relationId;
/*** Blueprint id ***/
private Integer blueprintId;
/*** Start node id ***/
private Integer startId;
/*** End node id ***/
private Integer endId;
/*** Creation time ***/
private LocalDateTime created;
/*** Last modification time ***/
private Timestamp lastEdit;
/*** (undocumented in the original) ***/
private String user;
}

61
src/main/java/com/bfd/task/model/TaskParam.java

@ -0,0 +1,61 @@
package com.bfd.task.model;
import java.util.List;
import lombok.Data;
/**
 * Parameters for adding a crawl task.
 *
 * @author jian.mao
 * @date 2023-09-19
 * @description
 */
@Data
public class TaskParam {
/** Unique identifier **/
private String id ;
/** Site id of the website **/
private Integer siteId ;
/** Site type **/
private Integer siteType ;
/** Crawl frequency **/
private Integer crawlCyclicityTime ;
/** Crawl end time **/
private Long crawlEndTime ;
/** Crawl mode **/
private Integer crawlMode ;
/** Page types **/
private List<Integer> crawlPageTypes ;
/** Incremental time range **/
private Integer crawlPeriodHour ;
/** Crawl start time **/
private Long crawlStartTime ;
/** Creator id **/
private String createUserId ;
/** Upper limit of pages to turn **/
private Integer maxPageNum ;
/** Batch flag **/
private Integer isBatch ;
/** Subject id **/
private Integer subjectId ;
/** Task type **/
private Integer taskType ;
/** Keyword **/
private String crawlKeyword ;
/** Tag **/
private String attachTag;
/** Cid of the website **/
private String cid ;
/** Timestamp **/
private Long time ;
/** App id **/
private String appId ;
/** Signature **/
private String sign ;
/** Scene id **/
private Integer scenesId;
/** Project id **/
private Integer blueprintId;
}

19
src/main/java/com/bfd/task/model/WebSite.java

@ -0,0 +1,19 @@
package com.bfd.task.model;
import lombok.Data;
/**
 * Entity for the website table.
 *
 * @author jian.mao
 * @date 2023-09-19
 * @description
 */
@Data
public class WebSite {
private Integer id;
private Integer siteId;
private String cid;
private Integer status;
}

81
src/main/java/com/bfd/task/process/CacheMonitorProcess.java

@ -0,0 +1,81 @@
package com.bfd.task.process;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.cache.ConfigCache;
import com.bfd.task.entity.Constants;
import com.bfd.task.utils.DownLoadUtil;
import com.bfd.task.utils.QueueUtil;
/**
* 监控缓存变量动态
* @author jian.mao
* @date 2024年3月14日
* @description
*/
@Slf4j
@Component
public class CacheMonitorProcess implements Runnable{
@Value("${manageweb.host}")
private String webUrlProfix;
@Override
public void run() {
// TODO Auto-generated method stub
while(ConfigCache.isStart){
try {
for (Entry<String, Object> entry:ConfigCache.tokenCache.entrySet()) {
String key = entry.getKey();
long value = (long)entry.getValue();
long currentTimeMillis = System.currentTimeMillis();
if(currentTimeMillis >= value){
if(!ConfigCache.taskCache.containsKey(key)){
log.error("监控任务中不存在:{}",key);
continue;
}
Map<String, Object> task = (Map<String, Object>) ConfigCache.taskCache.get(key);
log.info("释放任务:{}",JSONObject.toJSONString(task));
//发送结束标识
Map<String, Object> endResults = new HashMap<String, Object>(16);
// 结果集组装
Map<String, Object> result = new HashMap<String, Object>(16);
endResults.put(Constants.TASKID, key);
endResults.put(Constants.CRAWL_END_MARK, "ok");
endResults.put(Constants.CRAWL_END_MESSAGE, Constants.CRAWL_END_MESSAGE_VALUE);
endResults.put(Constants.ISLAST,1);
result.put(Constants.RESULTS, JSONObject.toJSONString(endResults));
result.put(Constants.ISLAST, true);
task.put(Constants.RESULT, result);
//发送采集结束标识先注掉后面在考虑
QueueUtil.sendQueue.put(JSONObject.toJSONString(task));
//反馈给前端系统
Map<String, Object> param = new HashMap<String, Object>(16);
param.put(Constants.APPCODE, task.get(Constants.APP_CODE));
param.put(Constants.STATUS, 3);
log.info("结束触发参数:{}",JSONObject.toJSONString(param));
String html = DownLoadUtil.doPost(webUrlProfix+Constants.WEB_URL_SUFFIX, JSONObject.toJSONString(param));
log.info("通知管理系统采集结束,管理系统返回结果:{}",html);
//清空缓存
ConfigCache.tokenCache.remove(key);
ConfigCache.taskCache.remove(key);
log.info("taskId:{},任务已过期。",key);
}
}
//10秒监控
Thread.sleep(10*1000);
} catch (Exception e) {
log.error("未知异常:",e);
}
}
}
}

145
src/main/java/com/bfd/task/process/DataConsumptionProcess.java

@ -0,0 +1,145 @@
package com.bfd.task.process;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.cache.ConfigCache;
import com.bfd.task.entity.Constants;
import com.bfd.task.utils.QueueUtil;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
/**
* @author jian.mao
* @date 2023年9月21日
* @description
*/
@Slf4j
@Component
public class DataConsumptionProcess implements Runnable {
@Value("${spring.kafka.bootstrap-servers}")
private String bootstrapServers;
@Value("${spring.kafka.consumer.group-id}")
private String groupId;
@Value("${customize-kafka.consumer.topic}")
private String topic;
@SuppressWarnings("unchecked")
@Override
public void run() {
// 创建 Kafka 消费者配置
Map<String, Object> consumerProps = new HashMap<String, Object>(16);
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
bootstrapServers);
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
//跟读
consumerProps.put("auto.offset.reset", "latest");
consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
consumerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
consumerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
Consumer<String, String> consumer = new KafkaConsumer<>(consumerProps);
try {
// 订阅主题
consumer.subscribe(Collections.singletonList(topic));
// 消费消息
while (true) {
// 没超时的话正常消费数据
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
for (ConsumerRecord<String, String> record : records) {
Map<String, Object> resultData = new HashMap<String, Object>(32);
try {
// 处理消息这里可以根据需要进行业务处理
Map<String, Object> resultEs = JSONObject.parseObject(record.value());
log.info("Received message: "+ record.value());
if(!resultEs.containsKey(Constants.TASKID)){
log.warn("数据体缺少taskId");
continue;
}
String taskId = resultEs.get(Constants.TASKID).toString();
if(!ConfigCache.taskCache.containsKey(taskId)){
log.warn("不属于有知任务产出的数据,taskId:{}",taskId);
continue;
}
Map<String, Object> task = (Map<String, Object>) ConfigCache.taskCache.get(taskId);
String token = (String) task.get(Constants.BUSINESSKEY);
Map<String, Object> input = (Map<String, Object>) task.get(Constants.INPUT);
Integer hasVideo = (Integer) input.get(Constants.HASVIDEO);
if(resultEs.get(Constants.HASVIDEO).equals(hasVideo)){
Map<String,Object> crawlResults = JSONObject.parseObject(record.value());
//结果加工 例如videopath[]转换成String
bulidResult(crawlResults);
// 结果集组装
Map<String, Object> result = new HashMap<String, Object>(16);
//结果内容
Map<String,Object> data = new HashMap<String, Object>(16);
//获取输出字段
Map<String,Object> output = (Map<String, Object>) task.get(Constants.OUTPUT);
for (String key: output.keySet()) {
if (crawlResults.containsKey(key)){
data.put(key,crawlResults.get(key));
}
}
result.put(Constants.RESULTS, JSONObject.toJSONString(data));
for (String key : task.keySet()) {
resultData.put(key, task.get(key));
}
result.put(Constants.STATUS, 1);
result.put(Constants.MESSAGE, "成功");
resultData.put(Constants.RESULT, result);
QueueUtil.sendQueue.put(JSONObject.toJSONString(resultData));
//taskId赋值
if(resultEs.containsKey(Constants.TASKID)){
if(taskId == null){
taskId = resultEs.get(Constants.TASKID).toString();
}
}
}else{
log.info("不符合需求数据----");
}
} catch (Exception e) {
// TODO: handle exception
log.error("数据格式异常:{}",record.value());
//结果集
Map<String, Object> result = new HashMap<String, Object>(16);
//遍历入库返回结果拼接响应内容
result.put(Constants.RESULTS, e.getMessage());
result.put(Constants.MESSAGE, "异常");
result.put(Constants.STATUS, 2);
resultData.put(Constants.RESULT, result);
//发送kafka
QueueUtil.sendQueue.put(JSONObject.toJSONString(resultData));
}
}
}
} catch (Exception e) {
log.error("kafka消费异常\n", e);
consumer.close();
}
}
/**
* 结果加工
* @param result
*/
private void bulidResult(Map<String, Object> result){
//视频gofast地址加工
List<String> videoPath = (List<String>) result.get(Constants.VIDEOPATH);
if(videoPath != null && videoPath.size() > 0){
String videoUrl = videoPath.get(0);
result.put(Constants.VIDEOPATH, videoUrl);
}
}
}

228
src/main/java/com/bfd/task/process/KafkaConsumerProcess.java

@ -0,0 +1,228 @@
package com.bfd.task.process;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.kafka.support.serializer.ErrorHandlingDeserializer;
import org.springframework.kafka.support.serializer.JsonDeserializer;
import org.springframework.stereotype.Component;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.cache.ConfigCache;
import com.bfd.task.entity.Constants;
import com.bfd.task.utils.DateUtil;
import com.bfd.task.utils.DownLoadUtil;
import com.bfd.task.utils.QueueUtil;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
/**
* @author jian.mao
* @date 2023年9月21日
* @description
*/
@Slf4j
public class KafkaConsumerProcess implements Runnable {
private String bootstrapServers;
private String groupId;
private String topic;
private Map<String, Object> task;
private String webUrlProfix;
public String getWebUrlProfix() {
return webUrlProfix;
}
public void setWebUrlProfix(String webUrlProfix) {
this.webUrlProfix = webUrlProfix;
}
public String getBootstrapServers() {
return bootstrapServers;
}
public void setBootstrapServers(String bootstrapServers) {
this.bootstrapServers = bootstrapServers;
}
public String getGroupId() {
return groupId;
}
public void setGroupId(String groupId) {
this.groupId = groupId;
}
public String getTopic() {
return topic;
}
public void setTopic(String topic) {
this.topic = topic;
}
public Map<String, Object> getTask() {
return task;
}
public void setTask(Map<String, Object> task) {
this.task = task;
}
public KafkaConsumerProcess(String bootstrapServers, String groupId,
String topic, Map<String, Object> task, String webUrlProfix) {
this.bootstrapServers = bootstrapServers;
this.groupId = groupId;
this.topic = topic;
this.task = task;
this.webUrlProfix = webUrlProfix;
}
public KafkaConsumerProcess() {
}
@Override
public void run() {
// 创建 Kafka 消费者配置
Map<String, Object> consumerProps = new HashMap<String, Object>(16);
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
bootstrapServers);
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
//跟读
consumerProps.put("auto.offset.reset", "latest");
consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
consumerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
consumerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
Consumer<String, String> consumer = new KafkaConsumer<>(consumerProps);
String taskId = null;
Map<String, Object> endResults = null;
try {
String token = (String) task.get(Constants.BUSINESSKEY);
Map<String, Object> input = (Map<String, Object>) task.get(Constants.INPUT);
Integer hasVideo = (Integer) input.get(Constants.HASVIDEO);
log.info("任务id;{},消费地址:{},消费主题:{},消费组:{}",token,bootstrapServers,topic,groupId);
// 订阅主题
consumer.subscribe(Collections.singletonList(topic));
// 消费消息
while (true) {
// 判断是否过期
Long time = (long)ConfigCache.tokenCache.get(token);
if(time == null){
time = System.currentTimeMillis();
ConfigCache.tokenCache.put(token, time);
}
// 没超时的话正常消费数据
if (System.currentTimeMillis() - time < Constants.TIME_OUT) {
log.info("此任务正常消费:{},消费组id:{},消费时间:{},当前时间:{}",token,groupId,DateUtil.getDateTime(time),DateUtil.getDateTime(System.currentTimeMillis()));
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
for (ConsumerRecord<String, String> record : records) {
Map<String, Object> resultData = new HashMap<String, Object>(32);
for (String key : task.keySet()) {
resultData.put(key, task.get(key));
}
// 处理消息这里可以根据需要进行业务处理
Map<String, Object> resultEs = JSONObject.parseObject(record.value());
log.info("Received message: "+ record.value());
if(((String)resultEs.get(Constants.ATTR)).contains(token) && resultEs.get(Constants.HASVIDEO).equals(hasVideo)){
endResults = resultEs;
Map<String,Object> crawlResults = JSONObject.parseObject(record.value());
//结果加工 例如videopath[]转换成String
bulidResult(crawlResults);
// 结果集组装
Map<String, Object> result = new HashMap<String, Object>(16);
//结果内容
Map<String,Object> data = new HashMap<String, Object>(16);
//获取输出字段
Map<String,Object> output = (Map<String, Object>) task.get(Constants.OUTPUT);
for (String key: output.keySet()) {
if (crawlResults.containsKey(key)){
data.put(key,crawlResults.get(key));
}
}
result.put(Constants.RESULTS, JSONObject.toJSONString(data));
resultData.put(Constants.RESULT, result);
QueueUtil.sendQueue.put(JSONObject.toJSONString(resultData));
ConfigCache.tokenCache.put(token,System.currentTimeMillis());
//taskId赋值
if(resultEs.containsKey(Constants.TASKID)){
if(taskId == null){
taskId = resultEs.get(Constants.TASKID).toString();
}
}
}else{
log.info("不符合需求数据----");
}
}
}else{
log.info("kafka消费者会话过期,已进行销毁----");
ConfigCache.tokenCache.remove(token);
//发送结束标识
Map<String, Object> resultData = new HashMap<String, Object>(32);
for (String key : task.keySet()) {
resultData.put(key, task.get(key));
}
// 结果集组装
Map<String, Object> result = new HashMap<String, Object>(16);
if(taskId == null){
log.info("此任务:{},没有采集到数据",token);
}else{
endResults = new HashMap<String, Object>(16);
endResults.put(Constants.TASKID, taskId);
endResults.put(Constants.CRAWL_END_MARK, "ok");
endResults.put(Constants.CRAWL_END_MESSAGE, Constants.CRAWL_END_MESSAGE_VALUE);
result.put(Constants.RESULTS, JSONObject.toJSONString(endResults));
result.put(Constants.ISLAST, true);
resultData.put(Constants.RESULT, result);
//发送采集结束标识先注掉后面在考虑
QueueUtil.sendQueue.put(JSONObject.toJSONString(resultData));
}
consumer.close();
//反馈给前端系统
Map<String, Object> param = new HashMap<String, Object>(16);
// param.put(Constants.BUSINESSKEY, resultData.get(Constants.BUSINESSKEY));
// param.put(Constants.APP_ID, resultData.get(Constants.APP_ID));
param.put(Constants.APPCODE, resultData.get(Constants.APP_CODE));
// param.put(Constants.SCENESID, resultData.get(Constants.SCENES_ID));
param.put(Constants.STATUS, 3);
log.info("结束触发参数:{}",JSONObject.toJSONString(param));
String html = DownLoadUtil.doPost(webUrlProfix+Constants.WEB_URL_SUFFIX, JSONObject.toJSONString(param));
log.info("通知管理系统采集结束,管理系统返回结果:{}",html);
break;
}
}
} catch (Exception e) {
log.error("kafka消费异常\n", e);
consumer.close();
}
}
/**
* 结果加工
* @param result
*/
private void bulidResult(Map<String, Object> result){
//视频gofast地址加工
List<String> videoPath = (List<String>) result.get(Constants.VIDEOPATH);
if(videoPath != null && videoPath.size() > 0){
String videoUrl = videoPath.get(0);
result.put(Constants.VIDEOPATH, videoUrl);
}
}
}

41
src/main/java/com/bfd/task/process/SendResultProcess.java

@ -0,0 +1,41 @@
package com.bfd.task.process;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.bfd.task.utils.QueueUtil;
import com.bfd.task.utils.SpringBootKafka;
/**
* @author jian.mao
* @date 2023年9月21日
* @description
*/
@Component
@Slf4j
public class SendResultProcess implements Runnable{
@Autowired
private SpringBootKafka springBootKafka;
@Value("${customize-kafka.producer.topic}")
private String topic;
@Override
public void run() {
// TODO Auto-generated method stub
while (true) {
String task;
try {
task = QueueUtil.sendQueue.take();
//写入kafka
springBootKafka.send(topic,task);
log.info("数据流转至下游-------");
} catch (InterruptedException e) {
log.error("获取发送数据异常",e);
}
}
}
}

19
src/main/java/com/bfd/task/service/TaskManagerService.java

@ -0,0 +1,19 @@
package com.bfd.task.service;
import com.bfd.task.entity.ResponsePo;
/**
 * Service-layer contract for task management.
 *
 * @author jian.mao
 * @date 2023-09-19
 * @description
 */
public interface TaskManagerService {

    /**
     * Adds (dispatches) a new task.
     *
     * @param param the task request as a JSON string
     * @return the response describing whether the task was accepted
     */
    ResponsePo addTask(String param);
}

284
src/main/java/com/bfd/task/service/impl/TaskManagerServiceImpl.java

@ -0,0 +1,284 @@
package com.bfd.task.service.impl;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.cache.ConfigCache;
import com.bfd.task.entity.Constants;
import com.bfd.task.entity.ResponsePo;
import com.bfd.task.enums.ResponseCode;
import com.bfd.task.model.TaskParam;
import com.bfd.task.process.KafkaConsumerProcess;
import com.bfd.task.service.TaskManagerService;
import com.bfd.task.utils.DownLoadUtil;
import com.bfd.task.utils.OtherUtils;
/**
* 逻辑层实现类
* @author jian.mao
* @date 2023年9月19日
* @description
*/
@Service
@Slf4j
public class TaskManagerServiceImpl implements TaskManagerService {
@Value("${spring.kafka.bootstrap-servers}")
private String bootstrapServers;
@Value("${spring.kafka.consumer.group-id}")
private String groupId;
@Value("${customize-kafka.consumer.topic}")
private String topic;
@Value("${manageweb.host}")
private String webUrlProfix;
@Autowired
private StringRedisTemplate stringRedisTemplate;
@Override
public ResponsePo addTask(String param) {
// TODO Auto-generated method stub
ResponsePo responsePo = ResponsePo.success();
Map<String, Object> paramMap = null;
//读取队列
try {
paramMap = JSONObject.parseObject(param);
//任务下发
addTask(responsePo,paramMap);
} catch (Exception e) {
log.error("请求格式发生异常\n",e);
responsePo.setCode(ResponseCode.FAILURE.getCode());
responsePo.setMessage(ResponseCode.FAILURE.getMessage());
}
if(responsePo.getCode() != Constants.SUCCESS_CODE){
return responsePo;
}
/*try {
//流程流转
sendResult(paramMap);
} catch (Exception e) {
// TODO: handle exception
log.error("数据消费启动异常\n",e);
responsePo.setCode(ResponseCode.FAILCIRCULATION.getCode());
responsePo.setMessage(ResponseCode.FAILCIRCULATION.getMessage());
}*/
return responsePo;
}
/**
* 查询流程流转
* @param paramMap
*/
private void sendResult(Map<String, Object> paramMap){
KafkaConsumerProcess kafkaConsumerProcess = new KafkaConsumerProcess();
kafkaConsumerProcess.setBootstrapServers(bootstrapServers);
//输入配置
Map<String,Object> input = (Map<String, Object>) paramMap.get(Constants.INPUT);
//网站cid
String cid = (String) input.get(Constants.CID);
//groupid随机生成避免多个线程数据偷取
String uuGroupId = UUID.randomUUID().toString().replace("-", "");
kafkaConsumerProcess.setGroupId(uuGroupId);
kafkaConsumerProcess.setTask(paramMap);
kafkaConsumerProcess.setWebUrlProfix(webUrlProfix);
kafkaConsumerProcess.setTopic(cid);
new Thread(kafkaConsumerProcess).start();
}
/**
* 任务下发到采集平台
* @param responsePo
* @param params
*/
private void addTask(ResponsePo responsePo,Map<String, Object> params){
try {
Map<String,Object> input = (Map<String, Object>) params.get(Constants.INPUT);
TaskParam taskParam = new TaskParam();
//唯一标识
String id = UUID.randomUUID().toString();
taskParam.setId(id);
//网站siteid
Integer siteId = (Integer) input.get(Constants.SITEID);
taskParam.setSiteId(siteId);
//站点类型
Integer siteType = (Integer) input.get(Constants.SITETYPE);
taskParam.setSiteType(siteType);
//采集频率
Integer crawlCyclicityTime = Constants.DEFULT_CRAWLCYCLICITYTIME;
taskParam.setCrawlCyclicityTime(crawlCyclicityTime);
//采集结束时间
Long crawlEndTime = (Long) input.get(Constants.CRAWLENDTIME);
taskParam.setCrawlEndTime(crawlEndTime);
//采集模式
Integer crawlMode = 1;
taskParam.setCrawlMode(crawlMode);
//页面类型
List<Integer> crawlPageTypes = (List<Integer>) input.get(Constants.CRAWLPAGETYPES);
taskParam.setCrawlPageTypes(crawlPageTypes);
//增量时间范围
Integer crawlPeriodHour = Constants.DEFULT_CRAWLPERIODHOUR;
taskParam.setCrawlPeriodHour(crawlPeriodHour);
//采集开始时间
Long crawlStartTime = (Long) input.get(Constants.CRAWLSTARTTIME);
taskParam.setCrawlStartTime(crawlStartTime);
//创建者id
String createUserId = params.get(Constants.CREATEUSERID).toString();
taskParam.setCreateUserId(createUserId);
//批量
Integer isBatch = 0;
taskParam.setIsBatch(isBatch);
//翻页上限
Integer maxPageNum = 1;
taskParam.setMaxPageNum(maxPageNum);
//专题id
Integer subjectId = Integer.valueOf(params.get(Constants.SOURCE_DATA_ID).toString());
taskParam.setSubjectId(subjectId);
//任务类型
Integer taskType = (Integer) input.get(Constants.TASKTYPE);
taskParam.setTaskType(taskType);
//关键词
String keyWords = ((List<String>)input.get(Constants.CRAWLKEYWORD)).stream().collect(Collectors.joining(" "));
taskParam.setCrawlKeyword(keyWords);
//标签
taskParam.setAttachTag(params.get(Constants.BUSINESSKEY).toString());
//网站cid
String cid = (String) input.get(Constants.CID);
taskParam.setCid(cid);
Long time = new Date().getTime();
taskParam.setTime(time);
String appId = Constants.APPID;
taskParam.setAppId(appId);
//签名
String sign = OtherUtils.getMd5(appId + createUserId + time);
taskParam.setSign(sign);
System.out.println(JSONObject.toJSONString(taskParam));
String html = DownLoadUtil.doPost(Constants.CRAWL_ADD_URL, JSONObject.toJSONString(taskParam));
if(html.contains(Constants.REQUEST_ERROR_MESSAGE)){
log.error("任务下发失败,{}",html);
responsePo.setCode(ResponseCode.FAILADDTASK.getCode());
responsePo.setMessage(ResponseCode.FAILADDTASK.getMessage());
}else{
log.info("任务下发结果:{}",html);
Map<String, Object> result = JSONObject.parseObject(html);
if(result.get(Constants.CODE).equals(Constants.SUCCESS_CODE)){
log.info("任务下发成功:{}",html);
String crawlDataFlag = null;
//构造crawldataflag
if(taskType == Constants.KEYWORD_TYPE){
crawlDataFlag = Constants.KEYWORD_PREFIX + keyWords;
}else if(taskType == Constants.USER_TYPE){
crawlDataFlag = Constants.ACCOUNT_PREFIX + keyWords;
}else {
crawlDataFlag = Constants.URL_PREFIX + keyWords;
}
//下发成功写入缓存数据获取流程用
String redisCacheKey = (cid + Constants.REDISKEY_MIDDLE + crawlDataFlag).toLowerCase();
//判断key是否存在
boolean isKey = isKeyExists(redisCacheKey);
//缓存value存储格式 外层
List<Map> cacheList = new ArrayList<Map>();
if(isKey){
String value = getValue(redisCacheKey);
cacheList = JSONObject.parseArray(value, Map.class);
}
//存储对象
Map<String, Object> cacheMap = new HashMap<String, Object>(16);
cacheMap.put(Constants.SUBJECT_ID, subjectId.toString());
cacheMap.put(Constants.KAFKA_ADDR, bootstrapServers);
cacheMap.put(Constants.KAFKA_TOPIC, topic);
cacheMap.put(Constants.TASK_ID, result.get(Constants.DATA).toString());
cacheMap.put(Constants.APP_ID, Constants.APPID);
cacheList.add(cacheMap);
//计算缓存有效时常
long currentTime = System.currentTimeMillis();
//默认时效性0.5天
long validTime = 43200L;
//缓存截止时间
long cacheEndTime = getTimestampAfterHours(12);
if(currentTime < crawlEndTime){
validTime = (crawlEndTime -currentTime)/1000;
cacheEndTime = crawlEndTime;
}
//缓存添加
setWithExpiration(redisCacheKey,JSONObject.toJSONString(cacheList),validTime);
//任务添加到内存缓存以供数据发送组装使用
ConfigCache.taskCache.put(result.get(Constants.DATA).toString(), params);
//内存缓存截止时间存储
ConfigCache.tokenCache.put(result.get(Constants.DATA).toString(),cacheEndTime);
}else{
log.error("任务下发失败,{}",html);
responsePo.setCode(ResponseCode.FAILADDTASK.getCode());
responsePo.setMessage(ResponseCode.FAILADDTASK.getMessage());
}
}
} catch (Exception e) {
// TODO: handle exception
log.error("任务下发失败\n",e);
e.printStackTrace();
responsePo.setCode(ResponseCode.FAILADDTASK.getCode());
responsePo.setMessage(ResponseCode.FAILADDTASK.getMessage());
}
}
/**
* 获取指定key下的数据
* @param key
* @return
*/
public String getValue(String key) {
return stringRedisTemplate.opsForValue().get(key);
}
/**
* redis赋值带有时效性的内容
* @param key
* @param value
* @param expirationInSeconds
*/
public void setWithExpiration(String key, String value, long expirationInSeconds) {
stringRedisTemplate.opsForValue().set(key, value);
stringRedisTemplate.expire(key, expirationInSeconds, TimeUnit.SECONDS);
}
/**
* 判断key是否存在
* @param key
* @return
*/
public boolean isKeyExists(String key) {
return stringRedisTemplate.hasKey(key);
}
/**
* 获取多少小时侯的时间戳
* @param hours
* @return
*/
public long getTimestampAfterHours(int hours) {
Instant currentTime = Instant.now();
// 增加指定小时数
Instant targetTime = currentTime.plusSeconds(hours * 60 * 60);
// 返回毫秒级时间戳
return targetTime.toEpochMilli();
}
}

177
src/main/java/com/bfd/task/utils/DateUtil.java

@ -0,0 +1,177 @@
package com.bfd.task.utils;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import lombok.extern.slf4j.Slf4j;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
/**
 * Date and time helpers. All formatting uses the JVM's default time zone.
 *
 * @author jian.mao
 * @date 2022-11-15
 * @description
 */
@Slf4j
public class DateUtil {
    private static final String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Formats a date with a fresh formatter; {@link SimpleDateFormat} instances must not be shared between threads. */
    private static String format(String pattern, Date date) {
        return new SimpleDateFormat(pattern).format(date);
    }

    /**
     * @return the current time as {@code yyyyMMddHH}
     */
    public static String getTimeStrForNow() {
        return format("yyyyMMddHH", new Date());
    }

    /**
     * @param time timestamp in seconds since the epoch
     * @return the day of the timestamp as {@code yyyyMMdd}
     */
    public static String getTimeStrForDay(long time) {
        return format("yyyyMMdd", new Date(time * 1000));
    }

    /**
     * @return the current day as {@code yyyyMMdd}
     */
    public static String getTimeStrForDay() {
        return format("yyyyMMdd", new Date());
    }

    /**
     * @return the current time as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String getDateTime() {
        return format(DATE_TIME_PATTERN, new Date());
    }

    /**
     * @param timestap timestamp in milliseconds since the epoch
     * @return the timestamp as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String getDateTime(Long timestap) {
        return format(DATE_TIME_PATTERN, new Date(timestap));
    }

    /**
     * @param timestap timestamp in milliseconds since the epoch
     * @return the timestamp as {@code yyyy-MM-dd}
     */
    public static String getDate(Long timestap) {
        return format("yyyy-MM-dd", new Date(timestap));
    }

    /**
     * @return the current month as {@code yyyyMM}
     */
    public static String getDateTimeForMonth() {
        return format("yyyyMM", new Date());
    }

    /**
     * Sleeps for the given time. If the thread is interrupted the method
     * returns early and leaves the interrupt flag set.
     *
     * @param millis time to sleep in milliseconds
     */
    public static void sleep(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Converts a numeric timestamp string to {@code yyyy-MM-dd HH:mm:ss}.
     * Strings longer than ten characters are read as milliseconds, shorter
     * ones as seconds.
     *
     * @param time timestamp as a decimal string
     * @return the formatted time
     * @throws NumberFormatException if {@code time} is not a valid number
     * @author ying.zhao
     * @date 2023/3/28
     */
    public static String timestampToDate(String time) {
        int ten = 10;
        if (time.length() > ten) {
            return format(DATE_TIME_PATTERN, new Date(Long.parseLong(time)));
        }
        // Parse as long: ten-digit second timestamps exceed the int range after 2038-01-19.
        return format(DATE_TIME_PATTERN, new Date(Long.parseLong(time) * 1000L));
    }

    /**
     * Converts a JSON structure of the form
     * {@code {"date":{"year":..,"month":..,"day":..},"time":{"hour":..,"minute":..,"second":..}}}
     * to {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param jsonTime the JSON text to convert
     * @return the formatted time, or the current time if the input cannot be converted
     */
    public static String parseCreated(String jsonTime){
        String formattedDateTime = getDateTime();
        try {
            JSONObject jsonObject = JSON.parseObject(jsonTime);
            JSONObject dateObject = jsonObject.getJSONObject("date");
            int day = dateObject.getIntValue("day");
            int month = dateObject.getIntValue("month");
            int year = dateObject.getIntValue("year");
            JSONObject timeObject = jsonObject.getJSONObject("time");
            int hour = timeObject.getIntValue("hour");
            int minute = timeObject.getIntValue("minute");
            int second = timeObject.getIntValue("second");
            LocalDateTime dateTime = LocalDateTime.of(year, month, day, hour, minute, second);
            DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DATE_TIME_PATTERN);
            formattedDateTime = dateTime.format(formatter);
        } catch (Exception e) {
            // Fall back to the current time; log the offending input together with the cause.
            log.info("日期转换失败:{}", jsonTime, e);
        }
        return formattedDateTime;
    }

    /**
     * Parses a date string.
     *
     * @param format pattern understood by {@link SimpleDateFormat}
     * @param date   text to parse
     * @return the parsed date; the current time if {@code date} is null or empty;
     *         {@code null} if the text does not match the pattern
     */
    public static Date strToDate(String format,String date){
        SimpleDateFormat sdf = new SimpleDateFormat(format);
        if (date == null || date.equals("")){
            return new Date();
        }
        try {
            return sdf.parse(date);
        } catch (ParseException e) {
            log.warn("日期解析失败:{}", date, e);
            return null;
        }
    }

    /**
     * Formats a date.
     *
     * @param format pattern understood by {@link SimpleDateFormat}
     * @param date   the date to format; the current time is used when {@code null}
     * @return the formatted date
     */
    public static String FormatDate(String format,Date date){
        return format(format, date == null ? new Date() : date);
    }

    public static void main(String[] args) {
        String time = timestampToDate("955814400000");
        System.out.println(time);
    }
}

906
src/main/java/com/bfd/task/utils/DownLoadUtil.java

@ -0,0 +1,906 @@
package com.bfd.task.utils;
import java.io.IOException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.StatusLine;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.LayeredConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.bfd.task.entity.Constants;
/**
* 下载工具类
* @author jian.mao
* @date 2023年9月19日
* @description
*/
public class DownLoadUtil {
// Browser-like User-Agent string.
private static String ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36";
private final static Logger log = LoggerFactory.getLogger(DownLoadUtil.class);
/** Proxy server (vendor site: www.16yun.cn) **/
final static String PROXYHOST = "u270.40.tp.16yun.cn";
final static Integer PROXYPORT = 6448;
/**
 * Proxy authentication credentials.
 * NOTE(review): credentials are hard-coded in source; consider moving them to
 * external configuration.
 **/
final static String PROXYUSER = "16HFBVJC";
final static String PROXYPASS = "897944";
// Shared connection pool, configured in the static initializer below.
private static PoolingHttpClientConnectionManager cm = null;
// Never assigned in the static initializer below.
private static HttpRequestRetryHandler httpRequestRetryHandler = null;
// Proxy host object; used as the key of the auth cache in the proxy request methods.
private static HttpHost proxy = null;
// Credentials provider holding the proxy user/password for any auth scope.
private static CredentialsProvider credsProvider = null;
// Request configuration that routes requests through the proxy with 5 second timeouts.
private static RequestConfig reqConfig = null;
// One-time setup of the pool, proxy, credentials and proxy request configuration.
static {
ConnectionSocketFactory plainsf = PlainConnectionSocketFactory
.getSocketFactory();
LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory
.getSocketFactory();
Registry registry = RegistryBuilder.create().register("http", plainsf)
.register("https", sslsf).build();
cm = new PoolingHttpClientConnectionManager(registry);
// At most 20 pooled connections in total and 5 per route.
cm.setMaxTotal(20);
cm.setDefaultMaxPerRoute(5);
// NOTE(review): this HttpHost uses the "https" scheme while the proxy set on
// reqConfig below is built without an explicit scheme - confirm the mismatch is intended.
proxy = new HttpHost(PROXYHOST, PROXYPORT, "https");
credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(AuthScope.ANY,
new UsernamePasswordCredentials(PROXYUSER, PROXYPASS));
// Connection-request, connect and socket timeouts are all 5000 ms.
reqConfig = RequestConfig.custom().setConnectionRequestTimeout(5000)
.setConnectTimeout(5000).setSocketTimeout(5000)
.setExpectContinueEnabled(false)
.setProxy(new HttpHost(PROXYHOST, PROXYPORT)).build();
}
/**
 * Performs an HTTP GET through the configured proxy.
 * <p>
 * Result by response status:
 * <ul>
 *   <li>200: the response body decoded as UTF-8; an empty body is reported as a
 *       download failure</li>
 *   <li>404: a fixed HTML marker telling the caller to finish normally</li>
 *   <li>anything else, or any exception while requesting: the download-failure text</li>
 * </ul>
 *
 * @param url
 *            the URL to request
 * @param headers
 *            optional; when given, the first map supplies the request headers
 *            (entries with a null value are skipped), otherwise default
 *            Accept / Accept-Language headers are sent
 * @return the page content or one of the marker texts described above
 */
@SafeVarargs
public static String proxyDoGet(String url, Map<String, Object>... headers) {
    // Timeout in seconds, applied to the client-level configuration.
    int timeout = 30;
    RequestConfig config = RequestConfig.custom()
            .setConnectTimeout(timeout * 1000)
            .setConnectionRequestTimeout(timeout * 1000)
            .setSocketTimeout(timeout * 1000).build();
    SocketConfig socketConfig = SocketConfig.custom()
            .setSoKeepAlive(false)
            .setSoLinger(1)
            .setSoReuseAddress(true)
            .setSoTimeout(timeout * 1000)
            .setTcpNoDelay(true).build();
    AuthCache authCache = new BasicAuthCache();
    authCache.put(proxy, new BasicScheme());
    HttpClientContext localContext = HttpClientContext.create();
    localContext.setAuthCache(authCache);

    HttpGet httpGet = new HttpGet(url);
    // The request-level config (proxy, 5 second timeouts) takes precedence over the client default.
    httpGet.setConfig(reqConfig);
    if (headers != null && headers.length > 0 && headers[0] != null) {
        for (Map.Entry<String, Object> header : headers[0].entrySet()) {
            if (header.getValue() != null) {
                httpGet.setHeader(header.getKey(), header.getValue().toString());
            }
        }
    } else {
        httpGet.setHeader("Accept",
                "application/json, text/javascript, */*; q=0.01");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
    }

    String html = "";
    int notFundCode = 404;
    int successCode = 200;
    // try-with-resources closes response and client on every path, including a failed execute().
    try (CloseableHttpClient httpClient = HttpClientBuilder.create()
            .setDefaultSocketConfig(socketConfig)
            .setDefaultRequestConfig(config)
            .setDefaultCredentialsProvider(credsProvider).build();
            CloseableHttpResponse response = httpClient.execute(httpGet, localContext)) {
        HttpEntity responseEntity = response.getEntity();
        StatusLine statusLine = response.getStatusLine();
        log.info("响应状态为:{}", statusLine);
        if (statusLine.getStatusCode() == successCode) {
            if (responseEntity != null) {
                html = EntityUtils.toString(responseEntity, "utf-8");
                log.info("响应内容长度为:{}", responseEntity.getContentLength());
                // An empty body is not a valid download result.
                if (html.equals(Constants.EMPTY)) {
                    html = "Download failed error is:reslut is null";
                }
            }
        } else if (statusLine.getStatusCode() == notFundCode) {
            html = "<h2>页面404,正常结束请求即可</h2>";
        } else {
            log.warn("请求错误,code码为:{}", statusLine.getStatusCode());
            html = "Download failed error is:reslut is null";
        }
    } catch (Exception e) {
        log.error("请求异常,url:{}", url, e);
        html = "Download failed error is:reslut is null";
    }
    return html;
}
/**
 * Sends an HTTP(S) GET through the proxy configured for this class, using an SSL context
 * that skips certificate verification (see {@code createIgnoreVerifySSL}).
 *
 * @param url     URL to request
 * @param headers optional request headers; only the first map is read. When omitted,
 *                default {@code Accept} and {@code Accept-Language} headers are sent
 * @return the body decoded as UTF-8 for status 200 (an empty string when the response has
 *         no entity); a fixed placeholder page for 404;
 *         {@code "Download failed error is:reslut is null"} for an empty body, any other
 *         status, or any exception raised while executing the request
 * @throws Exception if the SSL context cannot be created
 */
@SafeVarargs
public static String httpsslProxyGet(String url, Map<String, Object>... headers) throws Exception {
    // Build the request first so a bad URL or header value fails before a pool exists.
    HttpGet httpGet = new HttpGet(url);
    httpGet.setConfig(reqConfig);
    if (headers != null && headers.length > 0) {
        for (Map.Entry<String, Object> header : headers[0].entrySet()) {
            httpGet.setHeader(header.getKey(), header.getValue().toString());
        }
    } else {
        httpGet.setHeader("Accept",
                "application/json, text/javascript, */*; q=0.01");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
    }

    // HTTPS connections use the verification-bypassing SSL context.
    SSLContext sslcontext = createIgnoreVerifySSL();
    Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
            .register("http", PlainConnectionSocketFactory.INSTANCE)
            .register("https", new SSLConnectionSocketFactory(sslcontext))
            .build();
    PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
    connManager.setMaxTotal(50);
    connManager.setDefaultMaxPerRoute(10);

    // Client-level defaults, in seconds.
    int timeout = 30;
    RequestConfig config = RequestConfig.custom()
            .setConnectTimeout(timeout * 1000)
            .setConnectionRequestTimeout(timeout * 1000)
            .setSocketTimeout(timeout * 1000).build();
    SocketConfig socketConfig = SocketConfig.custom()
            .setSoKeepAlive(false)
            .setSoLinger(1)
            .setSoReuseAddress(true)
            .setSoTimeout(timeout * 1000)
            .setTcpNoDelay(true).build();

    // Seed the auth cache with a Basic scheme for the proxy host.
    AuthCache authCache = new BasicAuthCache();
    authCache.put(proxy, new BasicScheme());
    HttpClientContext localContext = HttpClientContext.create();
    localContext.setAuthCache(authCache);

    CloseableHttpClient httpClient = HttpClientBuilder.create()
            .setConnectionManager(connManager)
            .setDefaultSocketConfig(socketConfig)
            .setDefaultRequestConfig(config)
            .setDefaultCredentialsProvider(credsProvider).build();
    CloseableHttpResponse response = null;
    String html = "";
    int notFundCode = 404;
    int successCode = 200;
    try {
        response = httpClient.execute(httpGet, localContext);
        HttpEntity responseEntity = response.getEntity();
        StatusLine statusLine = response.getStatusLine();
        System.out.println("响应状态为:" + response.getStatusLine());
        if (statusLine.getStatusCode() == successCode) {
            if (responseEntity != null) {
                html = EntityUtils.toString(responseEntity, "utf-8");
                System.out.println("响应内容长度为:"
                        + responseEntity.getContentLength());
                // An empty body is treated as a failed download.
                if (html.equals(Constants.EMPTY)) {
                    html = "Download failed error is:reslut is null";
                }
            }
        } else if (statusLine.getStatusCode() == notFundCode) {
            html = "<h2>页面404,正常结束请求即可</h2>";
        } else {
            throw new Exception("请求错误,code码为:" + statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        html = "Download failed error is:reslut is null";
    } finally {
        // Close each resource on its own: a null response (execute() failed) or a failing
        // close() must not prevent the client and its connection pool from being released.
        if (response != null) {
            try {
                response.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        try {
            httpClient.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return html;
}
/**
 * Posts a JSON string to {@code url} (single attempt, no proxy).
 *
 * @param url    target URL
 * @param params request body, sent as UTF-8 with
 *               {@code Content-Type: application/json;charset=utf-8}
 * @return the response body decoded as UTF-8 for status 200;
 *         {@code "Download failed error is:reslut is null"} when that body is empty;
 *         otherwise {@code "Download failed error is:"} followed by the stack trace of the
 *         failure (any non-200 status is treated as a failure)
 */
public static String doPost(String url, String params){
    String strResult = "";
    HttpPost httpPost = new HttpPost(url);
    httpPost.addHeader("Content-Type", "application/json;charset=utf-8");

    // Timeouts in seconds.
    int timeout = 30;
    RequestConfig config = RequestConfig.custom().
            setConnectTimeout(timeout * 1000).
            setConnectionRequestTimeout(timeout * 1000).
            setSocketTimeout(timeout * 1000).build();
    SocketConfig socketConfig = SocketConfig.custom()
            .setSoKeepAlive(false)
            .setSoLinger(1)
            .setSoReuseAddress(true)
            .setSoTimeout(timeout * 1000)
            .setTcpNoDelay(true).build();
    HttpClientBuilder httpBuilder = HttpClientBuilder.create();
    httpBuilder.setUserAgent(ua);
    CloseableHttpClient client = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build();
    CloseableHttpResponse resp = null;
    try {
        httpPost.setEntity(new StringEntity(params,"utf-8"));
        resp = client.execute(httpPost);
        StatusLine statusLine = resp.getStatusLine();
        System.out.println("响应状态为:" + resp.getStatusLine());
        int successCode = 200;
        if(statusLine.getStatusCode() == successCode){
            HttpEntity respEntity = resp.getEntity();
            strResult = EntityUtils.toString(respEntity, "UTF-8");
            if(strResult.equals(Constants.EMPTY)){
                strResult = "Download failed error is:reslut is null";
            }
        }else{
            throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        strResult = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
    } finally {
        // Release the response and the per-call client; previously both were leaked.
        if (resp != null) {
            try {
                resp.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        try {
            client.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return strResult;
}
/**
 * Posts a JSON string via {@code doPost}, retrying while the result carries the failure marker.
 *
 * <p>At most five attempts are made; after every failed attempt the failure is logged and
 * the thread sleeps 5000 ms (via {@code DateUtil.sleep}), including after the last one.
 *
 * @param url    target URL
 * @param params JSON request body
 * @return the first result without the failure marker, or the last failure text
 */
public static String httpPost(String url,String params) {
    final String failureMarker = "Download failed error is:";
    final int maxAttempts = 5;
    String result = doPost(url, params);
    int attempt = 1;
    while (result.contains(failureMarker)) {
        log.error("DownLoadUtil------------->download is failure,url is:"+url);
        DateUtil.sleep(5000);
        attempt++;
        if (attempt > maxAttempts) {
            break;
        }
        result = doPost(url, params);
    }
    return result;
}
/**
 * Creates an {@link SSLContext} whose trust manager accepts every certificate chain.
 *
 * <p>SECURITY NOTE: certificate verification is disabled on purpose by the callers of this
 * method; connections built on this context are open to man-in-the-middle attacks.
 *
 * <p>The context is requested as {@code "TLS"} rather than the obsolete {@code "SSLv3"} name,
 * so that the handshake can negotiate the protocol versions the runtime currently enables.
 *
 * @return an initialised, trust-all SSL context
 * @throws NoSuchAlgorithmException if no provider supports the {@code "TLS"} context
 * @throws KeyManagementException   if initialising the context fails
 */
public static SSLContext createIgnoreVerifySSL() throws NoSuchAlgorithmException, KeyManagementException {
    SSLContext sc = SSLContext.getInstance("TLS");
    // Trust manager that performs no checks at all.
    X509TrustManager trustManager = new X509TrustManager() {
        @Override
        public void checkClientTrusted(
                java.security.cert.X509Certificate[] paramArrayOfX509Certificate,
                String paramString) throws CertificateException {
            // intentionally empty: every client certificate is accepted
        }
        @Override
        public void checkServerTrusted(
                java.security.cert.X509Certificate[] paramArrayOfX509Certificate,
                String paramString) throws CertificateException {
            // intentionally empty: every server certificate is accepted
        }
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            // The interface contract requires a non-null (possibly empty) array.
            return new java.security.cert.X509Certificate[0];
        }
    };
    sc.init(null, new TrustManager[] { trustManager }, null);
    return sc;
}
/**
 * Sends a single HTTP(S) GET using an SSL context that skips certificate verification
 * and a lax redirect strategy.
 *
 * @param url     URL to request
 * @param headers optional request headers; only the first map is read. When omitted,
 *                default {@code Accept}, {@code Accept-Language} and {@code User-Agent}
 *                headers are sent
 * @return the body decoded as UTF-8 for status 200 (an empty string when the response has
 *         no entity); a fixed placeholder page for 404; otherwise
 *         {@code "Download failed error is:"} followed by the stack trace of the failure
 */
@SafeVarargs
public static String httpsslGet(String url,Map<String, Object> ... headers) {
    String html="";
    CloseableHttpClient client = null;
    HttpEntity responseEntity = null;
    CloseableHttpResponse response = null;
    try {
        log.debug("DownLoadUtil------------->设置下载相关信息, start....");
        // HTTPS connections use the verification-bypassing SSL context.
        SSLContext sslcontext = createIgnoreVerifySSL();
        Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
                .register("http", PlainConnectionSocketFactory.INSTANCE)
                .register("https", new SSLConnectionSocketFactory(sslcontext))
                .build();
        PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
        connManager.setMaxTotal(50);
        connManager.setDefaultMaxPerRoute(10);
        // Timeouts in seconds.
        int timeout = 30;
        RequestConfig config = RequestConfig.custom().
                setConnectTimeout(timeout * 1000).
                setConnectionRequestTimeout(timeout * 1000).
                setSocketTimeout(timeout * 1000).build();
        SocketConfig socketConfig = SocketConfig.custom()
                .setSoKeepAlive(false)
                .setSoLinger(1)
                .setSoReuseAddress(true)
                .setSoTimeout(10000)
                .setTcpNoDelay(true).build();
        LaxRedirectStrategy redirectStrategy = new LaxRedirectStrategy();
        client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setRedirectStrategy(redirectStrategy).setDefaultSocketConfig(socketConfig).setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36").build();
        HttpGet httpGet = new HttpGet(url);
        if(headers != null && headers.length > 0){
            for (Map.Entry<String, Object> header : headers[0].entrySet()) {
                httpGet.setHeader(header.getKey(), header.getValue().toString());
            }
        }else{
            httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01");
            httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8");
            httpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36");
        }
        log.debug("DownLoadUtil------------->设置下载相关信息, end....");

        int notFundCode = 404;
        int successCode = 200;
        log.debug("DownLoadUtil------------->下载执行,start....");
        httpGet.setConfig(config);
        response = client.execute(httpGet);
        log.debug("DownLoadUtil------------->下载执行,end....");
        StatusLine statusLine = response.getStatusLine();
        log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is start ....");
        System.out.println("响应状态为:" + response.getStatusLine());
        responseEntity = response.getEntity();
        log.debug("DownLoadUtil------------->响应状态为:" + response.getStatusLine()+",下载请求没问题url:"+url+",read is end ....");
        if(statusLine.getStatusCode() == successCode){
            if (responseEntity != null) {
                html=EntityUtils.toString(responseEntity,"utf-8");
                System.out.println("响应内容长度为:" + responseEntity.getContentLength());
            }
        }else if(statusLine.getStatusCode() == notFundCode){
            html = "<h2>页面404,正常结束请求即可</h2>";
        }else{
            throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
    } finally {
        // Each resource is guarded and closed on its own: when execute() fails the entity
        // and response are null, and that must not stop the client (and its pool) closing.
        if (responseEntity != null) {
            try {
                responseEntity.getContent().close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (response != null) {
            try {
                response.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (client != null) {
            try {
                client.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return html;
}
/**
 * Fetches {@code url} via {@code httpsslGet}, retrying while the result carries the failure marker.
 *
 * <p>At most five attempts are made; after every failed attempt the failure is logged and
 * the thread sleeps 30000 ms (via {@code DateUtil.sleep}), including after the last one.
 *
 * @param url     URL to request
 * @param headers optional request headers, forwarded unchanged on every attempt
 * @return the first result without the failure marker, or the last failure text
 */
public static String httpSSLGet(String url,Map<String, Object> ... headers) {
    final String failureMarker = "Download failed error is:";
    final int maxAttempts = 5;
    String result = httpsslGet(url, headers);
    int attempt = 1;
    while (result.contains(failureMarker)) {
        log.error("DownLoadUtil------------->download is failure,url is:"+url);
        DateUtil.sleep(30000);
        attempt++;
        if (attempt > maxAttempts) {
            break;
        }
        result = httpsslGet(url, headers);
    }
    return result;
}
/**
 * Posts {@code param} as a URL-encoded form to {@code url} (single attempt, no proxy).
 *
 * @param url     target URL
 * @param param   form fields; each value is sent as its {@code toString()}
 * @param headers optional request headers; only the first map is read. When omitted,
 *                default browser-like headers and a form content type are sent
 * @return the response body decoded as UTF-8 for status 200 (an empty string when the
 *         response has no entity); otherwise {@code "Download failed error is:"} followed
 *         by the stack trace of the failure (any non-200 status is treated as a failure)
 */
@SafeVarargs
public static String doPostFrom(String url,Map<String, Object> param,Map<String, Object> ... headers){
    // Build the request and the form first so invalid input fails before a client exists.
    HttpPost httpPost = new HttpPost(url);
    if(headers != null && headers.length > 0){
        for (Map.Entry<String, Object> header : headers[0].entrySet()) {
            httpPost.setHeader(header.getKey(), header.getValue().toString());
        }
    }else{
        httpPost.addHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
        httpPost.addHeader("accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
        httpPost.addHeader("content-type", "application/x-www-form-urlencoded");
        httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36");
    }
    List<NameValuePair> list = new LinkedList<>();
    for (Map.Entry<String, Object> field : param.entrySet()) {
        list.add(new BasicNameValuePair(field.getKey(), field.getValue().toString()));
    }

    // Timeouts in seconds.
    int timeout = 15;
    RequestConfig config = RequestConfig.custom().
            setConnectTimeout(timeout * 1000).
            setConnectionRequestTimeout(timeout * 1000).
            setSocketTimeout(timeout * 1000).build();
    SocketConfig socketConfig = SocketConfig.custom()
            .setSoKeepAlive(false)
            .setSoLinger(1)
            .setSoReuseAddress(true)
            .setSoTimeout(10000)
            .setTcpNoDelay(true).build();
    HttpClientBuilder httpBuilder = HttpClientBuilder.create();
    httpBuilder.setUserAgent(ua);
    CloseableHttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build();
    CloseableHttpResponse response = null;
    String html="";
    try {
        UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8");
        httpPost.setEntity(entityParam);
        response = httpClient.execute(httpPost);
        HttpEntity responseEntity = response.getEntity();
        StatusLine statusLine = response.getStatusLine();
        System.out.println("响应状态为:" + response.getStatusLine());
        int successCode = 200;
        if(statusLine.getStatusCode() == successCode){
            if (responseEntity != null) {
                html=EntityUtils.toString(responseEntity,"utf-8");
            }
        }else{
            throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
    } finally {
        // Release the response and the per-call client; previously both were leaked.
        if (response != null) {
            try {
                response.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        try {
            httpClient.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return html;
}
/**
 * Posts a URL-encoded form via {@code doPostFrom}, retrying while the result carries the
 * failure marker.
 *
 * <p>At most five attempts are made; after every failed attempt the failure is logged and
 * the thread sleeps 5000 ms (via {@code DateUtil.sleep}), including after the last one.
 * The caller's {@code headers} are sent on every attempt, including the first one.
 *
 * @param url     target URL
 * @param params  form fields
 * @param headers optional request headers, forwarded unchanged on every attempt
 * @return the first result without the failure marker, or the last failure text
 */
@SafeVarargs
public static String httpPostForm(String url,Map<String,Object> params,Map<String, Object> ... headers) {
    // The first attempt used to omit the headers, so custom headers only took effect on retries.
    String html = doPostFrom(url,params,headers);
    int i = 1;
    while(html.contains("Download failed error is:")){
        log.error("DownLoadUtil------------->download is failure,url is:"+url);
        DateUtil.sleep(5000);
        i++;
        if(i > 5){
            break;
        }
        html = doPostFrom(url,params,headers);
    }
    return html;
}
/**
 * Posts a JSON string to {@code url} (single attempt) using an SSL context that skips
 * certificate verification.
 *
 * @param url     target URL
 * @param params  request body, sent as UTF-8
 * @param headers optional request headers; only the first map is read and is applied on top
 *                of the JSON {@code Content-Type}. When omitted, default {@code Accept},
 *                {@code Accept-Language} and {@code User-Agent} headers are sent
 * @return the body decoded as UTF-8 for status 200 (an empty string when the response has
 *         no entity); a fixed placeholder page for 404; otherwise
 *         {@code "Download failed error is:"} followed by the stack trace of the failure
 */
@SafeVarargs
public static String dosslPost(String url,String params,Map<String, Object> ... headers) {
    String html="";
    CloseableHttpClient client = null;
    HttpEntity responseEntity = null;
    CloseableHttpResponse response = null;
    try {
        // HTTPS connections use the verification-bypassing SSL context.
        SSLContext sslcontext = createIgnoreVerifySSL();
        Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
                .register("http", PlainConnectionSocketFactory.INSTANCE)
                .register("https", new SSLConnectionSocketFactory(sslcontext))
                .build();
        PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
        // Timeouts in seconds.
        int timeout = 5;
        RequestConfig config = RequestConfig.custom().
                setConnectTimeout(timeout * 1000).
                setConnectionRequestTimeout(timeout * 1000).
                setSocketTimeout(timeout * 1000).build();
        SocketConfig socketConfig = SocketConfig.custom()
                .setSoKeepAlive(false)
                .setSoLinger(1)
                .setSoReuseAddress(true)
                .setSoTimeout(10000)
                .setTcpNoDelay(true).build();
        client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build();
        HttpPost httpPost = new HttpPost(url);
        httpPost.addHeader("Content-Type", "application/json;charset=utf-8");
        if(headers != null && headers.length > 0){
            for (Map.Entry<String, Object> header : headers[0].entrySet()) {
                httpPost.setHeader(header.getKey(), header.getValue().toString());
            }
        }else{
            httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01");
            httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8");
            httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36");
        }

        httpPost.setEntity(new StringEntity(params,"utf-8"));
        response = client.execute(httpPost);
        int notFundCode = 404;
        int successCode = 200;
        StatusLine statusLine = response.getStatusLine();
        System.out.println("响应状态为:" + response.getStatusLine());
        responseEntity = response.getEntity();
        if(statusLine.getStatusCode() == successCode){
            if (responseEntity != null) {
                html=EntityUtils.toString(responseEntity,"utf-8");
                System.out.println("响应内容长度为:" + responseEntity.getContentLength());
            }
        }else if(statusLine.getStatusCode() == notFundCode){
            html = "<h2>页面404,正常结束请求即可</h2>";
        }else{
            throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
    } finally {
        // Each resource is guarded and closed on its own. Previously a failed execute()
        // left the entity null, the resulting NullPointerException escaped this method
        // (discarding the failure text) and the client was never closed.
        if (responseEntity != null) {
            try {
                responseEntity.getContent().close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (response != null) {
            try {
                response.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (client != null) {
            try {
                client.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return html;
}
/**
 * Posts {@code param} as a URL-encoded form to {@code url} (single attempt) using an SSL
 * context that skips certificate verification.
 *
 * @param url     target URL
 * @param param   form fields; each value is sent as its {@code toString()}
 * @param headers optional request headers; only the first map is read. When omitted,
 *                default {@code Accept}, {@code Accept-Language}, form content type and
 *                {@code User-Agent} headers are sent
 * @return the response body decoded as UTF-8 for status 200 (an empty string when the
 *         response has no entity); otherwise {@code "Download failed error is:"} followed
 *         by the stack trace of the failure (any non-200 status is treated as a failure)
 */
@SafeVarargs
public static String dosslPostForm(String url,Map<String, Object> param,Map<String, Object> ... headers) {
    String html="";
    CloseableHttpClient client = null;
    CloseableHttpResponse response = null;
    try {
        // HTTPS connections use the verification-bypassing SSL context.
        SSLContext sslcontext = createIgnoreVerifySSL();
        Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
                .register("http", PlainConnectionSocketFactory.INSTANCE)
                .register("https", new SSLConnectionSocketFactory(sslcontext))
                .build();
        PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);
        // Timeouts in seconds.
        int timeout = 5;
        RequestConfig config = RequestConfig.custom().
                setConnectTimeout(timeout * 1000).
                setConnectionRequestTimeout(timeout * 1000).
                setSocketTimeout(timeout * 1000).build();
        SocketConfig socketConfig = SocketConfig.custom()
                .setSoKeepAlive(false)
                .setSoLinger(1)
                .setSoReuseAddress(true)
                .setSoTimeout(10000)
                .setTcpNoDelay(true).build();
        client = HttpClients.custom().setConnectionManager(connManager).setDefaultRequestConfig(config).setDefaultSocketConfig(socketConfig).build();
        HttpPost httpPost = new HttpPost(url);
        if(headers != null && headers.length > 0){
            for (Map.Entry<String, Object> header : headers[0].entrySet()) {
                httpPost.setHeader(header.getKey(), header.getValue().toString());
            }
        }else{
            httpPost.setHeader("Accept", "application/json, text/javascript, */*; q=0.01");
            httpPost.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8");
            httpPost.addHeader("content-type", "application/x-www-form-urlencoded");
            httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36");
        }
        List<NameValuePair> list = new LinkedList<>();
        for (Map.Entry<String, Object> field : param.entrySet()) {
            list.add(new BasicNameValuePair(field.getKey(), field.getValue().toString()));
        }

        UrlEncodedFormEntity entityParam = new UrlEncodedFormEntity(list, "UTF-8");
        httpPost.setEntity(entityParam);
        response = client.execute(httpPost);
        int successCode = 200;
        HttpEntity responseEntity = response.getEntity();
        StatusLine statusLine = response.getStatusLine();
        System.out.println("响应状态为:" + response.getStatusLine());
        if(statusLine.getStatusCode() == successCode){
            if (responseEntity != null) {
                html=EntityUtils.toString(responseEntity,"utf-8");
            }
        }else{
            throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
    } finally {
        // Release the response and the per-call client (with its pool); previously leaked.
        if (response != null) {
            try {
                response.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (client != null) {
            try {
                client.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return html;
}
/**
 * Posts a URL-encoded form via {@code dosslPostForm}, retrying while the result carries the
 * failure marker.
 *
 * <p>At most five attempts are made. An exception thrown by an attempt is printed and
 * counted as a failure. After every failed attempt the failure is logged and the thread
 * sleeps 30000 ms (via {@code DateUtil.sleep}), including after the last one.
 *
 * @param url     target URL
 * @param params  form fields
 * @param headers optional request headers, forwarded unchanged on every attempt
 * @return the first result without the failure marker, or the last failure text
 */
public static String httpSSLPostForm(String url,Map<String, Object> params,Map<String, Object> ...headers) {
    final String failureMarker = "Download failed error is:";
    final int maxAttempts = 5;
    String result;
    int attempt = 1;
    while (true) {
        try {
            result = dosslPostForm(url, params, headers);
        } catch (Exception e) {
            e.printStackTrace();
            result = "Download failed error is:Exception!";
        }
        if (!result.contains(failureMarker)) {
            break;
        }
        log.error("DownLoadUtil------------->download is failure,url is:"+url);
        DateUtil.sleep(30000);
        attempt++;
        if (attempt > maxAttempts) {
            break;
        }
    }
    return result;
}
/**
 * Posts a JSON string via {@code dosslPost}, retrying while the result carries the failure
 * marker.
 *
 * <p>At most five attempts are made. Any {@link Throwable} thrown by an attempt is printed
 * and counted as a failure. After every failed attempt the failure is logged and the thread
 * sleeps 30000 ms (via {@code DateUtil.sleep}), including after the last one.
 *
 * @param url     target URL
 * @param params  JSON request body
 * @param headers optional request headers, forwarded unchanged on every attempt
 * @return the first result without the failure marker, or the last failure text
 */
public static String httpSSLPost(String url,String params,Map<String, Object> ...headers) {
    final String failureMarker = "Download failed error is:";
    final int maxAttempts = 5;
    String result;
    int attempt = 1;
    while (true) {
        try {
            result = dosslPost(url, params, headers);
        } catch (Throwable e) {
            e.printStackTrace();
            result = "Download failed error is:Exception!";
        }
        if (!result.contains(failureMarker)) {
            break;
        }
        log.error("DownLoadUtil------------->download is failure,url is:"+url);
        DateUtil.sleep(30000);
        attempt++;
        if (attempt > maxAttempts) {
            break;
        }
    }
    return result;
}
/**
 * Sends a single plain HTTP GET (no proxy, default SSL handling).
 *
 * @param url     URL to request
 * @param headers optional request headers; only the first map is read. When omitted,
 *                default {@code Accept} and {@code Accept-Language} headers are sent
 * @return the body decoded as UTF-8 for status 200 (an empty string when the response has
 *         no entity); {@code "Download failed error is:reslut is null"} when that body is
 *         empty; a fixed placeholder page for 404; otherwise
 *         {@code "Download failed error is:"} followed by the stack trace of the failure
 */
@SafeVarargs
public static String doGet(String url,Map<String, Object> ... headers){
    // Build the request first so a bad URL or header value fails before a client exists.
    HttpGet httpGet = new HttpGet(url);
    if(headers != null && headers.length > 0){
        for (Map.Entry<String, Object> header : headers[0].entrySet()) {
            httpGet.setHeader(header.getKey(), header.getValue().toString());
        }
    }else{
        httpGet.setHeader("Accept", "application/json, text/javascript, */*; q=0.01");
        httpGet.setHeader("Accept-Language","zh-CN,zh;q=0.9,en;q=0.8");
    }

    // Timeouts in seconds.
    int timeout = 15;
    RequestConfig config = RequestConfig.custom().
            setConnectTimeout(timeout * 1000).
            setConnectionRequestTimeout(timeout * 1000).
            setSocketTimeout(timeout * 1000).build();
    SocketConfig socketConfig = SocketConfig.custom()
            .setSoKeepAlive(false)
            .setSoLinger(1)
            .setSoReuseAddress(true)
            .setSoTimeout(10000)
            .setTcpNoDelay(true).build();
    HttpClientBuilder httpBuilder = HttpClientBuilder.create();
    httpBuilder.setUserAgent(ua);
    CloseableHttpClient httpClient = httpBuilder.setDefaultSocketConfig(socketConfig).setDefaultRequestConfig(config).build();
    CloseableHttpResponse response = null;
    String html="";
    try {
        int notFundCode = 404;
        int successCode = 200;
        response = httpClient.execute(httpGet);
        HttpEntity responseEntity = response.getEntity();
        StatusLine statusLine = response.getStatusLine();
        System.out.println("响应状态为:" + response.getStatusLine());
        if(statusLine.getStatusCode() == successCode){
            if (responseEntity != null) {
                html=EntityUtils.toString(responseEntity,"utf-8");
                // An empty body is treated as a failed download.
                if(html.equals("")){
                    html = "Download failed error is:reslut is null";
                }
            }
        }else if(statusLine.getStatusCode() == notFundCode){
            html = "<h2>页面404,正常结束请求即可</h2>";
        }else{
            throw new Exception("请求错误,code码为:"+statusLine.getStatusCode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        html = "Download failed error is:"+ThrowMessageUtil.getErrmessage(e);
    } finally {
        // Release the response and the per-call client; previously both were leaked.
        if (response != null) {
            try {
                response.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        try {
            httpClient.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return html;
}
/**
 * Manual entry point; currently does nothing.
 *
 * @param args ignored
 * @throws Exception never thrown by the current empty body
 */
public static void main(String[] args) throws Exception
{
    // intentionally empty
}
}

27
src/main/java/com/bfd/task/utils/EncryptionUtil.java

@ -0,0 +1,27 @@
package com.bfd.task.utils;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * MD5 hashing helper.
 *
 * @author jian.mao
 * @date 2023-03-10
 */
public class EncryptionUtil {
    /**
     * Returns the MD5 digest of {@code text} as 32 lowercase hexadecimal characters.
     *
     * <p>The text is always encoded as UTF-8 before hashing, so the result does not depend
     * on the platform default charset.
     *
     * @param text text to hash; must not be {@code null}
     * @return the hex digest, or {@code null} if the MD5 algorithm is unavailable
     */
    public static String md5(String text) {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            // Explicit charset: getBytes() without one uses the platform default.
            byte[] bytes = md.digest(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            StringBuilder sb = new StringBuilder(bytes.length * 2);
            for (byte b : bytes) {
                sb.append(String.format("%02x", b & 0xff));
            }
            return sb.toString();
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
            return null;
        }
    }
}

36
src/main/java/com/bfd/task/utils/FileUtil.java

@ -0,0 +1,36 @@
package com.bfd.task.utils;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * File helper.
 *
 * @author jian.mao
 * @date 2023-07-14
 */
public class FileUtil {
    /**
     * Appends {@code result} followed by a newline to the file at {@code path}.
     *
     * <p>Failures are not propagated: any exception is printed to standard error and the
     * method returns normally.
     *
     * @param path   file to append to
     * @param result text to append
     */
    public static void writeFile(String path,String result){
        // try-with-resources closes (and thereby flushes) the writer even when write() fails.
        try (FileWriter fw = new FileWriter(path,true)) {
            fw.write(result+"\n");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

32
src/main/java/com/bfd/task/utils/JsonUtil.java

@ -0,0 +1,32 @@
package com.bfd.task.utils;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.entity.Constants;
/**
* json工具
* @author jian.mao
* @date 2023年7月10日
* @description
*/
public class JsonUtil {
/**
* 校验字符串是list/map/str
* @param jsonString
* @return
*/
public static String checkJsonType(String jsonString) {
try {
JSONObject.parseObject(jsonString);
return Constants.MAP_TYPE;
} catch (Exception e) {
try {
JSONObject.parseArray(jsonString);
return Constants.LIST_TYPE;
} catch (Exception ex) {
return Constants.STRING_TYPE;
}
}
}
}

33
src/main/java/com/bfd/task/utils/OtherUtils.java

@ -0,0 +1,33 @@
package com.bfd.task.utils;
import java.security.MessageDigest;
/**
 * Miscellaneous helpers.
 *
 * @author jian.mao
 * @date 2023-09-19
 */
public class OtherUtils {
    /**
     * Returns the MD5 digest of the UTF-8 bytes of {@code string} as lowercase hex.
     *
     * @param string text to hash
     * @return the 32-character hex digest; if anything throws (for example a {@code null}
     *         argument), the text {@code "nceaform"} followed by the current time in
     *         milliseconds
     */
    public static String getMd5(String string) {
        try {
            byte[] digest = MessageDigest.getInstance("MD5").digest(string.getBytes("UTF-8"));
            StringBuilder hex = new StringBuilder(40);
            for (int i = 0; i < digest.length; i++) {
                int unsigned = digest[i] & 0xff;
                // Emit both nibbles explicitly so single-digit values keep their leading zero.
                hex.append(Character.forDigit(unsigned >>> 4, 16));
                hex.append(Character.forDigit(unsigned & 0x0f, 16));
            }
            return hex.toString();
        } catch (Exception e) {
            return "nceaform" + System.currentTimeMillis();
        }
    }
}

18
src/main/java/com/bfd/task/utils/QueueUtil.java

@ -0,0 +1,18 @@
package com.bfd.task.utils;
import java.util.Map;
import java.util.concurrent.LinkedBlockingDeque;
/**
 * Holder for the process-wide queues shared between components.
 *
 * @author jinming
 * @version 1.0
 * @date 2023/7/13 15:00
 */
public class QueueUtil {

    /** Deque of map entries, created without a capacity argument (presumably pending tasks). */
    public static LinkedBlockingDeque<Map<String, Object>> taskQueue = new LinkedBlockingDeque<>();

    /** Deque of strings, created without a capacity argument (presumably messages to send). */
    public static LinkedBlockingDeque<String> sendQueue = new LinkedBlockingDeque<>();
}

48
src/main/java/com/bfd/task/utils/SpringBootKafka.java

@ -0,0 +1,48 @@
package com.bfd.task.utils;
import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.support.SendResult;
import org.springframework.stereotype.Component;
import org.springframework.util.concurrent.ListenableFuture;
import org.springframework.util.concurrent.ListenableFutureCallback;
/**
* @PROJECT_NAME: companybusinesscrawl
* @DESCRIPTION:SpringBootKafka 工具类
* @AUTHOR: ying.zhao
* @DATE: 2023/4/6 11:09
*/
@Slf4j
@Component
public class SpringBootKafka {
@Autowired
private KafkaTemplate<String, Object> kafkaTemplate;
/**
* 自定义topicKafkaTemplate
*/
/**
* public static final String TOPIC = "companyBussTest";
**/
public void send(String topic, String message) {
String obj2String = JSONObject.toJSONString(message);
log.info("准备发送消息为:{}", obj2String);
//发送消息
ListenableFuture<SendResult<String, Object>> future = kafkaTemplate.send(topic, message);
future.addCallback(new ListenableFutureCallback<SendResult<String, Object>>() {
@Override
public void onFailure(Throwable throwable) {
//发送失败的处理
log.info(topic + " - 生产者 发送消息失败:" + throwable.getMessage());
}
@Override
public void onSuccess(SendResult<String, Object> stringObjectSendResult) {
//成功的处理
log.info(topic + " - 生产者 发送消息成功:" + stringObjectSendResult.toString());
}
});
}
}

23
src/main/java/com/bfd/task/utils/ThrowMessageUtil.java

@ -0,0 +1,23 @@
package com.bfd.task.utils;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Helper for turning exceptions into text.
 *
 * @author jian.mao
 * @date 2023-03-22
 */
public class ThrowMessageUtil {
    /**
     * Renders the full stack trace of {@code t} as a string.
     *
     * @param t throwable to render
     * @return the text that {@code t.printStackTrace} produces
     */
    public static String getErrmessage(Throwable t){
        StringWriter traceText = new StringWriter();
        PrintWriter tracePrinter = new PrintWriter(traceText, true);
        t.printStackTrace(tracePrinter);
        return traceText.toString();
    }
}

98
src/main/resources/application.yml

@ -0,0 +1,98 @@
logging:
level:
root: info
path: ./logs
# Log file reported to Spring Boot Admin; must match the log path configured in logback-spring.xml
file:
name: ./logs/crawltaskmanagerInfo.log
server:
port: 8008
servlet:
context-path: /crawltaskmanager
tomcat:
uri-encoding: utf-8
max-connections: 20000
# NOTE(review): a bare 1 is presumably read as 1 byte - confirm this form-post limit is intended
max-http-form-post-size: 1
max-threads: 1000
spring:
application:
name: crawltaskmanager
kafka:
bootstrap-servers: 172.16.12.55:9092,172.16.12.56:9092,172.16.12.57:9092
producer:
retries: 0
# When several messages go to the same partition the producer puts them in one batch; this sets the memory one batch may use, in bytes.
batch-size: 16384
# Size of the producer's in-memory buffer.
buffer-memory: 33554432
# Key serializer
key-serializer: org.apache.kafka.common.serialization.StringSerializer
# Value serializer
value-serializer: org.apache.kafka.common.serialization.StringSerializer
# acks=0 : the producer does not wait for any response from the server before treating the write as successful.
# acks=1 : the producer gets a success response as soon as the partition leader has received the message.
# acks=all : the producer gets a success response only after all replicating nodes have received the message.
acks: 1
consumer:
# Auto-commit interval. In Spring Boot 2.x the value is a Duration and must follow its format, e.g. 1S, 1M, 2H, 5D
auto-commit-interval: 1S
# What the consumer does when reading a partition that has no offset or an invalid offset:
# latest (default): start from the newest records (those produced after the consumer started)
# earliest: start from the beginning of the partition
auto-offset-reset: earliest
# Whether offsets are committed automatically (default true). Set to false and commit manually to avoid duplicates and data loss.
enable-auto-commit: true
# Key deserializer
key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
# Value deserializer
value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
# Consumer group
group-id: test4
# Number of concurrent consumer threads
concurrency: 4
# Timeout
max-poll-interval-ms: 60000
#listener:
# Number of threads to run in the listener container.
#concurrency: 5
# The listener is responsible for acks; every call commits immediately
#ack-mode: manual_immediate
#missing-topics-fatal: false
redis:
host: 172.24.12.126
port: 6379
timeout: 10000
database: 11
jedis:
pool:
max-active: 8 # Maximum number of pooled connections (a negative value means no limit)
max-wait: 800 # Maximum time to block waiting for a connection (a negative value means no limit)
max-idle: 8 # Maximum number of idle connections in the pool
min-idle: 2 # Minimum number of idle connections in the pool
boot:
admin:
client:
url: http://10.10.143.85:8000
instance:
service-base-url: http://10.10.143.85:8008
management:
endpoints:
web:
exposure:
include: "*"
endpoint:
health:
show-details: always
customize-kafka:
bootstrap-servers: 172.18.1.119:9992
producer:
topic: analyze
consumer:
topic: youzhi_analyze
manageweb:
host: http://172.16.12.55:9071
task:
task-queue-path: ../data/taskCache.txt
token-queue-path: ../data/tokenCache.txt

36
src/main/resources/logback-spring.xml

@ -0,0 +1,36 @@
<configuration>
<!-- Properties looked up from the Spring application configuration -->
<springProperty scope="context" name="logging.path" source="logging.path"/>
<!-- NOTE(review): the application.yml in this commit only sets logging.level.root; confirm that logging.level.com.bfd is defined somewhere, otherwise ${logging.level} used by the filter below is unresolved -->
<springProperty scope="context" name="logging.level" source="logging.level.com.bfd"/>
<!-- Default console appender; production normally runs in the background, so it is of little use there -->
<!-- <appender name="STDOUT"
class="ch.qos.logback.core.ConsoleAppender">
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
<Pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</Pattern>
</encoder>
</appender> -->
<!-- Daily-rolling file appender; keeps 7 days of history -->
<appender name="GLMAPPER-LOGGERONE"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<append>true</append>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>${logging.level}</level>
</filter>
<file>
${logging.path}/crawltaskmanagerInfo.log
</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<FileNamePattern>${logging.path}/crawltaskmanagerInfo.log.%d{yyyy-MM-dd}</FileNamePattern>
<MaxHistory>7</MaxHistory>
</rollingPolicy>
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<root level="info">
<appender-ref ref="GLMAPPER-LOGGERONE"/>
<!-- <appender-ref ref="STDOUT"/> -->
</root>
</configuration>

20
src/test/java/com/bfd/AppTest.java

@ -0,0 +1,20 @@
package com.bfd;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
/**
 * Placeholder unit test for the application.
 */
public class AppTest {

    /**
     * Trivial test that always passes.
     */
    @Test
    public void shouldAnswerWithTrue() {
        final boolean alwaysTrue = true;
        assertTrue(alwaysTrue);
    }
}
Loading…
Cancel
Save