commit
6621b5f0bf
22 changed files with 1311 additions and 0 deletions
-
34.gitignore
-
1README.md
-
130pom.xml
-
24src/main/java/com/bfd/crawl/datafilter/DataFilterApplication.java
-
36src/main/java/com/bfd/crawl/datafilter/bean/Constants.java
-
59src/main/java/com/bfd/crawl/datafilter/bean/ResponsePo.java
-
48src/main/java/com/bfd/crawl/datafilter/config/AsyncThreadConfiguration.java
-
39src/main/java/com/bfd/crawl/datafilter/controller/DataFilterController.java
-
14src/main/java/com/bfd/crawl/datafilter/dao/FilterTypeDao.java
-
49src/main/java/com/bfd/crawl/datafilter/entity/FilterType.java
-
32src/main/java/com/bfd/crawl/datafilter/enums/ResponseCode.java
-
32src/main/java/com/bfd/crawl/datafilter/exception/GlobalExceptionHandler.java
-
331src/main/java/com/bfd/crawl/datafilter/service/HandlerService.java
-
47src/main/java/com/bfd/crawl/datafilter/service/SendService.java
-
79src/main/java/com/bfd/crawl/datafilter/service/StartServcie.java
-
61src/main/java/com/bfd/crawl/datafilter/util/DataUtil.java
-
72src/main/java/com/bfd/crawl/datafilter/util/DateUtil.java
-
16src/main/java/com/bfd/crawl/datafilter/util/QueueUtil.java
-
94src/main/java/com/bfd/crawl/datafilter/util/StringUtil.java
-
64src/main/resources/application.yml
-
36src/main/resources/logback-spring.xml
-
13src/test/java/com/bfd/crawl/datafilter/DataFilterApplicationTests.java
@ -0,0 +1,34 @@ |
|||||
|
HELP.md |
||||
|
target/ |
||||
|
logs/ |
||||
|
!.mvn/wrapper/maven-wrapper.jar |
||||
|
!**/src/main/**/target/ |
||||
|
!**/src/test/**/target/ |
||||
|
|
||||
|
### STS ### |
||||
|
.apt_generated |
||||
|
.classpath |
||||
|
.factorypath |
||||
|
.project |
||||
|
.settings |
||||
|
.springBeans |
||||
|
.sts4-cache |
||||
|
|
||||
|
### IntelliJ IDEA ### |
||||
|
.idea |
||||
|
*.iws |
||||
|
*.iml |
||||
|
*.ipr |
||||
|
|
||||
|
### NetBeans ### |
||||
|
/nbproject/private/ |
||||
|
/nbbuild/ |
||||
|
/dist/ |
||||
|
/nbdist/ |
||||
|
/.nb-gradle/ |
||||
|
build/ |
||||
|
!**/src/main/**/build/ |
||||
|
!**/src/test/**/build/ |
||||
|
|
||||
|
### VS Code ### |
||||
|
.vscode/ |
@ -0,0 +1 @@ |
|||||
|
过滤器 |
@ -0,0 +1,130 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.bfd.crawl</groupId> |
||||
|
<artifactId>dataFilter</artifactId> |
||||
|
<version>0.0.1-SNAPSHOT</version> |
||||
|
<name>dataFilter</name> |
||||
|
<description>dataFilter</description> |
||||
|
<properties> |
||||
|
<java.version>1.8</java.version> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> |
||||
|
<spring-boot.version>2.2.4.RELEASE</spring-boot.version> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<!-- https://mvnrepository.com/artifact/de.codecentric/spring-boot-admin-starter-client --> |
||||
|
<dependency> |
||||
|
<groupId>de.codecentric</groupId> |
||||
|
<artifactId>spring-boot-admin-client</artifactId> |
||||
|
<version>2.2.4</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.kafka</groupId> |
||||
|
<artifactId>spring-kafka</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-web</artifactId> |
||||
|
</dependency> |
||||
|
<!--redis--> |
||||
|
<dependency> |
||||
|
<groupId>org.redisson</groupId> |
||||
|
<artifactId>redisson-spring-boot-starter</artifactId> |
||||
|
<version>3.13.6</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-data-redis</artifactId> |
||||
|
</dependency> |
||||
|
<!--JPA--> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-data-jpa</artifactId> |
||||
|
</dependency> |
||||
|
<!-- mysql --> |
||||
|
<dependency> |
||||
|
<groupId>mysql</groupId> |
||||
|
<artifactId>mysql-connector-java</artifactId> |
||||
|
<scope>runtime</scope> |
||||
|
</dependency> |
||||
|
<!--JSON--> |
||||
|
<dependency> |
||||
|
<groupId>com.alibaba</groupId> |
||||
|
<artifactId>fastjson</artifactId> |
||||
|
<version>2.0.17</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-devtools</artifactId> |
||||
|
<scope>runtime</scope> |
||||
|
<optional>true</optional> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.projectlombok</groupId> |
||||
|
<artifactId>lombok</artifactId> |
||||
|
<optional>true</optional> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.kafka</groupId> |
||||
|
<artifactId>spring-kafka-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.bfd.util</groupId> |
||||
|
<artifactId>pauseTool</artifactId> |
||||
|
<version>1.0</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<dependencyManagement> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-dependencies</artifactId> |
||||
|
<version>${spring-boot.version}</version> |
||||
|
<type>pom</type> |
||||
|
<scope>import</scope> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
</dependencyManagement> |
||||
|
|
||||
|
<build> |
||||
|
<finalName>dataFilter-0.0.1-SNAPSHOT</finalName> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-maven-plugin</artifactId> |
||||
|
<version>2.4.1</version> |
||||
|
<configuration> |
||||
|
<includeSystemScope>true</includeSystemScope> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<goals> |
||||
|
<goal>repackage</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<configuration> |
||||
|
<source>8</source> |
||||
|
<target>8</target> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
|
||||
|
</project> |
@ -0,0 +1,24 @@ |
|||||
|
package com.bfd.crawl.datafilter; |
||||
|
|
||||
|
import org.springframework.boot.SpringApplication; |
||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication; |
||||
|
import org.springframework.scheduling.annotation.EnableAsync; |
||||
|
import org.springframework.scheduling.annotation.EnableScheduling; |
||||
|
|
||||
|
@SpringBootApplication |
||||
|
@EnableAsync |
||||
|
@EnableScheduling |
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:DataFilterApplication |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/31 17:53 |
||||
|
*/ |
||||
|
public class DataFilterApplication { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
SpringApplication.run(DataFilterApplication.class, args); |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,36 @@ |
|||||
|
package com.bfd.crawl.datafilter.bean; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:Constants |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/14 10:41 |
||||
|
*/ |
||||
|
public class Constants { |
||||
|
/** |
||||
|
* 查询数据时间 |
||||
|
*/ |
||||
|
public static String TIME = "time"; |
||||
|
|
||||
|
/** |
||||
|
* 查询数据开始时间 |
||||
|
*/ |
||||
|
public static String START_TIME = "startTime"; |
||||
|
|
||||
|
/** |
||||
|
* 查询数据结束时间 |
||||
|
*/ |
||||
|
public static String END_TIME = "endTime"; |
||||
|
|
||||
|
/** |
||||
|
* 查询排除词 |
||||
|
*/ |
||||
|
public static String EXCLUSION_WORDS = "exclusionWords"; |
||||
|
|
||||
|
/** |
||||
|
* 查询包含命中关键词 |
||||
|
*/ |
||||
|
public static String INCLUD_WORDS = "includWords"; |
||||
|
|
||||
|
} |
@ -0,0 +1,59 @@ |
|||||
|
package com.bfd.crawl.datafilter.bean; |
||||
|
|
||||
|
|
||||
|
import com.bfd.crawl.datafilter.enums.ResponseCode; |
||||
|
import lombok.AllArgsConstructor; |
||||
|
import lombok.Data; |
||||
|
import lombok.NoArgsConstructor; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:ResponsePo |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/4/3 17:23 |
||||
|
*/ |
||||
|
@Data |
||||
|
@NoArgsConstructor |
||||
|
@AllArgsConstructor |
||||
|
public class ResponsePo { |
||||
|
/** |
||||
|
* 响应码 |
||||
|
*/ |
||||
|
private int code; |
||||
|
|
||||
|
/** |
||||
|
* 正常放 返回数据 的JSON串 |
||||
|
*/ |
||||
|
private Object data; |
||||
|
|
||||
|
/** |
||||
|
* 提示消息 |
||||
|
*/ |
||||
|
private String message; |
||||
|
|
||||
|
public static ResponsePo success() { |
||||
|
return setStatus(ResponseCode.SUCCESS.getCode(), ResponseCode.SUCCESS.getMessage()); |
||||
|
} |
||||
|
|
||||
|
public static ResponsePo error() { |
||||
|
return setStatus(ResponseCode.FAILURE.getCode(), ResponseCode.FAILURE.getMessage()); |
||||
|
} |
||||
|
|
||||
|
public static ResponsePo setStatus(int code, String message) { |
||||
|
ResponsePo resultBean = new ResponsePo(); |
||||
|
resultBean.code = code; |
||||
|
resultBean.message = message; |
||||
|
return resultBean; |
||||
|
} |
||||
|
public ResponsePo(int code, String message) { |
||||
|
this.code = code; |
||||
|
this.message = message; |
||||
|
this.data = data; |
||||
|
} |
||||
|
public ResponsePo(ResponseCode responseCode){ |
||||
|
this.code = responseCode.getCode(); |
||||
|
this.message = responseCode.getMessage(); |
||||
|
this.data = data; |
||||
|
} |
||||
|
} |
@ -0,0 +1,48 @@ |
|||||
|
package com.bfd.crawl.datafilter.config; |
||||
|
|
||||
|
|
||||
|
import org.springframework.context.annotation.Bean; |
||||
|
import org.springframework.context.annotation.Configuration; |
||||
|
import org.springframework.scheduling.annotation.EnableAsync; |
||||
|
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; |
||||
|
|
||||
|
import java.util.concurrent.Executor; |
||||
|
|
||||
|
/** |
||||
|
* @author jinming |
||||
|
* @version 1.0 |
||||
|
* @className AsyncThreadConfiguration |
||||
|
* @Date 2022/2/17 18:37 |
||||
|
*/ |
||||
|
@Configuration |
||||
|
@EnableAsync |
||||
|
public class AsyncThreadConfiguration { |
||||
|
@Bean(name = "asyncExecutor") |
||||
|
public Executor asyncExecutor() { |
||||
|
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); |
||||
|
// 核心线程数 |
||||
|
executor.setCorePoolSize(500); |
||||
|
// 并发线程的数量限制为2 |
||||
|
executor.setMaxPoolSize(500); |
||||
|
// 线程队列 |
||||
|
executor.setQueueCapacity(500); |
||||
|
executor.setThreadNamePrefix("dataFilter-"); |
||||
|
executor.initialize(); |
||||
|
executor.setWaitForTasksToCompleteOnShutdown(true); |
||||
|
return executor; |
||||
|
} |
||||
|
@Bean(name = "sendExecutor") |
||||
|
public Executor sendExecutor() { |
||||
|
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); |
||||
|
// 核心线程数 |
||||
|
executor.setCorePoolSize(500); |
||||
|
// 并发线程的数量限制为2 |
||||
|
executor.setMaxPoolSize(500); |
||||
|
// 线程队列 |
||||
|
executor.setQueueCapacity(500); |
||||
|
executor.setThreadNamePrefix("sendData-"); |
||||
|
executor.initialize(); |
||||
|
executor.setWaitForTasksToCompleteOnShutdown(true); |
||||
|
return executor; |
||||
|
} |
||||
|
} |
@ -0,0 +1,39 @@ |
|||||
|
package com.bfd.crawl.datafilter.controller; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import com.bfd.crawl.datafilter.bean.ResponsePo; |
||||
|
import com.bfd.crawl.datafilter.enums.ResponseCode; |
||||
|
import com.bfd.crawl.datafilter.util.QueueUtil; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.web.bind.annotation.PostMapping; |
||||
|
import org.springframework.web.bind.annotation.RequestBody; |
||||
|
import org.springframework.web.bind.annotation.RequestMapping; |
||||
|
import org.springframework.web.bind.annotation.RestController; |
||||
|
|
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:DataFilterController |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/26 11:21 |
||||
|
*/ |
||||
|
@RestController |
||||
|
@RequestMapping("/handlerdata") |
||||
|
@Slf4j |
||||
|
public class DataFilterController { |
||||
|
@PostMapping("/filter") |
||||
|
public ResponsePo documentFeedback(@RequestBody String dataJson) { |
||||
|
|
||||
|
ResponsePo responsePo = ResponsePo.success(); |
||||
|
log.info("新增任务:" + dataJson); |
||||
|
try { |
||||
|
QueueUtil.taskQueue.put(dataJson); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
|
||||
|
return responsePo; |
||||
|
} |
||||
|
} |
@ -0,0 +1,14 @@ |
|||||
|
package com.bfd.crawl.datafilter.dao; |
||||
|
|
||||
|
import com.bfd.crawl.datafilter.entity.FilterType; |
||||
|
import org.springframework.data.jpa.repository.JpaRepository; |
||||
|
|
||||
|
/** |
||||
|
* @author jinming |
||||
|
* @version 1.0 |
||||
|
* @className FilterTypeDao |
||||
|
* @Date 2023/7/26 16:28 |
||||
|
*/ |
||||
|
public interface FilterTypeDao extends JpaRepository<FilterType, Integer> { |
||||
|
FilterType getFilterTypeById(int id); |
||||
|
} |
@ -0,0 +1,49 @@ |
|||||
|
package com.bfd.crawl.datafilter.entity; |
||||
|
|
||||
|
import lombok.AllArgsConstructor; |
||||
|
import lombok.Data; |
||||
|
import lombok.NoArgsConstructor; |
||||
|
|
||||
|
import javax.persistence.*; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:FilterType |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/26 10:58 |
||||
|
*/ |
||||
|
@Entity |
||||
|
@Data |
||||
|
@NoArgsConstructor |
||||
|
@AllArgsConstructor |
||||
|
@Table(name = "filter_type") |
||||
|
public class FilterType { |
||||
|
/** |
||||
|
* 自增主键ID |
||||
|
*/ |
||||
|
@Id |
||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY) |
||||
|
@Column(name = "id") |
||||
|
private Integer id; |
||||
|
/** |
||||
|
* 类型(66运算方式,67运算条件) |
||||
|
*/ |
||||
|
@Column(name = "type") |
||||
|
private String type; |
||||
|
/** |
||||
|
* 描述 |
||||
|
*/ |
||||
|
@Column(name = "describe") |
||||
|
private String describe; |
||||
|
/** |
||||
|
* 上级id |
||||
|
*/ |
||||
|
@Column(name = "parent_id") |
||||
|
private Integer parentId; |
||||
|
/** |
||||
|
* 等级 |
||||
|
*/ |
||||
|
@Column(name = "level") |
||||
|
private String level; |
||||
|
} |
@ -0,0 +1,32 @@ |
|||||
|
package com.bfd.crawl.datafilter.enums; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:ResponseCodeEnum |
||||
|
* @version:1.0 |
||||
|
* @description:响应结果码枚举类 |
||||
|
* @Date:2023/2/28 11:40 |
||||
|
*/ |
||||
|
public enum ResponseCode { |
||||
|
//返回结果码枚举类 |
||||
|
SUCCESS(200, "操作成功"), |
||||
|
FAILURE(400, "参数错误"), |
||||
|
INTERNAL_SERVER_ERROR(500, "服务器内部错误"), |
||||
|
TYPE_NOT_SUPPORT(601,"文件类型不支持"); |
||||
|
|
||||
|
private int code; |
||||
|
private String message; |
||||
|
|
||||
|
ResponseCode(int code, String message) { |
||||
|
this.code = code; |
||||
|
this.message = message; |
||||
|
} |
||||
|
|
||||
|
public int getCode() { |
||||
|
return code; |
||||
|
} |
||||
|
|
||||
|
public String getMessage() { |
||||
|
return message; |
||||
|
} |
||||
|
} |
@ -0,0 +1,32 @@ |
|||||
|
package com.bfd.crawl.datafilter.exception; |
||||
|
|
||||
|
|
||||
|
|
||||
|
import com.bfd.crawl.datafilter.bean.ResponsePo; |
||||
|
import com.bfd.crawl.datafilter.enums.ResponseCode; |
||||
|
import org.springframework.http.HttpStatus; |
||||
|
import org.springframework.web.bind.annotation.ExceptionHandler; |
||||
|
import org.springframework.web.bind.annotation.ResponseStatus; |
||||
|
import org.springframework.web.bind.annotation.RestControllerAdvice; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:GlobalExceptionHandler |
||||
|
* @version:1.0 |
||||
|
* @description: 异常处理类 |
||||
|
* @Date:2023/2/28 16:29 |
||||
|
*/ |
||||
|
@RestControllerAdvice |
||||
|
public class GlobalExceptionHandler { |
||||
|
|
||||
|
@ExceptionHandler(value = {IllegalArgumentException.class}) |
||||
|
@ResponseStatus(HttpStatus.BAD_REQUEST) |
||||
|
public ResponsePo handleBadRequest(Exception ex) { |
||||
|
return new ResponsePo(ResponseCode.FAILURE.getCode(), ex.getMessage()); |
||||
|
} |
||||
|
@ExceptionHandler(value = {Exception.class}) |
||||
|
@ResponseStatus(HttpStatus.INTERNAL_SERVER_ERROR) |
||||
|
public ResponsePo handleException(Exception ex) { |
||||
|
return new ResponsePo(ResponseCode.INTERNAL_SERVER_ERROR.getCode(), ex.getMessage()); |
||||
|
} |
||||
|
} |
@ -0,0 +1,331 @@ |
|||||
|
package com.bfd.crawl.datafilter.service; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import com.alibaba.fastjson.JSONObject; |
||||
|
import com.alibaba.fastjson.JSONPath; |
||||
|
import com.bfd.crawl.datafilter.dao.FilterTypeDao; |
||||
|
import com.bfd.crawl.datafilter.entity.FilterType; |
||||
|
import com.bfd.crawl.datafilter.util.DataUtil; |
||||
|
import com.bfd.crawl.datafilter.util.DateUtil; |
||||
|
import com.bfd.crawl.datafilter.util.QueueUtil; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Autowired; |
||||
|
import org.springframework.scheduling.annotation.Async; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import java.text.SimpleDateFormat; |
||||
|
import java.util.Date; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.regex.Matcher; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:HandlerService |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/26 11:29 |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
public class HandlerService { |
||||
|
@Autowired |
||||
|
private FilterTypeDao filterTypeDao; |
||||
|
|
||||
|
|
||||
|
@Async("asyncExecutor") |
||||
|
public void run() { |
||||
|
while (true) { |
||||
|
if (QueueUtil.taskQueue.size() > 0) { |
||||
|
String dataJson = null; |
||||
|
try { |
||||
|
dataJson = QueueUtil.taskQueue.take(); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
Map parse = null; |
||||
|
try { |
||||
|
parse = (Map) JSON.parse(dataJson); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
try { |
||||
|
Map data = (Map) parse.get("data"); |
||||
|
int id = (int) parse.get("id"); |
||||
|
log.info("任务:" + id + "已开始处理"); |
||||
|
boolean needSend = false; |
||||
|
Map admin = (Map) parse.get("input"); |
||||
|
Map dataMap = (Map) parse.get("data"); |
||||
|
log.info("任务:" + id + "的判断条件为:" + JSON.toJSONString(admin)); |
||||
|
boolean caseSensitive = true; |
||||
|
try { |
||||
|
caseSensitive = (boolean) admin.get("caseSensitive"); |
||||
|
} catch (Exception e) { |
||||
|
|
||||
|
} |
||||
|
List<List<Map>> conditions = (List<List<Map>>) admin.get("or"); |
||||
|
for (List<Map> condition : conditions) { |
||||
|
for (Map andCondition : condition) { |
||||
|
int conditionId = (int) andCondition.get("id"); |
||||
|
FilterType filterType = filterTypeDao.getFilterTypeById(conditionId); |
||||
|
int typeId = filterType.getParentId(); |
||||
|
switch (typeId) { |
||||
|
case 6000: |
||||
|
needSend = textTypeHandler(conditionId, andCondition, dataMap, caseSensitive); |
||||
|
break; |
||||
|
case 6001: |
||||
|
needSend = numberTypeHandler(conditionId, andCondition, dataMap); |
||||
|
break; |
||||
|
case 6002: |
||||
|
needSend = dateTypeHandler(conditionId, andCondition, dataMap); |
||||
|
break; |
||||
|
case 6025: |
||||
|
needSend = normalHandler(conditionId, andCondition, dataMap); |
||||
|
break; |
||||
|
default: |
||||
|
break; |
||||
|
} |
||||
|
if (!needSend) { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
if (needSend) { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
if (needSend) { |
||||
|
log.info("任务:" + id + "满足" + JSON.toJSONString(admin) + "条件,发送数据到指定Kafka"); |
||||
|
Map result = new HashMap(32); |
||||
|
Map resultMap = new HashMap(32); |
||||
|
resultMap.put("isLast", 1); |
||||
|
resultMap.put("content", "数据满足条件过滤成功"); |
||||
|
result.put("results", JSON.toJSONString(resultMap)); |
||||
|
result.put("status", 1); |
||||
|
result.put("message", ""); |
||||
|
parse.put("result", result); |
||||
|
String message = JSON.toJSONString(parse); |
||||
|
QueueUtil.sendQueue.put(message); |
||||
|
} else { |
||||
|
log.info("任务:" + id + "不满足" + JSON.toJSONString(admin) + "条件,发送数据到指定Kafka"); |
||||
|
// Map result = new HashMap(32); |
||||
|
// Map resultMap = new HashMap(32); |
||||
|
// resultMap.put("isLast", 1); |
||||
|
// resultMap.put("content", "数据不满足条件过滤"); |
||||
|
// resultMap.put("status", 3); |
||||
|
// result.put("results", JSON.toJSONString(resultMap)); |
||||
|
// result.put("message", ""); |
||||
|
// parse.put("result", result); |
||||
|
// String message = JSON.toJSONString(parse); |
||||
|
// QueueUtil.sendQueue.put(message); |
||||
|
} |
||||
|
|
||||
|
} catch (Throwable e) { |
||||
|
log.error("处理程序发生异常:", e); |
||||
|
log.error("任务发生异常:{}", dataJson); |
||||
|
e.printStackTrace(); |
||||
|
Map result = new HashMap(32); |
||||
|
Map resultMap = new HashMap(32); |
||||
|
resultMap.put("isLast", 1); |
||||
|
resultMap.put("content", "数据满足条件过滤成功"); |
||||
|
result.put("results", JSON.toJSONString(resultMap)); |
||||
|
result.put("status", 2); |
||||
|
result.put("message", "未知异常"); |
||||
|
parse.put("result", result); |
||||
|
// String message = JSON.toJSONString(parse); |
||||
|
// try { |
||||
|
// QueueUtil.sendQueue.put(message); |
||||
|
// } catch (InterruptedException ex) { |
||||
|
// ex.printStackTrace(); |
||||
|
// } |
||||
|
} |
||||
|
} else { |
||||
|
log.info("任务队列为空,休眠10秒"); |
||||
|
try { |
||||
|
Thread.sleep(10000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private boolean normalHandler(int id, Map conditionMap, Map dataMap) { |
||||
|
boolean condition = false; |
||||
|
String key = (String) conditionMap.get("key"); |
||||
|
String value = (String) conditionMap.get("value"); |
||||
|
Object dataValue = DataUtil.getValue(key, dataMap); |
||||
|
switch (id) { |
||||
|
case 6024: |
||||
|
try { |
||||
|
dataValue.getClass(); |
||||
|
} catch (NullPointerException e) { |
||||
|
condition = true; |
||||
|
} |
||||
|
break; |
||||
|
case 6026: |
||||
|
Map valueMap = (Map) DataUtil.getValue(key, dataMap); |
||||
|
condition = valueMap.containsKey(value); |
||||
|
break; |
||||
|
case 6027: |
||||
|
List<Object> valueList = (List<Object>) DataUtil.getValue(key, dataMap); |
||||
|
condition = valueList.size() == 0; |
||||
|
break; |
||||
|
case 6028: |
||||
|
List<Object> objectList = (List<Object>) DataUtil.getValue(key, dataMap); |
||||
|
condition = objectList.size() != 0; |
||||
|
default: |
||||
|
break; |
||||
|
|
||||
|
} |
||||
|
return condition; |
||||
|
} |
||||
|
|
||||
|
private boolean textTypeHandler(int id, Map conditionMap, Map dataMap, boolean caseSensitive) { |
||||
|
boolean condition = false; |
||||
|
String key = (String) conditionMap.get("key"); |
||||
|
String value = (String) conditionMap.get("value"); |
||||
|
String dataValue = (String) DataUtil.getValue(key, dataMap); |
||||
|
switch (id) { |
||||
|
case 6003: |
||||
|
if (caseSensitive) { |
||||
|
condition = dataValue.equals(value); |
||||
|
} else { |
||||
|
dataValue = dataValue.toLowerCase(); |
||||
|
condition = dataValue.equals(value.toLowerCase()); |
||||
|
} |
||||
|
break; |
||||
|
case 6004: |
||||
|
if (caseSensitive) { |
||||
|
condition = dataValue.contains(value); |
||||
|
} else { |
||||
|
dataValue = dataValue.toLowerCase(); |
||||
|
condition = dataValue.contains(value.toLowerCase()); |
||||
|
} |
||||
|
break; |
||||
|
case 6005: |
||||
|
if (caseSensitive) { |
||||
|
condition = dataValue.startsWith(value); |
||||
|
} else { |
||||
|
dataValue = dataValue.toLowerCase(); |
||||
|
condition = dataValue.startsWith(value.toLowerCase()); |
||||
|
} |
||||
|
break; |
||||
|
case 6006: |
||||
|
if (caseSensitive) { |
||||
|
condition = dataValue.endsWith(value); |
||||
|
} else { |
||||
|
dataValue = dataValue.toLowerCase(); |
||||
|
condition = dataValue.endsWith(value.toLowerCase()); |
||||
|
} |
||||
|
break; |
||||
|
case 6007: |
||||
|
Pattern p = Pattern.compile(value); |
||||
|
Matcher m = p.matcher(dataValue); |
||||
|
condition = m.find(); |
||||
|
break; |
||||
|
case 6020: |
||||
|
if (caseSensitive) { |
||||
|
condition = !dataValue.equals(value); |
||||
|
} else { |
||||
|
dataValue = dataValue.toLowerCase(); |
||||
|
condition = !dataValue.equals(value.toLowerCase()); |
||||
|
} |
||||
|
break; |
||||
|
case 6023: |
||||
|
if (caseSensitive) { |
||||
|
condition = !dataValue.contains(value); |
||||
|
} else { |
||||
|
dataValue = dataValue.toLowerCase(); |
||||
|
condition = !dataValue.contains(value.toLowerCase()); |
||||
|
} |
||||
|
break; |
||||
|
|
||||
|
default: |
||||
|
break; |
||||
|
} |
||||
|
return condition; |
||||
|
} |
||||
|
|
||||
|
private boolean numberTypeHandler(int id, Map conditionMap, Map dataMap) { |
||||
|
boolean condition = false; |
||||
|
String key = (String) conditionMap.get("key"); |
||||
|
int value = Integer.parseInt(conditionMap.get("value").toString()); |
||||
|
int dataValue = Integer.parseInt(String.valueOf(DataUtil.getValue(key, dataMap))); |
||||
|
switch (id) { |
||||
|
case 6008: |
||||
|
condition = dataValue > value; |
||||
|
break; |
||||
|
case 6009: |
||||
|
condition = dataValue < value; |
||||
|
break; |
||||
|
case 6010: |
||||
|
condition = dataValue == value; |
||||
|
break; |
||||
|
case 6011: |
||||
|
condition = dataValue >= value; |
||||
|
break; |
||||
|
case 6012: |
||||
|
condition = dataValue <= value; |
||||
|
break; |
||||
|
case 6021: |
||||
|
condition = dataValue != value; |
||||
|
break; |
||||
|
default: |
||||
|
break; |
||||
|
} |
||||
|
return condition; |
||||
|
} |
||||
|
|
||||
|
private boolean dateTypeHandler(int id, Map conditionMap, Map dataMap) { |
||||
|
String format = "yyyy-MM-dd HH:mm:ss"; |
||||
|
boolean condition = false; |
||||
|
String key = (String) conditionMap.get("key"); |
||||
|
long valueTimestamp = 0; |
||||
|
try { |
||||
|
String value = (String) conditionMap.get("value"); |
||||
|
valueTimestamp = convertDateTimeToTimestamp(value, format); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
String dataValue = (String) DataUtil.getValue(key, dataMap); |
||||
|
|
||||
|
long dataValueTimestamp = convertDateTimeToTimestamp(dataValue, format); |
||||
|
switch (id) { |
||||
|
case 6013: |
||||
|
condition = dataValueTimestamp > valueTimestamp; |
||||
|
break; |
||||
|
case 6014: |
||||
|
condition = dataValueTimestamp < valueTimestamp; |
||||
|
break; |
||||
|
case 6022: |
||||
|
String theDayBegin = DateUtil.theDayBegin(); |
||||
|
String theDayEnd = DateUtil.theDayEnd(); |
||||
|
condition = convertDateTimeToTimestamp(theDayBegin, format) < dataValueTimestamp && dataValueTimestamp < convertDateTimeToTimestamp(theDayEnd, format); |
||||
|
break; |
||||
|
default: |
||||
|
break; |
||||
|
|
||||
|
} |
||||
|
return condition; |
||||
|
} |
||||
|
|
||||
|
private static long convertDateTimeToTimestamp(String datetimeString, String format) { |
||||
|
SimpleDateFormat sdf = new SimpleDateFormat(format); |
||||
|
try { |
||||
|
Date date = sdf.parse(datetimeString); |
||||
|
return date.getTime(); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return 0; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
String json = "{\"charset\":\"UTF-8\",\"iid\":\"57dc5083f49dd088c413e1f28572e83a\",\"length\":21909,\"tmpl_id\":3838,\"type\":\"newslist\",\"version\":\"6\",\"news_id\":\"57dc5083f49dd088c413e1f28572e83a\",\"url\":\"https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list?sub_srv_id=edu&srv_id=pc&offset=0&limit=20&strategy=1&ext=%7B%22pool%22%3A%5B%22top%22%2C%22hot%22%5D%2C%22is_filter%22%3A10%2C%22check_type%22%3Atrue%7D\",\"nextpage\":\"https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list?sub_srv_id=edu&srv_id=pc&offset=20&limit=20&strategy=1&ext=%7B%22pool%22%3A%5B%22top%22%2C%22hot%22%5D%2C%22is_filter%22%3A10%2C%22check_type%22%3Atrue%7D\",\"host\":\"172.18.1.182\",\"category\":1,\"items\":[{\"link\":{\"iid\":\"93b44cdcad298b82cecd5e0c09cb078d\",\"link\":\"https://new.qq.com/rain/a/20231031A012QT00\",\"linktype\":\"newscontent\",\"rawlink\":\"https://new.qq.com/rain/a/20231031A012QT00\"},\"posttime\":\"2023-10-31 08:02:07\",\"title\":\"\u200B中学周末校内托管服务,确定符合“双减”精神?\"}],\"tasks\":[{\"iid\":\"93b44cdcad298b82cecd5e0c09cb078d\",\"link\":\"https://new.qq.com/rain/a/20231031A012QT00\",\"linktype\":\"newscomment\",\"rawlink\":\"rain/a/20231031A012QT00\"},{\"iid\":\"f3d8ce4383320a7dc7c6270c74297272\",\"link\":\"https://new.qq.com/rain/a/20231030A09KEE00\",\"linktype\":\"newscontent\",\"rawlink\":\"https://new.qq.com/rain/a/20231030A09KEE00\"},{\"iid\":\"6a275c516abc33e2b216a08fb3b8668e\",\"link\":\"https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list?sub_srv_id=edu&srv_id=pc&offset=20&limit=20&strategy=1&ext=%7B%22pool%22%3A%5B%22top%22%2C%22hot%22%5D%2C%22is_filter%22%3A10%2C%22check_type%22%3Atrue%7D\",\"linktype\":\"newslist\",\"rawlink\":\"https://i.news.qq.com/trpc.qqnews_web.kv_srv.kv_srv_http_proxy/list?sub_srv_id=edu&srv_id=pc&offset=20&limit=20&strategy=1&ext=%7B%22pool%22%3A%5B%22top%22%2C%22hot%22%5D%2C%22is_filter%22%3A10%2C%22check_type%22%3Atrue%7D\"}],\"cid\":\"NtengxunNews\"}"; |
||||
|
JSONObject jsonObject = JSON.parseObject(json); |
||||
|
System.out.println(JSONPath.eval(jsonObject, "$$$$.['tasks'][0]['iissssd']")); |
||||
|
} |
||||
|
} |
@ -0,0 +1,47 @@ |
|||||
|
package com.bfd.crawl.datafilter.service; |
||||
|
|
||||
|
|
||||
|
import com.bfd.crawl.datafilter.util.QueueUtil; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Autowired; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.kafka.core.KafkaTemplate; |
||||
|
import org.springframework.scheduling.annotation.Async; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:SendService |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/31 17:53 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
@Service |
||||
|
public class SendService { |
||||
|
@Value("${send.topic}") |
||||
|
private String topic; |
||||
|
@Autowired |
||||
|
private KafkaTemplate kafkaTemplate; |
||||
|
|
||||
|
@Async("sendExecutor") |
||||
|
void sendToKafka() { |
||||
|
while (true) { |
||||
|
if (QueueUtil.sendQueue.size() > 0) { |
||||
|
try { |
||||
|
String message = QueueUtil.sendQueue.take(); |
||||
|
kafkaTemplate.send(topic,message); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
}else { |
||||
|
log.info("任务队列为空,休眠3秒"); |
||||
|
try { |
||||
|
Thread.sleep(3000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
@ -0,0 +1,79 @@ |
|||||
|
package com.bfd.crawl.datafilter.service; |
||||
|
|
||||
|
import com.bfd.crawl.datafilter.util.QueueUtil; |
||||
|
import com.bfd.util.PauseTool; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.beans.factory.annotation.Autowired; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.boot.ApplicationArguments; |
||||
|
import org.springframework.boot.ApplicationRunner; |
||||
|
import org.springframework.core.annotation.Order; |
||||
|
import org.springframework.data.redis.core.StringRedisTemplate; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:StartServcie |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/31 17:14 |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
@Order(value = 1) |
||||
|
public class StartServcie implements ApplicationRunner { |
||||
|
@Value("${thread.handler}") |
||||
|
private int handlerNumber; |
||||
|
|
||||
|
@Value("${thread.send}") |
||||
|
private int sendNumber; |
||||
|
@Autowired |
||||
|
private HandlerService handlerService; |
||||
|
@Autowired |
||||
|
private SendService sendService; |
||||
|
// @Value("${zookeeper.connection-string}") |
||||
|
// private String connectionString; |
||||
|
// @Value("${zookeeper.publish-node}") |
||||
|
// private String nodePath; |
||||
|
// @Resource |
||||
|
// private StringRedisTemplate stringRedisTemplate; |
||||
|
|
||||
|
@Override |
||||
|
public void run(ApplicationArguments args) throws Exception { |
||||
|
// PauseTool pauseTool = new PauseTool(); |
||||
|
// pauseTool.initializeRedisCache(stringRedisTemplate); |
||||
|
// pauseTool.setupZookeeperListener(connectionString, nodePath); |
||||
|
for (int i = 0; i < handlerNumber; i++) { |
||||
|
log.info("处理服务线程" + i + "已启动 "); |
||||
|
handlerService.run(); |
||||
|
} |
||||
|
for (int i = 0; i < sendNumber; i++) { |
||||
|
log.info("发送服务线程" + i + "已启动 "); |
||||
|
sendService.sendToKafka(); |
||||
|
} |
||||
|
// 创建一个匿名内部类实现了Runnable接口 |
||||
|
Runnable myRunnable = new Runnable() { |
||||
|
@Override |
||||
|
public void run() { |
||||
|
// 在这里定义线程要执行的任务 |
||||
|
while (true) { |
||||
|
log.info("任务队列长度为" + QueueUtil.taskQueue.size()); |
||||
|
log.info("发送队列长度为" + QueueUtil.taskQueue.size()); |
||||
|
try { |
||||
|
Thread.sleep(10000); |
||||
|
} catch (InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
}; |
||||
|
// 创建一个新的线程,并将Runnable对象传递给Thread构造函数 |
||||
|
Thread myThread = new Thread(myRunnable); |
||||
|
// 启动线程 |
||||
|
myThread.start(); |
||||
|
|
||||
|
|
||||
|
} |
||||
|
} |
@ -0,0 +1,61 @@ |
|||||
|
package com.bfd.crawl.datafilter.util; |
||||
|
|
||||
|
import com.alibaba.fastjson.JSON; |
||||
|
import com.alibaba.fastjson.JSONObject; |
||||
|
import com.alibaba.fastjson.JSONPath; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:DataUtil |
||||
|
* @version:1.0 |
||||
|
* @description: 获取dataValue的值 |
||||
|
* @Date:2023/11/1 9:54 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
public class DataUtil { |
||||
|
/** |
||||
|
* @param key 传入的key |
||||
|
* @param dataMap 数据map |
||||
|
* @return 根据传入的参数进行判断解析,返回正确的dataValue |
||||
|
*/ |
||||
|
public static Object getValue(String key, Map dataMap) { |
||||
|
try { |
||||
|
//公式为空直接就返回 |
||||
|
if (!StringUtil.hasValue(key)) { |
||||
|
return ""; |
||||
|
} |
||||
|
Object dataValue; |
||||
|
String isJson = "#json#"; |
||||
|
if (key.contains(isJson)) { |
||||
|
//进行第一次拆分,获取#json#前面的部分 |
||||
|
String[] keySplit = key.split(isJson); |
||||
|
String firstDataKey = keySplit[0]; |
||||
|
String[] firstDataKeySplit = firstDataKey.split(":"); |
||||
|
//取出前半部分对应的JSON数据并转换为JSONObject |
||||
|
String dataJson = (String) dataMap.get(firstDataKeySplit[0]); |
||||
|
JSONObject dataJsonObject = JSON.parseObject(dataJson); |
||||
|
//根据key的后半部分取出对应JSONObject中的值 |
||||
|
String firstDataKeyJson = (String) JSONPath.eval(dataJsonObject, firstDataKeySplit[1]); |
||||
|
String secDataKey = keySplit[1]; |
||||
|
JSONObject firstDataJsonObject = JSON.parseObject(firstDataKeyJson); |
||||
|
dataValue = JSONPath.eval(firstDataJsonObject, secDataKey); |
||||
|
return dataValue; |
||||
|
} |
||||
|
String[] keySplit = key.split(":"); |
||||
|
String jsonPath = keySplit[1]; |
||||
|
String dataJson = (String) dataMap.get(keySplit[0]); |
||||
|
JSONObject dataJsonObject = JSON.parseObject(dataJson); |
||||
|
dataValue = JSONPath.eval(dataJsonObject, jsonPath); |
||||
|
return dataValue; |
||||
|
} catch (Exception e) { |
||||
|
// TODO: handle exception |
||||
|
log.error("jsonpath公式取值异常,", e); |
||||
|
return ""; |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
} |
@ -0,0 +1,72 @@ |
|||||
|
package com.bfd.crawl.datafilter.util; |
||||
|
|
||||
|
import java.text.ParseException; |
||||
|
import java.text.SimpleDateFormat; |
||||
|
import java.util.Date; |
||||
|
|
||||
|
/** |
||||
|
* @author jinming |
||||
|
* @version 1.0 |
||||
|
* @className DateUtil |
||||
|
* @Date 2022/7/29 15:49 |
||||
|
*/ |
||||
|
public class DateUtil { |
||||
|
public static String theDayBegin() { |
||||
|
return new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + " 00:00:00"; |
||||
|
} |
||||
|
|
||||
|
public static String theDayEnd() { |
||||
|
return new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + " 23:59:59"; |
||||
|
} |
||||
|
|
||||
|
public static String theTaskBegin() { |
||||
|
String theDayBegin = new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + " 00:00:00"; |
||||
|
try { |
||||
|
Date parse = new SimpleDateFormat("yyyy-MM-dd").parse(theDayBegin); |
||||
|
return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date(parse.getTime() - 7200000)).toString(); |
||||
|
} catch (ParseException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return ""; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
|
||||
|
public static String theMonthBegin() { |
||||
|
String theDayBegin = new SimpleDateFormat("yyyy-MM").format(new Date()) + "-01 00:00:00"; |
||||
|
try { |
||||
|
Date parse = new SimpleDateFormat("yyyy-MM-dd").parse(theDayBegin); |
||||
|
return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date(parse.getTime())); |
||||
|
} catch (ParseException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return ""; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
public static long timeToLong(String time) { |
||||
|
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
||||
|
Date parse = null; |
||||
|
try { |
||||
|
parse = simpleDateFormat.parse(time); |
||||
|
} catch (ParseException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return parse.getTime(); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
public static String fomateTime(long time) { |
||||
|
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
||||
|
String parse = ""; |
||||
|
|
||||
|
parse = simpleDateFormat.format(time); |
||||
|
|
||||
|
return parse; |
||||
|
|
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
System.out.println(DateUtil.theTaskBegin()); |
||||
|
} |
||||
|
} |
@ -0,0 +1,16 @@ |
|||||
|
package com.bfd.crawl.datafilter.util; |
||||
|
|
||||
|
import java.util.concurrent.LinkedBlockingDeque; |
||||
|
|
||||
|
/** |
||||
|
* @author:jinming |
||||
|
* @className:QueueUtil |
||||
|
* @version:1.0 |
||||
|
* @description: |
||||
|
* @Date:2023/7/13 15:00 |
||||
|
*/ |
||||
|
public class QueueUtil { |
||||
|
|
||||
|
public static LinkedBlockingDeque<String> taskQueue = new LinkedBlockingDeque<String>(); |
||||
|
public static LinkedBlockingDeque<String> sendQueue = new LinkedBlockingDeque<String>(); |
||||
|
} |
@ -0,0 +1,94 @@ |
|||||
|
package com.bfd.crawl.datafilter.util; |
||||
|
|
||||
|
|
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
|
||||
|
import java.security.MessageDigest; |
||||
|
import java.util.HashSet; |
||||
|
import java.util.Set; |
||||
|
import java.util.regex.Matcher; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
/** |
||||
|
* @author jinming |
||||
|
* @version 1.0 |
||||
|
* @className StringUtile |
||||
|
* @Date 2022/1/21 11:46 |
||||
|
*/ |
||||
|
@Slf4j |
||||
|
public class StringUtil { |
||||
|
public static boolean hasValue(String str) { |
||||
|
return str != null && !"".equals(str.trim()); |
||||
|
} |
||||
|
|
||||
|
public static String getRegexGroup(String regex, String str, int id) { |
||||
|
String resultStr = ""; |
||||
|
if (hasValue(str)) { |
||||
|
Pattern p = Pattern.compile(regex); |
||||
|
Matcher m = p.matcher(str); |
||||
|
if (m.find()) { |
||||
|
resultStr = m.group(id); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if ("".equals(resultStr)) { |
||||
|
} |
||||
|
|
||||
|
return resultStr; |
||||
|
} |
||||
|
|
||||
|
public static Set<String> getEmailAddress(String message) { |
||||
|
Set<String> emailList = new HashSet<>(); |
||||
|
Pattern pattern = Pattern.compile("\\w+\\.?\\w+\\@\\w+\\.\\w+"); |
||||
|
Matcher m = pattern.matcher(message); |
||||
|
while (m.find()) { |
||||
|
emailList.add(m.group(0)); |
||||
|
} |
||||
|
return emailList; |
||||
|
} |
||||
|
public static String getMd5(String string) { |
||||
|
try { |
||||
|
MessageDigest md5 = MessageDigest.getInstance("MD5"); |
||||
|
byte[] bs = md5.digest(string.getBytes("UTF-8")); |
||||
|
StringBuilder sb = new StringBuilder(40); |
||||
|
for (byte x : bs) { |
||||
|
if ((x & 0xff) >> 4 == 0) { |
||||
|
sb.append("0").append(Integer.toHexString(x & 0xff)); |
||||
|
} else { |
||||
|
sb.append(Integer.toHexString(x & 0xff)); |
||||
|
} |
||||
|
} |
||||
|
return sb.toString(); |
||||
|
} catch (Exception e) { |
||||
|
//LOG.error("获取md5异常", e); |
||||
|
return "nceaform" + System.currentTimeMillis(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static String removeAllHtmlTags(String str) { |
||||
|
return hasValue(str) ? str.replaceAll("<[^<>]+?>", "") : ""; |
||||
|
} |
||||
|
|
||||
|
public static String getRegexGroup(Pattern regex, String str, int id) { |
||||
|
String resultStr = ""; |
||||
|
if (hasValue(str)) { |
||||
|
Matcher m = regex.matcher(str); |
||||
|
if (m.find()) { |
||||
|
resultStr = m.group(id); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if ("".equals(resultStr)) { |
||||
|
log.error(regex + " parser error!"); |
||||
|
} |
||||
|
|
||||
|
return resultStr; |
||||
|
} |
||||
|
|
||||
|
public static String getStrByPattern(String str, String regex) { |
||||
|
Pattern pattern = Pattern.compile(regex); |
||||
|
Matcher m = pattern.matcher(str); |
||||
|
return m.find() ? m.group(0) : ""; |
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,64 @@ |
|||||
|
server: |
||||
|
port: 7088 |
||||
|
spring: |
||||
|
application: |
||||
|
name: 过滤器 |
||||
|
boot: |
||||
|
admin: |
||||
|
client: |
||||
|
health: |
||||
|
timeout: 10s |
||||
|
url: http://172.16.12.55:8001 |
||||
|
instance: |
||||
|
service-base-url: http://172.16.12.56:7088 |
||||
|
datasource: |
||||
|
driver-class-name: com.mysql.cj.jdbc.Driver |
||||
|
username: crawl |
||||
|
password: crawl123 |
||||
|
url: jdbc:mysql://172.26.11.110:3306/kyyzgpt?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&allowMultiQueries=true&useSSL=false |
||||
|
|
||||
|
redis: |
||||
|
host: 172.24.12.126 |
||||
|
port: 6379 |
||||
|
timeout: 10000 |
||||
|
database: 7 |
||||
|
jedis: |
||||
|
pool: |
||||
|
max-active: 8 # 连接池最大连接数(使用负值表示没有限制) |
||||
|
max-wait: 800 # 连接池最大阻塞等待时间(使用负值表示没有限制) |
||||
|
max-idle: 8 # 连接池中的最大空闲连接 |
||||
|
min-idle: 2 # 连接池中的最小空闲连接 |
||||
|
kafka: |
||||
|
bootstrap-servers: 172.16.12.55:9092,172.16.12.56:9092,172.16.12.57:9092 |
||||
|
producer: |
||||
|
retries: 3 |
||||
|
acks: all |
||||
|
batch-size: 4096 |
||||
|
buffer-memory: 102476800 |
||||
|
key-serializer: org.apache.kafka.common.serialization.StringSerializer |
||||
|
value-serializer: org.apache.kafka.common.serialization.StringSerializer |
||||
|
jpa: |
||||
|
database-platform: org.hibernate.dialect.MySQL8Dialect |
||||
|
hibernate: |
||||
|
naming: |
||||
|
physical-strategy: org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl |
||||
|
|
||||
|
logging: |
||||
|
file: |
||||
|
path: ./logs |
||||
|
|
||||
|
management: |
||||
|
endpoints: |
||||
|
web: |
||||
|
exposure: |
||||
|
include: "*" |
||||
|
endpoint: |
||||
|
health: |
||||
|
show-details: always |
||||
|
|
||||
|
send: |
||||
|
topic: analyze |
||||
|
|
||||
|
thread: |
||||
|
handler: 50 |
||||
|
send: 20 |
@ -0,0 +1,36 @@ |
|||||
|
<configuration> |
||||
|
<!-- 属性文件:在properties文件中找到对应的配置项 --> |
||||
|
<springProperty scope="context" name="logging.file.path" source="logging.file.path"/> |
||||
|
<springProperty scope="context" name="logging.level" source="logging.level"/> |
||||
|
<!-- 默认的控制台日志输出,一般生产环境都是后台启动,这个没太大作用 --> |
||||
|
<appender name="STDOUT" |
||||
|
class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="GLMAPPER-LOGGERONE" |
||||
|
class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<append>true</append> |
||||
|
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> |
||||
|
<level>${logging.level}</level> |
||||
|
</filter> |
||||
|
<file> |
||||
|
${logging.file.path}/data-filter.log |
||||
|
</file> |
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<FileNamePattern>${logging.file.path}/data-filter.log.%d{yyyy-MM-dd}</FileNamePattern> |
||||
|
<MaxHistory>3</MaxHistory> |
||||
|
</rollingPolicy> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<root level="info"> |
||||
|
<appender-ref ref="GLMAPPER-LOGGERONE"/> |
||||
|
<appender-ref ref="STDOUT"/> |
||||
|
</root> |
||||
|
</configuration> |
@ -0,0 +1,13 @@ |
|||||
|
package com.bfd.crawl.datafilter; |
||||
|
|
||||
|
import org.junit.jupiter.api.Test; |
||||
|
import org.springframework.boot.test.context.SpringBootTest; |
||||
|
|
||||
|
@SpringBootTest |
||||
|
class DataFilterApplicationTests { |
||||
|
|
||||
|
@Test |
||||
|
void contextLoads() { |
||||
|
} |
||||
|
|
||||
|
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue