Browse Source

2023-10-25

采集平台2.0版本
修改了一下pom中的一些组件的版本
release-1.0
jing.du 2 years ago
parent
commit
fbc814bc93
  1. 5
      cl_query_data_job/pom.xml
  2. 60
      cl_search_api/pom.xml
  3. 11
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  4. 3
      cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
  5. 3
      cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java
  6. 6
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

5
cl_query_data_job/pom.xml

@ -3,10 +3,11 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<artifactId>cl_stream_3.2</artifactId>
<artifactId>cl_stream_3.3</artifactId>
<groupId>com.bfd.mf</groupId> <groupId>com.bfd.mf</groupId>
<version>3.2-SNAPSHOT</version>
<version>3.3-SNAPSHOT</version>
</parent> </parent>
<artifactId>cl_query_data_job</artifactId> <artifactId>cl_query_data_job</artifactId>

60
cl_search_api/pom.xml

@ -10,24 +10,27 @@
<version>3.3-SNAPSHOT</version> <version>3.3-SNAPSHOT</version>
</parent> </parent>
<!-- 这个是最新的线上的版本 -->
<name>cl_search_api</name> <name>cl_search_api</name>
<description>Search V3.3 API</description> <description>Search V3.3 API</description>
<artifactId>cl_search_api</artifactId> <artifactId>cl_search_api</artifactId>
<version>3.3.0-SNAPSHOT</version> <version>3.3.0-SNAPSHOT</version>
<properties> <properties>
<start-class>com.bfd.mf.SearchApplication</start-class> <start-class>com.bfd.mf.SearchApplication</start-class>
<source>1.8</source> <source>1.8</source>
<es.version>6.0.0</es.version> <es.version>6.0.0</es.version>
<spring-boot-version>2.0.0.RELEASE</spring-boot-version> <spring-boot-version>2.0.0.RELEASE</spring-boot-version>
<springframework.boot.version>2.0.0.RELEASE</springframework.boot.version> <springframework.boot.version>2.0.0.RELEASE</springframework.boot.version>
<springframework.version>5.0.4.RELEASE</springframework.version>
<springframework.version>5.0.16.RELEASE</springframework.version>
<logstash.version>4.4</logstash.version> <logstash.version>4.4</logstash.version>
<!-- <jna.version>4.1.0</jna.version>-->
<jna.version>4.1.0</jna.version>
<jetty.version>9.4.8.v20171121</jetty.version> <jetty.version>9.4.8.v20171121</jetty.version>
<druid.version>1.1.6</druid.version> <druid.version>1.1.6</druid.version>
<guava.version>19.0</guava.version> <guava.version>19.0</guava.version>
<poi.version>3.15</poi.version>
<poi.version>4.1.0</poi.version>
<java.version>1.8</java.version> <java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
@ -90,7 +93,7 @@
<dependency> <dependency>
<groupId>org.mybatis.spring.boot</groupId> <groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId> <artifactId>mybatis-spring-boot-starter</artifactId>
<version>1.3.1</version>
<version>2.2.2</version>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
@ -124,7 +127,7 @@
<dependency> <dependency>
<groupId>com.alibaba</groupId> <groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId> <artifactId>fastjson</artifactId>
<version>1.2.68</version>
<version>1.2.83</version>
</dependency> </dependency>
<dependency> <dependency>
@ -133,6 +136,12 @@
<version>1.16.20</version> <version>1.16.20</version>
</dependency> </dependency>
<!-- <dependency>-->
<!-- <groupId>mysql</groupId>-->
<!-- <artifactId>mysql-connector-java</artifactId>-->
<!-- <version>5.1.29</version>-->
<!-- </dependency>-->
<dependency> <dependency>
<groupId>mysql</groupId> <groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId> <artifactId>mysql-connector-java</artifactId>
@ -166,7 +175,7 @@
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId> <artifactId>jackson-databind</artifactId>
<version>2.9.6</version>
<version>2.9.9.2</version>
</dependency> </dependency>
<!--<dependency>--> <!--<dependency>-->
<!--<groupId>com.fasterxml.jackson.core</groupId>--> <!--<groupId>com.fasterxml.jackson.core</groupId>-->
@ -237,16 +246,16 @@
<version>25.0-jre</version> <version>25.0-jre</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.poi</groupId>-->
<!-- <artifactId>poi</artifactId>-->
<!-- <version>4.1.0</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.poi</groupId>-->
<!-- <artifactId>poi-ooxml</artifactId>-->
<!-- <version>4.1.0</version>-->
<!-- </dependency>-->
<dependency> <dependency>
@ -254,19 +263,6 @@
<artifactId>okhttp</artifactId> <artifactId>okhttp</artifactId>
<version>3.6.0</version> <version>3.6.0</version>
</dependency> </dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
<scope>compile</scope>
</dependency>
<!-- jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/it.sauronsoftware/jave --> <!-- https://mvnrepository.com/artifact/it.sauronsoftware/jave -->
<!--<dependency>--> <!--<dependency>-->
<!--<groupId>it.sauronsoftware</groupId>--> <!--<groupId>it.sauronsoftware</groupId>-->
@ -274,6 +270,12 @@
<!--<version>1.0.2</version>--> <!--<version>1.0.2</version>-->
<!--</dependency>--> <!--</dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-configuration-processor</artifactId>-->
<!-- <optional>true</optional>-->
<!-- </dependency>-->
</dependencies> </dependencies>
<build> <build>

11
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -17,7 +17,6 @@
package com.bfd.mf.common.util.constants; package com.bfd.mf.common.util.constants;
import com.bfd.mf.common.util.utility.EsQueryConditionUtils; import com.bfd.mf.common.util.utility.EsQueryConditionUtils;
import com.bfd.mf.config.BFDApiConfig;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@ -729,6 +728,7 @@ public class ESConstant {
ESConstant.AUTHOR, //用户名 ESConstant.AUTHOR, //用户名
ESConstant.AUTHORNICKNAME, // 用户昵称 ESConstant.AUTHORNICKNAME, // 用户昵称
ESConstant.AUTHORID, // 用户ID ESConstant.AUTHORID, // 用户ID
ESConstant.USER_URL,
ESConstant.DOC_ID ,// 主贴唯一ID ESConstant.DOC_ID ,// 主贴唯一ID
ESConstant.DATA_ID, // 数据唯一ID ESConstant.DATA_ID, // 数据唯一ID
@ -740,18 +740,23 @@ public class ESConstant {
ESConstant.CRAWLTIME , // 抓取时间 ESConstant.CRAWLTIME , // 抓取时间
ESConstant.PUBTIME , // 发表时间 ESConstant.PUBTIME , // 发表时间
ESConstant.PUBTIMESTR,
ESConstant.CRAWLTIMESTR,
ESConstant.QUOTE_COUNT , // 转发数 ESConstant.QUOTE_COUNT , // 转发数
ESConstant.COMMENTS_COUNT , // 评论数 ESConstant.COMMENTS_COUNT , // 评论数
ESConstant.ATTITUDES_COUNT , // 点赞数 ESConstant.ATTITUDES_COUNT , // 点赞数
ESConstant.PRICE, // 价格 ESConstant.PRICE, // 价格
ESConstant.POST_COUNT, // 销量 ESConstant.POST_COUNT, // 销量
ESConstant.COLLE_CTCOUNT, // 收藏数
ESConstant.VIEW_CNT, // 浏览数
ESConstant.TITLE ,// 标题 ESConstant.TITLE ,// 标题
ESConstant.CONTENT , // 正文 ESConstant.CONTENT , // 正文
ESConstant.TRANSLATETITLE, // 译文标题 ESConstant.TRANSLATETITLE, // 译文标题
ESConstant.TRANSLATECONTENT, // 译文正文 ESConstant.TRANSLATECONTENT, // 译文正文
ESConstant.FORWARD_CONTENT, // 正文源码
/* ESConstant.FORWARD_CONTENT, // 正文源码*/
ESConstant.SYS_SENTIMENT, // 情感 ESConstant.SYS_SENTIMENT, // 情感
ESConstant.PRIMARY, // 数据类型 ESConstant.PRIMARY, // 数据类型
@ -818,7 +823,7 @@ public class ESConstant {
ESConstant.ISDOWNLOAD, // 是否下载 ESConstant.ISDOWNLOAD, // 是否下载
ESConstant.FILEPATH, // 文件路径 ESConstant.FILEPATH, // 文件路径
ESConstant.IMAGEPATH, // 图片路径
ESConstant.IMAGEPATH, // 片路径
ESConstant.VIDEOPATH, // 视频路径 ESConstant.VIDEOPATH, // 视频路径
ESConstant.FILEPATHSIZE, // 文件详细路径 ESConstant.FILEPATHSIZE, // 文件详细路径
ESConstant.IMAGEPATHSIZE, // 图片详细路径 ESConstant.IMAGEPATHSIZE, // 图片详细路径

3
cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java

@ -287,7 +287,7 @@ public abstract class EsUtils {
private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) { private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) {
if (hit.getHighlightFields().containsKey(fieldName)) { if (hit.getHighlightFields().containsKey(fieldName)) {
HighlightField highlightField = hit.getHighlightFields().get(fieldName); HighlightField highlightField = hit.getHighlightFields().get(fieldName);
System.out.println("getHighlightResult highlightField : "+highlightField);
System.out.println("getHighlightResult highlightField : " + highlightField);
Text[] fragments = highlightField.fragments(); Text[] fragments = highlightField.fragments();
String fragmentString = ""; String fragmentString = "";
for (Text fragment : fragments) { for (Text fragment : fragments) {
@ -511,6 +511,7 @@ public abstract class EsUtils {
.setQuery(boolQueryBuilder) .setQuery(boolQueryBuilder)
.setSearchType(SearchType.DEFAULT) .setSearchType(SearchType.DEFAULT)
.setSize(limit) .setSize(limit)
//.setFetchSource(ESConstant.FIELD_DATA, null)
.setScroll(new TimeValue(300000)) .setScroll(new TimeValue(300000))
.execute() .execute()
.actionGet();//注意:首次搜索并不包含数据 .actionGet();//注意:首次搜索并不包含数据

3
cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java

@ -41,7 +41,8 @@ public class SearchAnalysisService {
JSONObject jsonObject = new JSONObject(); JSONObject jsonObject = new JSONObject();
try{ try{
if(null != queryRequest.getSubjectId()) { if(null != queryRequest.getSubjectId()) {
List<ESMonitorEntity> esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS);
List<ESMonitorEntity> esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest,
ESConstant.FIELD_LIST_ANALYSIS);
// 渠道走势 // 渠道走势
jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest, esMonitorEntity); jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest, esMonitorEntity);
// 获取 渠道统计结果 分类标签统计结果 价值标签统计结果 // 获取 渠道统计结果 分类标签统计结果 价值标签统计结果

6
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -577,16 +577,18 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
// System.out.println("--------------" + url); // System.out.println("--------------" + url);
try { try {
// todo 微信的页面源码太大了影响导出所以导出微信的时候就把这块注释了
String forwardContent = sourceAsMap.get("forwardContent").toString(); String forwardContent = sourceAsMap.get("forwardContent").toString();
String forContent = forwardContent; String forContent = forwardContent;
// if(forwardContent.contains("</title>")){ // if(forwardContent.contains("</title>")){
// Document doc = Jsoup.parse(forwardContent); // Document doc = Jsoup.parse(forwardContent);
//String text = Jsoup.clean(forwardContent, Whitelist.basicWithImages()); //String text = Jsoup.clean(forwardContent, Whitelist.basicWithImages());
String text = Jsoup.clean(forwardContent, Whitelist.basic()); String text = Jsoup.clean(forwardContent, Whitelist.basic());
forContent = text; forContent = text;
// }
esMonitorEntity.setForwardContent(forContent); esMonitorEntity.setForwardContent(forContent);
esMonitorEntity.setForwardContent("");
esMonitorEntity.setReadCount(readCount); esMonitorEntity.setReadCount(readCount);
esMonitorEntity.setHasFile(hasFile); esMonitorEntity.setHasFile(hasFile);
esMonitorEntity.setHasVideo(hasVideo); esMonitorEntity.setHasVideo(hasVideo);

Loading…
Cancel
Save