Browse Source

2023-10-25

采集平台2.0版本
修改了一下pom中的一些组件的版本
release-1.0
jing.du 2 years ago
parent
commit
fbc814bc93
  1. 5
      cl_query_data_job/pom.xml
  2. 96
      cl_search_api/pom.xml
  3. 13
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  4. 3
      cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
  5. 3
      cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java
  6. 8
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

5
cl_query_data_job/pom.xml

@ -3,10 +3,11 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>cl_stream_3.2</artifactId>
<artifactId>cl_stream_3.3</artifactId>
<groupId>com.bfd.mf</groupId>
<version>3.2-SNAPSHOT</version>
<version>3.3-SNAPSHOT</version>
</parent>
<artifactId>cl_query_data_job</artifactId>

96
cl_search_api/pom.xml

@ -10,24 +10,27 @@
<version>3.3-SNAPSHOT</version>
</parent>
<!-- 这个是最新的线上的版本 -->
<name>cl_search_api</name>
<description>Search V3.3 API</description>
<artifactId>cl_search_api</artifactId>
<version>3.3.0-SNAPSHOT</version>
<properties>
<start-class>com.bfd.mf.SearchApplication</start-class>
<source>1.8</source>
<es.version>6.0.0</es.version>
<spring-boot-version>2.0.0.RELEASE</spring-boot-version>
<springframework.boot.version>2.0.0.RELEASE</springframework.boot.version>
<springframework.version>5.0.4.RELEASE</springframework.version>
<springframework.version>5.0.16.RELEASE</springframework.version>
<logstash.version>4.4</logstash.version>
<!-- <jna.version>4.1.0</jna.version>-->
<jna.version>4.1.0</jna.version>
<jetty.version>9.4.8.v20171121</jetty.version>
<druid.version>1.1.6</druid.version>
<guava.version>19.0</guava.version>
<poi.version>3.15</poi.version>
<poi.version>4.1.0</poi.version>
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
@ -90,7 +93,7 @@
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>1.3.1</version>
<version>2.2.2</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
@ -124,7 +127,7 @@
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.68</version>
<version>1.2.83</version>
</dependency>
<dependency>
@ -133,6 +136,12 @@
<version>1.16.20</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>mysql</groupId>-->
<!-- <artifactId>mysql-connector-java</artifactId>-->
<!-- <version>5.1.29</version>-->
<!-- </dependency>-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
@ -166,22 +175,22 @@
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.6</version>
<version>2.9.9.2</version>
</dependency>
<!--<dependency>-->
<!--<groupId>com.fasterxml.jackson.core</groupId>-->
<!--<artifactId>jackson-core</artifactId>-->
<!--<version>2.9.6</version>-->
<!--<groupId>com.fasterxml.jackson.core</groupId>-->
<!--<artifactId>jackson-core</artifactId>-->
<!--<version>2.9.6</version>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>com.fasterxml.jackson.core</groupId>-->
<!--<artifactId>jackson-annotations</artifactId>-->
<!--<version>2.9.6</version>-->
<!--<groupId>com.fasterxml.jackson.core</groupId>-->
<!--<artifactId>jackson-annotations</artifactId>-->
<!--<version>2.9.6</version>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>com.fasterxml.jackson.module</groupId>-->
<!--<artifactId>jackson-module-jaxb-annotations</artifactId>-->
<!--<version>2.9.6</version>-->
<!--<groupId>com.fasterxml.jackson.module</groupId>-->
<!--<artifactId>jackson-module-jaxb-annotations</artifactId>-->
<!--<version>2.9.6</version>-->
<!--</dependency>-->
<dependency>
<groupId>net.logstash.logback</groupId>
@ -199,9 +208,9 @@
<version>1.1.7</version>
</dependency>
<!--<dependency>-->
<!--<groupId>com.swagger.ui</groupId>-->
<!--<artifactId>swagger-bootstrap-ui</artifactId>-->
<!--<version>1.8.8</version>-->
<!--<groupId>com.swagger.ui</groupId>-->
<!--<artifactId>swagger-bootstrap-ui</artifactId>-->
<!--<version>1.8.8</version>-->
<!--</dependency>-->
<dependency>
<groupId>io.springfox</groupId>
@ -227,9 +236,9 @@
</dependency>
<!--这个很坑我 忘了引依赖-->
<!--<dependency>-->
<!--<groupId>com.github.xiaoymin</groupId>-->
<!--<artifactId>swagger-bootstrap-ui</artifactId>-->
<!--<version>2.9.2</version>-->
<!--<groupId>com.github.xiaoymin</groupId>-->
<!--<artifactId>swagger-bootstrap-ui</artifactId>-->
<!--<version>2.9.2</version>-->
<!--</dependency>-->
<dependency>
<groupId>com.google.guava</groupId>
@ -237,16 +246,16 @@
<version>25.0-jre</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.poi</groupId>-->
<!-- <artifactId>poi</artifactId>-->
<!-- <version>4.1.0</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.poi</groupId>-->
<!-- <artifactId>poi-ooxml</artifactId>-->
<!-- <version>4.1.0</version>-->
<!-- </dependency>-->
<dependency>
@ -254,26 +263,19 @@
<artifactId>okhttp</artifactId>
<version>3.6.0</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
<scope>compile</scope>
</dependency>
<!-- jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/it.sauronsoftware/jave -->
<!--<dependency>-->
<!--<groupId>it.sauronsoftware</groupId>-->
<!--<artifactId>jave</artifactId>-->
<!--<version>1.0.2</version>-->
<!--<groupId>it.sauronsoftware</groupId>-->
<!--<artifactId>jave</artifactId>-->
<!--<version>1.0.2</version>-->
<!--</dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.boot</groupId>-->
<!-- <artifactId>spring-boot-configuration-processor</artifactId>-->
<!-- <optional>true</optional>-->
<!-- </dependency>-->
</dependencies>
<build>

13
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -17,7 +17,6 @@
package com.bfd.mf.common.util.constants;
import com.bfd.mf.common.util.utility.EsQueryConditionUtils;
import com.bfd.mf.config.BFDApiConfig;
import java.util.ArrayList;
import java.util.HashMap;
@ -729,6 +728,7 @@ public class ESConstant {
ESConstant.AUTHOR, //用户名
ESConstant.AUTHORNICKNAME, // 用户昵称
ESConstant.AUTHORID, // 用户ID
ESConstant.USER_URL,
ESConstant.DOC_ID ,// 主贴唯一ID
ESConstant.DATA_ID, // 数据唯一ID
@ -740,18 +740,23 @@ public class ESConstant {
ESConstant.CRAWLTIME , // 抓取时间
ESConstant.PUBTIME , // 发表时间
ESConstant.PUBTIMESTR,
ESConstant.CRAWLTIMESTR,
ESConstant.QUOTE_COUNT , // 转发数
ESConstant.COMMENTS_COUNT , // 评论数
ESConstant.ATTITUDES_COUNT , // 点赞数
ESConstant.PRICE, // 价格
ESConstant.POST_COUNT, // 销量
ESConstant.COLLE_CTCOUNT, // 收藏数
ESConstant.VIEW_CNT, // 浏览数
ESConstant.TITLE ,// 标题
ESConstant.CONTENT , // 正文
ESConstant.TRANSLATETITLE, // 译文标题
ESConstant.TRANSLATECONTENT, // 译文正文
ESConstant.FORWARD_CONTENT, // 正文源码
/* ESConstant.FORWARD_CONTENT, // 正文源码*/
ESConstant.SYS_SENTIMENT, // 情感
ESConstant.PRIMARY, // 数据类型
@ -818,7 +823,7 @@ public class ESConstant {
ESConstant.ISDOWNLOAD, // 是否下载
ESConstant.FILEPATH, // 文件路径
ESConstant.IMAGEPATH, // 图片路径
ESConstant.IMAGEPATH, // 图片路径
ESConstant.VIDEOPATH, // 视频路径
ESConstant.FILEPATHSIZE, // 文件详细路径
ESConstant.IMAGEPATHSIZE, // 图片详细路径
@ -843,7 +848,7 @@ public class ESConstant {
);
// 总体分析要用的字段
public static final String[] FIELD_LIST_ANALYSIS =
public static final String[] FIELD_LIST_ANALYSIS =
EsQueryConditionUtils.getIncludeQueryField(
ESConstant.PUBTIME
, ESConstant.CRAWLTIME

3
cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java

@ -287,7 +287,7 @@ public abstract class EsUtils {
private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) {
if (hit.getHighlightFields().containsKey(fieldName)) {
HighlightField highlightField = hit.getHighlightFields().get(fieldName);
System.out.println("getHighlightResult highlightField : "+highlightField);
System.out.println("getHighlightResult highlightField : " + highlightField);
Text[] fragments = highlightField.fragments();
String fragmentString = "";
for (Text fragment : fragments) {
@ -511,6 +511,7 @@ public abstract class EsUtils {
.setQuery(boolQueryBuilder)
.setSearchType(SearchType.DEFAULT)
.setSize(limit)
//.setFetchSource(ESConstant.FIELD_DATA, null)
.setScroll(new TimeValue(300000))
.execute()
.actionGet();//注意:首次搜索并不包含数据

3
cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java

@ -41,7 +41,8 @@ public class SearchAnalysisService {
JSONObject jsonObject = new JSONObject();
try{
if(null != queryRequest.getSubjectId()) {
List<ESMonitorEntity> esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS);
List<ESMonitorEntity> esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest,
ESConstant.FIELD_LIST_ANALYSIS);
// 渠道走势
jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest, esMonitorEntity);
// 获取 渠道统计结果 分类标签统计结果 价值标签统计结果

8
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -574,19 +574,21 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
System.out.println(JSONObject.toJSONString(highlight));
}
// System.out.println("--------------" + url);
// System.out.println("--------------" + url);
try {
// todo 微信的页面源码太大了影响导出所以导出微信的时候就把这块注释了
String forwardContent = sourceAsMap.get("forwardContent").toString();
String forContent = forwardContent;
// if(forwardContent.contains("</title>")){
// Document doc = Jsoup.parse(forwardContent);
//String text = Jsoup.clean(forwardContent, Whitelist.basicWithImages());
String text = Jsoup.clean(forwardContent, Whitelist.basic());
forContent = text;
// }
esMonitorEntity.setForwardContent(forContent);
esMonitorEntity.setForwardContent("");
esMonitorEntity.setReadCount(readCount);
esMonitorEntity.setHasFile(hasFile);
esMonitorEntity.setHasVideo(hasVideo);

Loading…
Cancel
Save