Browse Source

2023-09-11

采集平台2.0版本
导出的 location 字段添加了一下
release-1.0
jing.du 2 years ago
parent
commit
8fd98c05d7
  1. 7
      cl_query_data_job/pom.xml
  2. 175
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java
  3. 3
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java
  4. 1
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java
  5. 40
      cl_query_data_job/src/main/resources/application.yml
  6. 15
      cl_search_api/pom.xml
  7. 21
      cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java
  8. 201
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorService.java
  9. 55
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java
  10. 134
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java
  11. 5
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  12. 12
      cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java
  13. 2
      cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java
  14. 657
      cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
  15. 69
      cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java
  16. 18
      cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
  17. 113
      cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java
  18. 161
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java
  19. 4
      pom.xml

7
cl_query_data_job/pom.xml

@ -72,15 +72,10 @@
<version>19.0</version>
</dependency>
<!--<dependency>-->
<!--<groupId>com.alibaba</groupId>-->
<!--<artifactId>fastjson</artifactId>-->
<!--<version>1.2.6</version>-->
<!--</dependency>-->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.60</version>
<version>1.2.68</version>
</dependency>

175
cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java

@ -3,7 +3,10 @@ package com.bfd.mf.job.service.es;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.util.EsUtils;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.slf4j.Logger;
@ -22,21 +25,21 @@ public class EsQueryMiniService {
/**
* 统计 每个专题下每个渠道 的总量
*/
public Map<String,Long> getSubjectChannelStatistics(String clusterName,String indexName) {
public Map<String, Long> getSubjectChannelStatistics(String clusterName, String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
Map<String, Long> resultMap = new HashMap<>();
try {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE);
String indexNames [] = {indexName};
String indexNames[] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag");
resultMap = EsUtils.parseTerms(result);
}
}catch (Exception e){
} catch (Exception e) {
e.printStackTrace();
}
return resultMap;
@ -45,15 +48,15 @@ public class EsQueryMiniService {
/**
* 统计 每个专题下每个渠道 当天的增量
*/
public Map<String,Long> getSubjectChannelTodayStatistics(String clusterName,String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
public Map<String, Long> getSubjectChannelTodayStatistics(String clusterName, String indexName) {
Map<String, Long> resultMap = new HashMap<>();
try {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
long current=System.currentTimeMillis();
long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();
long current = System.currentTimeMillis();
long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset();
Long startTime = new Timestamp(zero).getTime();
RangeQueryBuilder rangeQueryBuilder = QueryBuilders
.rangeQuery(ESConstants.CRAWLTIME)
@ -61,12 +64,12 @@ public class EsQueryMiniService {
.lt(current);
qb.must(rangeQueryBuilder);
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE);
String indexNames [] = {indexName};
String indexNames[] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag");
resultMap = EsUtils.parseTerms(result);
}
}catch (Exception e){
} catch (Exception e) {
e.printStackTrace();
}
return resultMap;
@ -75,19 +78,19 @@ public class EsQueryMiniService {
/**
* 统计 每个专题下crawlDataFlag 三种类型当天的总量
*/
public Map<String,Long> getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
public Map<String, Long> getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) {
Map<String, Long> resultMap = new HashMap<>();
try {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG);
String indexNames [] = {indexName};
String indexNames[] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag");
Map<String,Long> termsMap = EsUtils.parseTerms(result);
Map<String, Long> termsMap = EsUtils.parseTerms(result);
resultMap = EsUtils.getResultMap(termsMap);
}
}catch (Exception e){
} catch (Exception e) {
e.printStackTrace();
}
return resultMap;
@ -96,15 +99,15 @@ public class EsQueryMiniService {
/**
* 统计 每个专题下crawlDataFlag 三种类型 的增量
*/
public Map<String,Long> getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
public Map<String, Long> getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) {
Map<String, Long> resultMap = new HashMap<>();
try {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
long current=System.currentTimeMillis();
long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();
long current = System.currentTimeMillis();
long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset();
Long startTime = new Timestamp(zero).getTime();
RangeQueryBuilder rangeQueryBuilder = QueryBuilders
.rangeQuery(ESConstants.CRAWLTIME)
@ -112,13 +115,13 @@ public class EsQueryMiniService {
.lt(current);
qb.must(rangeQueryBuilder);
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG);
String indexNames [] = {indexName};
String indexNames[] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag");
Map<String,Long> termsMap = EsUtils.parseTerms(result);
Map<String, Long> termsMap = EsUtils.parseTerms(result);
resultMap = EsUtils.getResultMap(termsMap);
}
}catch (Exception e){
} catch (Exception e) {
e.printStackTrace();
}
return resultMap;
@ -126,12 +129,12 @@ public class EsQueryMiniService {
/**
* 查询每个任务 的总量和当天的量 以及 包含图片的量包含视频的量包含附件的量
* 查询每个任务 的总量和当天的量 以及 包含图片的量包含视频的量包含附件的量
*/
public Map<String,Long> getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag,String indexNamePre) {
Map<String,Long> countMap = new HashMap<>();
String indexName = indexNamePre + task.getSubjectId();//subject_id
if(null != task.getCid()) {
public Map<String, Long> getTaskCount(String clusterName, Long taskId, Task task, String crawlDataFlag, String indexNamePre) {
Map<String, Long> countMap = new HashMap<>();
String indexName = indexNamePre + task.getSubjectId();//subject_id
if (null != task.getCid()) {
String cid = task.getCid().toLowerCase();
Long crawlStartTime = task.getCrawlStartTime().longValue();
Long crawlEndTime = task.getCrawlEndTime().longValue();
@ -139,8 +142,9 @@ public class EsQueryMiniService {
if (indexName.contains(indexNamePre)) {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
BoolQueryBuilder qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime);
logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString()
.replace("\n", "").replace("\r", "").replace(" ", ""));
Long count = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put("totalCount", count);
@ -158,26 +162,26 @@ public class EsQueryMiniService {
// 查询包含图片的数据的量
//videoPath == egc filePath == ugc imagePath == pgc
TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC,1);
TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC,1);
TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC,1);
TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD,false);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC, 1);
TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC, 1);
TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC, 1);
TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD, false);
qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime);
qb.must(pgcTermQueryBuilder);
logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long imageCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put(ESConstants.IMAGECOUNT, imageCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime);
qb.must(egcTermQueryBuilder);
logger.info("QB4 查询有视频的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long videoCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put(ESConstants.VIDEOCOUNT, videoCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime);
qb.must(ugcTermQueryBuilder);
logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long fileCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put(ESConstants.FILECOUNT, fileCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime);
qb.must(textTermQueryBuilder);
logger.info("QB6 查询纯文本的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long textCount = EsUtils.queryCount(clusterName, indexName, qb);
@ -200,10 +204,93 @@ public class EsQueryMiniService {
BoolQueryBuilder shouldbq = QueryBuilders.boolQuery();
RangeQueryBuilder rangeQueryBuilder = QueryBuilders
.rangeQuery(ESConstants.PUBTIME).gte(crawlStartTime).lt(crawlEndTime);
TermQueryBuilder primary2 = QueryBuilders.termQuery(ESConstants.PRIMARY,2);
TermQueryBuilder primary2 = QueryBuilders.termQuery(ESConstants.PRIMARY, 2);
shouldbq.must(rangeQueryBuilder).mustNot(primary2);
// 不用统计FB 的这种粉丝的量
TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR,"socialFans");
TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR, "socialFans");
qb.mustNot(pageTypeQueryBuilder).should(shouldbq);
return qb;
}
/**
 * Counts the documents collected for a single task, selecting them by task ID.
 *
 * <p>The index queried is {@code indexNamePre + task.getSubjectId()}. When the task has no
 * {@code cid}, or the index does not exist, an empty map is returned. Otherwise the map holds:
 * <ul>
 *   <li>{@code "totalCount"} - all documents matching the task query;</li>
 *   <li>{@code "todayCount"} - the same query restricted to documents whose
 *       {@code ESConstants.CRAWLTIME} lies between local midnight and now;</li>
 *   <li>{@code ESConstants.IMAGECOUNT}, {@code ESConstants.VIDEOCOUNT},
 *       {@code ESConstants.FILECOUNT} - task query plus a term on PGC / EGC / UGC == 1;</li>
 *   <li>{@code ESConstants.TEXTCOUNT} - task query plus {@code ESConstants.ISDOWNLOAD == false}.</li>
 * </ul>
 *
 * @param clusterName  name of the registered ES cluster to query
 * @param taskId       task whose documents are counted; must not be null
 * @param task         task entity supplying subject ID, cid and the crawl time window
 * @param indexNamePre index name prefix; the subject ID is appended to it
 * @return map of counter name to count; empty when nothing was queried
 */
public Map<String, Long> getTaskCountNew(String clusterName, Long taskId, Task task, String indexNamePre) {
    Map<String, Long> countMap = new HashMap<>();
    String indexName = indexNamePre + task.getSubjectId();
    String taskIdString = taskId.toString();
    if (null == task.getCid()) {
        return countMap;
    }
    Long crawlStartTime = task.getCrawlStartTime().longValue();
    Long crawlEndTime = task.getCrawlEndTime().longValue();
    // The prefix check is kept from the original code; it also rejects a null prefix early.
    if (!indexName.contains(indexNamePre) || !EsUtils.indexExists(clusterName, indexName)) {
        return countMap;
    }

    // Total number of documents for the task.
    BoolQueryBuilder qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime);
    countMap.put("totalCount", logAndCount("QB1 查询总量", clusterName, indexName, taskId, qb));

    // Documents crawled today: same query, narrowed to [local midnight, now).
    long current = System.currentTimeMillis();
    long startTime = startOfLocalDay(current);
    RangeQueryBuilder todayRange = QueryBuilders
            .rangeQuery(ESConstants.CRAWLTIME)
            .gte(startTime).lt(current);
    qb.must(todayRange);
    countMap.put("todayCount", logAndCount("QB2 查询今日总量", clusterName, indexName, taskId, qb));

    // Per-media counts. Field mapping: videoPath == egc, filePath == ugc, imagePath == pgc.
    qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime);
    qb.must(QueryBuilders.termQuery(ESConstants.PGC, 1));
    Long imageCount = logAndCount("QB3 查询有图片的任务数", clusterName, indexName, taskId, qb);
    countMap.put(ESConstants.IMAGECOUNT, imageCount);

    qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime);
    qb.must(QueryBuilders.termQuery(ESConstants.EGC, 1));
    Long videoCount = logAndCount("QB4 查询有视频的任务数", clusterName, indexName, taskId, qb);
    countMap.put(ESConstants.VIDEOCOUNT, videoCount);

    qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime);
    qb.must(QueryBuilders.termQuery(ESConstants.UGC, 1));
    Long fileCount = logAndCount("QB5 查询有文件的任务数", clusterName, indexName, taskId, qb);
    countMap.put(ESConstants.FILECOUNT, fileCount);

    qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime);
    qb.must(QueryBuilders.termQuery(ESConstants.ISDOWNLOAD, false));
    Long textCount = logAndCount("QB6 查询纯文本的任务数", clusterName, indexName, taskId, qb);
    countMap.put(ESConstants.TEXTCOUNT, textCount);

    logger.info("含图片的数据量:{} ; 含视频的数据量:{} ; 含文件的数据量:{} ; 纯文本的数据量:{}",
            imageCount, videoCount, fileCount, textCount);
    return countMap;
}

/**
 * Logs the query on a single line (whitespace stripped) and returns its document count.
 */
private Long logAndCount(String label, String clusterName, String indexName, Long taskId, BoolQueryBuilder qb) {
    String compactQuery = qb.toString().replace("\n", "").replace("\r", "").replace(" ", "");
    logger.info(label + ": indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, compactQuery);
    return EsUtils.queryCount(clusterName, indexName, qb);
}

/**
 * Returns the epoch millisecond of 00:00 local time for the day containing {@code nowMillis}.
 *
 * <p>The zone offset is added before flooring to a whole day and removed afterwards. Flooring
 * the UTC value first (as the previous inline formula did) lands on the wrong calendar day
 * whenever the local date and the UTC date differ. {@code getOffset} is used rather than
 * {@code getRawOffset} so daylight saving time is taken into account.
 */
private static long startOfLocalDay(long nowMillis) {
    final long dayMillis = 24L * 60 * 60 * 1000;
    long offset = TimeZone.getDefault().getOffset(nowMillis);
    return (nowMillis + offset) / dayMillis * dayMillis - offset;
}
/**
 * Builds the base query used to count the documents of one task.
 *
 * <p>The query requires {@code ESConstants.TASKID == taskId}, excludes documents whose
 * {@code ESConstants.PAGETYPR} is {@code "socialFans"} (Facebook fan records are not counted),
 * and adds a {@code should} clause matching documents that are not {@code PRIMARY == 2} and
 * whose {@code ESConstants.PUBTIME} lies in {@code [crawlStartTime, crawlEndTime)}.
 *
 * @param taskId         task ID, as a string, to match exactly
 * @param crawlStartTime inclusive lower bound for the publish time
 * @param crawlEndTime   exclusive upper bound for the publish time
 * @return a new, independent bool query; callers may add further clauses to it
 */
private BoolQueryBuilder getQueryBuilderNew(String taskId, Long crawlStartTime, Long crawlEndTime) {
    // Use the class logger rather than stdout so the message follows the logging configuration.
    logger.debug("要统计的任务ID: {}", taskId);
    BoolQueryBuilder qb = QueryBuilders.boolQuery();

    // Restrict to the given task.
    qb.must(QueryBuilders.termQuery(ESConstants.TASKID, taskId));

    // Time window: only posts and comments are time-bounded; user records (PRIMARY == 2) are not.
    RangeQueryBuilder pubTimeRange = QueryBuilders
            .rangeQuery(ESConstants.PUBTIME).gte(crawlStartTime).lt(crawlEndTime);
    BoolQueryBuilder timeBoundedNonUser = QueryBuilders.boolQuery()
            .must(pubTimeRange)
            .mustNot(QueryBuilders.termQuery(ESConstants.PRIMARY, 2));

    // NOTE(review): this bool query also carries a must clause, so Elasticsearch presumably
    // treats the should clause as optional (minimum_should_match defaults to 0 in that case).
    // The pubTime window would then affect scoring only and not restrict the count - confirm
    // whether that is intended before changing it.
    qb.mustNot(QueryBuilders.termQuery(ESConstants.PAGETYPR, "socialFans"))
            .should(timeBoundedNonUser);
    return qb;
}

3
cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java

@ -251,6 +251,8 @@ public class StatisticsService {
if(null != task.getCid() && !task.getCid().equals("test")) {
// 获取任务数量
countMap = esQueryMiniService.getTaskCount(miniName, taskId, task, crawlDataFlag, indexNamePre);
countMap = esQueryMiniService.getTaskCountNew(miniName, taskId, task, indexNamePre);
// 直接更新 cl_task 表中的 data_total today_data_total
long totalCount = 0L;
long todayCount = 0L;
@ -267,6 +269,7 @@ public class StatisticsService {
fileCount = countMap.get(ESConstants.FILECOUNT);
textCount = countMap.get(ESConstants.TEXTCOUNT);
}
// taskRepository.updateTaskCount(taskId,totalCount,todayCount);
taskRepository.updateTaskCountAll(taskId,totalCount,todayCount,imageCount,videoCount,fileCount,textCount);
}

1
cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java

@ -12,7 +12,6 @@ import com.bfd.mf.job.service.es.EsQueryNormalService;
import com.bfd.mf.job.service.statistics.TotalCountService;
import com.bfd.mf.job.util.DateUtil;
import com.bfd.mf.job.util.EsUtils;
import kafka.utils.Json;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

40
cl_query_data_job/src/main/resources/application.yml

@ -3,22 +3,14 @@ debug: false
logging:
level:
com.bfd.mf: debug
#spring:
# datasource:
# driver-class-name: com.mysql.jdbc.Driver
# username: root
# password: bfd123
# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
# hikari:
# maximum-pool-size: 10
# minimum-idle: 1
spring:
datasource:
driver-class-name: com.mysql.jdbc.Driver
username: crawl
password: D5HLOvk553DUNV62qJI=
url: jdbc:mysql://172.18.1.134:3306/all_task?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
hikari:
driver-class-name: com.mysql.cj.jdbc.Driver
username: crawl666
password: lx2a4jN1xFT96kj20LU=
url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
hikari:
maximum-pool-size: 10
minimum-idle: 1
@ -29,17 +21,17 @@ worker:
test-task-id: 180
## 数据默认要写的 kafka
broker-list: 172.18.1.113:9092
send-topic : databasestokafka
send-topic: databasestokafka
analysis-topic:
- sq_topic_cl_query_analysis_1
- sq_topic_cl_query_analysis_1
analysis-group: sq_group_cl_analysis_1
## 服务的状态,true 为启动
enable-analysis-producer: false # 查ES写kafka
enable-analysis-consumer: false # 读kafka写ES
enable-statistics-producer: false # 统计 taskCount 和 subjectCount (采集平台)
enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台)
enable-query-producer: false # 离线拉数(采集平台)
enable-high-frequency-producer: true # 高频离线拉数(采集平台)
enable-high-frequency-producer: false # 高频离线拉数(采集平台)
enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用)
enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用
enable-up-load-producer: false # 上传(采集平台)
@ -63,16 +55,16 @@ worker:
query-data-year-starttime: 1546272000000
rule-rest: http://rule.sq.baifendian.com/data_match/content/
comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask
comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask
rule-rest-concurrency: 500
content-limit: 2000
failure-upper: 2000
goFastPostUrl : http://172.18.1.113:8080/upload
goFastDomain : http://172.18.1.113:8080
uploadOLYExcelPath : /opt/nfsdata/excelTask/
uploadZipPath : /opt/nfsdata/uploadFiles/
indexNamePre : cl_major_
goFastPostUrl: http://172.18.1.113:8080/upload
goFastDomain: http://172.18.1.113:8080
uploadOLYExcelPath: /opt/nfsdata/excelTask/
uploadZipPath: /opt/nfsdata/uploadFiles/
indexNamePre: cl_major_
es-normal:
name: SQ_Normal_new

15
cl_search_api/pom.xml

@ -5,15 +5,15 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>cl_stream_3.2</artifactId>
<artifactId>cl_stream_3.3</artifactId>
<groupId>com.bfd.mf</groupId>
<version>3.2-SNAPSHOT</version>
<version>3.3-SNAPSHOT</version>
</parent>
<name>cl_search_api</name>
<description>Search V3.2 API</description>
<description>Search V3.3 API</description>
<artifactId>cl_search_api</artifactId>
<version>3.2.7-SNAPSHOT</version>
<version>3.3.0-SNAPSHOT</version>
<properties>
<start-class>com.bfd.mf.SearchApplication</start-class>
@ -260,6 +260,13 @@
<version>2.6</version>
<scope>compile</scope>
</dependency>
<!-- jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/it.sauronsoftware/jave -->
<!--<dependency>-->
<!--<groupId>it.sauronsoftware</groupId>-->

21
cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java

@ -4,16 +4,15 @@ package com.bfd.mf.common.service.cache;
import com.bfd.mf.common.service.es.EsCommonService;
import com.bfd.mf.common.service.es.ParseSearchScopeService;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.web.entity.mysql.topic.Task;
import com.bfd.mf.common.web.repository.mysql.base.SiteRepository;
import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.nlp.common.util.object.TObjectUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@ -25,7 +24,7 @@ import java.util.stream.Collectors;
@Service
public class TopicQueryService {
private static Logger logger = LoggerFactory.getLogger(TopicQueryService.class);
private static Logger logger = LoggerFactory.getLogger(TopicQueryService.class);
@Autowired
private EsCommonService esCommonService;
@Autowired
@ -70,7 +69,9 @@ public class TopicQueryService {
} else {
List<String> areaList = siteRepository.findCidsByArea(queryRequest.getSearchArea());
List lowCaseAreaList = areaList.stream().map(String::toLowerCase).collect(Collectors.toList());
// boolQuery.must(QueryBuilders.termsQuery(ESConstant.EN_SOURCE, lowCaseAreaList));
if (lowCaseAreaList.size() > 0) {
boolQuery.must(QueryBuilders.termsQuery(ESConstant.EN_SOURCE, lowCaseAreaList));
}
// String searchArea = getSearchArea(queryRequest.getSearchArea());
// boolQuery.must(QueryBuilders.termQuery(ESConstant.AREA, searchArea));
}
@ -101,16 +102,16 @@ public class TopicQueryService {
* 2023-04-24
* 采集平台2.0 版本可以选中多个任务进行查询
*/
if(null == queryRequest.getTaskIds()){
if (null == queryRequest.getTaskIds()) {
logger.info("[TopicQueryService] queryByConditions_v1 没有任务ID,查询专题下全部任务");
}else {
} else {
List<Long> taskIds = queryRequest.getTaskIds();
boolQuery.must(QueryBuilders.termsQuery(ESConstant.TASK_ID, taskIds));
if (taskIds.size() > 0) {
boolQuery.must(QueryBuilders.termsQuery(ESConstant.TASK_ID, taskIds));
}
}
if (null == cid || ("").equals(cid) || ("test").equals(cid)) {
logger.info("[TopicQueryService] queryByConditions_v1 查询全部站点");
} else {

201
cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorService.java

@ -8,9 +8,7 @@ import com.bfd.mf.common.web.repository.mysql.base.SiteRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.config.BFDApiConfig;
import com.bfd.mf.service.SearchAuthorService;
import com.bfd.nlp.common.util.string.TStringUtils;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.slf4j.Logger;
@ -35,58 +33,59 @@ public class EsQueryAuthorService {
@Autowired
private SiteRepository siteRepository;
private String clusterName ="";
private String clusterName = "";
@PostConstruct
public void init() {
// 注册数据查询来源
clusterName = bfdApiConfig.esMiniName();
String sourceAddress [] = bfdApiConfig.esMiniAddress();
String sourceAddress[] = bfdApiConfig.esMiniAddress();
EsUtils.registerCluster(clusterName, sourceAddress);// 配置文件中的 es-source
}
public List<JSONObject> queryAuthorListByKeyword(String[] indexName, QueryRequest queryRequest) {
try{
try {
BoolQueryBuilder boolQueryBuilder = null;
logger.debug("[EsQueryAuthorService] queryAuthorListByKeyword ...");
Integer limit = queryRequest.getLimit(); //每页的数量
Integer start = (queryRequest.getPage() - 1) * limit; //起始页(0,20,40....)
String orderFlag = "desc";
if(!queryRequest.getOrder().equals("")) {
if (!queryRequest.getOrder().equals("")) {
queryRequest.getOrder(); // 排序方式 asc/desc
}
String sortFlag = "pubTime";
if(!queryRequest.getSidx().equals("")) {
if (!queryRequest.getSidx().equals("")) {
queryRequest.getSidx(); // 排序字段
}
boolQueryBuilder = getQueryBuilder(queryRequest);
boolQueryBuilder = getQueryBuilderNew(queryRequest);
Integer searchType = queryRequest.getSearchType();
logger.info("[EsQueryAuthorService] queryAuthorListByKeyword indexName = " + indexName[0] + "; qb: \n {}.", boolQueryBuilder.toString());
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start,searchType);
List<Map<String,Object>> site = siteRepository.findsiteByDel(0);
Map<String,String> siteIdsMap = new HashMap<>();
Map<String,String> siteIconMap = new HashMap<>();
for (Map<String,Object> map: site) {
if(map.containsKey("site_id")) {
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
List<Map<String, Object>> site = siteRepository.findsiteByDel(0);
Map<String, String> siteIdsMap = new HashMap<>();
Map<String, String> siteIconMap = new HashMap<>();
for (Map<String, Object> map : site) {
if (map.containsKey("site_id")) {
siteIdsMap.put(map.get("cid").toString().toLowerCase(), map.get("site_id").toString());
}
if(map.containsKey("site_icon")) {
if (map.containsKey("site_icon")) {
siteIconMap.put(map.get("cid").toString().toLowerCase(), map.get("site_icon").toString());
}
}
List<JSONObject> newResult = new ArrayList<>();
for (JSONObject json: result) {
JSONObject newJson= json;
for (JSONObject json : result) {
JSONObject newJson = json;
String enSource = json.getString("enSource");
String siteId = siteIdsMap.get(enSource);
String icon = siteIdsMap.get(enSource);
newJson.put("siteId",siteId);
newJson.put("icon",icon);
newJson.put("siteId", siteId);
newJson.put("icon", icon);
newResult.add(newJson);
}
return result;
}catch (Exception e){
} catch (Exception e) {
e.printStackTrace();
return new ArrayList<>();
}
@ -96,41 +95,123 @@ public class EsQueryAuthorService {
/**
* 查询语句组装
*/
private BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) {
logger.info("[EsQueryAuthorService] getQueryBuilder start ..." );
BoolQueryBuilder bqb = QueryBuilders.boolQuery();
// private BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) {
// logger.info("[EsQueryAuthorService] getQueryBuilder start ..." );
// BoolQueryBuilder bqb = QueryBuilders.boolQuery();
// // 基础查询根据查询条件组装查询语句
// BoolQueryBuilder boolQueryBuilder = null;
// boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest);
// // 二次查询 关键词不为空就添加关键词查询语句 = 0 content 1 title 2 author 3 con+tit 4con+aut 5con+com
// // 单选 0:主贴;1:评论;2:用户
// Integer searchType = queryRequest.getSearchType();
// // String searchScope = queryRequest.getSearchScope(); //复选 0:标题;1:正文;2:作者 多个用,分割 0,1
// String keyword = queryRequest.getKeyword();
//
// BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType);
// boolQueryBuilder.filter(searchTextBuilder);
// // Map<String ,Float> fields = new HashedMap();
// if (TStringUtils.isNotEmpty(keyword)) {
// // 主贴的话 标题和内容
// if(searchType == 0){
// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0);
// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
// bqb.must(queryBuilder);
// // 评论的话 评论内容
// }else if (searchType == 1){
//// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
//// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery);
//// qb.must(queryBuilder);
// boolQueryBuilder.must(QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0));
// // 用户 就只查 用户名
// }else if (searchType == 2){
// boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR));
// //boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR));
// }
// }
// bqb.must(boolQueryBuilder);
// return bqb;
// }
/**
* 2023-05-24 漏了用户的高级搜索
* @param queryRequest
* @return
*/
private BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) {
logger.info("[EsQueryAuthorService] getQueryBuilderNew start ...");
BoolQueryBuilder qb = QueryBuilders.boolQuery();
// 基础查询根据查询条件组装查询语句
BoolQueryBuilder boolQueryBuilder = null;
boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest);
// 二次查询 关键词不为空就添加关键词查询语句 = 0 content 1 title 2 author 3 con+tit 4con+aut 5con+com
// 单选 0:主贴;1:评论;2:用户
Integer searchType = queryRequest.getSearchType();
// String searchScope = queryRequest.getSearchScope(); //复选 0:标题;1:正文;2:作者 多个用,分割 0,1
String keyword = queryRequest.getKeyword();
BoolQueryBuilder boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest);
// 如果要根据ID 查询数据 如果查ID 后面的条件就不用查了
if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) {
String dataIds = queryRequest.getDataIds();
List<String> dataIdList = getDataIdList(dataIds);
QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList);
boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder);
// 如果有 任务ID就有没有就没有啊
if (null != queryRequest.getTaskIds()) {
List<Long> taskIds = queryRequest.getTaskIds();
if (taskIds.size() > 0) {
boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds));
}
}
qb.must(boolQueryBuilder);
return qb;
}
Integer searchType = queryRequest.getSearchType(); // 单选 0:主贴;1:评论;2:用户
BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType);
boolQueryBuilder.filter(searchTextBuilder);
// Map<String ,Float> fields = new HashedMap();
if (TStringUtils.isNotEmpty(keyword)) {
// 主贴的话 标题和内容
if(searchType == 0){
MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0);
MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
bqb.must(queryBuilder);
// 评论的话 评论内容
}else if (searchType == 1){
// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery);
// qb.must(queryBuilder);
boolQueryBuilder.must(QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0));
// 用户 就只查 用户名
}else if (searchType == 2){
boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR));
if (null != queryRequest.getHighLevelQueries()) {
List<HighLevelQuery> highLevelQueries = queryRequest.getHighLevelQueries();
List<HighLevelQuery> tempHighLevel = new ArrayList<>(highLevelQueries.size());
for (HighLevelQuery high :tempHighLevel){
System.out.println(high.getText());
boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+high.getText()+"*").field(ESConstant.AUTHOR));
}
}
bqb.must(boolQueryBuilder);
return bqb;
// if (null != queryRequest.getHighLevelQueries()) {
// List<HighLevelQuery> highLevelQueries = queryRequest.getHighLevelQueries();
//
// // 1找到所有的not进行非处理
// highLevelQueries.stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
// qb.mustNot(this.getHighLevelQueryBuilder(e, true));
// });
//
// // 2循环处理剩下的不含not的处理逻辑为如果当前是and则将tempHighLevel进行must处理tempHighLevel中如果有多个则内部should处理
// List<HighLevelQuery> tempHighLevel = new ArrayList<>(highLevelQueries.size());
// highLevelQueries.stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
// // 如果是and tempHighLevel不为空则处理tempHighLevel(>1个做内部或操作)并清空
// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) {
// // 拼接条件
// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
// qb.must(tempQueryBuilder);
// tempHighLevel.clear();
// }
// // 将当前项加入临时队列
// tempHighLevel.add(e);
// });
//
// // 此处拼接tempHighLevel未处理的内容
// if (!tempHighLevel.isEmpty()) {
// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
// qb.must(tempQueryBuilder);
// }
//
// }
qb.must(boolQueryBuilder);
return qb;
}
@ -147,9 +228,9 @@ public class EsQueryAuthorService {
String sortFlag = "pubTime"; // 排序字段
Integer searchType = 2; // 用户的查询 type 默认为2
logger.info("[EsQueryAuthorService] queryAuthorByAuthorId indexName = " + indexName[0] + "; qb: \n {}.", boolQueryBuilder.toString());
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start,searchType);
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
return result;
}catch (Exception e){
} catch (Exception e) {
return new ArrayList<>();
}
}
@ -179,9 +260,9 @@ public class EsQueryAuthorService {
String sortFlag = "pubTime";
Integer searchType = 0; // 查用户法的主贴因此 type =0
logger.info("[EsQueryAuthorService] queryContentsByAuthorId indexName = " + indexName[0] + "; qb: \n {}.", boolQueryBuilder.toString());
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start,searchType);
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
return result;
}catch (Exception e){
} catch (Exception e) {
return new ArrayList<>();
}
}
@ -199,4 +280,18 @@ public class EsQueryAuthorService {
return qb;
}
/**
 * Splits a comma-separated id string into the individual ids.
 *
 * <p>Whitespace around each id is removed and empty tokens are skipped, so
 * input such as {@code "a, b,,c"} yields {@code [a, b, c]}. A {@code null}
 * input yields an empty list instead of a NullPointerException.
 *
 * @param dataIds comma-separated ids, or a single id without a comma; may be null
 * @return a new mutable list of the non-empty, trimmed ids, in input order
 */
public List<String> getDataIdList(String dataIds) {
    List<String> dataIdList = new ArrayList<>();
    if (dataIds == null) {
        return dataIdList;
    }
    // split(",") on a string without a comma returns the whole string as the
    // only token, so the single-id case needs no separate branch.
    for (String id : dataIds.split(",")) {
        String trimmed = id.trim();
        if (!trimmed.isEmpty()) {
            dataIdList.add(trimmed);
        }
    }
    return dataIdList;
}
}

55
cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java

@ -6,10 +6,7 @@ import com.bfd.mf.common.util.es.EsUtils;
import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.config.BFDApiConfig;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermsQueryBuilder;
import org.elasticsearch.index.query.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -18,6 +15,7 @@ import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@Service
public class EsQueryServiceForSQMini {
@ -42,6 +40,7 @@ public class EsQueryServiceForSQMini {
/**
* 2023-04-25 查询调用的方法
* 查询
*
* @param indexName
* @param queryRequest
* @return
@ -81,7 +80,8 @@ public class EsQueryServiceForSQMini {
}
/**
* 查询数据量
* 查询数据量 Count
*
* @param indexName
* @param queryRequest
* @return
@ -89,7 +89,8 @@ public class EsQueryServiceForSQMini {
public Long queryDataCountFromFolder(String[] indexName, QueryRequest queryRequest) {
try {
logger.debug("[EsQueryServiceForSQMini - 专题] queryDataCountFromOneSubject ...");
BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest);
//BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest);
BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilderNew(queryRequest);
Integer searchType = queryRequest.getSearchType();
Integer size = queryRequest.getLimit();
Long totalCount = EsUtils.queryTotalCountNew(clusterName, indexName, boolQueryBuilder, searchType);
@ -157,6 +158,48 @@ public class EsQueryServiceForSQMini {
public long reIndexData(String indexList, String newIndex) {
    // Delegates to EsUtils.reIndex on this service's cluster and returns the
    // value it reports. Any exception is logged and turned into 0, so callers
    // cannot tell "nothing created" from "reindex failed" by the return value.
    try {
        long created = EsUtils.reIndex(clusterName, indexList, newIndex);
        logger.info("[EsQueryServiceForSQMini] reIndexData {} -> {} created = {}", indexList, newIndex, created);
        // Open question from the original author: should we wait a few minutes
        // and then check that the data is really present in ES?
        return created;
    } catch (Exception e) {
        logger.error("[EsQueryServiceForSQMini] reIndexData {} -> {} failed", indexList, newIndex, e);
        return 0;
    }
}
/**
* 2023-05-30 新的示例文件夹拉取
* @return
*/
public long reIndexDataNew(QueryRequest queryRequest) {
// 这个是之前准备好的示例文件夹
String originalIndex = "cl_major_9999";
String currentIndex = "cl_special_1.0_" + queryRequest.getSubjectId();
System.out.println(originalIndex + " to " + currentIndex);
List<Map<String, Object>> tasks = queryRequest.getTasks();
try {
long created = 0L;
for (Map<String, Object> task : tasks) {
/**
* "cid":"facebook",
* "crawlKeyword":"account:https://www.facebook.com/joebiden",
* "id":1000882,
* "siteId":182
*/
Long taskId = Long.valueOf(task.get("id").toString());
String crawlDataFlag = task.get("crawlDataFlag").toString();
String enSource = task.get("cid").toString().toLowerCase();
TermQueryBuilder termQueryBuilder1 = QueryBuilders.termQuery(ESConstant.CRAWLDATAFLAG, crawlDataFlag);
TermQueryBuilder termQueryBuilder2 = QueryBuilders.termQuery(ESConstant.EN_SOURCE, enSource);
QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(termQueryBuilder1).must(termQueryBuilder2);
System.out.println(queryBuilder);
created = EsUtils.reIndexByTask(clusterName, originalIndex, currentIndex, queryBuilder);
System.out.println("条数: "+created);
//有条数之后是得执行个update操作吧
EsUtils.updateByQuery(clusterName,currentIndex,queryBuilder,taskId);
}
// 这块是不是得等3分钟后查一下ES中到底有木有数据哇
return created;
} catch (Exception e) {
e.printStackTrace();

134
cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java

@ -21,6 +21,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Stream;
@Service
@ -46,7 +47,7 @@ public class GetQueryBuilder {
* @param queryRequest
* @return
*/
public BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) {
public BoolQueryBuilder getQueryBuilderNew0530(QueryRequest queryRequest) {
logger.info("[GetQueryBuilder] getQueryBuilder...");
BoolQueryBuilder qb = QueryBuilders.boolQuery();
@ -65,7 +66,7 @@ public class GetQueryBuilder {
if (null != queryRequest.getTaskIds()) {
List<Long> taskIds = queryRequest.getTaskIds();
if (taskIds.size() > 0) {
boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termQuery("taskId", taskIds.get(0)));
boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds));
}
}
@ -497,6 +498,8 @@ public class GetQueryBuilder {
*/
protected QueryBuilder getHighLevelQueryBuilder(HighLevelQuery highLevelQuery, boolean isNot) {
BoolQueryBuilder result = QueryBuilders.boolQuery();
//
highLevelQuery.setWordStrategy("2");
// 获取高级查询的字段
//Map<String, Float> fieldMap = SearchScopeEnum.getFieldsByKey(highLevelQuery.getScope());
// 默认就是查全文标题 + 正文
@ -523,14 +526,17 @@ public class GetQueryBuilder {
}
// 原文普通搜索
QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getText(), isNot,
SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
if (rawQueryBuilder != null) {
result.should(rawQueryBuilder);
}
// 如果译文不为空则进行译文普通搜索
if (highLevelQuery.getTranslateText() != null) {
QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getTranslateText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path, fieldMap,
highLevelQuery.getTranslateText(), isNot,
SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
if (transQueryBuilder != null) {
result.should(transQueryBuilder);
}
@ -607,6 +613,10 @@ public class GetQueryBuilder {
return null;
}
System.out.println(" 0***** 要检索的词:" + splitText[0] + " --- " + strategyEnum);
// System.out.println(" 1***** 要检索的词:"+splitText[1]);
BoolQueryBuilder result = QueryBuilders.boolQuery();
// 如果是非 指定完整匹配则用短语否则用最佳字段
MultiMatchQueryBuilder.Type multiMatchType = isNot
@ -662,4 +672,120 @@ public class GetQueryBuilder {
public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) {
    // Wrap the given query in a nested query on the path named by `nested`,
    // always using ScoreMode.None.
    final ScoreMode scoreMode = ScoreMode.None;
    final QueryBuilder wrapped = QueryBuilders.nestedQuery(nested, queryBuilder, scoreMode);
    return wrapped;
}
/**
 * Builds the bool query for a search request.
 *
 * <p>When the request carries data ids, the result only filters on those ids
 * (plus the task ids, if any) and every other condition is ignored. Otherwise
 * the base conditions from topicQueryService are combined with the search-type
 * filter and the high-level (advanced) queries.
 *
 * @param queryRequest the search request
 * @return the outer bool query wrapping all conditions
 */
public BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) {
logger.info("[GetQueryBuilder] getQueryBuilder...");
BoolQueryBuilder qb = QueryBuilders.boolQuery();
// Base query: assemble the query from the request's conditions
BoolQueryBuilder boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest);
// Lookup by id: when ids are given, none of the later conditions are applied
if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) {
String dataIds = queryRequest.getDataIds();
List<String> dataIdList = getDataIdList(dataIds);
QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList);
// Replaces the base query built above; only the id filter remains
boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder);
// Restrict by task ids only when the request has some
if (null != queryRequest.getTaskIds()) {
List<Long> taskIds = queryRequest.getTaskIds();
if (taskIds.size() > 0) {
boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds));
}
}
qb.must(boolQueryBuilder);
return qb;
}
Integer searchType = queryRequest.getSearchType(); // single choice 0: main post; 1: comment; 2: user
BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType);
boolQueryBuilder.filter(searchTextBuilder);
if (null != queryRequest.getHighLevelQueries()) {
//List<HighLevelQuery> highLevelQueries = queryRequest.getHighLevelQueries();
List<HighLevelQuery> highLevelQueries = queryRequest.getHighLevelQueries();
// 1. Find every NOT entry and negate it (currently disabled)
// highLevelQueries.stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
// qb.mustNot(this.getHighLevelQueryBuilder(e, true));
// });
// 2. Original plan for the remaining non-NOT entries: when the current entry is AND, add tempHighLevel as a must clause; several entries inside tempHighLevel are combined with should
CopyOnWriteArrayList<HighLevelQuery> tempHighLevel = new CopyOnWriteArrayList<>();
//CopyOnWriteArrayList<HighLevelQuery> tempHighLevel = queryRequest.getHighLevelQueries();
// First pass: every AND entry becomes its own must clause on qb.
for (int i = 0; i < highLevelQueries.size(); i++) {
HighLevelQuery e = highLevelQueries.get(i);
System.out.println("??? 没有这一步? : "+e.getExpression());
BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
if(SearchExpressionEnum.AND.is(e.getExpression())){
tempQueryBuilder.must(this.getHighLevelQueryBuilder(e, false));
qb.must(tempQueryBuilder);
}
}
//System.out.println( highLevelQueries.stream());
// Second pass.
// NOTE(review): the tempQueryBuilder built inside this loop is never added
// to qb (every qb.must/should/mustNot call on it is commented out), so the
// must/should/mustNot clauses assembled here do not reach the returned
// query. The loop still calls getHighLevelQueryBuilder and prints to stdout.
highLevelQueries.stream().forEach(e -> {
if( !tempHighLevel.isEmpty()){
// Assemble the condition
BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
System.out.println(tempHighLevel.size());
System.out.println("??? : "+tempHighLevel.get(0).getText());
tempHighLevel.forEach(temp -> {
System.out.println(temp);
// The clause type is chosen from the CURRENT entry e, not from temp
if( SearchExpressionEnum.AND.is(e.getExpression())){
tempQueryBuilder.must(this.getHighLevelQueryBuilder(temp, false));
}else if (SearchExpressionEnum.OR.is(e.getExpression())){
tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false));
// qb.should(tempQueryBuilder);
}else{
tempQueryBuilder.mustNot(this.getHighLevelQueryBuilder(temp, false));
// qb.mustNot(tempQueryBuilder);
}
// qb.must(tempQueryBuilder);
// The list is cleared while it is being iterated — presumably the reason a
// CopyOnWriteArrayList is used here; confirm before changing the list type.
tempHighLevel.clear();
});
// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
// qb.must(tempQueryBuilder);
}
// Add the current entry to the temporary queue
tempHighLevel.add(e);
// // If the entry is AND and tempHighLevel is not empty, process tempHighLevel (more than one entry is OR-ed internally) and clear it
// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) {
// // Assemble the condition
// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
// qb.must(tempQueryBuilder);
// tempHighLevel.clear();
// }
// // Add the current entry to the temporary queue
// tempHighLevel.add(e);
});
// Append whatever is still left in tempHighLevel.
// NOTE(review): because the list is cleared before each add above, it holds
// only the last entry here; that entry is added as a should clause inside a
// must regardless of its expression, and an AND entry was already added by
// the first pass.
if (!tempHighLevel.isEmpty()) {
BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
qb.must(tempQueryBuilder);
}
}
qb.must(boolQueryBuilder);
return qb;
}
}

5
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -283,6 +283,8 @@ public class ESConstant {
public static final String HAS_IMAGE = "hasImage";
public static final String HAS_VIDEO = "hasVideo";
public static final String HAS_FILE = "hasFile";
public static final String HAS_TRANS = "hasTrans";
/**
* 关键词
*/
@ -773,7 +775,8 @@ public class ESConstant {
ESConstant.VALUELABEL,
ESConstant.CATEGORYLABEL,
ESConstant.TAG
ESConstant.TAG,
ESConstant.HAS_TRANS
);

12
cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java

@ -170,7 +170,15 @@ public enum BaseFieldEnum {
* 区县
*/
county_code,
/**
* OCR 结果
*/
ocrText,
/**
* ASR 结果
*/
asrText
;
/**
@ -178,9 +186,11 @@ public enum BaseFieldEnum {
* @return
*/
public static Map<String, Float> getMatchFields(){
Map<String, Float> matchMap = new HashMap<>(2);
Map<String, Float> matchMap = new HashMap<>(4);
matchMap.put(BaseFieldEnum.title.name(), 2.0F);
matchMap.put(BaseFieldEnum.content.name(), 1.0F);
matchMap.put(BaseFieldEnum.ocrText.name(), 1.0F);
matchMap.put(BaseFieldEnum.asrText.name(), 1.0F);
return matchMap;
}

2
cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java

@ -22,6 +22,8 @@ public enum SearchScopeEnum {
return new HashMap() {{
put(ESConstant.TITLE, 1.0F);
put(ESConstant.CONTENT, 1.0F);
put(ESConstant.OCRTEXT, 1.0F);
put(ESConstant.ASRTEXT, 1.0F);
}};
}
},

657
cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java

@ -26,9 +26,13 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.*;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilder;
@ -47,8 +51,6 @@ import org.springframework.util.Assert;
import java.net.InetAddress;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public abstract class EsUtils {
@ -84,20 +86,23 @@ public abstract class EsUtils {
return CLIENT_MAP.get(clusterName);
}
public static List<JSONObject> query(String clusterName, String[] index,
final QueryBuilder queryBuilder,
String sortFlag, String orderFlag,
Integer size, Integer from,
Integer searchType) {
public static List<JSONObject> query0530(String clusterName, String[] index,
final QueryBuilder queryBuilder,
String sortFlag, String orderFlag,
Integer size, Integer from,
Integer searchType) {
System.out.println("非高亮查询");
TransportClient client = getClient(clusterName);
boolean options = true;
boolean optionsf = false;
// 现在不同任务的同一条数据不做消重因此同一个DOCID 的数据会有多条因此只有查主贴的时候需要用DOCID 消重
CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID);
if (searchType == 0) {
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID);
}
// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID);
// CollapseBuilder collapseBuilder = null;
// if (searchType == 0) {
// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID);
// }
// Object[] objects= new Object[]{"9999"};
// 查询
// from + size 分页 查询方式
@ -105,9 +110,10 @@ public abstract class EsUtils {
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf))
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC)
.setQuery(queryBuilder)
.setCollapse(collapseBuilder)
// .searchAfter(objects)
//.setCollapse(collapseBuilder)
.setSize(size)
.setFrom(from);
.setFrom(from); // 用search_after 的话这个 from == 0
System.out.println(requestBuilder);
@ -130,11 +136,11 @@ public abstract class EsUtils {
}
public static List<JSONObject> queryWithHighlight(String clusterName, String[] index,
final QueryBuilder queryBuilder,
String sortFlag, String orderFlag,
Integer size, Integer from,
Integer searchType) {
public static List<JSONObject> queryWithHighlight0530(String clusterName, String[] index,
final QueryBuilder queryBuilder,
String sortFlag, String orderFlag,
Integer size, Integer from,
Integer searchType) {
System.out.println("高亮查询");
EsBaseParam esBaseParam = new EsBaseParam();
TransportClient client = getClient(clusterName);
@ -235,22 +241,53 @@ public abstract class EsUtils {
List<JSONObject> dataList = new ArrayList<>();
if (searchResponse.getHits().totalHits > 0) {
for (SearchHit hit : searchResponse.getHits().getHits()) {
SearchHit[] hits = searchResponse.getHits().getHits();
for (int i = 0; i < hits.length; i++) {
JSONObject data = new JSONObject();
data.putAll(hit.getSourceAsMap());
String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE};
for (int i = 0; i < fieldName.length; i++) {
getHighlightResult(fieldName[i], hit, data);
data.putAll(hits[i].getSourceAsMap());
String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE, ESConstant.OCRTEXT, ESConstant.ASRTEXT};
for (int j = 0; j < fieldName.length; j++) {
getHighlightResult(fieldName[j], hits[i], data);
}
data.put("subjectId", hits[i].getIndex()
.replace("cl_major_", "")
.replace("cl_subject_", "")
.replace("cl_special_1.0_", ""));
dataList.add(data);
}
}
// if (searchResponse.getHits().totalHits > 0) {
// for (SearchHit hit : searchResponse.getHits().getHits()) {
// JSONObject data = new JSONObject();
// data.putAll(hits[i].getSourceAsMap());
// data.put("subjectId", hits[i].getIndex()
// .replace("cl_major_", "")
// .replace("cl_subject_", "")
// .replace("cl_special_1.0_", ""));
// dataList.add(data);
//
// JSONObject data = new JSONObject();
// data.putAll(hit.getSourceAsMap());
// String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE};
// for (int i = 0; i < fieldName.length; i++) {
// getHighlightResult(fieldName[i], hit, data);
// }
//
// data.put("subjectId", hit.getSourceAsMap().get()
// .replace("cl_major_", "")
// .replace("cl_subject_", "")
// .replace("cl_special_1.0_", ""));
// dataList.add(data);
// }
// }
return dataList;
}
private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) {
if (hit.getHighlightFields().containsKey(fieldName)) {
HighlightField highlightField = hit.getHighlightFields().get(fieldName);
System.out.println("getHighlightResult highlightField : "+highlightField);
Text[] fragments = highlightField.fragments();
String fragmentString = "";
for (Text fragment : fragments) {
@ -385,9 +422,10 @@ public abstract class EsUtils {
return 0L;
}
public static Long queryTotalCountNew(String clusterName, String[] index,
QueryBuilder queryBuilder,
Integer searchType) {
public static Long queryTotalCountNew_0530(String clusterName, String[] index,
QueryBuilder queryBuilder,
Integer searchType) {
TransportClient client = getClient(clusterName);
boolean options = true;
@ -395,27 +433,62 @@ public abstract class EsUtils {
// 现在不同任务的同一条数据不做消重因此同一个DOCID 的数据会有多条因此只有查主贴的时候需要用DOCID 消重
String count = "count";
AggregationBuilder aggregation;
// searchType = 0 主贴
if (searchType == 0) {
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID);
} else {
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID);
}
// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID);
CollapseBuilder collapseBuilder = null;
if (searchType == 0) {
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID);
}
//searchSourceBuilder.aggregation(aggregation);
// from + size 分页 查询方式
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf))
.setQuery(queryBuilder)
//.setCollapse(collapseBuilder);
.addAggregation(aggregation);
// System.out.println(requestBuilder);
/**
* 2023-05-30 先注释掉看看情况
*/
// System.out.println("3333 : " + requestBuilder.get().getHits().totalHits);
Aggregations aggregations = requestBuilder.get().getAggregations();
Cardinality cardinality = aggregations.get(count);
// System.out.println("1111 : " + cardinality.getValue());
// System.out.println("2222 : " + requestBuilder.get().getHits().totalHits);
long resultCount = cardinality.getValue();
if (searchType == 2) {
resultCount = requestBuilder.get().getHits().totalHits;
}
System.out.println("cardinality : " + cardinality.getValue());
System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits);
// long resultCount = cardinality.getValue();
// if (searchType == 2) {
// resultCount = requestBuilder.get().getHits().totalHits;
// }
/**
* 折叠查询的参考代码
*/
// CollapseBuilder collapseBuilder = new CollapseBuilder("duplicate_id");
// InnerHitBuilder innerHitBuilder = new InnerHitBuilder();
// innerHitBuilder.setName("test");
// innerHitBuilder.setSize(0);
// innerHitBuilder.setTrackScores(true);
// innerHitBuilder.setIgnoreUnmapped(true);
// innerHitBuilder.addSort(SortBuilders.fieldSort("level").order(SortOrder.DESC));
// collapseBuilder.setInnerHits(innerHitBuilder);
//
// ......
//
// srb = client.prepareSearch(indexName)
// .setTypes(typeName)
// .setQuery(bqb)
// .setFrom(params.getFrom())
// .setSize(params.getSize())
// .setCollapse(collapseBuilder)
// .setPreference("_primary_first");
long resultCount = requestBuilder.get().getHits().totalHits;
return resultCount;
}
@ -426,6 +499,7 @@ public abstract class EsUtils {
Integer limit,
String scrollId,
Integer searchType) {
Map<String, Object> result = new HashMap<>();
TransportClient client = getClient(clusterName);
SearchResponse searchResponse = null;
@ -590,6 +664,14 @@ public abstract class EsUtils {
}
}
/**
* 复制索引数据
*
* @param clusterName
* @param originalIndex
* @param currentIndex
* @return
*/
public static long reIndex(String clusterName, String originalIndex, String currentIndex) {
// String clusterName, String originalIndex, String currentIndex,
try {
@ -599,6 +681,7 @@ public abstract class EsUtils {
.newRequestBuilder(client)
.source(originalIndex)
.destination(currentIndex);
// 新建别名查询需要用别名查不加别名查不到哦
String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_");
BulkByScrollResponse response = builder.get();
@ -690,6 +773,46 @@ public abstract class EsUtils {
}
}
/**
 * Reindexes the documents of {@code originalIndex} that match
 * {@code queryBuilder} into {@code currentIndex}, then adds an alias on the
 * target index whose name is the target name with the
 * {@code cl_special_1.0_} prefix replaced by {@code cl_major_}.
 *
 * <p>Any exception is printed and reported as 0.
 *
 * @param clusterName   cluster whose client is used
 * @param originalIndex source index
 * @param currentIndex  destination index
 * @param queryBuilder  filter selecting the documents to copy
 * @return the created-document count from the reindex response, or 0 on failure
 */
public static long reIndexByTask(String clusterName,
                                 String originalIndex,
                                 String currentIndex,
                                 QueryBuilder queryBuilder) {
    try {
        TransportClient esClient = getClient(clusterName);
        System.out.println(originalIndex + " *** " + currentIndex);

        // Copy the matching documents and refresh the target afterwards.
        ReindexRequestBuilder reindexRequest = ReindexAction.INSTANCE
                .newRequestBuilder(esClient)
                .source(originalIndex)
                .destination(currentIndex)
                .filter(queryBuilder)
                .refresh(true);
        BulkByScrollResponse reindexResponse = reindexRequest.get();

        // Register the cl_major_ alias for the cl_special_1.0_ index.
        String aliasName = currentIndex.replace("cl_special_1.0_", "cl_major_");
        IndicesAliasesResponse aliasResponse = IndicesAliasesAction.INSTANCE
                .newRequestBuilder(esClient)
                .addAlias(currentIndex, aliasName)
                .get();

        System.out.println("******* : " + reindexResponse);
        System.out.println("##### : " + aliasResponse);
        return reindexResponse.getCreated();
    } catch (Exception e) {
        e.printStackTrace();
        return 0;
    }
}
public static void delIndexByTasks(String clusterName, String indexName, String cid, List<String> tasks) {
try {
TransportClient client = getClient(clusterName);
@ -707,6 +830,17 @@ public abstract class EsUtils {
}
}
/**
 * Sets the {@code taskId} field on every document of {@code currentIndex}
 * that matches {@code queryBuilder}.
 *
 * <p>The value is written as a string (the textual form of {@code taskId}).
 *
 * @param clusterName  cluster whose client is used
 * @param currentIndex index whose documents are updated
 * @param queryBuilder filter selecting the documents to update
 * @param taskId       task id to store on the matching documents
 */
public static void updateByQuery(String clusterName, String currentIndex, QueryBuilder queryBuilder, Long taskId) {
    TransportClient client = getClient(clusterName);
    UpdateByQueryRequestBuilder updateByQuery = UpdateByQueryAction.INSTANCE.newRequestBuilder(client);
    // The task id is passed as a script parameter so the script source is the
    // same for every task and does not need a fresh compilation per call.
    Script assignTaskId = new Script(ScriptType.INLINE, "painless",
            "ctx._source['taskId'] = params.taskId",
            Collections.<String, Object>singletonMap("taskId", String.valueOf(taskId)));
    // No size(...) here: on this builder size limits the total number of
    // documents processed, which left tasks with more than 1000 documents
    // only partly tagged.
    updateByQuery.source(currentIndex)
            .filter(queryBuilder)
            .script(assignTaskId);
    updateByQuery.get();
}
/**
* 全文检索查询拼接(非nested属性重载方法)
*
@ -757,4 +891,461 @@ public abstract class EsUtils {
public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) {
    // Wrap the given query in a nested query on the path named by `nested`,
    // always using ScoreMode.None.
    final ScoreMode scoreMode = ScoreMode.None;
    final QueryBuilder wrapped = QueryBuilders.nestedQuery(nested, queryBuilder, scoreMode);
    return wrapped;
}
// public void testAggAndDistinct(){
// //获取注解通过注解可以得到 indexName type
// Document document = Customer.class.getAnnotation(Document.class);
// // dateHistogram Aggregation 是时间柱状图聚合按照天来聚合
// // dataAgg 为聚合结果的名称createTime 为字段名称
// // cardinality 用来去重
// SearchQuery searchQuery = new NativeSearchQueryBuilder()
// .withQuery(matchAllQuery())
// .withSearchType(SearchType.QUERY_THEN_FETCH)
// .withIndices(document.indexName()).withTypes(document.type())
// .addAggregation(AggregationBuilders.dateHistogram("dataAgg").field("createTime")
// .dateHistogramInterval(DateHistogramInterval.DAY)
// .subAggregation(AggregationBuilders.cardinality("nameAgg").field("firstName")))
// .build();
//
// // 聚合的结果
// Aggregations aggregations = elasticsearchTemplate.query(searchQuery, response -> response.getAggregations());
// Map<String, Aggregation> results = aggregations.asMap();
// Histogram histogram = (Histogram) results.get("dataAgg");
// // 将bucket list 转换成 map key -> 名字 value-> 出现次数
// histogram.getBuckets().stream().forEach(t->{
// Histogram.Bucket histogram1 = t;
// System.out.println(histogram1.getKeyAsString());
// Cardinality cardinality = histogram1.getAggregations().get("nameAgg");
// System.out.println(cardinality.getValue());
// });
// }
/**
 * Returns the total number of hits for {@code queryBuilder} over the given
 * indices, without any de-duplication or aggregation.
 *
 * <p>Any exception is printed and reported as 0.
 *
 * @param clusterName  cluster whose client is used
 * @param index        indices to search
 * @param queryBuilder query to count
 * @param searchType   not used by this variant; kept for signature compatibility
 * @return the total hit count, or 0 when the search fails
 */
public static Long queryTotalCountNew0530(String clusterName, String[] index,
                                          QueryBuilder queryBuilder,
                                          Integer searchType) {
    System.out.println("---------------------------");
    long resultCount = 0L;
    try {
        TransportClient client = getClient(clusterName);
        boolean options = true;
        boolean optionsf = false;
        SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
                .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf))
                .setQuery(queryBuilder)
                // Only the total is needed, so no documents are fetched.
                .setSize(0);
        // Execute once and reuse the value for both the debug line and the result.
        resultCount = requestBuilder.get().getHits().totalHits;
        System.out.println("totalHits : " + resultCount);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return resultCount;
}
/**
 * Runs a from/size paged search without highlighting and without collapsing
 * duplicates, and returns each hit's source as a JSON object.
 *
 * <p>Every returned object additionally gets a {@code subjectId} entry: the
 * name of the index the hit came from with the {@code cl_major_},
 * {@code cl_subject_} and {@code cl_special_1.0_} prefixes removed.
 *
 * @param clusterName  cluster whose client is used
 * @param index        indices to search
 * @param queryBuilder query to run
 * @param sortFlag     field to sort by
 * @param orderFlag    {@code ESConstant.ASC} for ascending; anything else
 *                     (including null) sorts descending
 * @param size         page size
 * @param from         offset of the first hit
 * @param searchType   not used by this variant; kept for signature compatibility
 * @return the hits of the requested page, empty when nothing matches
 */
public static List<JSONObject> query05301(String clusterName, String[] index,
                                          final QueryBuilder queryBuilder,
                                          String sortFlag, String orderFlag,
                                          Integer size, Integer from,
                                          Integer searchType) {
    System.out.println("非高亮查询");
    TransportClient client = getClient(clusterName);
    boolean options = true;
    boolean optionsf = false;
    // Constant-first comparison so a null orderFlag falls back to DESC
    // instead of throwing.
    SortOrder sortOrder = ESConstant.ASC.equals(orderFlag) ? SortOrder.ASC : SortOrder.DESC;

    // from + size paging
    SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
            .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf))
            .addSort(sortFlag, sortOrder)
            .setQuery(queryBuilder)
            .setSize(size)
            .setFrom(from);
    System.out.println(requestBuilder);

    SearchResponse searchResponse = requestBuilder.execute().actionGet();
    List<JSONObject> dataList = new ArrayList<>();
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        JSONObject data = new JSONObject();
        data.putAll(hit.getSourceAsMap());
        data.put("subjectId", hit.getIndex()
                .replace("cl_major_", "")
                .replace("cl_subject_", "")
                .replace("cl_special_1.0_", ""));
        dataList.add(data);
    }
    return dataList;
}
/**
 * Counts the documents matching a query without any de-duplicating aggregation.
 *
 * <p>The search is executed exactly once; the original issued the same request
 * twice (once for the debug print and once for the returned value).
 *
 * @param clusterName  name used to look up the transport client
 * @param index        indices to search
 * @param queryBuilder query whose matches are counted
 * @param searchType   kept for signature compatibility; not used by this variant
 * @return total number of hits, or 0 when the search fails
 */
public static Long queryTotalCountNew_0531(String clusterName, String[] index,
        QueryBuilder queryBuilder,
        Integer searchType) {
    System.out.println("---------------------------");
    long resultCount = 0L;
    try {
        TransportClient client = getClient(clusterName);
        // fromOptions(ignoreUnavailable, allowNoIndices, expandToOpenIndices, expandToClosedIndices)
        SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
                .setIndicesOptions(IndicesOptions.fromOptions(true, true, true, false))
                .setQuery(queryBuilder)
                // Only the hit count is needed, so do not fetch any documents.
                .setSize(0);
        resultCount = requestBuilder.get().getHits().totalHits;
        System.out.println("totalHits : " + resultCount);
    } catch (Exception e) {
        // Best effort: a failed count is reported as 0, matching the previous behaviour.
        e.printStackTrace();
    }
    return resultCount;
}
// public static List<JSONObject> query_0531(String clusterName, String[] index,
// final QueryBuilder queryBuilder,
// String sortFlag, String orderFlag,
// Integer size, Integer from,
// Integer searchType) {
// System.out.println("非高亮查询");
// TransportClient client = getClient(clusterName);
// boolean options = true;
// boolean optionsf = false;
// // from + size 分页 查询方式
// SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
// .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf))
// .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC)
// .setQuery(queryBuilder)
// .setSize(size)
// .setFrom(from);
//
// System.out.println(requestBuilder);
//
// SearchResponse searchResponse = requestBuilder.execute().actionGet();
// List<JSONObject> dataList = new ArrayList<>();
// if (searchResponse.getHits().totalHits > 0) {
// SearchHit[] hits = searchResponse.getHits().getHits();
// for (int i = 0; i < hits.length; i++) {
// JSONObject data = new JSONObject();
// data.putAll(hits[i].getSourceAsMap());
// data.put("subjectId", hits[i].getIndex()
// .replace("cl_major_", "")
// .replace("cl_subject_", "")
// .replace("cl_special_1.0_", ""));
// dataList.add(data);
// }
// }
// return dataList;
// }
/**
 * Runs a collapsed, from/size paged search with highlighting enabled and returns
 * the source of every hit, with highlight fragments merged in by
 * {@code getHighlightResult}.
 *
 * @param clusterName  name used to look up the transport client
 * @param index        indices to search
 * @param queryBuilder query to execute
 * @param sortFlag     field to sort on
 * @param orderFlag    sort direction; {@code ESConstant.ASC} sorts ascending, anything else descending
 * @param size         page size
 * @param from         offset of the first hit
 * @param searchType   0 collapses on the document id, any other value on the data id
 * @return one JSON object per hit, each carrying an extra {@code subjectId} key
 */
public static List<JSONObject> queryWithHighlight(String clusterName, String[] index,
        final QueryBuilder queryBuilder,
        String sortFlag, String orderFlag,
        Integer size, Integer from,
        Integer searchType) {
    System.out.println("高亮查询");
    EsBaseParam highlightParam = new EsBaseParam();
    TransportClient esClient = getClient(clusterName);
    final boolean enabled = true;
    final boolean disabled = false;

    // The same item collected by different tasks is not de-duplicated at index time,
    // so a document id can occur several times; main-post searches (searchType 0)
    // therefore collapse on the document id, everything else on the data id.
    CollapseBuilder collapse = new CollapseBuilder(ESConstant.DATA_ID);
    if (searchType == 0) {
        collapse = new CollapseBuilder(ESConstant.DOC_ID);
    }

    highlightParam.setWithHighlight(true);
    highlightParam.setHighlightFields(new ArrayList<>(BaseFieldEnum.getMatchFieldsWithPy().keySet()));

    // At most this many non-contiguous fragments are returned per field.
    Integer fragmentCount = 2;
    HighlightBuilder highlighter = new HighlightBuilder();
    highlighter.requireFieldMatch(true);
    highlighter.order(HighlightBuilder.Order.SCORE);
    highlighter.numOfFragments(fragmentCount);
    highlighter.preTags(ESConstant.HIGHLIGHTPRETAGS);
    highlighter.postTags(ESConstant.HIGHLIGHTPOSTTAGS);
    BaseFieldEnum.getMatchFieldsWithPy().keySet().forEach(name -> highlighter.field(name));
    highlightParam.setHighlightBuilder(highlighter);

    SortOrder direction;
    if (orderFlag.equals(ESConstant.ASC)) {
        direction = SortOrder.ASC;
    } else {
        direction = SortOrder.DESC;
    }

    // from + size paging
    SearchRequestBuilder search = esClient.prepareSearch().setIndices(index)
            .setIndicesOptions(IndicesOptions.fromOptions(enabled, enabled, enabled, disabled))
            .addSort(sortFlag, direction)
            .setQuery(queryBuilder)
            .setCollapse(collapse)
            .setSize(size)
            .setFrom(from)
            .highlighter(highlightParam.getHighlightBuilder());
    System.out.println(search);
    System.out.println("-----");

    SearchResponse response = search.execute().actionGet();
    List<JSONObject> results = new ArrayList<>();
    if (response.getHits().totalHits <= 0) {
        return results;
    }
    for (SearchHit hit : response.getHits().getHits()) {
        JSONObject row = new JSONObject();
        row.putAll(hit.getSourceAsMap());
        String[] highlighted = {ESConstant.CONTENT, ESConstant.TITLE, ESConstant.OCRTEXT, ESConstant.ASRTEXT};
        for (String field : highlighted) {
            getHighlightResult(field, hit, row);
        }
        String subject = hit.getIndex()
                .replace("cl_major_", "")
                .replace("cl_subject_", "")
                .replace("cl_special_1.0_", "");
        row.put("subjectId", subject);
        results.add(row);
    }
    return results;
}
/**
 * Counts the distinct documents matching a query using a cardinality aggregation.
 *
 * <p>Main-post searches (searchType 0) count distinct document ids; other search
 * types count distinct data ids. User searches (searchType 2) are not
 * de-duplicated by id and report the raw hit total instead.
 *
 * <p>The search is executed exactly once and the single response is reused; the
 * original executed the same request up to three times.
 *
 * @param clusterName  name used to look up the transport client
 * @param index        indices to search
 * @param queryBuilder query whose matches are counted
 * @param searchType   0 = main post, 2 = user; selects the counting strategy
 * @return the count, or 0 when the search fails (including a null searchType)
 */
public static Long queryTotalCountNew(String clusterName, String[] index,
        QueryBuilder queryBuilder,
        Integer searchType) {
    long resultCount = 0L;
    try {
        TransportClient client = getClient(clusterName);
        String aggrCount = "count";
        String distinctField = searchType == 0 ? ESConstant.DOC_ID : ESConstant.DATA_ID;
        AggregationBuilder aggregation = AggregationBuilders.cardinality(aggrCount).field(distinctField);
        SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
                .setQuery(queryBuilder)
                .addAggregation(aggregation)
                // Only the totals are needed, so do not fetch any documents.
                .setSize(0);
        SearchResponse response = requestBuilder.get();
        long totalHits = response.getHits().totalHits;
        System.out.println("totalHits : " + totalHits);
        Cardinality cardinality = response.getAggregations().get(aggrCount);
        System.out.println("1111 : " + cardinality);
        System.out.println("cardinality : " + cardinality.getValue());
        // User data is not aggregated by id, so fall back to the raw hit total.
        resultCount = searchType == 2 ? totalHits : cardinality.getValue();
    } catch (Exception e) {
        // Best effort: a failed count is reported as 0, matching the previous behaviour.
        e.printStackTrace();
    }
    return resultCount;
}
/**
 * Runs a plain (non-highlighted), collapsed search using from/size paging and
 * returns the source of every hit as a JSON object.
 *
 * <p>Main-post searches (searchType 0) collapse and count on the document id;
 * every other search type uses the data id. The distinct and raw totals are only
 * printed for diagnostics; both now come from the single executed response
 * (the original re-ran the whole search just to print the raw total).
 *
 * @param clusterName  name used to look up the transport client
 * @param index        indices to search
 * @param queryBuilder query to execute
 * @param sortFlag     field to sort on
 * @param orderFlag    sort direction; {@code ESConstant.ASC} sorts ascending, anything else descending
 * @param size         page size
 * @param from         offset of the first hit
 * @param searchType   0 = main post; selects the collapse/count field
 * @return one JSON object per hit, each carrying an extra {@code subjectId} key
 *         derived from the name of the index the hit came from
 */
public static List<JSONObject> query(String clusterName, String[] index,
        final QueryBuilder queryBuilder,
        String sortFlag, String orderFlag,
        Integer size, Integer from,
        Integer searchType) {
    System.out.println("非高亮查询");
    TransportClient client = getClient(clusterName);
    String aggrCount = "count";
    String distinctField = searchType == 0 ? ESConstant.DOC_ID : ESConstant.DATA_ID;
    CollapseBuilder collapseBuilder = new CollapseBuilder(distinctField);
    AggregationBuilder aggregationBuilder = AggregationBuilders.cardinality(aggrCount).field(distinctField);
    // from + size paging
    // fromOptions(ignoreUnavailable, allowNoIndices, expandToOpenIndices, expandToClosedIndices)
    SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
            .setIndicesOptions(IndicesOptions.fromOptions(true, true, true, false))
            .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC)
            .setQuery(queryBuilder)
            .setCollapse(collapseBuilder)
            .addAggregation(aggregationBuilder)
            .setSize(size)
            .setFrom(from);
    System.out.println(requestBuilder);
    SearchResponse searchResponse = requestBuilder.execute().actionGet();
    long totalHits = searchResponse.getHits().totalHits;
    List<JSONObject> dataList = new ArrayList<>();
    if (totalHits > 0) {
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            JSONObject data = new JSONObject();
            data.putAll(hit.getSourceAsMap());
            // Strip the known index prefixes so only the subject identifier remains.
            data.put("subjectId", hit.getIndex()
                    .replace("cl_major_", "")
                    .replace("cl_subject_", "")
                    .replace("cl_special_1.0_", ""));
            dataList.add(data);
        }
    }
    Cardinality cardinality = searchResponse.getAggregations().get(aggrCount);
    // Distinct total (after de-duplication on the collapse field).
    long value = cardinality.getValue();
    System.out.println("去重总数:" + value);
    System.out.println("不去重的总数:" + totalHits);
    return dataList;
}
// private long getCardinality( QueryBuilder queryBuilder,String indexName,
// Integer size, Integer from) {
// // 获取查询的索引列表String indexName = "sjck_personnel"
// ;// 获取查询的条件列表
//// List<HashMap<String, String>> options = (List<HashMap<String, String>>) bindParams.get("conditions");
//// // 1.构建查询请求
// SearchRequest searchRequest = new SearchRequest(indexName);
//// // 4.构建最外面的
//// boolQueryBoolQueryBuilder query = QueryBuilders.boolQuery();
//// // 5.构建查询请求
//// synQueryPersonnelIndexBuilder(query, options);
// //6.高亮
// HighlightBuilder highlightBuilder = new HighlightBuilder();
// // 所有查询出来的字段全部高亮
// HighlightBuilder.Field highlightTitle = new HighlightBuilder.Field("*").requireFieldMatch(false);
// highlightTitle.highlighterType("unified");
// highlightBuilder.field(highlightTitle);
// //从第几条开始
//
// // 3.构建高亮
// AggregationBuilder aggregation = AggregationBuilders.cardinality("total_size").field("concat_field");
// SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
// .query(queryBuilder)
// .highlighter(highlightBuilder)
// .from(from)
// .size(size)
// .aggregation(aggregation);
// // 2.将查询构建器放入查询请求中
// searchRequest.source(sourceBuilder);
// SearchResponse searchResponse = null;
// try {
// searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// } catch (ElasticsearchStatusException e) {
// logger.error("请检查elasticsearchIndex是否存在{},错误信息{}", e, e.getMessage());
// } catch (IOException e) {
// logger.error("搜索出错了{},错误信息{}", e, e.getMessage());
// }
// assert searchResponse != null;
// ParsedCardinality parsedCardinality = (ParsedCardinality) searchResponse.getAggregations().asList().get(0);
// return parsedCardinality.getValue();
// }
}

69
cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java

@ -33,32 +33,32 @@ public class QueryRequest implements Serializable {
private static final long serialVersionUID = 1L;
// 必传字段
// private Long subjectId;
@ApiModelProperty(value = "subjectId" , required = true , notes = "专题ID")
@ApiModelProperty(value = "subjectId", required = true, notes = "专题ID")
private String subjectId;
// @ApiModelProperty(value = "taskId" , required = true , notes = "任务ID")
// private String taskId;
@ApiModelProperty(value = "userId",required = true)
@ApiModelProperty(value = "userId", required = true)
private Long userId;
// 翻页字段
@ApiModelProperty(value = "page",required = true)
@ApiModelProperty(value = "page", required = true)
private Integer page;
@ApiModelProperty(value = "limit",required = true)
@ApiModelProperty(value = "limit", required = true)
private Integer limit;
//其他参数
@ApiModelProperty(value = "pubTime",required = true)
@ApiModelProperty(value = "pubTime", required = true)
private Long pubTime;
@ApiModelProperty(value = "docId",required = true)
@ApiModelProperty(value = "docId", required = true)
private String docId;
@ApiModelProperty(value = "dataIds",required = true)
@ApiModelProperty(value = "dataIds", required = true)
private String dataIds;
@ApiModelProperty(value = "dataId",required = true)
@ApiModelProperty(value = "dataId", required = true)
private String dataId;
@ApiModelProperty(value = "docType",required = true)
@ApiModelProperty(value = "docType", required = true)
private String docType;
@ApiModelProperty(value = "siteTypes",required = true)
@ApiModelProperty(value = "siteTypes", required = true)
private String siteTypes; //站点类型 必传多个,分隔 全部传
@ApiModelProperty(value = "siteId",required = true)
@ApiModelProperty(value = "siteId", required = true)
private String siteId;
// 排序字段
private String order; // 排序方式 asc/desc
@ -66,7 +66,7 @@ public class QueryRequest implements Serializable {
// 基础查询字段
private String searchArea; //0 美国1中国
private String cid; // 站点名
private String crawlDataFlag ; // 数据标识
private String crawlDataFlag; // 数据标识
// 二次查询字段
private Integer searchType; // 二次查询 选项 0主贴1评论2用户
// private String searchScope; // 二次查询 字段选项 0标题1内容2用户
@ -91,7 +91,7 @@ public class QueryRequest implements Serializable {
private String valueLabel;
private String categoryLabel;
private List<String> tasks;
// private List<String> tasks;
private String originalIndex;
private String currentIndex;
@ -99,9 +99,24 @@ public class QueryRequest implements Serializable {
private List<Long> delTasks;
private List<Long> taskIds;
private String pageType ;
private String pageType;
private String userType;
private String ocrTest;
private String asrText;
private List<Map<String, Object>> tasks;
public List<Map<String, Object>> getTasks() {
return tasks;
}
public void setTasks(List<Map<String, Object>> tasks) {
this.tasks = tasks;
}
public String getUserType() {
return userType;
}
@ -143,6 +158,7 @@ public class QueryRequest implements Serializable {
}
private List<HighLevelQuery> highLevelQueries;
public List<HighLevelQuery> getHighLevelQueries() {
return highLevelQueries;
}
@ -152,7 +168,6 @@ public class QueryRequest implements Serializable {
}
public String getOriginalIndex() {
return originalIndex;
}
@ -169,13 +184,13 @@ public class QueryRequest implements Serializable {
this.currentIndex = currentIndex;
}
public List<String> getTasks() {
return tasks;
}
public void setTasks(List<String> tasks) {
this.tasks = tasks;
}
// public List<String> getTasks() {
// return tasks;
// }
//
// public void setTasks(List<String> tasks) {
// this.tasks = tasks;
// }
public String getValueLabel() {
return valueLabel;
@ -392,18 +407,18 @@ public class QueryRequest implements Serializable {
this.crawlDataFlag = crawlDataFlag;
}
public List<String> getSearchScopeValue(String nums){
public List<String> getSearchScopeValue(String nums) {
String numbers[] = nums.split(",");
// 0:标题;1:正文;2:作者 多个用,分割 0,1
List<String> resultList = new ArrayList<>();
for (String num:numbers) {
if(num .equals("0")){
for (String num : numbers) {
if (num.equals("0")) {
resultList.add("title");
}
if(num.equals("1")){
if (num.equals("1")) {
resultList.add("content");
}
if(num.equals("2")){
if (num.equals("2")) {
resultList.add("author");
}
}

18
cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java

@ -790,16 +790,14 @@ public class ESMonitorBaseEntity implements Comparable<ESMonitorBaseEntity>, Ser
}
/**
 * Derives the sentiment label from the numeric sentiment score, stores it in
 * {@code sysSentimentTag} and returns it.
 *
 * <p>Scores of exactly 0.5 or 0.0 are neutral, scores below 0.5 are negative and
 * scores above 0.5 are positive. The neutral test runs first: previously the
 * {@code < 0.5} test ran first, so a score of 0.0 was labelled negative and the
 * {@code == 0.0} neutral condition could never match.
 *
 * @return the sentiment label; neutral when no score is set
 */
public String getSysSentimentTag() {
    // NOTE(review): a missing score is treated as neutral instead of throwing a
    // NullPointerException - confirm this matches what callers expect.
    if (sysSentiment == null) {
        sysSentimentTag = "中性";
        return sysSentimentTag;
    }
    // Fallback for values that match none of the comparisons below (e.g. NaN).
    sysSentimentTag = sysSentiment.toString();
    if (sysSentiment == 0.5 || sysSentiment == 0.0) {
        sysSentimentTag = "中性";
    } else if (sysSentiment < 0.5) {
        sysSentimentTag = "负面";
    } else if (sysSentiment > 0.5) {
        sysSentimentTag = "正面";
    }
    return sysSentimentTag;
}

113
cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java

@ -27,7 +27,7 @@ import org.springframework.web.bind.annotation.ResponseBody;
@Controller
@RequestMapping("/crawl")
@Api(value="数据查询的控制器")
@Api(value = "数据查询的控制器")
public class SearchDataController {
private static final Logger logger = LoggerFactory.getLogger(SearchDataController.class);
@Autowired
@ -39,7 +39,7 @@ public class SearchDataController {
* 查询数据列表
*/
@ApiOperation(value = "查询数据列表")
@RequestMapping(value = "/subject/query", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/subject/query", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject queryDataList(@RequestBody QueryRequest queryRequest) {
logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest));
@ -48,29 +48,30 @@ public class SearchDataController {
long start = System.currentTimeMillis();
String scorllId = queryRequest.getScrollId();
String subjectId = queryRequest.getSubjectId();
if(null != scorllId ){
if (null != scorllId) {
// 数据导出
result = searchDataService.exportDataFromFolder(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}else {
} else {
// 数据查询
result = searchDataService.queryDataFromFolder(queryRequest);
}
Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER);
Integer limit = queryRequest.getLimit();
int page = 1;
if(allDocNumber%limit == 0){
page = allDocNumber/limit;
}else{
page = allDocNumber/limit + 1;
if (allDocNumber % limit == 0) {
page = allDocNumber / limit;
} else {
page = allDocNumber / limit + 1;
}
if(null != queryRequest.getPage() && !queryRequest.getPage().equals("")) {
if (null != queryRequest.getPage() && !queryRequest.getPage().equals("")) {
if (page > 0 && queryRequest.getPage() > page) {
//return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配");
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); }
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
}
long end = System.currentTimeMillis();
logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start) + " ; count = "+result.get(ESConstant.ALLDOCNUMBER));
logger.info("接口查询时长:statr:" + start + " ; end:" + end + " ; time = " + (end - start) + " ; count = " + result.get(ESConstant.ALLDOCNUMBER));
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[SearchDataController] queryDataList Failed,The error message is :{}", e);
@ -83,13 +84,13 @@ public class SearchDataController {
* 根据ID 查询 一条数据详情
*/
@ResponseBody
@RequestMapping(value="/subject/getInfoByDocId",method=RequestMethod.GET)
@RequestMapping(value = "/subject/getInfoByDocId", method = RequestMethod.GET)
@ApiOperation(value = "查询单条数据")
@ApiImplicitParams({
@ApiImplicitParam(paramType="query", name = "subjectId", value = "专题ID", required = true, dataType = "String"),
@ApiImplicitParam(paramType="query", name = "docId", value = "主贴唯一ID", required = true, dataType = "String"),
@ApiImplicitParam(paramType="query", name = "siteId", value = "站点ID", required = true, dataType = "String"),})
public JSONObject getInfo(String subjectId,String docId,String siteId) {
@ApiImplicitParam(paramType = "query", name = "subjectId", value = "专题ID", required = true, dataType = "String"),
@ApiImplicitParam(paramType = "query", name = "docId", value = "主贴唯一ID", required = true, dataType = "String"),
@ApiImplicitParam(paramType = "query", name = "siteId", value = "站点ID", required = true, dataType = "String"),})
public JSONObject getInfo(String subjectId, String docId, String siteId) {
QueryRequest queryRequest = new QueryRequest();
queryRequest.setSubjectId(subjectId);
queryRequest.setDocId(docId);
@ -134,7 +135,7 @@ public class SearchDataController {
*/
@ResponseBody
@ApiOperation(value = "查询评论列表")
@RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) {
logger.info("[getCommentsByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
@ -152,12 +153,13 @@ public class SearchDataController {
* https://caiji.percent.cn/api/sq/crawl/getCommentsByDocId
* https://caiji.percent.cn/api/sq/crawl/getQuotesByDocId
* https://caiji.percent.cn/api/sq/crawl/getAttitudesByDocId
*
* @param queryRequest
* @return
*/
@ResponseBody
@ApiOperation(value = "查询转发列表")
@RequestMapping(value = "/getQuotesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/getQuotesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
public JSONObject getQuotesByDocId(@RequestBody QueryRequest queryRequest) {
logger.info("[getQuotesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
@ -171,9 +173,10 @@ public class SearchDataController {
}
}
@ResponseBody
@ApiOperation(value = "查询点赞列表")
@RequestMapping(value = "/getAttitudesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/getAttitudesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
public JSONObject getAttitudesByDocId(@RequestBody QueryRequest queryRequest) {
logger.info("[getAttitudesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
@ -191,19 +194,20 @@ public class SearchDataController {
/**
* 查询 数据的Counts 用户左侧的显示
*
* @param queryRequest
* @return
*/
@ApiOperation(value = "查询数据列表")
@RequestMapping(value = "/subject/queryCounts", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/subject/queryCounts", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject queryDataCounts(@RequestBody QueryRequest queryRequest) {
logger.info("[queryDataCounts] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
JSONObject result = new JSONObject();
if(null != queryRequest.getSubjectId()) {
if (null != queryRequest.getSubjectId()) {
result = searchDataService.queryDataCountsInOneIndex(queryRequest);
}else{
} else {
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
@ -216,12 +220,13 @@ public class SearchDataController {
/**
* 崔老师版本使修改标签调用的接口其他版本不调用该接口
* 崔老师版本使修改标签调用的接口其他版本不调用该接口
*
* @param queryRequest
* @return
*/
@ApiOperation(value = "修改标签")
@RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject updateLabel(@RequestBody QueryRequest queryRequest) {
logger.info("[updateLabel] partial / Params: {}", JSONObject.toJSONString(queryRequest));
@ -235,18 +240,16 @@ public class SearchDataController {
}
/**
* 删除专题的接口
*/
@ApiOperation(value = "删除专题")
@RequestMapping(value = "/delete/deleteBySubjectId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/delete/deleteBySubjectId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject deleteSubject(@RequestBody QueryRequest queryRequest){
public JSONObject deleteSubject(@RequestBody QueryRequest queryRequest) {
logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
JSONObject result = searchDataService.deleteBySubjectId(queryRequest);
JSONObject result = searchDataService.deleteBySubjectId(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[deleteSubject] Failed,The error message is :{}", e);
@ -255,12 +258,12 @@ public class SearchDataController {
}
@ApiOperation(value = "根据 cid 删除指定专题下的数据")
@RequestMapping(value = "/delete/deleteByCid", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/delete/deleteByCid", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject deleteSubjectByCid(@RequestBody QueryRequest queryRequest){
public JSONObject deleteSubjectByCid(@RequestBody QueryRequest queryRequest) {
logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
JSONObject result = searchDataService.deleteBySubjectIdByCid(queryRequest);
JSONObject result = searchDataService.deleteBySubjectIdByCid(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[deleteSubject] Failed,The error message is :{}", e);
@ -269,12 +272,12 @@ public class SearchDataController {
}
@ApiOperation(value = "根据 crawlDataFlag 删除指定专题下的问题")
@RequestMapping(value = "/delete/deleteByCrawlDataFlag", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/delete/deleteByCrawlDataFlag", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject deleteSubjectByCrawlDataFlag(@RequestBody QueryRequest queryRequest){
public JSONObject deleteSubjectByCrawlDataFlag(@RequestBody QueryRequest queryRequest) {
logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
JSONObject result = searchDataService.deleteBySubjectIdByCrawlDataFlag(queryRequest);
JSONObject result = searchDataService.deleteBySubjectIdByCrawlDataFlag(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[deleteSubject] Failed,The error message is :{}", e);
@ -285,11 +288,12 @@ public class SearchDataController {
/**
* 获取json 结构数据
*
* @param queryRequest
* @return
*/
@ApiOperation(value = "查询数据列表")
@RequestMapping(value = "/subject/getJson", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/subject/getJson", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject queryDataJsonList(@RequestBody QueryRequest queryRequest) {
logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest));
@ -298,25 +302,25 @@ public class SearchDataController {
long start = System.currentTimeMillis();
String scorllId = queryRequest.getScrollId();
// 专题数据导出
result = searchDataService.exportJsonDataInSubject(queryRequest);
// 专题数据导出
result = searchDataService.exportJsonDataInSubject(queryRequest);
Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER);
Integer limit = queryRequest.getLimit();
Integer page = 1;
if(allDocNumber%limit==0){
page = allDocNumber/limit;
}else{
page = allDocNumber/limit +1;
if (allDocNumber % limit == 0) {
page = allDocNumber / limit;
} else {
page = allDocNumber / limit + 1;
}
if(null != queryRequest.getPage() && !queryRequest.getPage().equals("")) {
if (null != queryRequest.getPage() && !queryRequest.getPage().equals("")) {
if (page > 0 && queryRequest.getPage() > page) {
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
// return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配");
// return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配");
}
}
long end = System.currentTimeMillis();
logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start) + " ; count = "+result.get(ESConstant.ALLDOCNUMBER));
logger.info("接口查询时长:statr:" + start + " ; end:" + end + " ; time = " + (end - start) + " ; count = " + result.get(ESConstant.ALLDOCNUMBER));
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryData] Failed,The error message is :{}", e);
@ -329,18 +333,21 @@ public class SearchDataController {
* 示例文件夹的专题复制
*/
@ApiOperation(value = "复制专题")
@RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject reIndex(@RequestBody QueryRequest queryRequest){
public JSONObject reIndex(@RequestBody QueryRequest queryRequest) {
logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
searchDataService.reIndexSubject(queryRequest);
//searchDataService.reIndexSubject(queryRequest);
searchDataService.reIndexFolder(queryRequest);
} catch (Exception e) {
logger.error("[reIndex] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, new JSONObject());
}
/**
* 2023-04-14 采集平台2.0新增接口
* 移动任务的数据并将原索引中的数据删除
@ -348,11 +355,12 @@ public class SearchDataController {
* crawl/subject/moveByTasks
* 参数
* {"originalIndex":"302088","moveTasks":["13889"],"currentIndex":"309980"}
*
* @param queryRequest
* @return
*/
@ApiOperation(value = "查询数据列表")
@RequestMapping(value = "/subject/moveByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/subject/moveByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject reindexDataByTasks(@RequestBody QueryRequest queryRequest) {
logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest));
@ -362,7 +370,7 @@ public class SearchDataController {
// 任务数据移动这个需要离线移动
result = searchDataService.reindexByTasks(queryRequest);
long end = System.currentTimeMillis();
logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start));
logger.info("接口查询时长:statr:" + start + " ; end:" + end + " ; time = " + (end - start));
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryData] Failed,The error message is :{}", e);
@ -378,16 +386,17 @@ public class SearchDataController {
* crawl/subject/deleteByTasks
* 参数
* {"index":"302088","delTasks":["13889"]}
*
* @param queryRequest
* @return
*/
@ApiOperation(value = "查询数据列表")
@RequestMapping(value = "/subject/deleteByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@RequestMapping(value = "/subject/deleteByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject delDataByTasks(@RequestBody QueryRequest queryRequest) {
logger.info("[delDataByTasks] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
JSONObject result = searchDataService.deleteByTasks(queryRequest);
JSONObject result = searchDataService.deleteByTasks(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[deleteSubject] Failed,The error message is :{}", e);

161
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -8,6 +8,7 @@ import com.bfd.mf.common.service.es.EsQueryServiceForSQNormal;
import com.bfd.mf.common.service.es.SubjectQueryDataService;
import com.bfd.mf.common.util.ESServerUtils;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.es.EsUtils;
import com.bfd.mf.common.web.entity.mysql.SentimentModify;
import com.bfd.mf.common.web.entity.mysql.cache.Cluster;
import com.bfd.mf.common.web.repository.mysql.SentimentRepository;
@ -22,7 +23,10 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -209,7 +213,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
* 导出时会用到这个字段解析解析组装返回结果 2
*/
private ESMonitorEntity parseMainMessage(JSONObject jsonObject, Integer searchType,
Map<String, Map<String, Object>> siteMap) throws Exception {
Map<String, Map<String, Object>> siteMap) {
ESMonitorEntity esMonitorEntity = new ESMonitorEntity();
try {
Map<String, Object> sourceAsMap = jsonObject;
@ -351,15 +355,31 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
List filePathSize = new ArrayList();
if (sourceAsMap.containsKey(ESConstant.FILEPATHSIZE)) {
if (!("").equals(sourceAsMap.get(ESConstant.FILEPATHSIZE)) && null != sourceAsMap.get(ESConstant.FILEPATHSIZE)) {
// filePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.FILEPATHSIZE).toString());
filePathSize = (List) sourceAsMap.get(ESConstant.FILEPATHSIZE);
if (sourceAsMap.get(ESConstant.FILEPATHSIZE).toString().contains("url=")) {
filePathSize = (List) sourceAsMap.get(ESConstant.FILEPATHSIZE);
} else {
filePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.FILEPATHSIZE).toString());
}
}
}
List imagePathSize = new ArrayList();
if (sourceAsMap.containsKey(ESConstant.IMAGEPATHSIZE)) {
if (null != sourceAsMap.get(ESConstant.IMAGEPATHSIZE) && !("[]").equals(sourceAsMap.get(ESConstant.IMAGEPATHSIZE))) {
//if(sourceAsMap.get(ESConstant.IMAGEPATHSIZE))
/**
* [{"size":"","videoTime":"","resolution":"","url":"/group13/default/20220928/17/23/6/86b2566a903bbdbfa8e9313e105a2beb_4.png"}, {"size":"","videoTime":"","resolution":"","url":"/group13/default/20220928/17/23/6/86b2566a903bbdbfa8e9313e105a2beb_7.png"}]
* [{"size":"3541.040039KB","videoTime":"70.980000s","resolution":"","url":"http://crawl-files.pontoaplus.com/group13/default/20221010/11/50/6/7b5a86115c242223816d2b9e43acd0b1.mp4"}]
*/
// imagePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.IMAGEPATHSIZE).toString());
imagePathSize = (List) sourceAsMap.get(ESConstant.IMAGEPATHSIZE);
/**
* [{size=107.41KB, videoTime=, resolution=-1x-1, url=/group16/default/20230308/16/07/6/53e6d72b9fe838529936572730d12441.jpg}]
*/
if (sourceAsMap.get(ESConstant.IMAGEPATHSIZE).toString().contains("url=")) {
imagePathSize = (List) sourceAsMap.get(ESConstant.IMAGEPATHSIZE);
} else {
imagePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.IMAGEPATHSIZE).toString());
}
}
}
List videoPathSize = new ArrayList();
@ -368,13 +388,12 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
&& !("[]").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE))
&& !("{\"\":null}").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString())) {
if (sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString().contains(ESConstant.URL)) {
if (sourceAsMap.get(ESConstant.VIDEOPATHSIZE) instanceof String) {
videoPathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString());
} else {
if (sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString().contains("url=")) {
videoPathSize = (List) sourceAsMap.get(ESConstant.VIDEOPATHSIZE);
} else {
videoPathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString());
}
//java.lang.String cannot be cast to java.util.List
//
}
}
}
@ -404,31 +423,40 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
//List<String>
// 视频分析结果
String asrText = "";
// String ocrText = "";
List<String> ocrText = new ArrayList<>();
if (sourceAsMap.containsKey(ESConstant.ASRTEXT)) {
asrText = sourceAsMap.get(ESConstant.ASRTEXT).toString();
}
if (sourceAsMap.containsKey(ESConstant.OCRTEXT)) {
ocrText = (List<String>) sourceAsMap.get(ESConstant.OCRTEXT);
if (sourceAsMap.get(ESConstant.OCRTEXT).toString().contains("[]")) {
} else {
ocrText.add(sourceAsMap.get(ESConstant.OCRTEXT).toString());
}
}
// 如果是用户数据需要获取下面四个字段值
String fansCount = "";
String friendsCount = "";
String postCount = "";
String location = "";
if (searchType == 2) {
if (sourceAsMap.containsKey(ESConstant.FANS_COUNT)) {
fansCount = sourceAsMap.get(ESConstant.FANS_COUNT).toString();
}
if (sourceAsMap.containsKey(ESConstant.FRIENDS_COUNT)) {
friendsCount = sourceAsMap.get(ESConstant.FRIENDS_COUNT).toString();
}
if (sourceAsMap.containsKey(ESConstant.POST_COUNT)) {
postCount = sourceAsMap.get(ESConstant.POST_COUNT).toString();
}
if (sourceAsMap.containsKey(ESConstant.WEIBO_LOCATION)) {
location = sourceAsMap.get(ESConstant.WEIBO_LOCATION).toString();
}
// if (searchType == 2) {
// if (sourceAsMap.containsKey(ESConstant.FANS_COUNT)) {
// fansCount = sourceAsMap.get(ESConstant.FANS_COUNT).toString();
// }
// if (sourceAsMap.containsKey(ESConstant.FRIENDS_COUNT)) {
// friendsCount = sourceAsMap.get(ESConstant.FRIENDS_COUNT).toString();
// }
// if (sourceAsMap.containsKey(ESConstant.POST_COUNT)) {
// postCount = sourceAsMap.get(ESConstant.POST_COUNT).toString();
// }
// if (sourceAsMap.containsKey(ESConstant.WEIBO_LOCATION)) {
// location = sourceAsMap.get(ESConstant.WEIBO_LOCATION).toString();
// }
// }
if (sourceAsMap.containsKey("location")) {
location = sourceAsMap.get("location").toString();
}
// 这个项目新增的三个字段
@ -546,8 +574,19 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
System.out.println(JSONObject.toJSONString(highlight));
}
// System.out.println("--------------" + url);
try {
esMonitorEntity.setForwardContent(sourceAsMap.get("forwardContent").toString());
String forwardContent = sourceAsMap.get("forwardContent").toString();
String forContent = forwardContent;
// if(forwardContent.contains("</title>")){
// Document doc = Jsoup.parse(forwardContent);
//String text = Jsoup.clean(forwardContent, Whitelist.basicWithImages());
String text = Jsoup.clean(forwardContent, Whitelist.basic());
forContent = text;
// }
esMonitorEntity.setForwardContent(forContent);
esMonitorEntity.setReadCount(readCount);
esMonitorEntity.setHasFile(hasFile);
esMonitorEntity.setHasVideo(hasVideo);
@ -572,6 +611,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
esMonitorEntity.setHlKeyWords(hlKeywords);
// 评论数转发数点赞数收藏数
esMonitorEntity.setCommentsCount(Integer.valueOf(sourceAsMap.getOrDefault(ESConstant.COMMENTS_COUNT, 0).toString()));
if (quoteCount.equals("-1")) {
quoteCount = "-";
}
@ -694,6 +734,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
/**
* 2023-04-26
* 根据主贴ID查 评论转发点赞的数据列表
*
* @param queryRequest
* @param currentIndexList
* @return
@ -732,6 +773,18 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
if (TopComment.size() > 0) {
size = 1L;
comments.add(TopComment);
String commentsCount = TopComment.getString(ESConstant.COMMENTS_COUNT);
if (commentsCount.equals("-1")) {
TopComment.put(ESConstant.COMMENTS_COUNT, "-");
}
String quoteCount = TopComment.getString(ESConstant.QUOTE_COUNT);
if (quoteCount.equals("-1")) {
TopComment.put(ESConstant.QUOTE_COUNT, "-");
}
String attitudesCount = TopComment.getString(ESConstant.ATTITUDES_COUNT);
if (attitudesCount.equals("-1")) {
TopComment.put(ESConstant.ATTITUDES_COUNT, "-");
}
}
boolQueryBuilder.mustNot(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId));
}
@ -756,10 +809,12 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
long clusterId = 4;
System.out.println(boolQueryBuilder);
CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID);
/**获取信息*/
SearchRequestBuilder builder = esServerUtils
.buildSearchRequestBuilder(clusterId, currentIndexList)
.setQuery(boolQueryBuilder)
.setCollapse(collapseBuilder)
.setFrom(start)
.setSize(limit)
.setFetchSource(ESConstant.COMMENT_FIELD_DATA, null)
@ -768,16 +823,41 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
SearchResponse searchResponse = builder.execute().actionGet();
SearchHit[] response = searchResponse.getHits().getHits();
/**
* 字段替换
* 将所有 = -1 的值替换成 -
*/
for (int i = 0; i < response.length; i++) {
JSONObject jsonObject = new JSONObject();
Map<String, Object> result = response[i].getSourceAsMap();
jsonObject.putAll(result);
jsonObject.put(ESConstant.SITEID, siteId);
String commentsCount = jsonObject.getString(ESConstant.COMMENTS_COUNT);
if (commentsCount.equals("-1")) {
jsonObject.put(ESConstant.COMMENTS_COUNT, "-");
}
String quoteCount = jsonObject.getString(ESConstant.QUOTE_COUNT);
if (quoteCount.equals("-1")) {
jsonObject.put(ESConstant.QUOTE_COUNT, "-");
}
String attitudesCount = jsonObject.getString(ESConstant.ATTITUDES_COUNT);
if (attitudesCount.equals("-1")) {
jsonObject.put(ESConstant.ATTITUDES_COUNT, "-");
}
comments.add(jsonObject);
}
/**
* TODO
* 评论回来的总数不对
*/
json.put(ESConstant.COMMENTLISTS, comments);
size = size + searchResponse.getHits().getTotalHits();
String[] currentList = {currentIndexList.get(0)};
Long totalCount = EsUtils.queryTotalCountNew("CL_Mini_2", currentList, boolQueryBuilder, 1);
System.out.println(size);
System.out.println(totalCount);
json.put(ESConstant.ALLDOCNUMBER, size);
} catch (Exception e) {
e.printStackTrace();
@ -938,7 +1018,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
String icon = "";
String siteType = "";
Map<String, Object> siteOtherMap = siteMap.get(enSource);
if(null != siteOtherMap && siteOtherMap.size()> 0){
if (null != siteOtherMap && siteOtherMap.size() > 0) {
if (siteOtherMap.containsKey("site_id")) {
siteId = siteMap.get(enSource).get("site_id").toString();
}
@ -1043,12 +1123,17 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
if (jsonObject.containsKey(ESConstant.DOC_TYPE)) {
if (jsonObject.get(ESConstant.DOC_TYPE).equals(ESConstant.SOCIAL)) {
String author = jsonObject.getString(ESConstant.AUTHOR);
/**
* 2023-05-17 社交媒体类详情的原文译文展示有问题
* 应该 author = author (translateTitle) 社交媒体类的用户不翻译
*/
String enSource = jsonObject.getString(ESConstant.EN_SOURCE);
if (!enSource.equals("weixin")) {
jsonObject.put(ESConstant.TITLE, author);
jsonObject.put(ESConstant.TRANSLATETITLE, author);
// content 的值 放入到 译文Title 中是为了展示一下翻译这个后面还是删掉吧
String content = jsonObject.getString(ESConstant.CONTENT);
jsonObject.put(ESConstant.TRANSLATECONTENT, content);
// String content = jsonObject.getString(ESConstant.CONTENT);
// jsonObject.put(ESConstant.TRANSLATECONTENT, content);
}
if (jsonObject.get(ESConstant.EN_SOURCE).equals(ESConstant.SINA)) {
jsonObject.put(ESConstant.SOURCE, "微博");
@ -1370,6 +1455,28 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
return jsonObject;
}
/**
 * Copies folder data by delegating to {@code esQueryServiceForSQMini.reIndexDataNew}
 * and reports the value it returns under the {@code "created"} key.
 *
 * <p>Any exception thrown by the delegate is caught and its stack trace printed; in that
 * case the returned object does not contain the {@code "created"} key.
 *
 * <p>NOTE(review): Spring's {@code @Async} is documented for {@code void} or
 * {@code Future}-typed return values. With a plain {@code JSONObject} return type, callers
 * invoking this through the Spring proxy presumably receive {@code null} rather than the
 * object built here — confirm against the caller.
 *
 * @param queryRequest request forwarded unchanged to {@code reIndexDataNew}
 * @return a JSON object holding {@code "created"} when the delegate call succeeded,
 *         otherwise an empty JSON object
 */
@Async
public JSONObject reIndexFolder(QueryRequest queryRequest) {
JSONObject jsonObject = new JSONObject();
/**
* 2023-05-29
* When copying the sample folder, the task IDs also have to be replaced with new ones,
* so this step probably should not use reindex at all; pulling the data and replacing
* the IDs while writing it would likely be enough.
*/
// String oldIndex = "cl_major_12962";
// String oldIndex = "cl_major_12941";
// String newIndexPre = "cl_special_1.0_";
// String newIndex = newIndexPre + queryRequest.getSubjectId();
// System.out.println(newIndex);
try {
// Copy the data of the subject above into the new subject.
long created = esQueryServiceForSQMini.reIndexDataNew(queryRequest);
jsonObject.put("created", created);
} catch (Exception e) {
// Best-effort: the failure is only printed, and the caller gets the object without "created".
e.printStackTrace();
}
return jsonObject;
}
// Integer status codes used elsewhere in this class.
// NOTE(review): meanings are not visible here — judging by the names only, these look like
// "tasks being moved" (6), "tasks finished" (3) and "folder being moved" (1); "FINSH" and
// "FLORD" are presumably misspellings of FINISH and FOLDER. Confirm against the usages.
private static int MOVE_TASKS_STATUS = 6;
private static int FINSH_TASKS_STATUS = 3;
private static int MOVE_FLORD_STATUS = 1;

4
pom.xml

@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.bfd.mf</groupId>
<artifactId>cl_stream_3.2</artifactId>
<version>3.2-SNAPSHOT</version>
<artifactId>cl_stream_3.3</artifactId>
<version>3.3-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>

Loading…
Cancel
Save