From 8fd98c05d71494565ad3be81a6e69d68104b69ff Mon Sep 17 00:00:00 2001 From: "jing.du" Date: Mon, 11 Sep 2023 19:38:53 +0800 Subject: [PATCH] =?UTF-8?q?2023-09-11=20=E9=87=87=E9=9B=86=E5=B9=B3?= =?UTF-8?q?=E5=8F=B02.0=E7=89=88=E6=9C=AC=20=E5=AF=BC=E5=87=BA=E7=9A=84=20?= =?UTF-8?q?location=20=E5=AD=97=E6=AE=B5=E6=B7=BB=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E4=B8=80=E4=B8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cl_query_data_job/pom.xml | 7 +- .../bfd/mf/job/service/es/EsQueryMiniService.java | 175 ++++-- .../job/service/statistics/StatisticsService.java | 3 + .../mf/job/service/taskCount/TaskCountService.java | 1 - .../src/main/resources/application.yml | 40 +- cl_search_api/pom.xml | 15 +- .../mf/common/service/cache/TopicQueryService.java | 21 +- .../mf/common/service/es/EsQueryAuthorService.java | 201 +++++-- .../common/service/es/EsQueryServiceForSQMini.java | 55 +- .../bfd/mf/common/service/es/GetQueryBuilder.java | 134 ++++- .../bfd/mf/common/util/constants/ESConstant.java | 5 +- .../bfd/mf/common/util/enums/BaseFieldEnum.java | 12 +- .../bfd/mf/common/util/enums/SearchScopeEnum.java | 2 + .../java/com/bfd/mf/common/util/es/EsUtils.java | 657 +++++++++++++++++++-- .../bfd/mf/common/web/vo/params/QueryRequest.java | 69 ++- .../web/vo/view/monitor/ESMonitorBaseEntity.java | 18 +- .../bfd/mf/controller/SearchDataController.java | 113 ++-- .../java/com/bfd/mf/service/SearchDataService.java | 161 ++++- pom.xml | 4 +- 19 files changed, 1388 insertions(+), 305 deletions(-) diff --git a/cl_query_data_job/pom.xml b/cl_query_data_job/pom.xml index 4f9f20f..d66c17a 100644 --- a/cl_query_data_job/pom.xml +++ b/cl_query_data_job/pom.xml @@ -72,15 +72,10 @@ 19.0 - - - - - com.alibaba fastjson - 1.2.60 + 1.2.68 diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java index 6ce5df0..aeafb3a 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java @@ -3,7 +3,10 @@ package com.bfd.mf.job.service.es; import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.util.EsUtils; -import org.elasticsearch.index.query.*; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.slf4j.Logger; @@ -22,21 +25,21 @@ public class EsQueryMiniService { /** * 统计 每个专题下,每个渠道 的总量 */ - public Map getSubjectChannelStatistics(String clusterName,String indexName) { + public Map getSubjectChannelStatistics(String clusterName, String indexName) { - Map resultMap = new HashMap<>(); - try{ + Map resultMap = new HashMap<>(); + try { boolean isExists = EsUtils.indexExists(clusterName, indexName); if (isExists) { BoolQueryBuilder qb = QueryBuilders.boolQuery(); AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE); - String indexNames [] = {indexName}; + String indexNames[] = {indexName}; Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag"); resultMap = EsUtils.parseTerms(result); } - }catch (Exception e){ + } catch (Exception e) { 
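+            // Log the stack trace and fall through; the caller simply receives the (possibly empty) resultMap.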
e.printStackTrace(); } return resultMap; @@ -45,15 +48,15 @@ public class EsQueryMiniService { /** * 统计 每个专题下,每个渠道 当天的增量 */ - public Map getSubjectChannelTodayStatistics(String clusterName,String indexName) { - Map resultMap = new HashMap<>(); - try{ + public Map getSubjectChannelTodayStatistics(String clusterName, String indexName) { + Map resultMap = new HashMap<>(); + try { boolean isExists = EsUtils.indexExists(clusterName, indexName); if (isExists) { BoolQueryBuilder qb = QueryBuilders.boolQuery(); - long current=System.currentTimeMillis(); - long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); + long current = System.currentTimeMillis(); + long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset(); Long startTime = new Timestamp(zero).getTime(); RangeQueryBuilder rangeQueryBuilder = QueryBuilders .rangeQuery(ESConstants.CRAWLTIME) @@ -61,12 +64,12 @@ public class EsQueryMiniService { .lt(current); qb.must(rangeQueryBuilder); AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE); - String indexNames [] = {indexName}; + String indexNames[] = {indexName}; Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag"); resultMap = EsUtils.parseTerms(result); } - }catch (Exception e){ + } catch (Exception e) { e.printStackTrace(); } return resultMap; @@ -75,19 +78,19 @@ public class EsQueryMiniService { /** * 统计 每个专题下,crawlDataFlag 三种类型当天的总量 */ - public Map getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) { - Map resultMap = new HashMap<>(); - try{ + public Map getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) { + Map resultMap = new HashMap<>(); + try { boolean isExists = EsUtils.indexExists(clusterName, indexName); if (isExists) { BoolQueryBuilder qb = QueryBuilders.boolQuery(); AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG); - String indexNames [] = {indexName}; + String indexNames[] = {indexName}; Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag"); - Map termsMap = EsUtils.parseTerms(result); + Map termsMap = EsUtils.parseTerms(result); resultMap = EsUtils.getResultMap(termsMap); } - }catch (Exception e){ + } catch (Exception e) { e.printStackTrace(); } return resultMap; @@ -96,15 +99,15 @@ public class EsQueryMiniService { /** * 统计 每个专题下,crawlDataFlag 三种类型 的增量 */ - public Map getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) { - Map resultMap = new HashMap<>(); - try{ + public Map getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) { + Map resultMap = new HashMap<>(); + try { boolean isExists = EsUtils.indexExists(clusterName, indexName); if (isExists) { BoolQueryBuilder qb = QueryBuilders.boolQuery(); - long current=System.currentTimeMillis(); - long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); + long current = System.currentTimeMillis(); + long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset(); Long startTime = new Timestamp(zero).getTime(); RangeQueryBuilder rangeQueryBuilder = QueryBuilders .rangeQuery(ESConstants.CRAWLTIME) @@ -112,13 +115,13 @@ public class EsQueryMiniService { .lt(current); qb.must(rangeQueryBuilder); AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG); - String indexNames [] = {indexName}; + String indexNames[] = {indexName}; Terms result = 
EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag"); - Map termsMap = EsUtils.parseTerms(result); + Map termsMap = EsUtils.parseTerms(result); resultMap = EsUtils.getResultMap(termsMap); } - }catch (Exception e){ + } catch (Exception e) { e.printStackTrace(); } return resultMap; @@ -126,12 +129,12 @@ public class EsQueryMiniService { /** - * 查询每个任务 的总量和当天的量 以及 包含图片的量、包含视频的量、包含附件的量 + * 查询每个任务 的总量和当天的量 以及 包含图片的量、包含视频的量、包含附件的量 */ - public Map getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag,String indexNamePre) { - Map countMap = new HashMap<>(); - String indexName = indexNamePre + task.getSubjectId();//subject_id - if(null != task.getCid()) { + public Map getTaskCount(String clusterName, Long taskId, Task task, String crawlDataFlag, String indexNamePre) { + Map countMap = new HashMap<>(); + String indexName = indexNamePre + task.getSubjectId();//subject_id + if (null != task.getCid()) { String cid = task.getCid().toLowerCase(); Long crawlStartTime = task.getCrawlStartTime().longValue(); Long crawlEndTime = task.getCrawlEndTime().longValue(); @@ -139,8 +142,9 @@ public class EsQueryMiniService { if (indexName.contains(indexNamePre)) { boolean isExists = EsUtils.indexExists(clusterName, indexName); if (isExists) { - BoolQueryBuilder qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); - logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + BoolQueryBuilder qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime); + logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString() + .replace("\n", "").replace("\r", "").replace(" ", "")); Long count = EsUtils.queryCount(clusterName, indexName, qb); countMap.put("totalCount", count); @@ -158,26 +162,26 @@ public class EsQueryMiniService { // 查询包含图片的数据的量 //videoPath == egc filePath == ugc imagePath == pgc - TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC,1); - TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC,1); - TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC,1); - TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD,false); - qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC, 1); + TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC, 1); + TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC, 1); + TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD, false); + qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime); qb.must(pgcTermQueryBuilder); logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long imageCount = EsUtils.queryCount(clusterName, indexName, qb); countMap.put(ESConstants.IMAGECOUNT, imageCount); - qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime); qb.must(egcTermQueryBuilder); logger.info("QB4 查询有视频的任务数: indexName: {}. 
taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long videoCount = EsUtils.queryCount(clusterName, indexName, qb); countMap.put(ESConstants.VIDEOCOUNT, videoCount); - qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime); qb.must(ugcTermQueryBuilder); logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long fileCount = EsUtils.queryCount(clusterName, indexName, qb); countMap.put(ESConstants.FILECOUNT, fileCount); - qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb = getQueryBuilder(cid, crawlDataFlag, crawlStartTime, crawlEndTime); qb.must(textTermQueryBuilder); logger.info("QB6 查询纯文本的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long textCount = EsUtils.queryCount(clusterName, indexName, qb); @@ -200,10 +204,93 @@ public class EsQueryMiniService { BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); RangeQueryBuilder rangeQueryBuilder = QueryBuilders .rangeQuery(ESConstants.PUBTIME).gte(crawlStartTime).lt(crawlEndTime); - TermQueryBuilder primary2 = QueryBuilders.termQuery(ESConstants.PRIMARY,2); + TermQueryBuilder primary2 = QueryBuilders.termQuery(ESConstants.PRIMARY, 2); shouldbq.must(rangeQueryBuilder).mustNot(primary2); // 不用统计FB 的这种粉丝的量 - TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR,"socialFans"); + TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR, "socialFans"); + qb.mustNot(pageTypeQueryBuilder).should(shouldbq); + return qb; + } + + public Map getTaskCountNew(String clusterName, Long taskId, Task task, String indexNamePre) { + Map countMap = new HashMap<>(); + String indexName = indexNamePre + task.getSubjectId();//subject_id + String taskIdString = taskId.toString(); + if (null != task.getCid()) { + String cid = task.getCid().toLowerCase(); + Long crawlStartTime = task.getCrawlStartTime().longValue(); + Long crawlEndTime = task.getCrawlEndTime().longValue(); + // String crawlDataFlag =task.getCrawlDataFlag(); + if (indexName.contains(indexNamePre)) { + boolean isExists = EsUtils.indexExists(clusterName, indexName); + if (isExists) { + BoolQueryBuilder qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime); + logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString() + .replace("\n", "").replace("\r", "").replace(" ", "")); + Long count = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("totalCount", count); + + // 上面的语句是查询 该任务的 总数据量:totalCount,下面的语句是查询 该任务当天的数据量:todayCount + long current = System.currentTimeMillis(); + long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset(); + Long startTime = new Timestamp(zero).getTime(); + RangeQueryBuilder rangeQueryBuilder2 = QueryBuilders + .rangeQuery(ESConstants.CRAWLTIME) + .gte(startTime).lt(current); + qb.must(rangeQueryBuilder2); + logger.info("QB2 查询今日总量: indexName: {}. 
taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long todayCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("todayCount", todayCount); + + // 查询包含图片的数据的量 + //videoPath == egc filePath == ugc imagePath == pgc + TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC, 1); + TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC, 1); + TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC, 1); + TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD, false); + qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime); + qb.must(pgcTermQueryBuilder); + logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long imageCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put(ESConstants.IMAGECOUNT, imageCount); + qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime); + qb.must(egcTermQueryBuilder); + logger.info("QB4 查询有视频的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long videoCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put(ESConstants.VIDEOCOUNT, videoCount); + qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime); + qb.must(ugcTermQueryBuilder); + logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long fileCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put(ESConstants.FILECOUNT, fileCount); + qb = getQueryBuilderNew(taskIdString, crawlStartTime, crawlEndTime); + qb.must(textTermQueryBuilder); + logger.info("QB6 查询纯文本的任务数: indexName: {}. 
taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long textCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put(ESConstants.TEXTCOUNT, textCount); + logger.info("含图片的数据量:" + imageCount + " ; 含视频的数据量:" + videoCount + " ; 含文件的数据量:" + fileCount + " ; 纯文本的数据量:" + textCount); + } + } + } + return countMap; + } + + + private BoolQueryBuilder getQueryBuilderNew(String taskId, Long crawlStartTime, Long crawlEndTime) { + System.out.println("要统计的任务ID: " + taskId); + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + // 任务ID 筛选 + //TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE + ".keyword", cid); + TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.TASKID, taskId); + qb.must(taskIdTermQueryBuilder); + // 时间范围筛选 只有主贴评论需要查时间,用户不需要设置时间范围 + BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); + RangeQueryBuilder rangeQueryBuilder = QueryBuilders + .rangeQuery(ESConstants.PUBTIME).gte(crawlStartTime).lt(crawlEndTime); + TermQueryBuilder primary2 = QueryBuilders.termQuery(ESConstants.PRIMARY, 2); + shouldbq.must(rangeQueryBuilder).mustNot(primary2); + // 不用统计FB 的这种粉丝的量 + TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR, "socialFans"); qb.mustNot(pageTypeQueryBuilder).should(shouldbq); return qb; } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java index 25da273..814cb02 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java @@ -251,6 +251,8 @@ public class StatisticsService { if(null != task.getCid() && !task.getCid().equals("test")) { // 获取任务数量 countMap = esQueryMiniService.getTaskCount(miniName, taskId, task, crawlDataFlag, indexNamePre); + countMap = esQueryMiniService.getTaskCountNew(miniName, taskId, task, indexNamePre); + // 直接更新 cl_task 表中的 data_total 和 today_data_total long totalCount = 0L; long todayCount = 0L; @@ -267,6 +269,7 @@ public class StatisticsService { fileCount = countMap.get(ESConstants.FILECOUNT); textCount = countMap.get(ESConstants.TEXTCOUNT); } + // taskRepository.updateTaskCount(taskId,totalCount,todayCount); taskRepository.updateTaskCountAll(taskId,totalCount,todayCount,imageCount,videoCount,fileCount,textCount); } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java index a59a456..9636c36 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java @@ -12,7 +12,6 @@ import com.bfd.mf.job.service.es.EsQueryNormalService; import com.bfd.mf.job.service.statistics.TotalCountService; import com.bfd.mf.job.util.DateUtil; import com.bfd.mf.job.util.EsUtils; -import kafka.utils.Json; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; diff --git a/cl_query_data_job/src/main/resources/application.yml b/cl_query_data_job/src/main/resources/application.yml index 8bf8063..d99d397 100644 --- a/cl_query_data_job/src/main/resources/application.yml +++ 
b/cl_query_data_job/src/main/resources/application.yml @@ -3,22 +3,14 @@ debug: false logging: level: com.bfd.mf: debug -#spring: -# datasource: -# driver-class-name: com.mysql.jdbc.Driver -# username: root -# password: bfd123 -# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round -# hikari: -# maximum-pool-size: 10 -# minimum-idle: 1 + spring: datasource: - driver-class-name: com.mysql.jdbc.Driver - username: crawl - password: D5HLOvk553DUNV62qJI= - url: jdbc:mysql://172.18.1.134:3306/all_task?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round - hikari: + driver-class-name: com.mysql.cj.jdbc.Driver + username: crawl666 + password: lx2a4jN1xFT96kj20LU= + url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC + hikari: maximum-pool-size: 10 minimum-idle: 1 @@ -29,17 +21,17 @@ worker: test-task-id: 180 ## 数据默认要写的 kafka broker-list: 172.18.1.113:9092 - send-topic : databasestokafka + send-topic: databasestokafka analysis-topic: - - sq_topic_cl_query_analysis_1 + - sq_topic_cl_query_analysis_1 analysis-group: sq_group_cl_analysis_1 ## 服务的状态,true 为启动 enable-analysis-producer: false # 查ES写kafka enable-analysis-consumer: false # 读kafka写ES - enable-statistics-producer: false # 统计 taskCount 和 subjectCount (采集平台) + enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台) enable-query-producer: false # 离线拉数(采集平台) - enable-high-frequency-producer: true # 高频离线拉数(采集平台) + enable-high-frequency-producer: false # 高频离线拉数(采集平台) enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用) enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用 enable-up-load-producer: false # 上传(采集平台) @@ -63,16 +55,16 @@ worker: query-data-year-starttime: 1546272000000 rule-rest: http://rule.sq.baifendian.com/data_match/content/ - comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask + comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask rule-rest-concurrency: 500 content-limit: 2000 failure-upper: 2000 - goFastPostUrl : http://172.18.1.113:8080/upload - goFastDomain : http://172.18.1.113:8080 - uploadOLYExcelPath : /opt/nfsdata/excelTask/ - uploadZipPath : /opt/nfsdata/uploadFiles/ - indexNamePre : cl_major_ + goFastPostUrl: http://172.18.1.113:8080/upload + goFastDomain: http://172.18.1.113:8080 + uploadOLYExcelPath: /opt/nfsdata/excelTask/ + uploadZipPath: /opt/nfsdata/uploadFiles/ + indexNamePre: cl_major_ es-normal: name: SQ_Normal_new diff --git a/cl_search_api/pom.xml b/cl_search_api/pom.xml index 5b0e959..8d8893a 100644 --- a/cl_search_api/pom.xml +++ b/cl_search_api/pom.xml @@ -5,15 +5,15 @@ 4.0.0 - cl_stream_3.2 + cl_stream_3.3 com.bfd.mf - 3.2-SNAPSHOT + 3.3-SNAPSHOT cl_search_api - Search V3.2 API + Search V3.3 API cl_search_api - 3.2.7-SNAPSHOT + 3.3.0-SNAPSHOT com.bfd.mf.SearchApplication @@ -260,6 +260,13 @@ 2.6 compile + + + + org.jsoup + jsoup + 1.10.2 + diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java index 13546b5..1836d5a 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java @@ -4,16 +4,15 @@ package com.bfd.mf.common.service.cache; import com.bfd.mf.common.service.es.EsCommonService; import 
com.bfd.mf.common.service.es.ParseSearchScopeService; import com.bfd.mf.common.util.constants.ESConstant; -import com.bfd.mf.common.web.entity.mysql.topic.Task; import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.nlp.common.util.object.TObjectUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -25,7 +24,7 @@ import java.util.stream.Collectors; @Service public class TopicQueryService { - private static Logger logger = LoggerFactory.getLogger(TopicQueryService.class); + private static Logger logger = LoggerFactory.getLogger(TopicQueryService.class); @Autowired private EsCommonService esCommonService; @Autowired @@ -70,7 +69,9 @@ public class TopicQueryService { } else { List areaList = siteRepository.findCidsByArea(queryRequest.getSearchArea()); List lowCaseAreaList = areaList.stream().map(String::toLowerCase).collect(Collectors.toList()); - // boolQuery.must(QueryBuilders.termsQuery(ESConstant.EN_SOURCE, lowCaseAreaList)); + if (lowCaseAreaList.size() > 0) { + boolQuery.must(QueryBuilders.termsQuery(ESConstant.EN_SOURCE, lowCaseAreaList)); + } // String searchArea = getSearchArea(queryRequest.getSearchArea()); // boolQuery.must(QueryBuilders.termQuery(ESConstant.AREA, searchArea)); } @@ -101,16 +102,16 @@ public class TopicQueryService { * 2023-04-24 * 采集平台2.0 版本,可以选中多个任务进行查询 */ - if(null == queryRequest.getTaskIds()){ + if (null == queryRequest.getTaskIds()) { logger.info("[TopicQueryService] queryByConditions_v1 没有任务ID,查询专题下全部任务"); - }else { + } else { List taskIds = queryRequest.getTaskIds(); - boolQuery.must(QueryBuilders.termsQuery(ESConstant.TASK_ID, taskIds)); + if (taskIds.size() > 0) { + boolQuery.must(QueryBuilders.termsQuery(ESConstant.TASK_ID, taskIds)); + } } - - if (null == cid || ("").equals(cid) || ("test").equals(cid)) { logger.info("[TopicQueryService] queryByConditions_v1 查询全部站点"); } else { diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorService.java index 1ea0e7a..10130c0 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorService.java @@ -8,9 +8,7 @@ import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; import com.bfd.mf.service.SearchAuthorService; -import com.bfd.nlp.common.util.string.TStringUtils; import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.slf4j.Logger; @@ -35,58 +33,59 @@ public class EsQueryAuthorService { @Autowired private SiteRepository siteRepository; - private String clusterName =""; + private String clusterName = ""; + @PostConstruct public void init() { // 注册数据查询来源 clusterName = bfdApiConfig.esMiniName(); - String sourceAddress [] = 
bfdApiConfig.esMiniAddress(); + String sourceAddress[] = bfdApiConfig.esMiniAddress(); EsUtils.registerCluster(clusterName, sourceAddress);// 配置文件中的 es-source } public List queryAuthorListByKeyword(String[] indexName, QueryRequest queryRequest) { - try{ + try { BoolQueryBuilder boolQueryBuilder = null; logger.debug("[EsQueryAuthorService] queryAuthorListByKeyword ..."); Integer limit = queryRequest.getLimit(); //每页的数量 Integer start = (queryRequest.getPage() - 1) * limit; //起始页(0,20,40....) String orderFlag = "desc"; - if(!queryRequest.getOrder().equals("")) { + if (!queryRequest.getOrder().equals("")) { queryRequest.getOrder(); // 排序方式 asc/desc } String sortFlag = "pubTime"; - if(!queryRequest.getSidx().equals("")) { + if (!queryRequest.getSidx().equals("")) { queryRequest.getSidx(); // 排序字段 } - boolQueryBuilder = getQueryBuilder(queryRequest); + boolQueryBuilder = getQueryBuilderNew(queryRequest); Integer searchType = queryRequest.getSearchType(); logger.info("[EsQueryAuthorService] queryAuthorListByKeyword indexName = " + indexName[0] + "; qb: \n {}.", boolQueryBuilder.toString()); - List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start,searchType); - List> site = siteRepository.findsiteByDel(0); - Map siteIdsMap = new HashMap<>(); - Map siteIconMap = new HashMap<>(); - for (Map map: site) { - if(map.containsKey("site_id")) { + List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType); + List> site = siteRepository.findsiteByDel(0); + Map siteIdsMap = new HashMap<>(); + Map siteIconMap = new HashMap<>(); + for (Map map : site) { + if (map.containsKey("site_id")) { siteIdsMap.put(map.get("cid").toString().toLowerCase(), map.get("site_id").toString()); } - if(map.containsKey("site_icon")) { + if (map.containsKey("site_icon")) { siteIconMap.put(map.get("cid").toString().toLowerCase(), map.get("site_icon").toString()); } } List newResult = new ArrayList<>(); - for (JSONObject json: result) { - JSONObject newJson= json; + for (JSONObject json : result) { + JSONObject newJson = json; String enSource = json.getString("enSource"); String siteId = siteIdsMap.get(enSource); String icon = siteIdsMap.get(enSource); - newJson.put("siteId",siteId); - newJson.put("icon",icon); + newJson.put("siteId", siteId); + newJson.put("icon", icon); newResult.add(newJson); } return result; - }catch (Exception e){ + } catch (Exception e) { e.printStackTrace(); return new ArrayList<>(); } @@ -96,41 +95,123 @@ public class EsQueryAuthorService { /** * 查询语句组装 */ - private BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) { - logger.info("[EsQueryAuthorService] getQueryBuilder start ..." ); - BoolQueryBuilder bqb = QueryBuilders.boolQuery(); +// private BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) { +// logger.info("[EsQueryAuthorService] getQueryBuilder start ..." 
); +// BoolQueryBuilder bqb = QueryBuilders.boolQuery(); +// // 基础查询:根据查询条件组装查询语句 +// BoolQueryBuilder boolQueryBuilder = null; +// boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest); +// // 二次查询: 关键词不为空就添加关键词查询语句 = 0 content 1 title 2 author 3 con+tit 4con+aut 5con+com +// // 单选 0:主贴;1:评论;2:用户 +// Integer searchType = queryRequest.getSearchType(); +// // String searchScope = queryRequest.getSearchScope(); //复选 0:标题;1:正文;2:作者 多个用,分割,例 “0,1” +// String keyword = queryRequest.getKeyword(); +// +// BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType); +// boolQueryBuilder.filter(searchTextBuilder); +// // Map fields = new HashedMap(); +// if (TStringUtils.isNotEmpty(keyword)) { +// // 主贴的话 查 标题和内容 +// if(searchType == 0){ +// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0); +// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0); +// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery); +// bqb.must(queryBuilder); +// // 评论的话 查 评论内容 +// }else if (searchType == 1){ +//// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0); +//// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery); +//// qb.must(queryBuilder); +// boolQueryBuilder.must(QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0)); +// // 用户 就只查 用户名 +// }else if (searchType == 2){ +// boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR)); +// //boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR)); +// } +// } +// bqb.must(boolQueryBuilder); +// return bqb; +// } + + /** + * 2023-05-24 漏了用户的高级搜索 + * @param queryRequest + * @return + */ + private BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) { + logger.info("[EsQueryAuthorService] getQueryBuilderNew start ..."); + + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + // 基础查询:根据查询条件组装查询语句 - BoolQueryBuilder boolQueryBuilder = null; - boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest); - // 二次查询: 关键词不为空就添加关键词查询语句 = 0 content 1 title 2 author 3 con+tit 4con+aut 5con+com - // 单选 0:主贴;1:评论;2:用户 - Integer searchType = queryRequest.getSearchType(); - // String searchScope = queryRequest.getSearchScope(); //复选 0:标题;1:正文;2:作者 多个用,分割,例 “0,1” - String keyword = queryRequest.getKeyword(); + BoolQueryBuilder boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest); + + // 如果要根据ID 查询数据 如果查ID 的,后面的条件就不用查了。 + if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) { + + String dataIds = queryRequest.getDataIds(); + List dataIdList = getDataIdList(dataIds); + QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList); + boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder); + + // 如果有 任务ID就有,没有就没有啊! 
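+                // If task IDs were supplied alongside the data IDs, narrow the ID lookup to those tasks as well.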
+ if (null != queryRequest.getTaskIds()) { + List taskIds = queryRequest.getTaskIds(); + if (taskIds.size() > 0) { + boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds)); + } + } + + qb.must(boolQueryBuilder); + return qb; + } + Integer searchType = queryRequest.getSearchType(); // 单选 0:主贴;1:评论;2:用户 BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType); boolQueryBuilder.filter(searchTextBuilder); - // Map fields = new HashedMap(); - if (TStringUtils.isNotEmpty(keyword)) { - // 主贴的话 查 标题和内容 - if(searchType == 0){ - MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0); - MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0); - QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery); - bqb.must(queryBuilder); - // 评论的话 查 评论内容 - }else if (searchType == 1){ -// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0); -// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery); -// qb.must(queryBuilder); - boolQueryBuilder.must(QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0)); - // 用户 就只查 用户名 - }else if (searchType == 2){ - boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR)); + if (null != queryRequest.getHighLevelQueries()) { + List highLevelQueries = queryRequest.getHighLevelQueries(); + List tempHighLevel = new ArrayList<>(highLevelQueries.size()); + for (HighLevelQuery high :tempHighLevel){ + System.out.println(high.getText()); + boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+high.getText()+"*").field(ESConstant.AUTHOR)); } } - bqb.must(boolQueryBuilder); - return bqb; + + +// if (null != queryRequest.getHighLevelQueries()) { +// List highLevelQueries = queryRequest.getHighLevelQueries(); +// +// // 1、找到所有的not进行非处理 +// highLevelQueries.stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +// qb.mustNot(this.getHighLevelQueryBuilder(e, true)); +// }); +// +// // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理 +// List tempHighLevel = new ArrayList<>(highLevelQueries.size()); +// highLevelQueries.stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +// // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空 +// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) { +// // 拼接条件 +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// qb.must(tempQueryBuilder); +// tempHighLevel.clear(); +// } +// // 将当前项加入临时队列 +// tempHighLevel.add(e); +// }); +// +// // 此处拼接tempHighLevel未处理的内容 +// if (!tempHighLevel.isEmpty()) { +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// qb.must(tempQueryBuilder); +// } +// +// } + qb.must(boolQueryBuilder); + return qb; } @@ -147,9 +228,9 @@ public class EsQueryAuthorService { String sortFlag = "pubTime"; // 排序字段 Integer searchType = 2; // 用户的查询 type 默认为2 logger.info("[EsQueryAuthorService] queryAuthorByAuthorId indexName = " + indexName[0] + "; qb: \n {}.", boolQueryBuilder.toString()); - List result = EsUtils.query(clusterName, indexName, 
boolQueryBuilder, sortFlag, orderFlag, limit, start,searchType); + List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType); return result; - }catch (Exception e){ + } catch (Exception e) { return new ArrayList<>(); } } @@ -179,9 +260,9 @@ public class EsQueryAuthorService { String sortFlag = "pubTime"; Integer searchType = 0; // 查用户法的主贴,因此 type =0 logger.info("[EsQueryAuthorService] queryContentsByAuthorId indexName = " + indexName[0] + "; qb: \n {}.", boolQueryBuilder.toString()); - List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start,searchType); + List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType); return result; - }catch (Exception e){ + } catch (Exception e) { return new ArrayList<>(); } } @@ -199,4 +280,18 @@ public class EsQueryAuthorService { return qb; } + + public List getDataIdList(String dataIds) { + List dataIdList = new ArrayList<>(); + if (dataIds.contains(",")) { + String ids[] = dataIds.split(","); + for (String id : ids) { + dataIdList.add(id); + } + } else { + dataIdList.add(dataIds); + } + return dataIdList; + } + } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java index a7dd2e5..3689ecb 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java @@ -6,10 +6,7 @@ import com.bfd.mf.common.util.es.EsUtils; import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.index.query.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -18,6 +15,7 @@ import org.springframework.stereotype.Service; import javax.annotation.PostConstruct; import java.util.ArrayList; import java.util.List; +import java.util.Map; @Service public class EsQueryServiceForSQMini { @@ -42,6 +40,7 @@ public class EsQueryServiceForSQMini { /** * 2023-04-25 查询调用的方法 * 查询 + * * @param indexName * @param queryRequest * @return @@ -81,7 +80,8 @@ public class EsQueryServiceForSQMini { } /** - * 查询数据量 + * 查询数据量 Count + * * @param indexName * @param queryRequest * @return @@ -89,7 +89,8 @@ public class EsQueryServiceForSQMini { public Long queryDataCountFromFolder(String[] indexName, QueryRequest queryRequest) { try { logger.debug("[EsQueryServiceForSQMini - 专题] queryDataCountFromOneSubject ..."); - BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest); + //BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest); + BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilderNew(queryRequest); Integer searchType = queryRequest.getSearchType(); Integer size = queryRequest.getLimit(); Long totalCount = EsUtils.queryTotalCountNew(clusterName, indexName, boolQueryBuilder, searchType); @@ -157,6 +158,48 @@ public class EsQueryServiceForSQMini { public long reIndexData(String indexList, String newIndex) { try { long 
created = EsUtils.reIndex(clusterName, indexList, newIndex); + System.out.println(created); + // 这块是不是得等3分钟后查一下ES中到底有木有数据哇! + return created; + } catch (Exception e) { + e.printStackTrace(); + return 0; + } + } + + /** + * 2023-05-30 新的示例文件夹拉取 + * @return + */ + public long reIndexDataNew(QueryRequest queryRequest) { + // 这个是之前准备好的示例文件夹!!! + String originalIndex = "cl_major_9999"; + String currentIndex = "cl_special_1.0_" + queryRequest.getSubjectId(); + System.out.println(originalIndex + " to " + currentIndex); + List> tasks = queryRequest.getTasks(); + try { + long created = 0L; + for (Map task : tasks) { + /** + * "cid":"facebook", + * "crawlKeyword":"account:https://www.facebook.com/joebiden", + * "id":1000882, + * "siteId":182 + */ + Long taskId = Long.valueOf(task.get("id").toString()); + String crawlDataFlag = task.get("crawlDataFlag").toString(); + String enSource = task.get("cid").toString().toLowerCase(); + TermQueryBuilder termQueryBuilder1 = QueryBuilders.termQuery(ESConstant.CRAWLDATAFLAG, crawlDataFlag); + TermQueryBuilder termQueryBuilder2 = QueryBuilders.termQuery(ESConstant.EN_SOURCE, enSource); + QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(termQueryBuilder1).must(termQueryBuilder2); + System.out.println(queryBuilder); + created = EsUtils.reIndexByTask(clusterName, originalIndex, currentIndex, queryBuilder); + System.out.println("条数: "+created); + //有条数之后是得执行个update操作吧 + EsUtils.updateByQuery(clusterName,currentIndex,queryBuilder,taskId); + + } + // 这块是不是得等3分钟后查一下ES中到底有木有数据哇! return created; } catch (Exception e) { e.printStackTrace(); diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java index 975521f..3ff5094 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java @@ -21,6 +21,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.util.*; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.stream.Stream; @Service @@ -46,7 +47,7 @@ public class GetQueryBuilder { * @param queryRequest * @return */ - public BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) { + public BoolQueryBuilder getQueryBuilderNew0530(QueryRequest queryRequest) { logger.info("[GetQueryBuilder] getQueryBuilder..."); BoolQueryBuilder qb = QueryBuilders.boolQuery(); @@ -65,7 +66,7 @@ public class GetQueryBuilder { if (null != queryRequest.getTaskIds()) { List taskIds = queryRequest.getTaskIds(); if (taskIds.size() > 0) { - boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termQuery("taskId", taskIds.get(0))); + boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds)); } } @@ -497,6 +498,8 @@ public class GetQueryBuilder { */ protected QueryBuilder getHighLevelQueryBuilder(HighLevelQuery highLevelQuery, boolean isNot) { BoolQueryBuilder result = QueryBuilders.boolQuery(); + // + highLevelQuery.setWordStrategy("2"); // 获取高级查询的字段 //Map fieldMap = SearchScopeEnum.getFieldsByKey(highLevelQuery.getScope()); // 默认就是查全文(标题 + 正文) @@ -523,14 +526,17 @@ public class GetQueryBuilder { } // 原文普通搜索 - QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); + QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path, 
fieldMap, highLevelQuery.getText(), isNot, + SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); if (rawQueryBuilder != null) { result.should(rawQueryBuilder); } // 如果译文不为空,则进行译文普通搜索 if (highLevelQuery.getTranslateText() != null) { - QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getTranslateText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); + QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, + highLevelQuery.getTranslateText(), isNot, + SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); if (transQueryBuilder != null) { result.should(transQueryBuilder); } @@ -607,6 +613,10 @@ public class GetQueryBuilder { return null; } + + System.out.println(" 0***** 要检索的词:" + splitText[0] + " --- " + strategyEnum); + // System.out.println(" 1***** 要检索的词:"+splitText[1]); + BoolQueryBuilder result = QueryBuilders.boolQuery(); // 如果是非 或 指定完整匹配,则用短语,否则用最佳字段 MultiMatchQueryBuilder.Type multiMatchType = isNot @@ -662,4 +672,120 @@ public class GetQueryBuilder { public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) { return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None); } + + + public BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) { + logger.info("[GetQueryBuilder] getQueryBuilder..."); + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + + // 基础查询:根据查询条件组装查询语句 + BoolQueryBuilder boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest); + + // 如果要根据ID 查询数据 如果查ID 的,后面的条件就不用查了。 + if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) { + + String dataIds = queryRequest.getDataIds(); + List dataIdList = getDataIdList(dataIds); + QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList); + boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder); + + // 如果有 任务ID就有,没有就没有啊! + if (null != queryRequest.getTaskIds()) { + List taskIds = queryRequest.getTaskIds(); + if (taskIds.size() > 0) { + boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds)); + } + } + + qb.must(boolQueryBuilder); + return qb; + } + + Integer searchType = queryRequest.getSearchType(); // 单选 0:主贴;1:评论;2:用户 + BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType); + boolQueryBuilder.filter(searchTextBuilder); + + if (null != queryRequest.getHighLevelQueries()) { + //List highLevelQueries = queryRequest.getHighLevelQueries(); + List highLevelQueries = queryRequest.getHighLevelQueries(); + + // 1、找到所有的not进行非处理 +// highLevelQueries.stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +// qb.mustNot(this.getHighLevelQueryBuilder(e, true)); +// }); + + // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理 + CopyOnWriteArrayList tempHighLevel = new CopyOnWriteArrayList<>(); + + //CopyOnWriteArrayList tempHighLevel = queryRequest.getHighLevelQueries(); + + for (int i = 0; i < highLevelQueries.size(); i++) { + HighLevelQuery e = highLevelQueries.get(i); + System.out.println("??? 没有这一步? 
: "+e.getExpression()); + BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); + if(SearchExpressionEnum.AND.is(e.getExpression())){ + tempQueryBuilder.must(this.getHighLevelQueryBuilder(e, false)); + qb.must(tempQueryBuilder); + } + } + //System.out.println( highLevelQueries.stream()); + + highLevelQueries.stream().forEach(e -> { + if( !tempHighLevel.isEmpty()){ + // 拼接条件 + BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); + // tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); + System.out.println(tempHighLevel.size()); + System.out.println("??? : "+tempHighLevel.get(0).getText()); + + tempHighLevel.forEach(temp -> { + System.out.println(temp); + + if( SearchExpressionEnum.AND.is(e.getExpression())){ + tempQueryBuilder.must(this.getHighLevelQueryBuilder(temp, false)); + + }else if (SearchExpressionEnum.OR.is(e.getExpression())){ + tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)); + // qb.should(tempQueryBuilder); + }else{ + tempQueryBuilder.mustNot(this.getHighLevelQueryBuilder(temp, false)); + // qb.mustNot(tempQueryBuilder); + } + // qb.must(tempQueryBuilder); + tempHighLevel.clear(); + }); + +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// qb.must(tempQueryBuilder); + + } + // 将当前项加入临时队列 + tempHighLevel.add(e); + +// // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空 +// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) { +// // 拼接条件 +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// qb.must(tempQueryBuilder); +// tempHighLevel.clear(); +// } +// // 将当前项加入临时队列 +// tempHighLevel.add(e); + }); + + // 此处拼接tempHighLevel未处理的内容 + if (!tempHighLevel.isEmpty()) { + BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); + tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); + qb.must(tempQueryBuilder); + } + + } + + qb.must(boolQueryBuilder); + return qb; + } + } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java index 3f712a4..9044e61 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java @@ -283,6 +283,8 @@ public class ESConstant { public static final String HAS_IMAGE = "hasImage"; public static final String HAS_VIDEO = "hasVideo"; public static final String HAS_FILE = "hasFile"; + + public static final String HAS_TRANS = "hasTrans"; /** * 关键词 */ @@ -773,7 +775,8 @@ public class ESConstant { ESConstant.VALUELABEL, ESConstant.CATEGORYLABEL, - ESConstant.TAG + ESConstant.TAG, + ESConstant.HAS_TRANS ); diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java index 0310e71..eaec719 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java @@ -170,7 +170,15 @@ public enum BaseFieldEnum { * 区县 */ county_code, + /** + * OCR 结果 + */ + ocrText, + /** + * ASR 结果 + */ + asrText ; /** @@ -178,9 +186,11 @@ public enum 
BaseFieldEnum { * @return */ public static Map getMatchFields(){ - Map matchMap = new HashMap<>(2); + Map matchMap = new HashMap<>(4); matchMap.put(BaseFieldEnum.title.name(), 2.0F); matchMap.put(BaseFieldEnum.content.name(), 1.0F); + matchMap.put(BaseFieldEnum.ocrText.name(), 1.0F); + matchMap.put(BaseFieldEnum.asrText.name(), 1.0F); return matchMap; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java index c2f90b2..c9bd67e 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java @@ -22,6 +22,8 @@ public enum SearchScopeEnum { return new HashMap() {{ put(ESConstant.TITLE, 1.0F); put(ESConstant.CONTENT, 1.0F); + put(ESConstant.OCRTEXT, 1.0F); + put(ESConstant.ASRTEXT, 1.0F); }}; } }, diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java index f53d8f6..c3a8daf 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java @@ -26,9 +26,13 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.index.query.*; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.reindex.*; import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.AggregationBuilder; @@ -47,8 +51,6 @@ import org.springframework.util.Assert; import java.net.InetAddress; import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; public abstract class EsUtils { @@ -84,20 +86,23 @@ public abstract class EsUtils { return CLIENT_MAP.get(clusterName); } - public static List query(String clusterName, String[] index, - final QueryBuilder queryBuilder, - String sortFlag, String orderFlag, - Integer size, Integer from, - Integer searchType) { + public static List query0530(String clusterName, String[] index, + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { System.out.println("非高亮查询"); TransportClient client = getClient(clusterName); boolean options = true; boolean optionsf = false; // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 - CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); - if (searchType == 0) { - collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); - } +// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); +// CollapseBuilder collapseBuilder = null; +// if (searchType == 0) { +// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); +// } + + // Object[] objects= new Object[]{"9999"}; // 查询 // from + size 的 分页 查询方式 @@ -105,9 +110,10 @@ public abstract class EsUtils { .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? 
SortOrder.ASC : SortOrder.DESC) .setQuery(queryBuilder) - .setCollapse(collapseBuilder) + // .searchAfter(objects) + //.setCollapse(collapseBuilder) .setSize(size) - .setFrom(from); + .setFrom(from); // 用search_after 的话,这个 from 得 == 0 System.out.println(requestBuilder); @@ -130,11 +136,11 @@ public abstract class EsUtils { } - public static List queryWithHighlight(String clusterName, String[] index, - final QueryBuilder queryBuilder, - String sortFlag, String orderFlag, - Integer size, Integer from, - Integer searchType) { + public static List queryWithHighlight0530(String clusterName, String[] index, + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { System.out.println("高亮查询"); EsBaseParam esBaseParam = new EsBaseParam(); TransportClient client = getClient(clusterName); @@ -235,22 +241,53 @@ public abstract class EsUtils { List dataList = new ArrayList<>(); if (searchResponse.getHits().totalHits > 0) { - for (SearchHit hit : searchResponse.getHits().getHits()) { + SearchHit[] hits = searchResponse.getHits().getHits(); + for (int i = 0; i < hits.length; i++) { JSONObject data = new JSONObject(); - data.putAll(hit.getSourceAsMap()); - String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE}; - for (int i = 0; i < fieldName.length; i++) { - getHighlightResult(fieldName[i], hit, data); + data.putAll(hits[i].getSourceAsMap()); + String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE, ESConstant.OCRTEXT, ESConstant.ASRTEXT}; + for (int j = 0; j < fieldName.length; j++) { + getHighlightResult(fieldName[j], hits[i], data); } + data.put("subjectId", hits[i].getIndex() + .replace("cl_major_", "") + .replace("cl_subject_", "") + .replace("cl_special_1.0_", "")); dataList.add(data); } } + +// if (searchResponse.getHits().totalHits > 0) { +// for (SearchHit hit : searchResponse.getHits().getHits()) { +// JSONObject data = new JSONObject(); +// data.putAll(hits[i].getSourceAsMap()); +// data.put("subjectId", hits[i].getIndex() +// .replace("cl_major_", "") +// .replace("cl_subject_", "") +// .replace("cl_special_1.0_", "")); +// dataList.add(data); +// +// JSONObject data = new JSONObject(); +// data.putAll(hit.getSourceAsMap()); +// String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE}; +// for (int i = 0; i < fieldName.length; i++) { +// getHighlightResult(fieldName[i], hit, data); +// } +// +// data.put("subjectId", hit.getSourceAsMap().get() +// .replace("cl_major_", "") +// .replace("cl_subject_", "") +// .replace("cl_special_1.0_", "")); +// dataList.add(data); +// } +// } return dataList; } private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) { if (hit.getHighlightFields().containsKey(fieldName)) { HighlightField highlightField = hit.getHighlightFields().get(fieldName); + System.out.println("getHighlightResult highlightField : "+highlightField); Text[] fragments = highlightField.fragments(); String fragmentString = ""; for (Text fragment : fragments) { @@ -385,9 +422,10 @@ public abstract class EsUtils { return 0L; } - public static Long queryTotalCountNew(String clusterName, String[] index, - QueryBuilder queryBuilder, - Integer searchType) { + + public static Long queryTotalCountNew_0530(String clusterName, String[] index, + QueryBuilder queryBuilder, + Integer searchType) { TransportClient client = getClient(clusterName); boolean options = true; @@ -395,27 +433,62 @@ public abstract class EsUtils { // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 String count 
= "count"; AggregationBuilder aggregation; + // searchType = 0 是 主贴, if (searchType == 0) { aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID); } else { aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID); } + + // CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); + CollapseBuilder collapseBuilder = null; + if (searchType == 0) { + collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); + } //searchSourceBuilder.aggregation(aggregation); // from + size 的 分页 查询方式 SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) .setQuery(queryBuilder) + //.setCollapse(collapseBuilder); .addAggregation(aggregation); // System.out.println(requestBuilder); + /** + * 2023-05-30 先注释掉看看情况 + */ +// System.out.println("3333 : " + requestBuilder.get().getHits().totalHits); Aggregations aggregations = requestBuilder.get().getAggregations(); Cardinality cardinality = aggregations.get(count); -// System.out.println("1111 : " + cardinality.getValue()); -// System.out.println("2222 : " + requestBuilder.get().getHits().totalHits); - long resultCount = cardinality.getValue(); - if (searchType == 2) { - resultCount = requestBuilder.get().getHits().totalHits; - } + System.out.println("cardinality : " + cardinality.getValue()); + System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); +// long resultCount = cardinality.getValue(); +// if (searchType == 2) { +// resultCount = requestBuilder.get().getHits().totalHits; +// } +/** + * 折叠查询的参考代码 + */ +// CollapseBuilder collapseBuilder = new CollapseBuilder("duplicate_id"); +// InnerHitBuilder innerHitBuilder = new InnerHitBuilder(); +// innerHitBuilder.setName("test"); +// innerHitBuilder.setSize(0); +// innerHitBuilder.setTrackScores(true); +// innerHitBuilder.setIgnoreUnmapped(true); +// innerHitBuilder.addSort(SortBuilders.fieldSort("level").order(SortOrder.DESC)); +// collapseBuilder.setInnerHits(innerHitBuilder); +// +// ...... 
+// +// srb = client.prepareSearch(indexName) +// .setTypes(typeName) +// .setQuery(bqb) +// .setFrom(params.getFrom()) +// .setSize(params.getSize()) +// .setCollapse(collapseBuilder) +// .setPreference("_primary_first"); + + long resultCount = requestBuilder.get().getHits().totalHits; return resultCount; } @@ -426,6 +499,7 @@ public abstract class EsUtils { Integer limit, String scrollId, Integer searchType) { + Map result = new HashMap<>(); TransportClient client = getClient(clusterName); SearchResponse searchResponse = null; @@ -590,6 +664,14 @@ public abstract class EsUtils { } } + /** + * 复制索引数据 + * + * @param clusterName + * @param originalIndex + * @param currentIndex + * @return + */ public static long reIndex(String clusterName, String originalIndex, String currentIndex) { // String clusterName, String originalIndex, String currentIndex, try { @@ -599,6 +681,7 @@ public abstract class EsUtils { .newRequestBuilder(client) .source(originalIndex) .destination(currentIndex); + // 新建别名(查询需要用别名查,不加别名查不到哦) String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_"); BulkByScrollResponse response = builder.get(); @@ -690,6 +773,46 @@ public abstract class EsUtils { } } + /** + * 2023-05-30 + * + * @param clusterName + * @param originalIndex + * @param currentIndex + * @param queryBuilder + * @return + */ + public static long reIndexByTask(String clusterName, + String originalIndex, + String currentIndex, + QueryBuilder queryBuilder) { + try { + TransportClient client = getClient(clusterName); + + System.out.println(originalIndex + " *** " + currentIndex); + ReindexRequestBuilder builder = ReindexAction.INSTANCE + .newRequestBuilder(client) + .source(originalIndex)// 来源索引 + .destination(currentIndex) // 目标索引 + .filter(queryBuilder) + .refresh(true); + // builder. 
+ BulkByScrollResponse response = builder.get(); + // 添加别名,将cl_special_1.0_ 替换成 cl_major 别名 + String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_"); + IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE + .newRequestBuilder(client) + .addAlias(currentIndex, newAliex); + IndicesAliasesResponse IndicesResponse = indicesBuilder.get(); + System.out.println("******* : " + response); + System.out.println("##### : " + IndicesResponse); + return response.getCreated(); + } catch (Exception e) { + e.printStackTrace(); + return 0; + } + } + public static void delIndexByTasks(String clusterName, String indexName, String cid, List tasks) { try { TransportClient client = getClient(clusterName); @@ -707,6 +830,17 @@ public abstract class EsUtils { } } + public static void updateByQuery(String clusterName, String currentIndex, QueryBuilder queryBuilder, Long taskId) { + TransportClient client = getClient(clusterName); + UpdateByQueryRequestBuilder updateByQuery = UpdateByQueryAction.INSTANCE.newRequestBuilder(client); + // "source": "ctx._source['source']='路透社';" + updateByQuery.source(currentIndex) + .filter(queryBuilder) + .size(1000) + .script(new Script(ScriptType.INLINE, "painless", "ctx._source['taskId'] = '" + taskId + "'", Collections.emptyMap())); + BulkByScrollResponse response = updateByQuery.get(); + } + /** * 全文检索查询拼接(非nested属性重载方法) * @@ -757,4 +891,461 @@ public abstract class EsUtils { public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) { return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None); } + + +// public void testAggAndDistinct(){ +// //获取注解,通过注解可以得到 indexName 和 type +// Document document = Customer.class.getAnnotation(Document.class); +// // dateHistogram Aggregation 是时间柱状图聚合,按照天来聚合 , +// // dataAgg 为聚合结果的名称,createTime 为字段名称 +// // cardinality 用来去重 +// SearchQuery searchQuery = new NativeSearchQueryBuilder() +// .withQuery(matchAllQuery()) +// .withSearchType(SearchType.QUERY_THEN_FETCH) +// .withIndices(document.indexName()).withTypes(document.type()) +// .addAggregation(AggregationBuilders.dateHistogram("dataAgg").field("createTime") +// .dateHistogramInterval(DateHistogramInterval.DAY) +// .subAggregation(AggregationBuilders.cardinality("nameAgg").field("firstName"))) +// .build(); +// +// // 聚合的结果 +// Aggregations aggregations = elasticsearchTemplate.query(searchQuery, response -> response.getAggregations()); +// Map results = aggregations.asMap(); +// Histogram histogram = (Histogram) results.get("dataAgg"); +// // 将bucket list 转换成 map , key -> 名字 value-> 出现次数 +// histogram.getBuckets().stream().forEach(t->{ +// Histogram.Bucket histogram1 = t; +// System.out.println(histogram1.getKeyAsString()); +// Cardinality cardinality = histogram1.getAggregations().get("nameAgg"); +// System.out.println(cardinality.getValue()); +// }); +// } + + + public static Long queryTotalCountNew0530(String clusterName, String[] index, + QueryBuilder queryBuilder, + Integer searchType) { + System.out.println("---------------------------"); + long resultCount = 0l; + try { + + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 + String count = "count"; + AggregationBuilder aggregation; + // searchType = 0 是 主贴, + if (searchType == 0) { + aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID); + } else { + aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID); + } + 
+// aggregation = AggregationBuilders.dateHistogram("dataAgg").field("createTimeStr") +// .dateHistogramInterval(DateHistogramInterval.DAY) +// .subAggregation(AggregationBuilders.cardinality("idAgg").field("dataId")); + + // CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); +// CollapseBuilder collapseBuilder = null; +// if (searchType == 0) { +// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); +// } + //searchSourceBuilder.aggregation(aggregation); + // from + size 的 分页 查询方式 + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .setQuery(queryBuilder); + //.setCollapse(collapseBuilder); + // .addAggregation(aggregation); + + + // Aggregations aggregations = elasticsearchTemplate.query(searchQuery, response -> response.getAggregations()); + // System.out.println(requestBuilder); + /** + * 2023-05-30 先注释掉看看情况 + */ +// System.out.println("3333 : " + requestBuilder.get().getHits().totalHits); +// Aggregations aggregations = requestBuilder.get().getAggregations(); +// Cardinality cardinality = aggregations.get(count); +// System.out.println("cardinality : " + cardinality.getValue()); + System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); + +// Map results = aggregations.asMap(); +// Histogram histogram = (Histogram) results.get("dataAgg"); +// // 将bucket list 转换成 map , key -> 名字 value-> 出现次数 +// histogram.getBuckets().stream().forEach(t -> { +// Histogram.Bucket histogram1 = t; +// System.out.println(histogram1.getKeyAsString()); +// Cardinality cardinality1 = histogram1.getAggregations().get("idAgg"); +// System.out.println(cardinality1.getValue()); +// }); +// long resultCount = cardinality.getValue(); +// if (searchType == 2) { +// resultCount = requestBuilder.get().getHits().totalHits; +// } +/** + * 折叠查询的参考代码 + */ +// CollapseBuilder collapseBuilder = new CollapseBuilder("duplicate_id"); +// InnerHitBuilder innerHitBuilder = new InnerHitBuilder(); +// innerHitBuilder.setName("test"); +// innerHitBuilder.setSize(0); +// innerHitBuilder.setTrackScores(true); +// innerHitBuilder.setIgnoreUnmapped(true); +// innerHitBuilder.addSort(SortBuilders.fieldSort("level").order(SortOrder.DESC)); +// collapseBuilder.setInnerHits(innerHitBuilder); +// +// ...... 
+// +// srb = client.prepareSearch(indexName) +// .setTypes(typeName) +// .setQuery(bqb) +// .setFrom(params.getFrom()) +// .setSize(params.getSize()) +// .setCollapse(collapseBuilder) +// .setPreference("_primary_first"); + + resultCount = requestBuilder.get().getHits().totalHits; + } catch (Exception e) { + e.printStackTrace(); + } + return resultCount; + } + + + public static List query05301(String clusterName, String[] index, + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { + System.out.println("非高亮查询"); + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 + CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); + // CollapseBuilder collapseBuilder = null; + if (searchType == 0) { + collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); + } + + // Object[] objects= new Object[]{"9999"}; + + // 查询 + // from + size 的 分页 查询方式 + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) + .setQuery(queryBuilder) + // .searchAfter(objects) + //.setCollapse(collapseBuilder) + .setSize(size) + .setFrom(from); // 用search_after 的话,这个 from 得 == 0 + + System.out.println(requestBuilder); + + + SearchResponse searchResponse = requestBuilder.execute().actionGet(); + List dataList = new ArrayList<>(); + if (searchResponse.getHits().totalHits > 0) { + SearchHit[] hits = searchResponse.getHits().getHits(); + for (int i = 0; i < hits.length; i++) { + JSONObject data = new JSONObject(); + data.putAll(hits[i].getSourceAsMap()); + data.put("subjectId", hits[i].getIndex() + .replace("cl_major_", "") + .replace("cl_subject_", "") + .replace("cl_special_1.0_", "")); + dataList.add(data); + } + } + return dataList; + } + + + /** + * 第一组查询,不做数据聚合 + */ + public static Long queryTotalCountNew_0531(String clusterName, String[] index, + QueryBuilder queryBuilder, + Integer searchType) { + System.out.println("---------------------------"); + long resultCount = 0l; + try { + + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .setQuery(queryBuilder); + + System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); + + + resultCount = requestBuilder.get().getHits().totalHits; + } catch (Exception e) { + e.printStackTrace(); + } + return resultCount; + } + +// public static List query_0531(String clusterName, String[] index, +// final QueryBuilder queryBuilder, +// String sortFlag, String orderFlag, +// Integer size, Integer from, +// Integer searchType) { +// System.out.println("非高亮查询"); +// TransportClient client = getClient(clusterName); +// boolean options = true; +// boolean optionsf = false; +// // from + size 的 分页 查询方式 +// SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) +// .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) +// .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? 
SortOrder.ASC : SortOrder.DESC) +// .setQuery(queryBuilder) +// .setSize(size) +// .setFrom(from); +// +// System.out.println(requestBuilder); +// +// SearchResponse searchResponse = requestBuilder.execute().actionGet(); +// List dataList = new ArrayList<>(); +// if (searchResponse.getHits().totalHits > 0) { +// SearchHit[] hits = searchResponse.getHits().getHits(); +// for (int i = 0; i < hits.length; i++) { +// JSONObject data = new JSONObject(); +// data.putAll(hits[i].getSourceAsMap()); +// data.put("subjectId", hits[i].getIndex() +// .replace("cl_major_", "") +// .replace("cl_subject_", "") +// .replace("cl_special_1.0_", "")); +// dataList.add(data); +// } +// } +// return dataList; +// } + + public static List queryWithHighlight(String clusterName, String[] index, + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { + System.out.println("高亮查询"); + EsBaseParam esBaseParam = new EsBaseParam(); + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 + CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); + if (searchType == 0) { + collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); + } + + + esBaseParam.setWithHighlight(true); + esBaseParam.setHighlightFields(new ArrayList<>(BaseFieldEnum.getMatchFieldsWithPy().keySet())); + Integer numOfFragments = 2; + HighlightBuilder highlightBuilder = new HighlightBuilder() + // match进行高亮 + .requireFieldMatch(true) + .order(HighlightBuilder.Order.SCORE) + //fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。 + .numOfFragments(numOfFragments) + //一段 fragment 包含多少个字符。默认100。 +// .fragmentSize(Constants.MAX_R_LENGTH / numOfFragments) +// .noMatchSize(Constants.MAX_R_LENGTH) + .preTags(ESConstant.HIGHLIGHTPRETAGS) + .postTags(ESConstant.HIGHLIGHTPOSTTAGS); + BaseFieldEnum.getMatchFieldsWithPy().keySet().forEach(highlightBuilder::field); + + esBaseParam.setHighlightBuilder(highlightBuilder); + + // 查询 + // from + size 的 分页 查询方式 + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? 
SortOrder.ASC : SortOrder.DESC) + .setQuery(queryBuilder) + .setCollapse(collapseBuilder) + .setSize(size) + .setFrom(from) + .highlighter(esBaseParam.getHighlightBuilder()); + + System.out.println(requestBuilder); + System.out.println("-----"); + + SearchResponse searchResponse = requestBuilder.execute().actionGet(); + + List dataList = new ArrayList<>(); + if (searchResponse.getHits().totalHits > 0) { + SearchHit[] hits = searchResponse.getHits().getHits(); + for (int i = 0; i < hits.length; i++) { + JSONObject data = new JSONObject(); + data.putAll(hits[i].getSourceAsMap()); + String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE, ESConstant.OCRTEXT, ESConstant.ASRTEXT}; + for (int j = 0; j < fieldName.length; j++) { + getHighlightResult(fieldName[j], hits[i], data); + } + data.put("subjectId", hits[i].getIndex() + .replace("cl_major_", "") + .replace("cl_subject_", "") + .replace("cl_special_1.0_", "")); + dataList.add(data); + } + } + + return dataList; + } + + + /** + * 05-30 聚合查询 + */ + + public static Long queryTotalCountNew(String clusterName, String[] index, + QueryBuilder queryBuilder, + Integer searchType) { + long resultCount = 0l; + try { + + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + String aggrCount = "count"; + AggregationBuilder aggregation; + // searchType = 0 是 主贴, + if (searchType == 0) { + aggregation = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DOC_ID); + } else { + aggregation = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DATA_ID); + } + + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + // .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .setQuery(queryBuilder) + .addAggregation(aggregation); + + System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); + + // ParsedCardinality parsedCardinality = (ParsedCardinality) searchResponse.getAggregations().asList().get(0); +// Aggregations aggregations = requestBuilder.get().getAggregations(); +// Cardinality cardinality = aggregations.get(count); +// System.out.println("cardinality : " + cardinality.getValue()); + + Aggregations aggregations = requestBuilder.get().getAggregations(); + Cardinality cardinality = aggregations.get(aggrCount); + System.out.println("1111 : " + aggregations.get(aggrCount)); + System.out.println("cardinality : " + cardinality.getValue()); + resultCount = cardinality.getValue(); + // 用户数据不用ID做聚合?? 
+ if (searchType == 2) { + resultCount = requestBuilder.get().getHits().totalHits; + } + + // resultCount = requestBuilder.get().getHits().totalHits; + } catch (Exception e) { + e.printStackTrace(); + } + return resultCount; + } + + public static List query(String clusterName, String[] index, + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { + System.out.println("非高亮查询"); + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + String aggrCount = "count"; + + CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); + AggregationBuilder aggregationBuilder = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DATA_ID); + + if (searchType == 0) { + collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); + aggregationBuilder = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DOC_ID); + } + // from + size 的 分页 查询方式 + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) + .setQuery(queryBuilder) + .setCollapse(collapseBuilder) + .addAggregation(aggregationBuilder) + .setSize(size) + .setFrom(from); + + System.out.println(requestBuilder); + + SearchResponse searchResponse = requestBuilder.execute().actionGet(); + List dataList = new ArrayList<>(); + if (searchResponse.getHits().totalHits > 0) { + SearchHit[] hits = searchResponse.getHits().getHits(); + for (int i = 0; i < hits.length; i++) { + JSONObject data = new JSONObject(); + data.putAll(hits[i].getSourceAsMap()); + data.put("subjectId", hits[i].getIndex() + .replace("cl_major_", "") + .replace("cl_subject_", "") + .replace("cl_special_1.0_", "")); + dataList.add(data); + } + } + + Cardinality cardinality = searchResponse.getAggregations().get(aggrCount); + //总数 + long value = cardinality.getValue(); + + System.out.println("去重总数:" + value); + System.out.println("不去重的总数:" + requestBuilder.get().getHits().totalHits); + + return dataList; + } + +// private long getCardinality( QueryBuilder queryBuilder,String indexName, +// Integer size, Integer from) { +// // 获取查询的索引列表String indexName = "sjck_personnel" +// ;// 获取查询的条件列表 +//// List> options = (List>) bindParams.get("conditions"); +//// // 1.构建查询请求 +// SearchRequest searchRequest = new SearchRequest(indexName); +//// // 4.构建最外面的 +//// boolQueryBoolQueryBuilder query = QueryBuilders.boolQuery(); +//// // 5.构建查询请求 +//// synQueryPersonnelIndexBuilder(query, options); +// //6.高亮 +// HighlightBuilder highlightBuilder = new HighlightBuilder(); +// // 所有查询出来的字段全部高亮 +// HighlightBuilder.Field highlightTitle = new HighlightBuilder.Field("*").requireFieldMatch(false); +// highlightTitle.highlighterType("unified"); +// highlightBuilder.field(highlightTitle); +// //从第几条开始 +// +// // 3.构建高亮 +// AggregationBuilder aggregation = AggregationBuilders.cardinality("total_size").field("concat_field"); +// SearchSourceBuilder sourceBuilder = new SearchSourceBuilder() +// .query(queryBuilder) +// .highlighter(highlightBuilder) +// .from(from) +// .size(size) +// .aggregation(aggregation); +// // 2.将查询构建器放入查询请求中 +// searchRequest.source(sourceBuilder); +// SearchResponse searchResponse = null; +// try { +// searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); +// } catch (ElasticsearchStatusException e) { +// 
logger.error("请检查elasticsearchIndex是否存在{},错误信息{}", e, e.getMessage()); +// } catch (IOException e) { +// logger.error("搜索出错了{},错误信息{}", e, e.getMessage()); +// } +// assert searchResponse != null; +// ParsedCardinality parsedCardinality = (ParsedCardinality) searchResponse.getAggregations().asList().get(0); +// return parsedCardinality.getValue(); +// } + } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java index 35e5bd1..1f6c2bf 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java @@ -33,32 +33,32 @@ public class QueryRequest implements Serializable { private static final long serialVersionUID = 1L; // 必传字段 // private Long subjectId; - @ApiModelProperty(value = "subjectId" , required = true , notes = "专题ID") + @ApiModelProperty(value = "subjectId", required = true, notes = "专题ID") private String subjectId; // @ApiModelProperty(value = "taskId" , required = true , notes = "任务ID") // private String taskId; - @ApiModelProperty(value = "userId",required = true) + @ApiModelProperty(value = "userId", required = true) private Long userId; // 翻页字段 - @ApiModelProperty(value = "page",required = true) + @ApiModelProperty(value = "page", required = true) private Integer page; - @ApiModelProperty(value = "limit",required = true) + @ApiModelProperty(value = "limit", required = true) private Integer limit; //其他参数 - @ApiModelProperty(value = "pubTime",required = true) + @ApiModelProperty(value = "pubTime", required = true) private Long pubTime; - @ApiModelProperty(value = "docId",required = true) + @ApiModelProperty(value = "docId", required = true) private String docId; - @ApiModelProperty(value = "dataIds",required = true) + @ApiModelProperty(value = "dataIds", required = true) private String dataIds; - @ApiModelProperty(value = "dataId",required = true) + @ApiModelProperty(value = "dataId", required = true) private String dataId; - @ApiModelProperty(value = "docType",required = true) + @ApiModelProperty(value = "docType", required = true) private String docType; - @ApiModelProperty(value = "siteTypes",required = true) + @ApiModelProperty(value = "siteTypes", required = true) private String siteTypes; //站点类型 必传,多个,分隔, 全部传“” - @ApiModelProperty(value = "siteId",required = true) + @ApiModelProperty(value = "siteId", required = true) private String siteId; // 排序字段 private String order; // 排序方式 asc/desc @@ -66,7 +66,7 @@ public class QueryRequest implements Serializable { // 基础查询字段 private String searchArea; //0 美国1中国…… private String cid; // 站点名 - private String crawlDataFlag ; // 数据标识 + private String crawlDataFlag; // 数据标识 // 二次查询字段 private Integer searchType; // 二次查询 选项 0:主贴、1:评论、2:用户 // private String searchScope; // 二次查询 字段选项 0:标题、1:内容、2:用户 @@ -91,7 +91,7 @@ public class QueryRequest implements Serializable { private String valueLabel; private String categoryLabel; - private List tasks; + // private List tasks; private String originalIndex; private String currentIndex; @@ -99,9 +99,24 @@ public class QueryRequest implements Serializable { private List delTasks; private List taskIds; - private String pageType ; + private String pageType; private String userType; + private String ocrTest; + private String asrText; + + + + private List> tasks; + + public List> getTasks() { + return tasks; + } + + public void setTasks(List> tasks) { + this.tasks = tasks; + } + public 
String getUserType() { return userType; } @@ -143,6 +158,7 @@ } private List highLevelQueries; + public List getHighLevelQueries() { return highLevelQueries; } @@ -152,7 +168,6 @@ } - public String getOriginalIndex() { return originalIndex; } @@ -169,13 +184,13 @@ this.currentIndex = currentIndex; } - public List getTasks() { - return tasks; - } - - public void setTasks(List tasks) { - this.tasks = tasks; - } +// public List getTasks() { +// return tasks; +// } +// +// public void setTasks(List tasks) { +// this.tasks = tasks; +// } public String getValueLabel() { return valueLabel; @@ -392,18 +407,18 @@ this.crawlDataFlag = crawlDataFlag; } - public List getSearchScopeValue(String nums){ + public List getSearchScopeValue(String nums) { String numbers[] = nums.split(","); // 0:标题;1:正文;2:作者 多个用,分割,例 "0,1" List resultList = new ArrayList<>(); - for (String num:numbers) { - if(num .equals("0")){ + for (String num : numbers) { + if (num.equals("0")) { resultList.add("title"); } - if(num.equals("1")){ + if (num.equals("1")) { resultList.add("content"); } - if(num.equals("2")){ + if (num.equals("2")) { resultList.add("author"); } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java index 11c3004..156fd16 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java @@ -790,16 +790,14 @@ public class ESMonitorBaseEntity implements Comparable, Ser } public String getSysSentimentTag() { - sysSentimentTag = "中性"; -// if(sysSentiment < 0.5){ -// sysSentimentTag = "负面"; -// } -// if(sysSentiment == 0.5){ -// sysSentimentTag = "中性"; -// } -// if(sysSentiment > 0.5){ -// sysSentimentTag = "正面"; -// } + sysSentimentTag = sysSentiment.toString(); + if(sysSentiment == 0.5 || sysSentiment == 0.0){ + sysSentimentTag = "中性"; + }else if(sysSentiment < 0.5){ + sysSentimentTag = "负面"; + }else if(sysSentiment > 0.5){ + sysSentimentTag = "正面"; + } return sysSentimentTag; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java index a50257c..aca2586 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java @@ -27,7 +27,7 @@ import org.springframework.web.bind.annotation.ResponseBody; @Controller @RequestMapping("/crawl") -@Api(value="数据查询的控制器") +@Api(value = "数据查询的控制器") public class SearchDataController { private static final Logger logger = LoggerFactory.getLogger(SearchDataController.class); @Autowired @@ -39,7 +39,7 @@ public class SearchDataController { * 查询数据列表 */ @ApiOperation(value = "查询数据列表") - @RequestMapping(value = "/subject/query", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/subject/query", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryDataList(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataList] partial / Params:
{}", JSONObject.toJSONString(queryRequest)); @@ -48,29 +48,30 @@ public class SearchDataController { long start = System.currentTimeMillis(); String scorllId = queryRequest.getScrollId(); String subjectId = queryRequest.getSubjectId(); - if(null != scorllId ){ + if (null != scorllId) { // 数据导出 result = searchDataService.exportDataFromFolder(queryRequest); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); - }else { + } else { // 数据查询 result = searchDataService.queryDataFromFolder(queryRequest); } Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER); Integer limit = queryRequest.getLimit(); int page = 1; - if(allDocNumber%limit == 0){ - page = allDocNumber/limit; - }else{ - page = allDocNumber/limit + 1; + if (allDocNumber % limit == 0) { + page = allDocNumber / limit; + } else { + page = allDocNumber / limit + 1; } - if(null != queryRequest.getPage() && !queryRequest.getPage().equals("")) { + if (null != queryRequest.getPage() && !queryRequest.getPage().equals("")) { if (page > 0 && queryRequest.getPage() > page) { //return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配"); - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } } long end = System.currentTimeMillis(); - logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start) + " ; count = "+result.get(ESConstant.ALLDOCNUMBER)); + logger.info("接口查询时长:statr:" + start + " ; end:" + end + " ; time = " + (end - start) + " ; count = " + result.get(ESConstant.ALLDOCNUMBER)); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[SearchDataController] queryDataList Failed,The error message is :{}", e); @@ -83,13 +84,13 @@ public class SearchDataController { * 根据ID 查询 一条数据详情 */ @ResponseBody - @RequestMapping(value="/subject/getInfoByDocId",method=RequestMethod.GET) + @RequestMapping(value = "/subject/getInfoByDocId", method = RequestMethod.GET) @ApiOperation(value = "查询单条数据") @ApiImplicitParams({ - @ApiImplicitParam(paramType="query", name = "subjectId", value = "专题ID", required = true, dataType = "String"), - @ApiImplicitParam(paramType="query", name = "docId", value = "主贴唯一ID", required = true, dataType = "String"), - @ApiImplicitParam(paramType="query", name = "siteId", value = "站点ID", required = true, dataType = "String"),}) - public JSONObject getInfo(String subjectId,String docId,String siteId) { + @ApiImplicitParam(paramType = "query", name = "subjectId", value = "专题ID", required = true, dataType = "String"), + @ApiImplicitParam(paramType = "query", name = "docId", value = "主贴唯一ID", required = true, dataType = "String"), + @ApiImplicitParam(paramType = "query", name = "siteId", value = "站点ID", required = true, dataType = "String"),}) + public JSONObject getInfo(String subjectId, String docId, String siteId) { QueryRequest queryRequest = new QueryRequest(); queryRequest.setSubjectId(subjectId); queryRequest.setDocId(docId); @@ -134,7 +135,7 @@ public class SearchDataController { */ @ResponseBody @ApiOperation(value = "查询评论列表") - @RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getCommentsByDocId] partial / 
Params: {}", JSONObject.toJSONString(queryRequest)); try { @@ -152,12 +153,13 @@ public class SearchDataController { * https://caiji.percent.cn/api/sq/crawl/getCommentsByDocId * https://caiji.percent.cn/api/sq/crawl/getQuotesByDocId * https://caiji.percent.cn/api/sq/crawl/getAttitudesByDocId + * * @param queryRequest * @return */ @ResponseBody @ApiOperation(value = "查询转发列表") - @RequestMapping(value = "/getQuotesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/getQuotesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getQuotesByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getQuotesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { @@ -171,9 +173,10 @@ public class SearchDataController { } } + @ResponseBody @ApiOperation(value = "查询点赞列表") - @RequestMapping(value = "/getAttitudesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/getAttitudesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getAttitudesByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getAttitudesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { @@ -191,19 +194,20 @@ public class SearchDataController { /** * 查询 数据的Counts 用户左侧的显示 + * * @param queryRequest * @return */ @ApiOperation(value = "查询数据列表") - @RequestMapping(value = "/subject/queryCounts", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/subject/queryCounts", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryDataCounts(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataCounts] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { JSONObject result = new JSONObject(); - if(null != queryRequest.getSubjectId()) { + if (null != queryRequest.getSubjectId()) { result = searchDataService.queryDataCountsInOneIndex(queryRequest); - }else{ + } else { return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); @@ -216,12 +220,13 @@ public class SearchDataController { /** - * 崔老师版本使修改标签调用的接口,其他版本不调用该接口 + * 崔老师版本使修改标签调用的接口,其他版本不调用该接口 + * * @param queryRequest * @return */ @ApiOperation(value = "修改标签") - @RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject updateLabel(@RequestBody QueryRequest queryRequest) { logger.info("[updateLabel] partial / Params: {}", JSONObject.toJSONString(queryRequest)); @@ -235,18 +240,16 @@ public class SearchDataController { } - - /** * 删除专题的接口 */ @ApiOperation(value = "删除专题") - @RequestMapping(value = "/delete/deleteBySubjectId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/delete/deleteBySubjectId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody - public JSONObject 
deleteSubject(@RequestBody QueryRequest queryRequest){ + public JSONObject deleteSubject(@RequestBody QueryRequest queryRequest) { logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { - JSONObject result = searchDataService.deleteBySubjectId(queryRequest); + JSONObject result = searchDataService.deleteBySubjectId(queryRequest); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[deleteSubject] Failed,The error message is :{}", e); @@ -255,12 +258,12 @@ public class SearchDataController { } @ApiOperation(value = "根据 cid 删除指定专题下的数据") - @RequestMapping(value = "/delete/deleteByCid", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/delete/deleteByCid", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody - public JSONObject deleteSubjectByCid(@RequestBody QueryRequest queryRequest){ + public JSONObject deleteSubjectByCid(@RequestBody QueryRequest queryRequest) { logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { - JSONObject result = searchDataService.deleteBySubjectIdByCid(queryRequest); + JSONObject result = searchDataService.deleteBySubjectIdByCid(queryRequest); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[deleteSubject] Failed,The error message is :{}", e); @@ -269,12 +272,12 @@ public class SearchDataController { } @ApiOperation(value = "根据 crawlDataFlag 删除指定专题下的问题") - @RequestMapping(value = "/delete/deleteByCrawlDataFlag", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/delete/deleteByCrawlDataFlag", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody - public JSONObject deleteSubjectByCrawlDataFlag(@RequestBody QueryRequest queryRequest){ + public JSONObject deleteSubjectByCrawlDataFlag(@RequestBody QueryRequest queryRequest) { logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { - JSONObject result = searchDataService.deleteBySubjectIdByCrawlDataFlag(queryRequest); + JSONObject result = searchDataService.deleteBySubjectIdByCrawlDataFlag(queryRequest); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[deleteSubject] Failed,The error message is :{}", e); @@ -285,11 +288,12 @@ public class SearchDataController { /** * 获取json 结构数据 + * * @param queryRequest * @return */ @ApiOperation(value = "查询数据列表") - @RequestMapping(value = "/subject/getJson", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/subject/getJson", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryDataJsonList(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest)); @@ -298,25 +302,25 @@ public class SearchDataController { long start = System.currentTimeMillis(); String scorllId = queryRequest.getScrollId(); - // 专题数据导出 - result = searchDataService.exportJsonDataInSubject(queryRequest); + // 专题数据导出 + result = searchDataService.exportJsonDataInSubject(queryRequest); Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER); 
Integer limit = queryRequest.getLimit(); Integer page = 1; - if(allDocNumber%limit==0){ - page = allDocNumber/limit; - }else{ - page = allDocNumber/limit +1; + if (allDocNumber % limit == 0) { + page = allDocNumber / limit; + } else { + page = allDocNumber / limit + 1; } - if(null != queryRequest.getPage() && !queryRequest.getPage().equals("")) { + if (null != queryRequest.getPage() && !queryRequest.getPage().equals("")) { if (page > 0 && queryRequest.getPage() > page) { return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); - // return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配"); + // return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配"); } } long end = System.currentTimeMillis(); - logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start) + " ; count = "+result.get(ESConstant.ALLDOCNUMBER)); + logger.info("接口查询时长:statr:" + start + " ; end:" + end + " ; time = " + (end - start) + " ; count = " + result.get(ESConstant.ALLDOCNUMBER)); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryData] Failed,The error message is :{}", e); @@ -329,18 +333,21 @@ public class SearchDataController { * 示例文件夹的专题复制 */ @ApiOperation(value = "复制专题") - @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody - public JSONObject reIndex(@RequestBody QueryRequest queryRequest){ + public JSONObject reIndex(@RequestBody QueryRequest queryRequest) { logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { - searchDataService.reIndexSubject(queryRequest); + //searchDataService.reIndexSubject(queryRequest); + + searchDataService.reIndexFolder(queryRequest); } catch (Exception e) { logger.error("[reIndex] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, new JSONObject()); } + /** * 2023-04-14 采集平台2.0新增接口 * 移动任务的数据,并将原索引中的数据删除 @@ -348,11 +355,12 @@ public class SearchDataController { * crawl/subject/moveByTasks * 参数 * {"originalIndex":"302088","moveTasks":["13889"],"currentIndex":"309980"} + * * @param queryRequest * @return */ @ApiOperation(value = "查询数据列表") - @RequestMapping(value = "/subject/moveByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/subject/moveByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject reindexDataByTasks(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest)); @@ -362,7 +370,7 @@ public class SearchDataController { // 任务数据移动,这个需要离线移动 result = searchDataService.reindexByTasks(queryRequest); long end = System.currentTimeMillis(); - logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start)); + logger.info("接口查询时长:statr:" + start + " ; end:" + end + " ; time = " + (end - start)); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryData] Failed,The error message is :{}", e); @@ -378,16 +386,17 @@ public class 
SearchDataController { * crawl/subject/deleteByTasks * 参数 * {"index":"302088","delTasks":["13889"]} + * * @param queryRequest * @return */ @ApiOperation(value = "查询数据列表") - @RequestMapping(value = "/subject/deleteByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @RequestMapping(value = "/subject/deleteByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject delDataByTasks(@RequestBody QueryRequest queryRequest) { logger.info("[delDataByTasks] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { - JSONObject result = searchDataService.deleteByTasks(queryRequest); + JSONObject result = searchDataService.deleteByTasks(queryRequest); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[deleteSubject] Failed,The error message is :{}", e); diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java index 1f90dc2..1063066 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java @@ -8,6 +8,7 @@ import com.bfd.mf.common.service.es.EsQueryServiceForSQNormal; import com.bfd.mf.common.service.es.SubjectQueryDataService; import com.bfd.mf.common.util.ESServerUtils; import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.common.util.es.EsUtils; import com.bfd.mf.common.web.entity.mysql.SentimentModify; import com.bfd.mf.common.web.entity.mysql.cache.Cluster; import com.bfd.mf.common.web.repository.mysql.SentimentRepository; @@ -22,7 +23,10 @@ import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.collapse.CollapseBuilder; import org.elasticsearch.search.sort.SortOrder; +import org.jsoup.Jsoup; +import org.jsoup.safety.Whitelist; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -209,7 +213,7 @@ public class SearchDataService extends CrudService> siteMap) throws Exception { + Map> siteMap) { ESMonitorEntity esMonitorEntity = new ESMonitorEntity(); try { Map sourceAsMap = jsonObject; @@ -351,15 +355,31 @@ public class SearchDataService extends CrudService ocrText = new ArrayList<>(); if (sourceAsMap.containsKey(ESConstant.ASRTEXT)) { asrText = sourceAsMap.get(ESConstant.ASRTEXT).toString(); } if (sourceAsMap.containsKey(ESConstant.OCRTEXT)) { - ocrText = (List) sourceAsMap.get(ESConstant.OCRTEXT); + if (!sourceAsMap.get(ESConstant.OCRTEXT).toString().contains("[]")) { + ocrText.add(sourceAsMap.get(ESConstant.OCRTEXT).toString()); + } } // 如果是用户数据,需要获取下面四个字段值 String fansCount = ""; String friendsCount = ""; String postCount = ""; String location = ""; - if (searchType == 2) { - if (sourceAsMap.containsKey(ESConstant.FANS_COUNT)) { - fansCount = sourceAsMap.get(ESConstant.FANS_COUNT).toString(); - } - if (sourceAsMap.containsKey(ESConstant.FRIENDS_COUNT)) { - friendsCount = sourceAsMap.get(ESConstant.FRIENDS_COUNT).toString(); - } - if (sourceAsMap.containsKey(ESConstant.POST_COUNT)) { - postCount = sourceAsMap.get(ESConstant.POST_COUNT).toString(); - } - if (sourceAsMap.containsKey(ESConstant.WEIBO_LOCATION)) { - location =
sourceAsMap.get(ESConstant.WEIBO_LOCATION).toString(); - } +// if (searchType == 2) { +// if (sourceAsMap.containsKey(ESConstant.FANS_COUNT)) { +// fansCount = sourceAsMap.get(ESConstant.FANS_COUNT).toString(); +// } +// if (sourceAsMap.containsKey(ESConstant.FRIENDS_COUNT)) { +// friendsCount = sourceAsMap.get(ESConstant.FRIENDS_COUNT).toString(); +// } +// if (sourceAsMap.containsKey(ESConstant.POST_COUNT)) { +// postCount = sourceAsMap.get(ESConstant.POST_COUNT).toString(); +// } +// if (sourceAsMap.containsKey(ESConstant.WEIBO_LOCATION)) { +// location = sourceAsMap.get(ESConstant.WEIBO_LOCATION).toString(); +// } +// } + + if (sourceAsMap.containsKey("location")) { + location = sourceAsMap.get("location").toString(); } // 这个项目新增的三个字段 @@ -546,8 +574,19 @@ public class SearchDataService extends CrudService")){ +// Document doc = Jsoup.parse(forwardContent); + //String text = Jsoup.clean(forwardContent, Whitelist.basicWithImages()); + String text = Jsoup.clean(forwardContent, Whitelist.basic()); + + forContent = text; + // } + esMonitorEntity.setForwardContent(forContent); esMonitorEntity.setReadCount(readCount); esMonitorEntity.setHasFile(hasFile); esMonitorEntity.setHasVideo(hasVideo); @@ -572,6 +611,7 @@ public class SearchDataService extends CrudService 0) { size = 1L; comments.add(TopComment); + String commentsCount = TopComment.getString(ESConstant.COMMENTS_COUNT); + if (commentsCount.equals("-1")) { + TopComment.put(ESConstant.COMMENTS_COUNT, "-"); + } + String quoteCount = TopComment.getString(ESConstant.QUOTE_COUNT); + if (quoteCount.equals("-1")) { + TopComment.put(ESConstant.QUOTE_COUNT, "-"); + } + String attitudesCount = TopComment.getString(ESConstant.ATTITUDES_COUNT); + if (attitudesCount.equals("-1")) { + TopComment.put(ESConstant.ATTITUDES_COUNT, "-"); + } } boolQueryBuilder.mustNot(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId)); } @@ -756,10 +809,12 @@ public class SearchDataService extends CrudService result = response[i].getSourceAsMap(); jsonObject.putAll(result); jsonObject.put(ESConstant.SITEID, siteId); + String commentsCount = jsonObject.getString(ESConstant.COMMENTS_COUNT); + if (commentsCount.equals("-1")) { + jsonObject.put(ESConstant.COMMENTS_COUNT, "-"); + } + String quoteCount = jsonObject.getString(ESConstant.QUOTE_COUNT); + if (quoteCount.equals("-1")) { + jsonObject.put(ESConstant.QUOTE_COUNT, "-"); + } + String attitudesCount = jsonObject.getString(ESConstant.ATTITUDES_COUNT); + if (attitudesCount.equals("-1")) { + jsonObject.put(ESConstant.ATTITUDES_COUNT, "-"); + } comments.add(jsonObject); } + /** + * TODO + * 评论回来的总数不对 + */ json.put(ESConstant.COMMENTLISTS, comments); size = size + searchResponse.getHits().getTotalHits(); + String[] currentList = {currentIndexList.get(0)}; + + Long totalCount = EsUtils.queryTotalCountNew("CL_Mini_2", currentList, boolQueryBuilder, 1); + System.out.println(size); + System.out.println(totalCount); json.put(ESConstant.ALLDOCNUMBER, size); } catch (Exception e) { e.printStackTrace(); @@ -938,7 +1018,7 @@ public class SearchDataService extends CrudService siteOtherMap = siteMap.get(enSource); - if(null != siteOtherMap && siteOtherMap.size()> 0){ + if (null != siteOtherMap && siteOtherMap.size() > 0) { if (siteOtherMap.containsKey("site_id")) { siteId = siteMap.get(enSource).get("site_id").toString(); } @@ -1043,12 +1123,17 @@ public class SearchDataService extends CrudService4.0.0 com.bfd.mf - cl_stream_3.2 - 3.2-SNAPSHOT + cl_stream_3.3 + 3.3-SNAPSHOT pom
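
For reference, the de-duplicated totals used by queryTotalCountNew in this patch come from a cardinality aggregation over docId (main posts) or dataId (comments and other data), while getHits().totalHits stays the raw, non-deduplicated count. A minimal sketch of that counting step, assuming the same ES 5.x/6.x TransportClient API the patch already uses (the class name, method name and keyField parameter below are illustrative only, not part of the patch):

    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.client.transport.TransportClient;
    import org.elasticsearch.index.query.QueryBuilder;
    import org.elasticsearch.search.aggregations.AggregationBuilders;
    import org.elasticsearch.search.aggregations.metrics.cardinality.Cardinality;

    public final class DedupCountSketch {
        /** Approximate distinct count of keyField (docId for main posts, dataId otherwise). */
        public static long countDistinct(TransportClient client, String[] indices,
                                         QueryBuilder query, String keyField) {
            SearchResponse resp = client.prepareSearch(indices)
                    .setQuery(query)
                    .setSize(0) // only the aggregation is needed, not the hits
                    .addAggregation(AggregationBuilders.cardinality("count").field(keyField))
                    .get();
            Cardinality cardinality = resp.getAggregations().get("count");
            return cardinality.getValue();
        }
    }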
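
Similarly, the task-move flow behind reIndexByTask and updateByQuery reduces to: reindex the documents matching a task filter into the target subject index, then register the cl_major_* alias on the target so the existing alias-based queries keep finding the data. A minimal sketch under the same TransportClient assumptions (the taskId field name is taken from the updateByQuery script above; the helper name and the string form of taskId are illustrative):

    import org.elasticsearch.action.admin.indices.alias.IndicesAliasesAction;
    import org.elasticsearch.client.transport.TransportClient;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.index.reindex.BulkByScrollResponse;
    import org.elasticsearch.index.reindex.ReindexAction;

    public final class MoveTaskDataSketch {
        /** Copies one task's documents into targetIndex, aliases it, and returns the created count. */
        public static long moveTask(TransportClient client, String sourceIndex,
                                    String targetIndex, String taskId) {
            BulkByScrollResponse copied = ReindexAction.INSTANCE.newRequestBuilder(client)
                    .source(sourceIndex)                               // source index
                    .destination(targetIndex)                          // target index
                    .filter(QueryBuilders.termQuery("taskId", taskId)) // only this task's documents
                    .refresh(true)
                    .get();
            // Alias the target the same way the patch does, so cl_major_* queries still match.
            String alias = targetIndex.replace("cl_special_1.0_", "cl_major_");
            IndicesAliasesAction.INSTANCE.newRequestBuilder(client)
                    .addAlias(targetIndex, alias)
                    .get();
            return copied.getCreated();
        }
    }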