Browse Source

按大佬的要求,改了一版查询,即:输入关键词中间有空格就是 并且的关系。两个词都有才会被查出来,同时查询结果加了高亮,但是高亮显示的不对,我先注释掉

release-1.0
jing.du 2 years ago
parent
commit
2a9cf34142
  1. 33
      cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java
  2. 20
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/HighFrequencyQuery.java
  3. 44
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/HighFrequencyQueryProducer.java
  4. 4
      cl_query_data_job/src/main/resources/application.yml
  5. 2
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java
  6. 40
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java
  7. 84
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  8. 25
      cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
  9. 5
      cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java
  10. 1
      cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
  11. 2
      cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java
  12. 27
      cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java
  13. 2
      cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java
  14. 306
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java
  15. 32
      cl_search_api/src/main/resources/application.yml

33
cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java

@ -42,20 +42,33 @@ public class AppConfig {
private String sendTopic;
private List<String> analysisTopic;
private String analysisGroup;
/**
* 各个服务开关配置
*/
private Boolean enableStatisticsProducer; // 离线查询统计服务的状态
private Boolean enableQueryProducer; // 离线查询数据服务的状态
/**
* 高频离线拉数
* enable-high-frequency-producer
*/
private Boolean enableHighFrequencyProducer;
private Boolean enableBacktraceProducer; // 离线拉取数据服务的状态欧莱雅
private Boolean enableUpLoadProducer;
private Boolean enableOutputProducer;
private Boolean enableTaskcountProducer;
private Boolean enableAlarmProducer;
private Integer statisticsProducerThreadCount; // 离线查询统计服务的线程数
/**
* 各个服务线程数配置
*/
private Integer statisticsProducerThreadCount;
private Integer queryProducerThreadCount;
private Integer highFrequencyProducerThreadCount;
private Integer backtraceProducerThreadCount;
private Integer upLoadProducerThreadCount;
private Integer outputProducerThreadCount;
private Integer taskcountProducerThreadCount;
private Integer alarmProducerThreadCount;
private String goFastPostUrl;
private String goFastDomain;
private String uploadOLYExcelPath;
@ -448,6 +461,21 @@ public class AppConfig {
}
/**
 * Returns the on/off switch for the high-frequency offline data-pull producer.
 *
 * @return the configured flag; the backing field is a boxed {@code Boolean}
 *         with no initializer, so this is {@code null} until the setter is called
 */
public Boolean getEnableHighFrequencyProducer() {
return enableHighFrequencyProducer;
}
/**
 * Sets the on/off switch for the high-frequency offline data-pull producer.
 *
 * @param enableHighFrequencyProducer the new flag value, stored as given
 */
public void setEnableHighFrequencyProducer(Boolean enableHighFrequencyProducer) {
this.enableHighFrequencyProducer = enableHighFrequencyProducer;
}
/**
 * Returns the number of threads configured for the high-frequency producer.
 *
 * @return the configured thread count; the backing field is a boxed {@code Integer}
 *         with no initializer, so this is {@code null} until the setter is called
 */
public Integer getHighFrequencyProducerThreadCount() {
return highFrequencyProducerThreadCount;
}
/**
 * Sets the number of threads for the high-frequency producer.
 *
 * @param highFrequencyProducerThreadCount the new thread count, stored as given
 */
public void setHighFrequencyProducerThreadCount(Integer highFrequencyProducerThreadCount) {
this.highFrequencyProducerThreadCount = highFrequencyProducerThreadCount;
}
public void verify() {
@ -463,6 +491,9 @@ public class AppConfig {
if(enableQueryProducer){ // 离线拉数
Assert.isTrue(queryProducerThreadCount > 0, "Config queryProducerThreadCount must gt 0");
}
if(enableHighFrequencyProducer){ // 离线拉数
Assert.isTrue(highFrequencyProducerThreadCount > 0, "Config highFrequencyProducerThreadCount must gt 0");
}
if(enableBacktraceProducer){ // 欧莱雅离线拉数
Assert.isTrue(backtraceProducerThreadCount > 0, "Config backtraceProducerThreadCount must gt 0");
}

20
cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/HighFrequencyQuery.java

@ -0,0 +1,20 @@
package com.bfd.mf.job.service.query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
/**
 * Service stub for the high-frequency offline data pull.
 *
 * <p>Both operations are currently empty placeholders; the class only fixes the
 * entry points that the high-frequency worker invokes.
 *
 * @Author dujing
 * @Date 2023/3/1 14:31
 */
@Service
public class HighFrequencyQuery {

    private static final Logger LOGGER = LoggerFactory.getLogger(HighFrequencyQuery.class);

    /**
     * Acquisition step of the high-frequency pull. Not implemented yet: the body
     * is intentionally empty.
     */
    public void tryAcquire() {
        // no-op placeholder
    }

    /**
     * Production step of the high-frequency pull. Not implemented yet: the body
     * is intentionally empty.
     */
    public void produce() {
        // no-op placeholder
    }
}

44
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/HighFrequencyQueryProducer.java

@ -0,0 +1,44 @@
package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.service.query.HighFrequencyQuery;
import com.bfd.mf.job.service.query.QueryService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
 * Worker that drives the high-frequency offline data pull.
 *
 * <p>Each invocation of {@link #work(String)} runs one acquire/produce cycle on
 * {@link HighFrequencyQuery} and then pauses before returning.
 */
@Component
public class HighFrequencyQueryProducer extends AbstractWorker {

    private static final Logger LOGGER = LoggerFactory.getLogger(HighFrequencyQueryProducer.class);

    /** Pause after each work cycle, in milliseconds. */
    private static final long CYCLE_PAUSE_MILLIS = 60000L;

    @Autowired
    private AppConfig config;

    @Autowired
    private HighFrequencyQuery highFrequencyQuery;

    /**
     * Returns the number of threads for this worker.
     *
     * <p>Reads the dedicated high-frequency setting (the one {@code AppConfig.verify()}
     * validates for this producer) instead of the regular query producer's count.
     *
     * @return the configured high-frequency producer thread count
     */
    @Override
    protected Integer getThreadCount() {
        return config.getHighFrequencyProducerThreadCount();
    }

    /**
     * Returns the name pattern for this worker's threads.
     *
     * @return a format string with one {@code %d} placeholder
     */
    @Override
    protected String getThreadNameFormat() {
        // Own prefix so these threads are not mistaken for the backtrace producer's.
        return "high-frequency-producer-%d";
    }

    /**
     * Runs one data-pull cycle, dedicated to pulling subject (topic) data, then
     * pauses for {@link #CYCLE_PAUSE_MILLIS} milliseconds.
     *
     * @param json payload handed over by the worker framework; not used here
     */
    @Override
    protected void work(String json) {
        LOGGER.info("[HighFrequencyQueryProducer] work start ... ");
        highFrequencyQuery.tryAcquire();
        highFrequencyQuery.produce();
        try {
            Thread.sleep(CYCLE_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            // Restore the interrupt status so the owning executor/loop can see the
            // stop request instead of having it silently cleared here.
            Thread.currentThread().interrupt();
            LOGGER.warn("[HighFrequencyQueryProducer] interrupted while pausing between cycles", e);
        }
    }
}

4
cl_query_data_job/src/main/resources/application.yml

@ -38,7 +38,8 @@ worker:
enable-analysis-producer: false # 查ES写kafka
enable-analysis-consumer: false # 读kafka写ES
enable-statistics-producer: false # 统计 taskCount 和 subjectCount (采集平台)
enable-query-producer: true # 离线拉数(采集平台)
enable-query-producer: false # 离线拉数(采集平台)
enable-high-frequency-producer: true # 高频离线拉数(采集平台)
enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用)
enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用
enable-up-load-producer: false # 上传(采集平台)
@ -48,6 +49,7 @@ worker:
## 启动服务的线程数
statistics-producer-thread-count: 1
query-producer-thread-count: 10
high-frequency-producer-thread-count: 10
backtrace-producer-thread-count: 1
rw-oly-producer-thread-count: 1
up-load-producer-thread-count: 1

2
cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java

@ -51,10 +51,12 @@ public class EsQueryServiceForSQMini {
// 主贴评论还是用户
Integer searchType = queryRequest.getSearchType();
List<JSONObject> result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
//System.out.println(result);
return result;
}catch (Exception e){
return new ArrayList<>();
}
}
/**

40
cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java

@ -50,12 +50,44 @@ public class GetQueryBuilder {
boolQueryBuilder.filter(searchTextBuilder);
String keyword = queryRequest.getKeyword();
/**
* 2023-03-06
* 添加一个查询功能用空格分割查询 并且的关系
*/
// System.out.println("**** " + keyword);
String[] keywords;
try {
if (TStringUtils.isNotEmpty(keyword)) {
if (keyword.contains(" ")) {
keywords = keyword.split(" ");
} else {
keywords = new String[]{keyword};
}
if (searchType == 0) { // 主贴的话 标题和内容
MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0);
MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
// System.out.println(keywords[0] + " == " + keywords[1]);
// if (keywords.length == 1) {
// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0);
// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
// qb.must(queryBuilder);
// } else {
// MatchPhraseQueryBuilder titleQuery1 = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[0]).slop(0);
// MatchPhraseQueryBuilder contentQuery1 = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[0]).slop(0);
// QueryBuilder queryBuilder1 = QueryBuilders.boolQuery().should(titleQuery1).should(contentQuery1);
//
// MatchPhraseQueryBuilder titleQuery2 = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[1]).slop(0);
// MatchPhraseQueryBuilder contentQuery2 = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[1]).slop(0);
// QueryBuilder queryBuilder2 = QueryBuilders.boolQuery().should(titleQuery2).should(contentQuery2);
//
// qb.must(queryBuilder1).must(queryBuilder2);
// }
for (int i = 0; i < keywords.length; i++) {
MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[i]).slop(0);
MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[i]).slop(0);
QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
qb.must(queryBuilder);
}
} else if (searchType == 1) { // 评论的话 评论内容
// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery);
@ -65,6 +97,9 @@ public class GetQueryBuilder {
boolQueryBuilder.must(QueryBuilders.queryStringQuery("*" + keyword + "*").field(ESConstant.AUTHOR));
}
}
} catch (Exception e) {
e.printStackTrace();
}
qb.must(boolQueryBuilder);
return qb;
}
@ -168,7 +203,6 @@ public class GetQueryBuilder {
}
/**
* Map Value 正序排序
*/

84
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -399,6 +399,8 @@ public class ESConstant {
public static String CHANNEL = "channel";
public static final String CONTENT = "content";
public static final String SYS_SENTIMENT = "sysSentiment";
public static final String HIGHLIGHT = "highlight";
// public static String POST_SOURCE = "postSource";
public static String TRANSLATETITLE = "translateTitle";
@ -660,47 +662,47 @@ public class ESConstant {
public static final Map<String, String> SORT_FLAG_MAPPING_MYSQL_SORT = new HashMap<>();
public static final String[] FIELD_LIST_MONITOR_NORMAL = EsQueryConditionUtils.getIncludeQueryField(ESConstant.CONTENT_SIMHASH
, ESConstant.TITLE_SIMHASH
, ESConstant.TITLE
, ESConstant.AUTHOR
, ESConstant.AUTHORID
, ESConstant.CONTENT
, ESConstant.SOURCE
, ESConstant.PUBTIME
, ESConstant.PUBTIMESTR
, ESConstant.DOC_TYPE
, ESConstant.CHANNEL
, ESConstant.URL
, ESConstant.SYS_SENTIMENT
, ESConstant.DOC_ID
, ESConstant.DATA_ID
, ESConstant.COMMENT_ID
, ESConstant.SYS_ABSTRACT
, ESConstant.COMMENT_SCORE
, ESConstant.ATTITUDES_COUNT
, ESConstant.COMMENTS_COUNT
, ESConstant.QUOTE_COUNT
, ESConstant.CRAWLTIME
, ESConstant.VIDEOURL
, ESConstant.ISDOWNLOAD
, ESConstant.FILEPATH
, ESConstant.IMAGEPATH
, ESConstant.VIDEOPATH
, ESConstant.CRAWLDATAFLAG
, ESConstant.TRANSLATETITLE
, ESConstant.TRANSLATECONTENT);
// ID 查询字段
public static final String[] FIELD_ID_LIST = {
ESConstant.DATA_ID,
ESConstant.DOC_ID,
ESConstant.PUBTIMESTR,
// ESConstant.ISDOWNLOAD,
// ESConstant.FILEPATH,
// ESConstant.IMAGEPATH,
// ESConstant.VIDEOPATH
};
// public static final String[] FIELD_LIST_MONITOR_NORMAL = EsQueryConditionUtils.getIncludeQueryField(ESConstant.CONTENT_SIMHASH
// , ESConstant.TITLE_SIMHASH
// , ESConstant.TITLE
// , ESConstant.AUTHOR
// , ESConstant.AUTHORID
// , ESConstant.CONTENT
// , ESConstant.SOURCE
// , ESConstant.PUBTIME
// , ESConstant.PUBTIMESTR
// , ESConstant.DOC_TYPE
// , ESConstant.CHANNEL
// , ESConstant.URL
// , ESConstant.SYS_SENTIMENT
// , ESConstant.DOC_ID
// , ESConstant.DATA_ID
// , ESConstant.COMMENT_ID
// , ESConstant.SYS_ABSTRACT
// , ESConstant.COMMENT_SCORE
// , ESConstant.ATTITUDES_COUNT
// , ESConstant.COMMENTS_COUNT
// , ESConstant.QUOTE_COUNT
// , ESConstant.CRAWLTIME
// , ESConstant.VIDEOURL
// , ESConstant.ISDOWNLOAD
// , ESConstant.FILEPATH
// , ESConstant.IMAGEPATH
// , ESConstant.VIDEOPATH
// , ESConstant.CRAWLDATAFLAG
// , ESConstant.TRANSLATETITLE
// , ESConstant.TRANSLATECONTENT);
//
// // ID 查询字段
// public static final String[] FIELD_ID_LIST = {
// ESConstant.DATA_ID,
// ESConstant.DOC_ID,
// ESConstant.PUBTIMESTR,
//// ESConstant.ISDOWNLOAD,
//// ESConstant.FILEPATH,
//// ESConstant.IMAGEPATH,
//// ESConstant.VIDEOPATH
// };
// 一条 数据 详情要查的字段封装
public static final String[] FIELD_DATA = EsQueryConditionUtils.getIncludeQueryField(

25
cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java

@ -34,6 +34,8 @@ import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.metrics.cardinality.Cardinality;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.slf4j.Logger;
@ -92,6 +94,16 @@ public abstract class EsUtils {
if (searchType == 0) {
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID);
}
//高亮显示
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("title");
highlightBuilder.field("content");
highlightBuilder.requireFieldMatch(false); //多个单词高亮的话要把这个设置为trues
highlightBuilder.preTags("<span style='color:red'>");
highlightBuilder.postTags("</span>");
// 查询
// from + size 分页 查询方式
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf))
@ -99,8 +111,9 @@ public abstract class EsUtils {
.setQuery(queryBuilder)
.setCollapse(collapseBuilder)
.setSize(size)
.setFrom(from);
System.out.println(requestBuilder);
.setFrom(from)
.highlighter(highlightBuilder);
SearchResponse searchResponse = requestBuilder.execute().actionGet();
// List<JSONObject> dataList = Lists.newLinkedList();
@ -114,6 +127,8 @@ public abstract class EsUtils {
.replace("cl_major_", "")
.replace("cl_subject_", "")
.replace("cl_special_1.0_", ""));
Map<String, HighlightField> highlight = hits[i].getHighlightFields();
data.put("highlight",highlight);
dataList.add(data);
}
}
@ -241,11 +256,11 @@ public abstract class EsUtils {
.setQuery(queryBuilder)
.addAggregation(aggregation);
System.out.println(requestBuilder);
// System.out.println(requestBuilder);
Aggregations aggregations = requestBuilder.get().getAggregations();
Cardinality cardinality = aggregations.get(count);
System.out.println("1111 : " + cardinality.getValue());
System.out.println("2222 : " + requestBuilder.get().getHits().totalHits);
// System.out.println("1111 : " + cardinality.getValue());
// System.out.println("2222 : " + requestBuilder.get().getHits().totalHits);
long resultCount = cardinality.getValue();
if(searchType == 2){
resultCount = requestBuilder.get().getHits().totalHits;

5
cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java

@ -119,7 +119,6 @@ public class SliceScrollUtil {
}
public List<ESMonitorEntity> fetchResultSubjectCache(QueryRequest queryRequest, String[] fieldList) {
logger.info("[SliceScrollUtil] fetchResultSubjectCache ... 统计+词云 数据查询");
List<ESMonitorEntity> jsonObjectList = new ArrayList<>();
@ -136,7 +135,8 @@ public class SliceScrollUtil {
// currentIndexList = subjectQueryDataService.getIndexListByTimeRange(queryRequest.getStartTime(),queryRequest.getEndTime());
}
Long clusterId = cluster.getId();
//Long clusterId = cluster.getId();
Long clusterId = 4L;
logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId :{}; currentIndexList :{}", clusterId, currentIndexList.toString());
logger.info("==========进入数据分析Es and Cache,计算开始执行============");
String sortFlag = "";
@ -164,7 +164,6 @@ public class SliceScrollUtil {
}
public void parseResponseList(List<ESMonitorEntity> jsonObjectList, List<SearchResponse> responseList) {
for (SearchResponse searchResponse : responseList) {
// 解析数据

1
cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java

@ -20,6 +20,7 @@ import com.alibaba.fastjson.JSONObject;
import com.google.common.base.Objects;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.elasticsearch.common.text.Text;
import java.io.Serializable;
import java.util.ArrayList;

2
cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java

@ -40,7 +40,7 @@ public class SearchAuthorController {
String scorllId = queryRequest.getScrollId();
// 这个是导出要用的
if(null != scorllId){
result = searchDataService.exportDataInSubjectIndex(queryRequest);
result = searchDataService.exportDataFromFolder(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}else{ // 这个是查询
result = searchAuthorService.queryAuthorList(queryRequest);

27
cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java

@ -50,30 +50,13 @@ public class SearchDataController {
long start = System.currentTimeMillis();
String scorllId = queryRequest.getScrollId();
String subjectId = queryRequest.getSubjectId();
if(null != scorllId ){// 导出数据
// 是要导出数据呀走导出数据的流并返回结果
if(subjectId.equals("all") || subjectId.contains(",")){ // 全局数据导出
result = searchDataService.exportDataInDateIndex(queryRequest);
}else if(subjectId.equals("")){ // 没有专题
result.put(ESConstant.ALLDOCNUMBER,0L);
result.put(ESConstant.MONITORLISTS,new ArrayList<>());
}else{
// 专题数据导出
result = searchDataService.exportDataInSubjectIndex(queryRequest);
}
if(null != scorllId ){
// 数据导出
result = searchDataService.exportDataFromFolder(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}else {
// 全局数据查询
if (subjectId.equals("all") || subjectId.contains(",")) {
result = searchDataService.queryDataList(queryRequest);
// 没有专题
} else if (subjectId.equals("")) {
result.put(ESConstant.ALLDOCNUMBER, 0L);
result.put(ESConstant.MONITORLISTS, new ArrayList<>());
} else {
// 专题数据查询
result = searchDataService.queryDataInOneIndex(queryRequest);
}
// 数据查询
result = searchDataService.queryDataFromFolder(queryRequest);
}
Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER);
Integer limit = queryRequest.getLimit();

2
cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java

@ -7,12 +7,12 @@ import com.bfd.mf.common.service.es.EsQueryAuthorService;
import com.bfd.mf.common.service.es.SubjectQueryDataService;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.web.entity.mysql.SentimentModify;
import com.bfd.mf.common.web.entity.mysql.cache.Cluster;
import com.bfd.mf.common.web.repository.mysql.SentimentRepository;
import com.bfd.mf.common.web.repository.mysql.base.SiteRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity;
import com.bfd.mf.config.BFDApiConfig;
import org.elasticsearch.common.text.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

306
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -4,22 +4,22 @@ import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.service.common.CrudService;
import com.bfd.mf.common.service.es.*;
import com.bfd.mf.common.util.ESServerUtils;
import com.bfd.mf.common.util.cache.ResponseParseUtils;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.web.entity.mysql.SentimentModify;
import com.bfd.mf.common.web.entity.mysql.cache.Cluster;
import com.bfd.mf.common.web.repository.mysql.SentimentRepository;
import com.bfd.mf.common.web.repository.mysql.base.SiteRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.common.web.vo.view.monitor.ESMonitorBaseEntity;
import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity;
import com.bfd.mf.config.BFDApiConfig;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -54,27 +54,27 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
return null;
}
private SearchResponse buildDataIdQueryCrawl(Integer from, Integer searchSize, List<String> dataIdList,
String orderFlag, String sortFlag,
List<String> currentIndexList, Cluster cluster) {
if (sortFlag.equals(ESConstant.COMMENT)) {
sortFlag = ESConstant.COMMENTS_COUNT;
}
if (sortFlag.equals("")) {
sortFlag = ESConstant.PUBTIME;
}
QueryBuilder queryBuilder = esCommonService.buildStringQueryByField(ESConstant.DATA_ID, dataIdList);
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder);
SearchRequestBuilder resultBuilder = esServerUtils
.buildSearchRequestBuilder(cluster.getId(), currentIndexList)
.setFrom(from)
.setSize(searchSize)
.setQuery(boolQueryBuilder)
// .setScroll(TimeValue.timeValueMinutes(8))
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC)
.setFetchSource(ESConstant.FIELD_LIST_MONITOR_NORMAL, null);
return resultBuilder.get();
}
// private SearchResponse buildDataIdQueryCrawl(Integer from, Integer searchSize, List<String> dataIdList,
// String orderFlag, String sortFlag,
// List<String> currentIndexList, Cluster cluster) {
// if (sortFlag.equals(ESConstant.COMMENT)) {
// sortFlag = ESConstant.COMMENTS_COUNT;
// }
// if (sortFlag.equals("")) {
// sortFlag = ESConstant.PUBTIME;
// }
// QueryBuilder queryBuilder = esCommonService.buildStringQueryByField(ESConstant.DATA_ID, dataIdList);
// BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder);
// SearchRequestBuilder resultBuilder = esServerUtils
// .buildSearchRequestBuilder(cluster.getId(), currentIndexList)
// .setFrom(from)
// .setSize(searchSize)
// .setQuery(boolQueryBuilder)
// // .setScroll(TimeValue.timeValueMinutes(8))
// .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC)
// .setFetchSource(ESConstant.FIELD_LIST_MONITOR_NORMAL, null);
// return resultBuilder.get();
// }
/**
* 遍历查询结果调用解析组装方法 1
@ -497,6 +497,25 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
if (sourceAsMap.containsKey("pageType")) {
pageType = sourceAsMap.get("pageType").toString();
}
// 获取高亮结果
// if(sourceAsMap.containsKey(ESConstant.HIGHLIGHT)){
// Map<String, HighlightField> highlight = (Map<String, HighlightField>) sourceAsMap.get(ESConstant.HIGHLIGHT);
//
// if(highlight.containsKey(ESConstant.CONTENT)){
// content = "";
// Text [] contents = highlight.get(ESConstant.CONTENT).getFragments();
// for (Text con:contents) {
// content = content + con;
// }
// }
// if(highlight.containsKey(ESConstant.TITLE)){
// title = "";
// Text [] titles = highlight.get(ESConstant.TITLE).getFragments();
// for (Text con:titles) {
// title = title + con;
// }
// }
// }
try {
esMonitorEntity.setForwardContent(sourceAsMap.get("forwardContent").toString());
esMonitorEntity.setReadCount(readCount);
@ -523,8 +542,17 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
esMonitorEntity.setHlKeyWords(hlKeywords);
// 评论数转发数点赞数收藏数
esMonitorEntity.setCommentsCount(Integer.valueOf(sourceAsMap.getOrDefault(ESConstant.COMMENTS_COUNT, 0).toString()));
if(quoteCount.equals("-1")){
quoteCount = "-";
}
esMonitorEntity.setQuoteCount(quoteCount);
if(attitudeCount.equals("-1")){
attitudeCount = "-";
}
esMonitorEntity.setAttitudesCount(attitudeCount);
if(collentCount.equals("-1")){
collentCount = "-";
}
esMonitorEntity.setCollectCount(collentCount);
esMonitorEntity.setViewCnt(viewCnt);
// 视频音频
@ -532,8 +560,17 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
esMonitorEntity.setAsrText(asrText);
// 用户字段
esMonitorEntity.setUserUrl(userUrl);
if(fansCount.equals("-1")){
fansCount = "-";
}
esMonitorEntity.setFansCount(fansCount);
if(friendsCount.equals("-1")){
friendsCount = "-";
}
esMonitorEntity.setFriendsCount(friendsCount);
if(postCount.equals("-1")){
postCount = "-";
}
esMonitorEntity.setPostCount(postCount);
esMonitorEntity.setLocation(location);
@ -599,11 +636,11 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
String indexType = "cl_index_" + docId.split("_")[1];
currentIndexList.add(indexType);
}
JSONObject result = getCommentListByDocId(queryRequest, cluster, currentIndexList);
JSONObject result = getCommentListByDocId(queryRequest, currentIndexList);
return result;
}
private JSONObject getCommentListByDocId(QueryRequest queryRequest, Cluster cluster, List<String> currentIndexList) {
private JSONObject getCommentListByDocId(QueryRequest queryRequest, List<String> currentIndexList) {
JSONObject json = new JSONObject();
try {
String docId = queryRequest.getDocId();
@ -633,7 +670,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
if (null != queryRequest.getDataId() && !queryRequest.getDataId().equals("")) {
String dataId = queryRequest.getDataId();
// String docType = queryRequest.getDocType();
JSONObject TopComment = getCommentByDataId(cluster, currentIndexList, dataId, docType);
JSONObject TopComment = getCommentByDataId(currentIndexList, dataId, docType);
// System.out.println(TopComment);
if (TopComment.size() > 0) {
size = 1L;
@ -643,9 +680,10 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
}
String siteId = queryRequest.getSiteId();
long clusterId = 4;
/**获取信息*/
SearchRequestBuilder builder = esServerUtils
.buildSearchRequestBuilder(cluster.getId(), currentIndexList)
.buildSearchRequestBuilder(clusterId,currentIndexList)
.setQuery(boolQueryBuilder)
.setFrom(start)
.setSize(limit)
@ -672,8 +710,9 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
return json;
}
private JSONObject getCommentByDataId(Cluster cluster, List<String> currentIndexList, String dataId, String docType) {
private JSONObject getCommentByDataId( List<String> currentIndexList, String dataId, String docType) {
JSONObject jsonObject = new JSONObject();
long clusterId = 4L;
try {
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId));
// 由于电商的数据的primary =1 因此不加这个条件了,
@ -682,7 +721,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
// }else {
boolQueryBuilder.must(QueryBuilders.termQuery(ESConstant.PRIMARY, 0));
// }
SearchRequestBuilder builder = esServerUtils.buildSearchRequestBuilder(cluster.getId(), currentIndexList)
SearchRequestBuilder builder = esServerUtils.buildSearchRequestBuilder(clusterId, currentIndexList)
.setQuery(boolQueryBuilder)
.setFetchSource(ESConstant.COMMENT_FIELD_DATA, null);
@ -720,7 +759,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
currentIndexList.add(subjectId);
String docId = queryRequest.getDocId();
// 根据ID 获取一条详情数据
JSONObject jsonObject = getOneDataByDocId(docId, cluster, currentIndexList);
JSONObject jsonObject = getOneDataByDocId(docId,currentIndexList);
jsonObject = setLable(jsonObject);
// 替换几个 pathSize 中的链接的前缀
if (jsonObject.containsKey(ESConstant.IMAGEPATHSIZE)) {
@ -815,6 +854,28 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
newJsonObject.put(ESConstant.URL, jsonObject.get(ESConstant.URL));
jsonObject.put(ESConstant.AUTHOR, newJsonObject);
}
// attitudesCount
if(jsonObject.containsKey(ESConstant.ATTITUDES_COUNT)){
String attitudesCount = jsonObject.get(ESConstant.ATTITUDES_COUNT).toString();
if(attitudesCount.equals("-1")){
attitudesCount = "-";
jsonObject.put(ESConstant.ATTITUDES_COUNT,attitudesCount);
}
}
if(jsonObject.containsKey(ESConstant.COMMENTS_COUNT)){
String commentsCount = jsonObject.get(ESConstant.COMMENTS_COUNT).toString();
if(commentsCount.equals("-1")){
commentsCount = "-";
jsonObject.put(ESConstant.COMMENTS_COUNT,commentsCount);
}
}
if(jsonObject.containsKey(ESConstant.QUOTE_COUNT)){
String quoteCount = jsonObject.get(ESConstant.QUOTE_COUNT).toString();
if(quoteCount.equals("-1")){
quoteCount = "-";
jsonObject.put(ESConstant.QUOTE_COUNT,quoteCount);
}
}
return jsonObject;
}
@ -852,16 +913,17 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
return jsonObject;
}
private JSONObject getOneDataByDocId(String docId, Cluster cluster, List<String> currentIndexList) {
private JSONObject getOneDataByDocId(String docId, List<String> currentIndexList) {
JSONObject jsonObject = new JSONObject();
/**依据文档Id查询对应的文档*/
QueryBuilder queryBuilder = esCommonService.buildKeyWordsQueryBuilder(docId, ESConstant.DOC_ID);
QueryBuilder queryBuilder1 = QueryBuilders.termQuery(ESConstant.PRIMARY, 1);
BoolQueryBuilder conditionQuery = QueryBuilders.boolQuery().must(queryBuilder).must(queryBuilder1);
Long clusterId = 4L;
/**获取信息*/
SearchRequestBuilder builder = esServerUtils
.buildSearchRequestBuilder(cluster.getId(), currentIndexList)
.buildSearchRequestBuilder(clusterId, currentIndexList)
.setQuery(conditionQuery)
.setFetchSource(ESConstant.FIELD_DATA, null);
@ -899,109 +961,109 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
}
// 之前的 queryDataList
public JSONObject queryDataList(QueryRequest queryRequest) throws Exception {
// 先看一下有没有 subjectId ,如果没有的话标识查询的是 全部数据
JSONObject jsonObject = new JSONObject();
Cluster cluster = null;
List<String> currentIndexList = new ArrayList<>();
// 获取ES的参数及要查询的索引列表
String subjectId = queryRequest.getSubjectId();
// cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type); // 111 special_cluster_type
currentIndexList = subjectQueryDataService.getIndexBySubjectIds(subjectId);
Long clusterId = cluster.getId();
logger.info("[SearchDataService] queryDataList clusterId : {}; currentIndexList : {}", clusterId, currentIndexList.toString());
String orderFlag = queryRequest.getOrder(); // 排序方式 asc/desc
String sortFlag = queryRequest.getSidx(); // 排序字段
long sortTimeSeries = System.currentTimeMillis();
long foldDocAllNumber = 0L;
List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
// 开始查询
List<String> dataIdList = new ArrayList<>();
List<SearchResponse> cacheList = subjectQueryDataService.fetchResponseDataFromCache(
queryRequest, clusterId,
orderFlag, sortFlag,
currentIndexList,
ESConstant.FIELD_ID_LIST);
if (cacheList.size() == 0) {
logger.info("没有查到相关数据哦!");
jsonObject.put(ESConstant.ALLDOCNUMBER, foldDocAllNumber);// 实际查询总量
jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
return jsonObject;
}
Map<String, ESMonitorBaseEntity> timeSeriesMap = new HashMap<>();
// 获取查询结果List的总量及结果的map
foldDocAllNumber = ResponseParseUtils.combineEsAndCatchResponse(
cacheList,
timeSeriesMap);
Collection<ESMonitorBaseEntity> values = timeSeriesMap.values();
List<ESMonitorBaseEntity> timeSeries = new ArrayList<>(values);
Integer limit = queryRequest.getLimit(); //每页的数量
Integer start = (queryRequest.getPage() - 1) * limit; //起始页(0,20,40....)
// 将查询结果的 dataId 写入到 dataIdList中
Map<String, String> dedupmap = new HashMap<>();
if (timeSeries.size() > 0) {
for (int i = start; i < timeSeries.size(); i++) {
if (null != timeSeries.get(i).getDataId() && !("").equals(timeSeries.get(i).getDataId())) {
dedupmap.put(timeSeries.get(i).getDataId(), timeSeries.get(i).getDataId());
}
}
}
for (String key : dedupmap.keySet()) {
dataIdList.add(dedupmap.get(key));
}
SearchResponse response = buildDataIdQueryCrawl(
start,
limit,
dataIdList,
orderFlag,
sortFlag,
currentIndexList,
cluster);
logger.info("Response : ", response);
parseQueryResult(response, esMonitorEntityLists);
Long responseStart = System.currentTimeMillis();
logger.info("[SearchDataService] [responseStart] used :{}", System.currentTimeMillis() - responseStart);
long pageTimeCount = System.currentTimeMillis();
logger.info("--------------------------third step,structure page search from es ;response of the HTTP status " +
"code is {} ,the time used: {}" +
"-------------------------------------" + (pageTimeCount - sortTimeSeries));
Collections.sort(esMonitorEntityLists);
// 实际查询总量
jsonObject.put(ESConstant.ALLDOCNUMBER, foldDocAllNumber);
jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
return jsonObject;
}
// public JSONObject queryDataList(QueryRequest queryRequest) throws Exception {
//
// // 先看一下有没有 subjectId ,如果没有的话标识查询的是 全部数据
// JSONObject jsonObject = new JSONObject();
// Cluster cluster = null;
// List<String> currentIndexList = new ArrayList<>();
// // 获取ES的参数及要查询的索引列表
// String subjectId = queryRequest.getSubjectId();
// // cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type); // 111 special_cluster_type
// currentIndexList = subjectQueryDataService.getIndexBySubjectIds(subjectId);
// // Long clusterId = cluster.getId();
// Long clusterId = 4L;
// logger.info("[SearchDataService] queryDataList clusterId : {}; currentIndexList : {}", clusterId, currentIndexList.toString());
//
// String orderFlag = queryRequest.getOrder(); // 排序方式 asc/desc
// String sortFlag = queryRequest.getSidx(); // 排序字段
//
// long sortTimeSeries = System.currentTimeMillis();
// long foldDocAllNumber = 0L;
// List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
//
// // 开始查询
// List<String> dataIdList = new ArrayList<>();
// List<SearchResponse> cacheList = subjectQueryDataService.fetchResponseDataFromCache(
// queryRequest, clusterId,
// orderFlag, sortFlag,
// currentIndexList,
// ESConstant.FIELD_ID_LIST);
//
// if (cacheList.size() == 0) {
// logger.info("没有查到相关数据哦!");
// jsonObject.put(ESConstant.ALLDOCNUMBER, foldDocAllNumber);// 实际查询总量
// jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
// return jsonObject;
// }
//
// Map<String, ESMonitorBaseEntity> timeSeriesMap = new HashMap<>();
// // 获取查询结果List的总量及结果的map
// foldDocAllNumber = ResponseParseUtils.combineEsAndCatchResponse(
// cacheList,
// timeSeriesMap);
//
// Collection<ESMonitorBaseEntity> values = timeSeriesMap.values();
// List<ESMonitorBaseEntity> timeSeries = new ArrayList<>(values);
//
// Integer limit = queryRequest.getLimit(); //每页的数量
// Integer start = (queryRequest.getPage() - 1) * limit; //起始页(0,20,40....)
//
// // 将查询结果的 dataId 写入到 dataIdList中
// Map<String, String> dedupmap = new HashMap<>();
// if (timeSeries.size() > 0) {
// for (int i = start; i < timeSeries.size(); i++) {
// if (null != timeSeries.get(i).getDataId() && !("").equals(timeSeries.get(i).getDataId())) {
// dedupmap.put(timeSeries.get(i).getDataId(), timeSeries.get(i).getDataId());
// }
// }
// }
// for (String key : dedupmap.keySet()) {
// dataIdList.add(dedupmap.get(key));
// }
// SearchResponse response = buildDataIdQueryCrawl(
// start,
// limit,
// dataIdList,
// orderFlag,
// sortFlag,
// currentIndexList,
// cluster);
//
// logger.info("Response : ", response);
// parseQueryResult(response, esMonitorEntityLists);
// Long responseStart = System.currentTimeMillis();
// logger.info("[SearchDataService] [responseStart] used :{}", System.currentTimeMillis() - responseStart);
// long pageTimeCount = System.currentTimeMillis();
// logger.info("--------------------------third step,structure page search from es ;response of the HTTP status " +
// "code is {} ,the time used: {}" +
// "-------------------------------------" + (pageTimeCount - sortTimeSeries));
//
// Collections.sort(esMonitorEntityLists);
// // 实际查询总量
// jsonObject.put(ESConstant.ALLDOCNUMBER, foldDocAllNumber);
// jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
// return jsonObject;
// }
/**
* 查询某个确认的专题下的数据列表
* 查询某个确认的 文件夹 下的数据列表
*/
public JSONObject queryDataInOneIndex(QueryRequest queryRequest) {
public JSONObject queryDataFromFolder(QueryRequest queryRequest) {
JSONObject jsonObject = new JSONObject();
try {
// Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type);
List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds( queryRequest.getSubjectId());
//Long clusterId = cluster.getId();
logger.info("[SearchDataService] queryDataInOneIndex: currentIndexList : {}", currentIndexList.toString());
logger.info("[SearchDataService] queryDataFromFolder: currentIndexList : {}", currentIndexList.toString());
// String indexName = currentIndexList.get(0);
String indexNames[] = currentIndexList.toArray(new String[currentIndexList.size()]);
List<JSONObject> dataList = esQueryServiceForSQMini.queryDataFromOneSubject(indexNames, queryRequest);
List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
Integer searchType = queryRequest.getSearchType();
parseQueryResult(dataList, esMonitorEntityLists, searchType);
Long totalCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames, queryRequest);
logger.info("[SearchDataService] queryDataInOneIndex: {}", totalCount);
logger.info("[SearchDataService] queryDataFromFolder: {}", totalCount);
jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount);
jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
@ -1017,7 +1079,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
* @param queryRequest
* @return
*/
public JSONObject exportDataInSubjectIndex(QueryRequest queryRequest) {
public JSONObject exportDataFromFolder(QueryRequest queryRequest) {
JSONObject jsonObject = new JSONObject();
try {
// 获取 ES 的连接方式及要查询的索引列表 专题索引 special_cluster_type
@ -1025,7 +1087,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds( queryRequest.getSubjectId());
// Long clusterId = cluster.getId();
String[] indexName = currentIndexList.toArray(new String[currentIndexList.size()]);
logger.info("[SearchDataService] exportDataInSubjectIndex : IndexName :{} ; currentIndexList :{}", indexName[0], currentIndexList.toString());
logger.info("[SearchDataService] exportDataFromFolder : IndexName :{} ; currentIndexList :{}", indexName[0], currentIndexList.toString());
// 开始查询
jsonObject = esQueryServiceForSQMini.exportDataFromOneSubject(indexName, queryRequest);
List<JSONObject> dataList = (List<JSONObject>) jsonObject.get(ESConstant.MONITORLISTS);

32
cl_search_api/src/main/resources/application.yml

@ -15,15 +15,15 @@ server:
spring:
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
# username: crawl
# password: crawl123
# url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
username: crawl
password: crawl123
url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
# username: root
# password: bfd123
# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
username: crawl
password: crawl123
url: jdbc:mysql://172.26.11.110:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
# username: crawl
# password: crawl123
# url: jdbc:mysql://172.26.11.110:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC
hikari:
maximum-pool-size: 10
@ -50,11 +50,11 @@ bfd.api.mf:
uploadZipPath : /opt/nfsdata/uploadFiles/
indexNamePre : cl_major_
# es-mini:
# name: CL_Mini_2
# address: 172.18.1.81:9301
# upper: 2018-09-01
# standby: cl_major_*
es-mini:
name: CL_Mini_2
address: 172.18.1.81:9301
upper: 2018-09-01
standby: cl_major_*
es-normal:
name: SQ_Normal_new
address: 172.18.1.134:9301
@ -70,11 +70,11 @@ bfd.api.mf:
# address: 172.16.10.42:9300
# upper: 2018-09-01
# standby: cl_major_*
es-mini: # 这个是28赛博项目阿里云上的ES
name: crawl
address: 47.254.148.208:9300
upper: 2018-09-01
standby: cl_major_*
# es-mini: # 这个是28赛博项目阿里云上的ES
# name: crawl
# address: 47.254.148.208:9300
# upper: 2018-09-01
# standby: cl_major_*
# es-mini:

Loading…
Cancel
Save