diff --git a/.idea/compiler.xml b/.idea/compiler.xml index 0e19055..cccd275 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -12,11 +12,9 @@ - - - + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml index 50cf68e..204b4b3 100644 --- a/.idea/encodings.xml +++ b/.idea/encodings.xml @@ -8,6 +8,8 @@ + + diff --git a/.idea/libraries/Maven__com_alibaba_fastjson_1_1_22.xml b/.idea/libraries/Maven__com_alibaba_fastjson_1_1_22.xml deleted file mode 100644 index 24e5ac0..0000000 --- a/.idea/libraries/Maven__com_alibaba_fastjson_1_1_22.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 20293e3..97adf71 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,11 +1,17 @@ + + diff --git a/.idea/modules.xml b/.idea/modules.xml index 6395727..bbf2905 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -2,10 +2,7 @@ - - - \ No newline at end of file diff --git a/cl_query_data_job/cl_query_data_job.iml b/cl_query_data_job/cl_query_data_job.iml index b959125..b2a1074 100644 --- a/cl_query_data_job/cl_query_data_job.iml +++ b/cl_query_data_job/cl_query_data_job.iml @@ -1,5 +1,5 @@ - + @@ -15,176 +15,4 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/cl_search_api/cl_search_api.iml b/cl_search_api/cl_search_api.iml index 952369f..8d08fa0 100644 --- a/cl_search_api/cl_search_api.iml +++ b/cl_search_api/cl_search_api.iml @@ -1,5 +1,5 @@ - + @@ -24,175 +24,4 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/cl_search_api/pom.xml b/cl_search_api/pom.xml index 9decd64..b39e35b 100644 --- a/cl_search_api/pom.xml +++ b/cl_search_api/pom.xml @@ -13,9 +13,7 @@ cl_search_api Search V3.2 API cl_search_api - 3.2.5-SNAPSHOT - - + 3.2.6-SNAPSHOT com.bfd.mf.SearchApplication @@ -126,7 +124,7 @@ com.alibaba fastjson - 1.1.22 + 1.2.68 @@ -256,6 +254,12 @@ okhttp 3.6.0 + + commons-lang + commons-lang + 2.6 + compile + diff --git a/cl_search_api/src/main/java/com/bfd/mf/SearchApplication.java b/cl_search_api/src/main/java/com/bfd/mf/SearchApplication.java index 889d92f..a63b038 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/SearchApplication.java +++ b/cl_search_api/src/main/java/com/bfd/mf/SearchApplication.java @@ -5,10 +5,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.scheduling.annotation.EnableAsync; import java.io.IOException; @SpringBootApplication +@EnableAsync @MapperScan("com.bfd.mf") public class SearchApplication { private static final Logger LOGGER = LoggerFactory.getLogger(SearchApplication.class); diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java index a20b442..13546b5 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java @@ -70,7 +70,7 @@ public class TopicQueryService { } else { List areaList = siteRepository.findCidsByArea(queryRequest.getSearchArea()); List lowCaseAreaList = areaList.stream().map(String::toLowerCase).collect(Collectors.toList()); - boolQuery.must(QueryBuilders.termsQuery(ESConstant.EN_SOURCE, lowCaseAreaList)); + // boolQuery.must(QueryBuilders.termsQuery(ESConstant.EN_SOURCE, lowCaseAreaList)); // String searchArea = getSearchArea(queryRequest.getSearchArea()); // boolQuery.must(QueryBuilders.termQuery(ESConstant.AREA, searchArea)); } @@ -85,19 +85,32 @@ public class TopicQueryService { // cid 站点 String cid = queryRequest.getCid(); // taskId 任务ID - if (null == queryRequest.getTaskId() || ("").equals(queryRequest.getTaskId())) { +// if (null == queryRequest.getTaskId() || ("").equals(queryRequest.getTaskId())) { +// logger.info("[TopicQueryService] queryByConditions_v1 没有任务ID,查询专题下全部任务"); +// } else { +// String taskId = queryRequest.getTaskId(); +// // 如果有任务ID 的话,查这个任务ID 对应的 crawlDataFlag 及时间范围,然后写到查询条件中 +// Task task = taskRepository.findAllBydel0AndTaskId(taskId); +// String crawlDataFlag = task.getCrawlDataFlag(); +// cid = task.getCid(); +// queryRequest.setCid(cid); +// queryRequest.setCrawlDataFlag(crawlDataFlag); +// } + + /** + * 2023-04-24 + * 采集平台2.0 版本,可以选中多个任务进行查询 + */ + if(null == queryRequest.getTaskIds()){ logger.info("[TopicQueryService] queryByConditions_v1 没有任务ID,查询专题下全部任务"); - } else { - String taskId = queryRequest.getTaskId(); - // 如果有任务ID 的话,查这个任务ID 对应的 crawlDataFlag 及时间范围,然后写到查询条件中 - Task task = taskRepository.findAllBydel0AndTaskId(taskId); - String crawlDataFlag = task.getCrawlDataFlag(); - cid = task.getCid(); - queryRequest.setCid(cid); - queryRequest.setCrawlDataFlag(crawlDataFlag); + }else { + List taskIds = queryRequest.getTaskIds(); + boolQuery.must(QueryBuilders.termsQuery(ESConstant.TASK_ID, taskIds)); } + + if (null == cid || ("").equals(cid) || ("test").equals(cid)) { logger.info("[TopicQueryService] queryByConditions_v1 查询全部站点"); } else { diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/BaseFieldEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/BaseFieldEnum.java new file mode 100644 index 0000000..dd5bc97 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/BaseFieldEnum.java @@ -0,0 +1,209 @@ +package com.bfd.mf.common.service.es; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * 文档-网页-图片索引属性枚举 + * @author lihonghao + */ +public enum BaseFieldEnum { + /** + * 文件id + */ + id, + /** + * 文件md5值 + */ + md5, + /** + * 文件标题 + */ + title, + /** + * 摘要 + */ + summary, + /** + * 文档内容 + */ + content, + /** + * 数据来源 + */ + source, + /** + * 原文/译文 + */ + type, + /** + * 原文id + */ + original_id, + /** + *入库时间 + */ + create_time, + /** + * 文档语言 + */ + language, + /** + * 上传用户 + */ + upload_user, + /** + * 上传用户姓名 + */ + upload_user_name, + /** + * 是否删除 + */ + del, + /** + * 网站枚举 + */ + website, + /** + * 发布人 + */ + publisher, + /** + * 发布时间 + */ + public_time, + /** + * 网站版面 + */ + cate_md5, + /** + * 智能标签 + */ + ai_tag, + /** + * 智能地区 + */ + ai_area, + /** + * 主题一级分类 + */ + subject_classify1, + /** + * 主题二级分类 + */ + subject_classify2, + /** + * 主题 + */ + subject, + /** + * 渠道 + */ + channel, + /** + * 审核状态 + */ + audit_state, + /** + * 用户上传文档,归属部门 + */ + department_id, + /** + * 整编状态 + */ + edit_state, + + /** + * 敏感词 + */ + sensitive_tag, + /** + * 置顶状态 + */ + flag_top, + /** + * 置顶有效期 + */ + flag_top_validity, + /** + * 0-无 1-不重要 2-有点重要 3-一般、4-重要、5-非常重要 + */ + flag_importance, + /** + * 分类标签 + */ + subject_tag, + /** + * 事件id + */ + event_id, + /** + * 事件id + */ + event_detect_time, + /** + * 重复校验字段 + */ + duplicate_key, + /** + * 媒体类型 + */ + media_type, + /** + * 文中提及的标准时间 + */ + norm_time, + /** + * 时间间隔(天) + */ + delay_time, + /** + * 省名称 + */ + province_code, + /** + * 市名称 + */ + city_code, + /** + * 区县 + */ + county_code, + + ; + + /** + * 需要进行匹配的属性 + * @return + */ + public static Map getMatchFields(){ + Map matchMap = new HashMap<>(2); + matchMap.put(BaseFieldEnum.title.name(), 2.0F); + matchMap.put(BaseFieldEnum.content.name(), 1.0F); + return matchMap; + } + + /** + * 需要进行匹配的属性-含拼音 + * @return + */ + public static Map getMatchFieldsWithPy(){ + Map matchMap = getMatchFields(); + Map pyMap = new HashMap<>(matchMap.size() * 2); + matchMap.forEach((k, v) -> { + pyMap.put(k, v); + /// 系统中取消拼音搜索 + //pyMap.put(k.concat(EsBase.ES_PINYIN_SUFFIX), v/10); + }); + return pyMap; + } + + /** + * 获取全部属性名 + * @return + */ + public static String[] getAllFields(){ + return Arrays.stream(values()).map(BaseFieldEnum::name).toArray(String[]::new); + } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/BaseSearchQuery.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/BaseSearchQuery.java new file mode 100644 index 0000000..0c01315 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/BaseSearchQuery.java @@ -0,0 +1,416 @@ +//package com.bfd.mf.common.service.es; +// +//import cn.percent.common.base.BasePageQuery; +//import cn.percent.modules.ais.enums.SearchMatchTypeEnum; +//import io.swagger.annotations.ApiModel; +//import io.swagger.annotations.ApiModelProperty; +// +//import java.util.List; +//import java.util.stream.Collectors; +// +///** +// * 检索查询基础类 +// * +// * @author lihonghao +// */ +//@ApiModel(value = "BaseSearchQuery") +//public class BaseSearchQuery extends BasePageQuery { +// +// /** +// * 限制数据主键范围 +// */ +// @ApiModelProperty(value = "documentId限制数据主键范围", hidden = true) +// private String[] documentId; +// /** +// * 二次搜索关键词 +// */ +// @ApiModelProperty(value = "二次搜索关键词") +// private String sk; +// /** +// * 排除关键词 +// */ +// @ApiModelProperty(value = "排除关键词") +// private String nk; +// /** +// * 标题搜索 +// */ +// @ApiModelProperty(value = "标题搜索") +// private String title; +// /** +// * 是否高亮 +// */ +// @ApiModelProperty(value = "是否高亮, 默认高亮") +// private Boolean highlight = true; +// /** +// * 原文译文 +// */ +// @ApiModelProperty(value = "原文译文,多值逗号分隔 /enum/contentType枚举值") +// private List contentTypeList; +// /** +// * 开始时间 +// */ +// @ApiModelProperty(value = "开始时间") +// private Long beginTime; +// /** +// * 结束时间 +// */ +// @ApiModelProperty(value = "结束时间") +// private Long endTime; +// /** +// * 上传用户 +// */ +// @ApiModelProperty(value = "是否只展示当前用户数据 1-是 0-否 默认否") +// private Integer onlyMine; +// /** +// * 上传用户 +// */ +// @ApiModelProperty(value = "上传用户账号", hidden = true) +// private String uploadUser; +// /** +// * 文档来源 +// */ +// @ApiModelProperty(value = "文档来源,多值逗号分隔 /enum/source枚举值") +// private List sourceList; +// /** +// * 语言 +// */ +// @ApiModelProperty(value = "语言,多值逗号分隔或数组") +// private List languageList; +// /** +// * 语言 +// */ +// @ApiModelProperty(value = "智能标签,多值逗号分隔或数组") +// private List aiTagList; +// /** +// * 语言 +// */ +// @ApiModelProperty(value = "智能地区,多值逗号分隔或数组") +// private List aiAreaList; +// /** +// * 主题一级分类 +// */ +// @ApiModelProperty(value = "主题一级分类编码, 默认全部") +// private String subjectClassify1; +// /** +// * 主题2级分类 +// */ +// @ApiModelProperty(value = "主题2级分类编码, 默认全部") +// private String subjectClassify2; +// /** +// * 主题 +// */ +// @ApiModelProperty(value = "主题编码,多值逗号分隔或数组") +// private List subjectList; +// /** +// * 网站 +// */ +// @ApiModelProperty(value = "网站 /enum/website枚举值,单值") +// public String website; +// /** +// * 网站 +// */ +// @ApiModelProperty(value = "网站 /enum/website枚举值,多值逗号分隔或数组") +// public List websiteList; +// /** +// * 所属网站的版面--面包夹 +// */ +// @ApiModelProperty(value = "所属网站的版面--面包夹md5,多值逗号分隔或数组") +// private List cateMd5List; +// /** +// * 升序降序 +// */ +// @ApiModelProperty(value = "升序降序 1升序 2降序,默认降序排列") +// private Integer order = 2; +// /** +// * 排序类型 +// */ +// @ApiModelProperty(value = "排序类型:1相关度排序 2时间排序,默认相关度排序") +// private Integer orderType = 1; +// /** +// * 高级搜索 +// */ +// @ApiModelProperty(value = "高级搜索") +// private List highLevelQueries; +// +// /** +// * 精确搜索--提取出的短语集合 +// */ +// @ApiModelProperty(hidden = true) +// private List accurateList; +// /** +// * 渠道 +// */ +// @ApiModelProperty(value = "渠道数据来源") +// private List channelList; +// /** +// * 文档审核状态 +// */ +// @ApiModelProperty(value = "文档审核状态 0-未审核,1-审核通过, 2-审核不通过") +// private List auditStateList; +// /** +// * 实体类型 +// */ +// @ApiModelProperty(value = "实体类型") +// private List ontologyIdList; +// /** +// * 是否高级搜索 +// * +// * @return +// */ +// @ApiModelProperty(hidden = true) +// public boolean isHighLevel() { +// return highLevelQueries != null && !highLevelQueries.isEmpty(); +// } +// +// /** +// * 是否精确搜索 +// * +// * @return +// */ +// @ApiModelProperty(hidden = true) +// public boolean isAccurateQuery() { +// return null != accurateList && !accurateList.isEmpty(); +// } +// +// +// /** +// * 是否是跨度搜索 +// * +// * @return +// */ +// @ApiModelProperty(hidden = true) +// public boolean isSpanQuery() { +// return this.isHighLevel() && highLevelQueries.stream().anyMatch(e -> SearchMatchTypeEnum.PARAGRAPH.equals(e.getMatchType()) || SearchMatchTypeEnum.SENTENCE.equals(e.getMatchType())); +// } +// +// /** +// * 部门,0-不查询部门,1-查询用户所在部门 +// */ +// @ApiModelProperty(value = "部门,0-不查询部门,1-查询用户所在部门") +// private String onlyDepartment; +// +// public String getOnlyDepartment() { +// return onlyDepartment; +// } +// +// public void setOnlyDepartment(String onlyDepartment) { +// this.onlyDepartment = onlyDepartment; +// } +// +// public String[] getDocumentId() { +// return documentId; +// } +// +// public void setDocumentId(String... documentId) { +// this.documentId = documentId; +// } +// +// public String getTitle() { +// return title; +// } +// +// public void setTitle(String title) { +// this.title = title; +// } +// +// public Boolean getHighlight() { +// return highlight; +// } +// +// public void setHighlight(Boolean highlight) { +// this.highlight = highlight; +// } +// +// public Long getBeginTime() { +// return beginTime; +// } +// +// public void setBeginTime(Long beginTime) { +// this.beginTime = beginTime; +// } +// +// public Long getEndTime() { +// return endTime; +// } +// +// public void setEndTime(Long endTime) { +// this.endTime = endTime; +// } +// +// public String getUploadUser() { +// return uploadUser; +// } +// +// public void setUploadUser(String uploadUser) { +// this.uploadUser = uploadUser; +// } +// +// public List getLanguageList() { +// return languageList; +// } +// +// public void setLanguageList(List languageList) { +// this.languageList = languageList; +// } +// +// public Integer getOrder() { +// return order; +// } +// +// public void setOrder(Integer order) { +// this.order = order; +// } +// +// public Integer getOrderType() { +// return orderType; +// } +// +// public void setOrderType(Integer orderType) { +// this.orderType = orderType; +// } +// +// public String getSk() { +// return sk; +// } +// +// public void setSk(String sk) { +// this.sk = sk; +// } +// +// public List getHighLevelQueries() { +// return highLevelQueries; +// } +// +// public void setHighLevelQueries(List highLevelQueries) { +// this.highLevelQueries = highLevelQueries == null ? null : highLevelQueries.stream().filter(HighLevelQuery::isAvailable).collect(Collectors.toList()); +// } +// +// public List getContentTypeList() { +// return contentTypeList; +// } +// +// public void setContentTypeList(List contentTypeList) { +// this.contentTypeList = contentTypeList; +// } +// +// public Integer getOnlyMine() { +// return onlyMine; +// } +// +// public void setOnlyMine(Integer onlyMine) { +// this.onlyMine = onlyMine; +// } +// +// public List getSourceList() { +// return sourceList; +// } +// +// public void setSourceList(List sourceList) { +// this.sourceList = sourceList; +// } +// +// public List getAiTagList() { +// return aiTagList; +// } +// +// public void setAiTagList(List aiTagList) { +// this.aiTagList = aiTagList; +// } +// +// public List getAiAreaList() { +// return aiAreaList; +// } +// +// public void setAiAreaList(List aiAreaList) { +// this.aiAreaList = aiAreaList; +// } +// +// public String getSubjectClassify1() { +// return subjectClassify1; +// } +// +// public void setSubjectClassify1(String subjectClassify1) { +// this.subjectClassify1 = subjectClassify1; +// } +// +// public String getSubjectClassify2() { +// return subjectClassify2; +// } +// +// public void setSubjectClassify2(String subjectClassify2) { +// this.subjectClassify2 = subjectClassify2; +// } +// +// public List getSubjectList() { +// return subjectList; +// } +// +// public void setSubjectList(List subjectList) { +// this.subjectList = subjectList; +// } +// +// public String getWebsite() { +// return website; +// } +// +// public void setWebsite(String website) { +// this.website = website; +// } +// +// public List getWebsiteList() { +// return websiteList; +// } +// +// public void setWebsiteList(List websiteList) { +// this.websiteList = websiteList; +// } +// +// public List getCateMd5List() { +// return cateMd5List; +// } +// +// public void setCateMd5List(List cateMd5List) { +// this.cateMd5List = cateMd5List; +// } +// +// +// public List getAccurateList() { +// return accurateList; +// } +// +// public void setAccurateList(List accurateList) { +// this.accurateList = accurateList; +// } +// +// public List getChannelList() { +// return channelList; +// } +// +// public void setChannelList(List channelList) { +// this.channelList = channelList; +// } +// +// public List getAuditStateList() { +// return auditStateList; +// } +// +// public void setAuditStateList(List auditStateList) { +// this.auditStateList = auditStateList; +// } +// +// public String getNk() { +// return nk; +// } +// +// public void setNk(String nk) { +// this.nk = nk; +// } +// +// public List getOntologyIdList() { +// return ontologyIdList; +// } +// +// public void setOntologyIdList(List ontologyIdList) { +// this.ontologyIdList = ontologyIdList; +// } +//} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBase.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBase.java new file mode 100644 index 0000000..e93cc99 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBase.java @@ -0,0 +1,879 @@ +//package com.bfd.mf.common.service.es; +// +//import com.google.common.base.Strings; +//import org.apache.commons.lang.ArrayUtils; +//import org.apache.commons.lang.StringUtils; +//import org.apache.lucene.queryparser.classic.QueryParser; +//import org.apache.lucene.search.join.ScoreMode; +//import org.elasticsearch.action.search.SearchRequestBuilder; +//import org.elasticsearch.action.search.SearchResponse; +//import org.elasticsearch.action.search.SearchType; +//import org.elasticsearch.client.transport.TransportClient; +//import org.elasticsearch.common.text.Text; +//import org.elasticsearch.index.query.BoolQueryBuilder; +//import org.elasticsearch.index.query.DisMaxQueryBuilder; +//import org.elasticsearch.index.query.MultiMatchQueryBuilder; +//import org.elasticsearch.index.query.QueryBuilder; +//import org.elasticsearch.index.query.QueryBuilders; +//import org.elasticsearch.index.query.SpanNearQueryBuilder; +//import org.elasticsearch.index.query.SpanNotQueryBuilder; +//import org.elasticsearch.index.query.SpanQueryBuilder; +//import org.elasticsearch.index.query.TermsQueryBuilder; +//import org.elasticsearch.search.SearchHit; +//import org.elasticsearch.search.SearchHits; +//import org.elasticsearch.search.aggregations.AggregationBuilder; +//import org.elasticsearch.search.aggregations.AggregationBuilders; +//import org.elasticsearch.search.aggregations.bucket.nested.Nested; +//import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +//import org.elasticsearch.search.aggregations.bucket.terms.Terms; +//import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +//import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; +//import org.elasticsearch.search.rescore.RescoreBuilder; +//import org.elasticsearch.search.sort.FieldSortBuilder; +//import org.elasticsearch.search.sort.SortBuilders; +//import org.elasticsearch.search.sort.SortOrder; +//import org.slf4j.Logger; +//import org.slf4j.LoggerFactory; +//import org.springframework.stereotype.Component; +// +//import javax.annotation.Resource; +//import java.util.ArrayList; +//import java.util.Arrays; +//import java.util.HashMap; +//import java.util.List; +//import java.util.Map; +//import java.util.Objects; +//import java.util.function.Consumer; +//import java.util.stream.Collectors; +//import java.util.stream.Stream; +// +// +///** +// * es 基础服务 +// * +// * @author Aquarius & Hao +// */ +//@Component +//public class EsBase { +// +// protected Logger logger = LoggerFactory.getLogger(EsBase.class); +// @Resource +// protected TransportClient transportClient; +// /** +// * keyword默认名 +// */ +// public static final String ES_INDEX = "_index"; +// /** +// * 默认索引type +// */ +// public static final String ES_TYPE = "docs"; +// /** +// * keyword默认名 +// */ +// public static final String ES_KEYWORD_SUFFIX = ".keyword"; +// /** +// * 拼音默认名 +// */ +// public static final String ES_PINYIN_SUFFIX = ".pinyin"; +// /** +// * 拼音默认名 +// */ +// public static final String ES_SPY_SUFFIX = ".spy"; +// /** +// * 拼音默认名 +// */ +// public static final String ES_FPY_SUFFIX = ".fpy"; +// /** +// * 拼音默认名 +// */ +// public static final String ES_NGRAM_SUFFIX = ".ngram"; +// /** +// * 分隔符星号 +// */ +// public static final String SEPARATOR_ASTERISK = "*"; +// /** +// * 分隔符点 +// */ +// public static final String SEPARATOR_POINT = "."; +// /** +// * 分隔符逗号 +// */ +// public static final String SEPARATOR_COMMA = ","; +// /** +// * es返回_id键值 +// */ +// public static final String ES_DOC_ID = "ES_DOC_ID"; +// /** +// * es返回高亮信息 +// */ +// public static final String ES_HIGHLIGHT_FIELD = "ES_HIGHLIGHT_FIELD"; +// /** +// * es 句子分隔符 +// */ +// public static final String SEPARATOR_SENTENCE = "sentenceforbfd"; +// /** +// * es返 段落分隔符 +// */ +// public static final String SEPARATOR_PARAGRAPH = "paragraphforbfd"; +// +// /** +// * field append suffix e.g. ".keyword" +// * +// * @return e.g. "id.keyword" +// */ +// public String keyword(String field) { +// if (StringUtils.isNotBlank(field)) { +// return field + ES_KEYWORD_SUFFIX; +// } +// return field; +// } +// +// /** +// * 字段添加.pin后缀 +// * +// * @param field +// * @return +// */ +// public String pinyin(String field) { +// if (StringUtils.isNotBlank(field)) { +// return field + ES_PINYIN_SUFFIX; +// } +// return field; +// } +// +// /** +// * 字段添加.spy后缀 +// * +// * @param field +// * @return +// */ +// public String spy(String field) { +// if (StringUtils.isNotBlank(field)) { +// return field + ES_SPY_SUFFIX; +// } +// return field; +// } +// +// /** +// * 字段添加.fpy后缀 +// * +// * @param field +// * @return +// */ +// public String fpy(String field) { +// if (StringUtils.isNotBlank(field)) { +// return field + ES_FPY_SUFFIX; +// } +// return field; +// } +// +// /** +// * 字段添加.ngram后缀 +// * +// * @param field +// * @return +// */ +// public String ngram(String field) { +// if (StringUtils.isNotBlank(field)) { +// return field + ES_NGRAM_SUFFIX; +// } +// return field; +// } +// +// /** +// * Splicing "*" +// * +// * @param value +// * @return e.g. "*test*" +// */ +// public String wildcardDelimiter(String value) { +// if (StringUtils.isNotBlank(value)) { +// return SEPARATOR_ASTERISK + value + SEPARATOR_ASTERISK; +// } +// return value; +// } +// +// /** +// * 属性值转为nested属性 +// * +// * @param field +// * @return +// */ +// public String nested(String nested, String field) { +// if (StringUtils.isNotBlank(field)) { +// return nested.concat(SEPARATOR_POINT).concat(field); +// } +// return field; +// } +// +// /** +// * 返回匹配 s | s,... | ...,s | ...,s,... 的正则表达式 +// */ +// public String regexp(String s) { +// return "(.+,)*" + s + "(,.+)*"; +// } +// +// /** +// * 拼装嵌套查询 +// * +// * @param nested 属性名 +// * @param field 子级属性名 +// * @param values 值 +// * @return +// */ +// public QueryBuilder nestedTermQuery(String nested, String field, Object... values) { +// return this.nestedQuery(nested, QueryBuilders.termsQuery(this.nested(nested, field), values)); +// } +// +// /** +// * 拼装嵌套查询 +// * +// * @param field 属性名 +// * @param value 属性值 +// * @return +// */ +// public QueryBuilder nestedWildcardQuery(String nested, String field, String value) { +// return this.nestedQuery(nested, QueryBuilders.wildcardQuery(this.nested(nested, field), this.wildcardDelimiter(value))); +// } +// +// /** +// * 拼装nested条件 +// * +// * @param nested +// * @param queryBuilder +// * @return +// */ +// public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) { +// return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None); +// } +// +// /** +// * 分页查询列表 +// * +// * @param queryBuilder +// * @param params +// * @return +// */ +// public PageResp fetchPage(QueryBuilder queryBuilder, EsBaseParam params +// , Consumer consumerRequestBuilder, Consumer consumerResponse) { +// SearchRequestBuilder searchRequestBuilder = transportClient.prepareSearch(params.getIndex()); +// +// if (params.getType() != null) { +// searchRequestBuilder.setTypes(params.getType()); +// } +// searchRequestBuilder.setSearchType(SearchType.QUERY_THEN_FETCH); +// +// // 返回结果集 +// PageResp pageResp = new PageResp<>(); +// try { +// //filter方式查询 +// searchRequestBuilder.setQuery(queryBuilder); +// // 排序 +// if (params.getOrderField() != null && params.getDescOrAsc() != null) { +// // 无该字段时动态生成该字段最大值/最小值排在最后,如果用missing会出现报错情况,unmappedType可避免报错,排序依然在最后 +// FieldSortBuilder sortBuilder = SortBuilders.fieldSort(params.getOrderField()).order(params.getDescOrAsc()); +// if (BaseFieldEnum.public_time.name().equals(params.getOrderField())){ +// sortBuilder.unmappedType("long"); +// } +// searchRequestBuilder.addSort(sortBuilder); +// } +// // Includes表示设置返回值只能返回Includes数组中属性 +// // Excludes表示设置返回值不进行返回Excludes数组中的属性 +// if (ArrayUtils.isNotEmpty(params.getIncludes()) || ArrayUtils.isNotEmpty(params.getExcludes())) { +// searchRequestBuilder.setFetchSource(params.getIncludes(), params.getExcludes()); +// } +// +// // 设置高亮,使用默认的highlighter高亮器 +// if (params.isWithHighlight()) { +// // 如果进行了指定,则以指定为主 +// if (params.getHighlightBuilder() != null) { +// searchRequestBuilder.highlighter(params.getHighlightBuilder()); +// } else { +// // 否则执行默认配置 +// HighlightBuilder highlightBuilder = new HighlightBuilder() +// // match进行高亮 +// .requireFieldMatch(false) +// //fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。 +// .numOfFragments(0) +// //一段 fragment 包含多少个字符。默认100。 +// .fragmentSize(800000) +// .preTags("") +// .postTags(""); +// if (params.getHighlightFields().isEmpty()) { +// highlightBuilder.field("*"); +// } else { +// params.getHighlightFields().forEach(highlightBuilder::field); +// } +// searchRequestBuilder.highlighter(highlightBuilder); +// } +// } +// +// // 分页处理, 否则默认返回10000条数据 +// if (params.getOffset() > -1) { +// searchRequestBuilder.setSize(params.getLimit()); +// searchRequestBuilder.setFrom(params.getOffset()); +// } else { +// searchRequestBuilder.setSize(EsClientConfig.scrollSize); +// } +// +// // 自定义条件 +// if (consumerRequestBuilder != null) { +// consumerRequestBuilder.accept(searchRequestBuilder); +// } +// +// logger.debug("{},{},{}", params.getIndex(), params.getType(), searchRequestBuilder.toString()); +// +// // 查询结果处理 +// SearchResponse response = searchRequestBuilder.setPreference("_primary_first").execute().actionGet(); +// SearchHits hits = response.getHits(); +// Long count = hits.getTotalHits(); +// SearchHit[] searchHits = hits.getHits(); +// EsDTO esDTO = null; +// for (SearchHit searchHit : searchHits) { +// esDTO = new EsDTO(searchHit.getId(), searchHit.sourceAsMap()); +// esDTO.setIndex(searchHit.getIndex()); +// esDTO.setType(searchHit.getType()); +// +// // 返回文档的高亮字段 +// if (params.isWithHighlight()) { +// Map highlightFields = searchHit.getHighlightFields(); +// if (highlightFields != null) { +// Map map = new HashMap<>(highlightFields.size()); +// highlightFields.forEach((k, v) -> { +// if (v != null && v.getFragments() != null) { +// map.put(k.replace(ES_KEYWORD_SUFFIX, ""), +// Arrays.asList(v.getFragments()).stream().filter(e -> e != null).map(Text::toString).collect(Collectors.joining(Constants.SEPARATOR_ELLIPSIS))); +// } +// }); +// esDTO.setHighlightData(map); +// } +// } +// pageResp.getList().add(esDTO); +// } +// pageResp.setTotal(count > EsClientConfig.scrollSize ? EsClientConfig.scrollSize : count); +// +// // 自定义结果处理 +// if (consumerResponse != null) { +// consumerResponse.accept(response); +// } +// logger.debug("fetchPage,size = {}, total = {}", pageResp.getList().size(), pageResp.getTotal()); +// } catch (Exception e) { +// throw new RuntimeException("call pageDataQuery exception ", e); +// } +// return pageResp; +// } +// +// +// /** +// * 此方法中,通用字段名使用了BaseFieldEnum索引字段的名字,因各索引统一所以不会产生问题。 +// *

+// * 定制化分页查询,用于主检索 +// * +// * @param query 查询条件 +// * @param customBuilder 自定义条件 +// * @param clazz 返回类型 +// * @param indexEnums 查询的索引 +// * @param

参数类型 +// * @param 结果类型 +// * @return +// */ +// public

SearchPageResp fetchCustomPage(P query, Consumer customBuilder, Class clazz, IndexEnum... indexEnums) { +// EsBaseParam esBaseParam = new EsBaseParam(); +// esBaseParam.setIndex(Arrays.stream(indexEnums).map(IndexEnum::getSearchIndex).toArray(String[]::new)); +// esBaseParam.setExcludes(new String[]{BaseFieldEnum.content.name(), HtmlFieldEnum.forward_content.name()}); +// esBaseParam.setPage(query.getPage()); +// esBaseParam.setLimit(query.getLimit()); +// // 排序处理 +// int scoreOrder = 1, timeOrder = 2, asc = 1; +// if (query.getOrderType() == timeOrder) { +// esBaseParam.setOrderField(BaseFieldEnum.public_time.name()); +// esBaseParam.setDescOrAsc(query.getOrder() == asc ? SortOrder.ASC : SortOrder.DESC); +// } +// +// // 拼装查询条件 +// BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); +// // 过滤掉被删除的 +// queryBuilder.filter(QueryBuilders.termQuery(BaseFieldEnum.del.name(), Constants.NO)); +// // 自定义条件 +// if (customBuilder != null) { +// customBuilder.accept(queryBuilder); +// } +// // 设置documentId查询范围 +// if (query.getDocumentId() != null && query.getDocumentId().length != 0) { +// queryBuilder.filter(QueryBuilders.idsQuery().addIds(query.getDocumentId())); +// } +// // 标题条件 +// if (!Strings.isNullOrEmpty(query.getTitle())) { +// queryBuilder.filter(QueryBuilders.wildcardQuery(this.keyword(BaseFieldEnum.title.name()), this.wildcardDelimiter(QueryParser.escape(query.getTitle())))); +// } +// // 来源条件 +// if (query.getSourceList() != null && !query.getSourceList().isEmpty()) { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.source.name(), query.getSourceList())); +// } +// // 原文译文条件(满足原文条件时无该字段匹配) +// if (query.getContentTypeList() != null && !query.getContentTypeList().isEmpty()) { +// TermsQueryBuilder termsQueryBuilder = QueryBuilders.termsQuery(BaseFieldEnum.type.name(), query.getContentTypeList()); +// if (query.getContentTypeList().contains(ContentTypeEnum.ORIGINAL.getKey())){ +// BoolQueryBuilder contentTypeBoolQueryBuilder = QueryBuilders.boolQuery(); +// contentTypeBoolQueryBuilder.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.type.name()))); +// contentTypeBoolQueryBuilder.should(termsQueryBuilder); +// queryBuilder.filter(contentTypeBoolQueryBuilder); +// }else { +// queryBuilder.filter(termsQueryBuilder); +// } +// } +// // 上传用户条件 +// if (query.getUploadUser() != null) { +// queryBuilder.filter(QueryBuilders.termQuery(BaseFieldEnum.upload_user.name(), query.getUploadUser())); +// } +// //部门条件 +// if (String.valueOf(Constants.YES).equals(query.getOnlyDepartment()) && UserUtil.getUser().getDepartmentId() != null) { +// queryBuilder.filter(QueryBuilders.termQuery(DocumentFieldEnum.department_id.name(), UserUtil.getUser().getDepartmentId())); +// } +// // 语言条件 +// if (query.getLanguageList() != null && !query.getLanguageList().isEmpty()) { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.language.name(), query.getLanguageList())); +// } +// +// // 关键词搜索自定义--带拼音 +// if (query.getK() != null && !"".equals(query.getK().trim())) { +// // 默认短语 +// MultiMatchQueryBuilder.Type matchType = MultiMatchQueryBuilder.Type.BEST_FIELDS; +// queryBuilder.must(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), matchType, query.getK())); +// } +// +// // 精确搜索 +// if (query.isAccurateQuery()) { +// // 暂时只考虑一对双引号的情况 +// query.getAccurateList().stream().forEach(e -> { +// queryBuilder.must(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, e)); +// }); +// } +// +// // 二次搜索--采用短语搜素--带拼音 +// if (query.getSk() != null && !"".equals(query.getSk().trim())) { +// queryBuilder.must(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, query.getSk())); +// } +// +// // 排除搜索--采用短语搜素 +// if (query.getNk() != null && !"".equals(query.getNk().trim())) { +// queryBuilder.mustNot(this.getMatchQueryBuilder(null,BaseFieldEnum.getMatchFields(), new String[]{query.getNk()}, true, SearchWordStrategyEnum.ANY)); +// } +// +// // 时间条件(满足条件或无该字段) +// if (query.getBeginTime() != null || query.getEndTime() != null){ +// BoolQueryBuilder timeBoolQueryBuilder = QueryBuilders.boolQuery(); +// timeBoolQueryBuilder.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.public_time.name()))); +// BoolQueryBuilder timeRangeBoolQueryBuilder = QueryBuilders.boolQuery(); +// if (query.getBeginTime() != null) { +// timeRangeBoolQueryBuilder.filter(QueryBuilders.rangeQuery(BaseFieldEnum.public_time.name()).gte(query.getBeginTime())); +// } +// if (query.getEndTime() != null) { +// timeRangeBoolQueryBuilder.filter(QueryBuilders.rangeQuery(BaseFieldEnum.public_time.name()).lte(query.getEndTime())); +// } +// timeBoolQueryBuilder.should(timeRangeBoolQueryBuilder); +// queryBuilder.filter(timeBoolQueryBuilder); +// } +// +// +// // 标签搜索 +// if (query.getAiTagList() != null && !query.getAiTagList().isEmpty()) { +// queryBuilder.filter(this.nestedTermQuery(BaseFieldEnum.ai_tag.name(), LabelWeightFieldEnum.label.name(), query.getAiTagList().stream().toArray(String[]::new))); +// } +// // 地区搜索 +// if (query.getAiAreaList() != null && !query.getAiAreaList().isEmpty()) { +// queryBuilder.filter(this.nestedTermQuery(BaseFieldEnum.ai_area.name(), LabelWeightFieldEnum.label.name(), query.getAiAreaList().stream().toArray(String[]::new))); +// } +// // 主题一级分类 +// if (!Strings.isNullOrEmpty(query.getSubjectClassify1())) { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.subject_classify1.name(), query.getSubjectClassify1())); +// } +// // 主题二级分类 +// if (!Strings.isNullOrEmpty(query.getSubjectClassify2())) { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.subject_classify2.name(), query.getSubjectClassify2())); +// } +// // 主题(满足其他主题时无主题字段匹配) +// if (query.getSubjectList() != null && !query.getSubjectList().isEmpty()) { +// if (query.getSubjectList().contains(DefaultConstants.SUBJECT_DEFAULT)){ +// BoolQueryBuilder subjectBoolQueryBuilder = QueryBuilders.boolQuery(); +// subjectBoolQueryBuilder.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.subject.name()))); +// subjectBoolQueryBuilder.should(QueryBuilders.termsQuery(BaseFieldEnum.subject.name(), query.getSubjectList())); +// queryBuilder.filter(subjectBoolQueryBuilder); +// }else { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.subject.name(), query.getSubjectList())); +// } +// } +// // 渠道条件 +// if (query.getChannelList() != null && !query.getChannelList().isEmpty()) { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.channel.name(), query.getChannelList())); +// } +// // 网站 +// if (!Strings.isNullOrEmpty(query.getWebsite())) { +// queryBuilder.filter(QueryBuilders.termQuery(BaseFieldEnum.website.name(), query.getWebsite())); +// } +// if (query.getWebsiteList() != null && !query.getWebsiteList().isEmpty()) { +// // 如果渠道选择了用户上传,并且其他渠道选择了二级网站,则需兼容用户上传网站为空的结果 +// if (query.getChannelList() != null && query.getChannelList().contains(DefaultConstants.DEFAULT_CHANNEL_USER)){ +// BoolQueryBuilder subQuery = QueryBuilders.boolQuery(); +// subQuery.should(QueryBuilders.termQuery(BaseFieldEnum.channel.name(), DefaultConstants.DEFAULT_CHANNEL_USER)); +// subQuery.should(QueryBuilders.termsQuery(BaseFieldEnum.website.name(), query.getWebsiteList())); +// queryBuilder.filter(subQuery); +// } else { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.website.name(), query.getWebsiteList())); +// } +// } +// // 网站面包夹 +// if (query.getCateMd5List() != null && !query.getCateMd5List().isEmpty()) { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.cate_md5.name(), query.getCateMd5List())); +// } +// // 审核状态(满足审核通过条件时无该字段匹配) +// if (query.getAuditStateList() != null && !query.getAuditStateList().isEmpty()) { +// if (query.getAuditStateList().contains(AuditStateEnum.YES.getKey())){ +// BoolQueryBuilder auditBoolQuery = QueryBuilders.boolQuery(); +// auditBoolQuery.should(QueryBuilders.termsQuery(BaseFieldEnum.audit_state.name(), query.getAuditStateList())); +// auditBoolQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.audit_state.name()))); +// queryBuilder.filter(auditBoolQuery); +// }else { +// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.audit_state.name(), query.getAuditStateList())); +// } +// } +// // 实体类型 +// if (query.getOntologyIdList() != null && !query.getOntologyIdList().isEmpty()){ +// queryBuilder.filter(QueryBuilders.termsQuery(KgSystemEnum.ontology_id.name(),query.getOntologyIdList())); +// } +// +// // 高级搜索自定义设置 +// if (query.isHighLevel()) { +// // 1、找到所有的not进行非处理 +// query.getHighLevelQueries().stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +// queryBuilder.mustNot(this.getHighLevelQueryBuilder(e, true)); +// }); +// +// // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理 +// List tempHighLevel = new ArrayList<>(query.getHighLevelQueries().size()); +// query.getHighLevelQueries().stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +// // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空 +// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) { +// // 拼接条件 +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// queryBuilder.must(tempQueryBuilder); +// tempHighLevel.clear(); +// } +// // 将当前项加入临时队列 +// tempHighLevel.add(e); +// }); +// +// // 此处拼接tempHighLevel未处理的内容 +// if (!tempHighLevel.isEmpty()) { +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// queryBuilder.must(tempQueryBuilder); +// } +// } +// +// // 统计全部关键词, k=关键词 v=积分放大倍数 +// Map keywordMap = new HashMap<>(2); +// if (!Strings.isNullOrEmpty(query.getK())) { +// keywordMap.put(query.getK(), 10); +// } +// if (!Strings.isNullOrEmpty(query.getSk())) { +// keywordMap.put(query.getSk(), 20); +// } +// if (query.isHighLevel()) { +// query.getHighLevelQueries().forEach(e -> { +// Stream.of(e.getText()).forEach(text -> keywordMap.put(text, 10)); +// if (e.getTranslateText() != null) { +// Stream.of(e.getTranslateText()).forEach(text -> keywordMap.put(text, 10)); +// } +// }); +// } +// +// // 高亮自定义设置 +// if (query.getHighlight() != null && query.getHighlight()) { +// esBaseParam.setWithHighlight(true); +// esBaseParam.setHighlightFields(new ArrayList<>(BaseFieldEnum.getMatchFieldsWithPy().keySet())); +// Integer numOfFragments = 2; +// HighlightBuilder highlightBuilder = new HighlightBuilder() +// // match进行高亮 +// .requireFieldMatch(true) +// .order(HighlightBuilder.Order.SCORE) +// //fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。 +// .numOfFragments(numOfFragments) +// //一段 fragment 包含多少个字符。默认100。 +// .fragmentSize(Constants.MAX_R_LENGTH / numOfFragments) +// .noMatchSize(Constants.MAX_R_LENGTH) +// .preTags("") +// .postTags(""); +// BaseFieldEnum.getMatchFieldsWithPy().keySet().forEach(highlightBuilder::field); +// +// /* +// * 高级搜索取消自定义高亮 +// * 精确搜索进行短语高亮重定义 +// * 否则进行关键词的高亮重定义 +// */ +// if (!query.isHighLevel()) { +// DisMaxQueryBuilder highlightQuery = QueryBuilders.disMaxQuery(); +// if (query.isAccurateQuery()) { +// query.getAccurateList().stream().forEach(e -> { +// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, e)); +// }); +// } else { +// keywordMap.forEach((keyword, boost) -> { +// BaseFieldEnum.getMatchFieldsWithPy().forEach((field, baseBoost) -> { +// float realBoost = baseBoost * boost * 100; +// highlightQuery.add(QueryBuilders.termQuery(field, keyword).boost(realBoost * 2)); +// highlightQuery.add(QueryBuilders.matchPhraseQuery(field, keyword).boost(realBoost)); +// }); +// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.BEST_FIELDS, keyword).boost(0.5F)); +// +// }); +// // 如果有二次搜索,因二次搜索使用短语前缀,此处需要特殊处理 +// if (query.getSk() != null && !"".equals(query.getSk().trim())) { +// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, query.getSk())); +// } +// } +// highlightBuilder.highlightQuery(highlightQuery); +// } +// esBaseParam.setHighlightBuilder(highlightBuilder); +// } +// +// +// +// // 评分重算条件 +// BoolQueryBuilder reScoreQueryBuilder = QueryBuilders.boolQuery(); +// if (!keywordMap.isEmpty()) { +// BoolQueryBuilder phraseQueryBuilder = QueryBuilders.boolQuery(); +// keywordMap.forEach((keyword, boost) -> { +// BaseFieldEnum.getMatchFieldsWithPy().forEach((field, baseBoost) -> { +// phraseQueryBuilder.should(QueryBuilders.matchPhrasePrefixQuery(field, keyword).slop(2).maxExpansions(10).boost(baseBoost * boost)); +// }); +// +// String paramKgSearchPriority = SettingEnum.PARAMS_KG_SEARCH_PRIORITY.getValue(); +// if (!Strings.isNullOrEmpty(paramKgSearchPriority)){ +// // 高优先级实体评分重算(Type指定PHRASE短语匹配;如果搜索词不匹配时,无损原重算逻辑) +// Long[] ontologyIds = Arrays.stream(paramKgSearchPriority.split(Constants.SEPARATOR_COMMA)).map(Long::parseLong).toArray(Long[]::new); +// List titleProperties = OntologyUtil.getTitleProperties(ontologyIds); +// Map fieldMap = titleProperties.stream().collect(Collectors.toMap(AisKgPropertyEntity::getCode, aisKgPropertyEntity -> 300f, (o1, o2) -> o2)); +// phraseQueryBuilder.should(this.getMatchQueryBuilder(fieldMap,MultiMatchQueryBuilder.Type.PHRASE,keyword)); +// } +// }); +// reScoreQueryBuilder.must(phraseQueryBuilder); +// } +// /* +// // 相关度查询标记 +// boolean functionFlag = false; +// FunctionScoreQueryBuilder functionQueryBuilder = null; +// // 优化时间衰减函数查询 +// if (query.getOrderType() == scoreOrder) { +// // 更改标记并构建时间衰减函数Query对象 +// functionFlag = true; +// GaussDecayFunctionBuilder exp = ScoreFunctionBuilders.gaussDecayFunction(BaseFieldEnum.create_time.name(), System.currentTimeMillis(), 86400000, 86400000 * 30.0, 0.5); +// functionQueryBuilder = QueryBuilders.functionScoreQuery(queryBuilder, exp).boostMode(CombineFunction.MULTIPLY); +// } +// */ +// +// // 热门标签聚合名称 +// String aiTagLabelAgg = "aiTagLabelAgg"; +// String aiTagAgg = "aiTagAgg"; +// List hotTagList = new ArrayList<>(); +// PageResp pageList = this.fetchPage(queryBuilder, esBaseParam, (customRequestBuilder) -> { +// // 高级/跨度/精确搜索时不增加评分重算 +// if (!query.isHighLevel() && !query.isSpanQuery() && !query.isAccurateQuery()) { +// customRequestBuilder.addRescorer(RescoreBuilder.queryRescorer(reScoreQueryBuilder).setQueryWeight(0.7f).setRescoreQueryWeight(1.2f), 100); +// } +// // 聚合当前结果的热门标签 +// // 内层标签属性聚合 +// AggregationBuilder aiTagLabelAggBuilder = AggregationBuilders.terms(aiTagLabelAgg) +// .field(this.nested(BaseFieldEnum.ai_tag.name(), LabelWeightFieldEnum.label.name())) +// .order(Terms.Order.count(false)) +// .size(query.getLimit()); +// // nested主聚合 +// NestedAggregationBuilder aiTagAggBuilder = AggregationBuilders.nested(aiTagAgg, BaseFieldEnum.ai_tag.name()).subAggregation(aiTagLabelAggBuilder); +// customRequestBuilder.addAggregation(aiTagAggBuilder); +// }, (customResponse) -> { +// // 处理热门标签, 取出聚合结果 +// Nested aiTagAggData = customResponse.getAggregations().get(aiTagAgg); +// Terms aiTagLabelAggData = aiTagAggData.getAggregations().get(aiTagLabelAgg); +// if (aiTagLabelAggData != null) { +// for (Terms.Bucket bucket : aiTagLabelAggData.getBuckets()) { +// hotTagList.add(new KeyValueDTO(bucket.getKeyAsString(), String.valueOf(bucket.getDocCount()))); +// } +// } +// }); +// // 拼装返回结果 +// SearchPageResp pageResp = new SearchPageResp<>(query.getLimit(), query.getPage()); +// pageResp.setTotalCount((int) pageList.getTotal()); +// pageResp.setList(pageList.getList().stream().map(e -> e.toHighlightEntity(clazz)).collect(Collectors.toList())); +// pageResp.setHotAiTagList(hotTagList); +// return pageResp; +// } +// +// /** +// * 拼装高级搜索--针对高级搜索中的一行或一个框 +// * +// * @param highLevelQuery +// * @return +// */ +// protected QueryBuilder getHighLevelQueryBuilder(HighLevelQuery highLevelQuery, boolean isNot) { +// BoolQueryBuilder result = QueryBuilders.boolQuery(); +// // 获取高级查询的字段 +// Map fieldMap = SearchScopeEnum.getFieldsByKey(highLevelQuery.getScope()); +// SearchScopeEnum searchScopeEnum = SearchScopeEnum.getEnumByKey(Objects.toString(highLevelQuery.getScope())); +// String path = (null == searchScopeEnum ? null : searchScopeEnum.getPath()); +// +// // 同段搜索---跨度搜索 +// if (SearchMatchTypeEnum.PARAGRAPH.equals(highLevelQuery.getMatchType())) { +// fieldMap.forEach((k, v) -> { +// this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_PARAGRAPH); +// this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_PARAGRAPH); +// }); +// return result; +// } +// +// // 同句搜索 +// if (SearchMatchTypeEnum.SENTENCE.equals(highLevelQuery.getMatchType())) { +// fieldMap.forEach((k, v) -> { +// this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_SENTENCE); +// this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_SENTENCE); +// }); +// return result; +// } +// +// // 原文普通搜索 +// QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path,fieldMap, highLevelQuery.getText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); +// if(rawQueryBuilder != null){ +// result.should(rawQueryBuilder); +// } +// +// // 如果译文不为空,则进行译文普通搜索 +// if (highLevelQuery.getTranslateText() != null) { +// QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path,fieldMap, highLevelQuery.getTranslateText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); +// if (transQueryBuilder != null){ +// result.should(transQueryBuilder); +// } +// } +// return result; +// } +// +// /** +// * 获取跨度搜索查询条件 +// * +// * @param field 字段名 +// * @param text 内容 +// * @param separator 分隔符 +// * @return +// */ +// private void addSpanQueryBuilder(BoolQueryBuilder queryBuilder, String field, String[] text, String separator) { +// +// if (text == null || text.length == 0) { +// return; +// } +// // 将所有原词按照空格拆分 +// /* +// String[] splitText = Stream.of(text) +// .filter(StringUtils::isNotBlank) +// .flatMap(e -> Stream.of(e.split(" "))) +// .filter(StringUtils::isNotBlank) +// .toArray(String[]::new); +// */ +// String[] splitText = text; +// if (splitText == null || splitText.length == 0) { +// return; +// } +// +// SpanNearQueryBuilder spanNearQueryBuilder = QueryBuilders.spanNearQuery(QueryBuilders.spanTermQuery(field, splitText[0]), 250).inOrder(false); +// Stream.of(splitText).skip(1).forEach(e -> spanNearQueryBuilder.addClause(QueryBuilders.spanTermQuery(field, e))); +// SpanQueryBuilder exclude = QueryBuilders.spanTermQuery(field, separator); +// SpanNotQueryBuilder spanNotQueryBuilder = QueryBuilders.spanNotQuery(spanNearQueryBuilder, exclude); +// //跨度搜索 为了高亮显示 新增对于每个单次进行查询。 +// BoolQueryBuilder query = new BoolQueryBuilder(); +// Stream.of(splitText).forEach(e -> query.must(QueryBuilders.matchPhraseQuery(BaseFieldEnum.content.name(), e))); +// queryBuilder.should(QueryBuilders.boolQuery().must(spanNotQueryBuilder).must(query)); +// } +// +// /** +// * 全文检索查询拼接----含词语策略 +// * +// * @param fieldMap 查询字段 +// * @param text 文本 +// * @param isNot 是否是排除 +// * @param strategyEnum 搜索词策略 +// * @return +// */ +// private QueryBuilder getMatchQueryBuilder(String nestedPath,Map fieldMap, String[] text, boolean isNot, SearchWordStrategyEnum strategyEnum) { +// +// if (text == null || text.length == 0) { +// return null; +// } +// /* +// // 将所有原词按照空格拆分 +// String[] splitText = Stream.of(text) +// .filter(StringUtils::isNotBlank) +// .flatMap(e -> Stream.of(e.split(" "))) +// .filter(StringUtils::isNotBlank) +// .toArray(String[]::new); +// */ +// String [] splitText = text; +// if (splitText == null || splitText.length == 0) { +// return null; +// } +// +// BoolQueryBuilder result = QueryBuilders.boolQuery(); +// // 如果是非 或 指定完整匹配,则用短语,否则用最佳字段 +// MultiMatchQueryBuilder.Type multiMatchType = isNot || SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? MultiMatchQueryBuilder.Type.PHRASE_PREFIX : MultiMatchQueryBuilder.Type.BEST_FIELDS; +// // 使用拆分后的词进行匹配----如果使用完整匹配则不进行拆分,否则按空格拆分 +// Stream.of(SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? text : splitText).forEach((e) -> { +// // 校验所有还是单个词 +// QueryBuilder matchQuery = this.getMatchQueryBuilder(nestedPath,fieldMap, multiMatchType, e); +// if (SearchWordStrategyEnum.ALL.equals(strategyEnum)) { +// result.must(matchQuery); +// } else { +// result.should(matchQuery); +// } +// }); +// return result; +// } +// +// +// +// /** +// * 全文检索查询拼接(非nested属性重载方法) +// * +// * @param fieldMap 查询字段 +// * @param type 查询类型 +// * @param text 文本 +// * @return +// */ +// private QueryBuilder getMatchQueryBuilder(Map fieldMap, MultiMatchQueryBuilder.Type type, String text) { +// return this.getMatchQueryBuilder(null,fieldMap,type,text); +// } +// +// /** +// * 全文检索查询拼接,不支持nested属性与非nested属性混合使用,并且nested属性必须归属相同path +// * +// * @param fieldMap 查询字段 +// * @param type 查询类型 +// * @param text 文本 +// * @return +// */ +// private QueryBuilder getMatchQueryBuilder(String nestedPath,Map fieldMap, MultiMatchQueryBuilder.Type type, String text) { +// // 拼装搜索 +// QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(text) +// .fields(fieldMap) +// .type(type == null ? MultiMatchQueryBuilder.Type.BEST_FIELDS : type) +// .maxExpansions(5) +// .tieBreaker(0.3f) +// /// 关闭高频词处理 +// //.cutoffFrequency(0.01f) +// .lenient(Boolean.TRUE) +// .minimumShouldMatch("60%"); +// +// // 如果有path拼接nested并返回 +// if (!Strings.isNullOrEmpty(nestedPath)){ +// return this.nestedQuery(nestedPath, queryBuilder); +// } +// return queryBuilder; +// } +// +//} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBaseParam.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBaseParam.java new file mode 100644 index 0000000..31ee646 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBaseParam.java @@ -0,0 +1,240 @@ +package com.bfd.mf.common.service.es; + +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.sort.SortOrder; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; + +/** + * es查询基类 + * @author Aquarius & Hao + */ +public class EsBaseParam { + + public EsBaseParam(){} + + /** + * 构建 + * @param indexEnum + */ +// public EsBaseParam(IndexEnum indexEnum){ +// this.index = new String[]{indexEnum.getSearchIndex()}; +// this.type = indexEnum.getType(); +// } + + /** + * 构建 + * @param index + * @param type + */ + public EsBaseParam(String index, String type){ + this.index = new String[]{index}; + this.type = type; + } + + /** + * 排序 + */ + private String orderField; + private SortOrder descOrAsc; + + /** + * 多列排序 + */ + private LinkedHashMap lhashMap; + + /** + * 当前页码 + */ + private Integer page; + /** + * 每页条数 + */ + private Integer limit; + /** + * 从第几条开始 + */ + private Integer offset; + + /** + * ES的索引 + */ + private String[] index; + /** + * ES的type + */ + private String type; + /** + * 分组的列 + */ + private String term; + /** + * 分组的列1 + */ + private String term1; + + /** + * 指定返回的字段名,不指定返回全部 + */ + private String[] includes; + + /** + * 排出返回的字段名,不指定不做限制 + */ + private String[] excludes; + + /** + * 高亮属性 + */ + private HighlightBuilder highlightBuilder; + + /** + * 是否需要高亮 + */ + private boolean withHighlight = false; + /** + * 高亮字段 + */ + private List highlightFields = new ArrayList<>(); + + public String[] getIncludes() { + return includes; + } + + public void setIncludes(String[] includes) { + this.includes = includes; + } + + public String[] getExcludes() { + return excludes; + } + + public void setExcludes(String[] excludes) { + this.excludes = excludes; + } + + public String getTerm1() { + return term1; + } + + public void setTerm1(String term1) { + this.term1 = term1; + } + + public String getOrderField() { + return orderField; + } + + public void setOrderField(String orderField) { + this.orderField = orderField; + } + + public String[] getIndex() { + return index; + } + + public void setIndex(String... index) { + this.index = index; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getTerm() { + return term; + } + + public void setTerm(String term) { + this.term = term; + } + + public LinkedHashMap getLhashMap() { + return lhashMap; + } + + public void setLhashMap(LinkedHashMap lhashMap) { + this.lhashMap = lhashMap; + } + + public SortOrder getDescOrAsc() { + return descOrAsc; + } + + public void setDescOrAsc(SortOrder descOrAsc) { + this.descOrAsc = descOrAsc; + } + + public Integer getPage() { + return page; + } + + public void setPage(Integer page) { + this.page = page; + } + + public Integer getLimit() { + if (null == limit) { + limit = 0; + } + return limit; + } + + public void setLimit(Integer limit) { + this.limit = limit; + } + + public Integer getOffset() { + if (this.page != null && this.limit != null) { + if (this.page > 0 && this.limit > 0) { + offset = (this.page - 1) * this.limit; + } else { + offset = -1; + } + } + if (null == offset) { + offset = -1; + } + return offset; + } + + public void setOffset(Integer offset) { + this.offset = offset; + } + + public EsBaseParam orderBy(String orderField, SortOrder descOrAsc){ + this.setOrderField(orderField); + this.setDescOrAsc(descOrAsc); + return this; + } + + public boolean isWithHighlight() { + return withHighlight; + } + + public void setWithHighlight(boolean withHighlight) { + this.withHighlight = withHighlight; + } + + public List getHighlightFields() { + return highlightFields; + } + + public void setHighlightFields(List highlightFields) { + this.highlightFields = highlightFields; + } + + public HighlightBuilder getHighlightBuilder() { + return highlightBuilder; + } + + public void setHighlightBuilder(HighlightBuilder highlightBuilder) { + this.highlightBuilder = highlightBuilder; + } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsDTO.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsDTO.java new file mode 100644 index 0000000..363f7d5 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsDTO.java @@ -0,0 +1,277 @@ +//package com.bfd.mf.common.service.es; +// +//import cn.percent.common.constants.Constants; +//import cn.percent.common.utils.BeanUtils; +//import cn.percent.modules.ais.entity.es.BaseEsEntity; +//import com.alibaba.fastjson.JSON; +//import com.google.common.base.Strings; +//import com.google.common.collect.Lists; +//import org.apache.commons.lang.ObjectUtils; +//import org.apache.commons.lang.StringUtils; +// +//import java.util.ArrayList; +//import java.util.List; +//import java.util.Map; +// +///** +// * ElasticSearch返回值 +// * +// * @author lihonghao +// */ +//public class EsDTO { +// public EsDTO() { +// } +// +// public EsDTO(String docId, Map data) { +// this.docId = docId; +// this.data = data; +// } +// +// public EsDTO(String docId, String index, Map data) { +// this.docId = docId; +// this.index = index; +// this.data = data; +// } +// +// /** +// * 唯一主键 +// */ +// private String docId; +// /** +// * 索引名字 +// */ +// private String index; +// /** +// * 索引类型 +// */ +// private String type; +// +// /** +// * 数据 +// */ +// private Map data; +// /** +// * 数据 +// */ +// private Map highlightData; +// +// /** +// * 获取String值 +// * +// * @param key +// * @return +// */ +// public String getString(Object key) { +// return data == null ? "" : ObjectUtils.toString(data.get(key), ""); +// } +// +// /** +// * 获取String值 +// * +// * @param key +// * @return +// */ +// public String getString(Object key, String nullStr) { +// return data == null ? "" : ObjectUtils.toString(data.get(key), nullStr); +// } +// +// /** +// * 地理坐标点用字符串形式表示时是纬度在前,经度在后(”latitude,longitude”), +// * 而数组形式表示时刚好相反,是经度在前,纬度在后([longitude,latitude])。 +// * 其实,在 ElasticeSearch 内部,不管字符串形式还是数组形式,都是纬度在前,经度在后。 +// * 不过早期为了适配 GeoJSON 的格式规范,调整了数组形式的表示方式。 +// * 因此,在使用地理位置(geolocation)的路上就出现了这么一个“捕熊器”,专坑那些不了解这个陷阱的使用者。 +// *

+// * 获取String值 +// * +// * @param key +// * @return [lon, lat] +// */ +// public Double[] getLocation(Object key) { +// try { +// if (data == null) { +// return null; +// } +// Object location = data.get(key); +// if (location == null) { +// return null; +// } +// +// if (location instanceof ArrayList) { +// List geoList = Lists.newArrayList(); +// ((ArrayList) location).forEach(e -> geoList.add(Double.parseDouble(ObjectUtils.toString(e)))); +// return geoList.toArray(new Double[geoList.size()]); +// } +// if (location instanceof Object[]) { +// return (Double[]) location; +// } +// if (location instanceof String) { +// String strLocation = ObjectUtils.toString(location, ""); +// if (strLocation.indexOf(Constants.SEPARATOR_COMMA) != -1) { +// String[] arrLocation = strLocation.split(","); +// return new Double[]{Double.parseDouble(arrLocation[0]), Double.parseDouble(arrLocation[1])}; +// } +// } +// return null; +// } catch (Exception e) { +// e.printStackTrace(); +// return null; +// } +// } +// +// /** +// * 获取经度值 +// * +// * @param key +// */ +// public Double getLongitude(Object key) { +// Double[] geoMap = getLocation(key); +// return geoMap == null ? null : geoMap[0]; +// } +// +// /** +// * 获取纬度值 +// * +// * @param key +// */ +// public Double getLatitude(Object key) { +// Double[] geoMap = getLocation(key); +// return geoMap == null ? null : geoMap[1]; +// } +// +// /** +// * 转实体 +// * +// * @param clazz +// * @param +// * @return +// */ +// public T toEntity(Class clazz) { +// if (data == null) { +// return null; +// } +// // 如果继承至es基类则赋值doc_id +// T entity = JSON.parseObject(JSON.toJSONString(data), clazz); +// if (BaseEsEntity.class.isAssignableFrom(clazz)) { +// BeanUtils.setProperty(entity, BaseEsEntity.DOC_ID, this.getDocId()); +// if (this.getIndex() != null) { +// BeanUtils.setProperty(entity, BaseEsEntity.INDEX, this.getIndex()); +// } +// if (this.getType() != null) { +// BeanUtils.setProperty(entity, BaseEsEntity.TYPE, this.getType()); +// } +// } +// return entity; +// } +// +// /** +// * 转高亮实体 +// * +// * @param clazz +// * @param +// * @return +// */ +// public T toHighlightEntity(Class clazz) { +// String emTag = ""; +// if (data == null) { +// return null; +// } +// if (highlightData != null) { +// +// highlightData.forEach((k, v) -> { +// boolean flag = data.get(k) == null || (!Strings.isNullOrEmpty(v) && v.contains(emTag)); +// if (flag){ +// data.put(k, v); +// } +// }); +// // data.putAll(highlightData); +// } +// // 如果拼音有值则覆盖原始值 +// if (highlightData != null) { +// highlightData.forEach((k, v) -> { +// String realKey = k; +// // 处理拼音后缀高亮 +// if (k.endsWith(EsBase.ES_PINYIN_SUFFIX) && StringUtils.isNotBlank(v)) { +// // 获取拼音字段原始key(去掉后缀.pinyin) +// realKey = StringUtils.removeEnd(k, EsBase.ES_PINYIN_SUFFIX); +// // 获取原始字段值 +// String realValue = data.get(realKey) == null ? "" : data.get(realKey).toString(); +// // 原始值中是否有高亮 +// boolean isOriginalHighlight = !Strings.isNullOrEmpty(realValue) && realValue.contains(emTag); +// // 如果原始值中有高亮则不再处理 +// if (isOriginalHighlight) { +// return; +// } +// +// // 如果原始值没有高亮,那么判断拼音值是否有高亮 +// String pinyinValue = v == null ? "" : v; +// // 原始值中是否有高亮 +// boolean isPinyinHighlight = !Strings.isNullOrEmpty(pinyinValue) && pinyinValue.contains(emTag); +// if (isPinyinHighlight) { +// data.put(realKey, v); +// } +// } +// }); +// } +// // 如果继承至es基类则赋值doc_id +// T entity = JSON.parseObject(JSON.toJSONString(data), clazz); +// if (BaseEsEntity.class.isAssignableFrom(clazz)) { +// BeanUtils.setProperty(entity, BaseEsEntity.DOC_ID, this.getDocId()); +// if (this.getIndex() != null) { +// BeanUtils.setProperty(entity, BaseEsEntity.INDEX, this.getIndex()); +// } +// if (this.getType() != null) { +// BeanUtils.setProperty(entity, BaseEsEntity.TYPE, this.getType()); +// } +// } +// return entity; +// } +// +// public String getDocId() { +// return docId; +// } +// +// public void setDocId(String docId) { +// this.docId = docId; +// } +// +// public Map getData() { +// return data; +// } +// +// public void setData(Map data) { +// this.data = data; +// } +// +// public Map getHighlightData() { +// return highlightData; +// } +// +// public void setHighlightData(Map highlightData) { +// this.highlightData = highlightData; +// } +// +// public String getIndex() { +// return index; +// } +// +// public void setIndex(String index) { +// this.index = index; +// } +// +// public String getType() { +// return type; +// } +// +// public void setType(String type) { +// this.type = type; +// } +// +// @Override +// public String toString() { +// return "EsDTO{" + +// "docId='" + docId + '\'' + +// ", data=" + data + +// '}'; +// } +//} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java index 19cd37c..a7dd2e5 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java @@ -1,10 +1,15 @@ package com.bfd.mf.common.service.es; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.es.EsUtils; +import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.TermsQueryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -17,29 +22,40 @@ import java.util.List; @Service public class EsQueryServiceForSQMini { private static final Logger logger = LoggerFactory.getLogger(EsQueryServiceForSQMini.class); + private String clusterName = ""; @Autowired private GetQueryBuilder getQueryBuilder; @Autowired private BFDApiConfig bfdApiConfig; - - private String clusterName =""; + @Autowired + private TaskRepository taskRepository; @PostConstruct public void init() { // 注册数据查询来源 - clusterName = bfdApiConfig.esMiniName(); - String sourceAddress [] = bfdApiConfig.esMiniAddress(); + clusterName = bfdApiConfig.esMiniName(); + String sourceAddress[] = bfdApiConfig.esMiniAddress(); // 配置文件中的 es-source EsUtils.registerCluster(clusterName, sourceAddress); } + /** + * 2023-04-25 查询调用的方法 + * 查询 + * @param indexName + * @param queryRequest + * @return + */ - public List queryDataFromOneSubject(String[] indexName, QueryRequest queryRequest) { + public List queryDataFromFolder(String[] indexName, QueryRequest queryRequest) { + List result = new ArrayList<>(); try { logger.debug("[EsQueryServiceForSQMini - 专题] queryDataFromOneSubject ..."); - BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest); - //logger.info("[EsQueryService] queryDataFromOneSubject: indexName = " + indexName[0] + "; qb:\n" + "{}.", boolQueryBuilder.toString()); - logger.info("[EsQueryService] queryDataFromOneSubject: indexName : {}" ,indexName[0]); + // BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest); + // 2023-04-23 新查询语句的组装 + BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilderNew(queryRequest); + logger.info("[EsQueryService] queryDataFromOneSubject: indexName :{} . ", indexName[0], " ; qb:\n" + "{}.", boolQueryBuilder.toString()); + // logger.info("[EsQueryService] queryDataFromOneSubject: indexName : {}" ,indexName[0]); //每页的数量 Integer limit = queryRequest.getLimit(); //起始页(0,20,40....) @@ -50,13 +66,18 @@ public class EsQueryServiceForSQMini { String sortFlag = queryRequest.getSidx(); // 主贴、评论、还是用户? Integer searchType = queryRequest.getSearchType(); - List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType); + // 得用两个查询?一个是有高亮的,一个是没有高亮的? + if (null != queryRequest.getHighLevelQueries()) { + result = EsUtils.queryWithHighlight(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType); + } else { + result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType); + } //System.out.println(result); - return result; - }catch (Exception e){ - return new ArrayList<>(); - } + } catch (Exception e) { + e.printStackTrace(); + } + return result; } /** @@ -65,74 +86,115 @@ public class EsQueryServiceForSQMini { * @param queryRequest * @return */ - public Long queryDataCountFromOneSubject(String[] indexName, QueryRequest queryRequest) { + public Long queryDataCountFromFolder(String[] indexName, QueryRequest queryRequest) { try { logger.debug("[EsQueryServiceForSQMini - 专题] queryDataCountFromOneSubject ..."); BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest); Integer searchType = queryRequest.getSearchType(); Integer size = queryRequest.getLimit(); - Long totalCount = EsUtils.queryTotalCountNew(clusterName, indexName, boolQueryBuilder, searchType ); + Long totalCount = EsUtils.queryTotalCountNew(clusterName, indexName, boolQueryBuilder, searchType); //System.out.println("EsQueryServiceForSQMini : queryDataCountFromOneSubject " + totalCount); return totalCount; - }catch (Exception e){ + } catch (Exception e) { return 0L; } } /** * 复制示例专题的数据到新的专题 + * * @param indexName * @param queryRequest * @return */ - public JSONObject exportDataFromOneSubject(String[] indexName, QueryRequest queryRequest) { + public JSONObject exportDataFromFolder(String[] indexName, QueryRequest queryRequest) { try { logger.info("[EsQueryServiceForSQMini - 专题] exportDataFromOneSubject start ..."); - BoolQueryBuilder boolQueryBuilder =getQueryBuilder.getQueryBuilder(queryRequest); -// logger.info("[EsQueryServiceForSQMini-专题] queryDataFromOneSubject: indexName = " + indexName + "; qb:" + "{}.", boolQueryBuilder.toString()); + BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilderNew(queryRequest); //每页的数量 Integer limit = queryRequest.getLimit(); String scrollId = queryRequest.getScrollId(); Integer searchType = queryRequest.getSearchType(); - JSONObject result = EsUtils.queryForExport(clusterName, indexName, boolQueryBuilder, limit,scrollId,searchType); + JSONObject result = EsUtils.queryForExport(clusterName, indexName, boolQueryBuilder, limit, scrollId, searchType); return result; - }catch (Exception e){ + } catch (Exception e) { e.printStackTrace(); return new JSONObject(); } } - public long reIndexData(String indexList, String newIndex) { - try{ - long created = EsUtils.reIndex(clusterName,indexList,newIndex); - return created; - }catch (Exception e){ - e.printStackTrace(); - return 0; - } - } public void deleteBySubjectId(String indexName) { - try{ - EsUtils.delIndex(clusterName,indexName); - }catch (Exception e){ + try { + EsUtils.delIndex(clusterName, indexName); + } catch (Exception e) { e.printStackTrace(); } } public void deleteBySubjectIdByCid(String indexName, String cid) { - try{ - EsUtils.delIndexByCid(clusterName,indexName,cid); - }catch (Exception e){ + try { + EsUtils.delIndexByCid(clusterName, indexName, cid); + } catch (Exception e) { e.printStackTrace(); } } public void deleteBySubjectIdByCrawlDataFlag(String indexName, String cid, String crawlDataFlag) { - try{ - EsUtils.delIndexByCrawlDataFlag(clusterName,indexName,cid,crawlDataFlag); - }catch (Exception e){ + try { + EsUtils.delIndexByCrawlDataFlag(clusterName, indexName, cid, crawlDataFlag); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static int MOVE_TASKS_STATUS = 6; + private static int FINSH_TASKS_STATUS = 3; + private static int MOVE_FLORD_STATUS = 1; + private static int FINSH_FLORD_STATUS = 0; + + public long reIndexData(String indexList, String newIndex) { + try { + long created = EsUtils.reIndex(clusterName, indexList, newIndex); + return created; + } catch (Exception e) { + e.printStackTrace(); + return 0; + } + } + + public long reIndexDataByTasks(String originalIndex, String currentIndex, List tasks) { + long start = System.currentTimeMillis(); + try { + TermsQueryBuilder termQueryBuilder = QueryBuilders.termsQuery(ESConstant.TASK_ID, tasks); + QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(termQueryBuilder); + + // Thread.sleep(5000); + long created = EsUtils.reIndexByTasks(clusterName, originalIndex, currentIndex, tasks, queryBuilder); + + for (Long taskId : tasks) { + taskRepository.updateTaskStatus(Long.valueOf(taskId), FINSH_TASKS_STATUS); + } + String newIndexPre = "cl_special_1.0_"; + originalIndex = originalIndex.replace(newIndexPre, ""); + currentIndex = currentIndex.replace(newIndexPre, ""); + taskRepository.updateSubjectStatus(Long.valueOf(originalIndex), FINSH_FLORD_STATUS); + taskRepository.updateSubjectStatus(Long.valueOf(currentIndex), FINSH_FLORD_STATUS); + long end = System.currentTimeMillis(); + logger.info("reIndexDataByTasks:statr:" + start + " ; end:" + end + " ; time = " + (end - start)); + + return created; + } catch (Exception e) { + e.printStackTrace(); + return 0; + } + } + + public void deleteByTasks(String indexName, String cid, List tasks) { + try { + EsUtils.delIndexByTasks(clusterName, indexName, cid, tasks); + } catch (Exception e) { e.printStackTrace(); } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java index 3c27812..975521f 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java @@ -2,13 +2,16 @@ package com.bfd.mf.common.service.es; import com.bfd.mf.common.service.cache.TopicQueryService; import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.common.util.enums.SearchExpressionEnum; +import com.bfd.mf.common.util.enums.SearchMatchTypeEnum; +import com.bfd.mf.common.util.enums.SearchScopeEnum; +import com.bfd.mf.common.util.enums.SearchWordStrategyEnum; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.nlp.common.util.string.TStringUtils; +import com.google.common.base.Strings; import org.apache.commons.collections4.map.HashedMap; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.MatchPhraseQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; +import org.apache.lucene.search.join.ScoreMode; +import org.elasticsearch.index.query.*; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.BucketOrder; @@ -18,14 +21,232 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.util.*; +import java.util.stream.Stream; @Service public class GetQueryBuilder { private static final Logger logger = LoggerFactory.getLogger(GetQueryBuilder.class); + /** + * es 句子分隔符 + */ + public static final String SEPARATOR_SENTENCE = "sentenceforbfd"; + /** + * es返 段落分隔符 + */ + public static final String SEPARATOR_PARAGRAPH = "paragraphforbfd"; + @Autowired private TopicQueryService topicQueryService; + /** + * 2023-04-23 新的查询语句组装 + * 传入参数有变化 + * + * @param queryRequest + * @return + */ + public BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) { + logger.info("[GetQueryBuilder] getQueryBuilder..."); + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + + // 基础查询:根据查询条件组装查询语句 + BoolQueryBuilder boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest); + + // 如果要根据ID 查询数据 如果查ID 的,后面的条件就不用查了。 + if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) { + + String dataIds = queryRequest.getDataIds(); + List dataIdList = getDataIdList(dataIds); + QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList); + boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder); + + // 如果有 任务ID就有,没有就没有啊! + if (null != queryRequest.getTaskIds()) { + List taskIds = queryRequest.getTaskIds(); + if (taskIds.size() > 0) { + boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termQuery("taskId", taskIds.get(0))); + } + } + + qb.must(boolQueryBuilder); + return qb; + } + + Integer searchType = queryRequest.getSearchType(); // 单选 0:主贴;1:评论;2:用户 + BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType); + boolQueryBuilder.filter(searchTextBuilder); + + + /** + * 1、不考虑组合搜索,那higtLevelQueries里只有一个元素么? + * 是的 + * 还有text截图看是数组[]的,咱也是这样么? + * 是的 + * 几个属性的含义: + * id 是顺序 + * expression 是关系,1=并且(默认第一个词为1) + * 2=或者 + * 3=排除 + * 当只有一个关键词的时候就只能是 且,只有超过1个以上的关键词才会有组合关系, + * 然后根据组合关系进行语句的组装 + * 1,2,2,2 + * must[a should[b,c,d]] + * 1,3,3,2 + * must[a,should d]mustnot[b,c] + * 2、选中导出还是传多个dataIds ,但是现在如果先按上述条件查询后导出全部,还是得按上面的条件筛选,因此 导出和查询参数应该是一致的。 + * 3、导出到kafka的接口志成哪儿也要同步改,@Z z c 跟我的查询逻辑一致即可,这块代码可复用。 + * + * {"id":1,"expression":"1","text":["the"],"scope":"100"} + */ + + // 高级搜索自定义设置 + /** + * 高级查询语句组装说明: + * 1、先确认是否有排除词,只要有排除词,先排除再查询 + * 2、当只有一个词的时候,默认就是mast matchPhraseQuery 查询 + * 3、当有多个的时候就需要按顺序逻辑进行组装了 + * a & b | c + * mast(a should (b,c)) + * a | b & c + * mast(c shoule(a,b)) + */ + if (null != queryRequest.getHighLevelQueries()) { + List highLevelQueries = queryRequest.getHighLevelQueries(); + + // 1、找到所有的not进行非处理 + highLevelQueries.stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { + qb.mustNot(this.getHighLevelQueryBuilder(e, true)); + }); + + // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理 + List tempHighLevel = new ArrayList<>(highLevelQueries.size()); + highLevelQueries.stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { + // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空 + if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) { + // 拼接条件 + BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); + tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); + qb.must(tempQueryBuilder); + tempHighLevel.clear(); + } + // 将当前项加入临时队列 + tempHighLevel.add(e); + }); + + // 此处拼接tempHighLevel未处理的内容 + if (!tempHighLevel.isEmpty()) { + BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); + tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); + qb.must(tempQueryBuilder); + } + + } +// +// // 高级搜索自定义设置 +// +// +// if (null == highLevelQueries || highLevelQueries.size() == 0) { +// System.out.println("木有传入关键词,直接返回查询语句"); +// } else if (highLevelQueries.size() == 1) { +// System.out.println("只有一个查询关键词,直接组装查询即可"); +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// Map query = highLevelQueries.get(0); +// List texts = (List) query.get("text"); +// System.out.println(texts.get(0)); +// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, texts.get(0)).slop(0); +// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, texts.get(0)).slop(0); +// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery); +// qb.must(queryBuilder); +// } else { +// System.out.println("多个组装查询,需要拆分啦"); +// // 1、找到所有的 not 进行非处理 +// // expression = 1,2,3(与 或 非) +// for (Map query : highLevelQueries) { +// int id = Integer.valueOf(query.get("id").toString()); +// String expression = query.get("expression").toString(); +// List texts = (List) query.get("text"); +// +// if (expression.equals("2")) { +// String text = texts.get(0); +// QueryBuilder contentMustNotQueryBuilder = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, text); +// QueryBuilder titleMustNotQueryBuilder = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, text); +// qb.mustNot(contentMustNotQueryBuilder); +// qb.mustNot(titleMustNotQueryBuilder); +// } else { +// List tempHighLevel = new ArrayList<>(highLevelQueries.size()); +// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +// qb.must(tempQueryBuilder); +// } +// } +// return qb; +// +//// // 1、找到所有的not进行非处理 +//// query.getHighLevelQueries().stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +//// queryBuilder.mustNot(this.getHighLevelQueryBuilder(e, true)); +//// }); +//// // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理 +//// List tempHighLevel = new ArrayList<>(query.getHighLevelQueries().size()); +//// queryRequest.getHighLevelQueries().stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> { +//// // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空 +//// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) { +//// // 拼接条件 +//// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +//// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +//// queryBuilder.must(tempQueryBuilder); +//// tempHighLevel.clear(); +//// } +//// // 将当前项加入临时队列 +//// tempHighLevel.add(e); +//// }); +//// +//// // 此处拼接tempHighLevel未处理的内容 +//// if (!tempHighLevel.isEmpty()) { +//// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery(); +//// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false))); +//// queryBuilder.must(tempQueryBuilder); +//// } +// } + + // System.out.println("**** " + keyword); +// String[] keywords; +// try { +// if (TStringUtils.isNotEmpty(keyword)) { +// if (keyword.contains(" ")) { +// keywords = keyword.split(" "); +// } else { +// keywords = new String[]{keyword}; +// } +// if (searchType == 0) { // 主贴的话 查 标题和内容 +// for (int i = 0; i < keywords.length; i++) { +// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[i]).slop(0); +// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[i]).slop(0); +// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery); +// qb.must(queryBuilder); +// } +// } else if (searchType == 1) { // 评论的话 查 评论内容 +//// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0); +//// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery); +//// qb.must(queryBuilder); +// boolQueryBuilder.must(QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0)); +// } else if (searchType == 2) { // 用户 就只查 用户名 +// boolQueryBuilder.must(QueryBuilders.queryStringQuery("*" + keyword + "*").field(ESConstant.AUTHOR)); +// } +// } +// } catch (Exception e) { +// e.printStackTrace(); +// } + qb.must(boolQueryBuilder); + return qb; + } + + /** + * 这个是导出的查询 + * + * @param queryRequest + * @return + */ public BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) { logger.info("[GetQueryBuilder] getQueryBuilder..."); BoolQueryBuilder qb = QueryBuilders.boolQuery(); @@ -35,12 +256,17 @@ public class GetQueryBuilder { // 如果要根据ID 查询数据 如果查ID 的,后面的条件就不用查了。 if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) { - String taskId = queryRequest.getTaskId(); String dataIds = queryRequest.getDataIds(); List dataIdList = getDataIdList(dataIds); QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList); - boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder) - .must(QueryBuilders.termQuery("taskId", taskId)); + boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder); + // 如果有 任务ID就有,没有就没有啊! + if (null != queryRequest.getTaskIds()) { + List taskIds = queryRequest.getTaskIds(); + if (taskIds.size() > 0) { + boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds)); + } + } qb.must(boolQueryBuilder); return qb; } @@ -53,6 +279,7 @@ public class GetQueryBuilder { /** * 2023-03-06 * 添加一个查询功能,用空格分割,查询 并且的关系 + * */ // System.out.println("**** " + keyword); String[] keywords; @@ -85,7 +312,7 @@ public class GetQueryBuilder { for (int i = 0; i < keywords.length; i++) { MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[i]).slop(0); MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[i]).slop(0); - QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery); + QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery); qb.must(queryBuilder); } } else if (searchType == 1) { // 评论的话 查 评论内容 @@ -260,6 +487,179 @@ public class GetQueryBuilder { // aggregationBuilder.subAggregation(aggregationBuilder1); return aggregationBuilder; + } + + /** + * 拼装高级搜索--针对高级搜索中的一行或一个框 + * + * @param highLevelQuery + * @return + */ + protected QueryBuilder getHighLevelQueryBuilder(HighLevelQuery highLevelQuery, boolean isNot) { + BoolQueryBuilder result = QueryBuilders.boolQuery(); + // 获取高级查询的字段 + //Map fieldMap = SearchScopeEnum.getFieldsByKey(highLevelQuery.getScope()); + // 默认就是查全文(标题 + 正文) + Map fieldMap = SearchScopeEnum.getFieldsByKey(100); + SearchScopeEnum searchScopeEnum = SearchScopeEnum.getEnumByKey(Objects.toString(highLevelQuery.getScope())); + String path = (null == searchScopeEnum ? null : searchScopeEnum.getPath()); + + // 同段搜索---跨度搜索 + if (SearchMatchTypeEnum.PARAGRAPH.equals(highLevelQuery.getMatchType())) { + fieldMap.forEach((k, v) -> { + this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_PARAGRAPH); + this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_PARAGRAPH); + }); + return result; + } + + // 同句搜索 + if (SearchMatchTypeEnum.SENTENCE.equals(highLevelQuery.getMatchType())) { + fieldMap.forEach((k, v) -> { + this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_SENTENCE); + this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_SENTENCE); + }); + return result; + } + + // 原文普通搜索 + QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); + if (rawQueryBuilder != null) { + result.should(rawQueryBuilder); + } + // 如果译文不为空,则进行译文普通搜索 + if (highLevelQuery.getTranslateText() != null) { + QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getTranslateText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy())); + if (transQueryBuilder != null) { + result.should(transQueryBuilder); + } + } + return result; + } + + /** + * 获取跨度搜索查询条件 + * + * @param field 字段名 + * @param text 内容 + * @param separator 分隔符 + * @return + */ + private void addSpanQueryBuilder(BoolQueryBuilder queryBuilder, String field, String[] text, String separator) { + + if (text == null || text.length == 0) { + return; + } + // 将所有原词按照空格拆分 + /* + String[] splitText = Stream.of(text) + .filter(StringUtils::isNotBlank) + .flatMap(e -> Stream.of(e.split(" "))) + .filter(StringUtils::isNotBlank) + .toArray(String[]::new); + */ + String[] splitText = text; + if (splitText == null || splitText.length == 0) { + return; + } + SpanNearQueryBuilder spanNearQueryBuilder = QueryBuilders + .spanNearQuery(QueryBuilders.spanTermQuery(field, splitText[0]), 250) + .inOrder(false); + Stream.of(splitText).skip(1).forEach(e -> spanNearQueryBuilder.addClause(QueryBuilders.spanTermQuery(field, e))); + SpanQueryBuilder exclude = QueryBuilders.spanTermQuery(field, separator); + SpanNotQueryBuilder spanNotQueryBuilder = QueryBuilders.spanNotQuery(spanNearQueryBuilder, exclude); + //跨度搜索 为了高亮显示 新增对于每个单次进行查询。 + BoolQueryBuilder query = new BoolQueryBuilder(); + // Stream.of(splitText).forEach(e -> query.must(QueryBuilders.matchPhraseQuery(BaseFieldEnum.content.name(), e))); + queryBuilder.should(QueryBuilders.boolQuery().must(spanNotQueryBuilder).must(query)); + } + + + /** + * 全文检索查询拼接----含词语策略 + * + * @param fieldMap 查询字段 + * @param text 文本 + * @param isNot 是否是排除 + * @param strategyEnum 搜索词策略 + * @return + */ + private QueryBuilder getMatchQueryBuilder(String nestedPath, + Map fieldMap, + String[] text, + boolean isNot, + SearchWordStrategyEnum strategyEnum) { + + if (text == null || text.length == 0) { + return null; + } + /* + // 将所有原词按照空格拆分 + String[] splitText = Stream.of(text) + .filter(StringUtils::isNotBlank) + .flatMap(e -> Stream.of(e.split(" "))) + .filter(StringUtils::isNotBlank) + .toArray(String[]::new); + */ + String[] splitText = text; + if (splitText == null || splitText.length == 0) { + return null; + } + + BoolQueryBuilder result = QueryBuilders.boolQuery(); + // 如果是非 或 指定完整匹配,则用短语,否则用最佳字段 + MultiMatchQueryBuilder.Type multiMatchType = isNot + || SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? MultiMatchQueryBuilder.Type.PHRASE_PREFIX : MultiMatchQueryBuilder.Type.BEST_FIELDS; + // 使用拆分后的词进行匹配----如果使用完整匹配则不进行拆分,否则按空格拆分 + Stream.of(SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? text : splitText).forEach((e) -> { + // 校验所有还是单个词 + QueryBuilder matchQuery = this.getMatchQueryBuilder(nestedPath, fieldMap, multiMatchType, e); + if (SearchWordStrategyEnum.ALL.equals(strategyEnum)) { + result.must(matchQuery); + } else { + result.should(matchQuery); + } + }); + return result; + } + + /** + * 全文检索查询拼接,不支持nested属性与非nested属性混合使用,并且nested属性必须归属相同path + * + * @param fieldMap 查询字段 + * @param type 查询类型 + * @param text 文本 + * @return + */ + private QueryBuilder getMatchQueryBuilder(String nestedPath, Map fieldMap, MultiMatchQueryBuilder.Type type, String text) { + // 拼装搜索 + QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(text) + .fields(fieldMap) + .type(type == null ? MultiMatchQueryBuilder.Type.BEST_FIELDS : type) + .maxExpansions(5) + .tieBreaker(0.3f) + /// 关闭高频词处理 + //.cutoffFrequency(0.01f) + .lenient(Boolean.TRUE) + .minimumShouldMatch("90%"); + + // 如果有path拼接nested并返回 + if (!Strings.isNullOrEmpty(nestedPath)) { + return this.nestedQuery(nestedPath, queryBuilder); + } + return queryBuilder; + } + + + /** + * 拼装nested条件 + * + * @param nested + * @param queryBuilder + * @return + */ + public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) { + return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None); } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/HighLevelQuery.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/HighLevelQuery.java new file mode 100644 index 0000000..795ddf3 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/HighLevelQuery.java @@ -0,0 +1,118 @@ +package com.bfd.mf.common.service.es; + +import com.bfd.mf.common.util.enums.SearchExpressionEnum; +import com.bfd.mf.common.util.enums.SearchMatchTypeEnum; +import com.bfd.mf.common.util.enums.SearchScopeEnum; +import com.bfd.mf.common.util.enums.SearchWordStrategyEnum; +import com.google.common.base.Strings; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import org.apache.commons.lang.StringUtils; + +import java.io.Serializable; +import java.util.stream.Stream; + +/** + * 高级搜索条件 + * @author lihonghao + */ +@ApiModel(value = "HighLevelQuery") +public class HighLevelQuery implements Serializable { + + /** + * 表达式 + */ + @ApiModelProperty(value = "表达式 1-AND 2-OR 3-NOT") + private Integer expression = SearchExpressionEnum.AND.iKey(); + /** + * 搜索类型 + */ + @ApiModelProperty(value = "搜索范围 100-全文 200-标题 300-内容 400-关键词 500-同一段落 600-同一句子") + private Integer scope = SearchScopeEnum.ALL.iKey(); + /** + * 搜索词策略 + */ + @ApiModelProperty(value = "搜索词策略 1-包含以下全部字词 2-包含以下完整词句 3-包含以下任一字词, 默认 3") + private String wordStrategy = SearchWordStrategyEnum.ANY.getKey(); + /** + * 文本内容 + */ + @ApiModelProperty(value = "文本内容") + private String[] text; + /** + * 文本内容 + */ + @ApiModelProperty(value = "译后-文本内容") + private String[] translateText; + /** + * 跨语种 + */ + @ApiModelProperty(value = "跨语种, zh, en...") + private String language; + + /** + * 本条件是否可用 值不为空且如果是数组则数组至少有一个不为空 + * @return + */ + @ApiModelProperty(hidden = true) + public boolean isAvailable(){ + return text != null && !Stream.of(text).allMatch(Strings::isNullOrEmpty); + } + + /** + * 获取匹配类型 + * @return + */ + @ApiModelProperty(hidden = true) + public SearchMatchTypeEnum getMatchType() { + return SearchScopeEnum.getMatchTypeByKey(this.scope); + } + + public Integer getExpression() { + return expression; + } + + public void setExpression(Integer expression) { + this.expression = expression; + } + + public Integer getScope() { + return scope; + } + + public void setScope(Integer scope) { + this.scope = scope; + } + + public String[] getText() { + return text; + } + + public void setText(String[] text) { + this.text = text == null ? null : Stream.of(text).filter(StringUtils::isNotBlank).toArray(String[]::new); + } + + public String[] getTranslateText() { + return translateText; + } + + public void setTranslateText(String[] translateText) { + this.translateText = translateText == null ? null : Stream.of(translateText).filter(StringUtils::isNotBlank).toArray(String[]::new); + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + + public String getWordStrategy() { + return wordStrategy; + } + + public void setWordStrategy(String wordStrategy) { + this.wordStrategy = wordStrategy; + } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java index 12b8798..a409a0f 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java @@ -64,8 +64,8 @@ public class ParseSearchScopeService { .must(QueryBuilders.termQuery(ESConstant.PRIMARY,1)) .must(QueryBuilders.termQuery(ESConstant.DOC_TYPE,ESConstant.ITEM))); } else if(searchType == 2){ - searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2)) - .must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage","newsuser","videoAccount")); + searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2)); + // .must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage","newsuser","videoAccount")); } return searchScopeQuery; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java index 4ff284a..5b6b50c 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java @@ -25,6 +25,9 @@ import java.util.Map; public class ESConstant { + public static final String HIGHLIGHTPRETAGS = ""; + public static final String HIGHLIGHTPOSTTAGS = ""; + public static final String MEDIA_AREA_KEY = "dict"; @@ -517,6 +520,11 @@ public class ESConstant { public static String FACEBOOK = "facebook"; public static String TWITTER = "twitter"; + + public static String SOCIAL_COMMENT = "socialComment"; + public static String SOCIAL_FOLLOW = "socialFollow"; + // public static String SOCIALCOMMENT = "socialComment"; + public static String CID = "cid"; public static String SITEID = "siteId"; public static String SITETYPE = "siteType"; @@ -840,8 +848,9 @@ public class ESConstant { , ESConstant.OPINIONS // 表情 , ESConstant.EXPRESSION - + // 来源 , ESConstant.SOURCE + // 情感,这个字段不用了 , ESConstant.SYS_SENTIMENT , ESConstant.CONTENT_TAG , ESConstant.EMOTION_ENTRY + ESConstant.DEFAULT_JOIN_SYMBOL + ESConstant.EMOTION_NAME diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java new file mode 100644 index 0000000..0310e71 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java @@ -0,0 +1,209 @@ +package com.bfd.mf.common.util.enums; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * 文档-网页-图片索引属性枚举 + * @author lihonghao + */ +public enum BaseFieldEnum { + /** + * 文件id + */ + id, + /** + * 文件md5值 + */ + md5, + /** + * 文件标题 + */ + title, + /** + * 摘要 + */ + summary, + /** + * 文档内容 + */ + content, + /** + * 数据来源 + */ + source, + /** + * 原文/译文 + */ + type, + /** + * 原文id + */ + original_id, + /** + *入库时间 + */ + create_time, + /** + * 文档语言 + */ + language, + /** + * 上传用户 + */ + upload_user, + /** + * 上传用户姓名 + */ + upload_user_name, + /** + * 是否删除 + */ + del, + /** + * 网站枚举 + */ + website, + /** + * 发布人 + */ + publisher, + /** + * 发布时间 + */ + public_time, + /** + * 网站版面 + */ + cate_md5, + /** + * 智能标签 + */ + ai_tag, + /** + * 智能地区 + */ + ai_area, + /** + * 主题一级分类 + */ + subject_classify1, + /** + * 主题二级分类 + */ + subject_classify2, + /** + * 主题 + */ + subject, + /** + * 渠道 + */ + channel, + /** + * 审核状态 + */ + audit_state, + /** + * 用户上传文档,归属部门 + */ + department_id, + /** + * 整编状态 + */ + edit_state, + + /** + * 敏感词 + */ + sensitive_tag, + /** + * 置顶状态 + */ + flag_top, + /** + * 置顶有效期 + */ + flag_top_validity, + /** + * 0-无 1-不重要 2-有点重要 3-一般、4-重要、5-非常重要 + */ + flag_importance, + /** + * 分类标签 + */ + subject_tag, + /** + * 事件id + */ + event_id, + /** + * 事件id + */ + event_detect_time, + /** + * 重复校验字段 + */ + duplicate_key, + /** + * 媒体类型 + */ + media_type, + /** + * 文中提及的标准时间 + */ + norm_time, + /** + * 时间间隔(天) + */ + delay_time, + /** + * 省名称 + */ + province_code, + /** + * 市名称 + */ + city_code, + /** + * 区县 + */ + county_code, + + ; + + /** + * 需要进行匹配的属性 + * @return + */ + public static Map getMatchFields(){ + Map matchMap = new HashMap<>(2); + matchMap.put(BaseFieldEnum.title.name(), 2.0F); + matchMap.put(BaseFieldEnum.content.name(), 1.0F); + return matchMap; + } + + /** + * 需要进行匹配的属性-含拼音 + * @return + */ + public static Map getMatchFieldsWithPy(){ + Map matchMap = getMatchFields(); + Map pyMap = new HashMap<>(matchMap.size() * 2); + matchMap.forEach((k, v) -> { + pyMap.put(k, v); + /// 系统中取消拼音搜索 + //pyMap.put(k.concat(EsBase.ES_PINYIN_SUFFIX), v/10); + }); + return pyMap; + } + + /** + * 获取全部属性名 + * @return + */ + public static String[] getAllFields(){ + return Arrays.stream(values()).map(BaseFieldEnum::name).toArray(String[]::new); + } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/DocumentFieldEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/DocumentFieldEnum.java new file mode 100644 index 0000000..89d6546 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/DocumentFieldEnum.java @@ -0,0 +1,15 @@ +package com.bfd.mf.common.util.enums; + +import javax.annotation.Resource; + +/** + * @Author dujing + * @Date 2023/4/23 10:34 + */ +public enum DocumentFieldEnum { + ; + + public static Resource title; + public static Resource content; + public static Resource ai_tag; +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchExpressionEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchExpressionEnum.java new file mode 100644 index 0000000..59651a3 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchExpressionEnum.java @@ -0,0 +1,138 @@ +package com.bfd.mf.common.util.enums; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; + +/** + * 表达式枚举值 + * + * @author honghao.li + */ +public enum SearchExpressionEnum { + + /** + * 与 + */ + AND("1", ""), + /** + * 或 + */ + OR("2", ""), + /** + * 非 + */ + NOT("3", ""); + + /** + * 码值 + */ + private String key; + /** + * 国际化值 + */ + private String label; + + /** + * @param key 码值 + * @param label 列表展示值国际化编码 + */ + SearchExpressionEnum(String key, String label) { + this.key = key; + this.label = label; + } + + public String getKey() { + return key; + } + + /** + * 返回int型枚举 + * + * @return + */ + public int iKey() { + return Integer.parseInt(key); + } + + public String getLabel() { + return label; + } + + /** + * 获取当前枚举值的国际化 + * + * @return + */ +// public String getLabelI18n() { +// return LocaleI18nUtils.getMessage(label); +// } + + /** + * 判断是否是当前枚举 + * + * @param key + * @return + */ + public boolean is(Integer key) { + return Integer.valueOf(getKey()).equals(key); + } + + /** + * 校验是否支持此类型 + * + * @param key + * @return + */ + public static boolean isSupport(String key) { + return Arrays.stream(values()).anyMatch(e -> e.getKey().equals(key)); + } + + /** + * 获取全部枚举值的map + * + * @return + */ + public static LinkedHashMap getEnumMap() { + LinkedHashMap map = Maps.newLinkedHashMap(); + for (SearchExpressionEnum temp : values()) { + map.put(temp.getKey(), temp); + } + return map; + } + + /** + * 根据key获取国际化内容 + * + * @param key + * @return + */ +// public static String getLabelI18n(String key) { +// if (key == null) { +// return ""; +// } +// +// LinkedHashMap map = getEnumMap(); +// if (map.containsKey(key)) { +// return map.get(key).getLabelI18n(); +// } +// return key; +// } +// +// /** +// * 获取key value模式枚举值集合 +// * +// * @return +// */ +// public static List getEnumList() { +// List rList = Lists.newArrayList(); +// for (SearchExpressionEnum temp : values()) { +// rList.add(KeyValueDTO.parse(temp.getKey(), temp.getLabelI18n())); +// } +// return rList; +// } + +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchMatchTypeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchMatchTypeEnum.java new file mode 100644 index 0000000..0ee8e8f --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchMatchTypeEnum.java @@ -0,0 +1,160 @@ +package com.bfd.mf.common.util.enums; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.stream.Stream; + +/** + * 跨度枚举值 + * + * @author honghao.li + */ +public enum SearchMatchTypeEnum { + /** + * 分词 + */ + MATCH("1", ""), + /** + * 精确 + */ + ACCURATE("2", ""), + /** + * 模糊 + */ + FUZZY("3", ""), + /** + * 同段 + */ + PARAGRAPH("4", ""), + /** + * 同句 + */ + SENTENCE("5", ""), + /** + * 嵌套 + */ + NESTED("6", ""); + + /** + * 码值 + */ + private String key; + /** + * 国际化值 + */ + private String label; + + /** + * @param key 码值 + * @param label 列表展示值国际化编码 + */ + SearchMatchTypeEnum(String key, String label) { + this.key = key; + this.label = label; + } + + public String getKey() { + return key; + } + + /** + * 返回int型枚举 + * + * @return + */ + public int iKey() { + return Integer.parseInt(key); + } + + public String getLabel() { + return label; + } + + /** + * 获取当前枚举值的国际化 + * + * @return + */ +// public String getLabelI18n() { +// return LocaleI18nUtils.getMessage(label); +// } + + /** + * 判断是否是当前枚举 + * + * @param key + * @return + */ + public boolean is(Integer key) { + return Integer.valueOf(getKey()).equals(key); + } + + /** + * 校验是否支持此类型 + * + * @param key + * @return + */ + public static boolean isSupport(String key) { + return Arrays.stream(values()).anyMatch(e -> e.getKey().equals(key)); + } + + /** + * 校验是否支持此类型 + * + * @param tempEnum + * @return + */ + public static boolean isSpanType(SearchMatchTypeEnum tempEnum) { + return tempEnum != null && Stream.of(SearchMatchTypeEnum.PARAGRAPH, SearchMatchTypeEnum.SENTENCE).anyMatch(tempEnum::equals); + } + + /** + * 获取全部枚举值的map + * + * @return + */ + public static LinkedHashMap getEnumMap() { + LinkedHashMap map = Maps.newLinkedHashMap(); + for (SearchMatchTypeEnum temp : values()) { + map.put(temp.getKey(), temp); + } + return map; + } + + /** + * 根据key获取国际化内容 + * + * @param key + * @return + */ +// public static String getLabelI18n(String key) { +// if (key == null) { +// return ""; +// } +// +// LinkedHashMap map = getEnumMap(); +// if (map.containsKey(key)) { +// return map.get(key).getLabelI18n(); +// } +// return key; +// } + + /** + * 获取key value模式枚举值集合 + * + * @return + */ +// public static List getEnumList() { +// List rList = Lists.newArrayList(); +// for (SearchMatchTypeEnum temp : values()) { +// rList.add(KeyValueDTO.parse(temp.getKey(), temp.getLabelI18n())); +// } +// return rList; +// } + +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java new file mode 100644 index 0000000..c2f90b2 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java @@ -0,0 +1,218 @@ +package com.bfd.mf.common.util.enums; + +import com.bfd.mf.common.util.constants.ESConstant; +import com.google.common.collect.Maps; + +import java.util.*; + +/** + * 100-全文 200-标题 300-内容 400-关键词 500-同一段落 600-同一句子 + * 搜索范围 + * + * @author honghao.li + */ +public enum SearchScopeEnum { + + /** + * 全文 + */ + ALL("100", SearchMatchTypeEnum.MATCH) { + @Override + public Map getFieldMap() { + return new HashMap() {{ + put(ESConstant.TITLE, 1.0F); + put(ESConstant.CONTENT, 1.0F); + }}; + } + }, + /** + * 标题 + */ + TITLE("200", SearchMatchTypeEnum.MATCH) { + @Override + public Map getFieldMap() { + return new HashMap() {{ + put(DocumentFieldEnum.title.name(), 1.0F); + }}; + } + }, + /** + * 内容 + */ + CONTENT("300", SearchMatchTypeEnum.MATCH) { + @Override + public Map getFieldMap() { + return new HashMap() {{ + put(DocumentFieldEnum.content.name(), 1.0F); + }}; + } + }, + /** + * 关键词 + */ +// TAG("400", SearchMatchTypeEnum.NESTED) { +// @Override +// public Map getFieldMap() { +// return new HashMap() {{ +// put(DocumentFieldEnum.ai_tag.name().concat(Constants.SEPARATOR_POINT).concat(LabelWeightFieldEnum.label.name()), 1.0F); +// }}; +// } +// +// @Override +// public String getPath() { +// return this.getFieldMap().keySet().stream().findFirst().get().split("\\"+Constants.SEPARATOR_POINT)[0]; +// } +// }, + /** + * 内容同一段落 + */ +// PARAGRAPH("500", SearchMatchTypeEnum.PARAGRAPH) { +// @Override +// public Map getFieldMap() { +// return new HashMap() {{ +// put(HtmlFieldEnum.content.name(), 1.0F); +// }}; +// } +// }, + /** + * 内容同一句子 + */ + SENTENCE("600", SearchMatchTypeEnum.SENTENCE) { + @Override + public Map getFieldMap() { + return new HashMap() {{ + put(DocumentFieldEnum.content.name(), 1.0F); + }}; + } + }; + + /** + * 码值 + */ + private String key; + /** + * 匹配类型 + */ + private SearchMatchTypeEnum matchTypeEnum; + + /** + * @param key 码值 + */ + SearchScopeEnum(String key, SearchMatchTypeEnum matchTypeEnum) { + this.key = key; + this.matchTypeEnum = matchTypeEnum; + } + + public String getKey() { + return key; + } + + public String getPath() { + return null; + } + + public SearchMatchTypeEnum getMatchTypeEnum() { + return matchTypeEnum; + } + +// public Map getFieldMap() { +// return BaseFieldEnum.getMatchFieldsWithPy(); +// } + + /** + * 返回int型枚举 + * + * @return + */ + public int iKey() { + return Integer.parseInt(key); + } + + /** + * 判断是否是当前枚举 + * + * @param key + * @return + */ + public boolean is(Integer key) { + return Integer.valueOf(getKey()).equals(key); + } + + /** + * 校验是否支持此类型 + * + * @param key + * @return + */ + public static boolean isSupport(String key) { + return Arrays.stream(values()).anyMatch(e -> e.getKey().equals(key)); + } + + /** + * 根据key获取对应属性 + * + * @param key + * @return + */ + public static Map getFieldsByKey(Integer key) { + if (key == null) { + return new HashMap<>(0); + } + + String strKey = key.toString(); + Map map = getEnumMap(); + if (map.containsKey(strKey)) { + return map.get(strKey).getFieldMap(); + } + // 默认值 + return BaseFieldEnum.getMatchFields(); + } + + /** + * 根据key获取对应属性 + * + * @param key + * @return + */ + public static SearchScopeEnum getEnumByKey(String key) { + for (SearchScopeEnum value : SearchScopeEnum.values()) { + if (Objects.equals(key, value.getKey())) { + return value; + } + } + return null; + } + + /** + * 根据key获取对应属性 + * + * @param key + * @return + */ + public static SearchMatchTypeEnum getMatchTypeByKey(Integer key) { + if (key == null) { + return null; + } + String strKey = key.toString(); + Map map = getEnumMap(); + if (!map.containsKey(strKey)) { + return null; + } + return map.get(strKey).getMatchTypeEnum(); + } + + /** + * 获取全部枚举值的map + * + * @return + */ + public static LinkedHashMap getEnumMap() { + LinkedHashMap map = Maps.newLinkedHashMap(); + for (SearchScopeEnum temp : values()) { + map.put(temp.getKey(), temp); + } + return map; + } + + public abstract Map getFieldMap(); +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchWordStrategyEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchWordStrategyEnum.java new file mode 100644 index 0000000..0b52c12 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchWordStrategyEnum.java @@ -0,0 +1,71 @@ +package com.bfd.mf.common.util.enums; + +import com.google.common.collect.Maps; + +import java.util.LinkedHashMap; + +/** + * 搜索词策略 + * 包含以下全部字词 + * 包含以下完整词句 + * 包含以下任一字词 + * @author lihonghao + */ +public enum SearchWordStrategyEnum { + + /** + * 包含以下全部字词 + */ + ALL("1"), + /** + * 包含以下完整词句 + */ + WHOLE("2"), + /** + * 包含以下任一字词 + */ + ANY("3"); + + SearchWordStrategyEnum(String key) { + this.key = key; + } + + private String key; + + public String getKey() { + return key; + } + + /** + * 判断是否是当前枚举 + * + * @param key + * @return + */ + public boolean is(String key) { + return getKey().equals(key); + } + + /** + * 根据key获取枚举 + * + * @param key 扩展名 + * @return + */ + public static SearchWordStrategyEnum getByKey(String key) { + return SearchWordStrategyEnum.getEnumMap().get(key); + } + + /** + * 获取全部枚举值的map + * + * @return + */ + public static LinkedHashMap getEnumMap() { + LinkedHashMap map = Maps.newLinkedHashMap(); + for (SearchWordStrategyEnum temp : values()) { + map.put(temp.getKey(), temp); + } + return map; + } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java index 36449b2..f53d8f6 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java @@ -1,8 +1,12 @@ package com.bfd.mf.common.util.es; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.service.es.EsBaseParam; import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.common.util.enums.BaseFieldEnum; +import com.google.common.base.Strings; import com.google.common.collect.Maps; +import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesAction; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse; @@ -19,11 +23,10 @@ import org.elasticsearch.action.update.UpdateResponse; import org.elasticsearch.client.Requests; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.*; import org.elasticsearch.index.reindex.*; import org.elasticsearch.script.Script; import org.elasticsearch.search.SearchHit; @@ -43,10 +46,9 @@ import org.slf4j.LoggerFactory; import org.springframework.util.Assert; import java.net.InetAddress; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; public abstract class EsUtils { @@ -55,6 +57,7 @@ public abstract class EsUtils { private static final Map CLIENT_MAP = Maps.newHashMap(); private static final String DOCS = "docs"; + public static void registerCluster(String clusterName, String[] addresses) { System.setProperty("es.set.netty.runtime.available.processors", "false"); Assert.hasLength(clusterName, "Param clusterName must not be empty."); @@ -86,6 +89,7 @@ public abstract class EsUtils { String sortFlag, String orderFlag, Integer size, Integer from, Integer searchType) { + System.out.println("非高亮查询"); TransportClient client = getClient(clusterName); boolean options = true; boolean optionsf = false; @@ -94,14 +98,6 @@ public abstract class EsUtils { if (searchType == 0) { collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); } - //高亮显示 - HighlightBuilder highlightBuilder = new HighlightBuilder(); - highlightBuilder.field("title"); - highlightBuilder.field("content"); - - highlightBuilder.requireFieldMatch(false); //多个单词高亮的话,要把这个设置为trues - highlightBuilder.preTags(""); - highlightBuilder.postTags(""); // 查询 // from + size 的 分页 查询方式 @@ -111,12 +107,12 @@ public abstract class EsUtils { .setQuery(queryBuilder) .setCollapse(collapseBuilder) .setSize(size) - .setFrom(from) - .highlighter(highlightBuilder); + .setFrom(from); + + System.out.println(requestBuilder); SearchResponse searchResponse = requestBuilder.execute().actionGet(); - // List dataList = Lists.newLinkedList(); List dataList = new ArrayList<>(); if (searchResponse.getHits().totalHits > 0) { SearchHit[] hits = searchResponse.getHits().getHits(); @@ -127,19 +123,173 @@ public abstract class EsUtils { .replace("cl_major_", "") .replace("cl_subject_", "") .replace("cl_special_1.0_", "")); - Map highlight = hits[i].getHighlightFields(); - data.put("highlight",highlight); dataList.add(data); } } return dataList; } + + public static List queryWithHighlight(String clusterName, String[] index, + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { + System.out.println("高亮查询"); + EsBaseParam esBaseParam = new EsBaseParam(); + TransportClient client = getClient(clusterName); + boolean options = true; + boolean optionsf = false; + // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 + CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); + if (searchType == 0) { + collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); + } + + // 高亮自定义设置 +// query.getHighLevelQueries().forEach(e -> { +// Stream.of(e.getText()).forEach(text -> keywordMap.put(text, 10)); +// if (e.getTranslateText() != null) { +// Stream.of(e.getTranslateText()).forEach(text -> keywordMap.put(text, 10)); +// } +// }); + + + esBaseParam.setWithHighlight(true); + esBaseParam.setHighlightFields(new ArrayList<>(BaseFieldEnum.getMatchFieldsWithPy().keySet())); + Integer numOfFragments = 2; + HighlightBuilder highlightBuilder = new HighlightBuilder() + // match进行高亮 + .requireFieldMatch(true) + .order(HighlightBuilder.Order.SCORE) + //fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。 + .numOfFragments(numOfFragments) + //一段 fragment 包含多少个字符。默认100。 +// .fragmentSize(Constants.MAX_R_LENGTH / numOfFragments) +// .noMatchSize(Constants.MAX_R_LENGTH) + .preTags(ESConstant.HIGHLIGHTPRETAGS) + .postTags(ESConstant.HIGHLIGHTPOSTTAGS); + BaseFieldEnum.getMatchFieldsWithPy().keySet().forEach(highlightBuilder::field); + + /* + * 高级搜索取消自定义高亮 + * 精确搜索进行短语高亮重定义 + * 否则进行关键词的高亮重定义 + */ +// if (!query.isHighLevel()) { +// DisMaxQueryBuilder highlightQuery = QueryBuilders.disMaxQuery(); +// if (query.isAccurateQuery()) { +// query.getAccurateList().stream().forEach(e -> { +// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), +// MultiMatchQueryBuilder.Type.PHRASE_PREFIX, e)); +// }); +// } else { +// keywordMap.forEach((keyword, boost) -> { +// BaseFieldEnum.getMatchFieldsWithPy().forEach((field, baseBoost) -> { +// float realBoost = baseBoost * boost * 100; +// highlightQuery.add(QueryBuilders.termQuery(field, keyword).boost(realBoost * 2)); +// highlightQuery.add(QueryBuilders.matchPhraseQuery(field, keyword).boost(realBoost)); +// }); +// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.BEST_FIELDS, keyword).boost(0.5F)); +// +// }); +// // 如果有二次搜索,因二次搜索使用短语前缀,此处需要特殊处理 +// if (query.getSk() != null && !"".equals(query.getSk().trim())) { +// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, query.getSk())); +// } +// } +// highlightBuilder.highlightQuery(highlightQuery); +// } + esBaseParam.setHighlightBuilder(highlightBuilder); + +// //高亮显示 +// HighlightBuilder highlightBuilder = new HighlightBuilder(); +// //fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。 +// highlightBuilder.numOfFragments(0); +// //一段 fragment 包含多少个字符。默认100。 +// highlightBuilder.fragmentSize(800000); +// +// highlightBuilder.field(ESConstant.TITLE); +// highlightBuilder.field(ESConstant.CONTENT); +// +// highlightBuilder.requireFieldMatch(false); //多个单词高亮的话,要把这个设置为trues +// highlightBuilder.preTags(ESConstant.HIGHLIGHTPRETAGS); +// highlightBuilder.postTags(ESConstant.HIGHLIGHTPOSTTAGS); + + // 查询 + // from + size 的 分页 查询方式 + SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) + .setQuery(queryBuilder) + .setCollapse(collapseBuilder) + .setSize(size) + .setFrom(from) + .highlighter(esBaseParam.getHighlightBuilder()); + + System.out.println(requestBuilder); + System.out.println("-----"); + + SearchResponse searchResponse = requestBuilder.execute().actionGet(); + // List dataList = Lists.newLinkedList(); + + List dataList = new ArrayList<>(); + if (searchResponse.getHits().totalHits > 0) { + for (SearchHit hit : searchResponse.getHits().getHits()) { + JSONObject data = new JSONObject(); + data.putAll(hit.getSourceAsMap()); + String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE}; + for (int i = 0; i < fieldName.length; i++) { + getHighlightResult(fieldName[i], hit, data); + } + dataList.add(data); + } + } + return dataList; + } + + private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) { + if (hit.getHighlightFields().containsKey(fieldName)) { + HighlightField highlightField = hit.getHighlightFields().get(fieldName); + Text[] fragments = highlightField.fragments(); + String fragmentString = ""; + for (Text fragment : fragments) { + fragmentString += fragment; + } + data.put(fieldName, fragmentString); + } + +// for (SearchHit searchHit : searchHits) { +// esDTO = new EsDTO(searchHit.getId(), searchHit.sourceAsMap()); +// esDTO.setIndex(searchHit.getIndex()); +// esDTO.setType(searchHit.getType()); +// +// // 返回文档的高亮字段 +// if (params.isWithHighlight()) { +// Map highlightFields = searchHit.getHighlightFields(); +// if (highlightFields != null) { +// Map map = new HashMap<>(highlightFields.size()); +// highlightFields.forEach((k, v) -> { +// if (v != null && v.getFragments() != null) { +// map.put(k.replace(ES_KEYWORD_SUFFIX, ""), +// Arrays.asList(v.getFragments()).stream().filter(e -> e != null).map(Text::toString).collect(Collectors.joining(Constants.SEPARATOR_ELLIPSIS))); +// } +// }); +// esDTO.setHighlightData(map); +// } +// } +// pageResp.getList().add(esDTO); +// } + + + } + + public static List queryNew(String clusterName, String[] index, - final QueryBuilder queryBuilder, - String sortFlag, String orderFlag, - Integer size, Integer from, - Integer searchType) { + final QueryBuilder queryBuilder, + String sortFlag, String orderFlag, + Integer size, Integer from, + Integer searchType) { TransportClient client = getClient(clusterName); boolean options = true; boolean optionsf = false; @@ -178,6 +328,7 @@ public abstract class EsUtils { /** * 查询数据量 count + * * @param clusterName * @param index * @param queryBuilder @@ -235,18 +386,18 @@ public abstract class EsUtils { } public static Long queryTotalCountNew(String clusterName, String[] index, - QueryBuilder queryBuilder, - Integer searchType) { + QueryBuilder queryBuilder, + Integer searchType) { TransportClient client = getClient(clusterName); boolean options = true; boolean optionsf = false; // 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 String count = "count"; - AggregationBuilder aggregation ; - if(searchType == 0) { + AggregationBuilder aggregation; + if (searchType == 0) { aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID); - }else{ + } else { aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID); } //searchSourceBuilder.aggregation(aggregation); @@ -256,13 +407,13 @@ public abstract class EsUtils { .setQuery(queryBuilder) .addAggregation(aggregation); - // System.out.println(requestBuilder); + // System.out.println(requestBuilder); Aggregations aggregations = requestBuilder.get().getAggregations(); Cardinality cardinality = aggregations.get(count); // System.out.println("1111 : " + cardinality.getValue()); // System.out.println("2222 : " + requestBuilder.get().getHits().totalHits); long resultCount = cardinality.getValue(); - if(searchType == 2){ + if (searchType == 2) { resultCount = requestBuilder.get().getHits().totalHits; } return resultCount; @@ -439,18 +590,21 @@ public abstract class EsUtils { } } - public static long reIndex(String clusterName, String indexList, String newIndex) { + public static long reIndex(String clusterName, String originalIndex, String currentIndex) { + // String clusterName, String originalIndex, String currentIndex, try { TransportClient client = getClient(clusterName); - System.out.println(indexList + " *** " + newIndex); + System.out.println(originalIndex + " to : " + currentIndex); ReindexRequestBuilder builder = ReindexAction.INSTANCE .newRequestBuilder(client) - .source(indexList) - .destination(newIndex); - String newAliex = newIndex.replace("cl_special_1.0_", "cl_major_"); + .source(originalIndex) + .destination(currentIndex); + String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_"); BulkByScrollResponse response = builder.get(); - IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE.newRequestBuilder(client).addAlias(newIndex, newAliex); + IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE.newRequestBuilder(client) + .addAlias(currentIndex, newAliex); + IndicesAliasesResponse IndicesResponse = indicesBuilder.get(); System.out.println("******* : " + response); System.out.println("##### : " + IndicesResponse); @@ -503,4 +657,104 @@ public abstract class EsUtils { e.printStackTrace(); } } + + public static long reIndexByTasks(String clusterName, + String originalIndex, + String currentIndex, + List tasks, + QueryBuilder queryBuilder) { + try { + TransportClient client = getClient(clusterName); + + System.out.println(originalIndex + " *** " + currentIndex); + ReindexRequestBuilder builder = ReindexAction.INSTANCE + .newRequestBuilder(client) + .source(originalIndex)// 来源索引 + .destination(currentIndex) // 目标索引 + .filter(queryBuilder) + .refresh(true); + // builder. + BulkByScrollResponse response = builder.get(); + // 添加别名,将cl_special_1.0_ 替换成 cl_major 别名 + String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_"); + IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE + .newRequestBuilder(client) + .addAlias(currentIndex, newAliex); + IndicesAliasesResponse IndicesResponse = indicesBuilder.get(); + System.out.println("******* : " + response); + System.out.println("##### : " + IndicesResponse); + return response.getCreated(); + } catch (Exception e) { + e.printStackTrace(); + return 0; + } + } + + public static void delIndexByTasks(String clusterName, String indexName, String cid, List tasks) { + try { + TransportClient client = getClient(clusterName); + System.out.println("**** " + indexName); + cid = cid.toLowerCase(); + DeleteByQueryRequestBuilder builder = DeleteByQueryAction.INSTANCE.newRequestBuilder(client) + .filter(QueryBuilders.termQuery(ESConstant.EN_SOURCE, cid)) + .filter(QueryBuilders.termQuery(ESConstant.TASK_ID, tasks.get(0))) + .source(indexName); + BulkByScrollResponse response2 = builder.get(); + long deleted = response2.getDeleted(); + System.out.println(deleted); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * 全文检索查询拼接(非nested属性重载方法) + * + * @param fieldMap 查询字段 + * @param type 查询类型 + * @param text 文本 + * @return + */ + private QueryBuilder getMatchQueryBuilder(Map fieldMap, MultiMatchQueryBuilder.Type type, String text) { + return this.getMatchQueryBuilder(null, fieldMap, type, text); + } + + /** + * 全文检索查询拼接,不支持nested属性与非nested属性混合使用,并且nested属性必须归属相同path + * + * @param fieldMap 查询字段 + * @param type 查询类型 + * @param text 文本 + * @return + */ + private QueryBuilder getMatchQueryBuilder(String nestedPath, Map fieldMap, MultiMatchQueryBuilder.Type type, String text) { + // 拼装搜索 + QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(text) + .fields(fieldMap) + .type(type == null ? MultiMatchQueryBuilder.Type.BEST_FIELDS : type) + .maxExpansions(5) + .tieBreaker(0.3f) + /// 关闭高频词处理 + //.cutoffFrequency(0.01f) + .lenient(Boolean.TRUE) + .minimumShouldMatch("60%"); + + // 如果有path拼接nested并返回 + if (!Strings.isNullOrEmpty(nestedPath)) { + return this.nestedQuery(nestedPath, queryBuilder); + } + return queryBuilder; + } + + + /** + * 拼装nested条件 + * + * @param nested + * @param queryBuilder + * @return + */ + public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) { + return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None); + } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java index 6f2f65d..5635250 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java @@ -123,19 +123,14 @@ public class SliceScrollUtil { logger.info("[SliceScrollUtil] fetchResultSubjectCache ... 统计+词云 数据查询"); List jsonObjectList = new ArrayList<>(); long calculateStartTime = System.currentTimeMillis(); - Cluster cluster = null; + // Cluster cluster = null; List currentIndexList = null; - if (null != queryRequest.getSubjectId() && !("all").equals(queryRequest.getSubjectId())) { - logger.info("查询 【专题数据】 subjectId :{}", queryRequest.getSubjectId()); - //cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type); + if(null != queryRequest.getSubjectId()){ currentIndexList = subjectQueryDataService.getIndexBySubjectIds(queryRequest.getSubjectId()); -// }else{ -// logger.info("[SliceScrollUtil] fetchResultSubjectCache : 查询 【全局数据】"); -// cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type); -// currentIndexList = subjectQueryDataService.getIndexListByTimeRange(queryRequest.getStartTime(),queryRequest.getEndTime()); + }else{ + return jsonObjectList; } - //Long clusterId = cluster.getId(); Long clusterId = 4L; logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId :{}; currentIndexList :{}", clusterId, currentIndexList.toString()); logger.info("==========进入数据分析Es and Cache,计算开始执行============"); diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java index eb93f7f..30bef9e 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java @@ -63,10 +63,14 @@ public class CollectionUtils { int index = 0; for (Iterator it = list.iterator(); it.hasNext();) { Map.Entry entry = (Map.Entry) it.next(); - JSONObject json= new JSONObject(); - json.put("key",entry.getKey()); - json.put("value",entry.getValue()); - listResult.add(json); + if(!entry.getKey().equals("")) { + if(entry.getKey().toString().length() > 1){ + JSONObject json= new JSONObject(); + json.put("key", entry.getKey()); + json.put("value", entry.getValue()); + listResult.add(json); + } + } index++; if(index == topSize){ break; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java index 8435faf..257d1ca 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java @@ -36,6 +36,28 @@ public interface TaskRepository extends CrudRepository { void updateTaskCount(Long id, Long totalCount, Long todayCount); + /** + * 修改数据库中拖拽任务的状态 + * 有任务移动的时候: + * 移动中:任务的状态为6 + * 专题的状态为1 + * + * 移动完成:任务的状态为3 + * 专题的状态为0 + + */ + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set crawl_status =?2 where id =?1 ", nativeQuery = true) + void updateTaskStatus(Long id, int crawlStatus); + + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_subject set status =?2 where id =?1 ", nativeQuery = true) + void updateSubjectStatus(Long id, int status); + + + // // /** diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java index ac25ae2..35e5bd1 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java @@ -17,12 +17,15 @@ package com.bfd.mf.common.web.vo.params; +import com.bfd.mf.common.service.es.HighLevelQuery; import io.swagger.annotations.ApiModel; import io.swagger.annotations.ApiModelProperty; import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; @ApiModel(value = "查询参数对象") public class QueryRequest implements Serializable { @@ -32,8 +35,9 @@ public class QueryRequest implements Serializable { // private Long subjectId; @ApiModelProperty(value = "subjectId" , required = true , notes = "专题ID") private String subjectId; - @ApiModelProperty(value = "taskId" , required = true , notes = "任务ID") - private String taskId; +// @ApiModelProperty(value = "taskId" , required = true , notes = "任务ID") +// private String taskId; + @ApiModelProperty(value = "userId",required = true) private Long userId; // 翻页字段 @@ -87,6 +91,91 @@ public class QueryRequest implements Serializable { private String valueLabel; private String categoryLabel; + private List tasks; + private String originalIndex; + private String currentIndex; + + private List moveTasks; + private List delTasks; + private List taskIds; + + private String pageType ; + private String userType; + + public String getUserType() { + return userType; + } + + public void setUserType(String userType) { + this.userType = userType; + } + + public String getPageType() { + return pageType; + } + + public void setPageType(String pageType) { + this.pageType = pageType; + } + + public List getMoveTasks() { + return moveTasks; + } + + public void setMoveTasks(List moveTasks) { + this.moveTasks = moveTasks; + } + + public List getDelTasks() { + return delTasks; + } + + public void setDelTasks(List delTasks) { + this.delTasks = delTasks; + } + + public List getTaskIds() { + return taskIds; + } + + public void setTaskIds(List taskIds) { + this.taskIds = taskIds; + } + + private List highLevelQueries; + public List getHighLevelQueries() { + return highLevelQueries; + } + + public void setHighLevelQueries(List highLevelQueries) { + this.highLevelQueries = highLevelQueries == null ? null : highLevelQueries.stream().filter(HighLevelQuery::isAvailable).collect(Collectors.toList()); + } + + + + public String getOriginalIndex() { + return originalIndex; + } + + public void setOriginalIndex(String originalIndex) { + this.originalIndex = originalIndex; + } + + public String getCurrentIndex() { + return currentIndex; + } + + public void setCurrentIndex(String currentIndex) { + this.currentIndex = currentIndex; + } + + public List getTasks() { + return tasks; + } + + public void setTasks(List tasks) { + this.tasks = tasks; + } public String getValueLabel() { return valueLabel; @@ -120,13 +209,13 @@ public class QueryRequest implements Serializable { this.subjectId = subjectId; } - public String getTaskId() { - return taskId; - } - - public void setTaskId(String taskId) { - this.taskId = taskId; - } +// public String getTaskId() { +// return taskId; +// } +// +// public void setTaskId(String taskId) { +// this.taskId = taskId; +// } public String getScrollId() { return scrollId; @@ -381,7 +470,7 @@ public class QueryRequest implements Serializable { public String toString() { return "QueryRequest{" + "subjectId='" + subjectId + '\'' + - ", taskId='" + taskId + '\'' + +// ", taskId='" + taskId + '\'' + ", userId=" + userId + ", page=" + page + ", limit=" + limit + diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java index 79c0cd0..11c3004 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java @@ -147,6 +147,34 @@ public class ESMonitorBaseEntity implements Comparable, Ser private int hasVideo; private int hasFile; + private String attr; + private String attachTag; + private String brand; + + public String getBrand() { + return brand; + } + + public void setBrand(String brand) { + this.brand = brand; + } + + public String getAttachTag() { + return attachTag; + } + + public void setAttachTag(String attachTag) { + this.attachTag = attachTag; + } + + public String getAttr() { + return attr; + } + + public void setAttr(String attr) { + this.attr = attr; + } + public int getViewCnt() { return viewCnt; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java index b566c11..6e33cd8 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java @@ -21,8 +21,6 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.ResponseBody; -import java.util.ArrayList; - /** * @author dujing */ @@ -128,6 +126,11 @@ public class SearchDataController { /** * 查询一条数据对应的 评论列表 + * pageType = socialComment:评论 + * === + * pageType = socialFollow:转发和点赞 + * userType = 1:点赞用户 + * userType = 0:分享用户 */ @ResponseBody @ApiOperation(value = "查询评论列表") @@ -135,7 +138,48 @@ public class SearchDataController { public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getCommentsByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); try { - JSONObject result = searchDataService.queryComments(queryRequest); + JSONObject result = searchDataService.queryCommentsNew(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } catch (Exception e) { + logger.error("[getCommentsByDocId] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + + } + + /** + * 2023-04-26 需要将主贴中的评论,转发,点在拆分出来 + * https://caiji.percent.cn/api/sq/crawl/getCommentsByDocId + * https://caiji.percent.cn/api/sq/crawl/getQuotesByDocId + * https://caiji.percent.cn/api/sq/crawl/getAttitudesByDocId + * @param queryRequest + * @return + */ + @ResponseBody + @ApiOperation(value = "查询转发列表") + @RequestMapping(value = "/getQuotesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + public JSONObject getQuotesByDocId(@RequestBody QueryRequest queryRequest) { + logger.info("[getQuotesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + queryRequest.setPageType(ESConstant.SOCIAL_FOLLOW); + queryRequest.setUserType("0"); + JSONObject result = searchDataService.queryCommentsNew(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } catch (Exception e) { + logger.error("[getCommentsByDocId] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + + } + @ResponseBody + @ApiOperation(value = "查询点赞列表") + @RequestMapping(value = "/getAttitudesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + public JSONObject getAttitudesByDocId(@RequestBody QueryRequest queryRequest) { + logger.info("[getAttitudesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + queryRequest.setPageType(ESConstant.SOCIAL_FOLLOW); + queryRequest.setUserType("1"); + JSONObject result = searchDataService.queryCommentsNew(queryRequest); return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[getCommentsByDocId] Failed,The error message is :{}", e); @@ -188,26 +232,10 @@ public class SearchDataController { logger.error("[updateLabel] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - } - /** - * 示例文件夹的专题复制 - */ - @ApiOperation(value = "复制专题") - @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) - @ResponseBody - public JSONObject reIndex(@RequestBody QueryRequest queryRequest){ - logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - try { - searchDataService.reIndexSubject(queryRequest); - } catch (Exception e) { - logger.error("[reIndex] Failed,The error message is :{}", e); - return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); - } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, new JSONObject()); - } + /** * 删除专题的接口 @@ -296,4 +324,73 @@ public class SearchDataController { } + /** + * 示例文件夹的专题复制 + */ + @ApiOperation(value = "复制专题") + @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject reIndex(@RequestBody QueryRequest queryRequest){ + logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + searchDataService.reIndexSubject(queryRequest); + } catch (Exception e) { + logger.error("[reIndex] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, new JSONObject()); + } + /** + * 2023-04-14 采集平台2.0新增接口 + * 移动任务的数据,并将原索引中的数据删除 + * POST + * crawl/subject/moveByTasks + * 参数 + * {"originalIndex":"302088","moveTasks":["13889"],"currentIndex":"309980"} + * @param queryRequest + * @return + */ + @ApiOperation(value = "查询数据列表") + @RequestMapping(value = "/subject/moveByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject reindexDataByTasks(@RequestBody QueryRequest queryRequest) { + logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + JSONObject result = new JSONObject(); + long start = System.currentTimeMillis(); + // 任务数据移动,这个需要离线移动 + result = searchDataService.reindexByTasks(queryRequest); + long end = System.currentTimeMillis(); + logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start)); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } catch (Exception e) { + logger.error("[queryData] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + + } + + + /** + * 任务数据删除 + * POST + * crawl/subject/deleteByTasks + * 参数 + * {"index":"302088","delTasks":["13889"]} + * @param queryRequest + * @return + */ + @ApiOperation(value = "查询数据列表") + @RequestMapping(value = "/subject/deleteByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject delDataByTasks(@RequestBody QueryRequest queryRequest) { + logger.info("[delDataByTasks] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + JSONObject result = searchDataService.deleteByTasks(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } catch (Exception e) { + logger.error("[deleteSubject] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java index 27d24ce..bca2c05 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java @@ -40,14 +40,16 @@ public class SearchAnalysisService { public JSONObject getAnalysisResponse(QueryRequest queryRequest) { JSONObject jsonObject = new JSONObject(); try{ - List esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS); - // 渠道走势 - jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest,esMonitorEntity); - // 获取 渠道统计结果 分类标签统计结果 价值标签统计结果 - jsonObject = dataAnalysisChannelCounts(jsonObject,esMonitorEntity); - // 获取词云 - JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(esMonitorEntity); - jsonObject.put("cloudCounts",cloudCounts); + if(null != queryRequest.getSubjectId()) { + List esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS); + // 渠道走势 + jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest, esMonitorEntity); + // 获取 渠道统计结果 分类标签统计结果 价值标签统计结果 + jsonObject = dataAnalysisChannelCounts(jsonObject, esMonitorEntity); + // 获取词云 + JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(esMonitorEntity); + jsonObject.put("cloudCounts", cloudCounts); + } }catch (Exception e){ e.printStackTrace(); } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java index 9bc7b67..e36f4c5 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java @@ -146,6 +146,7 @@ public class SearchAuthorService extends CrudService()); @@ -154,22 +155,51 @@ public class SearchAuthorService extends CrudService dataList, List esMonitorListEntity,String indexName) throws Exception { - List> site = siteRepository.findsiteByDel(0); - Map siteIdsMap = new HashMap<>(); - Map siteIconMap = new HashMap<>(); - for (Map map: site) { - siteIdsMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map.get("site_id").toString()); - siteIconMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map.get("site_icon").toString()); + List> site = siteRepository.findsiteByDel(0); + Map> siteMap = new HashMap<>(); + for (Map map : site) { + siteMap.put(map.get(ESConstant.CID).toString().toLowerCase(), map); } + if(null != dataList && dataList.size() > 0) { for (JSONObject json : dataList) { - ESMonitorEntity mainMonitorEntity = parseMainMessage(json, indexName,siteIdsMap,siteIconMap); + ESMonitorEntity mainMonitorEntity = parseMainMessage(json, indexName,siteMap); esMonitorListEntity.add(mainMonitorEntity); } } } - private ESMonitorEntity parseMainMessage(JSONObject jsonObject,String indexName,Map siteIdsMap,Map siteIconMap) throws Exception { + private JSONObject getSite(JSONObject jsonObject, String enSource) { + List> site = siteRepository.findSiteByEnSource(enSource); + Map> siteMap = new HashMap<>(); + for (Map map : site) { + siteMap.put(map.get(ESConstant.CID).toString().toLowerCase(), map); + } + String siteId = ""; + String icon = ""; + String siteType = ""; + Map siteOtherMap = siteMap.get(enSource); + if (enSource.equals(ESConstant.SINA)) { + siteId = "183"; + } else { + if (siteOtherMap.containsKey("site_id")) { + siteId = siteMap.get(enSource).get("site_id").toString(); + } + if (siteOtherMap.containsKey("site_icon")) { + icon = siteMap.get(enSource).get("site_icon").toString(); + } + if (siteOtherMap.containsKey("site_type")) { + siteType = siteMap.get(enSource).get("site_type").toString(); + } + } + jsonObject.put(ESConstant.SITEID, siteId); + jsonObject.put(ESConstant.SITETYPE, siteType); + jsonObject.put(ESConstant.SITEICON, icon); + return jsonObject; + } + + private ESMonitorEntity parseMainMessage(JSONObject jsonObject,String indexName, + Map> siteMap) throws Exception { // logger.info("[SearchAuthorService] parseMainMessage ... "); Map sourceAsMap = jsonObject; @@ -182,12 +212,10 @@ public class SearchAuthorService extends CrudService siteOtherMap = siteMap.get(enSource); + if (siteOtherMap.containsKey("site_id")) { + siteId = siteMap.get(enSource).get("site_id").toString(); + } + if (siteOtherMap.containsKey("site_icon")) { + icon = siteMap.get(enSource).get("site_icon").toString(); + } + if (siteOtherMap.containsKey("site_type")) { + siteType = siteMap.get(enSource).get("site_type").toString(); + } + } + //JSONObject newjsonObject = getVideoPathList(jsonObject); //List> videoList = (List>) newjsonObject.get(ESConstant.VIDEOLIST); diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java index 641e87b..1836559 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java @@ -2,24 +2,26 @@ package com.bfd.mf.service; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.service.common.CrudService; -import com.bfd.mf.common.service.es.*; +import com.bfd.mf.common.service.es.EsCommonService; +import com.bfd.mf.common.service.es.EsQueryServiceForSQMini; +import com.bfd.mf.common.service.es.EsQueryServiceForSQNormal; +import com.bfd.mf.common.service.es.SubjectQueryDataService; import com.bfd.mf.common.util.ESServerUtils; import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.web.entity.mysql.SentimentModify; import com.bfd.mf.common.web.entity.mysql.cache.Cluster; import com.bfd.mf.common.web.repository.mysql.SentimentRepository; import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; +import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity; import com.bfd.mf.config.BFDApiConfig; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.common.text.Text; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.sort.SortOrder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,7 +30,10 @@ import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; import java.io.Serializable; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; @Service public class SearchDataService extends CrudService implements Serializable { @@ -48,6 +53,8 @@ public class SearchDataService extends CrudService dataList, List esMonitorListEntity, Integer searchType) { try { @@ -224,12 +232,12 @@ public class SearchDataService extends CrudService highlight; + if (sourceAsMap.containsKey("highlight")) { + System.out.println(JSONObject.toJSONString(sourceAsMap.get("highlight"))); + highlight = (Map) sourceAsMap.get("highlight"); + System.out.println(JSONObject.toJSONString(highlight)); + } + try { esMonitorEntity.setForwardContent(sourceAsMap.get("forwardContent").toString()); esMonitorEntity.setReadCount(readCount); @@ -542,15 +572,15 @@ public class SearchDataService extends CrudService currentIndexList = new ArrayList<>(); +// public JSONObject queryComments(QueryRequest queryRequest) { +// // 先确认一下 要查的主贴是属于 专题还是 全部数据,因此需要查 subjectId,如果没有 subjectId 这个字段说明要查的是 日期索引的ES +// Cluster cluster = null; +// List currentIndexList = new ArrayList<>(); +// String subjectId = queryRequest.getSubjectId(); +// if (null != queryRequest.getSubjectId() && !("").equals(subjectId)) { // 如果是专题,去专题的索引查就行 +// subjectId = bfdApiConfig.getIndexNamePre() + subjectId; +// currentIndexList.add(subjectId); +// } else { +// // 如果是全部数据,就直接去 渠道对应的索引查,渠道可以从 docId 中截取出来 +// // 这个以后不会用了 +// logger.info("[SearchDataService] queryComment: 查询 全局数据"); +// String docId = queryRequest.getDocId(); +// String indexType = "cl_index_" + docId.split("_")[1]; +// currentIndexList.add(indexType); +// } +// JSONObject result = getCommentListByDocId(queryRequest, currentIndexList); +// return result; +// } + + /** + * 2023-04-26 + * 根据 docId 查询 评论列表 + * + * @param queryRequest + * @return + */ + public JSONObject queryCommentsNew(QueryRequest queryRequest) { + List indexList = new ArrayList<>(); String subjectId = queryRequest.getSubjectId(); - if (null != queryRequest.getSubjectId() && !("").equals(subjectId)) { // 如果是专题,去专题的索引查就行 - subjectId =bfdApiConfig.getIndexNamePre() + subjectId; - currentIndexList.add(subjectId); - } else { // 如果是全部数据,就直接去 渠道对应的索引查,渠道可以从 docId 中截取出来 - logger.info("[SearchDataService] queryComment: 查询 全局数据"); - String docId = queryRequest.getDocId(); - String indexType = "cl_index_" + docId.split("_")[1]; - currentIndexList.add(indexType); - } - JSONObject result = getCommentListByDocId(queryRequest, currentIndexList); + subjectId = bfdApiConfig.getIndexNamePre() + subjectId; + indexList.add(subjectId); + JSONObject result = getCommentListByDocId(queryRequest, indexList); return result; } + /** + * 2023-04-26 + * 根据主贴ID查 评论,转发,点赞的数据列表 + * @param queryRequest + * @param currentIndexList + * @return + */ private JSONObject getCommentListByDocId(QueryRequest queryRequest, List currentIndexList) { JSONObject json = new JSONObject(); try { String docId = queryRequest.getDocId(); /**依据文档Id查询对应的文档*/ + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); QueryBuilder queryBuilder = esCommonService.buildKeyWordsQueryBuilder(docId, ESConstant.DOC_ID); QueryBuilder queryBuilder1 = QueryBuilders.termQuery(ESConstant.PRIMARY, 0); - String docType = queryRequest.getDocId().split("_")[1]; -// if(docType.equals(ESConstant.ITEM)) { -// queryBuilder1 = QueryBuilders.termQuery(ESConstant.PRIMARY, 1); -// } - BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery() - .must(queryBuilder1) - .must(queryBuilder); + QueryBuilder queryBuilder3 = QueryBuilders.termQuery(ESConstant.PAGETYPE, ESConstant.SOCIAL_FOLLOW); + // pageType 为空的话说明就不是转发和点赞,因此pageType != socialFollow + + List comments = new ArrayList<>(); + Long size = 0L; + // 查询 转发 或者 点赞 + if (null != queryRequest.getPageType()) { + String userType = queryRequest.getUserType(); + System.out.println(queryRequest.getPageType() + " | " + userType); + // 包含这个字段就意味着查询的是转发或者点赞啊 + if (null != queryRequest.getUserType()) { + QueryBuilder queryBuilder2 = QueryBuilders.termQuery(ESConstant.USER_TYPE, userType); + boolQueryBuilder.must(queryBuilder2) + .must(queryBuilder3); + } + } else { + // 查询评论,首先 + boolQueryBuilder.mustNot(queryBuilder3); + + // 这个是从评论列表点击进去详情页面,如果是 转发和点赞,就先不要这个TOP了吧 + if (null != queryRequest.getDataId() && !queryRequest.getDataId().equals("")) { + String dataId = queryRequest.getDataId(); + JSONObject TopComment = getCommentByDataId(currentIndexList, dataId); + if (TopComment.size() > 0) { + size = 1L; + comments.add(TopComment); + } + boolQueryBuilder.mustNot(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId)); + } + } + + +// +// String docType = queryRequest.getDocId().split("_")[1]; + boolQueryBuilder.must(queryBuilder1).must(queryBuilder); SortOrder flag; if (null != queryRequest.getOrder() && queryRequest.getOrder().equals(ESConstant.DESC)) { @@ -665,25 +752,13 @@ public class SearchDataService extends CrudService comments = new ArrayList<>(); - Long size = 0L; - if (null != queryRequest.getDataId() && !queryRequest.getDataId().equals("")) { - String dataId = queryRequest.getDataId(); -// String docType = queryRequest.getDocType(); - JSONObject TopComment = getCommentByDataId(currentIndexList, dataId, docType); - // System.out.println(TopComment); - if (TopComment.size() > 0) { - size = 1L; - comments.add(TopComment); - } - boolQueryBuilder.mustNot(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId)); - } - String siteId = queryRequest.getSiteId(); long clusterId = 4; + + System.out.println(boolQueryBuilder); /**获取信息*/ SearchRequestBuilder builder = esServerUtils - .buildSearchRequestBuilder(clusterId,currentIndexList) + .buildSearchRequestBuilder(clusterId, currentIndexList) .setQuery(boolQueryBuilder) .setFrom(start) .setSize(limit) @@ -710,17 +785,22 @@ public class SearchDataService extends CrudService currentIndexList, String dataId, String docType) { + /** + * 2023-04-26 + * 这个方法是这样的,当从评论列表点击一条评论进入详情的时候,被点击的这条评论需要置顶。因此需要单独查出来放到TOP中 + * + * @param currentIndexList + * @param dataId + * @return + */ + + + private JSONObject getCommentByDataId(List currentIndexList, String dataId) { JSONObject jsonObject = new JSONObject(); long clusterId = 4L; try { BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId)); - // 由于电商的数据的primary =1 因此不加这个条件了, -// if(docType.equals(ESConstant.ITEM)){ -// boolQueryBuilder.must(QueryBuilders.termQuery(ESConstant.PRIMARY, 1)); -// }else { boolQueryBuilder.must(QueryBuilders.termQuery(ESConstant.PRIMARY, 0)); -// } SearchRequestBuilder builder = esServerUtils.buildSearchRequestBuilder(clusterId, currentIndexList) .setQuery(boolQueryBuilder) .setFetchSource(ESConstant.COMMENT_FIELD_DATA, null); @@ -740,6 +820,39 @@ public class SearchDataService extends CrudService currentIndexList, String dataId) { + JSONObject jsonObject = new JSONObject(); + long clusterId = 4L; + try { + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.DATA_ID, dataId)); + boolQueryBuilder.must(QueryBuilders.termQuery(ESConstant.PRIMARY, 0)); + SearchRequestBuilder builder = esServerUtils.buildSearchRequestBuilder(clusterId, currentIndexList) + .setQuery(boolQueryBuilder) + .setFetchSource(ESConstant.COMMENT_FIELD_DATA, null); + + SearchResponse searchResponse = builder.execute().actionGet(); + SearchHit[] response = searchResponse.getHits().getHits(); + if (response.length > 0) { + Map result = response[0].getSourceAsMap(); + jsonObject.putAll(result); + } + + } catch (Exception e) { + e.printStackTrace(); + } + return jsonObject; + } + + + /** * 根据docId 查询一条数据的详情及评论列表 * * @param queryRequest @@ -759,7 +872,7 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds( queryRequest.getSubjectId()); - //Long clusterId = cluster.getId(); - logger.info("[SearchDataService] queryDataFromFolder: currentIndexList : {}", currentIndexList.toString()); - // String indexName = currentIndexList.get(0); + List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(queryRequest.getSubjectId()); + logger.info("[SearchDataService] queryDataFromFolder: currentIndexList : {}", currentIndexList.toString()); String indexNames[] = currentIndexList.toArray(new String[currentIndexList.size()]); - List dataList = esQueryServiceForSQMini.queryDataFromOneSubject(indexNames, queryRequest); + List dataList = esQueryServiceForSQMini.queryDataFromFolder(indexNames, queryRequest); List esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); parseQueryResult(dataList, esMonitorEntityLists, searchType); - Long totalCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames, queryRequest); + Long totalCount = esQueryServiceForSQMini.queryDataCountFromFolder(indexNames, queryRequest); logger.info("[SearchDataService] queryDataFromFolder: {}", totalCount); jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount); jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists); @@ -1084,12 +1193,12 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds( queryRequest.getSubjectId()); - // Long clusterId = cluster.getId(); + List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(queryRequest.getSubjectId()); + // Long clusterId = cluster.getId(); String[] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); logger.info("[SearchDataService] exportDataFromFolder : IndexName :{} ; currentIndexList :{}", indexName[0], currentIndexList.toString()); // 开始查询 - jsonObject = esQueryServiceForSQMini.exportDataFromOneSubject(indexName, queryRequest); + jsonObject = esQueryServiceForSQMini.exportDataFromFolder(indexName, queryRequest); List dataList = (List) jsonObject.get(ESConstant.MONITORLISTS); List esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); @@ -1105,12 +1214,12 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds( queryRequest.getSubjectId()); - // Long clusterId = cluster.getId(); + // Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type); // 专题索引 special_cluster_type + List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(queryRequest.getSubjectId()); + // Long clusterId = cluster.getId(); String[] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); logger.info("[SearchDataService] exportDataInSubjectIndex : IndexName :{} ; currentIndexList :{}", indexName[0], currentIndexList.toString()); - jsonObject = esQueryServiceForSQMini.exportDataFromOneSubject(indexName, queryRequest); + jsonObject = esQueryServiceForSQMini.exportDataFromFolder(indexName, queryRequest); } catch (Exception e) { e.printStackTrace(); } @@ -1125,11 +1234,11 @@ public class SearchDataService extends CrudService tasks = queryRequest.getMoveTasks(); + // 将要移动的任务的状态置为 6,并将两个专题的状态都置为1 + for (Long taskId : tasks) { + taskRepository.updateTaskStatus(Long.valueOf(taskId), MOVE_TASKS_STATUS); + } + taskRepository.updateSubjectStatus(Long.valueOf(queryRequest.getOriginalIndex()), MOVE_FLORD_STATUS); + taskRepository.updateSubjectStatus(Long.valueOf(queryRequest.getCurrentIndex()), MOVE_FLORD_STATUS); + System.out.println("moveTaskId : " + tasks.get(0)); + try { + // 将上面专题的数据复制到新的专题下! + long created = esQueryServiceForSQMini.reIndexDataByTasks(originalIndex, currentIndex, tasks); + jsonObject.put("move", created); + // 这块是等移动完成后再修改的。所以应该在里面去调用 + //移动完成后需要将任务的状态改为 3(已完成)并将两个专题的状态都置为0 + } catch (Exception e) { + e.printStackTrace(); + } + return jsonObject; + } + + public JSONObject deleteByTasks(QueryRequest queryRequest) { + JSONObject jsonObject = new JSONObject(); + String indexName = bfdApiConfig.getIndexNamePre() + queryRequest.getSubjectId(); + String cid = queryRequest.getCid(); + List tasks = queryRequest.getDelTasks(); + System.out.println("del : " + tasks.get(0)); + + try { + //esQueryServiceForSQMini.deleteByTasks(indexName, cid, tasks); + } catch (Exception e) { + e.printStackTrace(); + } + return jsonObject; + } } diff --git a/cl_search_api/src/main/resources/application-113.yml b/cl_search_api/src/main/resources/application-113.yml deleted file mode 100644 index 53c40b1..0000000 --- a/cl_search_api/src/main/resources/application-113.yml +++ /dev/null @@ -1,61 +0,0 @@ -server: - port: 18909 - tomcat: - uri-encoding: UTF-8 - max-threads: 800 - maxHttpHeaderSize: 655360 - http2: - enabled: true - -spring: - datasource: - driver-class-name: com.mysql.jdbc.Driver - username: root - password: bfd123 - url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round - - hikari: - maximum-pool-size: 10 - minimum-idle: 1 - - jpa: - open-in-view: false - database: mysql - - servlet: - multipart: - max-file-size: 1009MB #单个数据大小 - max-request-size: 2048MB #总数据大小 - -####### -bfd.api.mf: - textPostUrl: http://rule.sq.baifendian.com/nerplace - emotionPostUrl : http://172.18.1.166:15038/bertsentiment - wordCloudPostUrl : http://rule.sq.baifendian.com/wordcloud - - goFastPostUrl : http://172.18.1.113:8080/upload - goFastDomain : http://172.18.1.113:8080 - uploadOLYExcelPath : /opt/nfsdata/excelTask/" - uploadZipPath : /opt/nfsdata/uploadFiles/ - indexNamePre : cl_major_ - -# es-mini: -# name: SQ_Mini -# address: 172.18.1.147:9313 -# upper: 2018-09-01 -# standby: cl_major_* -# es-normal: -# name: SQ_Normal_new -# address: 172.18.1.134:9301 -# upper: 2018-09-01 -# standby: cl_index_* - es-mini: - name: SQ_Mini - address: 172.26.11.111:9301 - upper: 2018-09-01 - standby: cl_major_ - es-normal: - name: SQ_Normal - address: 172.26.11.109:9301 - upper: 2018-09-01 - standby: cl_index_* \ No newline at end of file diff --git a/cl_search_api/src/main/resources/application-134.yml b/cl_search_api/src/main/resources/application-134.yml deleted file mode 100644 index 8fc57eb..0000000 --- a/cl_search_api/src/main/resources/application-134.yml +++ /dev/null @@ -1,61 +0,0 @@ -server: - port: 18909 - tomcat: - uri-encoding: UTF-8 - max-threads: 800 - maxHttpHeaderSize: 655360 - http2: - enabled: true - -spring: - datasource: - driver-class-name: com.mysql.jdbc.Driver - username: root - password: Bfd123!@# - url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round - - hikari: - maximum-pool-size: 10 - minimum-idle: 1 - - jpa: - open-in-view: false - database: mysql - - servlet: - multipart: - max-file-size: 1009MB #单个数据大小 - max-request-size: 2048MB #总数据大小 - -####### -bfd.api.mf: - textPostUrl: http://rule.sq.baifendian.com/nerplace - emotionPostUrl : http://172.18.1.166:15038/bertsentiment - wordCloudPostUrl : http://rule.sq.baifendian.com/wordcloud - - goFastPostUrl : http://172.18.1.113:8080/upload - goFastDomain : http://172.18.1.113:8080 - uploadOLYExcelPath : /opt/nfsdata/excelTask/ - uploadZipPath : /opt/nfsdata/uploadFiles/ - indexNamePre : cl_major_ - - es-mini: - name: SQ_Mini - address: 172.18.1.147:9313 - upper: 2018-09-01 - standby: cl_major_* - es-normal: - name: SQ_Normal_new - address: 172.18.1.134:9301 - upper: 2018-09-01 - standby: cl_index_* -# es-mini: -# name: SQ_Mini -# address: 172.26.11.111:9301 -# upper: 2018-09-01 -# standby: cl_major_ -# es-normal: -# name: SQ_Normal -# address: 172.26.11.109:9301 -# upper: 2018-09-01 -# standby: cl_index_* \ No newline at end of file diff --git a/cl_search_api/src/main/resources/application.yml b/cl_search_api/src/main/resources/application.yml index ab96bf1..6ad23d3 100644 --- a/cl_search_api/src/main/resources/application.yml +++ b/cl_search_api/src/main/resources/application.yml @@ -1,5 +1,5 @@ server: - port: 18902 + port: 18907 tomcat: uri-encoding: UTF-8 max-threads: 800 @@ -15,15 +15,15 @@ server: spring: datasource: driver-class-name: com.mysql.cj.jdbc.Driver - username: crawl - password: crawl123 - url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC +# username: crawl666 +# password: lx2a4jN1xFT96kj20LU= +# url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC # username: root # password: bfd123 # url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC -# username: crawl -# password: crawl123 -# url: jdbc:mysql://172.26.11.110:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC + username: crawl + password: crawl123 + url: jdbc:mysql://172.26.11.110:3306/intelligent_crawl?useSSL=true&useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC hikari: maximum-pool-size: 10