map = new HashMap<>(highlightFields.size());
+// highlightFields.forEach((k, v) -> {
+// if (v != null && v.getFragments() != null) {
+// map.put(k.replace(ES_KEYWORD_SUFFIX, ""),
+// Arrays.asList(v.getFragments()).stream().filter(e -> e != null).map(Text::toString).collect(Collectors.joining(Constants.SEPARATOR_ELLIPSIS)));
+// }
+// });
+// esDTO.setHighlightData(map);
+// }
+// }
+// pageResp.getList().add(esDTO);
+// }
+// pageResp.setTotal(count > EsClientConfig.scrollSize ? EsClientConfig.scrollSize : count);
+//
+// // 自定义结果处理
+// if (consumerResponse != null) {
+// consumerResponse.accept(response);
+// }
+// logger.debug("fetchPage,size = {}, total = {}", pageResp.getList().size(), pageResp.getTotal());
+// } catch (Exception e) {
+// throw new RuntimeException("call pageDataQuery exception ", e);
+// }
+// return pageResp;
+// }
+//
+//
+// /**
+// * 此方法中,通用字段名使用了BaseFieldEnum索引字段的名字,因各索引统一所以不会产生问题。
+// *
+// * 定制化分页查询,用于主检索
+// *
+// * @param query 查询条件
+// * @param customBuilder 自定义条件
+// * @param clazz 返回类型
+// * @param indexEnums 查询的索引
+// * @param <P> 参数类型
+// * @param 结果类型
+// * @return
+// */
+// public SearchPageResp fetchCustomPage(P query, Consumer customBuilder, Class clazz, IndexEnum... indexEnums) {
+// EsBaseParam esBaseParam = new EsBaseParam();
+// esBaseParam.setIndex(Arrays.stream(indexEnums).map(IndexEnum::getSearchIndex).toArray(String[]::new));
+// esBaseParam.setExcludes(new String[]{BaseFieldEnum.content.name(), HtmlFieldEnum.forward_content.name()});
+// esBaseParam.setPage(query.getPage());
+// esBaseParam.setLimit(query.getLimit());
+// // 排序处理
+// int scoreOrder = 1, timeOrder = 2, asc = 1;
+// if (query.getOrderType() == timeOrder) {
+// esBaseParam.setOrderField(BaseFieldEnum.public_time.name());
+// esBaseParam.setDescOrAsc(query.getOrder() == asc ? SortOrder.ASC : SortOrder.DESC);
+// }
+//
+// // 拼装查询条件
+// BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
+// // 过滤掉被删除的
+// queryBuilder.filter(QueryBuilders.termQuery(BaseFieldEnum.del.name(), Constants.NO));
+// // 自定义条件
+// if (customBuilder != null) {
+// customBuilder.accept(queryBuilder);
+// }
+// // 设置documentId查询范围
+// if (query.getDocumentId() != null && query.getDocumentId().length != 0) {
+// queryBuilder.filter(QueryBuilders.idsQuery().addIds(query.getDocumentId()));
+// }
+// // 标题条件
+// if (!Strings.isNullOrEmpty(query.getTitle())) {
+// queryBuilder.filter(QueryBuilders.wildcardQuery(this.keyword(BaseFieldEnum.title.name()), this.wildcardDelimiter(QueryParser.escape(query.getTitle()))));
+// }
+// // 来源条件
+// if (query.getSourceList() != null && !query.getSourceList().isEmpty()) {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.source.name(), query.getSourceList()));
+// }
+// // 原文译文条件(满足原文条件时无该字段匹配)
+// if (query.getContentTypeList() != null && !query.getContentTypeList().isEmpty()) {
+// TermsQueryBuilder termsQueryBuilder = QueryBuilders.termsQuery(BaseFieldEnum.type.name(), query.getContentTypeList());
+// if (query.getContentTypeList().contains(ContentTypeEnum.ORIGINAL.getKey())){
+// BoolQueryBuilder contentTypeBoolQueryBuilder = QueryBuilders.boolQuery();
+// contentTypeBoolQueryBuilder.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.type.name())));
+// contentTypeBoolQueryBuilder.should(termsQueryBuilder);
+// queryBuilder.filter(contentTypeBoolQueryBuilder);
+// }else {
+// queryBuilder.filter(termsQueryBuilder);
+// }
+// }
+// // 上传用户条件
+// if (query.getUploadUser() != null) {
+// queryBuilder.filter(QueryBuilders.termQuery(BaseFieldEnum.upload_user.name(), query.getUploadUser()));
+// }
+// //部门条件
+// if (String.valueOf(Constants.YES).equals(query.getOnlyDepartment()) && UserUtil.getUser().getDepartmentId() != null) {
+// queryBuilder.filter(QueryBuilders.termQuery(DocumentFieldEnum.department_id.name(), UserUtil.getUser().getDepartmentId()));
+// }
+// // 语言条件
+// if (query.getLanguageList() != null && !query.getLanguageList().isEmpty()) {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.language.name(), query.getLanguageList()));
+// }
+//
+// // 关键词搜索自定义--带拼音
+// if (query.getK() != null && !"".equals(query.getK().trim())) {
+// // 默认短语
+// MultiMatchQueryBuilder.Type matchType = MultiMatchQueryBuilder.Type.BEST_FIELDS;
+// queryBuilder.must(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), matchType, query.getK()));
+// }
+//
+// // 精确搜索
+// if (query.isAccurateQuery()) {
+// // 暂时只考虑一对双引号的情况
+// query.getAccurateList().stream().forEach(e -> {
+// queryBuilder.must(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, e));
+// });
+// }
+//
+// // 二次搜索--采用短语搜索--带拼音
+// if (query.getSk() != null && !"".equals(query.getSk().trim())) {
+// queryBuilder.must(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, query.getSk()));
+// }
+//
+// // 排除搜索--采用短语搜索
+// if (query.getNk() != null && !"".equals(query.getNk().trim())) {
+// queryBuilder.mustNot(this.getMatchQueryBuilder(null,BaseFieldEnum.getMatchFields(), new String[]{query.getNk()}, true, SearchWordStrategyEnum.ANY));
+// }
+//
+// // 时间条件(满足条件或无该字段)
+// if (query.getBeginTime() != null || query.getEndTime() != null){
+// BoolQueryBuilder timeBoolQueryBuilder = QueryBuilders.boolQuery();
+// timeBoolQueryBuilder.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.public_time.name())));
+// BoolQueryBuilder timeRangeBoolQueryBuilder = QueryBuilders.boolQuery();
+// if (query.getBeginTime() != null) {
+// timeRangeBoolQueryBuilder.filter(QueryBuilders.rangeQuery(BaseFieldEnum.public_time.name()).gte(query.getBeginTime()));
+// }
+// if (query.getEndTime() != null) {
+// timeRangeBoolQueryBuilder.filter(QueryBuilders.rangeQuery(BaseFieldEnum.public_time.name()).lte(query.getEndTime()));
+// }
+// timeBoolQueryBuilder.should(timeRangeBoolQueryBuilder);
+// queryBuilder.filter(timeBoolQueryBuilder);
+// }
+//
+//
+// // 标签搜索
+// if (query.getAiTagList() != null && !query.getAiTagList().isEmpty()) {
+// queryBuilder.filter(this.nestedTermQuery(BaseFieldEnum.ai_tag.name(), LabelWeightFieldEnum.label.name(), query.getAiTagList().stream().toArray(String[]::new)));
+// }
+// // 地区搜索
+// if (query.getAiAreaList() != null && !query.getAiAreaList().isEmpty()) {
+// queryBuilder.filter(this.nestedTermQuery(BaseFieldEnum.ai_area.name(), LabelWeightFieldEnum.label.name(), query.getAiAreaList().stream().toArray(String[]::new)));
+// }
+// // 主题一级分类
+// if (!Strings.isNullOrEmpty(query.getSubjectClassify1())) {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.subject_classify1.name(), query.getSubjectClassify1()));
+// }
+// // 主题二级分类
+// if (!Strings.isNullOrEmpty(query.getSubjectClassify2())) {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.subject_classify2.name(), query.getSubjectClassify2()));
+// }
+// // 主题(满足其他主题时无主题字段匹配)
+// if (query.getSubjectList() != null && !query.getSubjectList().isEmpty()) {
+// if (query.getSubjectList().contains(DefaultConstants.SUBJECT_DEFAULT)){
+// BoolQueryBuilder subjectBoolQueryBuilder = QueryBuilders.boolQuery();
+// subjectBoolQueryBuilder.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.subject.name())));
+// subjectBoolQueryBuilder.should(QueryBuilders.termsQuery(BaseFieldEnum.subject.name(), query.getSubjectList()));
+// queryBuilder.filter(subjectBoolQueryBuilder);
+// }else {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.subject.name(), query.getSubjectList()));
+// }
+// }
+// // 渠道条件
+// if (query.getChannelList() != null && !query.getChannelList().isEmpty()) {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.channel.name(), query.getChannelList()));
+// }
+// // 网站
+// if (!Strings.isNullOrEmpty(query.getWebsite())) {
+// queryBuilder.filter(QueryBuilders.termQuery(BaseFieldEnum.website.name(), query.getWebsite()));
+// }
+// if (query.getWebsiteList() != null && !query.getWebsiteList().isEmpty()) {
+// // 如果渠道选择了用户上传,并且其他渠道选择了二级网站,则需兼容用户上传网站为空的结果
+// if (query.getChannelList() != null && query.getChannelList().contains(DefaultConstants.DEFAULT_CHANNEL_USER)){
+// BoolQueryBuilder subQuery = QueryBuilders.boolQuery();
+// subQuery.should(QueryBuilders.termQuery(BaseFieldEnum.channel.name(), DefaultConstants.DEFAULT_CHANNEL_USER));
+// subQuery.should(QueryBuilders.termsQuery(BaseFieldEnum.website.name(), query.getWebsiteList()));
+// queryBuilder.filter(subQuery);
+// } else {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.website.name(), query.getWebsiteList()));
+// }
+// }
+// // 网站面包夹
+// if (query.getCateMd5List() != null && !query.getCateMd5List().isEmpty()) {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.cate_md5.name(), query.getCateMd5List()));
+// }
+// // 审核状态(满足审核通过条件时无该字段匹配)
+// if (query.getAuditStateList() != null && !query.getAuditStateList().isEmpty()) {
+// if (query.getAuditStateList().contains(AuditStateEnum.YES.getKey())){
+// BoolQueryBuilder auditBoolQuery = QueryBuilders.boolQuery();
+// auditBoolQuery.should(QueryBuilders.termsQuery(BaseFieldEnum.audit_state.name(), query.getAuditStateList()));
+// auditBoolQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(BaseFieldEnum.audit_state.name())));
+// queryBuilder.filter(auditBoolQuery);
+// }else {
+// queryBuilder.filter(QueryBuilders.termsQuery(BaseFieldEnum.audit_state.name(), query.getAuditStateList()));
+// }
+// }
+// // 实体类型
+// if (query.getOntologyIdList() != null && !query.getOntologyIdList().isEmpty()){
+// queryBuilder.filter(QueryBuilders.termsQuery(KgSystemEnum.ontology_id.name(),query.getOntologyIdList()));
+// }
+//
+// // 高级搜索自定义设置
+// if (query.isHighLevel()) {
+// // 1、找到所有的not进行非处理
+// query.getHighLevelQueries().stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
+// queryBuilder.mustNot(this.getHighLevelQueryBuilder(e, true));
+// });
+//
+// // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理
+// List tempHighLevel = new ArrayList<>(query.getHighLevelQueries().size());
+// query.getHighLevelQueries().stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
+// // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空
+// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) {
+// // 拼接条件
+// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+// queryBuilder.must(tempQueryBuilder);
+// tempHighLevel.clear();
+// }
+// // 将当前项加入临时队列
+// tempHighLevel.add(e);
+// });
+//
+// // 此处拼接tempHighLevel未处理的内容
+// if (!tempHighLevel.isEmpty()) {
+// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+// queryBuilder.must(tempQueryBuilder);
+// }
+// }
+//
+// // 统计全部关键词, k=关键词 v=积分放大倍数
+// Map keywordMap = new HashMap<>(2);
+// if (!Strings.isNullOrEmpty(query.getK())) {
+// keywordMap.put(query.getK(), 10);
+// }
+// if (!Strings.isNullOrEmpty(query.getSk())) {
+// keywordMap.put(query.getSk(), 20);
+// }
+// if (query.isHighLevel()) {
+// query.getHighLevelQueries().forEach(e -> {
+// Stream.of(e.getText()).forEach(text -> keywordMap.put(text, 10));
+// if (e.getTranslateText() != null) {
+// Stream.of(e.getTranslateText()).forEach(text -> keywordMap.put(text, 10));
+// }
+// });
+// }
+//
+// // 高亮自定义设置
+// if (query.getHighlight() != null && query.getHighlight()) {
+// esBaseParam.setWithHighlight(true);
+// esBaseParam.setHighlightFields(new ArrayList<>(BaseFieldEnum.getMatchFieldsWithPy().keySet()));
+// Integer numOfFragments = 2;
+// HighlightBuilder highlightBuilder = new HighlightBuilder()
+// // match进行高亮
+// .requireFieldMatch(true)
+// .order(HighlightBuilder.Order.SCORE)
+// //fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。
+// .numOfFragments(numOfFragments)
+// //一段 fragment 包含多少个字符。默认100。
+// .fragmentSize(Constants.MAX_R_LENGTH / numOfFragments)
+// .noMatchSize(Constants.MAX_R_LENGTH)
+// .preTags("")
+// .postTags("");
+// BaseFieldEnum.getMatchFieldsWithPy().keySet().forEach(highlightBuilder::field);
+//
+// /*
+// * 高级搜索取消自定义高亮
+// * 精确搜索进行短语高亮重定义
+// * 否则进行关键词的高亮重定义
+// */
+// if (!query.isHighLevel()) {
+// DisMaxQueryBuilder highlightQuery = QueryBuilders.disMaxQuery();
+// if (query.isAccurateQuery()) {
+// query.getAccurateList().stream().forEach(e -> {
+// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, e));
+// });
+// } else {
+// keywordMap.forEach((keyword, boost) -> {
+// BaseFieldEnum.getMatchFieldsWithPy().forEach((field, baseBoost) -> {
+// float realBoost = baseBoost * boost * 100;
+// highlightQuery.add(QueryBuilders.termQuery(field, keyword).boost(realBoost * 2));
+// highlightQuery.add(QueryBuilders.matchPhraseQuery(field, keyword).boost(realBoost));
+// });
+// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.BEST_FIELDS, keyword).boost(0.5F));
+//
+// });
+// // 如果有二次搜索,因二次搜索使用短语前缀,此处需要特殊处理
+// if (query.getSk() != null && !"".equals(query.getSk().trim())) {
+// highlightQuery.add(this.getMatchQueryBuilder(BaseFieldEnum.getMatchFieldsWithPy(), MultiMatchQueryBuilder.Type.PHRASE_PREFIX, query.getSk()));
+// }
+// }
+// highlightBuilder.highlightQuery(highlightQuery);
+// }
+// esBaseParam.setHighlightBuilder(highlightBuilder);
+// }
+//
+//
+//
+// // 评分重算条件
+// BoolQueryBuilder reScoreQueryBuilder = QueryBuilders.boolQuery();
+// if (!keywordMap.isEmpty()) {
+// BoolQueryBuilder phraseQueryBuilder = QueryBuilders.boolQuery();
+// keywordMap.forEach((keyword, boost) -> {
+// BaseFieldEnum.getMatchFieldsWithPy().forEach((field, baseBoost) -> {
+// phraseQueryBuilder.should(QueryBuilders.matchPhrasePrefixQuery(field, keyword).slop(2).maxExpansions(10).boost(baseBoost * boost));
+// });
+//
+// String paramKgSearchPriority = SettingEnum.PARAMS_KG_SEARCH_PRIORITY.getValue();
+// if (!Strings.isNullOrEmpty(paramKgSearchPriority)){
+// // 高优先级实体评分重算(Type指定PHRASE短语匹配;如果搜索词不匹配时,无损原重算逻辑)
+// Long[] ontologyIds = Arrays.stream(paramKgSearchPriority.split(Constants.SEPARATOR_COMMA)).map(Long::parseLong).toArray(Long[]::new);
+// List titleProperties = OntologyUtil.getTitleProperties(ontologyIds);
+// Map fieldMap = titleProperties.stream().collect(Collectors.toMap(AisKgPropertyEntity::getCode, aisKgPropertyEntity -> 300f, (o1, o2) -> o2));
+// phraseQueryBuilder.should(this.getMatchQueryBuilder(fieldMap,MultiMatchQueryBuilder.Type.PHRASE,keyword));
+// }
+// });
+// reScoreQueryBuilder.must(phraseQueryBuilder);
+// }
+// /*
+// // 相关度查询标记
+// boolean functionFlag = false;
+// FunctionScoreQueryBuilder functionQueryBuilder = null;
+// // 优化时间衰减函数查询
+// if (query.getOrderType() == scoreOrder) {
+// // 更改标记并构建时间衰减函数Query对象
+// functionFlag = true;
+// GaussDecayFunctionBuilder exp = ScoreFunctionBuilders.gaussDecayFunction(BaseFieldEnum.create_time.name(), System.currentTimeMillis(), 86400000, 86400000 * 30.0, 0.5);
+// functionQueryBuilder = QueryBuilders.functionScoreQuery(queryBuilder, exp).boostMode(CombineFunction.MULTIPLY);
+// }
+// */
+//
+// // 热门标签聚合名称
+// String aiTagLabelAgg = "aiTagLabelAgg";
+// String aiTagAgg = "aiTagAgg";
+// List hotTagList = new ArrayList<>();
+// PageResp pageList = this.fetchPage(queryBuilder, esBaseParam, (customRequestBuilder) -> {
+// // 高级/跨度/精确搜索时不增加评分重算
+// if (!query.isHighLevel() && !query.isSpanQuery() && !query.isAccurateQuery()) {
+// customRequestBuilder.addRescorer(RescoreBuilder.queryRescorer(reScoreQueryBuilder).setQueryWeight(0.7f).setRescoreQueryWeight(1.2f), 100);
+// }
+// // 聚合当前结果的热门标签
+// // 内层标签属性聚合
+// AggregationBuilder aiTagLabelAggBuilder = AggregationBuilders.terms(aiTagLabelAgg)
+// .field(this.nested(BaseFieldEnum.ai_tag.name(), LabelWeightFieldEnum.label.name()))
+// .order(Terms.Order.count(false))
+// .size(query.getLimit());
+// // nested主聚合
+// NestedAggregationBuilder aiTagAggBuilder = AggregationBuilders.nested(aiTagAgg, BaseFieldEnum.ai_tag.name()).subAggregation(aiTagLabelAggBuilder);
+// customRequestBuilder.addAggregation(aiTagAggBuilder);
+// }, (customResponse) -> {
+// // 处理热门标签, 取出聚合结果
+// Nested aiTagAggData = customResponse.getAggregations().get(aiTagAgg);
+// Terms aiTagLabelAggData = aiTagAggData.getAggregations().get(aiTagLabelAgg);
+// if (aiTagLabelAggData != null) {
+// for (Terms.Bucket bucket : aiTagLabelAggData.getBuckets()) {
+// hotTagList.add(new KeyValueDTO(bucket.getKeyAsString(), String.valueOf(bucket.getDocCount())));
+// }
+// }
+// });
+// // 拼装返回结果
+// SearchPageResp pageResp = new SearchPageResp<>(query.getLimit(), query.getPage());
+// pageResp.setTotalCount((int) pageList.getTotal());
+// pageResp.setList(pageList.getList().stream().map(e -> e.toHighlightEntity(clazz)).collect(Collectors.toList()));
+// pageResp.setHotAiTagList(hotTagList);
+// return pageResp;
+// }
+//
+// /**
+// * 拼装高级搜索--针对高级搜索中的一行或一个框
+// *
+// * @param highLevelQuery
+// * @return
+// */
+// protected QueryBuilder getHighLevelQueryBuilder(HighLevelQuery highLevelQuery, boolean isNot) {
+// BoolQueryBuilder result = QueryBuilders.boolQuery();
+// // 获取高级查询的字段
+// Map fieldMap = SearchScopeEnum.getFieldsByKey(highLevelQuery.getScope());
+// SearchScopeEnum searchScopeEnum = SearchScopeEnum.getEnumByKey(Objects.toString(highLevelQuery.getScope()));
+// String path = (null == searchScopeEnum ? null : searchScopeEnum.getPath());
+//
+// // 同段搜索---跨度搜索
+// if (SearchMatchTypeEnum.PARAGRAPH.equals(highLevelQuery.getMatchType())) {
+// fieldMap.forEach((k, v) -> {
+// this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_PARAGRAPH);
+// this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_PARAGRAPH);
+// });
+// return result;
+// }
+//
+// // 同句搜索
+// if (SearchMatchTypeEnum.SENTENCE.equals(highLevelQuery.getMatchType())) {
+// fieldMap.forEach((k, v) -> {
+// this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_SENTENCE);
+// this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_SENTENCE);
+// });
+// return result;
+// }
+//
+// // 原文普通搜索
+// QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path,fieldMap, highLevelQuery.getText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
+// if(rawQueryBuilder != null){
+// result.should(rawQueryBuilder);
+// }
+//
+// // 如果译文不为空,则进行译文普通搜索
+// if (highLevelQuery.getTranslateText() != null) {
+// QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path,fieldMap, highLevelQuery.getTranslateText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
+// if (transQueryBuilder != null){
+// result.should(transQueryBuilder);
+// }
+// }
+// return result;
+// }
+//
+// /**
+// * 获取跨度搜索查询条件
+// *
+// * @param field 字段名
+// * @param text 内容
+// * @param separator 分隔符
+// * @return
+// */
+// private void addSpanQueryBuilder(BoolQueryBuilder queryBuilder, String field, String[] text, String separator) {
+//
+// if (text == null || text.length == 0) {
+// return;
+// }
+// // 将所有原词按照空格拆分
+// /*
+// String[] splitText = Stream.of(text)
+// .filter(StringUtils::isNotBlank)
+// .flatMap(e -> Stream.of(e.split(" ")))
+// .filter(StringUtils::isNotBlank)
+// .toArray(String[]::new);
+// */
+// String[] splitText = text;
+// if (splitText == null || splitText.length == 0) {
+// return;
+// }
+//
+// SpanNearQueryBuilder spanNearQueryBuilder = QueryBuilders.spanNearQuery(QueryBuilders.spanTermQuery(field, splitText[0]), 250).inOrder(false);
+// Stream.of(splitText).skip(1).forEach(e -> spanNearQueryBuilder.addClause(QueryBuilders.spanTermQuery(field, e)));
+// SpanQueryBuilder exclude = QueryBuilders.spanTermQuery(field, separator);
+// SpanNotQueryBuilder spanNotQueryBuilder = QueryBuilders.spanNotQuery(spanNearQueryBuilder, exclude);
+// //跨度搜索 为了高亮显示 新增对于每个单词进行查询。
+// BoolQueryBuilder query = new BoolQueryBuilder();
+// Stream.of(splitText).forEach(e -> query.must(QueryBuilders.matchPhraseQuery(BaseFieldEnum.content.name(), e)));
+// queryBuilder.should(QueryBuilders.boolQuery().must(spanNotQueryBuilder).must(query));
+// }
+//
+// /**
+// * 全文检索查询拼接----含词语策略
+// *
+// * @param fieldMap 查询字段
+// * @param text 文本
+// * @param isNot 是否是排除
+// * @param strategyEnum 搜索词策略
+// * @return
+// */
+// private QueryBuilder getMatchQueryBuilder(String nestedPath,Map fieldMap, String[] text, boolean isNot, SearchWordStrategyEnum strategyEnum) {
+//
+// if (text == null || text.length == 0) {
+// return null;
+// }
+// /*
+// // 将所有原词按照空格拆分
+// String[] splitText = Stream.of(text)
+// .filter(StringUtils::isNotBlank)
+// .flatMap(e -> Stream.of(e.split(" ")))
+// .filter(StringUtils::isNotBlank)
+// .toArray(String[]::new);
+// */
+// String [] splitText = text;
+// if (splitText == null || splitText.length == 0) {
+// return null;
+// }
+//
+// BoolQueryBuilder result = QueryBuilders.boolQuery();
+// // 如果是非 或 指定完整匹配,则用短语,否则用最佳字段
+// MultiMatchQueryBuilder.Type multiMatchType = isNot || SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? MultiMatchQueryBuilder.Type.PHRASE_PREFIX : MultiMatchQueryBuilder.Type.BEST_FIELDS;
+// // 使用拆分后的词进行匹配----如果使用完整匹配则不进行拆分,否则按空格拆分
+// Stream.of(SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? text : splitText).forEach((e) -> {
+// // 校验所有还是单个词
+// QueryBuilder matchQuery = this.getMatchQueryBuilder(nestedPath,fieldMap, multiMatchType, e);
+// if (SearchWordStrategyEnum.ALL.equals(strategyEnum)) {
+// result.must(matchQuery);
+// } else {
+// result.should(matchQuery);
+// }
+// });
+// return result;
+// }
+//
+//
+//
+// /**
+// * 全文检索查询拼接(非nested属性重载方法)
+// *
+// * @param fieldMap 查询字段
+// * @param type 查询类型
+// * @param text 文本
+// * @return
+// */
+// private QueryBuilder getMatchQueryBuilder(Map fieldMap, MultiMatchQueryBuilder.Type type, String text) {
+// return this.getMatchQueryBuilder(null,fieldMap,type,text);
+// }
+//
+// /**
+// * 全文检索查询拼接,不支持nested属性与非nested属性混合使用,并且nested属性必须归属相同path
+// *
+// * @param fieldMap 查询字段
+// * @param type 查询类型
+// * @param text 文本
+// * @return
+// */
+// private QueryBuilder getMatchQueryBuilder(String nestedPath,Map fieldMap, MultiMatchQueryBuilder.Type type, String text) {
+// // 拼装搜索
+// QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(text)
+// .fields(fieldMap)
+// .type(type == null ? MultiMatchQueryBuilder.Type.BEST_FIELDS : type)
+// .maxExpansions(5)
+// .tieBreaker(0.3f)
+// /// 关闭高频词处理
+// //.cutoffFrequency(0.01f)
+// .lenient(Boolean.TRUE)
+// .minimumShouldMatch("60%");
+//
+// // 如果有path拼接nested并返回
+// if (!Strings.isNullOrEmpty(nestedPath)){
+// return this.nestedQuery(nestedPath, queryBuilder);
+// }
+// return queryBuilder;
+// }
+//
+//}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBaseParam.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBaseParam.java
new file mode 100644
index 0000000..31ee646
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsBaseParam.java
@@ -0,0 +1,240 @@
+package com.bfd.mf.common.service.es;
+
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+
+/**
+ * Base parameter object for ES queries: holds index/type, paging, sorting, source filtering and highlight settings.
+ * @author Aquarius & Hao
+ */
+public class EsBaseParam {
+
+ public EsBaseParam(){}
+
+ /**
+ * Builds the parameter from an index enum (the constructor below is currently commented out).
+ * @param indexEnum enum supplying the search index name and the type
+ */
+// public EsBaseParam(IndexEnum indexEnum){
+// this.index = new String[]{indexEnum.getSearchIndex()};
+// this.type = indexEnum.getType();
+// }
+
+ /**
+ * Builds the parameter for a single index.
+ * @param index index name; stored as a one-element array
+ * @param type ES type
+ */
+ public EsBaseParam(String index, String type){
+ this.index = new String[]{index};
+ this.type = type;
+ }
+
+ /**
+ * Sort field and sort direction (single-column sort)
+ */
+ private String orderField;
+ private SortOrder descOrAsc;
+
+ /**
+ * Multi-column sort (raw map; presumably sort field -> direction in insertion order, TODO confirm with callers)
+ */
+ private LinkedHashMap lhashMap;
+
+ /**
+ * Current page number; getOffset() treats it as 1-based
+ */
+ private Integer page;
+ /**
+ * Number of rows per page
+ */
+ private Integer limit;
+ /**
+ * Index of the first row to fetch; recomputed by getOffset() whenever page and limit are both set
+ */
+ private Integer offset;
+
+ /**
+ * ES index names
+ */
+ private String[] index;
+ /**
+ * ES type
+ */
+ private String type;
+ /**
+ * Column to group by
+ */
+ private String term;
+ /**
+ * Group-by column 1
+ */
+ private String term1;
+
+ /**
+ * Field names to return; when not specified, all fields are returned
+ */
+ private String[] includes;
+
+ /**
+ * Field names to exclude from the result; when not specified, no restriction is applied
+ */
+ private String[] excludes;
+
+ /**
+ * Highlight settings
+ */
+ private HighlightBuilder highlightBuilder;
+
+ /**
+ * Whether highlighting is required
+ */
+ private boolean withHighlight = false;
+ /**
+ * Fields to highlight
+ */
+ private List highlightFields = new ArrayList<>();
+
+ public String[] getIncludes() {
+ return includes;
+ }
+
+ public void setIncludes(String[] includes) {
+ this.includes = includes;
+ }
+
+ public String[] getExcludes() {
+ return excludes;
+ }
+
+ public void setExcludes(String[] excludes) {
+ this.excludes = excludes;
+ }
+
+ public String getTerm1() {
+ return term1;
+ }
+
+ public void setTerm1(String term1) {
+ this.term1 = term1;
+ }
+
+ public String getOrderField() {
+ return orderField;
+ }
+
+ public void setOrderField(String orderField) {
+ this.orderField = orderField;
+ }
+
+ public String[] getIndex() {
+ return index;
+ }
+
+ public void setIndex(String... index) {
+ this.index = index;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public String getTerm() {
+ return term;
+ }
+
+ public void setTerm(String term) {
+ this.term = term;
+ }
+
+ public LinkedHashMap getLhashMap() {
+ return lhashMap;
+ }
+
+ public void setLhashMap(LinkedHashMap lhashMap) {
+ this.lhashMap = lhashMap;
+ }
+
+ public SortOrder getDescOrAsc() {
+ return descOrAsc;
+ }
+
+ public void setDescOrAsc(SortOrder descOrAsc) {
+ this.descOrAsc = descOrAsc;
+ }
+
+ public Integer getPage() {
+ return page;
+ }
+
+ public void setPage(Integer page) {
+ this.page = page;
+ }
+
+ public Integer getLimit() {
+ if (null == limit) {
+ limit = 0; // a missing limit is defaulted to 0 and stored back into the field, so this getter never returns null
+ }
+ return limit;
+ }
+
+ public void setLimit(Integer limit) {
+ this.limit = limit;
+ }
+
+ public Integer getOffset() {
+ if (this.page != null && this.limit != null) { // when both are set, the computed value replaces anything given via setOffset
+ if (this.page > 0 && this.limit > 0) {
+ offset = (this.page - 1) * this.limit; // page is 1-based: page 1 starts at row 0
+ } else {
+ offset = -1; // non-positive page or limit yields -1 (presumably a "no paging" sentinel; confirm with callers)
+ }
+ }
+ if (null == offset) {
+ offset = -1; // neither a usable page/limit pair nor an explicit offset was supplied
+ }
+ return offset;
+ }
+
+ public void setOffset(Integer offset) {
+ this.offset = offset;
+ }
+
+ public EsBaseParam orderBy(String orderField, SortOrder descOrAsc){ // fluent shortcut: sets sort field and direction, returns this
+ this.setOrderField(orderField);
+ this.setDescOrAsc(descOrAsc);
+ return this;
+ }
+
+ public boolean isWithHighlight() {
+ return withHighlight;
+ }
+
+ public void setWithHighlight(boolean withHighlight) {
+ this.withHighlight = withHighlight;
+ }
+
+ public List getHighlightFields() {
+ return highlightFields;
+ }
+
+ public void setHighlightFields(List highlightFields) {
+ this.highlightFields = highlightFields;
+ }
+
+ public HighlightBuilder getHighlightBuilder() {
+ return highlightBuilder;
+ }
+
+ public void setHighlightBuilder(HighlightBuilder highlightBuilder) {
+ this.highlightBuilder = highlightBuilder;
+ }
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsDTO.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsDTO.java
new file mode 100644
index 0000000..363f7d5
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsDTO.java
@@ -0,0 +1,277 @@
+//package com.bfd.mf.common.service.es;
+//
+//import cn.percent.common.constants.Constants;
+//import cn.percent.common.utils.BeanUtils;
+//import cn.percent.modules.ais.entity.es.BaseEsEntity;
+//import com.alibaba.fastjson.JSON;
+//import com.google.common.base.Strings;
+//import com.google.common.collect.Lists;
+//import org.apache.commons.lang.ObjectUtils;
+//import org.apache.commons.lang.StringUtils;
+//
+//import java.util.ArrayList;
+//import java.util.List;
+//import java.util.Map;
+//
+///**
+// * ElasticSearch返回值
+// *
+// * @author lihonghao
+// */
+//public class EsDTO {
+// public EsDTO() {
+// }
+//
+// public EsDTO(String docId, Map data) {
+// this.docId = docId;
+// this.data = data;
+// }
+//
+// public EsDTO(String docId, String index, Map data) {
+// this.docId = docId;
+// this.index = index;
+// this.data = data;
+// }
+//
+// /**
+// * 唯一主键
+// */
+// private String docId;
+// /**
+// * 索引名字
+// */
+// private String index;
+// /**
+// * 索引类型
+// */
+// private String type;
+//
+// /**
+// * 数据
+// */
+// private Map data;
+// /**
+// * 数据
+// */
+// private Map highlightData;
+//
+// /**
+// * 获取String值
+// *
+// * @param key
+// * @return
+// */
+// public String getString(Object key) {
+// return data == null ? "" : ObjectUtils.toString(data.get(key), "");
+// }
+//
+// /**
+// * 获取String值
+// *
+// * @param key
+// * @return
+// */
+// public String getString(Object key, String nullStr) {
+// return data == null ? "" : ObjectUtils.toString(data.get(key), nullStr);
+// }
+//
+// /**
+// * 地理坐标点用字符串形式表示时是纬度在前,经度在后(”latitude,longitude”),
+// * 而数组形式表示时刚好相反,是经度在前,纬度在后([longitude,latitude])。
+// * 其实,在 ElasticeSearch 内部,不管字符串形式还是数组形式,都是纬度在前,经度在后。
+// * 不过早期为了适配 GeoJSON 的格式规范,调整了数组形式的表示方式。
+// * 因此,在使用地理位置(geolocation)的路上就出现了这么一个“捕熊器”,专坑那些不了解这个陷阱的使用者。
+// *
+// * 获取String值
+// *
+// * @param key
+// * @return [lon, lat]
+// */
+// public Double[] getLocation(Object key) {
+// try {
+// if (data == null) {
+// return null;
+// }
+// Object location = data.get(key);
+// if (location == null) {
+// return null;
+// }
+//
+// if (location instanceof ArrayList) {
+// List geoList = Lists.newArrayList();
+// ((ArrayList) location).forEach(e -> geoList.add(Double.parseDouble(ObjectUtils.toString(e))));
+// return geoList.toArray(new Double[geoList.size()]);
+// }
+// if (location instanceof Object[]) {
+// return (Double[]) location;
+// }
+// if (location instanceof String) {
+// String strLocation = ObjectUtils.toString(location, "");
+// if (strLocation.indexOf(Constants.SEPARATOR_COMMA) != -1) {
+// String[] arrLocation = strLocation.split(",");
+// return new Double[]{Double.parseDouble(arrLocation[0]), Double.parseDouble(arrLocation[1])};
+// }
+// }
+// return null;
+// } catch (Exception e) {
+// e.printStackTrace();
+// return null;
+// }
+// }
+//
+// /**
+// * 获取经度值
+// *
+// * @param key
+// */
+// public Double getLongitude(Object key) {
+// Double[] geoMap = getLocation(key);
+// return geoMap == null ? null : geoMap[0];
+// }
+//
+// /**
+// * 获取纬度值
+// *
+// * @param key
+// */
+// public Double getLatitude(Object key) {
+// Double[] geoMap = getLocation(key);
+// return geoMap == null ? null : geoMap[1];
+// }
+//
+// /**
+// * 转实体
+// *
+// * @param clazz
+// * @param
+// * @return
+// */
+// public T toEntity(Class clazz) {
+// if (data == null) {
+// return null;
+// }
+// // 如果继承至es基类则赋值doc_id
+// T entity = JSON.parseObject(JSON.toJSONString(data), clazz);
+// if (BaseEsEntity.class.isAssignableFrom(clazz)) {
+// BeanUtils.setProperty(entity, BaseEsEntity.DOC_ID, this.getDocId());
+// if (this.getIndex() != null) {
+// BeanUtils.setProperty(entity, BaseEsEntity.INDEX, this.getIndex());
+// }
+// if (this.getType() != null) {
+// BeanUtils.setProperty(entity, BaseEsEntity.TYPE, this.getType());
+// }
+// }
+// return entity;
+// }
+//
+// /**
+// * 转高亮实体
+// *
+// * @param clazz
+// * @param
+// * @return
+// */
+// public T toHighlightEntity(Class clazz) {
+// String emTag = "";
+// if (data == null) {
+// return null;
+// }
+// if (highlightData != null) {
+//
+// highlightData.forEach((k, v) -> {
+// boolean flag = data.get(k) == null || (!Strings.isNullOrEmpty(v) && v.contains(emTag));
+// if (flag){
+// data.put(k, v);
+// }
+// });
+// // data.putAll(highlightData);
+// }
+// // 如果拼音有值则覆盖原始值
+// if (highlightData != null) {
+// highlightData.forEach((k, v) -> {
+// String realKey = k;
+// // 处理拼音后缀高亮
+// if (k.endsWith(EsBase.ES_PINYIN_SUFFIX) && StringUtils.isNotBlank(v)) {
+// // 获取拼音字段原始key(去掉后缀.pinyin)
+// realKey = StringUtils.removeEnd(k, EsBase.ES_PINYIN_SUFFIX);
+// // 获取原始字段值
+// String realValue = data.get(realKey) == null ? "" : data.get(realKey).toString();
+// // 原始值中是否有高亮
+// boolean isOriginalHighlight = !Strings.isNullOrEmpty(realValue) && realValue.contains(emTag);
+// // 如果原始值中有高亮则不再处理
+// if (isOriginalHighlight) {
+// return;
+// }
+//
+// // 如果原始值没有高亮,那么判断拼音值是否有高亮
+// String pinyinValue = v == null ? "" : v;
+// // 原始值中是否有高亮
+// boolean isPinyinHighlight = !Strings.isNullOrEmpty(pinyinValue) && pinyinValue.contains(emTag);
+// if (isPinyinHighlight) {
+// data.put(realKey, v);
+// }
+// }
+// });
+// }
+// // 如果继承至es基类则赋值doc_id
+// T entity = JSON.parseObject(JSON.toJSONString(data), clazz);
+// if (BaseEsEntity.class.isAssignableFrom(clazz)) {
+// BeanUtils.setProperty(entity, BaseEsEntity.DOC_ID, this.getDocId());
+// if (this.getIndex() != null) {
+// BeanUtils.setProperty(entity, BaseEsEntity.INDEX, this.getIndex());
+// }
+// if (this.getType() != null) {
+// BeanUtils.setProperty(entity, BaseEsEntity.TYPE, this.getType());
+// }
+// }
+// return entity;
+// }
+//
+// public String getDocId() {
+// return docId;
+// }
+//
+// public void setDocId(String docId) {
+// this.docId = docId;
+// }
+//
+// public Map getData() {
+// return data;
+// }
+//
+// public void setData(Map data) {
+// this.data = data;
+// }
+//
+// public Map getHighlightData() {
+// return highlightData;
+// }
+//
+// public void setHighlightData(Map highlightData) {
+// this.highlightData = highlightData;
+// }
+//
+// public String getIndex() {
+// return index;
+// }
+//
+// public void setIndex(String index) {
+// this.index = index;
+// }
+//
+// public String getType() {
+// return type;
+// }
+//
+// public void setType(String type) {
+// this.type = type;
+// }
+//
+// @Override
+// public String toString() {
+// return "EsDTO{" +
+// "docId='" + docId + '\'' +
+// ", data=" + data +
+// '}';
+// }
+//}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java
index 19cd37c..a7dd2e5 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java
@@ -1,10 +1,15 @@
package com.bfd.mf.common.service.es;
import com.alibaba.fastjson.JSONObject;
+import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.es.EsUtils;
+import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.config.BFDApiConfig;
import org.elasticsearch.index.query.BoolQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.index.query.TermsQueryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@@ -17,29 +22,40 @@ import java.util.List;
@Service
public class EsQueryServiceForSQMini {
private static final Logger logger = LoggerFactory.getLogger(EsQueryServiceForSQMini.class);
+ private String clusterName = "";
@Autowired
private GetQueryBuilder getQueryBuilder;
@Autowired
private BFDApiConfig bfdApiConfig;
-
- private String clusterName ="";
+ @Autowired
+ private TaskRepository taskRepository;
@PostConstruct
public void init() {
// 注册数据查询来源
- clusterName = bfdApiConfig.esMiniName();
- String sourceAddress [] = bfdApiConfig.esMiniAddress();
+ clusterName = bfdApiConfig.esMiniName();
+ String sourceAddress[] = bfdApiConfig.esMiniAddress();
// 配置文件中的 es-source
EsUtils.registerCluster(clusterName, sourceAddress);
}
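+ /**
+ * 2023-04-25: query method used for searches; builds the query via getQueryBuilderNew.
+ * Uses the highlighting query when queryRequest.getHighLevelQueries() is non-null, otherwise the plain query.
+ * @param indexName indices to search
+ * @param queryRequest query conditions (limit, sort, search type, high-level queries)
+ * @return the matching documents; an empty list if the query throws
+ */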
- public List queryDataFromOneSubject(String[] indexName, QueryRequest queryRequest) {
+ public List queryDataFromFolder(String[] indexName, QueryRequest queryRequest) {
+ List result = new ArrayList<>();
try {
logger.debug("[EsQueryServiceForSQMini - 专题] queryDataFromOneSubject ...");
- BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest);
- //logger.info("[EsQueryService] queryDataFromOneSubject: indexName = " + indexName[0] + "; qb:\n" + "{}.", boolQueryBuilder.toString());
- logger.info("[EsQueryService] queryDataFromOneSubject: indexName : {}" ,indexName[0]);
+ // BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest);
+ // 2023-04-23 新查询语句的组装
+ BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilderNew(queryRequest);
+ logger.info("[EsQueryService] queryDataFromOneSubject: indexName :{} . ", indexName[0], " ; qb:\n" + "{}.", boolQueryBuilder.toString());
+ // logger.info("[EsQueryService] queryDataFromOneSubject: indexName : {}" ,indexName[0]);
//每页的数量
Integer limit = queryRequest.getLimit();
//起始页(0,20,40....)
@@ -50,13 +66,18 @@ public class EsQueryServiceForSQMini {
String sortFlag = queryRequest.getSidx();
// 主贴、评论、还是用户?
Integer searchType = queryRequest.getSearchType();
- List result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
+ // Two query paths: with highlighting when high-level queries are present, without highlighting otherwise.
+ if (null != queryRequest.getHighLevelQueries()) {
+ result = EsUtils.queryWithHighlight(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
+ } else {
+ result = EsUtils.query(clusterName, indexName, boolQueryBuilder, sortFlag, orderFlag, limit, start, searchType);
+ }
//System.out.println(result);
- return result;
- }catch (Exception e){
- return new ArrayList<>();
- }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ return result;
}
/**
@@ -65,74 +86,115 @@ public class EsQueryServiceForSQMini {
* @param queryRequest
* @return
*/
- public Long queryDataCountFromOneSubject(String[] indexName, QueryRequest queryRequest) {
+ public Long queryDataCountFromFolder(String[] indexName, QueryRequest queryRequest) {
try {
logger.debug("[EsQueryServiceForSQMini - 专题] queryDataCountFromOneSubject ...");
BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest);
Integer searchType = queryRequest.getSearchType();
Integer size = queryRequest.getLimit();
- Long totalCount = EsUtils.queryTotalCountNew(clusterName, indexName, boolQueryBuilder, searchType );
+ Long totalCount = EsUtils.queryTotalCountNew(clusterName, indexName, boolQueryBuilder, searchType);
//System.out.println("EsQueryServiceForSQMini : queryDataCountFromOneSubject " + totalCount);
return totalCount;
- }catch (Exception e){
+ } catch (Exception e) {
return 0L;
}
}
/**
* 复制示例专题的数据到新的专题
+ *
* @param indexName
* @param queryRequest
* @return
*/
- public JSONObject exportDataFromOneSubject(String[] indexName, QueryRequest queryRequest) {
+ public JSONObject exportDataFromFolder(String[] indexName, QueryRequest queryRequest) {
try {
logger.info("[EsQueryServiceForSQMini - 专题] exportDataFromOneSubject start ...");
- BoolQueryBuilder boolQueryBuilder =getQueryBuilder.getQueryBuilder(queryRequest);
-// logger.info("[EsQueryServiceForSQMini-专题] queryDataFromOneSubject: indexName = " + indexName + "; qb:" + "{}.", boolQueryBuilder.toString());
+ BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilderNew(queryRequest);
//每页的数量
Integer limit = queryRequest.getLimit();
String scrollId = queryRequest.getScrollId();
Integer searchType = queryRequest.getSearchType();
- JSONObject result = EsUtils.queryForExport(clusterName, indexName, boolQueryBuilder, limit,scrollId,searchType);
+ JSONObject result = EsUtils.queryForExport(clusterName, indexName, boolQueryBuilder, limit, scrollId, searchType);
return result;
- }catch (Exception e){
+ } catch (Exception e) {
e.printStackTrace();
return new JSONObject();
}
}
- public long reIndexData(String indexList, String newIndex) {
- try{
- long created = EsUtils.reIndex(clusterName,indexList,newIndex);
- return created;
- }catch (Exception e){
- e.printStackTrace();
- return 0;
- }
- }
public void deleteBySubjectId(String indexName) {
- try{
- EsUtils.delIndex(clusterName,indexName);
- }catch (Exception e){
+ try {
+ EsUtils.delIndex(clusterName, indexName);
+ } catch (Exception e) {
e.printStackTrace();
}
}
public void deleteBySubjectIdByCid(String indexName, String cid) {
- try{
- EsUtils.delIndexByCid(clusterName,indexName,cid);
- }catch (Exception e){
+ try {
+ EsUtils.delIndexByCid(clusterName, indexName, cid);
+ } catch (Exception e) {
e.printStackTrace();
}
}
public void deleteBySubjectIdByCrawlDataFlag(String indexName, String cid, String crawlDataFlag) {
- try{
- EsUtils.delIndexByCrawlDataFlag(clusterName,indexName,cid,crawlDataFlag);
- }catch (Exception e){
+ try {
+ EsUtils.delIndexByCrawlDataFlag(clusterName, indexName, cid, crawlDataFlag);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ private static int MOVE_TASKS_STATUS = 6;
+ private static int FINSH_TASKS_STATUS = 3;
+ private static int MOVE_FLORD_STATUS = 1;
+ private static int FINSH_FLORD_STATUS = 0;
+
+ public long reIndexData(String indexList, String newIndex) {
+ try {
+ long created = EsUtils.reIndex(clusterName, indexList, newIndex);
+ return created;
+ } catch (Exception e) {
+ e.printStackTrace();
+ return 0;
+ }
+ }
+
+ public long reIndexDataByTasks(String originalIndex, String currentIndex, List tasks) {
+ long start = System.currentTimeMillis();
+ try {
+ TermsQueryBuilder termQueryBuilder = QueryBuilders.termsQuery(ESConstant.TASK_ID, tasks);
+ QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(termQueryBuilder);
+
+ // Thread.sleep(5000);
+ long created = EsUtils.reIndexByTasks(clusterName, originalIndex, currentIndex, tasks, queryBuilder);
+
+ for (Long taskId : tasks) {
+ taskRepository.updateTaskStatus(Long.valueOf(taskId), FINSH_TASKS_STATUS);
+ }
+ String newIndexPre = "cl_special_1.0_";
+ originalIndex = originalIndex.replace(newIndexPre, "");
+ currentIndex = currentIndex.replace(newIndexPre, "");
+ taskRepository.updateSubjectStatus(Long.valueOf(originalIndex), FINSH_FLORD_STATUS);
+ taskRepository.updateSubjectStatus(Long.valueOf(currentIndex), FINSH_FLORD_STATUS);
+ long end = System.currentTimeMillis();
+ logger.info("reIndexDataByTasks:statr:" + start + " ; end:" + end + " ; time = " + (end - start));
+
+ return created;
+ } catch (Exception e) {
+ e.printStackTrace();
+ return 0;
+ }
+ }
+
+ public void deleteByTasks(String indexName, String cid, List tasks) {
+ try {
+ EsUtils.delIndexByTasks(clusterName, indexName, cid, tasks);
+ } catch (Exception e) {
e.printStackTrace();
}
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java
index 3c27812..975521f 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/GetQueryBuilder.java
@@ -2,13 +2,16 @@ package com.bfd.mf.common.service.es;
import com.bfd.mf.common.service.cache.TopicQueryService;
import com.bfd.mf.common.util.constants.ESConstant;
+import com.bfd.mf.common.util.enums.SearchExpressionEnum;
+import com.bfd.mf.common.util.enums.SearchMatchTypeEnum;
+import com.bfd.mf.common.util.enums.SearchScopeEnum;
+import com.bfd.mf.common.util.enums.SearchWordStrategyEnum;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.nlp.common.util.string.TStringUtils;
+import com.google.common.base.Strings;
import org.apache.commons.collections4.map.HashedMap;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
+import org.apache.lucene.search.join.ScoreMode;
+import org.elasticsearch.index.query.*;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.BucketOrder;
@@ -18,14 +21,232 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.*;
+import java.util.stream.Stream;
@Service
public class GetQueryBuilder {
private static final Logger logger = LoggerFactory.getLogger(GetQueryBuilder.class);
+ /**
+ * es 句子分隔符
+ */
+ public static final String SEPARATOR_SENTENCE = "sentenceforbfd";
+ /**
+ * ES paragraph separator
+ */
+ public static final String SEPARATOR_PARAGRAPH = "paragraphforbfd";
+
@Autowired
private TopicQueryService topicQueryService;
+ /**
+ * 2023-04-23 新的查询语句组装
+ * 传入参数有变化
+ *
+ * @param queryRequest
+ * @return
+ */
+ public BoolQueryBuilder getQueryBuilderNew(QueryRequest queryRequest) {
+ logger.info("[GetQueryBuilder] getQueryBuilder...");
+ BoolQueryBuilder qb = QueryBuilders.boolQuery();
+
+ // 基础查询:根据查询条件组装查询语句
+ BoolQueryBuilder boolQueryBuilder = topicQueryService.queryByConditions_v1(queryRequest);
+
+ // 如果要根据ID 查询数据 如果查ID 的,后面的条件就不用查了。
+ if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) {
+
+ String dataIds = queryRequest.getDataIds();
+ List dataIdList = getDataIdList(dataIds);
+ QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList);
+ boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder);
+
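+ // Add the task-ID filter only when task IDs are supplied. NOTE(review): only the first ID is used here, whereas getQueryBuilder matches all of them - confirm this is intended.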
+ if (null != queryRequest.getTaskIds()) {
+ List taskIds = queryRequest.getTaskIds();
+ if (taskIds.size() > 0) {
+ boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termQuery("taskId", taskIds.get(0)));
+ }
+ }
+
+ qb.must(boolQueryBuilder);
+ return qb;
+ }
+
+ Integer searchType = queryRequest.getSearchType(); // 单选 0:主贴;1:评论;2:用户
+ BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType);
+ boolQueryBuilder.filter(searchTextBuilder);
+
+
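+ /**
+ * 1. Ignoring combined search, does highLevelQueries contain only one element?
+ * Yes.
+ * In the screenshot "text" is an array []; is it the same for us?
+ * Yes.
+ * Meaning of the attributes:
+ * id is the order
+ * expression is the relation: 1 = AND (the first term defaults to 1)
+ * 2 = OR
+ * 3 = NOT (exclude)
+ * With a single keyword the relation can only be AND; combinations only exist with more than one keyword,
+ * and the statement is then assembled according to those relations
+ * 1,2,2,2
+ * must[a should[b,c,d]]
+ * 1,3,3,2
+ * must[a,should d]mustnot[b,c]
+ * 2. Exporting a selection still passes multiple dataIds, but "export all" after querying with the conditions above must filter by the same conditions, so export and query parameters should be identical.
+ * 3. The export-to-Kafka interface on Zhicheng's side must be changed in step; @Z z c it only needs to match my query logic, and this code can be reused.
+ *
+ * {"id":1,"expression":"1","text":["the"],"scope":"100"}
+ */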
+
+ // 高级搜索自定义设置
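+ /**
+ * How the advanced query is assembled:
+ * 1. Exclusion (NOT) terms are handled first and added as mustNot clauses.
+ * 2. With a single term the default is a must clause (the note originally said matchPhraseQuery - TODO confirm against getHighLevelQueryBuilder).
+ * 3. With several terms they are combined in order:
+ * a & b | c
+ * must(a should(b, c))
+ * a | b & c
+ * must(c should(a, b))
+ */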
+ if (null != queryRequest.getHighLevelQueries()) {
+ List highLevelQueries = queryRequest.getHighLevelQueries();
+
+ // 1、找到所有的not进行非处理
+ highLevelQueries.stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
+ qb.mustNot(this.getHighLevelQueryBuilder(e, true));
+ });
+
+ // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理
+ List tempHighLevel = new ArrayList<>(highLevelQueries.size());
+ highLevelQueries.stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
+ // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空
+ if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) {
+ // 拼接条件
+ BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+ tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+ qb.must(tempQueryBuilder);
+ tempHighLevel.clear();
+ }
+ // 将当前项加入临时队列
+ tempHighLevel.add(e);
+ });
+
+ // 此处拼接tempHighLevel未处理的内容
+ if (!tempHighLevel.isEmpty()) {
+ BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+ tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+ qb.must(tempQueryBuilder);
+ }
+
+ }
+//
+// // 高级搜索自定义设置
+//
+//
+// if (null == highLevelQueries || highLevelQueries.size() == 0) {
+// System.out.println("木有传入关键词,直接返回查询语句");
+// } else if (highLevelQueries.size() == 1) {
+// System.out.println("只有一个查询关键词,直接组装查询即可");
+// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+// Map query = highLevelQueries.get(0);
+// List texts = (List) query.get("text");
+// System.out.println(texts.get(0));
+// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, texts.get(0)).slop(0);
+// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, texts.get(0)).slop(0);
+// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
+// qb.must(queryBuilder);
+// } else {
+// System.out.println("多个组装查询,需要拆分啦");
+// // 1、找到所有的 not 进行非处理
+// // expression = 1,2,3(与 或 非)
+// for (Map query : highLevelQueries) {
+// int id = Integer.valueOf(query.get("id").toString());
+// String expression = query.get("expression").toString();
+// List texts = (List) query.get("text");
+//
+// if (expression.equals("2")) {
+// String text = texts.get(0);
+// QueryBuilder contentMustNotQueryBuilder = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, text);
+// QueryBuilder titleMustNotQueryBuilder = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, text);
+// qb.mustNot(contentMustNotQueryBuilder);
+// qb.mustNot(titleMustNotQueryBuilder);
+// } else {
+// List tempHighLevel = new ArrayList<>(highLevelQueries.size());
+// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+// qb.must(tempQueryBuilder);
+// }
+// }
+// return qb;
+//
+//// // 1、找到所有的not进行非处理
+//// query.getHighLevelQueries().stream().filter(e -> SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
+//// queryBuilder.mustNot(this.getHighLevelQueryBuilder(e, true));
+//// });
+//// // 2、循环处理剩下的不含not的,处理逻辑为:如果当前是and,则将tempHighLevel进行must处理,tempHighLevel中如果有多个则内部should处理
+//// List tempHighLevel = new ArrayList<>(query.getHighLevelQueries().size());
+//// queryRequest.getHighLevelQueries().stream().filter(e -> !SearchExpressionEnum.NOT.is(e.getExpression())).forEach(e -> {
+//// // 如果是and 且 tempHighLevel不为空,则处理tempHighLevel(>1个做内部或操作)并清空
+//// if (SearchExpressionEnum.AND.is(e.getExpression()) && !tempHighLevel.isEmpty()) {
+//// // 拼接条件
+//// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+//// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+//// queryBuilder.must(tempQueryBuilder);
+//// tempHighLevel.clear();
+//// }
+//// // 将当前项加入临时队列
+//// tempHighLevel.add(e);
+//// });
+////
+//// // 此处拼接tempHighLevel未处理的内容
+//// if (!tempHighLevel.isEmpty()) {
+//// BoolQueryBuilder tempQueryBuilder = QueryBuilders.boolQuery();
+//// tempHighLevel.forEach(temp -> tempQueryBuilder.should(this.getHighLevelQueryBuilder(temp, false)));
+//// queryBuilder.must(tempQueryBuilder);
+//// }
+// }
+
+ // System.out.println("**** " + keyword);
+// String[] keywords;
+// try {
+// if (TStringUtils.isNotEmpty(keyword)) {
+// if (keyword.contains(" ")) {
+// keywords = keyword.split(" ");
+// } else {
+// keywords = new String[]{keyword};
+// }
+// if (searchType == 0) { // 主贴的话 查 标题和内容
+// for (int i = 0; i < keywords.length; i++) {
+// MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[i]).slop(0);
+// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[i]).slop(0);
+// QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
+// qb.must(queryBuilder);
+// }
+// } else if (searchType == 1) { // 评论的话 查 评论内容
+//// MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
+//// QueryBuilder queryBuilder = QueryBuilders.boolQuery().must(contentQuery);
+//// qb.must(queryBuilder);
+// boolQueryBuilder.must(QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0));
+// } else if (searchType == 2) { // 用户 就只查 用户名
+// boolQueryBuilder.must(QueryBuilders.queryStringQuery("*" + keyword + "*").field(ESConstant.AUTHOR));
+// }
+// }
+// } catch (Exception e) {
+// e.printStackTrace();
+// }
+ qb.must(boolQueryBuilder);
+ return qb;
+ }
+
+ /**
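+ * Legacy query builder. NOTE(review): originally described as the export query, but in EsQueryServiceForSQMini the export path calls getQueryBuilderNew and the count path still calls this method - confirm.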
+ *
+ * @param queryRequest
+ * @return
+ */
public BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) {
logger.info("[GetQueryBuilder] getQueryBuilder...");
BoolQueryBuilder qb = QueryBuilders.boolQuery();
@@ -35,12 +256,17 @@ public class GetQueryBuilder {
// 如果要根据ID 查询数据 如果查ID 的,后面的条件就不用查了。
if (null != queryRequest.getDataIds() && !("").equals(queryRequest.getDataIds())) {
- String taskId = queryRequest.getTaskId();
String dataIds = queryRequest.getDataIds();
List dataIdList = getDataIdList(dataIds);
QueryBuilder queryBuilder = QueryBuilders.termsQuery(ESConstant.DATA_ID, dataIdList);
- boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder)
- .must(QueryBuilders.termQuery("taskId", taskId));
+ boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder);
+ // 如果有 任务ID就有,没有就没有啊!
+ if (null != queryRequest.getTaskIds()) {
+ List taskIds = queryRequest.getTaskIds();
+ if (taskIds.size() > 0) {
+ boolQueryBuilder = boolQueryBuilder.must(QueryBuilders.termsQuery("taskId", taskIds));
+ }
+ }
qb.must(boolQueryBuilder);
return qb;
}
@@ -53,6 +279,7 @@ public class GetQueryBuilder {
/**
* 2023-03-06
* 添加一个查询功能,用空格分割,查询 并且的关系
+ *
*/
// System.out.println("**** " + keyword);
String[] keywords;
@@ -85,7 +312,7 @@ public class GetQueryBuilder {
for (int i = 0; i < keywords.length; i++) {
MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keywords[i]).slop(0);
MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keywords[i]).slop(0);
- QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
+ QueryBuilder queryBuilder = QueryBuilders.boolQuery().should(titleQuery).should(contentQuery);
qb.must(queryBuilder);
}
} else if (searchType == 1) { // 评论的话 查 评论内容
@@ -260,6 +487,179 @@ public class GetQueryBuilder {
// aggregationBuilder.subAggregation(aggregationBuilder1);
return aggregationBuilder;
+ }
+
+ /**
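+ * Builds the query for one advanced-search condition (one row / one input box of the advanced search).
+ * @param highLevelQuery the advanced-search condition to translate
+ * @param isNot whether the condition is an exclusion; passed on to the match-query builder
+ * @return a bool query combining the clauses built for the original text and the translated text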
+ */
+ protected QueryBuilder getHighLevelQueryBuilder(HighLevelQuery highLevelQuery, boolean isNot) {
+ BoolQueryBuilder result = QueryBuilders.boolQuery();
+ // 获取高级查询的字段
+ //Map fieldMap = SearchScopeEnum.getFieldsByKey(highLevelQuery.getScope());
+ // 默认就是查全文(标题 + 正文)
+ Map fieldMap = SearchScopeEnum.getFieldsByKey(100);
+ SearchScopeEnum searchScopeEnum = SearchScopeEnum.getEnumByKey(Objects.toString(highLevelQuery.getScope()));
+ String path = (null == searchScopeEnum ? null : searchScopeEnum.getPath());
+
+ // 同段搜索---跨度搜索
+ if (SearchMatchTypeEnum.PARAGRAPH.equals(highLevelQuery.getMatchType())) {
+ fieldMap.forEach((k, v) -> {
+ this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_PARAGRAPH);
+ this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_PARAGRAPH);
+ });
+ return result;
+ }
+
+ // 同句搜索
+ if (SearchMatchTypeEnum.SENTENCE.equals(highLevelQuery.getMatchType())) {
+ fieldMap.forEach((k, v) -> {
+ this.addSpanQueryBuilder(result, k, highLevelQuery.getText(), SEPARATOR_SENTENCE);
+ this.addSpanQueryBuilder(result, k, highLevelQuery.getTranslateText(), SEPARATOR_SENTENCE);
+ });
+ return result;
+ }
+
+ // 原文普通搜索
+ QueryBuilder rawQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
+ if (rawQueryBuilder != null) {
+ result.should(rawQueryBuilder);
+ }
+ // 如果译文不为空,则进行译文普通搜索
+ if (highLevelQuery.getTranslateText() != null) {
+ QueryBuilder transQueryBuilder = this.getMatchQueryBuilder(path, fieldMap, highLevelQuery.getTranslateText(), isNot, SearchWordStrategyEnum.getByKey(highLevelQuery.getWordStrategy()));
+ if (transQueryBuilder != null) {
+ result.should(transQueryBuilder);
+ }
+ }
+ return result;
+ }
+
+ /**
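+ * Adds a span (proximity) search clause to the given query; does nothing when text is null or empty.
+ *
+ * @param queryBuilder bool query that receives the span clause as a should clause
+ * @param field field name
+ * @param text terms that must occur near each other
+ * @param separator separator token that must not occur within the matched span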
+ */
+ private void addSpanQueryBuilder(BoolQueryBuilder queryBuilder, String field, String[] text, String separator) {
+
+ if (text == null || text.length == 0) {
+ return;
+ }
+ // 将所有原词按照空格拆分
+ /*
+ String[] splitText = Stream.of(text)
+ .filter(StringUtils::isNotBlank)
+ .flatMap(e -> Stream.of(e.split(" ")))
+ .filter(StringUtils::isNotBlank)
+ .toArray(String[]::new);
+ */
+ String[] splitText = text;
+ if (splitText == null || splitText.length == 0) {
+ return;
+ }
+ SpanNearQueryBuilder spanNearQueryBuilder = QueryBuilders
+ .spanNearQuery(QueryBuilders.spanTermQuery(field, splitText[0]), 250)
+ .inOrder(false);
+ Stream.of(splitText).skip(1).forEach(e -> spanNearQueryBuilder.addClause(QueryBuilders.spanTermQuery(field, e)));
+ SpanQueryBuilder exclude = QueryBuilders.spanTermQuery(field, separator);
+ SpanNotQueryBuilder spanNotQueryBuilder = QueryBuilders.spanNotQuery(spanNearQueryBuilder, exclude);
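+ // Span search: a per-word query was meant to be added for highlighting, but that clause is commented out below, so `query` is an empty bool query.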
+ BoolQueryBuilder query = new BoolQueryBuilder();
+ // Stream.of(splitText).forEach(e -> query.must(QueryBuilders.matchPhraseQuery(BaseFieldEnum.content.name(), e)));
+ queryBuilder.should(QueryBuilders.boolQuery().must(spanNotQueryBuilder).must(query));
+ }
+
+
+ /**
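+ * Builds the full-text query for a list of terms, applying the word strategy.
+ * @param nestedPath nested path to wrap each clause in; may be null or empty for non-nested fields
+ * @param fieldMap fields to search
+ * @param text search terms
+ * @param isNot whether this is an exclusion condition
+ * @param strategyEnum word strategy
+ * @return the bool query, or null when text is null or empty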
+ */
+ private QueryBuilder getMatchQueryBuilder(String nestedPath,
+ Map fieldMap,
+ String[] text,
+ boolean isNot,
+ SearchWordStrategyEnum strategyEnum) {
+
+ if (text == null || text.length == 0) {
+ return null;
+ }
+ /*
+ // 将所有原词按照空格拆分
+ String[] splitText = Stream.of(text)
+ .filter(StringUtils::isNotBlank)
+ .flatMap(e -> Stream.of(e.split(" ")))
+ .filter(StringUtils::isNotBlank)
+ .toArray(String[]::new);
+ */
+ String[] splitText = text;
+ if (splitText == null || splitText.length == 0) {
+ return null;
+ }
+
+ BoolQueryBuilder result = QueryBuilders.boolQuery();
+ // Exclusions and whole-phrase matching use PHRASE_PREFIX; everything else uses BEST_FIELDS
+ MultiMatchQueryBuilder.Type multiMatchType = isNot
+ || SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? MultiMatchQueryBuilder.Type.PHRASE_PREFIX : MultiMatchQueryBuilder.Type.BEST_FIELDS;
+ // One clause per term. Splitting on spaces is currently disabled (splitText == text), so both branches iterate the same array
+ Stream.of(SearchWordStrategyEnum.WHOLE.equals(strategyEnum) ? text : splitText).forEach((e) -> {
+ // 校验所有还是单个词
+ QueryBuilder matchQuery = this.getMatchQueryBuilder(nestedPath, fieldMap, multiMatchType, e);
+ if (SearchWordStrategyEnum.ALL.equals(strategyEnum)) {
+ result.must(matchQuery);
+ } else {
+ result.should(matchQuery);
+ }
+ });
+ return result;
+ }
+
+ /**
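+ * Builds a multi-match full-text query. Nested and non-nested fields cannot be mixed, and all nested fields must share the same path.
+ * @param nestedPath nested path; when non-empty the query is wrapped in a nested query
+ * @param fieldMap fields to search
+ * @param type multi-match type; BEST_FIELDS is used when null
+ * @param text search text
+ * @return the multi-match query, wrapped in a nested query when nestedPath is set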
+ */
+ private QueryBuilder getMatchQueryBuilder(String nestedPath, Map fieldMap, MultiMatchQueryBuilder.Type type, String text) {
+ // 拼装搜索
+ QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(text)
+ .fields(fieldMap)
+ .type(type == null ? MultiMatchQueryBuilder.Type.BEST_FIELDS : type)
+ .maxExpansions(5)
+ .tieBreaker(0.3f)
+ /// 关闭高频词处理
+ //.cutoffFrequency(0.01f)
+ .lenient(Boolean.TRUE)
+ .minimumShouldMatch("90%");
+
+ // 如果有path拼接nested并返回
+ if (!Strings.isNullOrEmpty(nestedPath)) {
+ return this.nestedQuery(nestedPath, queryBuilder);
+ }
+ return queryBuilder;
+ }
+
+
+ /**
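+ * Wraps a query in a nested query with ScoreMode.None.
+ *
+ * @param nested nested path
+ * @param queryBuilder query to run against the nested documents
+ * @return the nested query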
+ */
+ public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) {
+ return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None);
}
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/HighLevelQuery.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/HighLevelQuery.java
new file mode 100644
index 0000000..795ddf3
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/HighLevelQuery.java
@@ -0,0 +1,118 @@
+package com.bfd.mf.common.service.es;
+
+import com.bfd.mf.common.util.enums.SearchExpressionEnum;
+import com.bfd.mf.common.util.enums.SearchMatchTypeEnum;
+import com.bfd.mf.common.util.enums.SearchScopeEnum;
+import com.bfd.mf.common.util.enums.SearchWordStrategyEnum;
+import com.google.common.base.Strings;
+import io.swagger.annotations.ApiModel;
+import io.swagger.annotations.ApiModelProperty;
+import org.apache.commons.lang.StringUtils;
+
+import java.io.Serializable;
+import java.util.stream.Stream;
+
+/**
+ * 高级搜索条件
+ * @author lihonghao
+ */
+@ApiModel(value = "HighLevelQuery")
+public class HighLevelQuery implements Serializable {
+
+ /**
+ * 表达式
+ */
+ @ApiModelProperty(value = "表达式 1-AND 2-OR 3-NOT")
+ private Integer expression = SearchExpressionEnum.AND.iKey();
+ /**
+ * 搜索类型
+ */
+ @ApiModelProperty(value = "搜索范围 100-全文 200-标题 300-内容 400-关键词 500-同一段落 600-同一句子")
+ private Integer scope = SearchScopeEnum.ALL.iKey();
+ /**
+ * 搜索词策略
+ */
+ @ApiModelProperty(value = "搜索词策略 1-包含以下全部字词 2-包含以下完整词句 3-包含以下任一字词, 默认 3")
+ private String wordStrategy = SearchWordStrategyEnum.ANY.getKey();
+ /**
+ * 文本内容
+ */
+ @ApiModelProperty(value = "文本内容")
+ private String[] text;
+ /**
+ * Translated text content
+ */
+ @ApiModelProperty(value = "译后-文本内容")
+ private String[] translateText;
+ /**
+ * 跨语种
+ */
+ @ApiModelProperty(value = "跨语种, zh, en...")
+ private String language;
+
+ /**
+ * 本条件是否可用 值不为空且如果是数组则数组至少有一个不为空
+ * @return
+ */
+ @ApiModelProperty(hidden = true)
+ public boolean isAvailable(){
+ return text != null && !Stream.of(text).allMatch(Strings::isNullOrEmpty);
+ }
+
+ /**
+ * 获取匹配类型
+ * @return
+ */
+ @ApiModelProperty(hidden = true)
+ public SearchMatchTypeEnum getMatchType() {
+ return SearchScopeEnum.getMatchTypeByKey(this.scope);
+ }
+
+ public Integer getExpression() {
+ return expression;
+ }
+
+ public void setExpression(Integer expression) {
+ this.expression = expression;
+ }
+
+ public Integer getScope() {
+ return scope;
+ }
+
+ public void setScope(Integer scope) {
+ this.scope = scope;
+ }
+
+ public String[] getText() {
+ return text;
+ }
+
+ public void setText(String[] text) {
+ this.text = text == null ? null : Stream.of(text).filter(StringUtils::isNotBlank).toArray(String[]::new);
+ }
+
+ public String[] getTranslateText() {
+ return translateText;
+ }
+
+ public void setTranslateText(String[] translateText) {
+ this.translateText = translateText == null ? null : Stream.of(translateText).filter(StringUtils::isNotBlank).toArray(String[]::new);
+ }
+
+ public String getLanguage() {
+ return language;
+ }
+
+ public void setLanguage(String language) {
+ this.language = language;
+ }
+
+ public String getWordStrategy() {
+ return wordStrategy;
+ }
+
+ public void setWordStrategy(String wordStrategy) {
+ this.wordStrategy = wordStrategy;
+ }
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java
index 12b8798..a409a0f 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java
@@ -64,8 +64,8 @@ public class ParseSearchScopeService {
.must(QueryBuilders.termQuery(ESConstant.PRIMARY,1))
.must(QueryBuilders.termQuery(ESConstant.DOC_TYPE,ESConstant.ITEM)));
} else if(searchType == 2){
- searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2))
- .must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage","newsuser","videoAccount"));
+ searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2));
+ // .must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage","newsuser","videoAccount"));
}
return searchScopeQuery;
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
index 4ff284a..5b6b50c 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
@@ -25,6 +25,9 @@ import java.util.Map;
public class ESConstant {
+ public static final String HIGHLIGHTPRETAGS = "";
+ public static final String HIGHLIGHTPOSTTAGS = "";
+
public static final String MEDIA_AREA_KEY = "dict";
@@ -517,6 +520,11 @@ public class ESConstant {
public static String FACEBOOK = "facebook";
public static String TWITTER = "twitter";
+
+ public static String SOCIAL_COMMENT = "socialComment";
+ public static String SOCIAL_FOLLOW = "socialFollow";
+ // public static String SOCIALCOMMENT = "socialComment";
+
public static String CID = "cid";
public static String SITEID = "siteId";
public static String SITETYPE = "siteType";
@@ -840,8 +848,9 @@ public class ESConstant {
, ESConstant.OPINIONS
// 表情
, ESConstant.EXPRESSION
-
+ // 来源
, ESConstant.SOURCE
+ // 情感,这个字段不用了
, ESConstant.SYS_SENTIMENT
, ESConstant.CONTENT_TAG
, ESConstant.EMOTION_ENTRY + ESConstant.DEFAULT_JOIN_SYMBOL + ESConstant.EMOTION_NAME
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java
new file mode 100644
index 0000000..0310e71
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/BaseFieldEnum.java
@@ -0,0 +1,209 @@
+package com.bfd.mf.common.util.enums;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Field names of the document / web-page / image indices.
+ *
+ * <p>Every constant is spelled exactly like the index field it stands for, so
+ * {@link #name()} is the field name. The declaration order is kept as-is because
+ * {@link #getAllFields()} exposes it.
+ *
+ * @author lihonghao
+ */
+public enum BaseFieldEnum {
+    /** File id. */
+    id,
+    /** MD5 of the file. */
+    md5,
+    /** File title. */
+    title,
+    /** Summary. */
+    summary,
+    /** Document content. */
+    content,
+    /** Data source. */
+    source,
+    /** Original text / translated text. */
+    type,
+    /** Id of the original text. */
+    original_id,
+    /** Time the record was stored. */
+    create_time,
+    /** Document language. */
+    language,
+    /** Uploading user. */
+    upload_user,
+    /** Name of the uploading user. */
+    upload_user_name,
+    /** Deleted flag. */
+    del,
+    /** Website enum. */
+    website,
+    /** Publisher. */
+    publisher,
+    /** Publish time. */
+    public_time,
+    /** Website section. */
+    cate_md5,
+    /** AI tags. */
+    ai_tag,
+    /** AI-detected area. */
+    ai_area,
+    /** Subject, first-level category. */
+    subject_classify1,
+    /** Subject, second-level category. */
+    subject_classify2,
+    /** Subject. */
+    subject,
+    /** Channel. */
+    channel,
+    /** Review (audit) state. */
+    audit_state,
+    /** Department that owns a user-uploaded document. */
+    department_id,
+    /** Editing state. */
+    edit_state,
+    /** Sensitive words. */
+    sensitive_tag,
+    /** Pinned-to-top state. */
+    flag_top,
+    /** Validity period of the pinned state. */
+    flag_top_validity,
+    /** Importance: 0 none, 1 unimportant, 2 slightly important, 3 normal, 4 important, 5 very important. */
+    flag_importance,
+    /** Classification tag. */
+    subject_tag,
+    /** Event id. */
+    event_id,
+    /** NOTE(review): was documented as "event id" (copy-paste); presumably the event detection time - confirm. */
+    event_detect_time,
+    /** Field used for duplicate checking. */
+    duplicate_key,
+    /** Media type. */
+    media_type,
+    /** Normalized time mentioned in the text. */
+    norm_time,
+    /** Time interval in days. */
+    delay_time,
+    /** Province (previously documented as "province name"; the field name says code - confirm). */
+    province_code,
+    /** City (previously documented as "city name"; the field name says code - confirm). */
+    city_code,
+    /** County / district. */
+    county_code;
+
+    /**
+     * Fields a keyword is matched against, with their boost.
+     *
+     * @return a new mutable map: {@code title -> 2.0}, {@code content -> 1.0}
+     */
+    public static Map<String, Float> getMatchFields() {
+        Map<String, Float> matchMap = new HashMap<>(2);
+        matchMap.put(BaseFieldEnum.title.name(), 2.0F);
+        matchMap.put(BaseFieldEnum.content.name(), 1.0F);
+        return matchMap;
+    }
+
+    /**
+     * Match fields including their pinyin variants.
+     *
+     * <p>Pinyin search is switched off in the system, so this currently returns a copy of
+     * {@link #getMatchFields()}; the pinyin entries (field + pinyin suffix at a tenth of the
+     * boost) are intentionally not added.
+     *
+     * @return a new mutable map of field name to boost
+     */
+    public static Map<String, Float> getMatchFieldsWithPy() {
+        Map<String, Float> matchMap = getMatchFields();
+        // Sized for the pinyin variants in case they are re-enabled.
+        Map<String, Float> pyMap = new HashMap<>(matchMap.size() * 2);
+        pyMap.putAll(matchMap);
+        return pyMap;
+    }
+
+    /**
+     * Names of all fields, in declaration order.
+     *
+     * @return a new array holding {@code name()} of every constant
+     */
+    public static String[] getAllFields() {
+        return Arrays.stream(values()).map(BaseFieldEnum::name).toArray(String[]::new);
+    }
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/DocumentFieldEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/DocumentFieldEnum.java
new file mode 100644
index 0000000..89d6546
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/DocumentFieldEnum.java
@@ -0,0 +1,15 @@
+package com.bfd.mf.common.util.enums;
+
+import javax.annotation.Resource;
+
+/**
+ * Field names of the document index; {@code name()} of a constant is the index field name.
+ *
+ * <p>The names used to be declared as uninitialised {@code public static Resource} fields on
+ * an enum without constants. {@code Resource} is an annotation type, so unless something
+ * assigned those fields they were {@code null} and {@code DocumentFieldEnum.title.name()}
+ * threw a NullPointerException. They are real enum constants now; {@code name()} keeps
+ * returning a {@code String}, so existing call sites compile unchanged.
+ *
+ * @Author dujing
+ * @Date 2023/4/23 10:34
+ */
+public enum DocumentFieldEnum {
+    /** Title field. */
+    title,
+    /** Content (body) field. */
+    content,
+    /** AI tag field. */
+    ai_tag
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchExpressionEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchExpressionEnum.java
new file mode 100644
index 0000000..59651a3
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchExpressionEnum.java
@@ -0,0 +1,138 @@
+package com.bfd.mf.common.util.enums;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+
+/**
+ * Boolean operators of a search expression.
+ *
+ * <p>The i18n label lookup and the key/value list helper are currently disabled; only the
+ * raw label code is exposed through {@link #getLabel()}.
+ *
+ * @author honghao.li
+ */
+public enum SearchExpressionEnum {
+
+    /** Logical AND. */
+    AND("1", ""),
+    /** Logical OR. */
+    OR("2", ""),
+    /** Logical NOT. */
+    NOT("3", "");
+
+    /** Code value. */
+    private final String key;
+    /** i18n code of the display label. */
+    private final String label;
+
+    /**
+     * @param key   code value
+     * @param label i18n code of the value shown in lists
+     */
+    SearchExpressionEnum(String key, String label) {
+        this.key = key;
+        this.label = label;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     * @return the code value as an {@code int}
+     */
+    public int iKey() {
+        return Integer.parseInt(key);
+    }
+
+    public String getLabel() {
+        return label;
+    }
+
+    /**
+     * Tells whether the given numeric code denotes this constant.
+     *
+     * @param key numeric code, may be {@code null}
+     * @return {@code true} when {@code key} equals this constant's code; {@code false} for {@code null}
+     */
+    public boolean is(Integer key) {
+        return key != null && key == iKey();
+    }
+
+    /**
+     * Tells whether any constant carries the given code.
+     *
+     * @param key code value, may be {@code null}
+     * @return {@code true} when a constant with that code exists
+     */
+    public static boolean isSupport(String key) {
+        return Arrays.stream(values()).anyMatch(e -> e.getKey().equals(key));
+    }
+
+    /**
+     * All constants keyed by their code, in declaration order.
+     *
+     * @return a new mutable map, built on every call
+     */
+    public static LinkedHashMap<String, SearchExpressionEnum> getEnumMap() {
+        LinkedHashMap<String, SearchExpressionEnum> map = new LinkedHashMap<>();
+        for (SearchExpressionEnum temp : values()) {
+            map.put(temp.getKey(), temp);
+        }
+        return map;
+    }
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchMatchTypeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchMatchTypeEnum.java
new file mode 100644
index 0000000..0ee8e8f
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchMatchTypeEnum.java
@@ -0,0 +1,160 @@
+package com.bfd.mf.common.util.enums;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.stream.Stream;
+
+/**
+ * How a search term is matched against a field. {@link #PARAGRAPH} and {@link #SENTENCE} are
+ * the span types (see {@link #isSpanType(SearchMatchTypeEnum)}).
+ *
+ * <p>The i18n label lookup and the key/value list helper are currently disabled; only the
+ * raw label code is exposed through {@link #getLabel()}.
+ *
+ * @author honghao.li
+ */
+public enum SearchMatchTypeEnum {
+    /** Tokenized (analyzed) match. */
+    MATCH("1", ""),
+    /** Exact match. */
+    ACCURATE("2", ""),
+    /** Fuzzy match. */
+    FUZZY("3", ""),
+    /** Same paragraph. */
+    PARAGRAPH("4", ""),
+    /** Same sentence. */
+    SENTENCE("5", ""),
+    /** Nested. */
+    NESTED("6", "");
+
+    /** Code value. */
+    private final String key;
+    /** i18n code of the display label. */
+    private final String label;
+
+    /**
+     * @param key   code value
+     * @param label i18n code of the value shown in lists
+     */
+    SearchMatchTypeEnum(String key, String label) {
+        this.key = key;
+        this.label = label;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     * @return the code value as an {@code int}
+     */
+    public int iKey() {
+        return Integer.parseInt(key);
+    }
+
+    public String getLabel() {
+        return label;
+    }
+
+    /**
+     * Tells whether the given numeric code denotes this constant.
+     *
+     * @param key numeric code, may be {@code null}
+     * @return {@code true} when {@code key} equals this constant's code; {@code false} for {@code null}
+     */
+    public boolean is(Integer key) {
+        return key != null && key == iKey();
+    }
+
+    /**
+     * Tells whether any constant carries the given code.
+     *
+     * @param key code value, may be {@code null}
+     * @return {@code true} when a constant with that code exists
+     */
+    public static boolean isSupport(String key) {
+        return Arrays.stream(values()).anyMatch(e -> e.getKey().equals(key));
+    }
+
+    /**
+     * Tells whether the match type is a span type, i.e. {@link #PARAGRAPH} or {@link #SENTENCE}.
+     *
+     * @param tempEnum match type, may be {@code null}
+     * @return {@code true} for the two span types, {@code false} otherwise (including {@code null})
+     */
+    public static boolean isSpanType(SearchMatchTypeEnum tempEnum) {
+        return tempEnum == PARAGRAPH || tempEnum == SENTENCE;
+    }
+
+    /**
+     * All constants keyed by their code, in declaration order.
+     *
+     * @return a new mutable map, built on every call
+     */
+    public static LinkedHashMap<String, SearchMatchTypeEnum> getEnumMap() {
+        LinkedHashMap<String, SearchMatchTypeEnum> map = new LinkedHashMap<>();
+        for (SearchMatchTypeEnum temp : values()) {
+            map.put(temp.getKey(), temp);
+        }
+        return map;
+    }
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java
new file mode 100644
index 0000000..c2f90b2
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchScopeEnum.java
@@ -0,0 +1,218 @@
+package com.bfd.mf.common.util.enums;
+
+import com.bfd.mf.common.util.constants.ESConstant;
+import com.google.common.collect.Maps;
+
+import java.util.*;
+
+/**
+ * Search scope: which fields a keyword is matched against and with which match type.
+ *
+ * <p>Active codes: 100 full text, 200 title, 300 content, 600 same sentence. The codes
+ * 400 (keyword, nested) and 500 (same paragraph) are currently disabled and therefore not
+ * declared.
+ *
+ * @author honghao.li
+ */
+public enum SearchScopeEnum {
+
+    /** Full text: title and content. */
+    ALL("100", SearchMatchTypeEnum.MATCH) {
+        @Override
+        public Map<String, Float> getFieldMap() {
+            return unitBoost(ESConstant.TITLE, ESConstant.CONTENT);
+        }
+    },
+    /** Title only. */
+    TITLE("200", SearchMatchTypeEnum.MATCH) {
+        @Override
+        public Map<String, Float> getFieldMap() {
+            return unitBoost(BaseFieldEnum.title.name());
+        }
+    },
+    /** Content only. */
+    CONTENT("300", SearchMatchTypeEnum.MATCH) {
+        @Override
+        public Map<String, Float> getFieldMap() {
+            return unitBoost(BaseFieldEnum.content.name());
+        }
+    },
+    /** Content, terms within the same sentence. */
+    SENTENCE("600", SearchMatchTypeEnum.SENTENCE) {
+        @Override
+        public Map<String, Float> getFieldMap() {
+            return unitBoost(BaseFieldEnum.content.name());
+        }
+    };
+
+    /** Code value. */
+    private final String key;
+    /** Match type applied within this scope. */
+    private final SearchMatchTypeEnum matchTypeEnum;
+
+    /**
+     * @param key           code value
+     * @param matchTypeEnum match type applied within this scope
+     */
+    SearchScopeEnum(String key, SearchMatchTypeEnum matchTypeEnum) {
+        this.key = key;
+        this.matchTypeEnum = matchTypeEnum;
+    }
+
+    /**
+     * Builds a fresh mutable map that gives every listed field the boost 1.0.
+     *
+     * <p>Replaces the former double-brace initialisers, which created an anonymous
+     * {@code HashMap} subclass per call. The title/content names are taken from
+     * {@link BaseFieldEnum}, whose constants yield the same "title"/"content" strings, because
+     * the {@code DocumentFieldEnum} members used before were unassigned static fields and
+     * dereferencing them threw a NullPointerException.
+     */
+    private static Map<String, Float> unitBoost(String... fields) {
+        Map<String, Float> map = new HashMap<>();
+        for (String field : fields) {
+            map.put(field, 1.0F);
+        }
+        return map;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     * Nested path of the scope's fields.
+     *
+     * @return always {@code null}; none of the active scopes is nested
+     */
+    public String getPath() {
+        return null;
+    }
+
+    public SearchMatchTypeEnum getMatchTypeEnum() {
+        return matchTypeEnum;
+    }
+
+    /**
+     * @return the code value as an {@code int}
+     */
+    public int iKey() {
+        return Integer.parseInt(key);
+    }
+
+    /**
+     * Tells whether the given numeric code denotes this constant.
+     *
+     * @param key numeric code, may be {@code null}
+     * @return {@code true} when {@code key} equals this constant's code; {@code false} for {@code null}
+     */
+    public boolean is(Integer key) {
+        return key != null && key == iKey();
+    }
+
+    /**
+     * Tells whether any constant carries the given code.
+     *
+     * @param key code value, may be {@code null}
+     * @return {@code true} when a constant with that code exists
+     */
+    public static boolean isSupport(String key) {
+        return Arrays.stream(values()).anyMatch(e -> e.getKey().equals(key));
+    }
+
+    /**
+     * Resolves the match fields for a scope code.
+     *
+     * @param key numeric scope code, may be {@code null}
+     * @return an empty map for {@code null}; the scope's field map for a known code; the
+     *         default {@link BaseFieldEnum#getMatchFields()} for an unknown code
+     */
+    public static Map<String, Float> getFieldsByKey(Integer key) {
+        if (key == null) {
+            return new HashMap<>(0);
+        }
+        SearchScopeEnum scope = getEnumByKey(key.toString());
+        if (scope != null) {
+            return scope.getFieldMap();
+        }
+        // Unknown code: fall back to the default match fields.
+        return BaseFieldEnum.getMatchFields();
+    }
+
+    /**
+     * Looks up the constant for a code.
+     *
+     * @param key code value, may be {@code null}
+     * @return the matching constant, or {@code null} when there is none
+     */
+    public static SearchScopeEnum getEnumByKey(String key) {
+        for (SearchScopeEnum value : SearchScopeEnum.values()) {
+            if (Objects.equals(key, value.getKey())) {
+                return value;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Resolves the match type for a scope code.
+     *
+     * @param key numeric scope code, may be {@code null}
+     * @return the scope's match type, or {@code null} for {@code null} or an unknown code
+     */
+    public static SearchMatchTypeEnum getMatchTypeByKey(Integer key) {
+        if (key == null) {
+            return null;
+        }
+        SearchScopeEnum scope = getEnumByKey(key.toString());
+        return scope == null ? null : scope.getMatchTypeEnum();
+    }
+
+    /**
+     * All constants keyed by their code, in declaration order.
+     *
+     * @return a new mutable map, built on every call
+     */
+    public static LinkedHashMap<String, SearchScopeEnum> getEnumMap() {
+        LinkedHashMap<String, SearchScopeEnum> map = new LinkedHashMap<>();
+        for (SearchScopeEnum temp : values()) {
+            map.put(temp.getKey(), temp);
+        }
+        return map;
+    }
+
+    /**
+     * Fields searched in this scope.
+     *
+     * @return a new mutable map of field name to boost
+     */
+    public abstract Map<String, Float> getFieldMap();
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchWordStrategyEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchWordStrategyEnum.java
new file mode 100644
index 0000000..0b52c12
--- /dev/null
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/SearchWordStrategyEnum.java
@@ -0,0 +1,71 @@
+package com.bfd.mf.common.util.enums;
+
+import com.google.common.collect.Maps;
+
+import java.util.LinkedHashMap;
+
+/**
+ * Search-word strategy: how the entered words have to occur in a hit.
+ * <ul>
+ *   <li>contains all of the words</li>
+ *   <li>contains the complete phrase</li>
+ *   <li>contains any of the words</li>
+ * </ul>
+ *
+ * @author lihonghao
+ */
+public enum SearchWordStrategyEnum {
+
+    /** Contains all of the words. */
+    ALL("1"),
+    /** Contains the complete phrase. */
+    WHOLE("2"),
+    /** Contains any of the words. */
+    ANY("3");
+
+    /** Code value. */
+    private final String key;
+
+    SearchWordStrategyEnum(String key) {
+        this.key = key;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     * Tells whether the given code denotes this constant.
+     *
+     * @param key code value, may be {@code null}
+     * @return {@code true} when {@code key} equals this constant's code
+     */
+    public boolean is(String key) {
+        return getKey().equals(key);
+    }
+
+    /**
+     * Looks up the constant for a code.
+     *
+     * @param key code value, may be {@code null}
+     * @return the matching constant, or {@code null} when there is none
+     */
+    public static SearchWordStrategyEnum getByKey(String key) {
+        for (SearchWordStrategyEnum candidate : values()) {
+            if (candidate.is(key)) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * All constants keyed by their code, in declaration order.
+     *
+     * @return a new mutable map, built on every call
+     */
+    public static LinkedHashMap<String, SearchWordStrategyEnum> getEnumMap() {
+        LinkedHashMap<String, SearchWordStrategyEnum> map = new LinkedHashMap<>();
+        for (SearchWordStrategyEnum temp : values()) {
+            map.put(temp.getKey(), temp);
+        }
+        return map;
+    }
+}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
index 36449b2..f53d8f6 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
@@ -1,8 +1,12 @@
package com.bfd.mf.common.util.es;
import com.alibaba.fastjson.JSONObject;
+import com.bfd.mf.common.service.es.EsBaseParam;
import com.bfd.mf.common.util.constants.ESConstant;
+import com.bfd.mf.common.util.enums.BaseFieldEnum;
+import com.google.common.base.Strings;
import com.google.common.collect.Maps;
+import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesAction;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
@@ -19,11 +23,10 @@ import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.index.query.*;
import org.elasticsearch.index.reindex.*;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.SearchHit;
@@ -43,10 +46,9 @@ import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;
import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
public abstract class EsUtils {
@@ -55,6 +57,7 @@ public abstract class EsUtils {
private static final Map CLIENT_MAP = Maps.newHashMap();
private static final String DOCS = "docs";
+
public static void registerCluster(String clusterName, String[] addresses) {
System.setProperty("es.set.netty.runtime.available.processors", "false");
Assert.hasLength(clusterName, "Param clusterName must not be empty.");
@@ -86,6 +89,7 @@ public abstract class EsUtils {
String sortFlag, String orderFlag,
Integer size, Integer from,
Integer searchType) {
+ System.out.println("非高亮查询");
TransportClient client = getClient(clusterName);
boolean options = true;
boolean optionsf = false;
@@ -94,14 +98,6 @@ public abstract class EsUtils {
if (searchType == 0) {
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID);
}
- //高亮显示
- HighlightBuilder highlightBuilder = new HighlightBuilder();
- highlightBuilder.field("title");
- highlightBuilder.field("content");
-
- highlightBuilder.requireFieldMatch(false); //多个单词高亮的话,要把这个设置为trues
- highlightBuilder.preTags("");
- highlightBuilder.postTags("");
// 查询
// from + size 的 分页 查询方式
@@ -111,12 +107,12 @@ public abstract class EsUtils {
.setQuery(queryBuilder)
.setCollapse(collapseBuilder)
.setSize(size)
- .setFrom(from)
- .highlighter(highlightBuilder);
+ .setFrom(from);
+
+ System.out.println(requestBuilder);
SearchResponse searchResponse = requestBuilder.execute().actionGet();
- // List dataList = Lists.newLinkedList();
List dataList = new ArrayList<>();
if (searchResponse.getHits().totalHits > 0) {
SearchHit[] hits = searchResponse.getHits().getHits();
@@ -127,19 +123,173 @@ public abstract class EsUtils {
.replace("cl_major_", "")
.replace("cl_subject_", "")
.replace("cl_special_1.0_", ""));
- Map highlight = hits[i].getHighlightFields();
- data.put("highlight",highlight);
dataList.add(data);
}
}
return dataList;
}
+
+    /**
+     * Runs a sorted, paged (from + size), collapsed search with highlighting.
+     *
+     * <p>Highlighting is requested for the fields of
+     * {@link BaseFieldEnum#getMatchFieldsWithPy()}, with at most two fragments per field
+     * ordered by score. No custom highlight query is set. For every hit the source map is
+     * copied and its content and title values are overwritten by the concatenated highlight
+     * fragments, when the hit has any.
+     *
+     * @param clusterName  name of the registered cluster to query
+     * @param index        indices to search
+     * @param queryBuilder the search query
+     * @param sortFlag     field to sort by
+     * @param orderFlag    {@code ESConstant.ASC} for ascending; anything else, including
+     *                     {@code null}, sorts descending
+     * @param size         page size
+     * @param from         offset of the first hit
+     * @param searchType   0 collapses hits on DOC_ID; any other value, including {@code null},
+     *                     collapses on DATA_ID
+     * @return one JSON object per hit; empty when nothing matched
+     */
+    public static List queryWithHighlight(String clusterName, String[] index,
+                                          final QueryBuilder queryBuilder,
+                                          String sortFlag, String orderFlag,
+                                          Integer size, Integer from,
+                                          Integer searchType) {
+        // TODO(review): the System.out diagnostics in this method should go to the class logger.
+        System.out.println("高亮查询");
+        TransportClient client = getClient(clusterName);
+
+        // The same record is not de-duplicated across tasks, so one DOC_ID can occur several
+        // times; per the original note only the main-post search (type 0) collapses on DOC_ID.
+        // Comparing via equals() keeps a null searchType from failing on unboxing.
+        boolean collapseOnDocId = Integer.valueOf(0).equals(searchType);
+        CollapseBuilder collapseBuilder =
+                new CollapseBuilder(collapseOnDocId ? ESConstant.DOC_ID : ESConstant.DATA_ID);
+
+        HighlightBuilder highlightBuilder = new HighlightBuilder()
+                // highlight only the fields the query actually matched
+                .requireFieldMatch(true)
+                .order(HighlightBuilder.Order.SCORE)
+                // a fragment is one continuous piece of text; return at most two per field
+                .numOfFragments(2)
+                .preTags(ESConstant.HIGHLIGHTPRETAGS)
+                .postTags(ESConstant.HIGHLIGHTPOSTTAGS);
+        // Plain loop instead of a method reference so it works for a raw or a typed key set.
+        for (Object field : BaseFieldEnum.getMatchFieldsWithPy().keySet()) {
+            highlightBuilder.field(String.valueOf(field));
+        }
+
+        SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index)
+                .setIndicesOptions(IndicesOptions.fromOptions(true, true, true, false))
+                // constant-first equals: a null orderFlag falls back to descending
+                .addSort(sortFlag, ESConstant.ASC.equals(orderFlag) ? SortOrder.ASC : SortOrder.DESC)
+                .setQuery(queryBuilder)
+                .setCollapse(collapseBuilder)
+                .setSize(size)
+                .setFrom(from)
+                .highlighter(highlightBuilder);
+
+        System.out.println(requestBuilder);
+        System.out.println("-----");
+
+        SearchResponse searchResponse = requestBuilder.execute().actionGet();
+
+        List<JSONObject> dataList = new ArrayList<>();
+        if (searchResponse.getHits().totalHits > 0) {
+            for (SearchHit hit : searchResponse.getHits().getHits()) {
+                JSONObject data = new JSONObject();
+                data.putAll(hit.getSourceAsMap());
+                getHighlightResult(ESConstant.CONTENT, hit, data);
+                getHighlightResult(ESConstant.TITLE, hit, data);
+                dataList.add(data);
+            }
+        }
+        return dataList;
+    }
+
+    /**
+     * Overwrites {@code data[fieldName]} with the hit's highlight fragments for that field,
+     * concatenated without a separator. {@code data} is left untouched when the hit carries no
+     * highlight for the field.
+     *
+     * @param fieldName field whose highlight is looked up and whose value is replaced
+     * @param hit       the search hit
+     * @param data      the JSON object built for the hit; modified in place
+     */
+    private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) {
+        // One lookup instead of containsKey + get; also covers a key mapped to null.
+        HighlightField highlightField = hit.getHighlightFields().get(fieldName);
+        if (highlightField == null) {
+            return;
+        }
+        StringBuilder joined = new StringBuilder();
+        for (Text fragment : highlightField.fragments()) {
+            joined.append(fragment);
+        }
+        data.put(fieldName, joined.toString());
+    }
+
+
public static List queryNew(String clusterName, String[] index,
- final QueryBuilder queryBuilder,
- String sortFlag, String orderFlag,
- Integer size, Integer from,
- Integer searchType) {
+ final QueryBuilder queryBuilder,
+ String sortFlag, String orderFlag,
+ Integer size, Integer from,
+ Integer searchType) {
TransportClient client = getClient(clusterName);
boolean options = true;
boolean optionsf = false;
@@ -178,6 +328,7 @@ public abstract class EsUtils {
/**
* 查询数据量 count
+ *
* @param clusterName
* @param index
* @param queryBuilder
@@ -235,18 +386,18 @@ public abstract class EsUtils {
}
public static Long queryTotalCountNew(String clusterName, String[] index,
- QueryBuilder queryBuilder,
- Integer searchType) {
+ QueryBuilder queryBuilder,
+ Integer searchType) {
TransportClient client = getClient(clusterName);
boolean options = true;
boolean optionsf = false;
// 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重
String count = "count";
- AggregationBuilder aggregation ;
- if(searchType == 0) {
+ AggregationBuilder aggregation;
+ if (searchType == 0) {
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID);
- }else{
+ } else {
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID);
}
//searchSourceBuilder.aggregation(aggregation);
@@ -256,13 +407,13 @@ public abstract class EsUtils {
.setQuery(queryBuilder)
.addAggregation(aggregation);
- // System.out.println(requestBuilder);
+ // System.out.println(requestBuilder);
Aggregations aggregations = requestBuilder.get().getAggregations();
Cardinality cardinality = aggregations.get(count);
// System.out.println("1111 : " + cardinality.getValue());
// System.out.println("2222 : " + requestBuilder.get().getHits().totalHits);
long resultCount = cardinality.getValue();
- if(searchType == 2){
+ if (searchType == 2) {
resultCount = requestBuilder.get().getHits().totalHits;
}
return resultCount;
@@ -439,18 +590,21 @@ public abstract class EsUtils {
}
}
- public static long reIndex(String clusterName, String indexList, String newIndex) {
+ public static long reIndex(String clusterName, String originalIndex, String currentIndex) {
+ // String clusterName, String originalIndex, String currentIndex,
try {
TransportClient client = getClient(clusterName);
- System.out.println(indexList + " *** " + newIndex);
+ System.out.println(originalIndex + " to : " + currentIndex);
ReindexRequestBuilder builder = ReindexAction.INSTANCE
.newRequestBuilder(client)
- .source(indexList)
- .destination(newIndex);
- String newAliex = newIndex.replace("cl_special_1.0_", "cl_major_");
+ .source(originalIndex)
+ .destination(currentIndex);
+ String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_");
BulkByScrollResponse response = builder.get();
- IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE.newRequestBuilder(client).addAlias(newIndex, newAliex);
+ IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE.newRequestBuilder(client)
+ .addAlias(currentIndex, newAliex);
+
IndicesAliasesResponse IndicesResponse = indicesBuilder.get();
System.out.println("******* : " + response);
System.out.println("##### : " + IndicesResponse);
@@ -503,4 +657,104 @@ public abstract class EsUtils {
e.printStackTrace();
}
}
+
+ /**
+ * Copies the documents of {@code originalIndex} that match {@code queryBuilder} into
+ * {@code currentIndex}, then adds an alias to {@code currentIndex} whose name is the index
+ * name with "cl_special_1.0_" replaced by "cl_major_".
+ *
+ * NOTE(review): {@code tasks} is not used in the body. If the index name does not contain
+ * "cl_special_1.0_" the alias name equals the index name - confirm the cluster accepts that.
+ *
+ * @param clusterName name of the registered cluster
+ * @param originalIndex source index
+ * @param currentIndex destination index
+ * @param tasks unused here
+ * @param queryBuilder filter selecting the documents to copy
+ * @return number of documents created in the destination; 0 when any step throws (the
+ * exception is only printed)
+ */
+ public static long reIndexByTasks(String clusterName,
+ String originalIndex,
+ String currentIndex,
+ List tasks,
+ QueryBuilder queryBuilder) {
+ try {
+ TransportClient client = getClient(clusterName);
+
+ System.out.println(originalIndex + " *** " + currentIndex);
+ ReindexRequestBuilder builder = ReindexAction.INSTANCE
+ .newRequestBuilder(client)
+ .source(originalIndex)// source index
+ .destination(currentIndex) // destination index
+ .filter(queryBuilder)
+ .refresh(true);
+ BulkByScrollResponse response = builder.get();
+ // add the alias: the cl_special_1.0_ prefix is replaced by cl_major_
+ String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_");
+ IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE
+ .newRequestBuilder(client)
+ .addAlias(currentIndex, newAliex);
+ IndicesAliasesResponse IndicesResponse = indicesBuilder.get();
+ System.out.println("******* : " + response);
+ System.out.println("##### : " + IndicesResponse);
+ return response.getCreated();
+ } catch (Exception e) {
+ e.printStackTrace();
+ return 0;
+ }
+ }
+
+    /**
+     * Deletes from {@code indexName} the documents that belong to the given source
+     * ({@code cid}, lower-cased) and to the first task of {@code tasks}.
+     *
+     * <p>Both conditions are combined in one bool query. Chaining two {@code filter(...)}
+     * calls on the delete-by-query builder does not AND them: each call sets the source query
+     * again, so the earlier condition (the source restriction) was silently dropped.
+     *
+     * <p>NOTE(review): only {@code tasks.get(0)} is used; confirm whether all task ids should
+     * be matched. Failures, including a {@code null} cid or an empty task list, are printed
+     * and swallowed as before.
+     *
+     * @param clusterName name of the registered cluster
+     * @param indexName   index to delete from
+     * @param cid         source identifier; compared in lower case
+     * @param tasks       task ids; only the first element is used
+     */
+    public static void delIndexByTasks(String clusterName, String indexName, String cid, List tasks) {
+        try {
+            TransportClient client = getClient(clusterName);
+            System.out.println("**** " + indexName);
+            cid = cid.toLowerCase();
+            QueryBuilder criteria = QueryBuilders.boolQuery()
+                    .filter(QueryBuilders.termQuery(ESConstant.EN_SOURCE, cid))
+                    .filter(QueryBuilders.termQuery(ESConstant.TASK_ID, tasks.get(0)));
+            DeleteByQueryRequestBuilder builder = DeleteByQueryAction.INSTANCE.newRequestBuilder(client)
+                    .filter(criteria)
+                    .source(indexName);
+            BulkByScrollResponse response2 = builder.get();
+            long deleted = response2.getDeleted();
+            System.out.println(deleted);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+ /**
+ * Builds the full-text multi-match query for non-nested fields; delegates to the
+ * four-argument overload with a {@code null} nested path.
+ *
+ * @param fieldMap fields to search, handed to the multi-match query as its field/boost map
+ * @param type multi-match type; {@code null} is handled by the delegate
+ * @param text text to search for
+ * @return the query built by the delegate
+ */
+ private QueryBuilder getMatchQueryBuilder(Map fieldMap, MultiMatchQueryBuilder.Type type, String text) {
+ return this.getMatchQueryBuilder(null, fieldMap, type, text);
+ }
+
+ /**
+ * Builds the full-text multi-match query. Mixing nested and non-nested fields is not
+ * supported, and all nested fields must belong to the same path.
+ *
+ * @param nestedPath nested path; when non-empty the query is wrapped in a nested query
+ * @param fieldMap fields to search, handed to the multi-match query as its field/boost map
+ * @param type multi-match type; {@code null} selects BEST_FIELDS
+ * @param text text to search for
+ * @return the multi-match query, wrapped in a nested query when a path is given
+ */
+ private QueryBuilder getMatchQueryBuilder(String nestedPath, Map fieldMap, MultiMatchQueryBuilder.Type type, String text) {
+ // assemble the search
+ QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(text)
+ .fields(fieldMap)
+ .type(type == null ? MultiMatchQueryBuilder.Type.BEST_FIELDS : type)
+ .maxExpansions(5)
+ .tieBreaker(0.3f)
+ /// high-frequency term handling is switched off
+ //.cutoffFrequency(0.01f)
+ .lenient(Boolean.TRUE)
+ .minimumShouldMatch("60%");
+
+ // with a path: wrap in a nested query and return that
+ if (!Strings.isNullOrEmpty(nestedPath)) {
+ return this.nestedQuery(nestedPath, queryBuilder);
+ }
+ return queryBuilder;
+ }
+
+
+ /**
+ * Wraps a query in a nested query with score mode {@code None}.
+ *
+ * @param nested nested path
+ * @param queryBuilder query to run inside the nested path
+ * @return the nested query
+ */
+ public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) {
+ return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None);
+ }
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java
index 6f2f65d..5635250 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java
@@ -123,19 +123,14 @@ public class SliceScrollUtil {
logger.info("[SliceScrollUtil] fetchResultSubjectCache ... 统计+词云 数据查询");
List jsonObjectList = new ArrayList<>();
long calculateStartTime = System.currentTimeMillis();
- Cluster cluster = null;
+ // Cluster cluster = null;
List currentIndexList = null;
- if (null != queryRequest.getSubjectId() && !("all").equals(queryRequest.getSubjectId())) {
- logger.info("查询 【专题数据】 subjectId :{}", queryRequest.getSubjectId());
- //cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type);
+ if(null != queryRequest.getSubjectId()){
currentIndexList = subjectQueryDataService.getIndexBySubjectIds(queryRequest.getSubjectId());
-// }else{
-// logger.info("[SliceScrollUtil] fetchResultSubjectCache : 查询 【全局数据】");
-// cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.special_cluster_type);
-// currentIndexList = subjectQueryDataService.getIndexListByTimeRange(queryRequest.getStartTime(),queryRequest.getEndTime());
+ }else{
+ return jsonObjectList;
}
- //Long clusterId = cluster.getId();
Long clusterId = 4L;
logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId :{}; currentIndexList :{}", clusterId, currentIndexList.toString());
logger.info("==========进入数据分析Es and Cache,计算开始执行============");
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java
index eb93f7f..30bef9e 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/CollectionUtils.java
@@ -63,10 +63,14 @@ public class CollectionUtils {
int index = 0;
for (Iterator it = list.iterator(); it.hasNext();) {
Map.Entry entry = (Map.Entry) it.next();
- JSONObject json= new JSONObject();
- json.put("key",entry.getKey());
- json.put("value",entry.getValue());
- listResult.add(json);
+ if(!entry.getKey().equals("")) {
+ if(entry.getKey().toString().length() > 1){
+ JSONObject json= new JSONObject();
+ json.put("key", entry.getKey());
+ json.put("value", entry.getValue());
+ listResult.add(json);
+ }
+ }
index++;
if(index == topSize){
break;
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java
index 8435faf..257d1ca 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/TaskRepository.java
@@ -36,6 +36,28 @@ public interface TaskRepository extends CrudRepository {
void updateTaskCount(Long id, Long totalCount, Long todayCount);
+ /**
+ * Updates the status of dragged (moved) tasks in the database.
+ * When tasks are moved:
+ * while moving: the task status is 6,
+ * the subject status is 1.
+ *
+ * move finished: the task status is 3,
+ * the subject status is 0.
+ *
+ */
+ @Modifying
+ @Transactional(rollbackFor = Exception.class)
+ @Query(value = "update cl_task set crawl_status =?2 where id =?1 ", nativeQuery = true)
+ void updateTaskStatus(Long id, int crawlStatus); // writes crawlStatus into cl_task.crawl_status of the row with this id
+
+ @Modifying
+ @Transactional(rollbackFor = Exception.class)
+ @Query(value = "update cl_subject set status =?2 where id =?1 ", nativeQuery = true)
+ void updateSubjectStatus(Long id, int status); // writes status into cl_subject.status of the row with this id
+
+
+
//
// /**
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java
index ac25ae2..35e5bd1 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java
@@ -17,12 +17,15 @@
package com.bfd.mf.common.web.vo.params;
+import com.bfd.mf.common.service.es.HighLevelQuery;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
@ApiModel(value = "查询参数对象")
public class QueryRequest implements Serializable {
@@ -32,8 +35,9 @@ public class QueryRequest implements Serializable {
// private Long subjectId;
@ApiModelProperty(value = "subjectId" , required = true , notes = "专题ID")
private String subjectId;
- @ApiModelProperty(value = "taskId" , required = true , notes = "任务ID")
- private String taskId;
+// @ApiModelProperty(value = "taskId" , required = true , notes = "任务ID")
+// private String taskId;
+
@ApiModelProperty(value = "userId",required = true)
private Long userId;
// 翻页字段
@@ -87,6 +91,91 @@ public class QueryRequest implements Serializable {
private String valueLabel;
private String categoryLabel;
+ private List tasks;
+ private String originalIndex; // NOTE(review): sent in the /subject/moveByTasks request example; presumably the index the data is moved out of - confirm
+ private String currentIndex; // NOTE(review): sent in the /subject/moveByTasks request example; presumably the index the data is moved into - confirm
+
+ private List moveTasks; // sent as "moveTasks" in the /subject/moveByTasks request example, e.g. ["13889"]
+ private List delTasks; // sent as "delTasks" in the /subject/deleteByTasks request example, e.g. ["13889"]
+ private List taskIds;
+
+ private String pageType ; // SearchDataController documents "socialComment" (comments) and "socialFollow" (reposts and likes)
+ private String userType; // SearchDataController documents, for pageType socialFollow: "1" = like users, "0" = sharing users
+
+ public String getUserType() {
+ return userType;
+ }
+
+ public void setUserType(String userType) {
+ this.userType = userType;
+ }
+
+ public String getPageType() {
+ return pageType;
+ }
+
+ public void setPageType(String pageType) {
+ this.pageType = pageType;
+ }
+
+ public List getMoveTasks() {
+ return moveTasks;
+ }
+
+ public void setMoveTasks(List moveTasks) {
+ this.moveTasks = moveTasks;
+ }
+
+ public List getDelTasks() {
+ return delTasks;
+ }
+
+ public void setDelTasks(List delTasks) {
+ this.delTasks = delTasks;
+ }
+
+ public List getTaskIds() {
+ return taskIds;
+ }
+
+ public void setTaskIds(List taskIds) {
+ this.taskIds = taskIds;
+ }
+
+ private List highLevelQueries;
+ public List getHighLevelQueries() {
+ return highLevelQueries;
+ }
+
+ public void setHighLevelQueries(List highLevelQueries) {
+ this.highLevelQueries = highLevelQueries == null ? null : highLevelQueries.stream().filter(HighLevelQuery::isAvailable).collect(Collectors.toList()); // null stays null; otherwise only entries passing HighLevelQuery::isAvailable are stored
+ }
+
+
+
+ public String getOriginalIndex() {
+ return originalIndex;
+ }
+
+ public void setOriginalIndex(String originalIndex) {
+ this.originalIndex = originalIndex;
+ }
+
+ public String getCurrentIndex() {
+ return currentIndex;
+ }
+
+ public void setCurrentIndex(String currentIndex) {
+ this.currentIndex = currentIndex;
+ }
+
+ public List getTasks() {
+ return tasks;
+ }
+
+ public void setTasks(List tasks) {
+ this.tasks = tasks;
+ }
public String getValueLabel() {
return valueLabel;
@@ -120,13 +209,13 @@ public class QueryRequest implements Serializable {
this.subjectId = subjectId;
}
- public String getTaskId() {
- return taskId;
- }
-
- public void setTaskId(String taskId) {
- this.taskId = taskId;
- }
+// public String getTaskId() {
+// return taskId;
+// }
+//
+// public void setTaskId(String taskId) {
+// this.taskId = taskId;
+// }
public String getScrollId() {
return scrollId;
@@ -381,7 +470,7 @@ public class QueryRequest implements Serializable {
public String toString() {
return "QueryRequest{" +
"subjectId='" + subjectId + '\'' +
- ", taskId='" + taskId + '\'' +
+// ", taskId='" + taskId + '\'' +
", userId=" + userId +
", page=" + page +
", limit=" + limit +
diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
index 79c0cd0..11c3004 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
@@ -147,6 +147,34 @@ public class ESMonitorBaseEntity implements Comparable, Ser
private int hasVideo;
private int hasFile;
+ private String attr; // NOTE(review): the meaning of attr / attachTag / brand is not documented here - confirm with the data producer
+ private String attachTag;
+ private String brand;
+ // Plain bean accessors for the three String properties above; values are stored and returned unchanged.
+ public String getBrand() {
+ return brand;
+ }
+
+ public void setBrand(String brand) {
+ this.brand = brand;
+ }
+
+ public String getAttachTag() {
+ return attachTag;
+ }
+
+ public void setAttachTag(String attachTag) {
+ this.attachTag = attachTag;
+ }
+
+ public String getAttr() {
+ return attr;
+ }
+
+ public void setAttr(String attr) {
+ this.attr = attr;
+ }
+
public int getViewCnt() {
return viewCnt;
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java
index b566c11..6e33cd8 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java
@@ -21,8 +21,6 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
-import java.util.ArrayList;
-
/**
* @author dujing
*/
@@ -128,6 +126,11 @@ public class SearchDataController {
/**
* 查询一条数据对应的 评论列表
+ * pageType = socialComment:评论
+ * ===
+ * pageType = socialFollow:转发和点赞
+ * userType = 1:点赞用户
+ * userType = 0:分享用户
*/
@ResponseBody
@ApiOperation(value = "查询评论列表")
@@ -135,7 +138,48 @@ public class SearchDataController {
public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) {
logger.info("[getCommentsByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
try {
- JSONObject result = searchDataService.queryComments(queryRequest);
+ JSONObject result = searchDataService.queryCommentsNew(queryRequest);
+ return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
+ } catch (Exception e) {
+ logger.error("[getCommentsByDocId] Failed,The error message is :{}", e);
+ return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
+ }
+
+ }
+
+    /**
+     * 2023-04-26: lists the reposts of a main post; comments, reposts and likes are split into:
+     * https://caiji.percent.cn/api/sq/crawl/getCommentsByDocId
+     * https://caiji.percent.cn/api/sq/crawl/getQuotesByDocId
+     * https://caiji.percent.cn/api/sq/crawl/getAttitudesByDocId
+     * @param queryRequest query parameters; pageType and userType are overwritten by this endpoint
+     * @return wrapped service result, or a C_SERVICE_NOT_AVAILABLE response when the query throws
+     */
+    @ResponseBody
+    @ApiOperation(value = "查询转发列表")
+    @RequestMapping(value = "/getQuotesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
+    public JSONObject getQuotesByDocId(@RequestBody QueryRequest queryRequest) {
+        logger.info("[getQuotesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
+        try {
+            queryRequest.setPageType(ESConstant.SOCIAL_FOLLOW);
+            queryRequest.setUserType("0"); // "0" = sharing (repost) users, "1" = like users
+            JSONObject result = searchDataService.queryCommentsNew(queryRequest);
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
+        } catch (Exception e) {
+            logger.error("[getQuotesByDocId] Failed,The error message is :{}", e.getMessage(), e);
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
+        }
+
+    }
+ @ResponseBody
+ @ApiOperation(value = "查询点赞列表")
+ @RequestMapping(value = "/getAttitudesByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
+ public JSONObject getAttitudesByDocId(@RequestBody QueryRequest queryRequest) {
+ logger.info("[getAttitudesByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
+ try {
+ queryRequest.setPageType(ESConstant.SOCIAL_FOLLOW);
+ queryRequest.setUserType("1");
+ JSONObject result = searchDataService.queryCommentsNew(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[getCommentsByDocId] Failed,The error message is :{}", e);
@@ -188,26 +232,10 @@ public class SearchDataController {
logger.error("[updateLabel] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
-
}
- /**
- * 示例文件夹的专题复制
- */
- @ApiOperation(value = "复制专题")
- @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
- @ResponseBody
- public JSONObject reIndex(@RequestBody QueryRequest queryRequest){
- logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest));
- try {
- searchDataService.reIndexSubject(queryRequest);
- } catch (Exception e) {
- logger.error("[reIndex] Failed,The error message is :{}", e);
- return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
- }
- return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, new JSONObject());
- }
+
/**
* 删除专题的接口
@@ -296,4 +324,73 @@ public class SearchDataController {
}
+    /**
+     * Copies a subject of the sample folder by delegating to searchDataService.reIndexSubject.
+     */
+    @ApiOperation(value = "复制专题")
+    @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
+    @ResponseBody
+    public JSONObject reIndex(@RequestBody QueryRequest queryRequest){
+        logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest));
+        try {
+            searchDataService.reIndexSubject(queryRequest);
+        } catch (Exception e) {
+            logger.error("[reIndex] Failed,The error message is :{}", e.getMessage(), e); // message fills {}, e keeps the stack trace
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
+        }
+        return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, new JSONObject());
+    }
+    /**
+     * 2023-04-14: endpoint added for collection platform 2.0.
+     * Moves the data of the given tasks and deletes it from the original index.
+     * POST
+     * crawl/subject/moveByTasks
+     * Request body example:
+     * {"originalIndex":"302088","moveTasks":["13889"],"currentIndex":"309980"}
+     * @param queryRequest request body carrying originalIndex, moveTasks and currentIndex
+     * @return wrapped service result, or a C_SERVICE_NOT_AVAILABLE response when the move throws
+     */
+    @ApiOperation(value = "移动任务数据")
+    @RequestMapping(value = "/subject/moveByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
+    @ResponseBody
+    public JSONObject reindexDataByTasks(@RequestBody QueryRequest queryRequest) {
+        logger.info("[reindexDataByTasks] partial / Params: {}", JSONObject.toJSONString(queryRequest));
+        try {
+            // Only the service call is timed, so the logged duration excludes response wrapping.
+            long start = System.currentTimeMillis();
+            // Task data move; the original author notes that this needs to be moved offline.
+            JSONObject result = searchDataService.reindexByTasks(queryRequest);
+            long end = System.currentTimeMillis();
+            logger.info("接口查询时长:start:{} ; end:{} ; time = {}", start, end, end - start);
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
+        } catch (Exception e) {
+            logger.error("[reindexDataByTasks] Failed,The error message is :{}", e.getMessage(), e);
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
+        }
+
+    }
+
+
+    /**
+     * Deletes the data of the given tasks.
+     * POST
+     * crawl/subject/deleteByTasks
+     * Request body example:
+     * {"index":"302088","delTasks":["13889"]}
+     * @param queryRequest request body as in the example above
+     * @return wrapped service result, or a C_SERVICE_NOT_AVAILABLE response when the deletion throws
+     */
+    @ApiOperation(value = "删除任务数据")
+    @RequestMapping(value = "/subject/deleteByTasks", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
+    @ResponseBody
+    public JSONObject delDataByTasks(@RequestBody QueryRequest queryRequest) {
+        logger.info("[delDataByTasks] partial / Params: {}", JSONObject.toJSONString(queryRequest));
+        try {
+            JSONObject result = searchDataService.deleteByTasks(queryRequest);
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
+        } catch (Exception e) {
+            logger.error("[delDataByTasks] Failed,The error message is :{}", e.getMessage(), e);
+            return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
+        }
+    }
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java
index 27d24ce..bca2c05 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java
@@ -40,14 +40,16 @@ public class SearchAnalysisService {
public JSONObject getAnalysisResponse(QueryRequest queryRequest) {
JSONObject jsonObject = new JSONObject();
try{
- List esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS);
- // 渠道走势
- jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest,esMonitorEntity);
- // 获取 渠道统计结果 分类标签统计结果 价值标签统计结果
- jsonObject = dataAnalysisChannelCounts(jsonObject,esMonitorEntity);
- // 获取词云
- JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(esMonitorEntity);
- jsonObject.put("cloudCounts",cloudCounts);
+ if(null != queryRequest.getSubjectId()) {
+ List esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS);
+ // 渠道走势
+ jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest, esMonitorEntity);
+ // 获取 渠道统计结果 分类标签统计结果 价值标签统计结果
+ jsonObject = dataAnalysisChannelCounts(jsonObject, esMonitorEntity);
+ // 获取词云
+ JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(esMonitorEntity);
+ jsonObject.put("cloudCounts", cloudCounts);
+ }
}catch (Exception e){
e.printStackTrace();
}
diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java
index 9bc7b67..e36f4c5 100644
--- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java
+++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java
@@ -146,6 +146,7 @@ public class SearchAuthorService extends CrudService());
@@ -154,22 +155,51 @@ public class SearchAuthorService extends CrudService dataList, List esMonitorListEntity,String indexName) throws Exception {
- List