|
|
@ -26,9 +26,13 @@ import org.elasticsearch.common.settings.Settings; |
|
|
|
import org.elasticsearch.common.text.Text; |
|
|
|
import org.elasticsearch.common.transport.TransportAddress; |
|
|
|
import org.elasticsearch.common.unit.TimeValue; |
|
|
|
import org.elasticsearch.index.query.*; |
|
|
|
import org.elasticsearch.index.query.BoolQueryBuilder; |
|
|
|
import org.elasticsearch.index.query.MultiMatchQueryBuilder; |
|
|
|
import org.elasticsearch.index.query.QueryBuilder; |
|
|
|
import org.elasticsearch.index.query.QueryBuilders; |
|
|
|
import org.elasticsearch.index.reindex.*; |
|
|
|
import org.elasticsearch.script.Script; |
|
|
|
import org.elasticsearch.script.ScriptType; |
|
|
|
import org.elasticsearch.search.SearchHit; |
|
|
|
import org.elasticsearch.search.SearchHits; |
|
|
|
import org.elasticsearch.search.aggregations.AggregationBuilder; |
|
|
@ -47,8 +51,6 @@ import org.springframework.util.Assert; |
|
|
|
|
|
|
|
import java.net.InetAddress; |
|
|
|
import java.util.*; |
|
|
|
import java.util.stream.Collectors; |
|
|
|
import java.util.stream.Stream; |
|
|
|
|
|
|
|
|
|
|
|
public abstract class EsUtils { |
|
|
@ -84,20 +86,23 @@ public abstract class EsUtils { |
|
|
|
return CLIENT_MAP.get(clusterName); |
|
|
|
} |
|
|
|
|
|
|
|
public static List<JSONObject> query(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
public static List<JSONObject> query0530(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("非高亮查询"); |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
// 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 |
|
|
|
CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
if (searchType == 0) { |
|
|
|
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
} |
|
|
|
// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
// CollapseBuilder collapseBuilder = null; |
|
|
|
// if (searchType == 0) { |
|
|
|
// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
// } |
|
|
|
|
|
|
|
// Object[] objects= new Object[]{"9999"}; |
|
|
|
|
|
|
|
// 查询 |
|
|
|
// from + size 的 分页 查询方式 |
|
|
@ -105,9 +110,10 @@ public abstract class EsUtils { |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) |
|
|
|
.setQuery(queryBuilder) |
|
|
|
.setCollapse(collapseBuilder) |
|
|
|
// .searchAfter(objects) |
|
|
|
//.setCollapse(collapseBuilder) |
|
|
|
.setSize(size) |
|
|
|
.setFrom(from); |
|
|
|
.setFrom(from); // 用search_after 的话,这个 from 得 == 0 |
|
|
|
|
|
|
|
System.out.println(requestBuilder); |
|
|
|
|
|
|
@ -130,11 +136,11 @@ public abstract class EsUtils { |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
public static List<JSONObject> queryWithHighlight(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
public static List<JSONObject> queryWithHighlight0530(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("高亮查询"); |
|
|
|
EsBaseParam esBaseParam = new EsBaseParam(); |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
@ -235,22 +241,53 @@ public abstract class EsUtils { |
|
|
|
|
|
|
|
List<JSONObject> dataList = new ArrayList<>(); |
|
|
|
if (searchResponse.getHits().totalHits > 0) { |
|
|
|
for (SearchHit hit : searchResponse.getHits().getHits()) { |
|
|
|
SearchHit[] hits = searchResponse.getHits().getHits(); |
|
|
|
for (int i = 0; i < hits.length; i++) { |
|
|
|
JSONObject data = new JSONObject(); |
|
|
|
data.putAll(hit.getSourceAsMap()); |
|
|
|
String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE}; |
|
|
|
for (int i = 0; i < fieldName.length; i++) { |
|
|
|
getHighlightResult(fieldName[i], hit, data); |
|
|
|
data.putAll(hits[i].getSourceAsMap()); |
|
|
|
String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE, ESConstant.OCRTEXT, ESConstant.ASRTEXT}; |
|
|
|
for (int j = 0; j < fieldName.length; j++) { |
|
|
|
getHighlightResult(fieldName[j], hits[i], data); |
|
|
|
} |
|
|
|
data.put("subjectId", hits[i].getIndex() |
|
|
|
.replace("cl_major_", "") |
|
|
|
.replace("cl_subject_", "") |
|
|
|
.replace("cl_special_1.0_", "")); |
|
|
|
dataList.add(data); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// if (searchResponse.getHits().totalHits > 0) { |
|
|
|
// for (SearchHit hit : searchResponse.getHits().getHits()) { |
|
|
|
// JSONObject data = new JSONObject(); |
|
|
|
// data.putAll(hits[i].getSourceAsMap()); |
|
|
|
// data.put("subjectId", hits[i].getIndex() |
|
|
|
// .replace("cl_major_", "") |
|
|
|
// .replace("cl_subject_", "") |
|
|
|
// .replace("cl_special_1.0_", "")); |
|
|
|
// dataList.add(data); |
|
|
|
// |
|
|
|
// JSONObject data = new JSONObject(); |
|
|
|
// data.putAll(hit.getSourceAsMap()); |
|
|
|
// String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE}; |
|
|
|
// for (int i = 0; i < fieldName.length; i++) { |
|
|
|
// getHighlightResult(fieldName[i], hit, data); |
|
|
|
// } |
|
|
|
// |
|
|
|
// data.put("subjectId", hit.getSourceAsMap().get() |
|
|
|
// .replace("cl_major_", "") |
|
|
|
// .replace("cl_subject_", "") |
|
|
|
// .replace("cl_special_1.0_", "")); |
|
|
|
// dataList.add(data); |
|
|
|
// } |
|
|
|
// } |
|
|
|
return dataList; |
|
|
|
} |
|
|
|
|
|
|
|
private static void getHighlightResult(String fieldName, SearchHit hit, JSONObject data) { |
|
|
|
if (hit.getHighlightFields().containsKey(fieldName)) { |
|
|
|
HighlightField highlightField = hit.getHighlightFields().get(fieldName); |
|
|
|
System.out.println("getHighlightResult highlightField : "+highlightField); |
|
|
|
Text[] fragments = highlightField.fragments(); |
|
|
|
String fragmentString = ""; |
|
|
|
for (Text fragment : fragments) { |
|
|
@ -385,9 +422,10 @@ public abstract class EsUtils { |
|
|
|
return 0L; |
|
|
|
} |
|
|
|
|
|
|
|
public static Long queryTotalCountNew(String clusterName, String[] index, |
|
|
|
QueryBuilder queryBuilder, |
|
|
|
Integer searchType) { |
|
|
|
|
|
|
|
public static Long queryTotalCountNew_0530(String clusterName, String[] index, |
|
|
|
QueryBuilder queryBuilder, |
|
|
|
Integer searchType) { |
|
|
|
|
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
@ -395,27 +433,62 @@ public abstract class EsUtils { |
|
|
|
// 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 |
|
|
|
String count = "count"; |
|
|
|
AggregationBuilder aggregation; |
|
|
|
// searchType = 0 是 主贴, |
|
|
|
if (searchType == 0) { |
|
|
|
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID); |
|
|
|
} else { |
|
|
|
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID); |
|
|
|
} |
|
|
|
|
|
|
|
// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
CollapseBuilder collapseBuilder = null; |
|
|
|
if (searchType == 0) { |
|
|
|
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
} |
|
|
|
//searchSourceBuilder.aggregation(aggregation); |
|
|
|
// from + size 的 分页 查询方式 |
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.setQuery(queryBuilder) |
|
|
|
//.setCollapse(collapseBuilder); |
|
|
|
.addAggregation(aggregation); |
|
|
|
|
|
|
|
// System.out.println(requestBuilder); |
|
|
|
/** |
|
|
|
* 2023-05-30 先注释掉看看情况 |
|
|
|
*/ |
|
|
|
// System.out.println("3333 : " + requestBuilder.get().getHits().totalHits); |
|
|
|
Aggregations aggregations = requestBuilder.get().getAggregations(); |
|
|
|
Cardinality cardinality = aggregations.get(count); |
|
|
|
// System.out.println("1111 : " + cardinality.getValue()); |
|
|
|
// System.out.println("2222 : " + requestBuilder.get().getHits().totalHits); |
|
|
|
long resultCount = cardinality.getValue(); |
|
|
|
if (searchType == 2) { |
|
|
|
resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
} |
|
|
|
System.out.println("cardinality : " + cardinality.getValue()); |
|
|
|
System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); |
|
|
|
// long resultCount = cardinality.getValue(); |
|
|
|
// if (searchType == 2) { |
|
|
|
// resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
// } |
|
|
|
/** |
|
|
|
* 折叠查询的参考代码 |
|
|
|
*/ |
|
|
|
// CollapseBuilder collapseBuilder = new CollapseBuilder("duplicate_id"); |
|
|
|
// InnerHitBuilder innerHitBuilder = new InnerHitBuilder(); |
|
|
|
// innerHitBuilder.setName("test"); |
|
|
|
// innerHitBuilder.setSize(0); |
|
|
|
// innerHitBuilder.setTrackScores(true); |
|
|
|
// innerHitBuilder.setIgnoreUnmapped(true); |
|
|
|
// innerHitBuilder.addSort(SortBuilders.fieldSort("level").order(SortOrder.DESC)); |
|
|
|
// collapseBuilder.setInnerHits(innerHitBuilder); |
|
|
|
// |
|
|
|
// ...... |
|
|
|
// |
|
|
|
// srb = client.prepareSearch(indexName) |
|
|
|
// .setTypes(typeName) |
|
|
|
// .setQuery(bqb) |
|
|
|
// .setFrom(params.getFrom()) |
|
|
|
// .setSize(params.getSize()) |
|
|
|
// .setCollapse(collapseBuilder) |
|
|
|
// .setPreference("_primary_first"); |
|
|
|
|
|
|
|
long resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
return resultCount; |
|
|
|
} |
|
|
|
|
|
|
@ -426,6 +499,7 @@ public abstract class EsUtils { |
|
|
|
Integer limit, |
|
|
|
String scrollId, |
|
|
|
Integer searchType) { |
|
|
|
|
|
|
|
Map<String, Object> result = new HashMap<>(); |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
SearchResponse searchResponse = null; |
|
|
@ -590,6 +664,14 @@ public abstract class EsUtils { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* 复制索引数据 |
|
|
|
* |
|
|
|
* @param clusterName |
|
|
|
* @param originalIndex |
|
|
|
* @param currentIndex |
|
|
|
* @return |
|
|
|
*/ |
|
|
|
public static long reIndex(String clusterName, String originalIndex, String currentIndex) { |
|
|
|
// String clusterName, String originalIndex, String currentIndex, |
|
|
|
try { |
|
|
@ -599,6 +681,7 @@ public abstract class EsUtils { |
|
|
|
.newRequestBuilder(client) |
|
|
|
.source(originalIndex) |
|
|
|
.destination(currentIndex); |
|
|
|
// 新建别名(查询需要用别名查,不加别名查不到哦) |
|
|
|
String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_"); |
|
|
|
BulkByScrollResponse response = builder.get(); |
|
|
|
|
|
|
@ -690,6 +773,46 @@ public abstract class EsUtils { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* 2023-05-30 |
|
|
|
* |
|
|
|
* @param clusterName |
|
|
|
* @param originalIndex |
|
|
|
* @param currentIndex |
|
|
|
* @param queryBuilder |
|
|
|
* @return |
|
|
|
*/ |
|
|
|
public static long reIndexByTask(String clusterName, |
|
|
|
String originalIndex, |
|
|
|
String currentIndex, |
|
|
|
QueryBuilder queryBuilder) { |
|
|
|
try { |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
|
|
|
|
System.out.println(originalIndex + " *** " + currentIndex); |
|
|
|
ReindexRequestBuilder builder = ReindexAction.INSTANCE |
|
|
|
.newRequestBuilder(client) |
|
|
|
.source(originalIndex)// 来源索引 |
|
|
|
.destination(currentIndex) // 目标索引 |
|
|
|
.filter(queryBuilder) |
|
|
|
.refresh(true); |
|
|
|
// builder. |
|
|
|
BulkByScrollResponse response = builder.get(); |
|
|
|
// 添加别名,将cl_special_1.0_ 替换成 cl_major 别名 |
|
|
|
String newAliex = currentIndex.replace("cl_special_1.0_", "cl_major_"); |
|
|
|
IndicesAliasesRequestBuilder indicesBuilder = IndicesAliasesAction.INSTANCE |
|
|
|
.newRequestBuilder(client) |
|
|
|
.addAlias(currentIndex, newAliex); |
|
|
|
IndicesAliasesResponse IndicesResponse = indicesBuilder.get(); |
|
|
|
System.out.println("******* : " + response); |
|
|
|
System.out.println("##### : " + IndicesResponse); |
|
|
|
return response.getCreated(); |
|
|
|
} catch (Exception e) { |
|
|
|
e.printStackTrace(); |
|
|
|
return 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
public static void delIndexByTasks(String clusterName, String indexName, String cid, List<String> tasks) { |
|
|
|
try { |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
@ -707,6 +830,17 @@ public abstract class EsUtils { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
public static void updateByQuery(String clusterName, String currentIndex, QueryBuilder queryBuilder, Long taskId) { |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
UpdateByQueryRequestBuilder updateByQuery = UpdateByQueryAction.INSTANCE.newRequestBuilder(client); |
|
|
|
// "source": "ctx._source['source']='路透社';" |
|
|
|
updateByQuery.source(currentIndex) |
|
|
|
.filter(queryBuilder) |
|
|
|
.size(1000) |
|
|
|
.script(new Script(ScriptType.INLINE, "painless", "ctx._source['taskId'] = '" + taskId + "'", Collections.emptyMap())); |
|
|
|
BulkByScrollResponse response = updateByQuery.get(); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* 全文检索查询拼接(非nested属性重载方法) |
|
|
|
* |
|
|
@ -757,4 +891,461 @@ public abstract class EsUtils { |
|
|
|
public QueryBuilder nestedQuery(String nested, QueryBuilder queryBuilder) { |
|
|
|
return QueryBuilders.nestedQuery(nested, queryBuilder, ScoreMode.None); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// public void testAggAndDistinct(){ |
|
|
|
// //获取注解,通过注解可以得到 indexName 和 type |
|
|
|
// Document document = Customer.class.getAnnotation(Document.class); |
|
|
|
// // dateHistogram Aggregation 是时间柱状图聚合,按照天来聚合 , |
|
|
|
// // dataAgg 为聚合结果的名称,createTime 为字段名称 |
|
|
|
// // cardinality 用来去重 |
|
|
|
// SearchQuery searchQuery = new NativeSearchQueryBuilder() |
|
|
|
// .withQuery(matchAllQuery()) |
|
|
|
// .withSearchType(SearchType.QUERY_THEN_FETCH) |
|
|
|
// .withIndices(document.indexName()).withTypes(document.type()) |
|
|
|
// .addAggregation(AggregationBuilders.dateHistogram("dataAgg").field("createTime") |
|
|
|
// .dateHistogramInterval(DateHistogramInterval.DAY) |
|
|
|
// .subAggregation(AggregationBuilders.cardinality("nameAgg").field("firstName"))) |
|
|
|
// .build(); |
|
|
|
// |
|
|
|
// // 聚合的结果 |
|
|
|
// Aggregations aggregations = elasticsearchTemplate.query(searchQuery, response -> response.getAggregations()); |
|
|
|
// Map<String, Aggregation> results = aggregations.asMap(); |
|
|
|
// Histogram histogram = (Histogram) results.get("dataAgg"); |
|
|
|
// // 将bucket list 转换成 map , key -> 名字 value-> 出现次数 |
|
|
|
// histogram.getBuckets().stream().forEach(t->{ |
|
|
|
// Histogram.Bucket histogram1 = t; |
|
|
|
// System.out.println(histogram1.getKeyAsString()); |
|
|
|
// Cardinality cardinality = histogram1.getAggregations().get("nameAgg"); |
|
|
|
// System.out.println(cardinality.getValue()); |
|
|
|
// }); |
|
|
|
// } |
|
|
|
|
|
|
|
|
|
|
|
public static Long queryTotalCountNew0530(String clusterName, String[] index, |
|
|
|
QueryBuilder queryBuilder, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("---------------------------"); |
|
|
|
long resultCount = 0l; |
|
|
|
try { |
|
|
|
|
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
// 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 |
|
|
|
String count = "count"; |
|
|
|
AggregationBuilder aggregation; |
|
|
|
// searchType = 0 是 主贴, |
|
|
|
if (searchType == 0) { |
|
|
|
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DOC_ID); |
|
|
|
} else { |
|
|
|
aggregation = AggregationBuilders.cardinality(count).field(ESConstant.DATA_ID); |
|
|
|
} |
|
|
|
|
|
|
|
// aggregation = AggregationBuilders.dateHistogram("dataAgg").field("createTimeStr") |
|
|
|
// .dateHistogramInterval(DateHistogramInterval.DAY) |
|
|
|
// .subAggregation(AggregationBuilders.cardinality("idAgg").field("dataId")); |
|
|
|
|
|
|
|
// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
// CollapseBuilder collapseBuilder = null; |
|
|
|
// if (searchType == 0) { |
|
|
|
// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
// } |
|
|
|
//searchSourceBuilder.aggregation(aggregation); |
|
|
|
// from + size 的 分页 查询方式 |
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.setQuery(queryBuilder); |
|
|
|
//.setCollapse(collapseBuilder); |
|
|
|
// .addAggregation(aggregation); |
|
|
|
|
|
|
|
|
|
|
|
// Aggregations aggregations = elasticsearchTemplate.query(searchQuery, response -> response.getAggregations()); |
|
|
|
// System.out.println(requestBuilder); |
|
|
|
/** |
|
|
|
* 2023-05-30 先注释掉看看情况 |
|
|
|
*/ |
|
|
|
// System.out.println("3333 : " + requestBuilder.get().getHits().totalHits); |
|
|
|
// Aggregations aggregations = requestBuilder.get().getAggregations(); |
|
|
|
// Cardinality cardinality = aggregations.get(count); |
|
|
|
// System.out.println("cardinality : " + cardinality.getValue()); |
|
|
|
System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); |
|
|
|
|
|
|
|
// Map<String, Aggregation> results = aggregations.asMap(); |
|
|
|
// Histogram histogram = (Histogram) results.get("dataAgg"); |
|
|
|
// // 将bucket list 转换成 map , key -> 名字 value-> 出现次数 |
|
|
|
// histogram.getBuckets().stream().forEach(t -> { |
|
|
|
// Histogram.Bucket histogram1 = t; |
|
|
|
// System.out.println(histogram1.getKeyAsString()); |
|
|
|
// Cardinality cardinality1 = histogram1.getAggregations().get("idAgg"); |
|
|
|
// System.out.println(cardinality1.getValue()); |
|
|
|
// }); |
|
|
|
// long resultCount = cardinality.getValue(); |
|
|
|
// if (searchType == 2) { |
|
|
|
// resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
// } |
|
|
|
/** |
|
|
|
* 折叠查询的参考代码 |
|
|
|
*/ |
|
|
|
// CollapseBuilder collapseBuilder = new CollapseBuilder("duplicate_id"); |
|
|
|
// InnerHitBuilder innerHitBuilder = new InnerHitBuilder(); |
|
|
|
// innerHitBuilder.setName("test"); |
|
|
|
// innerHitBuilder.setSize(0); |
|
|
|
// innerHitBuilder.setTrackScores(true); |
|
|
|
// innerHitBuilder.setIgnoreUnmapped(true); |
|
|
|
// innerHitBuilder.addSort(SortBuilders.fieldSort("level").order(SortOrder.DESC)); |
|
|
|
// collapseBuilder.setInnerHits(innerHitBuilder); |
|
|
|
// |
|
|
|
// ...... |
|
|
|
// |
|
|
|
// srb = client.prepareSearch(indexName) |
|
|
|
// .setTypes(typeName) |
|
|
|
// .setQuery(bqb) |
|
|
|
// .setFrom(params.getFrom()) |
|
|
|
// .setSize(params.getSize()) |
|
|
|
// .setCollapse(collapseBuilder) |
|
|
|
// .setPreference("_primary_first"); |
|
|
|
|
|
|
|
resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
} catch (Exception e) { |
|
|
|
e.printStackTrace(); |
|
|
|
} |
|
|
|
return resultCount; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
public static List<JSONObject> query05301(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("非高亮查询"); |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
// 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 |
|
|
|
CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
// CollapseBuilder collapseBuilder = null; |
|
|
|
if (searchType == 0) { |
|
|
|
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
} |
|
|
|
|
|
|
|
// Object[] objects= new Object[]{"9999"}; |
|
|
|
|
|
|
|
// 查询 |
|
|
|
// from + size 的 分页 查询方式 |
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) |
|
|
|
.setQuery(queryBuilder) |
|
|
|
// .searchAfter(objects) |
|
|
|
//.setCollapse(collapseBuilder) |
|
|
|
.setSize(size) |
|
|
|
.setFrom(from); // 用search_after 的话,这个 from 得 == 0 |
|
|
|
|
|
|
|
System.out.println(requestBuilder); |
|
|
|
|
|
|
|
|
|
|
|
SearchResponse searchResponse = requestBuilder.execute().actionGet(); |
|
|
|
List<JSONObject> dataList = new ArrayList<>(); |
|
|
|
if (searchResponse.getHits().totalHits > 0) { |
|
|
|
SearchHit[] hits = searchResponse.getHits().getHits(); |
|
|
|
for (int i = 0; i < hits.length; i++) { |
|
|
|
JSONObject data = new JSONObject(); |
|
|
|
data.putAll(hits[i].getSourceAsMap()); |
|
|
|
data.put("subjectId", hits[i].getIndex() |
|
|
|
.replace("cl_major_", "") |
|
|
|
.replace("cl_subject_", "") |
|
|
|
.replace("cl_special_1.0_", "")); |
|
|
|
dataList.add(data); |
|
|
|
} |
|
|
|
} |
|
|
|
return dataList; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
* 第一组查询,不做数据聚合 |
|
|
|
*/ |
|
|
|
public static Long queryTotalCountNew_0531(String clusterName, String[] index, |
|
|
|
QueryBuilder queryBuilder, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("---------------------------"); |
|
|
|
long resultCount = 0l; |
|
|
|
try { |
|
|
|
|
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.setQuery(queryBuilder); |
|
|
|
|
|
|
|
System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); |
|
|
|
|
|
|
|
|
|
|
|
resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
} catch (Exception e) { |
|
|
|
e.printStackTrace(); |
|
|
|
} |
|
|
|
return resultCount; |
|
|
|
} |
|
|
|
|
|
|
|
// public static List<JSONObject> query_0531(String clusterName, String[] index, |
|
|
|
// final QueryBuilder queryBuilder, |
|
|
|
// String sortFlag, String orderFlag, |
|
|
|
// Integer size, Integer from, |
|
|
|
// Integer searchType) { |
|
|
|
// System.out.println("非高亮查询"); |
|
|
|
// TransportClient client = getClient(clusterName); |
|
|
|
// boolean options = true; |
|
|
|
// boolean optionsf = false; |
|
|
|
// // from + size 的 分页 查询方式 |
|
|
|
// SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
// .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
// .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) |
|
|
|
// .setQuery(queryBuilder) |
|
|
|
// .setSize(size) |
|
|
|
// .setFrom(from); |
|
|
|
// |
|
|
|
// System.out.println(requestBuilder); |
|
|
|
// |
|
|
|
// SearchResponse searchResponse = requestBuilder.execute().actionGet(); |
|
|
|
// List<JSONObject> dataList = new ArrayList<>(); |
|
|
|
// if (searchResponse.getHits().totalHits > 0) { |
|
|
|
// SearchHit[] hits = searchResponse.getHits().getHits(); |
|
|
|
// for (int i = 0; i < hits.length; i++) { |
|
|
|
// JSONObject data = new JSONObject(); |
|
|
|
// data.putAll(hits[i].getSourceAsMap()); |
|
|
|
// data.put("subjectId", hits[i].getIndex() |
|
|
|
// .replace("cl_major_", "") |
|
|
|
// .replace("cl_subject_", "") |
|
|
|
// .replace("cl_special_1.0_", "")); |
|
|
|
// dataList.add(data); |
|
|
|
// } |
|
|
|
// } |
|
|
|
// return dataList; |
|
|
|
// } |
|
|
|
|
|
|
|
public static List<JSONObject> queryWithHighlight(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("高亮查询"); |
|
|
|
EsBaseParam esBaseParam = new EsBaseParam(); |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
// 现在不同任务的同一条数据不做消重,因此同一个DOCID 的数据会有多条。因此只有查主贴的时候需要用DOCID 消重 |
|
|
|
CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
if (searchType == 0) { |
|
|
|
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
esBaseParam.setWithHighlight(true); |
|
|
|
esBaseParam.setHighlightFields(new ArrayList<>(BaseFieldEnum.getMatchFieldsWithPy().keySet())); |
|
|
|
Integer numOfFragments = 2; |
|
|
|
HighlightBuilder highlightBuilder = new HighlightBuilder() |
|
|
|
// match进行高亮 |
|
|
|
.requireFieldMatch(true) |
|
|
|
.order(HighlightBuilder.Order.SCORE) |
|
|
|
//fragment 是指一段连续的文字。返回结果最多可以包含几段不连续的文字。默认是5。 |
|
|
|
.numOfFragments(numOfFragments) |
|
|
|
//一段 fragment 包含多少个字符。默认100。 |
|
|
|
// .fragmentSize(Constants.MAX_R_LENGTH / numOfFragments) |
|
|
|
// .noMatchSize(Constants.MAX_R_LENGTH) |
|
|
|
.preTags(ESConstant.HIGHLIGHTPRETAGS) |
|
|
|
.postTags(ESConstant.HIGHLIGHTPOSTTAGS); |
|
|
|
BaseFieldEnum.getMatchFieldsWithPy().keySet().forEach(highlightBuilder::field); |
|
|
|
|
|
|
|
esBaseParam.setHighlightBuilder(highlightBuilder); |
|
|
|
|
|
|
|
// 查询 |
|
|
|
// from + size 的 分页 查询方式 |
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) |
|
|
|
.setQuery(queryBuilder) |
|
|
|
.setCollapse(collapseBuilder) |
|
|
|
.setSize(size) |
|
|
|
.setFrom(from) |
|
|
|
.highlighter(esBaseParam.getHighlightBuilder()); |
|
|
|
|
|
|
|
System.out.println(requestBuilder); |
|
|
|
System.out.println("-----"); |
|
|
|
|
|
|
|
SearchResponse searchResponse = requestBuilder.execute().actionGet(); |
|
|
|
|
|
|
|
List<JSONObject> dataList = new ArrayList<>(); |
|
|
|
if (searchResponse.getHits().totalHits > 0) { |
|
|
|
SearchHit[] hits = searchResponse.getHits().getHits(); |
|
|
|
for (int i = 0; i < hits.length; i++) { |
|
|
|
JSONObject data = new JSONObject(); |
|
|
|
data.putAll(hits[i].getSourceAsMap()); |
|
|
|
String fieldName[] = {ESConstant.CONTENT, ESConstant.TITLE, ESConstant.OCRTEXT, ESConstant.ASRTEXT}; |
|
|
|
for (int j = 0; j < fieldName.length; j++) { |
|
|
|
getHighlightResult(fieldName[j], hits[i], data); |
|
|
|
} |
|
|
|
data.put("subjectId", hits[i].getIndex() |
|
|
|
.replace("cl_major_", "") |
|
|
|
.replace("cl_subject_", "") |
|
|
|
.replace("cl_special_1.0_", "")); |
|
|
|
dataList.add(data); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return dataList; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
* 05-30 聚合查询 |
|
|
|
*/ |
|
|
|
|
|
|
|
public static Long queryTotalCountNew(String clusterName, String[] index, |
|
|
|
QueryBuilder queryBuilder, |
|
|
|
Integer searchType) { |
|
|
|
long resultCount = 0l; |
|
|
|
try { |
|
|
|
|
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
String aggrCount = "count"; |
|
|
|
AggregationBuilder aggregation; |
|
|
|
// searchType = 0 是 主贴, |
|
|
|
if (searchType == 0) { |
|
|
|
aggregation = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DOC_ID); |
|
|
|
} else { |
|
|
|
aggregation = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DATA_ID); |
|
|
|
} |
|
|
|
|
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
// .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.setQuery(queryBuilder) |
|
|
|
.addAggregation(aggregation); |
|
|
|
|
|
|
|
System.out.println("totalHits : " + requestBuilder.get().getHits().totalHits); |
|
|
|
|
|
|
|
// ParsedCardinality parsedCardinality = (ParsedCardinality) searchResponse.getAggregations().asList().get(0); |
|
|
|
// Aggregations aggregations = requestBuilder.get().getAggregations(); |
|
|
|
// Cardinality cardinality = aggregations.get(count); |
|
|
|
// System.out.println("cardinality : " + cardinality.getValue()); |
|
|
|
|
|
|
|
Aggregations aggregations = requestBuilder.get().getAggregations(); |
|
|
|
Cardinality cardinality = aggregations.get(aggrCount); |
|
|
|
System.out.println("1111 : " + aggregations.get(aggrCount)); |
|
|
|
System.out.println("cardinality : " + cardinality.getValue()); |
|
|
|
resultCount = cardinality.getValue(); |
|
|
|
// 用户数据不用ID做聚合?? |
|
|
|
if (searchType == 2) { |
|
|
|
resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
} |
|
|
|
|
|
|
|
// resultCount = requestBuilder.get().getHits().totalHits; |
|
|
|
} catch (Exception e) { |
|
|
|
e.printStackTrace(); |
|
|
|
} |
|
|
|
return resultCount; |
|
|
|
} |
|
|
|
|
|
|
|
public static List<JSONObject> query(String clusterName, String[] index, |
|
|
|
final QueryBuilder queryBuilder, |
|
|
|
String sortFlag, String orderFlag, |
|
|
|
Integer size, Integer from, |
|
|
|
Integer searchType) { |
|
|
|
System.out.println("非高亮查询"); |
|
|
|
TransportClient client = getClient(clusterName); |
|
|
|
boolean options = true; |
|
|
|
boolean optionsf = false; |
|
|
|
String aggrCount = "count"; |
|
|
|
|
|
|
|
CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID); |
|
|
|
AggregationBuilder aggregationBuilder = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DATA_ID); |
|
|
|
|
|
|
|
if (searchType == 0) { |
|
|
|
collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); |
|
|
|
aggregationBuilder = AggregationBuilders.cardinality(aggrCount).field(ESConstant.DOC_ID); |
|
|
|
} |
|
|
|
// from + size 的 分页 查询方式 |
|
|
|
SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) |
|
|
|
.setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) |
|
|
|
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) |
|
|
|
.setQuery(queryBuilder) |
|
|
|
.setCollapse(collapseBuilder) |
|
|
|
.addAggregation(aggregationBuilder) |
|
|
|
.setSize(size) |
|
|
|
.setFrom(from); |
|
|
|
|
|
|
|
System.out.println(requestBuilder); |
|
|
|
|
|
|
|
SearchResponse searchResponse = requestBuilder.execute().actionGet(); |
|
|
|
List<JSONObject> dataList = new ArrayList<>(); |
|
|
|
if (searchResponse.getHits().totalHits > 0) { |
|
|
|
SearchHit[] hits = searchResponse.getHits().getHits(); |
|
|
|
for (int i = 0; i < hits.length; i++) { |
|
|
|
JSONObject data = new JSONObject(); |
|
|
|
data.putAll(hits[i].getSourceAsMap()); |
|
|
|
data.put("subjectId", hits[i].getIndex() |
|
|
|
.replace("cl_major_", "") |
|
|
|
.replace("cl_subject_", "") |
|
|
|
.replace("cl_special_1.0_", "")); |
|
|
|
dataList.add(data); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Cardinality cardinality = searchResponse.getAggregations().get(aggrCount); |
|
|
|
//总数 |
|
|
|
long value = cardinality.getValue(); |
|
|
|
|
|
|
|
System.out.println("去重总数:" + value); |
|
|
|
System.out.println("不去重的总数:" + requestBuilder.get().getHits().totalHits); |
|
|
|
|
|
|
|
return dataList; |
|
|
|
} |
|
|
|
|
|
|
|
// private long getCardinality( QueryBuilder queryBuilder,String indexName, |
|
|
|
// Integer size, Integer from) { |
|
|
|
// // 获取查询的索引列表String indexName = "sjck_personnel" |
|
|
|
// ;// 获取查询的条件列表 |
|
|
|
//// List<HashMap<String, String>> options = (List<HashMap<String, String>>) bindParams.get("conditions"); |
|
|
|
//// // 1.构建查询请求 |
|
|
|
// SearchRequest searchRequest = new SearchRequest(indexName); |
|
|
|
//// // 4.构建最外面的 |
|
|
|
//// boolQueryBoolQueryBuilder query = QueryBuilders.boolQuery(); |
|
|
|
//// // 5.构建查询请求 |
|
|
|
//// synQueryPersonnelIndexBuilder(query, options); |
|
|
|
// //6.高亮 |
|
|
|
// HighlightBuilder highlightBuilder = new HighlightBuilder(); |
|
|
|
// // 所有查询出来的字段全部高亮 |
|
|
|
// HighlightBuilder.Field highlightTitle = new HighlightBuilder.Field("*").requireFieldMatch(false); |
|
|
|
// highlightTitle.highlighterType("unified"); |
|
|
|
// highlightBuilder.field(highlightTitle); |
|
|
|
// //从第几条开始 |
|
|
|
// |
|
|
|
// // 3.构建高亮 |
|
|
|
// AggregationBuilder aggregation = AggregationBuilders.cardinality("total_size").field("concat_field"); |
|
|
|
// SearchSourceBuilder sourceBuilder = new SearchSourceBuilder() |
|
|
|
// .query(queryBuilder) |
|
|
|
// .highlighter(highlightBuilder) |
|
|
|
// .from(from) |
|
|
|
// .size(size) |
|
|
|
// .aggregation(aggregation); |
|
|
|
// // 2.将查询构建器放入查询请求中 |
|
|
|
// searchRequest.source(sourceBuilder); |
|
|
|
// SearchResponse searchResponse = null; |
|
|
|
// try { |
|
|
|
// searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); |
|
|
|
// } catch (ElasticsearchStatusException e) { |
|
|
|
// logger.error("请检查elasticsearchIndex是否存在{},错误信息{}", e, e.getMessage()); |
|
|
|
// } catch (IOException e) { |
|
|
|
// logger.error("搜索出错了{},错误信息{}", e, e.getMessage()); |
|
|
|
// } |
|
|
|
// assert searchResponse != null; |
|
|
|
// ParsedCardinality parsedCardinality = (ParsedCardinality) searchResponse.getAggregations().asList().get(0); |
|
|
|
// return parsedCardinality.getValue(); |
|
|
|
// } |
|
|
|
|
|
|
|
} |