From cff5b2f2d79934c8c559d9c62f3ecfd9f235fc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=9C=E9=9D=99?= Date: Fri, 12 Nov 2021 09:48:49 +0800 Subject: [PATCH] =?UTF-8?q?release-3.1.7(2021-11-12,=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E4=BA=86B=E7=AB=99=E5=92=8Cyoutube=20=E7=9A=84=E7=94=A8?= =?UTF-8?q?=E6=88=B7=E6=95=B0=E6=8D=AE=E7=9A=84=E9=87=87=E9=9B=86=EF=BC=8C?= =?UTF-8?q?=E9=A1=B5=E9=9D=A2=E7=B1=BB=E5=9E=8B=E4=B8=8D=E5=90=8C=E4=BA=8E?= =?UTF-8?q?=E5=85=B6=E4=BB=96=E7=9A=84=E7=94=A8=E6=88=B7=EF=BC=8C=E8=B0=83?= =?UTF-8?q?=E6=95=B4=E4=BA=86=E8=BF=99=E4=B8=A4=E4=B8=AA=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=9A=84=E6=9F=A5=E8=AF=A2=E5=92=8C=E7=BB=9F=E8=AE=A1=E3=80=82?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/bfd/mf/job/service/query/QueryService.java | 37 ++++++++------ .../com/bfd/mf/job/service/query/SaveService.java | 45 +++++++++++------ .../main/java/com/bfd/mf/job/util/WriteMethod.java | 21 ++++++++ .../mf/common/service/cache/TopicQueryService.java | 2 - .../common/service/es/ParseSearchScopeService.java | 2 +- .../bfd/mf/common/util/constants/ESConstant.java | 10 ++-- .../web/vo/view/monitor/ESMonitorBaseEntity.java | 38 ++++++++++++++ .../java/com/bfd/mf/service/SearchDataService.java | 59 ++++++++++++++++++++-- 8 files changed, 170 insertions(+), 44 deletions(-) create mode 100644 cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java index 65ee8da..0bcd480 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java @@ -12,10 +12,7 @@ import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.TaskRepository; import com.bfd.mf.job.download.DownLoadFile; -import com.bfd.mf.job.util.DataCheckUtil; -import com.bfd.mf.job.util.EsUtils; -import com.bfd.mf.job.util.Kafka010Utils; -import com.bfd.mf.job.util.ReadLine; +import com.bfd.mf.job.util.*; import com.google.common.collect.Maps; import com.google.common.util.concurrent.RateLimiter; import kafka.utils.Json; @@ -106,8 +103,14 @@ public class QueryService { long taskId = task.getId().longValue(); String appId = task.getAppId(); int cache_num = 1; - taskRepository.updateStatus(cache_num, task.getId().longValue()); - cache.put(taskId+"#@#"+appId, Lists.newArrayList(0L, 0L, progressFactor, totalSegment, segment)); + Integer siteType = task.getSiteType(); + if(siteType == 5){ + cache_num = 2; + taskRepository.updateStatus(cache_num, task.getId().longValue()); + } else { + taskRepository.updateStatus(cache_num, task.getId().longValue()); + cache.put(taskId + "#@#" + appId, Lists.newArrayList(0L, 0L, progressFactor, totalSegment, segment)); + } try { P_TASK_CACHE_RANGE.put(cache); } catch (InterruptedException e) { @@ -125,8 +128,8 @@ public class QueryService { return; } String taskIdAppId = ""; - long fromMills =0L; - long toMills = 0L; + long fromMills = 0L; //1604419200000 + long toMills = 0L; // 1604505600000 for (Map.Entry> entry : range.entrySet()) { entry.getValue(); taskIdAppId = entry.getKey(); @@ -182,13 +185,13 @@ public class QueryService { fromMills = task.getCrawlStartTime().longValue(); queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag, cacheNum, siteType); } -// LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.", -// taskId, -// indexName, -// new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT), -// new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT), -// JSONObject.toJSONString(sourceIndices), -// queryBuilder.toString()); + LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.", + taskId, + indexName, + new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT), + new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT), + JSONObject.toJSONString(sourceIndices), + queryBuilder.toString()); // 传入的参数 集群名称,索引名称,索引类型(type), 查询Builder,scroll查询页面大小,scroll查询scrollId有效时间 String finalTaskId = taskId + ""; long pubTime = fromMills; @@ -223,6 +226,10 @@ public class QueryService { if (!data.get("_id_").equals("")) { saveService.saveToEsWithFilter(config.esMiniClusterName(), finalIndexName1, data); kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data)); +// long crawlTime = data.getLong("crawlTime"); +// if(crawlTime < 1633795200000L){ +// WriteMethod.writeMethod("../../../error.txt",JSONObject.toJSONString(data)); +// } LOGGER.debug("Send message, indexName :{} , taskId:{} , ID :{}.", finalIndexName, task.getId(), data.getString("_id_")); // 将要拉评论的ID 添加到list 中,(电商的数据不用拉评论哦)! if (!siteType.equals(ESConstants.DOCTYPEITEM)) { diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java index 1114833..44c9e20 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java @@ -17,22 +17,35 @@ public class SaveService { // 初始化自定义字段 data.put(ESConstants.TASKID, taskId); data.put("where","backtrace"); -// data.put("tag",""); -// data.put("mentionAccountUrl",new ArrayList<>()); -// data.put("mentionAccount",new ArrayList<>()); -// data.put("dns",""); -// data.put("asrText",""); -// data.put("ocrText",new ArrayList<>()); -// data.put("hasOCR",0); -// data.put("hasASR",0); -// data.put("asrLength",0); -// data.put("ocrLength",0); -// data.put("hasTrans",0); -// data.put("translateTitleLength",""); -// data.put("translateContentLength",""); -// data.put("goodrate",0); -// data.put("generalrate",0); -// data.put("poorrate",0); + if(!data.containsKey("tag")){ + data.put("tag",""); + } + if(!data.containsKey("dns")){ + data.put("dns",""); + } + if(!data.containsKey("hasOCR")){ +// data.put("asrText",""); +// data.put("ocrText",new ArrayList<>()); + data.put("hasOCR",0); + data.put("hasASR",0); + data.put("asrLength",0); + data.put("ocrLength",0); + } + if(!data.containsKey("hasTrans")){ + data.put("hasTrans",0); + data.put("translateTitleLength",""); + data.put("translateContentLength",""); + } + if(!data.containsKey("mentionAccount")){ + data.put("mentionAccountUrl",new ArrayList<>()); + data.put("mentionAccount",new ArrayList<>()); + } + if(!data.containsKey("generalrate")){ + data.put("goodrate",0); + data.put("generalrate",0); + data.put("poorrate",0); + } + } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java new file mode 100644 index 0000000..53e04d0 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java @@ -0,0 +1,21 @@ +package com.bfd.mf.job.util; + +import java.io.FileWriter; +import java.io.IOException; + +/** + * Created by BFD-229 on 2017/7/6. + */ +public class WriteMethod { + public static void writeMethod(String fileName, String json){ + try{ + FileWriter writer=new FileWriter(fileName,true); + writer.write(json+"\n"); + writer.close(); + } catch (IOException e) + { + e.printStackTrace(); + } + } + +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java index fb6038f..105debb 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java @@ -190,8 +190,6 @@ public class TopicQueryService { } boolQuery.must(boolQueryBuilder); } - // } - }catch (Exception e){ e.printStackTrace(); } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java index 356880b..e0939a0 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java @@ -64,7 +64,7 @@ public class ParseSearchScopeService { .must(QueryBuilders.termQuery(ESConstant.DOC_TYPE,ESConstant.ITEM))); } else if(searchType == 2){ searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2)) - .must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage")); + .must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage","newsuser")); } return searchScopeQuery; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java index 2b92e83..19f23aa 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java @@ -396,7 +396,7 @@ public class ESConstant { public static String CHANNEL = "channel"; public static final String CONTENT = "content"; public static final String SYS_SENTIMENT = "sysSentiment"; - public static String POST_SOURCE = "postSource"; + // public static String POST_SOURCE = "postSource"; public static String TRANSLATETITLE = "translateTitle"; public static String TRANSLATECONTENT = "translateContent"; @@ -596,10 +596,10 @@ public class ESConstant { public static String WEIBO_EXPRESSION_TEXT = "expressionText";// 表情文本 public static String WEIBO_FORWARD_URL = "forwardUrl";// 原文url public static String FORWARD_USER_URL = "forwardUserUrl";// 原文url - public static String WEIBO_USER_TYPE = "userType"; - public static String WEIBO_POST_SOURCE = "postSource"; - public static String WEIBO_LEVEL = "level"; - public static String WEIBO_REPLY_COMMENT = "replycomment"; + public static String USER_TYPE = "userType"; + public static String POST_SOURCE = "postSource"; + public static String LEVEL = "level"; + public static String REPLY_COMMENT = "replycomment"; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java index 218bcf8..169e784 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java @@ -122,6 +122,44 @@ public class ESMonitorBaseEntity implements Comparable, Ser private String promotionInfo; + private int readCount; + + public int getReadCount() { + return readCount; + } + + public void setReadCount(int readCount) { + this.readCount = readCount; + } + + private int ugc; + private int egc; + private int pgc; + + public int getUgc() { + return ugc; + } + + public void setUgc(int ugc) { + this.ugc = ugc; + } + + public int getEgc() { + return egc; + } + + public void setEgc(int egc) { + this.egc = egc; + } + + public int getPgc() { + return pgc; + } + + public void setPgc(int pgc) { + this.pgc = pgc; + } + public String getPromotionInfo() { return promotionInfo; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java index 5ffb0da..5953225 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java @@ -95,7 +95,7 @@ public class SearchDataService extends CrudService dataList, List esMonitorListEntity,Integer searchType) { try { @@ -227,8 +227,8 @@ public class SearchDataService extends CrudService siteOtherMap = siteMap.get(enSource); if (siteOtherMap.containsKey("site_id")) { siteId = siteMap.get(enSource).get("site_id").toString(); @@ -261,6 +261,7 @@ public class SearchDataService extends CrudService hlKeywords = (List) sourceAsMap.get(ESConstant.HL_KEYWORDS); + List hlKeywords = new ArrayList<>(); + if (sourceAsMap.get(ESConstant.HL_KEYWORDS) instanceof List){ + hlKeywords = (List) sourceAsMap.get(ESConstant.HL_KEYWORDS); + } + //List // 视频分析结果 String asrText = ""; List ocrText = new ArrayList<>(); @@ -403,18 +414,51 @@ public class SearchDataService extends CrudService