From 05ba4ced7fb5a01c50d0c550269b72236fa3ee8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=9C=E9=9D=99?= Date: Wed, 1 Dec 2021 16:02:43 +0800 Subject: [PATCH] =?UTF-8?q?release-3.1.8(2021-12-01,=E8=B0=83=E6=95=B4?= =?UTF-8?q?=E4=BA=86=E4=B8=80=E8=88=AC=E7=A6=BB=E7=BA=BF=E6=8B=89=E6=95=B0?= =?UTF-8?q?=E7=9A=84=E8=BF=87=E7=A8=8B=E4=B8=AD=E6=96=87=E4=BB=B6=E7=9A=84?= =?UTF-8?q?=E4=B8=8B=E8=BD=BD=E4=B8=8A=E4=BC=A0=E5=A4=84=E7=90=86=E7=9A=84?= =?UTF-8?q?=E9=80=BB=E8=BE=91)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/compiler.xml | 1 + .../java/com/bfd/mf/job/config/ESConstants.java | 3 + .../mf/job/domain/repository/TaskRepository.java | 2 +- .../java/com/bfd/mf/job/download/DownLoadFile.java | 13 +--- .../bfd/mf/job/service/es/EsQueryMiniService.java | 10 +-- .../com/bfd/mf/job/service/query/QueryService.java | 88 ++++++++++++++++++++-- .../com/bfd/mf/job/service/query/SaveService.java | 2 - .../job/service/statistics/StatisticsService.java | 4 - .../main/java/com/bfd/mf/job/util/WriteMethod.java | 21 ++++++ .../bfd/mf/common/service/common/CrudService.java | 2 + .../service/es/EsQueryAuthorCountService.java | 15 +--- .../common/service/es/EsQueryServiceForSQMini.java | 2 - .../bfd/mf/common/service/text/TextService.java | 60 +++++++-------- .../com/bfd/mf/common/util/enums/RTCodeEnum.java | 38 ---------- .../bfd/mf/controller/SearchDataController.java | 29 +++++++ .../com/bfd/mf/service/SearchAuthorService.java | 2 - .../java/com/bfd/mf/service/SearchDataService.java | 30 +++++--- 17 files changed, 199 insertions(+), 123 deletions(-) create mode 100644 cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java diff --git a/.idea/compiler.xml b/.idea/compiler.xml index feda9b0..f85b716 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -2,6 +2,7 @@ + diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java index d2581fb..8b3e880 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java @@ -866,6 +866,9 @@ public class ESConstants { // 微博话题:hashTag public static String HASH_TAG = "hashTag"; public static String PICTURE_LIST = "pictureList"; + public static String UPLOADIMG = "uploadImg"; + public static String RAWIMG = "rawimg"; + public static String IMG_ = "img_"; // 表情:expression public static String EXPRESSION = "expression"; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java index 8bd8328..e1e7b87 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java @@ -13,7 +13,7 @@ import java.util.Map; public interface TaskRepository extends CrudRepository { // @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM all_task.cl_task ct JOIN cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 3 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true) - @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM all_task.cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from all_task.cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC ;",nativeQuery = true) + @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM all_task.cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from all_task.cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC ;",nativeQuery = true) List findAllNewTask(); // 需要统计的任务的查询条件 1、 状态为 1 OR 0;2、状态为3,且任务完成时间再2天前的。 diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java index bb2e2ee..d032227 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java @@ -41,6 +41,10 @@ public class DownLoadFile { String path = JSONObject.parseObject(result).getString(ESConstants.PATH); realresult.put(ESConstants.URL , path); realresult.put(ESConstants.SIZE , size+"KB"); + }else{ + String path = getUrl; + realresult.put(ESConstants.URL , path); + realresult.put(ESConstants.SIZE , size+"KB"); } }catch (Exception e){ e.printStackTrace(); @@ -92,15 +96,6 @@ public class DownLoadFile { return resolution; } -// public static void main(String[] args) { -// String url = "http://172.18.1.113:8080/group6/default/20211018/10/49/3/81ed5dfe30fa6adbb3bba672febd8eff.jpg"; -// try { -// getImageResolution(url); -// } catch (IOException e) { -// e.printStackTrace(); -// } -// } - public static String getFileSize(String getUrl){ String realSize = ""; // 获取大小 diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java index 6846b7f..6ce5df0 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java @@ -193,17 +193,15 @@ public class EsQueryMiniService { private BoolQueryBuilder getQueryBuilder(String cid, String crawlDataFlag, Long crawlStartTime, Long crawlEndTime) { BoolQueryBuilder qb = QueryBuilders.boolQuery(); // 任务ID 筛选 - TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE+".keyword", cid); + TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE + ".keyword", cid); TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag); qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder); // 时间范围筛选 只有主贴评论需要查时间,用户不需要设置时间范围 BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); RangeQueryBuilder rangeQueryBuilder = QueryBuilders - .rangeQuery(ESConstants.PUBTIME) - .gte(crawlStartTime) - .lt(crawlEndTime); - TermQueryBuilder primary1 = QueryBuilders.termQuery(ESConstants.PRIMARY,2); - shouldbq.must(rangeQueryBuilder).mustNot(primary1); + .rangeQuery(ESConstants.PUBTIME).gte(crawlStartTime).lt(crawlEndTime); + TermQueryBuilder primary2 = QueryBuilders.termQuery(ESConstants.PRIMARY,2); + shouldbq.must(rangeQueryBuilder).mustNot(primary2); // 不用统计FB 的这种粉丝的量 TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR,"socialFans"); qb.mustNot(pageTypeQueryBuilder).should(shouldbq); diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java index 0bcd480..dc744bf 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java @@ -12,6 +12,7 @@ import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.TaskRepository; import com.bfd.mf.job.download.DownLoadFile; +import com.bfd.mf.job.service.WriterTXTService; import com.bfd.mf.job.util.*; import com.google.common.collect.Maps; import com.google.common.util.concurrent.RateLimiter; @@ -144,6 +145,8 @@ public class QueryService { List docIdsList = new ArrayList<>(); try { // 创建过滤条件 & 任务预处理 +// fromMills = 1612108800000L; +// toMills = 1613750400000L; fromMills = task.getCrawlStartTime().longValue(); toMills = task.getCrawlEndTime().longValue(); Long year = config.getQueryDataYearStarttime(); // 获取配置文件中用直接拉年份的时间节点,现在设置的是2019年,2019年前的全部用年做索引,不拆成天 @@ -220,12 +223,23 @@ public class QueryService { // 现在判断视频、图片、文件是否下载的方式只取决于isDownload 字段 boolean isDownload = data.getBoolean(ESConstants.ISDOWNLOAD); if (isDownload) { - // String goFastAddr = defultAddr; data = downloadAndChangePath(data); } if (!data.get("_id_").equals("")) { + // 写入到专题ES中 saveService.saveToEsWithFilter(config.esMiniClusterName(), finalIndexName1, data); - kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data)); + // 覆盖到日期ES中 + String pubTimeStr = data.getString("pubTimeStr").split(" ")[0]; + String dateIndex = "cl_index_"+pubTimeStr; + System.out.println(config.esNormalClusterName() + " ; index : " + dateIndex); +// saveService.saveToEsWithFilter(config.esNormalClusterName(), dateIndex, data); +// kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data)); + + String docId = data.getString("docId"); + String id = data.getString("_id_"); + String result = pubTimeStr + "\t" + docId + "\t" + id; + WriteMethod.writeMethod("../data.txt",JSONObject.toJSONString(data)); + WriteMethod.writeMethod("../error_1123.txt",result ); // long crawlTime = data.getLong("crawlTime"); // if(crawlTime < 1633795200000L){ // WriteMethod.writeMethod("../../../error.txt",JSONObject.toJSONString(data)); @@ -249,7 +263,7 @@ public class QueryService { if (docIdsList.size() > 0) { String docType = docIdsList.get(0).split("_")[1]; String docIds[] = docIdsList.toArray(new String[0]); - // queryComments(docIds, docType, finalFromMills, finalToMills,finalTaskId,crawlDataFlag,indexName); + queryComments(docIds, docType, finalFromMills, finalToMills,finalTaskId,crawlDataFlag,indexName); } LOGGER.info("This Task is OK ! taskId = " + taskId); Integer cache_num = task.getCacheNum(); @@ -363,6 +377,7 @@ public class QueryService { System.out.println(docId + "----- filePath : " + filePath); System.out.println("===== srcFileList :" + srcFileList); System.out.println("-=-=- filePathSize : "+filePathSize); + System.out.println("-----------------forwardUrl" + data.get("forwardUrl")); data = getFilePath(data,filePath,srcFileList,filePathSize); } @@ -381,9 +396,9 @@ public class QueryService { System.out.println(docId + "----- videoPath : " + videoPath); System.out.println("===== srcVideoList :" + srcVideoList); System.out.println("-=-=- videoPathSize : "+videoPathSize); + System.out.println("-----------------videoUrl" + data.get("videoUrl")); data = getVideoPath(data,videoPath,srcVideoList,videoPathSize); } - // 图片下载 List imagePath = (List) data.get(ESConstants.IMAGEPATH); List> srcImageList = new ArrayList<>(); @@ -398,6 +413,7 @@ public class QueryService { System.out.println(docId + "----- imagePath : " + imagePath); System.out.println("===== srcImageList :" + srcImageList); System.out.println("-=-=- imagePathSize : "+imagePathSize); + System.out.println("-----------------PictureList" + data.get("pictureList")); data = getImagePath(data,imagePath,srcImageList,imagePathSize); } // System.out.println("***** "+data); @@ -583,7 +599,7 @@ public class QueryService { List> srcList = new ArrayList(); for (String downloadUrl:pathList) { Map srcMap = new HashMap<>(); - srcMap.put(ESConstants.ORIGINALURL,downloadUrl); + String resolution = ""; String videoTime = ""; try { @@ -615,12 +631,70 @@ public class QueryService { pathSizeList.add(pathSizeMap); // 这个是 用来做 gofast 和原链接替换的,key 是原链接,value 是go-fast 链接, String goFastUrl = pathSizeMap.get(ESConstants.URL); + String originalUrl = ""; + if(downloadType == 0){ + System.out.println("有附件拉~~~~~~~~~~~~~~~~~~"+ data.get(ESConstants.FORWARD_URL)); + if(data.get(ESConstants.FORWARD_URL).toString().contains(ESConstants.GOFASTURL)){ + JSONArray forwardUrl = JSONObject.parseArray( data.get(ESConstants.FORWARD_URL).toString()); + for (Object forwardUrlMap: forwardUrl) { + Map forward = (Map) JSONObject.parse(forwardUrlMap.toString()); + String fileOriginalUrl = forward.get(ESConstants.ORIGINALURL); + String fileGofastUrl = forward.get(ESConstants.GOFASTURL); + if(downloadUrl.equals(fileGofastUrl)){ + originalUrl = fileOriginalUrl; + } + } + } + }else if(downloadType == 1){ + if(data.get(ESConstants.PICTURE_LIST).toString().contains(ESConstants.IMG_)){ + JSONObject pictureListMap = JSONObject.parseObject( data.get(ESConstants.PICTURE_LIST).toString()); + for(Map.Entry entry : pictureListMap.entrySet()){ + Map picMap = (Map) entry.getValue(); + String imageOriginalUrl = picMap.get(ESConstants.RAWIMG); + String uploadImg = picMap.get(ESConstants.UPLOADIMG); + if(downloadUrl.equals(uploadImg)){ + originalUrl = imageOriginalUrl; + } + } + } + }else if(downloadType == 2){ + System.out.println("有视频拉~~~~~~~~~~~~~~~~~~"+ data.get(ESConstants.VIDEOURL)); + if(data.get(ESConstants.VIDEOURL).toString().contains(ESConstants.GOFASTURL)) { + JSONArray videoUrl = JSONObject.parseArray(data.get(ESConstants.VIDEOURL).toString()); + for (Object videoMap:videoUrl) { + Map video = (Map) JSONObject.parse(videoMap.toString()); + String videoGofastUrl = video.get(ESConstants.GOFASTURL); + String videoOriginalUrl = video.get(ESConstants.ORIGINALURL); + if (downloadUrl.equals(videoGofastUrl)) { + originalUrl = videoOriginalUrl; + } + } + }else{ + originalUrl = data.getString(ESConstants.VIDEOURL); + } + } + srcMap.put(ESConstants.ORIGINALURL,originalUrl); // 这个原始链接应该放抓取到的链接而不是下载用的链接 srcMap.put(ESConstants.GOFASTURL,goFastUrl); // 这个值使用来替换 三个 Path 的 imagePath,videoPath,filePath path.add(goFastUrl); srcList.add(srcMap); + }else{ + System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~关注一下这个情况啊!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); + if(downloadType == 1){ + if(data.get(ESConstants.PICTURE_LIST).toString().contains(ESConstants.IMG_)){ + JSONObject pictureListMap = JSONObject.parseObject( data.get(ESConstants.PICTURE_LIST).toString()); + for(Map.Entry entry : pictureListMap.entrySet()){ + srcMap = new HashMap<>(); + Map picMap = (Map) entry.getValue(); + String imageOriginalUrl = picMap.get(ESConstants.RAWIMG); + srcMap.put(ESConstants.GOFASTURL,""); + srcMap.put(ESConstants.ORIGINALURL,imageOriginalUrl); // 这个原始链接应该放抓取到的链接而不是下载用的链接 + srcList.add(srcMap); + } + } + } + path.add(downloadUrl); } - }else{ // 如果 path 中的 url 是 OK的,但是不确定 pathSize 和 srcPath 的时候,需要做下面的处理 // 因为 srcPath 中需要先添加下面两个字段值,因此需要先获取。 String allDownloadUrl = domain + downloadUrl; @@ -664,7 +738,7 @@ public class QueryService { e.printStackTrace(); } } - System.out.println("pathMap === "+JsonUtils.toJSONString(pathMap)); + System.out.println("================================ "+JsonUtils.toJSONString(pathMap)); return pathMap; } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java index 44c9e20..e33e73d 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java @@ -24,8 +24,6 @@ public class SaveService { data.put("dns",""); } if(!data.containsKey("hasOCR")){ -// data.put("asrText",""); -// data.put("ocrText",new ArrayList<>()); data.put("hasOCR",0); data.put("hasASR",0); data.put("asrLength",0); diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java index ae448b2..25da273 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java @@ -171,13 +171,11 @@ public class StatisticsService { break; } } - // 按附件统计 Map hasTotalMap = taskRepository.findTotalByHas(subjectId); // totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap,hasTotalMap); - }catch (Exception e){ result = false; LOGGER.error("[StatisticsService] statisticsSubject ERROR... subjectId : " + subjectId + "error : " ); @@ -268,12 +266,10 @@ public class StatisticsService { videoCount = countMap.get(ESConstants.VIDEOCOUNT); fileCount = countMap.get(ESConstants.FILECOUNT); textCount = countMap.get(ESConstants.TEXTCOUNT); - } // taskRepository.updateTaskCount(taskId,totalCount,todayCount); taskRepository.updateTaskCountAll(taskId,totalCount,todayCount,imageCount,videoCount,fileCount,textCount); } - } LOGGER.info("[StatisticsService] statisticsTask finish ..."); }catch (Exception e){ diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java new file mode 100644 index 0000000..53e04d0 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/WriteMethod.java @@ -0,0 +1,21 @@ +package com.bfd.mf.job.util; + +import java.io.FileWriter; +import java.io.IOException; + +/** + * Created by BFD-229 on 2017/7/6. + */ +public class WriteMethod { + public static void writeMethod(String fileName, String json){ + try{ + FileWriter writer=new FileWriter(fileName,true); + writer.write(json+"\n"); + writer.close(); + } catch (IOException e) + { + e.printStackTrace(); + } + } + +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/common/CrudService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/common/CrudService.java index 11c3370..76b95f5 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/common/CrudService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/common/CrudService.java @@ -1,6 +1,8 @@ package com.bfd.mf.common.service.common; +import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.web.entity.mysql.Model; +import com.bfd.mf.common.web.vo.params.QueryRequest; import org.springframework.data.repository.CrudRepository; import java.io.Serializable; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java index 3848987..c328c99 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java @@ -1,6 +1,6 @@ package com.bfd.mf.common.service.es; -import com.alibaba.fastjson.JSONObject; + import com.bfd.mf.common.service.cache.TopicQueryService; import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.es.EsUtils; @@ -8,12 +8,9 @@ import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; import com.bfd.mf.service.SearchAuthorService; import com.bfd.nlp.common.util.string.TStringUtils; -import lombok.extern.slf4j.Slf4j; import org.apache.commons.collections.map.HashedMap; import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.QueryStringQueryBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -57,7 +54,7 @@ public class EsQueryAuthorCountService { } /** - * 查询语句组装 + * 查询语句组装 (这个方法只有查询用户的时候才调用了!) */ private BoolQueryBuilder getQueryBuilder(QueryRequest queryRequest) { logger.info("[EsQueryAuthorService] getQueryBuilder start ..." ); @@ -71,15 +68,7 @@ public class EsQueryAuthorCountService { BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType); boolQueryBuilder.filter(searchTextBuilder); - Map fields = new HashedMap(); -// if(searchScope.equals("2")){ -// fields.put(ESConstant.AUTHOR,1.0F); -// } if (TStringUtils.isNotEmpty(keyword)) { - if(searchType.equals(0)){} // 主贴,匹配 标题+内容 - - if(searchType.equals(1)){} // 回帖,匹配 内容 - if(searchType.equals(2)){ // 作者, 匹配作者 boolQueryBuilder.must(QueryBuilders.queryStringQuery("*" + keyword + "*").field(ESConstant.AUTHOR)); bqb.must(boolQueryBuilder); diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java index 05024ba..1ae48ea 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java @@ -5,8 +5,6 @@ import com.bfd.mf.common.util.es.EsUtils; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/text/TextService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/text/TextService.java index a718a0b..2f90577 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/text/TextService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/text/TextService.java @@ -176,40 +176,40 @@ public class TextService { /** * 获取最新情感值 */ - public void resetDataEmotion(List monitorList) { - if (monitorList != null && monitorList.size() > 0) { - List objectList = new ArrayList<>(); - JSONObject jsonObject; - for (ESMonitorEntity esMonitorEntity : monitorList) { - jsonObject = new JSONObject(); - jsonObject.put(ESConstant.TITLE, esMonitorEntity.getTitle()); - jsonObject.put(ESConstant.SUMMARY, esMonitorEntity.getContentAbstract()); - jsonObject.put(ESConstant.SENTIMENT, resetSentiment(esMonitorEntity.getSysSentimentTag())); - objectList.add(jsonObject); - } - try { - objectList = emotionPost(objectList); - } catch (Exception e) { - e.printStackTrace(); - } - resetDataEmotion(monitorList, objectList); - } - } +// public void resetDataEmotion(List monitorList) { +// if (monitorList != null && monitorList.size() > 0) { +// List objectList = new ArrayList<>(); +// JSONObject jsonObject; +// for (ESMonitorEntity esMonitorEntity : monitorList) { +// jsonObject = new JSONObject(); +// jsonObject.put(ESConstant.TITLE, esMonitorEntity.getTitle()); +// jsonObject.put(ESConstant.SUMMARY, esMonitorEntity.getContentAbstract()); +// jsonObject.put(ESConstant.SENTIMENT, resetSentiment(esMonitorEntity.getSysSentimentTag())); +// objectList.add(jsonObject); +// } +// try { +// objectList = emotionPost(objectList); +// } catch (Exception e) { +// e.printStackTrace(); +// } +// resetDataEmotion(monitorList, objectList); +// } +// } /** * 重置情感值 */ - public void resetDataEmotion(List monitorList, List jsonObjectList) { - JSONObject jsonObject; - ESMonitorEntity monitorEntity; - for (int i = 0; i < monitorList.size(); i++) { - jsonObject = jsonObjectList.get(i); - monitorEntity = monitorList.get(i); - logger.error("docId sentiment source :{}", monitorEntity.getSysSentimentTag()); - monitorEntity.setSysSentimentTag(resetSentiment(jsonObject.getInteger(ESConstant.SENTIMENT))); - logger.error("docId sentiment target {}", monitorEntity.getSysSentimentTag()); - } - } +// public void resetDataEmotion(List monitorList, List jsonObjectList) { +// JSONObject jsonObject; +// ESMonitorEntity monitorEntity; +// for (int i = 0; i < monitorList.size(); i++) { +// jsonObject = jsonObjectList.get(i); +// monitorEntity = monitorList.get(i); +// logger.error("docId sentiment source :{}", monitorEntity.getSysSentimentTag()); +// monitorEntity.setSysSentimentTag(resetSentiment(jsonObject.getInteger(ESConstant.SENTIMENT))); +// logger.error("docId sentiment target {}", monitorEntity.getSysSentimentTag()); +// } +// } /** * 情感值与数字之间的转化 diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java index beaa5f7..7918436 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java @@ -7,11 +7,7 @@ public enum RTCodeEnum { C_OK(0, "OK"), // C_DONE(1, "Done"), // C_FAIL(-1, "Failed"), - /** - * ES Index - */ C_INDEX_EXISTS(-3, "Index Exists"), - // 自定义状态码 C_TOPIC_DISABLED(-2, "话题渠道为空,请运营人员确注意操作!!!"), C_UPLOAD_OK(200,"上传成功"), C_UPLOAD_PARSE_FAIL(201,"Excel解析失败,请检查Excel"), @@ -20,70 +16,38 @@ public enum RTCodeEnum { C_UPLOAD_FAIL(206,"上传失败") , C_UPLOAD_ERROR(204,"请上传 Excel 或 txt 文件"), C_UPLOAD_EMPTY(205,"上传的文件为空,请核查文件。"), - // Param Issue: 3** C_PARAM_ERROR(300, "Input Param Error"), - C_TOKEN_ERROR(301, "Token Validation Error"), - C_CAPTCHA_ERROR(302, "验证码错误,请重试"), - C_APPLY_DUPLICATION(303, "申请试用信息已存在, 请不要重复提交"), - C_UNBIND(304, "微信账号未绑定"), - C_STATE_EXIST(305, "请勿重复请求"), - // Data Issue: 4** C_NO_DATA_FOUND(400, "No Data Found"), - C_DATA_ERROR_PAGETIME_EXPIRE(401, "页面超时不可用,请刷新重试"), - - // System Service Issue: 5** C_SERVICE_NOT_AVAILABLE(500, "系统服务不可用,请联系管理员"), - C_BIZ_IMPACT_DAYS_NOT_SUPPRTTED(601, "Impact days not supported, we only support for 1,5,10,20 for free"), - C_SUBJECT_GRAMMAR_NOT_SUPPRTTED(701, "语法错误,请修改"), C_SUBJECT_GRAMMAR_ERROR(702, "参数错误,请求改"), - C_SUBJECT_GRAMMAR_EXPERTKEY_WORD(913, "关键词不允许超过50个"), - - // Eric added @2016年11月02日15:27:10 C_SUBJECT_DEFINITION_NOT_ACCURATE(702, "请不要设置过于宽泛的关键词组合, 这样会采集过多不相关的文章"), - C_SUBJECT_ZERO_GRAMMAR_NOT_SUPPRTTED(703, "该语法查询不到内容,请精确查询意图"), - C_SUBJECT_MORE_GRAMMAR_NOT_SUPPRTTED(704, "语法配置过于宽泛,请精确查询意图"), - - C_PERMISSION_ERROR_USER_RANOUT(801, "用户数量达到最大限制, 请联系管理员升级权限"), - C_PERMISSION_ERROR_TOPIC_RANOUT(802, "话题数量超过限制, 请联系客服人员处理"), - C_PERMISSION_ERROR_LOGIN_FAIL(803, "用户名或密码不正确,请重新登录"), - C_PERMISSION_ERROR_COMPANY_EXPIRE(804, "客户账号已过期或不可用,请联系管理员"), - C_PERMISSION_ERROR_SENDTIME_EXPIRE(805, "发送时间超过限制不可用,请修改发送时间"), - C_PERMISSION_ERROR_SCREEN_RANOUT(806, "屏蔽数量达到最大限制, 请联系管理员升级权限"), - C_PERMISSION_ERROR_CODE_EXPIRE(807, "code已过期或不可用"), - C_PERMISSION_ERROR_SHOPURL_RANOUT(808, "用户url数量达到最大限制, 请联系管理员升级权限"), - C_PERMISSION_ERROR_COMMENT_CLOSED(809, "客户账号口碑功能未开启,请联系管理员"), C_COMPANY_DEADLINE(909, "客户过期!"), - - C_UPLOAD_IMG_FAIL(901, "上传失败"), - C_VALID_PASSWORD_ERROR(904, "原始密码错误"), C_USER_PERMSSION_ERROR(908, "当前账号无任何权限,请联系管理员"), C_USER_STATUS_ERROR(910, "用户被禁用,请联系管理员"), - C_KEYWORD_LIMIT(902, "关键词不能大于客户配置词数!"), C_EXCLUDE_LIMIT(903, "排除词不能大于客户配置词数!"), - C_LACK_SPREADNUM(904, "剩余转发数不足,请与工作人员联系!"), C_WEIBO_CRAWL_ERR(905, "加载失败,请检查该微博是否被删除!"), C_WEIBO_URL_ERR(906, "微博链接不符合规范!"), @@ -92,8 +56,6 @@ public enum RTCodeEnum { C_ACCOUNT_NICK_NAME(911, "该账号已存在,不能重复添加"), C_ACCOUNT_NO_NICK_NAME(912, "该用户昵称不存在"); - - private int code; private String desc; diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java index b098c9f..e24de10 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java @@ -191,4 +191,33 @@ public class SearchDataController { } + + @ApiOperation(value = "复制专题") + @RequestMapping(value = "/reindex/reindexSubject", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject reIndex(@RequestBody QueryRequest queryRequest){ + logger.info("[reIndex] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + JSONObject result = searchDataService.reIndexSubject(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } catch (Exception e) { + logger.error("[reIndex] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + } + + @ApiOperation(value = "删除专题") + @RequestMapping(value = "/delete/deleteBySubjectId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject deleteSubject(@RequestBody QueryRequest queryRequest){ + logger.info("[deleteSubject] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + try { + JSONObject result = searchDataService.deleteBySubjectId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } catch (Exception e) { + logger.error("[deleteSubject] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + } + } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java index 396c881..a16d02f 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java @@ -1,6 +1,5 @@ package com.bfd.mf.service; -import com.alibaba.druid.support.json.JSONUtils; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.service.common.CrudService; import com.bfd.mf.common.service.es.ClusterService; @@ -20,7 +19,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.io.Serializable; -import java.text.SimpleDateFormat; import java.util.*; @Service diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java index 5953225..0f1b943 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java @@ -56,7 +56,6 @@ public class SearchDataService extends CrudService dataIdList, String orderFlag,String sortFlag, List currentIndexList, Cluster cluster) { @@ -79,7 +78,6 @@ public class SearchDataService extends CrudService> site = siteRepository.findSiteByEnSource(enSource); Map> siteMap = new HashMap<>(); @@ -816,7 +812,6 @@ public class SearchDataService extends CrudService currentIndexList) { JSONObject jsonObject = new JSONObject(); /**依据文档Id查询对应的文档*/ @@ -861,8 +856,6 @@ public class SearchDataService extends CrudService