diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java index 9194515..244faeb 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java @@ -45,7 +45,8 @@ public interface SubjectCountRepository extends CrudRepository { - @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true) + @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 3 AND app_id = '61qb' AND subject_id = 12094 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true) + // @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.subject_id = 12094 AND ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC ;",nativeQuery = true) List findAllNewTask(); // 需要统计的任务的查询条件 1、 状态为 1 OR 0;2、状态为3,且任务完成时间再2天前的。 @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ((ct.crawl_status = 1 OR ct.crawl_status = 0) OR (ct.crawl_status = 3 AND ct.end_time > date_sub(curdate(),interval 2 day))); ",nativeQuery = true) // @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.subject_id = 12505 ; ",nativeQuery = true) // @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 ; ",nativeQuery = true) + //@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 ; ",nativeQuery = true) List findAllBydel0(); @Query(value = "SELECT sum(data_total) FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid=cs.cid WHERE ct.del =0 AND ct.subject_id = ?1 AND cs.site_type = ?2",nativeQuery = true) @@ -37,12 +39,15 @@ public interface TaskRepository extends CrudRepository { @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time 3 ",nativeQuery = true) Long findTodayDataTotal(String taskStartTime ,String taskEndTime); + @Query(value = "SELECT sum(has_image_total) image,sum(has_video_total) video,sum(has_file_total) file,SUM(has_text_total) text FROM `cl_task` WHERE subject_id = ?1 ;",nativeQuery = true) + Map findTotalByHas(BigInteger subjectId); + /** * 更新每个任务 拉数据次数 */ @Modifying @Transactional(rollbackFor = Exception.class) - @Query(value = "update cl_task set cache_num=?1 where id=?2", nativeQuery = true) + @Query(value = "update cl_task set cache_num=?1 ,update_time = now() where id=?2", nativeQuery = true) Integer updateStatus(int cache_num,long id); /** @@ -63,6 +68,11 @@ public interface TaskRepository extends CrudRepository { @Modifying @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set data_total =?2 , today_data_total =?3 ,has_image_total = ?4,has_video_total = ?5, has_file_total = ?6,has_text_total = ?7 where id =?1 ", nativeQuery = true) + void updateTaskCountAll(Long id, Long totalCount, Long todayCount,Long imageCount,Long videoCount,Long fileCount,Long textCount); + + @Modifying + @Transactional(rollbackFor = Exception.class) @Query(value = "update cl_task set crawl_status =?4 where id =?1 ", nativeQuery = true) void updateCrawlStatus(long taskId); diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java index 66502a6..510ebcc 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java @@ -76,7 +76,6 @@ public class DownLoadFile { } public static String imagesize(String getUrl) throws IOException{ - String realUrl = "";Integer size; String realresult=""; try{ InputStream murl = new URL(getUrl).openStream(); @@ -84,7 +83,6 @@ public class DownLoadFile { int srcWidth = sourceImg .getWidth(); // 源图宽度 int srcHeight = sourceImg .getHeight(); // 源图高度 realresult=Integer.toString(srcWidth)+"×"+ Integer.toString(srcHeight); - }catch (Exception e){ e.printStackTrace(); } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java index aa847cd..3aa67f0 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java @@ -4,7 +4,6 @@ import com.alibaba.fastjson.JSONObject; import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.domain.entity.EmailGroup; -import com.bfd.mf.job.domain.entity.TaskCount; import com.bfd.mf.job.domain.repository.EmailGroupRepository; import com.bfd.mf.job.domain.repository.ServiceLoadRepository; import com.bfd.mf.job.domain.repository.TaskCountRepository; @@ -71,7 +70,7 @@ public class AlarmService { // System.out.println(index); //logstash-2021.05.20 logstash-2021.05.21 String startTime = DateUtil.getDateTime(System.currentTimeMillis()); - String endTime = DateUtil.getDateTime(System.currentTimeMillis() - 60 * 30 * 1000); + String endTime = DateUtil.getDateTime(System.currentTimeMillis() - 480 * 60 * 1000); String type = "datasave"; QueryBuilder queryBuilder = getQueryBuilder(startTime,endTime,type); @@ -106,7 +105,7 @@ public class AlarmService { } // System.out.println(cid); if(null == cid){ - System.out.println(data); + // System.out.println(data); } if(errorCid.containsKey(cid)){ Integer errorNum = errorCid.get(cid); @@ -114,14 +113,13 @@ public class AlarmService { }else{ errorCid.put(cid,1); } - } } catch (Exception e) { e.printStackTrace(); } }); - System.out.println(JSONObject.toJSONString(errorCid)); + // System.out.println(JSONObject.toJSONString(errorCid)); // 遍历统计的map ,将 value> 10 的报警 for(Map.Entry entry : errorCid.entrySet()){ @@ -170,7 +168,7 @@ public class AlarmService { */ Integer alarm_tag = 3; Integer alarm_reason = 1; - String alarm_message = "[chenrui.li]这个站点解析失败次数为:"+errorNum; + String alarm_message = "这个站点解析失败次数为:"+errorNum; String alarm_task_url = ""; // 无法确认是哪个任务 String alarm_task_content = ""; // 无法确认是哪个任务 String alarm_cid = cid; @@ -182,7 +180,12 @@ public class AlarmService { List emailList = new ArrayList<>(); emailList.add(alarm_handler); // 根据站点查询站点的处理人 - String email_addr = getEmailByCid(cid); + System.out.println("**************** " + cid); + if(null != cid) { + String email_addr = getEmailByCid(cid); + }else{ + System.out.println("没查到邮箱,不报警"); + } String opinion = ""; Integer status = 2; String create_time = DateUtil.getDateTime(new Date().getTime()); @@ -203,6 +206,11 @@ public class AlarmService { EMailUtils.getInstance().sendEmail(6, siteMessage, emailList,"30"); } + /** + * 根据数据库中的站点配置,用站点的cid 获取这个站点负责人的邮箱地址 + * @param cid + * @return + */ private String getEmailByCid(String cid) { List emails = emailGroupRepository.getEmailGroupsByCid(cid); String alarmEmail = ""; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java index 7eed643..c1ffe91 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java @@ -161,21 +161,28 @@ public class EsQueryMiniService { TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC,1); TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC,1); TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC,1); + TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD,false); qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); qb.must(pgcTermQueryBuilder); logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long imageCount = EsUtils.queryCount(clusterName, indexName, qb); - countMap.put("todayCount", todayCount); + countMap.put(ESConstants.IMAGECOUNT, imageCount); qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); qb.must(egcTermQueryBuilder); logger.info("QB4 查询有视频的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long videoCount = EsUtils.queryCount(clusterName, indexName, qb); - countMap.put("todayCount", todayCount); + countMap.put(ESConstants.VIDEOCOUNT, videoCount); qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); qb.must(ugcTermQueryBuilder); logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long fileCount = EsUtils.queryCount(clusterName, indexName, qb); - countMap.put("todayCount", todayCount); + countMap.put(ESConstants.FILECOUNT, fileCount); + qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb.must(textTermQueryBuilder); + logger.info("QB6 查询纯文本的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long textCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put(ESConstants.TEXTCOUNT, textCount); + logger.info("含图片的数据量:" + imageCount + " ; 含视频的数据量:" + videoCount + " ; 含文件的数据量:" + fileCount + " ; 纯文本的数据量:" + textCount); } } } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java index a3a96ab..e297816 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java @@ -89,7 +89,7 @@ public class QueryService { List taskList2 = taskRepository.findAllNewTask(); //taskList2.addAll(taskList1); for (Task task : taskList2) { - // LOGGER.info("Executing task:{}.", JSON.toJSONString(task)); + LOGGER.info("Executing task:{}.", JSON.toJSONString(task)); Long totalSegment = 1L;//(task.getDateEnd() - task.getDateStart()) / PERIOD_MILLS; // 3600000 Long segment = 1L; Double progressFactor = 1.0 / totalSegment; @@ -142,7 +142,8 @@ public class QueryService { String crawlContentKey = task.getCrawlContentKey(); // 要拉取的字段,主要看是否需要拉评论 // BigInteger subjectId = task.getSubjectId(); // Subject subject = subjectRepository.getSubjectBySubjectId(subjectId.longValue()); - String indexName = "cl_major_" + task.getSubjectId(); // 索引名称 + // String indexName = "cl_major_" + task.getSubjectId(); // 索引名称 + String indexName = "cl_major_61qb_12094"; Integer cacheNum = task.getCacheNum(); // 拉取数据的次数 // 当拉数据的次数 大于1 次的时候,再拉数据的开始时间就不用是任务设置的开始时间了,同时可以再加个采集时间范围限制一下,确保拉的数据都是任务添加之后才采集的就行 QueryBuilder queryBuilder; // 根据条件组装查询用具 @@ -438,7 +439,7 @@ public class QueryService { } // 当三个 pathSize 都为 0 的时候,表示三个下载结果都为空,为了保持页面和实际结果的统一,这块改成 false if(filePathSize.size() == 0 && videoPathSize.size() == 0 && imagePathSize.size() == 0){ - data.put(ESConstants.ISDOWNLOAD,false); + data.put(ESConstants.ISDOWNLOAD,"false"); } return data; } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java index fcc9cb6..5484eb4 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java @@ -15,6 +15,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import javax.annotation.PostConstruct; +import java.math.BigDecimal; import java.math.BigInteger; import java.util.HashMap; import java.util.List; @@ -135,7 +136,7 @@ public class StatisticsService { break; } } - // 按采集方式统计数据量 + // 按采集方式统计数据量 account url keyword Map subjectCrawlDatFlagMap = new HashMap<>(); Map subjectCrawlDataFlagTodayMap = new HashMap<>(); long siteCount = 0L; @@ -170,7 +171,13 @@ public class StatisticsService { break; } } - totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); + + // 按附件统计 + Map hasTotalMap = taskRepository.findTotalByHas(subjectId); + + // totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); + totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap,hasTotalMap); + }catch (Exception e){ result = false; LOGGER.error("[StatisticsService] statisticsSubject ERROR... subjectId : " + subjectId + "error : " ); @@ -193,7 +200,8 @@ public class StatisticsService { // 统计这个专题下每种采集类型的增量 Map subjectCrawlDataFlagTodayMap = esQueryMiniService.getSubjectCrawlDataFlagTodayStatistics(miniName,indexName); // 查入或修改表 - totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); + Map hasTotalMap = new HashMap<>(); + totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap,hasTotalMap); totalCountService.updateResultDetil(subjectId,subjectChannelMap); }catch (Exception e){ result = false; @@ -214,7 +222,8 @@ public class StatisticsService { Map crawlDataFlagMap = esQueryNormalService.getCrawlDataFlagStatistics(normalName); Map crawlDataFlagTodayMap = esQueryNormalService.getCrawlDataFlagTodayStatistics(normalName); BigInteger subjectId = new BigInteger("0"); - totalCountService.updateSubjectCount(subjectId,channelMap,channelTodayMap,crawlDataFlagMap,crawlDataFlagTodayMap); + Map hasTotalMap = new HashMap<>(); + totalCountService.updateSubjectCount(subjectId,channelMap,channelTodayMap,crawlDataFlagMap,crawlDataFlagTodayMap,hasTotalMap); }catch (Exception e){ result = false; LOGGER.error("[StatisticsService] statisticsTotal ERROR... "); @@ -254,13 +263,15 @@ public class StatisticsService { if(countMap.containsKey(ESConstants.TOTALCOUNT) && countMap.containsKey(ESConstants.TODAYCOUNT)) { totalCount = countMap.get(ESConstants.TOTALCOUNT); todayCount = countMap.get(ESConstants.TODAYCOUNT); - System.out.println("******* " + totalCount); -// imageCount = countMap.get(ESConstants.IMAGECOUNT); -// videoCount = countMap.get(ESConstants.VIDEOCOUNT); -// fileCount = countMap.get(ESConstants.FILECOUNT); -// textCount = countMap.get(ESConstants.TEXTCOUNT); + System.out.println("totalCount : " + totalCount); + imageCount = countMap.get(ESConstants.IMAGECOUNT); + videoCount = countMap.get(ESConstants.VIDEOCOUNT); + fileCount = countMap.get(ESConstants.FILECOUNT); + textCount = countMap.get(ESConstants.TEXTCOUNT); + } taskRepository.updateTaskCount(taskId,totalCount,todayCount); + taskRepository.updateTaskCountAll(taskId,totalCount,todayCount,imageCount,videoCount,fileCount,textCount); } } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java index 044884b..5529119 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java @@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import java.math.BigDecimal; import java.math.BigInteger; import java.text.SimpleDateFormat; import java.util.*; @@ -31,7 +32,8 @@ public class TotalCountService { Map subjectChannelMap, Map subjectChannelTodayMap, Map subjectCrawlDataFlagMap, - Map subjectCrawlDataFlagTodayMap) { + Map subjectCrawlDataFlagTodayMap, + Map hasMap) { try { List channels = getAllChannels(); Collection value = subjectChannelMap.values(); @@ -77,6 +79,11 @@ public class TotalCountService { BigInteger account_type_total_count = new BigInteger("0"); BigInteger account_type_count = new BigInteger("0"); + BigInteger has_image_count = new BigInteger("0"); + BigInteger has_video_count = new BigInteger("0"); + BigInteger has_file_count = new BigInteger("0"); + BigInteger has_text_count = new BigInteger("0"); + if (subjectCrawlDataFlagMap.size() > 0) { if(null != subjectCrawlDataFlagMap.get(ESConstants.URL)) { url_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.URL)); @@ -191,10 +198,18 @@ public class TotalCountService { id = subjectCountRepository.findIdBySubjectId(subjectId, today); } if (null != id && !id.equals("")) { + if(hasMap.containsKey("image")) { + has_image_count = BigInteger.valueOf(Long.valueOf(hasMap.get("image") + "")); + has_video_count = BigInteger.valueOf(Long.valueOf(hasMap.get("video") + "")); + has_file_count = BigInteger.valueOf(Long.valueOf(hasMap.get("file") + "")); + has_text_count = BigInteger.valueOf(Long.valueOf(hasMap.get("text") + "")); + } + subjectCountRepository.updateBySubjectId(id, BigInteger.valueOf(sumToday), BigInteger.valueOf(sum), url_type_total_count, keyword_type_total_count, account_type_total_count, url_type_count, keyword_type_count, account_type_count, social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count, - search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count); + search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count, + has_image_count,has_video_count,has_file_count,has_text_count); } else { // 先获取这个专题昨天的 update_time ,然后添加到 UpdateTime 字段中 // 获取昨天日期 @@ -214,6 +229,193 @@ public class TotalCountService { LOGGER.debug("[TotalCountService] updateSubjectCount finish ... subjectId = " + subjectId); } +// public void updateSubjectCount(BigInteger subjectId, +// Map subjectChannelMap, +// Map subjectChannelTodayMap, +// Map subjectCrawlDataFlagMap, +// Map subjectCrawlDataFlagTodayMap) { +// try { +// List channels = getAllChannels(); +// Collection value = subjectChannelMap.values(); +// SubjectCount subjectCount = new SubjectCount(); +// Long sum = 0L; +// for (Long v : value) { +// if(null != v) { +// sum = sum + v; +// } +// } +// Long sumToday = 0L; +// for (Long v : subjectChannelTodayMap.values()) { +// if(null != v) { +// sumToday = sumToday + v; +// } +// } +// subjectCount.setTodayTotalCount(BigInteger.valueOf(sumToday)); +// subjectCount.setTotalCount(BigInteger.valueOf(sum)); +// Date date = new Date(); +// subjectCount.setCreateTime(date); +// +// BigInteger social_total_count = new BigInteger("0"); +// BigInteger social_count = new BigInteger("0"); +// BigInteger bbs_total_count = new BigInteger("0"); +// BigInteger bbs_count = new BigInteger("0"); +// BigInteger blog_total_count = new BigInteger("0"); +// BigInteger blog_count = new BigInteger("0"); +// BigInteger news_total_count = new BigInteger("0"); +// BigInteger news_count = new BigInteger("0"); +// BigInteger search_total_count = new BigInteger("0"); +// BigInteger search_count = new BigInteger("0"); +// BigInteger item_total_count = new BigInteger("0"); +// BigInteger item_count = new BigInteger("0"); +// BigInteger video_total_count = new BigInteger("0"); +// BigInteger video_count = new BigInteger("0"); +// BigInteger life_total_count = new BigInteger("0"); +// BigInteger life_count = new BigInteger("0"); +// +// BigInteger url_type_total_count = new BigInteger("0"); +// BigInteger url_type_count = new BigInteger("0"); +// BigInteger keyword_type_total_count = new BigInteger("0"); +// BigInteger keyword_type_count = new BigInteger("0"); +// BigInteger account_type_total_count = new BigInteger("0"); +// BigInteger account_type_count = new BigInteger("0"); +// +// if (subjectCrawlDataFlagMap.size() > 0) { +// if(null != subjectCrawlDataFlagMap.get(ESConstants.URL)) { +// url_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.URL)); +// } +// if(null != subjectCrawlDataFlagMap.get(ESConstants.KEYWORD)) { +// keyword_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.KEYWORD)); +// } +// if(null != subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)) { +// account_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)); +// } +// if(null != subjectCrawlDataFlagMap.get("upload")){ +// System.out.println("上传的任务的数据量对应的专题 " + subjectId +" == "+ subjectCrawlDataFlagMap.get("upload")); +// if(subjectCrawlDataFlagMap.get("upload").compareTo(0L) >0) +// sum = subjectCrawlDataFlagMap.get("upload"); +// } +// } +// subjectCount.setUrlTypeTotalCount(url_type_total_count); +// subjectCount.setKeywordTypeTotalCount(keyword_type_total_count); +// subjectCount.setAccountTypeTotalCount(account_type_total_count); +// +// if (subjectCrawlDataFlagTodayMap.size() > 0) { +// if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.URL)) { +// url_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.URL)); +// } +// if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.KEYWORD)) { +// keyword_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.KEYWORD)); +// } +// if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.ACCOUNT)) { +// account_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.ACCOUNT)); +// } +// } +// subjectCount.setUrlTypeCount(url_type_count); +// subjectCount.setKeywordTypeCount(keyword_type_count); +// subjectCount.setAccountTypeCount(account_type_count); +// +// +// for (String channel : channels) { +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.SOCIAL)) { +// social_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.SOCIAL)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.SOCIAL)) { +// social_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.SOCIAL)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.BBS)) { +// bbs_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.BBS)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.BBS)) { +// bbs_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.BBS)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.BLOG)) { +// blog_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.BLOG)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.BLOG)) { +// blog_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.BLOG)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.NEWS)) { +// news_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.NEWS)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.NEWS)) { +// news_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.NEWS)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.SEARCH)) { +// search_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.SEARCH)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.SEARCH)) { +// search_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.SEARCH)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.ITEM)) { +// item_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.ITEM)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.ITEM)) { +// item_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.ITEM)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.VIDEO)) { +// video_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.VIDEO)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.VIDEO)) { +// video_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.VIDEO)); +// } +// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.LIFE)) { +// life_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.LIFE)); +// } +// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.LIFE)) { +// life_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.LIFE)); +// } +// subjectCount.setSocialTotalCount(social_total_count); +// subjectCount.setSocialCount(social_count); +// subjectCount.setBbsTotalCount(bbs_total_count); +// subjectCount.setBbsCount(bbs_count); +// subjectCount.setBlogTotalCount(blog_total_count); +// subjectCount.setBlogCount(blog_count); +// subjectCount.setNewsTotalCount(news_total_count); +// subjectCount.setNewsCount(news_count); +// subjectCount.setSearchTotalCount(search_total_count); +// subjectCount.setSearchCount(search_count); +// subjectCount.setItemTotalCount(item_total_count); +// subjectCount.setItemCount(item_count); +// subjectCount.setVideoTotalCount(video_total_count); +// subjectCount.setVideoCount(video_count); +// subjectCount.setLifeTotalCount(life_total_count); +// subjectCount.setLifeCount(life_count); +// } +// subjectCount.setSubjectId(subjectId); +// // 如果不存在,就调用这个方法; +// +// String today = formatter.format(date); +// BigInteger id = new BigInteger("0"); +// if (subjectId.longValue() == 0) { +// id = subjectCountRepository.findIdBySubjectDate(today); +// subjectCount.setSubjectId(null); +// } else { +// id = subjectCountRepository.findIdBySubjectId(subjectId, today); +// } +// if (null != id && !id.equals("")) { +// subjectCountRepository.updateBySubjectId(id, BigInteger.valueOf(sumToday), BigInteger.valueOf(sum), +// url_type_total_count, keyword_type_total_count, account_type_total_count, url_type_count, keyword_type_count, account_type_count, +// social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count, +// search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count); +// } else { +// // 先获取这个专题昨天的 update_time ,然后添加到 UpdateTime 字段中 +// // 获取昨天日期 +// String yesterday = DateUtil.parseDateByday(System.currentTimeMillis() - 1000 * 60 * 60 * 24); +// Date updateTime = subjectCountRepository.getUpdateTimeBySubjectId(subjectId,yesterday); +// if(updateTime != null){ +// subjectCount.setUpdateTime(updateTime); +// }else { +// subjectCount.setUpdateTime(new Date()); +// } +// subjectCountRepository.save(subjectCount); +// } +// }catch ( Exception e){ +// e.printStackTrace(); +// LOGGER.error("[TotalCountService] updateSubjectCount ERROR ... subjectId = " + subjectId); +// } +// LOGGER.debug("[TotalCountService] updateSubjectCount finish ... subjectId = " + subjectId); +// } + private static List getAllChannels() { List channels = new ArrayList(); channels.add(ESConstants.SOCIAL); diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java index c578355..1937d02 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java @@ -44,7 +44,7 @@ public class EMailUtils { // 收件人邮箱(替换为自己知道的有效邮箱) public static String receiveMailAccount = "chaofan.tan@baifendian.com"; - private String confPath = "../etc/config.properties"; + //private String confPath = "../etc/config.properties"; private static EMailUtils instance = null; private String protocol = "smtp"; @@ -78,7 +78,7 @@ public class EMailUtils { LOG.info("EMailUtils protocol:" + protocol + " myEmailSMTPHost:" + myEmailSMTPHost + " smtpAuth: " + smtpAuth + " myEmailAccount: " + myEmailAccount - + " emailEncode: " + emailEncode + " config path: " + confPath); + + " emailEncode: " + emailEncode + " config path: " ); //+ confPath } public static EMailUtils getInstance() { @@ -92,9 +92,9 @@ public class EMailUtils { return instance; } - public void setConfigPath (String confPath) { - this.confPath = confPath; - } +// public void setConfigPath (String confPath) { +// this.confPath = confPath; +// } public void sendWechat(List emailList, String message) { @@ -164,7 +164,7 @@ public class EMailUtils { content = content.replace("{type}", "数据关键字段值为空"); break; case 6: - content = confPath.replace("{type}","解析失败次数超过100次"); + content = content.replace("{type}","解析失败次数超过100次"); break; default: return ; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java index ad04f67..37925a6 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java @@ -1,16 +1,15 @@ package com.bfd.mf.job.worker; + import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.service.alarm.AlarmService; -import com.bfd.mf.job.service.taskCount.TaskCountService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.log4j.Logger; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @Component public class AlarmProducer extends AbstractWorker { - private static final Logger LOGGER = LoggerFactory.getLogger(AlarmProducer.class); + private static final Logger LOGGER =Logger.getLogger(AlarmProducer.class); @Autowired private AppConfig config; diff --git a/cl_query_data_job/src/main/resources/application-0827.yml b/cl_query_data_job/src/main/resources/application-0827.yml new file mode 100644 index 0000000..4361830 --- /dev/null +++ b/cl_query_data_job/src/main/resources/application-0827.yml @@ -0,0 +1,105 @@ +debug: false + +logging: + level: + com.bfd.mf: debug +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: bfd123 + url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: + maximum-pool-size: 10 + minimum-idle: 1 +#spring: +# datasource: +# driver-class-name: com.mysql.jdbc.Driver +# username: root +# password: Bfd123!@# +# url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round +# hikari: +# maximum-pool-size: 10 +# minimum-idle: 1 +#spring: +# datasource: +# driver-class-name: com.mysql.jdbc.Driver +# username: root +# password: Bfd123!@# +# url: jdbc:mysql://172.18.1.134:3306/all_task?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round +# hikari: +# maximum-pool-size: 10 +# minimum-idle: 1 + + +worker: + version: 3.0.1 + enable-test: false + test-thread-count: 10 + test-task-id: 180 + ## 数据默认要写的 kafka + broker-list: 172.18.1.113:9092 + send-topic : databasestokafka + analysis-topic: + - sq_topic_cl_query_analysis_1 + + analysis-group: sq_group_cl_analysis_1 + ## 服务的状态,true 为启动 + enable-analysis-producer: false # 查ES写kafka + enable-analysis-consumer: false # 读kafka写ES + enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台) + enable-query-producer: false # 离线拉数(采集平台) + enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用) + enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用 + enable-up-load-producer: false # 上传(采集平台) + enable-output-producer: false #未开发,暂留 + enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台) + enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台) + ## 启动服务的线程数 + statistics-producer-thread-count: 1 + query-producer-thread-count: 10 + backtrace-producer-thread-count: 1 + rw-oly-producer-thread-count: 1 + up-load-producer-thread-count: 1 + output-producer-thread-count: 1 + taskcount-producer-thread-count: 1 + alarm-producer-thread-count: 1 + + period-s: 5 + interval-time: 1800000 + # 拉数年份查询的开始时间,现在是19年之前的合并成年了,这个时间就是:2019-01-01 00:00:00 + query-data-year-starttime: 1546272000000 + + rule-rest: http://rule.sq.baifendian.com/data_match/content/ + comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask + rule-rest-concurrency: 500 + content-limit: 2000 + failure-upper: 2000 + + goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : http://172.18.1.113:8080 + uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + + es-normal: + name: SQ_Normal_new + address: 172.18.1.134:9301 + upper: 2000-01-01 + standby: cl_index_0 + es-reply-source: + name: SQ_Mini_new + address: 172.18.1.148:9303 + upper: 2000-01-01 + standby: cl_index_0 + es-mini: + name: SQ_Mini + address: 172.18.1.147:9313 + bulk-thread-count: 5 + bulk-rate: 3 + bulk-size: 100 + es-logstash: + name: SQ_Log + address: 172.26.11.111:9301 + upper: 2021-01-01 + standby: logstash-2021.05.13 diff --git a/cl_query_data_job/src/main/resources/application-113.yml b/cl_query_data_job/src/main/resources/application-113.yml index d0ebb3e..bee8638 100644 --- a/cl_query_data_job/src/main/resources/application-113.yml +++ b/cl_query_data_job/src/main/resources/application-113.yml @@ -26,13 +26,16 @@ worker: analysis-group: sq_group_cl_analysis_1 ## 服务的状态,true 为启动 - enable-analysis-producer: false - enable-analysis-consumer: false - enable-statistics-producer: true - enable-query-producer: false - enable-backtrace-producer: false - enable-rw-oly-producer: false - enable-up-load-producer: false + enable-analysis-producer: false # 查ES写kafka + enable-analysis-consumer: false # 读kafka写ES + enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台) + enable-query-producer: false # 离线拉数(采集平台) + enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用) + enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用 + enable-up-load-producer: false # 上传(采集平台) + enable-output-producer: false #未开发,暂留 + enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台) + enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台) ## 启动服务的线程数 statistics-producer-thread-count: 1 query-producer-thread-count: 5 diff --git a/cl_query_data_job/src/main/resources/application.yml b/cl_query_data_job/src/main/resources/application.yml index 6805a53..e3c8ab2 100644 --- a/cl_query_data_job/src/main/resources/application.yml +++ b/cl_query_data_job/src/main/resources/application.yml @@ -47,14 +47,14 @@ worker: ## 服务的状态,true 为启动 enable-analysis-producer: false # 查ES写kafka enable-analysis-consumer: false # 读kafka写ES - enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台) + enable-statistics-producer: false # 统计 taskCount 和 subjectCount (采集平台) enable-query-producer: false # 离线拉数(采集平台) enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用) enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用 enable-up-load-producer: false # 上传(采集平台) enable-output-producer: false #未开发,暂留 enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台) - enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台) + enable-alarm-producer: true # 报警,查ES统计报警发邮件写数据库(运营后台) ## 启动服务的线程数 statistics-producer-thread-count: 1 query-producer-thread-count: 10 @@ -103,3 +103,22 @@ worker: address: 172.26.11.111:9301 upper: 2021-01-01 standby: logstash-2021.05.13 + +# es-mini: +# name: SQ_Log +# address: 172.26.11.111:9301 +# upper: 2018-09-01 +# standby: cl_major_ +# bulk-thread-count: 5 +# bulk-rate: 3 +# bulk-size: 100 +# es-normal: +# name: SQ_Log +# address: 172.26.11.111:9301 +# upper: 2018-09-01 +# standby: cl_index_* +# es-logstash: +# name: SQ_Log +# address: 172.26.11.111:9301 +# upper: 2021-01-01 +# standby: logstash-2021.05.13 diff --git a/cl_search_api/cl_search_api.iml b/cl_search_api/cl_search_api.iml index 378383a..5ad01ed 100644 --- a/cl_search_api/cl_search_api.iml +++ b/cl_search_api/cl_search_api.iml @@ -171,14 +171,14 @@ - - + + diff --git a/cl_search_api/pom.xml b/cl_search_api/pom.xml index f318fd8..922467e 100644 --- a/cl_search_api/pom.xml +++ b/cl_search_api/pom.xml @@ -190,7 +190,16 @@ logstash-logback-encoder 4.4 - + + ch.qos.logback + logback-core + 1.1.7 + + + ch.qos.logback + logback-classic + 1.1.7 + @@ -235,12 +244,13 @@ poi 4.1.0 - org.apache.poi poi-ooxml 4.1.0 + + com.squareup.okhttp3 okhttp diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java index 270c8df..2744b35 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java @@ -1,5 +1,6 @@ package com.bfd.mf.common.service.cache; + import com.bfd.mf.common.service.es.EsCommonService; import com.bfd.mf.common.service.es.ParseSearchScopeService; import com.bfd.mf.common.util.constants.ESConstant; @@ -8,9 +9,8 @@ import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.nlp.common.util.object.TObjectUtils; +import org.apache.log4j.Logger; import org.elasticsearch.index.query.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -19,7 +19,7 @@ import java.util.stream.Collectors; @Service public class TopicQueryService { - private static Logger logger = LoggerFactory.getLogger(TopicQueryService.class); + private static Logger logger = Logger.getLogger(TopicQueryService.class); @Autowired private EsCommonService esCommonService; @Autowired diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java index 4d3e43e..3848987 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java @@ -48,7 +48,7 @@ public class EsQueryAuthorCountService { logger.debug("[EsQueryAuthorService] queryAuthorCount ..."); Integer searchType = queryRequest.getSearchType(); BoolQueryBuilder boolQueryBuilder = getQueryBuilder(queryRequest); - logger.info("[EsQueryAuthorService] queryAuthorCount indexName = " + indexName[0] + "; qb:" + "{}.", boolQueryBuilder.toString()); + logger.info("[EsQueryAuthorService] queryAuthorCount indexName :{} ; qb:{}.",indexName[0] , boolQueryBuilder.toString()); Long totalCount = EsUtils.queryTotalCount(clusterName, indexName, boolQueryBuilder,searchType); return totalCount; }catch (Exception e){ diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java index 50f790c..05024ba 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java @@ -1,15 +1,10 @@ package com.bfd.mf.common.service.es; import com.alibaba.fastjson.JSONObject; -import com.bfd.mf.common.util.WriteMethod; -import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.es.EsUtils; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; -import lombok.extern.slf4j.Slf4j; import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.slf4j.Logger; @@ -44,7 +39,7 @@ public class EsQueryServiceForSQMini { logger.debug("[EsQueryServiceForSQMini - 专题] queryDataFromOneSubject ..."); BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest); //logger.info("[EsQueryService] queryDataFromOneSubject: indexName = " + indexName[0] + "; qb:\n" + "{}.", boolQueryBuilder.toString()); - logger.info("[EsQueryService] queryDataFromOneSubject: indexName = " + indexName[0]); + logger.info("[EsQueryService] queryDataFromOneSubject: indexName : {}" ,indexName[0]); Integer limit = queryRequest.getLimit(); //每页的数量 Integer start = (queryRequest.getPage() - 1) * limit; //起始页(0,20,40....) String orderFlag = queryRequest.getOrder(); // 排序方式 asc/desc @@ -87,53 +82,53 @@ public class EsQueryServiceForSQMini { } } - public JSONObject exportDataFromOneSubjectTestGroupBy(String[] indexName, QueryRequest queryRequest) { - try { - logger.info("[exportDataFromOneSubjectTestGroupBy - 专题] exportDataFromOneSubject start ..."); - BoolQueryBuilder boolQueryBuilder =getQueryBuilder.getQueryBuilder(queryRequest); - - Integer searchType = queryRequest.getSearchType(); - String filter = "docId"; - Long totalCount = EsUtils.queryTotalCount(clusterName,indexName,boolQueryBuilder,searchType); - Integer count = Integer.parseInt(totalCount+""); +// public JSONObject exportDataFromOneSubjectTestGroupBy(String[] indexName, QueryRequest queryRequest) { +// try { +// logger.info("[exportDataFromOneSubjectTestGroupBy - 专题] exportDataFromOneSubject start ..."); +// BoolQueryBuilder boolQueryBuilder =getQueryBuilder.getQueryBuilder(queryRequest); +// +// Integer searchType = queryRequest.getSearchType(); +// String filter = "docId"; +// Long totalCount = EsUtils.queryTotalCount(clusterName,indexName,boolQueryBuilder,searchType); +// Integer count = Integer.parseInt(totalCount+""); +// +// AggregationBuilder ab = getQueryBuilder.getAggregationBuilder(queryRequest, filter,count); +// Terms terms = EsUtils.queryByTag(clusterName,indexName[0],filter,boolQueryBuilder,ab); +// // 组装返回结果 +// List docIds = parseRankData(terms); +// JSONObject result = new JSONObject(); +// if(docIds.size() > 0){ +// List monitorLists = new ArrayList<>(); +// for (String docId: docIds) { +// JSONObject jsonObject = EsUtils.queryOneDataForExport(clusterName, indexName, docId); +// monitorLists.add(jsonObject); +// } +// result.put("monitorLists",monitorLists); +// result.put("foldDocAllNumber",monitorLists.size()); +// } +// return result; +// }catch (Exception e){ +// e.printStackTrace(); +// return new JSONObject(); +// } +// } - AggregationBuilder ab = getQueryBuilder.getAggregationBuilder(queryRequest, filter,count); - Terms terms = EsUtils.queryByTag(clusterName,indexName[0],filter,boolQueryBuilder,ab); - // 组装返回结果 - List docIds = parseRankData(terms); - JSONObject result = new JSONObject(); - if(docIds.size() > 0){ - List monitorLists = new ArrayList<>(); - for (String docId: docIds) { - JSONObject jsonObject = EsUtils.queryOneDataForExport(clusterName, indexName, docId); - monitorLists.add(jsonObject); - } - result.put("monitorLists",monitorLists); - result.put("foldDocAllNumber",monitorLists.size()); - } - return result; - }catch (Exception e){ - e.printStackTrace(); - return new JSONObject(); - } - } - - private List parseRankData(Terms terms) { - List docIds = new ArrayList<>(); - try{ - if(terms.getBuckets().size() > 0) { - for (Terms.Bucket bucket : terms.getBuckets()) { - if (bucket.getKey().toString().contains("bfd")) { - String labelName = bucket.getKey().toString(); - if(bucket.getDocCount() >0){ - docIds.add(labelName); - } - } - } - } - }catch (Exception e){ - e.printStackTrace(); - } - return docIds; - } +// private List parseRankData(Terms terms) { +// List docIds = new ArrayList<>(); +// try{ +// if(terms.getBuckets().size() > 0) { +// for (Terms.Bucket bucket : terms.getBuckets()) { +// if (bucket.getKey().toString().contains("bfd")) { +// String labelName = bucket.getKey().toString(); +// if(bucket.getDocCount() >0){ +// docIds.add(labelName); +// } +// } +// } +// } +// }catch (Exception e){ +// e.printStackTrace(); +// } +// return docIds; +// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java index 013dd9b..b9a32db 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java @@ -35,7 +35,6 @@ public class SubjectQueryDataService { private Logger logger = LoggerFactory.getLogger(SubjectQueryDataService.class); private static final Long ONE_DAY = 60 * 60 * 1000L * 24; - private static final Long clusterId = 3L; @Autowired private TopicQueryService topicQueryService; @Autowired @@ -102,7 +101,7 @@ public class SubjectQueryDataService { .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) .setSearchType(SearchType.DEFAULT); - System.out.println(" 组装后的查询语句: "+searchRequestBuilder); + // System.out.println(" 组装后的查询语句: "+searchRequestBuilder); executorService.submit(new SubjectDataQueryThread( searchResponseList, diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/OperatorUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/OperatorUtil.java deleted file mode 100644 index 4e8d433..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/OperatorUtil.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.bfd.mf.common.util; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - - -public class OperatorUtil { - private static final Character[] seps = new Character[] { '&', '|', '~', '<', '《', '>', '》', '(', '(', ')', ')', - ',', ',', '"', '“', '”' };// 分隔符,将整个表达式分隔成若干个子串进行处理 - private static final Character[] ops = new Character[] { '&', '|', '~' };// 去掉'!' - public static final List operators = new ArrayList(); - public static final List seperators = new ArrayList(); - - static { - operators.addAll(Arrays.asList(ops)); - seperators.addAll(Arrays.asList(seps)); - } - - public static boolean isOperator(char c) { - return operators.contains(c); - } - - - public static boolean isSeperator(char c) { - return seperators.contains(c); - } - - - public static int priorityCompare(char op1, char op2) { - // ()>!>&>| - switch (op1) { - case '!': - case '!': - return op2 == '(' || op2 == ')' ? -1 : 1; - case '&': - return op2 == '!' ? -1 : (op2 == '|' ? 1 : 0); - case '|': - return op2 == '|' ? 0 : -1; - } - return 0; - } - - - public static boolean matchChar(char c, char c2) { - switch (c) { - case '(': - case '(': - return c2 == ')' || c2 == ')'; - case ')': - case ')': - return c2 == '(' || c2 == '('; - case '>': - case '》': - return c2 == '<' || c2 == '《'; - case '<': - case '《': - return c2 == '>' || c2 == '》'; - default: - break; - } - return false; - } -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/ZipUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/ZipUtils.java index aca290f..42ee8b3 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/ZipUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/ZipUtils.java @@ -117,13 +117,13 @@ public class ZipUtils { } - public static String getZipFileName(String zipName, String zipPath) { - String zipFileName = zipName.replace(".zip",""); - // 判断zip这个文件夹是否存在,不存在则创建 - File zipFile=new File(zipPath+zipFileName); - if(!zipFile.exists()){//如果文件夹不存在 - zipFile.mkdir();//创建文件夹 - } - return zipFileName; - } +// public static String getZipFileName(String zipName, String zipPath) { +// String zipFileName = zipName.replace(".zip",""); +// // 判断zip这个文件夹是否存在,不存在则创建 +// File zipFile=new File(zipPath+zipFileName); +// if(!zipFile.exists()){//如果文件夹不存在 +// zipFile.mkdir();//创建文件夹 +// } +// return zipFileName; +// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java index fddf824..2b92e83 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java @@ -63,8 +63,10 @@ public class ESConstant { public static final String ISDOWNLOAD = "isDownload"; public static final String VIDEOURL = "videoUrl"; - public static final String RESOLUTION = "resolution"; - public static final String VIDEOTIME = "videoTime"; + public static String SIZE = "size"; + public static String VIDEOTIME = "videoTime"; + public static String RESOLUTION = "resolution"; + public static String VIDEOLIST = "videoList"; public static final String OCRTEXT = "ocrText"; public static final String ASRTEXT = "asrText"; @@ -73,6 +75,7 @@ public class ESConstant { public static final String CATEGORYLABEL= "categoryLabel"; public static final String TAG= "tag"; + public static final String OTHERSOURCEJSON = "otherSourceJson"; /** * * * @@ -362,13 +365,17 @@ public class ESConstant { public static String CONTENT_SIMHASH = "contentSimHash"; public static String QUOTE_COUNT = "quoteCount"; public static String COLLE_CTCOUNT = "collectCount"; + // 左侧检索内容那块要显示的数字 + public static String CONTENTCOUNT = "contentCount"; + public static String COMMENTCOUNT = "commentCount"; + public static String AUTHORCOUNT = "authorCount"; /** * 内容 */ public static String CONTENT_TAG = "contentTag"; public static String DT_EN_2 = "dtEn2"; public static String DT_EN_1 = "dtEn1"; - public static String COMMENTS_COUNT = "commentsCount"; + public static String COMMENTS_COUNT = "commentsCount"; //commentsCount public static String PAGE_COMMENTS_COUNT = "pageCommentCount"; public static String ATTITUDES_COUNT = "attitudesCount"; public static String TITLE_SIMHASH = "titleSimHash"; @@ -382,6 +389,8 @@ public class ESConstant { public static String SOURCE = "source"; public static String CRAWLDATAFLAG= "crawlDataFlag"; + public static final String TOTALCOUNT = "totalCount"; + //微信专属字段,内容固定 value="微信" public static String WEIXIN_SOURCE = "weixinSource"; public static String CHANNEL = "channel"; @@ -435,6 +444,9 @@ public class ESConstant { + + + /** * _all字段 */ @@ -470,6 +482,11 @@ public class ESConstant { public static final String SHOP_LABELS = "shopLabels"; public static final String COMMENT_IMG = "commentImg"; + public static final String USER = "user"; + public static final String USERID = "userId"; + + public static final String FILENAME = "fileName"; + /** * 用户头像 */ @@ -495,6 +512,7 @@ public class ESConstant { public static String FACEBOOK = "facebook"; public static String TWITTER = "twitter"; + public static String CID = "cid"; public static String SITEID = "siteId"; public static String SITETYPE = "siteType"; public static String SITEICON = "icon"; @@ -502,6 +520,7 @@ public class ESConstant { // public static String FOLDDOCALLNUMBER = "foldDocAllNumber"; public static String ALLDOCNUMBER = "allDocNumber"; public static String SCROLLID = "scrollId"; + public static String COMMENTLISTS = "commentLists"; public static final String BAIDUKOUBEI = "baidukoubei"; @@ -630,6 +649,7 @@ public class ESConstant { public static final String ID = "id"; public static final String COMMENTS = "comments"; + public static final String COMMENT = "comment"; public static final String REPOSTS = "reposts"; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java index 7a9d708..beaa5f7 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java @@ -13,6 +13,13 @@ public enum RTCodeEnum { C_INDEX_EXISTS(-3, "Index Exists"), // 自定义状态码 C_TOPIC_DISABLED(-2, "话题渠道为空,请运营人员确注意操作!!!"), + C_UPLOAD_OK(200,"上传成功"), + C_UPLOAD_PARSE_FAIL(201,"Excel解析失败,请检查Excel"), + C_UPLOAD_EXIST(202,"同名Excel已存在,请改名后重新上传,谢谢!"), + C_UPLOAD_BUSY(203,"当前正在运行任务数超过5个,请稍后再尝试上传,谢谢!"), + C_UPLOAD_FAIL(206,"上传失败") , + C_UPLOAD_ERROR(204,"请上传 Excel 或 txt 文件"), + C_UPLOAD_EMPTY(205,"上传的文件为空,请核查文件。"), // Param Issue: 3** C_PARAM_ERROR(300, "Input Param Error"), @@ -86,6 +93,7 @@ public enum RTCodeEnum { C_ACCOUNT_NO_NICK_NAME(912, "该用户昵称不存在"); + private int code; private String desc; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java index fcfb301..df17972 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java @@ -2,10 +2,7 @@ package com.bfd.mf.common.util.es; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.util.constants.ESConstant; -import com.bfd.mf.config.BFDApiConfig; import com.google.common.collect.Maps; -import org.apache.http.entity.ContentType; -import org.apache.http.nio.entity.NStringEntity; import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse; import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; @@ -28,13 +25,11 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.elasticsearch.search.collapse.CollapseBuilder; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.transport.client.PreBuiltTransportClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.util.Assert; -import springfox.documentation.spring.web.json.Json; import java.net.InetAddress; import java.util.*; @@ -148,9 +143,9 @@ public abstract class EsUtils { // } Long size = response.getHits().getTotalHits(); - logger.info("[queryTotalCount] 聚合前的总量 : "+ size+ " 这个是聚合前的数据量"); + logger.info("[queryTotalCount] 聚合前的总量: {} 这个是聚合前的数据量 ",size); Long aggrSize = Long.valueOf(response.getHits().getHits().length); - logger.info("[queryTotalCount] 聚合后的数据量 : " + aggrSize); + logger.info("[queryTotalCount] 聚合后的数据量: {} ",aggrSize); // 当聚合后的结果为 10000 时 ,或者 是评论数据的时候,就不用聚合了 // if(aggrSize == 10000 || searchType == 1 || searchType == 2){ // size = size; @@ -188,7 +183,7 @@ public abstract class EsUtils { .actionGet();//注意:首次搜索并不包含数据 }catch (Exception e){ e.printStackTrace(); - logger.error(" queryForExport 首次 查询报错!!!" + indexName[0]); + logger.error(" queryForExport 首次 查询报错!!!IndexName : {} " , indexName[0]); } } else { System.out.println("翻页查询"); @@ -200,14 +195,14 @@ public abstract class EsUtils { .actionGet(); } catch (Exception e) { e.printStackTrace(); - logger.error(" queryForExport 翻页 查询报错!!!" + indexName[0]); + logger.error(" queryForExport 翻页 查询报错!!!IndexName : {} ", indexName[0]); } } //获取总数量 long totalCount = searchResponse.getHits().getTotalHits(); int page=(int)totalCount/( 2 * limit);//计算总页数,每次搜索数量为分片数*设置的size大小 //int page = 2 * limit; - System.out.println("queryForExport : " + totalCount + " ; page = " + page + " ; scrollId = " + searchResponse.getScrollId()); + // System.out.println("queryForExport : " + totalCount + " ; page = " + page + " ; scrollId = " + searchResponse.getScrollId()); for (int i = 0; i <= page; i++) { //再次发送请求,并使用上次搜索结果的ScrollId List monitorLists = parseSearchResponse(searchResponse); @@ -285,9 +280,8 @@ public abstract class EsUtils { .setQuery(queryBuilder) .addAggregation(aggregationBuilder); - logger.info("requestBuilder: "+requestBuilder.toString().replace("\n","").replace("\r","").replace(" ","")); + logger.info("requestBuilder: " , requestBuilder.toString().replace("\n","").replace("\r","").replace(" ","")); SearchResponse response = requestBuilder.execute().actionGet(); - Terms aggregation = response.getAggregations().get(filter+"Tag"); return aggregation; } @@ -304,7 +298,7 @@ public abstract class EsUtils { .actionGet();//注意:首次搜索并不包含数据 } catch (Exception e) { e.printStackTrace(); - logger.error(" queryForExport 首次 查询报错!!!" + indexName[0]); + logger.error(" queryForExport 首次 查询报错!!!IndexName : {} " ,indexName[0]); } //再次发送请求,并使用上次搜索结果的ScrollId List monitorLists = parseSearchResponse(searchResponse); @@ -328,9 +322,7 @@ public abstract class EsUtils { updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); updateRequest.setRefreshPolicy("true"); UpdateResponse response = client.update(updateRequest).get(); - return response.getVersion(); - }catch (Exception e){ e.printStackTrace(); return 0L; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/MonitorUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/MonitorUtils.java index bd1d8b0..efe4d96 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/MonitorUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/MonitorUtils.java @@ -45,7 +45,7 @@ public class MonitorUtils { esMonitorEntityMap.put(dataId, esMonitorBaseEntity); // testSort.put(dataId,pubTimeStr); }else{ - System.out.println("[MonitorUtils] ??? : "+searchHit.getSourceAsMap().toString()); + logger.info("[MonitorUtils] {}",searchHit.getSourceAsMap().toString()); } } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java index bbb30ad..57a4312 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java @@ -27,14 +27,6 @@ public class SliceScrollUtil { @Autowired private SubjectQueryDataService subjectQueryDataService; - private Object existHighEm(String str) { - if (TStringUtils.isNotEmpty(str)) { - if (str.contains("") || str.contains("")) { - return str; - } - } - return ""; - } /** * 解析数据 */ @@ -136,7 +128,7 @@ public class SliceScrollUtil { Cluster cluster = null; List currentIndexList = null; if(null != queryRequest.getSubjectId() && !("all").equals(queryRequest.getSubjectId())){ - logger.info("查询 【专题数据】 subjectId = " + queryRequest.getSubjectId()); + logger.info("查询 【专题数据】 subjectId :{}" ,queryRequest.getSubjectId()); cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); }else{ @@ -146,7 +138,7 @@ public class SliceScrollUtil { } Long clusterId = cluster.getId(); - logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); + logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId :{}; currentIndexList :{}", clusterId , currentIndexList.toString()); logger.info("==========进入数据分析Es and Cache,计算开始执行============"); String sortFlag = ""; String orderFlag = "desc"; diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java deleted file mode 100644 index bbeb741..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java +++ /dev/null @@ -1,360 +0,0 @@ -package com.bfd.mf.common.util.spread; - -import com.alibaba.fastjson.JSONObject; -import com.bfd.mf.common.util.enums.SpreadEnums; -import com.bfd.mf.common.util.utility.DateUtil; -import com.bfd.nlp.common.util.object.TObjectUtils; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class SpreadServiceUtil { - private static final Logger logger = LoggerFactory.getLogger(SpreadServiceUtil.class); - - private static final Long ONE_DAY = 24 * 60 * 60 * 1000L; - private static final Long ONE_HOUR = 60 * 60 * 1000L; -// static { -// try { -// while (!ApplicationUtil.initialFinsih) { -// logger.warn("[SpreadServiceUtil Initial] AbsSpringServiceHolder: Wait spring content initialization, sleep 1s..."); -// Thread.sleep(1000); -// } -// -// oemUrlCommonService = ApplicationUtil.getApplicationContext().getBean(OemUrlCommonService.class); -// bfdApiConfig = ApplicationUtil.getApplicationContext().getBean(BFDApiConfig.class); -// -// if (TObjectUtils.isNull(oemUrlCommonService)) { -// throw new Exception("oemUrlCommonService is null"); -// } -// -// } catch (Exception e) { -// logger.error("[SpreadServiceUtil Initial] Object {} is NULL", e.getMessage(), e); -// System.exit(-1); -// } -// } - - /** - * 获取三个月前的起始时间 - */ - public static Long getTimeOfThreeMonthsAgo(Long time) { - if (TObjectUtils.isNull(time)) { - time = System.currentTimeMillis(); - } - - Date endDate = new Date(time); - Calendar calendar = Calendar.getInstance(); - calendar.setTime(endDate); - calendar.add(Calendar.MONTH, -3); - calendar.set(Calendar.DAY_OF_MONTH, 1); - calendar.set(Calendar.HOUR_OF_DAY, 0); - calendar.set(Calendar.MINUTE, 0); - calendar.set(Calendar.SECOND, 0); - calendar.set(Calendar.MILLISECOND, 0); - logger.info("[SpreadServiceUtil] getTimeOfThreeMonthsAgo: result is {}", calendar.getTime()); - return calendar.getTimeInMillis(); - } - - private static Long getEndTime(Long time, Integer type) { - Long startTime; - if (type.equals(SpreadEnums.TREND.UNIT_HOUR)) { - startTime = getEndTimeOfHour(time); - } else { - startTime = getEndTimeOfDay(time); - } - return startTime; - } - - private static Long getEndTimeOfDay(Long time) { - Calendar calendar = Calendar.getInstance(); - calendar.setTimeInMillis(time); - calendar.set(Calendar.HOUR_OF_DAY, 23); - calendar.set(Calendar.MINUTE, 59); - calendar.set(Calendar.SECOND, 59); - calendar.set(Calendar.MILLISECOND, 999); - return calendar.getTimeInMillis(); - } - - private static Long getEndTimeOfHour(Long time) { - Calendar calendar = Calendar.getInstance(); - calendar.setTimeInMillis(time); - calendar.set(Calendar.MINUTE, 59); - calendar.set(Calendar.SECOND, 59); - calendar.set(Calendar.MILLISECOND, 999); - return calendar.getTimeInMillis(); - } - - public static Long[] getTimeList1(Long startTime, Long endTime, Integer type) { - Long[] timeList = null; - try { - Long interval; - if (type.equals(SpreadEnums.TREND.UNIT_HOUR)) { - interval = ONE_HOUR; - } else if (type.equals(SpreadEnums.TREND.UNIT_DAY)) { - interval = ONE_DAY; - } else { - throw new Exception(" !!! type is illegal !!!"); - } - Double intervalNum = Math.ceil((endTime - startTime) / (double) interval); - timeList = new Long[intervalNum.intValue() + 1]; - Long time = startTime; - for (int i = 0; i < intervalNum.intValue(); i++) { - timeList[i] = time; - time += interval; - } - timeList[intervalNum.intValue()] = endTime; - - logger.info("[SpreadServiceUtil] getTimeList: startTime is {}, endTime is {}, timeList is {}", startTime, endTime, timeList); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] getTimeList: failed, startTime is {}, endTime is {}, timeList is {}, error is ", startTime, endTime, timeList, e); - } - return timeList; - } - - public static Long[] getTimeList2(Long startTime, Long endTime, Integer type) { - Long[] timeList = null; - try { - Long interval; - if (type.equals(SpreadEnums.TREND.UNIT_HOUR)) { - interval = ONE_HOUR; - } else if (type.equals(SpreadEnums.TREND.UNIT_DAY)) { - interval = ONE_DAY; - } else { - throw new Exception(" !!! type is illegal !!!"); - } - - List timeListTemp = new ArrayList<>(); - timeListTemp.add(startTime); - - if (startTime.equals(endTime)) { - timeList = new Long[timeListTemp.size()]; - return timeListTemp.toArray(timeList); - } - - Long endTimeOfStart = getEndTime(endTime, type); - if (endTimeOfStart >= endTime) { - timeListTemp.add(endTime); - timeList = new Long[timeListTemp.size()]; - return timeListTemp.toArray(timeList); - } - for (long time = endTimeOfStart + interval; time < endTime; time += interval) { - timeListTemp.add(time); - } - timeListTemp.add(endTime); - timeList = new Long[timeListTemp.size()]; - timeListTemp.toArray(timeList); - - logger.info("[SpreadServiceUtil] getTimeList: startTime is {}, endTime is {}, timeList is {}", startTime, endTime, timeList); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] getTimeList: failed, startTime is {}, endTime is {}, timeList is {}, error is ", startTime, endTime, timeList, e); - } - return timeList; - } - - /** - * 将Long型时间序列列表转换为String型列表 - */ - public static String[] convertToTimeStringList(Long[] timeList, Integer type) { - String[] timeStringList = new String[timeList.length]; - try { - String dateFormat; - if (Objects.equals(type, SpreadEnums.TREND.UNIT_HOUR)) { - dateFormat = DateUtil.TIME_FORMAT; - } else if (Objects.equals(type, SpreadEnums.TREND.UNIT_DAY)) { - dateFormat = DateUtil.DATE_FORMAT; - } else { - throw new Exception(" !!! type is illegal !!!"); - } - - for (int i = 0; i < timeList.length; i++) { - timeStringList[i] = DateUtil.parseDateByFormat(timeList[i], dateFormat); - } - } catch (Exception e) { - logger.error("[SpreadServiceUtil] getTimeStringList: failed, timeList is {}, error is ", Arrays.asList(timeStringList), e); - } - return timeStringList; - } - - -// public static List> convertToTimeStringList(List> trendList,Long[] timeList, Integer type) { -// String[] timeStringList = new String[timeList.length]; -// try { -// String dateFormat; -// if (Objects.equals(type, SpreadEnums.TREND.UNIT_HOUR)) { -// dateFormat = DateUtil.TIME_FORMAT; -// } else if (Objects.equals(type, SpreadEnums.TREND.UNIT_DAY)) { -// dateFormat = DateUtil.DATE_FORMAT; -// } else { -// throw new Exception(" !!! type is illegal !!!"); -// } -// -// for (int i = 0; i < timeList.length; i++) { -// Map map = new HashMap<>(); -// timeStringList[i] = DateUtil.parseDateByFormat(timeList[i], dateFormat); -// map.put("name",timeStringList[i]); -// trendList.add(map); -// } -// } catch (Exception e) { -// logger.error("[SpreadServiceUtil] getTimeStringList: failed, timeList is {}, error is ", Arrays.asList(timeStringList), e); -// } -// // return timeStringList; -// return trendList; -// } - - - /** - * 构造list搜索返回值 - */ - public static JSONObject getSearchListData(List spreadList, Integer totalNum, Integer limit) { - JSONObject data = new JSONObject(); - Double pageNum = 0d; - try { - pageNum = Math.ceil((double) totalNum / limit); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] getSearchListData: failed, spreadList is {}, limit is {}, error is ", - spreadList, limit, e); - } - data.put("totalNum", totalNum); - data.put("pageNum", pageNum.intValue()); - data.put("spreadList", spreadList); - return data; - } - - /** - * 精确过滤 - */ - public static Boolean accurateSearchResult(JSONObject spread, String field, String text) { - if (spread.containsKey(field) && spread.getString(field).equals(text)) { - return true; - } - return false; - } - - /** - * 关键词过滤 - */ - public static Boolean searchResult(JSONObject spread, String field, String text) { - if (spread.containsKey(field) && spread.getString(field).contains(text)) { - return true; - } - return false; - } - - /** - * 按照指定排序规则排序 - */ - public static List sortBySortFlag(List spreadList, String sortFlag) { - logger.info("[SpreadServiceUtil] sortBySortFlag: -排序前-spreadList is {}, sortFlag is {}", spreadList, sortFlag); - try { - if (sortFlag.equals(SpreadEnums.SORT_TYPE.PUBTIME_DESC_SORT_FLAG.getValue())) { - // 按时间倒序 - Collections.sort(spreadList, new Comparator() { - @Override - public int compare(JSONObject o1, JSONObject o2) { - // 发布时间相同的,则再看采集时间 - return o2.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME).compareTo(o1.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME)) == 0 ? - o2.getLong(SpreadEnums.LIST.SPREAD_FIELD_CRAWLTIME).compareTo(o1.getLong(SpreadEnums.LIST.SPREAD_FIELD_CRAWLTIME)) : - o2.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME).compareTo(o1.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME)); - } - }); - } - if (sortFlag.equals(SpreadEnums.SORT_TYPE.PUBTIME_ASC_SORT_FLAG.getValue())) { - // 按时间正序 - Collections.sort(spreadList, new Comparator() { - @Override - public int compare(JSONObject o1, JSONObject o2) { - // 发布时间相同的,则再看采集时间 - return o1.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME).compareTo(o2.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME)) == 0 ? - o1.getLong(SpreadEnums.LIST.SPREAD_FIELD_CRAWLTIME).compareTo(o2.getLong(SpreadEnums.LIST.SPREAD_FIELD_CRAWLTIME)) : - o1.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME).compareTo(o2.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME)); - } - }); - } - logger.info("[SpreadServiceUtil] sortBySortFlag: -排序后-spreadList is {}, sortFlag is {}", spreadList, sortFlag); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] sortBySortFlag: failed, spreadList is {}, error is ", spreadList, e); - } - return spreadList; - } - - /** - * 获取指定页面list - */ - public static List getPage(List spreadList, Integer start, Integer limit) { - List pageList = new ArrayList<>(); - try { - if (start >= spreadList.size()) { - return pageList; - } - - int end = spreadList.size() >= start + limit ? start + limit : spreadList.size(); - pageList = spreadList.subList(start, end); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] getPage: failed, spreadList is {}, " + - "start is {}, limit is {}, error is ", spreadList, start, limit, e); - } - return pageList; - } - - /** - * 去除括号及括号内的内容 - */ - private static String removeBrackets(String value) { - logger.info("[SpreadServiceUtil] removeBrackets: value is {}", value); - try { - // 中文括号修正为英文括号 - value = value.trim().replaceAll("(", "(").replaceAll(")", ")"); - Pattern pattern = Pattern.compile("\\((.*?)\\)"); //英文括号 - Matcher matcher = pattern.matcher(value); - while (matcher.find()) { - value = value.replaceAll(matcher.group(0), ""); // 0是包括括号, 1是只取内容 - logger.info("[SpreadServiceUtil] removeBrackets: value is {}, remove string is {}", value, matcher.group(1)); - } - value = value.trim().replaceAll("\\(", "").replaceAll("\\)", ""); - logger.info("[SpreadServiceUtil] removeBrackets: result value is {}", value); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] removeBrackets: failed, value is {}, error is ", value, e); - } - return value; - } - - /** - * 过滤所有以"<"开头以">"结尾的标签 - */ - private static String filterHtml(String value) { - logger.info("[SpreadServiceUtil] filterHtml: value is {}", value); - try { - String regxpForHtml = "<([^>]*)>"; // 过滤所有以<开头以>结尾的标签 - Pattern pattern = Pattern.compile(regxpForHtml); - Matcher matcher = pattern.matcher(value); - StringBuffer sb = new StringBuffer(); - while (matcher.find()) { - matcher.appendReplacement(sb, ""); - } - matcher.appendTail(sb); - logger.info("[SpreadServiceUtil] filterHtml: result value is {}", sb.toString()); - return sb.toString(); - } catch (Exception e) { - logger.error("[SpreadServiceUtil] filterHtml: failed, value is {}, error is ", value, e); - } - return value; - } - - /** - * 过滤特殊字符 - */ - public static String filter(String value) { - // 合法性过滤 - if (StringUtils.isEmpty(value) || SpreadEnums.OriginalSource.illegal.contains(value.toLowerCase())) { - return ""; - } - // 过滤括号 - String valueAfterRemoveBrackets = removeBrackets(value); - // 过滤html标签 - String valueAfterFilterHtml = filterHtml(valueAfterRemoveBrackets); - return valueAfterFilterHtml; - } -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpNodeUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpNodeUtil.java deleted file mode 100644 index 3e4fd9f..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpNodeUtil.java +++ /dev/null @@ -1,1042 +0,0 @@ -package com.bfd.mf.common.util.subject; - -import com.bfd.mf.common.util.OperatorUtil; -import com.bfd.mf.common.web.vo.params.expression.*; -import org.elasticsearch.common.Strings; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.MatchPhraseQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.SpanNotQueryBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; - -public class ExpNodeUtil { - private static final Logger logger = LoggerFactory.getLogger(ExpNodeUtil.class); - private static int visitOrder = 0; - - /** - * 将输入表达式运算为树形图 - */ - public static ExpNode getPrefixExpressionTree(Vector input, HashSet set) throws Exception { - if (null == input || input.size() == 0) { - throw new Exception("no input object vector found"); - } - ExpNode node = null; - try { - int len = input.size(); - Object c, next = null; - char tempChar; - Stack s1 = new Stack(); - Stack s2 = new Stack(); - // 从右至左扫描表达式 - for (int i = len - 1; i >= 0; --i) { - c = input.get(i); - if (i < len - 1) { - next = input.get(i + 1); - } - if (c instanceof Integer) { - s2.push(c);// 距离 - } else if (c instanceof ExpObject) { - ExpNode leaf = new ExpNode(c); - leaf.setLeaf(true); - leaf.setOpera(false); - s2.push(leaf);// 操作数树形节点 - } else if (c instanceof Character) { - char charC = (char) c; - if (OperatorUtil.isOperator(charC)) {// 操作符 - if (isDistanceChar(charC)) {// ~ - if (s2.isEmpty() || !(s2.peek() instanceof Integer)) { - throw new IllegalArgumentException( - "Stack error,as '~' must followed by a Integer value"); - } - Integer dis = (Integer) s2.pop(); - if (null != dis && dis >= 0) { - DistanceNode opera = new DistanceNode(dis); - Object prv = input.get(i - 1); - if (i <= 0 || !(prv instanceof Character) - || ((char) input.get(i - 1) != '>' && (char) input.get(i - 1) != '》')) { - throw new IllegalArgumentException( - "Stack error,as '~' must follow a char '<' or '《'"); - } - // 右半边括号> - Stack right = new Stack(); - right.push(new HalfQuartChar('>', i)); - --i;// 开始i对应~,现在对应> - // 左右子表达式 - Stack rHalf = new Stack();// *号右边子式 - Stack lHalf = new Stack();// *号左边子式 - boolean meetStar = false;// 是否已经遇到正确的* - while (!right.isEmpty()) { - --i; - if (i < 0) { - break; - } - Object obj = input.get(i); - if (obj instanceof Character) { - if (OperatorUtil.matchChar('>', (char) obj)) {// 遇到匹配的左半括号 - if (!right.isEmpty()) { - HalfQuartChar rh = right.pop(); - if (null == rh) { - throw new IllegalArgumentException("pop of stack failed"); - } - } - if (right.isEmpty()) { - break;// 不会将对应的最左边的'<'存入栈 - } - } else if ((char) obj == '>' || (char) obj == '》') { - right.push(new HalfQuartChar('>', i)); - } else if ((char) obj == '*' && right.size() == 1) {// 最外层的* - meetStar = true; - continue; - } - } - // *分界线 - if (!meetStar) { - rHalf.push(obj); - } else { - lHalf.push(obj); - } - } - // after handle outer<(M) (N)>~10 - if (!rHalf.isEmpty() && !lHalf.isEmpty()) { - Vector lv = new Vector(); - Vector rv = new Vector(); - while (!rHalf.isEmpty()) { - rv.add(rHalf.pop()); - } - while (!lHalf.isEmpty()) { - lv.add(lHalf.pop()); - } - // 左右子Vector作为~号的左右子树 - ExpNode disNode = new ExpNode(opera); - disNode.setOpera(true); - disNode.setLeaf(false); - // - ExpNode lf = new ExpNode(lv, disNode); - ExpNode rt = new ExpNode(rv, disNode); - disNode.setLeft(lf); - disNode.setRight(rt); - s2.push(disNode);// 生成的子树入栈 - node = calc(charC, s2, null, set);// ~运算 - } - } - } else { - if (next instanceof Character - && (isRightHalfQuart((char) next) || isDistanceChar((char) next))) { - throw new Exception( - "wrong position for two neighbor operas:[" + charC + "],and:[" + next + "]"); - } - // 如果s1中已经有操作符且优先级比当前操作符高 - while (!s1.isEmpty() && s1.peek() != ')' && s1.peek() != ')' - && OperatorUtil.priorityCompare(charC, s1.peek()) <= 0) { - // 当前运算符栈不为空且要运算符栈顶运算符不是右括号且当前运算符的优先级比运算符栈顶运算符的优先级低, - // 则将运算符栈栈顶元素拿出来与操作数栈的两个栈顶元素进行运算并把运算结果压入操作数栈 - tempChar = s1.pop(); - node = calc(tempChar, s2, null, set); - } - s1.push(charC); - } - } else if (isRightHalfQuart(charC) || isRightDistance(charC)) { - if (next instanceof Character - && (isLeftHalfQuart((char) next) || isDistanceLeft((char) next))) { - throw new Exception( - "wrong position for two neighbor operas:[" + charC + "],and:[" + next + "]"); - } - if (isRightHalfQuart(charC)) { - if (next instanceof Character && isDistanceChar((char) charC)) { - throw new Exception( - "wrong position for two neighbor operas:[" + charC + "],and:[" + next + "]"); - } - } else {// > - if (next instanceof Character && ((char) charC == '&' || (char) charC == '|')) { - throw new Exception( - "wrong position for two neighbor operas:[" + charC + "],and:[" + next + "]"); - } - } - s1.push(charC); - } else if (isLeftHalfQuart(charC)) { - if (next instanceof Character - && (isDistanceChar((char) next) || (char) next == '&' || (char) next == '|')) { - throw new Exception( - "wrong position for two neighbor operas:[" + charC + "],and:[" + next + "]"); - } - // 如果是(,和(,则依次弹出S1栈顶的运算符,并压入表达式栈,直到遇到左括号为止,此时将这一对括号丢弃 - tempChar = s1.pop(); - while (!OperatorUtil.matchChar(tempChar, charC)) { - node = calc(tempChar, s2, null, set); - if (s1.isEmpty()) { - throw new IllegalArgumentException("bracket dosen't match, missing right bracket ')'."); - } - tempChar = s1.pop(); - } - } else if (isDistanceLeft(charC)) { - if (next instanceof Character && !isLeftHalfQuart((char) next) - && !isDistanceLeft((char) next)) { - throw new Exception( - "wrong position for two neighbor operas:[" + charC + "],and:[" + next + "]"); - } - // 处理<(M)*(N)>~10类型的子表达式 - if (s1.isEmpty()) { - throw new IllegalArgumentException("bracket < dosen't match, missing right bracket '>'."); - } - } else if (charC == '*' || charC == ' ') { - // 如果表达式里包含空格则不处理空格 - } else { - throw new IllegalArgumentException("wrong character '" + c + "'"); - } - } - } - while (!s1.isEmpty()) { - if (OperatorUtil.isOperator((char) s1.peek())) { - node = calc(s1.pop(), s2, null, set); - } else { - char ch = s1.pop(); - throw new Exception("Character not expected or missing its match one for:" + ch); - } - } - if (!s2.isEmpty() && s2.peek() instanceof ExpNode) { - node = (ExpNode) s2.pop(); - } - } catch (Exception e) { - logger.error("[getPrefixExpressionBuilder function] is error", e); - throw new Exception("[getPrefixExpressionBuilder function] is error", e); - } - return node; - } - - - private static boolean isDistanceChar(char ch) { - return ch == '~'; - } - - private static boolean isRightHalfQuart(char ch) { - return ch == ')' || ch == ')'; - } - - private static boolean isRightDistance(char ch) { - return ch == '》' || ch == '>'; - } - - - private static boolean isLeftHalfQuart(char ch) { - return ch == '(' || ch == '('; - } - - private static boolean isDistanceLeft(char ch) { - return ch == '《' || ch == '<'; - } - - private static ExpNode calc(char oper, Stack s2, ExpNode node, HashSet set) throws Exception { - ExpNode rst = null; - switch (oper) { - case '|': - if (s2.isEmpty() || s2.size() < 2) { - throw new Exception("missing expected ExpObject or has unexpected operator:" + oper); - } - // 二元表达式节点 - rst = new ExpNode(new OperaObject('|', OperaObject.OPERA_TYPE_TWO)); - rst.setOpera(true); - rst.setLeaf(false); - for (int i = 0; i < 2; i++) { - if (s2.peek() instanceof ExpNode) { - if (i == 0) { - rst.setLeft((ExpNode) s2.pop()); - } else if (i == 1) { - rst.setRight((ExpNode) s2.pop()); - } - } - } - s2.push(rst); - break; - case '&': - if (s2.isEmpty() || s2.size() < 2) { - throw new Exception("missing expected ExpObject or has unexpected operator:" + oper); - } - // 二元表达式节点 - rst = new ExpNode(new OperaObject('&', OperaObject.OPERA_TYPE_TWO)); - rst.setOpera(true); - rst.setLeaf(false); - for (int i = 0; i < 2; i++) { - if (s2.peek() instanceof ExpNode) { - if (i == 0) { - rst.setLeft((ExpNode) s2.pop()); - } else if (i == 1) { - rst.setRight((ExpNode) s2.pop()); - } - } - } - s2.push(rst); - break; - case '!': - break; - case '~': - if (s2.isEmpty() || !(s2.peek() instanceof ExpNode)) { - throw new Exception("missing expected ExpObject or has unexpected operator:" + oper); - } - rst = (ExpNode) s2.peek(); - Object value = rst.getValue(); - if (null == value || !(value instanceof DistanceNode)) { - rst = null; - return null; - } - break; - default: - throw new Exception("Operator not expected:" + oper); - } - return rst; - }private static ExpNode formatDistanceSubTree(ExpNode root) { - if (null == root || null == root.getValue()) { - return null; - } - // format - if (root.getValue() instanceof DistanceNode) { - ((DistanceNode) root.getValue()).setFormatFinish(true); - } - if (null != root.getLeft()) { - ExpNode left = formatDistanceSubTree(root.getLeft()); - root.setLeft(left); - } - if (null != root.getRight()) { - ExpNode right = formatDistanceSubTree(root.getRight()); - root.setRight(right); - } - return root; - } - - public static ExpNode visitAndFormatTree(ExpNode root, HashSet set) throws Exception { - if (null == root || null == root.getValue()) { - return null; - } - visitOrder++; - // visit self - Object obj = root.getValue(); - if (obj instanceof ExpObject) { - root.setLeft(null); - root.setRight(null); - root.setLeaf(true); - root.setOpera(false); - // visit - visitNodeValue(obj, visitOrder); - return root; - } - while (root.getValue() instanceof DistanceNode) { - /** - * @lei.bao
- * @2016-12-09 优化距离节点: - * 目的是将包含DistanceNode的树形图转换为只有ExpObject和Operator两种类型的结点树 - * ==》因此可以省去compute运算中的distance运算过程 - */ - ExpNode left = root.getLeft(); - if (null != left) { - if (left.getValue() instanceof Vector) { - try { - left = ExpNodeUtil.getPrefixExpressionTree((Vector) left.getValue(), set); - root.setLeft(left); - } catch (Exception e) { - logger.error("parse vector of left child for ditance failed...,will exit", e.getMessage()); - throw e; - } - } - } - ExpNode right = root.getRight(); - if (null != right) { - if (right.getValue() instanceof Vector) { - try { - right = ExpNodeUtil.getPrefixExpressionTree((Vector) right.getValue(), set); - root.setRight(right); - } catch (Exception e) { - logger.error("parse vector of left child for ditance failed...,will exit", e.getMessage()); - throw e; - } - } - } - if (null == left || null == right) { - logger.error("empty left child or right child..."); - break; - } - if (!((DistanceNode) root.getValue()).isFormatFinish()) { - // 此前还未转换过形式:~10==>A&B&AB - ExpNode node = new ExpNode(new OperaObject('&')); - node.setLeft(ExpNode.clone(left)); - node.setRight(ExpNode.clone(right)); - //format - root = formatDistanceSubTree(root); - if (null == root) { - return null; - } - ExpNode node2 = new ExpNode(new OperaObject('&')); - node2.setLeft(node); - node2.setRight(root); - // ((DistanceNode)root.getValue()).setFormatFinish(true); - root = node2; - break; - } - Object lefObj = left.getValue(); - Object rightObj = right.getValue(); - // 1:左右子树都是ExpObject对象==>转换成&运算 - if (lefObj instanceof ExpObject && rightObj instanceof ExpObject) { - int multiNum = 0; - if (!((ExpObject) lefObj).getTokens().isEmpty()) { - multiNum++; - } - if (!((ExpObject) rightObj).getTokens().isEmpty()) { - multiNum++; - } - List disObj = getDistanceObject((ExpObject) lefObj, (ExpObject) rightObj, - ((DistanceNode) root.getValue()).getDistance()); - if (null != disObj && !disObj.isEmpty()) { - if (disObj.size() == 1) { - ExpObject target = disObj.get(0); - if (null != target && null != target.getTokens() && target.getTokens().size() == 2) { - List disObjConvert = getDistanceObject((ExpObject) rightObj, (ExpObject) lefObj, - ((DistanceNode) root.getValue()).getDistance()); - if (null != disObjConvert && !disObjConvert.isEmpty() && disObjConvert.size() == 1) { - ExpObject disCon = disObjConvert.get(0); - if (null != disCon) { - ExpNode opNode = new ExpNode(new OperaObject('|')); - opNode.setLeft(new ExpNode(target)); - opNode.setRight(new ExpNode(disCon)); - root = opNode; - visitNodeValue(root.getValue(), visitOrder); - return root; - } - } - } else { - root.setValue(disObj.get(0)); - root.setLeft(null); - root.setRight(null); - visitNodeValue(root.getValue(), visitOrder); - return root; - } - } else {// >=2 - // multiNum==1 or 2 - ExpNode opNode = new ExpNode(new OperaObject('&')); - opNode.setLeft(new ExpNode(disObj.get(0))); - // - // ExpNode opLeft = new ExpNode(new OperaObject('|')); - // opLeft.setLeft(new ExpNode(disObj.get(0))); - // opLeft.setRight(new - // ExpNode(getConvertExpObject(disObj.get(0)))); - // - if (multiNum == 1) {//1-2 - ExpNode opRight = new ExpNode(new OperaObject('|')); - opRight.setLeft(new ExpNode(disObj.get(1))); - opRight.setRight(new ExpNode(getConvertExpObject(disObj.get(1)))); - opNode.setRight(opRight); - } else if (multiNum == 2) {//2-2 - opNode.setRight(new ExpNode(disObj.get(1))); - } - root = opNode; - int i = 2; - // 处理多于2个返回节点的情况 - while (i < disObj.size()) { - ExpNode opNode1 = new ExpNode(new OperaObject('&')); - // - opNode1.setLeft(root); - if (disObj.get(i).getTokens().size() != 2) { - opNode1.setRight(new ExpNode(disObj.get(i))); - } else { - ExpNode opOr = new ExpNode(new OperaObject('|')); - opOr.setLeft(new ExpNode(disObj.get(i))); - opOr.setRight(new ExpNode(getConvertExpObject(disObj.get(i)))); - opNode1.setRight(opOr); - } - root = opNode1; - i++; - } - } - } - } else - // 2:嵌套距离,左右子树为距离节点,先format字树(内部距离) - if (lefObj instanceof DistanceNode || rightObj instanceof DistanceNode) { - if (lefObj instanceof DistanceNode) { - ExpNode lft = visitAndFormatTree(root.getLeft(), set); - root.setLeft(lft); - } - if (rightObj instanceof DistanceNode) { - ExpNode rht = visitAndFormatTree(root.getRight(), set); - root.setRight(rht); - } - } // end:经过转换,子树全部变为ExpObject或者OP - - // 3:左右字树中包含OP=》转换成两两之间的距离 - // 经过转换,root可能已经变化,不再是DistanceNode类型,需要先判断 - if (root.getValue() instanceof DistanceNode) { - - lefObj = root.getLeft().getValue(); - rightObj = root.getRight().getValue(); - Character ch = null; - ExpNode leftchild = null; - ExpNode rightchild = null; - int leftOrRight = 0;// 默认无复杂运算表达式 - - // 简化distance节点,使其左右节点都是简单ExpObject - if (lefObj instanceof OperaObject) {// 左边是复杂表达式 - ch = ((OperaObject) lefObj).getOpera(); - // 左子树的两个孩子节点 - leftchild = root.getLeft().getLeft(); - rightchild = root.getLeft().getRight();// root已经变化,重新获取left - leftOrRight = 1; - } else if (rightObj instanceof OperaObject) { - ch = ((OperaObject) rightObj).getOpera(); - leftchild = root.getRight().getLeft(); - rightchild = root.getRight().getRight(); - leftOrRight = 2; - } - // 左右复杂OP表达式都存在 - if (null != leftchild && null != rightchild) { - // to-do:拆分复杂distance表达式 - ExpNode lf = ExpNode.clone(leftchild); - ExpNode rt = ExpNode.clone(rightchild); - // 目标:lf-anotherNode 和rt-anotherNode 之间做ch运算 - int distance = ((DistanceNode) obj).getDistance(); - ExpNode disNodeLeft = new ExpNode(new DistanceNode(distance)); - disNodeLeft.setOpera(true); - disNodeLeft.setLeaf(false); - // - ExpNode disNodeRight = new ExpNode(new DistanceNode(distance)); - disNodeRight.setOpera(true); - disNodeRight.setLeaf(false); - // - ExpNode anotherNode = null; - if (leftOrRight == 1) {// anotherNode取root的右子树 - anotherNode = ExpNode.clone(root.getRight()); - // - disNodeLeft.setLeft(lf); - disNodeLeft.setRight(anotherNode); - // - disNodeRight.setLeft(rt); - disNodeRight.setRight(anotherNode); - } else if (leftOrRight == 2) {// anotherNode取root左子树 - anotherNode = ExpNode.clone(root.getLeft()); - // - disNodeLeft.setLeft(anotherNode); - disNodeLeft.setRight(lf); - // - disNodeRight.setLeft(anotherNode); - disNodeRight.setRight(rt); - } - ExpNode opNode = new ExpNode(new OperaObject(ch)); - opNode.setLeft(disNodeLeft); - opNode.setRight(disNodeRight); - root = opNode;// end:处理完左右子树中包含OP的Node类型 - /* - * 至此,root类型由Distance 转成OP,子树中包含新生成的distanceNode,由后续递归过程转换 - */ - } - } - } - // visit - visitNodeValue(root.getValue(), visitOrder); - if (null != root.getLeft()) { - ExpNode ln = visitAndFormatTree(root.getLeft(), set); - if (null == ln) { - return null; - } - root.setLeft(ln); - } - if (null != root.getRight()) { - ExpNode lr = visitAndFormatTree(root.getRight(), set); - if (null == lr) { - return null; - } - root.setRight(lr); - } - return root; - } - - private static ExpObject getConvertExpObject(ExpObject exp) { - if (null == exp || null == exp.getTokens() || exp.getTokens().size() != 2) { - return null; - } - ExpObject result = new ExpObject(exp.getTokens().get(1) + " " + exp.getTokens().get(0)); - result.setFields(exp.getFields()); - result.setSlop(exp.getSlop()); - List newTokens = new ArrayList<>(); - newTokens.add(exp.getTokens().get(1)); - newTokens.add(exp.getTokens().get(0)); - result.setTokens(newTokens); - return result; - } - - public static Object ComputeNodeTree(ExpNode root, HashSet set) { - if (null == root || null == root.getValue()) { - return null; - } - Object value = root.getValue(); - if (value instanceof ExpObject) {// 叶子节点 - // 消除歧义词 - List excLst = getExcludeObject(set, (ExpObject) value); - if (null != excLst && !excLst.isEmpty()) { - // 消歧结果 - List multiQb = new ArrayList<>(); - for (ExpObject exc : excLst) { - // 对每个消歧词进行处理 - if (null != exc.getFields() && exc.getFields().length > 0 - && ((ExpObject) value).getFields().length > 0) { - int len = ((ExpObject) value).getFields().length; - if (len == 1) {// title or content - String fd = ((ExpObject) value).getFields()[0]; - // 消歧域 - if (exc.getFields().length == 1 && fd.equals(exc.getFields()[0])) {// 1-1且相等 - List sn = QueryBuilderUtil.getSpanNotQueryBuilder(exc.getFields()[0], - ((ExpObject) value).getExpression(), exc.getExpression()); - if (null != sn) { - for(int i = 0;i sn = QueryBuilderUtil.getSpanNotQueryBuilder(exc.getFields()[0], - ((ExpObject) value).getExpression(), exc.getExpression()); - if (null != sn) { - for (SpanNotQueryBuilder aSn : sn) { - BoolQueryBuilder must = QueryBuilders.boolQuery().must(aSn); - bq2.must(must); - } - } - multiQb.add(bq.should(bq1).should(bq2)); - continue; - } else {// 1-2 - for (String fdv : exc.getFields()) { - List sn = QueryBuilderUtil.getSpanNotQueryBuilder(fdv, - ((ExpObject) value).getExpression(), exc.getExpression()); - if (null != sn) { - if (fdv.equals(fd)) {// 相等 - for (SpanNotQueryBuilder aSn : sn) { - BoolQueryBuilder must = QueryBuilders.boolQuery().must(aSn); - bq1.must(must); - bq2.must(must); - } - } else { - for (SpanNotQueryBuilder aSn : sn) { - BoolQueryBuilder must = QueryBuilders.boolQuery().must(aSn); - bq2.must(must); - } - } - } - } - } - multiQb.add(bq.should(bq1).should(bq2)); - continue; - } else if (len == 2) {// title and content - if (exc.getFields().length == 1) {// 2-1 - BoolQueryBuilder bq = QueryBuilders.boolQuery(); - for (String fdv : ((ExpObject) value).getFields()) { - if (fdv.equals(exc.getFields()[0])) { - List sn = QueryBuilderUtil.getSpanNotQueryBuilder(fdv, - ((ExpObject) value).getExpression(), exc.getExpression()); - if (null != sn) { - for (SpanNotQueryBuilder aSn : sn) { - BoolQueryBuilder must = QueryBuilders.boolQuery().must(aSn); - bq.should(must); - } - } - } else { - bq.should( - QueryBuilders.matchPhraseQuery(fdv, ((ExpObject) value).getExpression()) - .slop(((ExpObject) value).getSlop())); - } - } - multiQb.add(bq); - continue; - } else if (exc.getFields().length == 2) {// 2-2 - BoolQueryBuilder bq = QueryBuilders.boolQuery(); - for (String fd : exc.getFields()) { - List sn = QueryBuilderUtil.getSpanNotQueryBuilder(fd, - ((ExpObject) value).getExpression(), exc.getExpression()); - if (null != sn) { - for (SpanNotQueryBuilder aSn : sn) { - BoolQueryBuilder must = QueryBuilders.boolQuery().must(aSn); - bq.should(must); - } - } - } - multiQb.add(bq); - continue; - } - } - } - } - // 遍历结束 - if (null != multiQb && !multiQb.isEmpty()) { - if (multiQb.size() == 1) { - return multiQb.get(0); - } - BoolQueryBuilder matchQuery = QueryBuilders.boolQuery(); - for (BoolQueryBuilder qb : multiQb) { - matchQuery.must(qb); - } - return matchQuery; - } - } - // 没有歧义词 - String exp = ((ExpObject) value).getExpression().trim(); - if (!"".equals(exp.trim())) { - if (((ExpObject) value).getFields().length == 1) { - return QueryBuilders.matchPhraseQuery(((ExpObject) value).getFields()[0], exp) - .slop(((ExpObject) value).getSlop()); - } - BoolQueryBuilder matchQuery = QueryBuilders.boolQuery(); - for (String f : ((ExpObject) value).getFields()) { - matchQuery.should(QueryBuilders.matchPhraseQuery(f, exp).slop(((ExpObject) value).getSlop())); - } - return matchQuery; - } - return null; - } else { - // 运算符(经过format之后的表达式,仅包括|,&,~) - if (null == root.getLeft() || null == root.getRight()) { - return null; - } - Object left = ComputeNodeTree(root.getLeft(), set); - Object right = ComputeNodeTree(root.getRight(), set); - if (null == left || null == right) { - return null; - } - /** - * lei bao 去掉逻辑优化,避免潜在逻辑错误 @2016-11-24 - */ - // boolean useLeft = false; - // boolean useRight = false; - if (value instanceof OperaObject) {// |,&(暂时不包括!,单独处理) - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - // if (left instanceof BoolQueryBuilder && !(right instanceof - // BoolQueryBuilder)) { - // qb = (BoolQueryBuilder) left; - // useLeft = true; - // } else if (right instanceof BoolQueryBuilder && !(left - // instanceof BoolQueryBuilder)) { - // qb = (BoolQueryBuilder) right; - // useRight = true; - // } else { - // qb = QueryBuilders.boolQuery(); - // } - char opera = ((OperaObject) value).getOpera(); - switch (opera) { - case '|': - int succ = 0; - // left - if (left instanceof MatchPhraseQueryBuilder) { - qb.should((MatchPhraseQueryBuilder) left); - succ++; - } - // else if (left instanceof List) { - // for (Object obj : (List) left) { - // if (obj instanceof SpanNotQueryBuilder) { - // qb.should((SpanNotQueryBuilder) obj); - // } - // } - // succ++; - // } - else if (left instanceof BoolQueryBuilder) { - // if (!useLeft) { - qb.should((BoolQueryBuilder) left); - // } - succ++; - } - // right - if (right instanceof MatchPhraseQueryBuilder) { - qb.should((MatchPhraseQueryBuilder) right); - succ++; - } - // else if (right instanceof List) { - // for (Object obj : (List) right) { - // if (obj instanceof SpanNotQueryBuilder) { - // qb.should((SpanNotQueryBuilder) obj); - // } - // } - // succ++; - // } - else if (right instanceof BoolQueryBuilder) { - // if (!useRight) { - qb.should((BoolQueryBuilder) right); - // } - succ++; - } - if (succ == 0) { - return null; - } - break; - case '&': - // left - int succ2 = 0; - if (left instanceof MatchPhraseQueryBuilder) { - qb.must((MatchPhraseQueryBuilder) left); - succ2++; - } - // else if (left instanceof List) { - // for (Object obj : (List) left) { - // if (obj instanceof SpanNotQueryBuilder) { - // qb.must((SpanNotQueryBuilder) obj); - // } - // } - // succ2++; - // } - else if (left instanceof BoolQueryBuilder) { - // if (!useLeft) { - qb.must((BoolQueryBuilder) left); - // } - succ2++; - } - // right - if (right instanceof MatchPhraseQueryBuilder) { - qb.must((MatchPhraseQueryBuilder) right); - succ2++; - } - // else if (right instanceof List) { - // for (Object obj : (List) right) { - // if (obj instanceof SpanNotQueryBuilder) { - // qb.must((SpanNotQueryBuilder) obj); - // } - // } - // succ2++; - // } - else if (right instanceof BoolQueryBuilder) { - // if (!useRight) { - qb.must((BoolQueryBuilder) right); - // } - succ2++; - } - if (succ2 == 0) { - return null; - } - break; - default: - break; - } - return qb; - } else if (value instanceof DistanceNode) {// ~ - HashSet lfSet = getChildrenExpObject(root.getLeft()); - HashSet rtSet = getChildrenExpObject(root.getRight()); - if (null == lfSet || lfSet.isEmpty() || null == rtSet || rtSet.isEmpty()) { - return null; - } - // <(M) (N)>~10===>M&N&([every item m inM]and [every item n in - // N] distance 10 - OperaObject opera = new OperaObject('&'); - ExpNode nd = new ExpNode(opera); - nd.setLeft(root.getLeft()); - nd.setRight(root.getRight()); - Object andRst = ComputeNodeTree(nd, set);// &运算结果类型必定为BoolQueryBuilder - if (null != andRst && andRst instanceof BoolQueryBuilder) { - BoolQueryBuilder qb = (BoolQueryBuilder) andRst; - int succ = 0; - for (ExpObject lftObj : lfSet) { - for (ExpObject rtObj : rtSet) { - String[] commonField = getExcludeField(lftObj.getFields(), rtObj.getFields()); - // 暂假设都是单个词 - int succ2 = 0; - List lst = new ArrayList<>(); - for (String fd : commonField) { - String exp = (((ExpObject) lftObj).getExpression() + " " - + ((ExpObject) rtObj).getExpression()).trim(); - if (!Strings.isNullOrEmpty(exp)) { - MatchPhraseQueryBuilder multiQuery = QueryBuilders.matchPhraseQuery(fd, exp) - .slop(((DistanceNode) value).getDistance()); - if (null != multiQuery) { - // inQb.should(multiQuery); - lst.add(multiQuery); - succ2++; - } - } - } - if (succ2 > 0) { - if (lst.size() == 1) { - qb.must(lst.get(0)); - succ++; - } else if (lst.size() > 1) { - BoolQueryBuilder inQb = QueryBuilders.boolQuery(); - for (MatchPhraseQueryBuilder mq : lst) { - inQb.should(mq); - } - succ++; - qb.must(inQb); - } - } - // to-do:当左侧或者右侧某个对象为句子是需要处理其字符串数组token - } - } - if (succ > 0) { - return qb; - } else { - return null; - } - } - } - } - return null; - } - - private static List getExcludeObject(HashSet set, ExpObject obj) { - if (null == set || null == obj || Strings.isNullOrEmpty(obj.getExpression())) { - return null; - } - List lst = new ArrayList(); - for (ExpObject st : set) { - if (null != st && !Strings.isNullOrEmpty(st.getExpression()) - && st.getExpression().contains(obj.getExpression())) { - // String[] excField = getExcludeField(st.getFields(), - // obj.getFields()); - // st.setFields(excField); - lst.add(st); - } - } - return lst; - } - - /** - * 排除的字段 - */ - public static String[] getExcludeField(String[] excField, String[] objField) { - if (null == excField || excField.length == 0) { - return null; - } - if (null == objField || objField.length == 0) { - return excField; - } - List rst = new ArrayList<>(); - for (String ex : excField) { - for (String fd : objField) { - if (ex.equalsIgnoreCase(fd)) { - rst.add(ex); - } - } - } - return rst.toArray(new String[rst.size()]); - } - - private static HashSet getChildrenExpObject(ExpNode root) { - // 调用此方法之前应该确保所有节点拆解成最小表达式单元 - if (null == root || null == root.getValue()) { - return null; - } - HashSet set = new HashSet(); - if (root.getValue() instanceof ExpObject) { - set.add((ExpObject) root.getValue()); - } - if (null != root.getLeft()) { - HashSet setL = getChildrenExpObject(root.getLeft()); - if (null != setL && !setL.isEmpty()) { - set.addAll(setL); - } - } - if (null != root.getRight()) { - HashSet setR = getChildrenExpObject(root.getRight()); - if (null != setR && !setR.isEmpty()) { - set.addAll(setR); - } - } - return set; - } - - private static String getAnotherField(String fd) { - String[] allFd = new String[] { "title", "content" }; - for (String st : allFd) { - if (!st.equals(fd)) { - return st; - } - } - return null; - } - - private static List getDistanceObject(ExpObject obj1, ExpObject obj2, int distance) { - if (null == obj1 || null == obj2) { - logger.error("empty ExpObject found..."); - return null; - } - List list = new ArrayList<>(); - List tk1 = obj1.getTokens(); - List tk2 = obj2.getTokens(); - if (tk1.isEmpty() && tk2.isEmpty()) { - String exp1 = obj1.getExpression().trim(); - String exp2 = obj2.getExpression().trim(); - List tks = new ArrayList<>(); - tks.add(exp1); - tks.add(exp2); - ExpObject obj = new ExpObject(exp1 + " " + exp2, distance, tks); - obj.setFields(getExcludeField(obj1.getFields(), obj2.getFields())); - list.add(obj); - return list; - } else if (tk1.isEmpty()) {// dis2>0 - list.add(obj2); - String tk0 = obj1.getExpression(); - for (String tk : obj2.getTokens()) { - List tks = new ArrayList<>(); - tks.add(tk0); - tks.add(tk); - ExpObject rt = new ExpObject(tk0 + " " + tk, distance, tks); - rt.setFields(getExcludeField(obj1.getFields(), obj2.getFields())); - list.add(rt); - } - return list; - } else if (tk2.isEmpty()) {// dis1>0 - list.add(obj1); - String tk0 = obj2.getExpression(); - for (String tk : obj1.getTokens()) { - List tks = new ArrayList<>(); - tks.add(tk); - tks.add(tk0); - ExpObject rt = new ExpObject(tk + " " + tk0, distance, tks); - rt.setFields(getExcludeField(obj1.getFields(), obj2.getFields())); - list.add(rt); - } - return list; - } else {// neither is empty(both size>1) - list.add(obj1); - list.add(obj2); - for (String tk : obj1.getTokens()) { - for (String tkk : obj2.getTokens()) { - List tks = new ArrayList<>(); - tks.add(tk); - tks.add(tkk); - ExpObject rt = new ExpObject(tk + " " + tkk, distance, tks); - rt.setFields(getExcludeField(obj1.getFields(), obj2.getFields())); - list.add(rt); - } - } - } - return list; - } - - private static void visitNodeValue(Object obj, int order) { - if (obj instanceof ExpObject) { - // logger.info("order=" + order + ",ExpObject{" + ((ExpObject) - // obj).toString() + "}"); - System.out.print("order=" + order + ",ExpObject{" + ((ExpObject) obj).toString() + "}\n"); - } else if (obj instanceof DistanceNode) { - // logger.info("order=" + order + ",DistanceNode{" + ((DistanceNode) - // obj).toString() + "}"); - System.out.print("order=" + order + ",DistanceNode{" + ((DistanceNode) obj).toString() + "}\n"); - } else if (obj instanceof OperaObject) { - // logger.info("order=" + order + ",OperaObject{" + ((OperaObject) - // obj).toString() + "}"); - System.out.print("order=" + order + ",OperaObject{" + ((OperaObject) obj).toString() + "}\n"); - } else { - // logger.error("wrong type for order=" + order); - System.out.print("wrong type for order=" + order + "\n"); - } - } -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpressionParser.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpressionParser.java deleted file mode 100644 index 5b5d15d..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpressionParser.java +++ /dev/null @@ -1,911 +0,0 @@ -package com.bfd.mf.common.util.subject; - -import com.bfd.mf.common.util.constants.ESConstant; -import com.bfd.mf.common.util.OperatorUtil; -import com.bfd.mf.common.web.vo.params.expression.ExpObject; -import com.bfd.mf.common.web.vo.params.expression.HalfQuartChar; -import org.elasticsearch.common.Strings; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; - -public class ExpressionParser { - private static final Logger logger = LoggerFactory.getLogger(ExpressionParser.class); - - /** - * 表达式预处理 - */ - public static Vector preTreatExpression(String exp, String[] fields) { - if (Strings.isNullOrEmpty(exp)) { - return null; - } - exp = exp.trim(); - Vector v = new Vector(); - Stack left = new Stack<>();// '<' - Stack leftQ = new Stack<>();// '(' - Stack leftS = new Stack<>();// “ 或" - int begin = 0, end = 0; - while (end < exp.length()) { - char c = exp.charAt(end); - // 处理<>内部空格====begin - if (c == '<' || c == '《') { - left.push(new HalfQuartChar(c, end)); - } else if (c == '》' || c == '>') { - if (!left.isEmpty()) { - left.pop(); - } - } else if (c == '(' || c == '(') { - leftQ.push(new HalfQuartChar(c, end)); - } else if (c == ')' || c == ')') { - if (!leftQ.isEmpty()) { - leftQ.pop(); - } - } else if (c == '"' || c == '“') { - leftS.push(new HalfQuartChar(c, end)); - } else if (c == '"' || c == '”') { - if (!leftS.isEmpty()) { - leftS.pop(); - } - } - // 可能是<(A) (B)>,(A)和(B)之间的空格 - if (c == ' ' && leftS.isEmpty()) {// 没有在“”内部 - if (!left.isEmpty() && end - left.peek().getPosition() > 1) { - if (end > 0 && end < exp.length() - 1) { - char prev = exp.charAt(end - 1); - char next = exp.charAt(end + 1); - if (isCharWaitSepLeft(prev) && isCharWaitSepRight(next)) { - c = '*';// 替换成*,便于后续处理 - } - } - } - } - // 处理<>内部空格===end - // find next operator - if (!OperatorUtil.isSeperator(c)) { - end++; - continue; - } else if (end > 0 && exp.charAt(end - 1) == '\\' && OperatorUtil.isOperator(c)) {// 转义符 - exp = exp.replace(exp.charAt(end - 1), ' ');// 转义符用空格代替 - end++; - continue; - } - // to-do:title 运算符转义 - if (end > begin) { - String st = exp.substring(begin, end).trim(); - if (!Strings.isNullOrEmpty(st)) { - v.add(st); - } - } - if (c != ',' && c != ',' && c != '"' && c != '“' && c != '”') { - v.add(new Character(c)); - } else { - logger.warn("miss ',' at:{}", end); - } - end++; - begin = end; - } - if (end == exp.length() && begin <= end - 1) { - if (begin == end - 1 && OperatorUtil.isSeperator(exp.charAt(begin))) {// last - // op - v.add(new Character(exp.charAt(begin))); - } else { - String sub = exp.substring(begin).trim(); - if (!Strings.isNullOrEmpty(sub)) { - v.add(sub); - } - } - } - // 遍历分隔结束,开始处理特殊字符 - for (int i = 0; i < v.size(); i++) { - Object o = v.get(i); - if ((o instanceof String && ((String) o).trim().equals("")) - || o instanceof Character && ((char) o == '\u0000' || (char) o == '\n' || (char) o == '\t')) { - // v.remove(i); - } else if (i > 0 && i < v.size() - 1) { - if (o instanceof String && ((String) o).trim().contains(" ")) { - Object prev = v.get(i - 1); - Object next = v.get(i + 1); - if (prev instanceof Character && (char) prev == '<' && next instanceof Character - && (char) next == '>') { - String[] arr = ((String) o).split(" "); - if (null != arr && arr.length == 2) { - v.set(i, arr[0]); - v.add(i, '*');// 《》中间空格用*代替,便于后续处理 - v.add(i, arr[1]); - } - } - } - } - } - // 处理~10类型 - for (int j = 0; j < v.size(); j++) { - Object obj = v.get(j); - if (obj instanceof String) { - if (j > 0) { - Object prv = v.get(j - 1); - // 处理~10类型后面的数字 - if (prv instanceof Character && (char) prv == '~') { - Integer va = Integer.parseInt((String) obj); - if (null != va) { - v.set(j, va); - } - continue; - } - } - // title:表达式 - ExpObject ebj = null; - if (((String) obj).equals("title:") || ((String) obj).equals("content:")) { - v.remove(j); - if (j < v.size() - 1) { - Object next = v.get(j);// next now is j - if (next instanceof Character && (char) next == '(' || (char) next == '(') { - Stack stk = new Stack(); - stk.push(new HalfQuartChar('(', j)); - for (int k = j + 1; k < v.size(); k++) { - Object kv = v.get(k); - if (kv instanceof Character) { - if ((char) kv == '(' || (char) kv == '(') { - stk.push(new HalfQuartChar('(', k)); - } else if ((char) kv == ')' || (char) kv == ')') { - if (!stk.isEmpty()) { - stk.pop(); - } - if (stk.isEmpty()) { - break; - } - } - } else if (kv instanceof String) { - if (((String) obj).equals("title:")) { - ebj = new ExpObject((String) kv, new String[]{"title"}); - } else { - ebj = new ExpObject((String) kv, fields); - } - v.set(k, ebj); - } - } - } - } - } else if (((String) obj).startsWith("title:") || ((String) obj).startsWith("content:")) { - if (((String) obj).startsWith("title:")) { - String value = ((String) obj).substring("title:".length()); - ebj = new ExpObject((String) value, new String[]{"title"}); - } else { - String value = ((String) obj).substring("content:".length()); - ebj = new ExpObject((String) value, fields); - } - v.set(j, ebj); - } else { - List fieldsNew = new ArrayList<>(); - fieldsNew.add(ESConstant.TITLE); - List fieldsList = Arrays.asList(fields); - fieldsNew.addAll(fieldsList); - ebj = new ExpObject((String) obj, fieldsNew.toArray(new String[fieldsNew.size()])); - v.set(j, ebj); - } - } - } - return v; - } - - /** - * 表达式预处理 不自动添加title属性 - */ - public static Vector preTreatExpressionNoTitleField(String exp, String[] fields) { - if (Strings.isNullOrEmpty(exp)) { - return null; - } - exp = exp.trim(); - Vector v = new Vector(); - Stack left = new Stack<>();// '<' - Stack leftQ = new Stack<>();// '(' - Stack leftS = new Stack<>();// “ 或" - int begin = 0, end = 0; - while (end < exp.length()) { - char c = exp.charAt(end); - // 处理<>内部空格====begin - if (c == '<' || c == '《') { - left.push(new HalfQuartChar(c, end)); - } else if (c == '》' || c == '>') { - if (!left.isEmpty()) { - left.pop(); - } - } else if (c == '(' || c == '(') { - leftQ.push(new HalfQuartChar(c, end)); - } else if (c == ')' || c == ')') { - if (!leftQ.isEmpty()) { - leftQ.pop(); - } - } else if (c == '"' || c == '“') { - leftS.push(new HalfQuartChar(c, end)); - } else if (c == '"' || c == '”') { - if (!leftS.isEmpty()) { - leftS.pop(); - } - } - // 可能是<(A) (B)>,(A)和(B)之间的空格 - if (c == ' ' && leftS.isEmpty()) {// 没有在“”内部 - if (!left.isEmpty() && end - left.peek().getPosition() > 1) { - if (end > 0 && end < exp.length() - 1) { - char prev = exp.charAt(end - 1); - char next = exp.charAt(end + 1); - if (isCharWaitSepLeft(prev) && isCharWaitSepRight(next)) { - c = '*';// 替换成*,便于后续处理 - } - } - } - } - // 处理<>内部空格===end - // find next operator - if (!OperatorUtil.isSeperator(c)) { - end++; - continue; - } else if (end > 0 && exp.charAt(end - 1) == '\\' && OperatorUtil.isOperator(c)) {// 转义符 - exp = exp.replace(exp.charAt(end - 1), ' ');// 转义符用空格代替 - end++; - continue; - } - // to-do:title 运算符转义 - if (end > begin) { - String st = exp.substring(begin, end).trim(); - if (!Strings.isNullOrEmpty(st)) { - v.add(st); - } - } - if (c != ',' && c != ',' && c != '"' && c != '“' && c != '”') { - v.add(new Character(c)); - } else { - logger.warn("miss ',' at:{}", end); - } - end++; - begin = end; - } - if (end == exp.length() && begin <= end - 1) { - if (begin == end - 1 && OperatorUtil.isSeperator(exp.charAt(begin))) {// last - // op - v.add(new Character(exp.charAt(begin))); - } else { - String sub = exp.substring(begin).trim(); - if (!Strings.isNullOrEmpty(sub)) { - v.add(sub); - } - } - } - // 遍历分隔结束,开始处理特殊字符 - for (int i = 0; i < v.size(); i++) { - Object o = v.get(i); - if ((o instanceof String && ((String) o).trim().equals("")) - || o instanceof Character && ((char) o == '\u0000' || (char) o == '\n' || (char) o == '\t')) { - // v.remove(i); - } else if (i > 0 && i < v.size() - 1) { - if (o instanceof String && ((String) o).trim().contains(" ")) { - Object prev = v.get(i - 1); - Object next = v.get(i + 1); - if (prev instanceof Character && (char) prev == '<' && next instanceof Character - && (char) next == '>') { - String[] arr = ((String) o).split(" "); - if (null != arr && arr.length == 2) { - v.set(i, arr[0]); - v.add(i, '*');// 《》中间空格用*代替,便于后续处理 - v.add(i, arr[1]); - } - } - } - } - } - // 处理~10类型 - for (int j = 0; j < v.size(); j++) { - Object obj = v.get(j); - if (obj instanceof String) { - if (j > 0) { - Object prv = v.get(j - 1); - // 处理~10类型后面的数字 - if (prv instanceof Character && (char) prv == '~') { - Integer va = Integer.parseInt((String) obj); - if (null != va) { - v.set(j, va); - } - continue; - } - } - // title:表达式 - ExpObject ebj = null; - if (((String) obj).equals("title:") || ((String) obj).equals("content:")) { - v.remove(j); - if (j < v.size() - 1) { - Object next = v.get(j);// next now is j - if (next instanceof Character && (char) next == '(' || (char) next == '(') { - Stack stk = new Stack(); - stk.push(new HalfQuartChar('(', j)); - for (int k = j + 1; k < v.size(); k++) { - Object kv = v.get(k); - if (kv instanceof Character) { - if ((char) kv == '(' || (char) kv == '(') { - stk.push(new HalfQuartChar('(', k)); - } else if ((char) kv == ')' || (char) kv == ')') { - if (!stk.isEmpty()) { - stk.pop(); - } - if (stk.isEmpty()) { - break; - } - } - } else if (kv instanceof String) { - if (((String) obj).equals("title:")) { - ebj = new ExpObject((String) kv, new String[]{"title"}); - } else { - ebj = new ExpObject((String) kv, fields); - } - v.set(k, ebj); - } - } - } - } - } else if (((String) obj).startsWith("title:") || ((String) obj).startsWith("content:")) { - if (((String) obj).startsWith("title:")) { - String value = ((String) obj).substring("title:".length()); - ebj = new ExpObject((String) value, new String[]{"title"}); - } else { - String value = ((String) obj).substring("content:".length()); - ebj = new ExpObject((String) value, fields); - } - v.set(j, ebj); - } else { - List fieldsNew = new ArrayList<>(); - //fieldsNew.add(ESConstant.TITLE); - List fieldsList = Arrays.asList(fields); - fieldsNew.addAll(fieldsList); - ebj = new ExpObject((String) obj, fieldsNew.toArray(new String[fieldsNew.size()])); - v.set(j, ebj); - } - } - } - return v; - } - - public static Vector preProcessExpression(String exp, boolean onlyKeyWord, String[] fields) throws Exception { - if (Strings.isNullOrEmpty(exp)) { - return null; - } - - exp = exp.trim(); - String[] arr = exp.split(" "); - Vector v = new Vector<>(); - int quartNum = 0; - for (String st : arr) { - st = st.trim(); - if (Strings.isNullOrEmpty(st)) { - continue; - } - int begin = 0, end = 0; - while (end < st.length()) { - char c = st.charAt(end); - // find next operator - if (!OperatorUtil.isSeperator(c)) { - end++; - continue; - } else { // c为分隔符 - if (end > 0 && st.charAt(end - 1) == '\\') {// 转义符 - st = st.replace(st.charAt(end - 1), ' ');// 转义符用空格代替 - end++; - continue; - } else if (c == '“' || (c == '"' && quartNum % 2 == 0)) { - quartNum++; - } else if (c == '”' || (c == '"' && quartNum % 2 == 1)) { - quartNum--; - } else if (quartNum > 0) { - end++; - continue; - } - } - if (end > begin) { - String st0 = st.substring(begin, end).trim(); - if (!Strings.isNullOrEmpty(st0)) { - v.add(st0); - } - } - if (c != ',' && c != ',') { - v.add(new Character(c)); - } else { - logger.warn("miss ',' at:{}", end); - } - end++; - begin = end; - } - if (end == st.length() && begin <= end - 1) { - if (begin == end - 1 && OperatorUtil.isSeperator(st.charAt(begin)) && quartNum == 0) { - v.add(new Character(st.charAt(begin))); - } else { - String sub = st.substring(begin).trim(); - if (!Strings.isNullOrEmpty(sub)) { - v.add(sub); - } - } - } - } - logger.debug("after split,vector now is:{}",v); - visitVector(v); - logger.info("=====================endLess loop============="); - logger.info("keyWord is:{}", exp); - v = checkAndFormatVector(v, onlyKeyWord, fields); - logger.debug("after checkAndFormatVector,vector now is:{}",v); - visitVector(v); - return v; - } - - public static Vector preProcessExpressionNoTitleField(String exp, boolean onlyKeyWord, String[] fields) throws Exception { - if (Strings.isNullOrEmpty(exp)) { - return null; - } - - exp = exp.trim(); - String[] arr = exp.split(" "); - Vector v = new Vector<>(); - int quartNum = 0; - for (String st : arr) { - st = st.trim(); - if (Strings.isNullOrEmpty(st)) { - continue; - } - int begin = 0, end = 0; - while (end < st.length()) { - char c = st.charAt(end); - // find next operator - if (!OperatorUtil.isSeperator(c)) { - end++; - continue; - } else { // c为分隔符 - if (end > 0 && st.charAt(end - 1) == '\\') {// 转义符 - st = st.replace(st.charAt(end - 1), ' ');// 转义符用空格代替 - end++; - continue; - } else if (c == '“' || (c == '"' && quartNum % 2 == 0)) { - quartNum++; - } else if (c == '”' || (c == '"' && quartNum % 2 == 1)) { - quartNum--; - } else if (quartNum > 0) { - end++; - continue; - } - } - if (end > begin) { - String st0 = st.substring(begin, end).trim(); - if (!Strings.isNullOrEmpty(st0)) { - v.add(st0); - } - } - if (c != ',' && c != ',') { - v.add(new Character(c)); - } else { - logger.warn("miss ',' at:{}", end); - } - end++; - begin = end; - } - if (end == st.length() && begin <= end - 1) { - if (begin == end - 1 && OperatorUtil.isSeperator(st.charAt(begin)) && quartNum == 0) { - v.add(new Character(st.charAt(begin))); - } else { - String sub = st.substring(begin).trim(); - if (!Strings.isNullOrEmpty(sub)) { - v.add(sub); - } - } - } - } - logger.debug("after split,vector now is:{}",v); - visitVector(v); - logger.info("=====================endLess loop============="); - logger.info("keyWord is:{}", exp); - v = checkAndFormatVectorNoTitleField(v, onlyKeyWord, fields); - logger.debug("after checkAndFormatVector,vector now is:{}",v); - visitVector(v); - return v; - } - - private static void visitVector(Vector vector) { - if (null == vector || vector.isEmpty()) { - logger.debug("vector is empty!"); - return; - } - for (int i = 0; i < vector.size(); i++) { - Object obj = vector.get(i); - logger.debug("i=" + i + ",value=" + obj); - } - return; - } - - private static Vector checkAndFormatVector(Vector vector, boolean onlyKeyWord, String[] fields) throws Exception { - if (null == vector || vector.isEmpty()) { - return null; - } - logger.warn("[checkAndFormatVector] fields: {}{}", fields); - List list = new ArrayList(); - for (int i = 0; i < vector.size(); i++) { - list.add(vector.get(i)); - } - logger.info("vector list is:{}", list); - Stack leftS = new Stack();// 存放“” - Stack leftQ = new Stack();// 存放<> - int qNum = 0;// <>数目 - int starNum = 0;// <>内部分隔空格个数 - for (int i = 0; i < vector.size(); i++) { - Object ob = vector.get(i); - // <>内部 - if (!leftQ.isEmpty() && leftS.isEmpty()) { - if (i > 0) { - if (isLeftWaitSeperate(vector.get(i - 1)) && isRightWaitSeperate(vector.get(i))) { - vector.insertElementAt(new Character('*'), i); - starNum++; - continue; - } else if (i > 1 && isLeftWaitSeperate(vector.get(i - 2)) - && isRightWaitSeperate(vector.get(i - 1))) { - vector.insertElementAt(new Character('*'), i - 1); - starNum++; - continue; - } - } - } - if (ob instanceof Character) { - if ((char) ob == '“' || ((char) ob == '"' && leftS.size() % 2 == 0)) { - leftS.push(new HalfQuartChar('“', i)); - vector.remove(ob); - i--; - } else if ((char) ob == '”' || ((char) ob == '"' && leftS.size() % 2 == 1)) { - if (!leftS.isEmpty()) { - leftS.pop(); - } else { - throw new Exception(" “” not match Exception,“ expected "); - } - vector.remove(ob); - i--; - } else if ((char) ob == '<' || (char) ob == '《') { - leftQ.push(new HalfQuartChar('<', i)); - qNum++; - } else if ((char) ob == '>' || (char) ob == '》') { - if (!leftQ.isEmpty()) { - leftQ.pop(); - } else { - throw new Exception("> not match Exception,< expected "); - } - } - } else if (ob instanceof String) { - if (i > 0) { - Object prv = vector.get(i - 1); - if (prv instanceof String && onlyKeyWord) {// 连续String - String ns = null; - if (!leftS.isEmpty()) {// “”内部连续String=>合并 - ns = ((String) prv).trim() + " " + ((String) ob).trim(); - } else if (leftQ.isEmpty()) {// 不在<>内部 - ns = ((String) prv).trim() + ((String) ob).trim(); - } - if (!Strings.isNullOrEmpty(ns)) { - vector.set(i - 1, ns); - vector.remove(i--); - } - } - } - } - } - // “”不匹配 - if (!leftS.isEmpty()) { - throw new Exception(" “” not match Exception!"); - } else if (!leftQ.isEmpty()) { - throw new Exception("<> not match Exception!"); - } else if (starNum != qNum) { - throw new Exception("<> phrase Exception,riht format should like: ~10"); - } - // 封装成OP或者ExpObject - for (int j = 0; j < vector.size(); j++) { - Object obj = vector.get(j); - if (obj instanceof String) { - if (j > 0) { - Object prv = vector.get(j - 1); - // 处理~10类型后面的数字 - if (prv instanceof Character && (char) prv == '~') { - Integer va = Integer.parseInt((String) obj); - if (null != va) { - vector.set(j, va); - } - continue; - } - } - // title:表达式 - ExpObject ebj = null; - if (((String) obj).equals("title:") || ((String) obj).equals("content:")) { - vector.remove(j); - if (j < vector.size() - 1) { - Object next = vector.get(j);// next now is j - if (next instanceof Character && (char) next == '(' || (char) next == '(') { - Stack stk = new Stack(); - stk.push(new HalfQuartChar('(', j)); - for (int k = j + 1; k < vector.size(); k++) { - Object kv = vector.get(k); - if (kv instanceof Character) { - if ((char) kv == '(' || (char) kv == '(') { - stk.push(new HalfQuartChar('(', k)); - } else if ((char) kv == ')' || (char) kv == ')') { - if (!stk.isEmpty()) { - stk.pop(); - } - if (stk.isEmpty()) { - break; - } - } - } else if (kv instanceof String) {// 括号内部普通字符串 - // 类似 title:(A|title:B)==》去掉title:B之前的title: - while (((String) kv).startsWith("title:") || ((String) kv).startsWith("content:")) { - int begin = ((String) kv).indexOf(':'); - kv = ((String) kv).substring(begin + 1).trim(); - } - if (((String) kv).startsWith("\\title:")// 使用转义符==>当做普通字符串处理 - || ((String) kv).startsWith("\\content:")) { - kv = ((String) kv).substring(2).trim(); - } - if (((String) obj).equals("title:")) { - ebj = new ExpObject((String) kv, new String[]{"title"}); - } else { - ebj = new ExpObject((String) kv, fields); - } - vector.set(k, ebj); - } - } - } - } - } else if (((String) obj).startsWith("title:") || ((String) obj).startsWith("content:")) { - String value = (String) obj; - while (value.startsWith("title:") || value.startsWith("content:")) { - value = value.substring(value.indexOf(":") + 1); - } - if (((String) obj).startsWith("title:")) { - ebj = new ExpObject((String) value, new String[]{"title"}); - } else { - ebj = new ExpObject((String) value, fields); - } - vector.set(j, ebj); - } else {// 普通字符串 - if (((String) obj).startsWith("\\title:")// 使用转义符==>当做普通字符串处理 - || ((String) obj).startsWith("\\content:")) { - obj = ((String) obj).substring(2).trim(); - } - List fieldsNew = new ArrayList<>(); - fieldsNew.add(ESConstant.TITLE); - List fieldsList = Arrays.asList(fields); - fieldsNew.addAll(fieldsList); - ebj = new ExpObject((String) obj, fieldsNew.toArray(new String[fieldsNew.size()])); - vector.set(j, ebj); - } - } - } - return vector; - } - - private static Vector checkAndFormatVectorNoTitleField(Vector vector, boolean onlyKeyWord, String[] fields) throws Exception { - if (null == vector || vector.isEmpty()) { - return null; - } - logger.warn("[checkAndFormatVector] fields: {}{}", fields); - List list = new ArrayList(); - for (int i = 0; i < vector.size(); i++) { - list.add(vector.get(i)); - } - logger.info("vector list is:{}", list); - Stack leftS = new Stack();// 存放“” - Stack leftQ = new Stack();// 存放<> - int qNum = 0;// <>数目 - int starNum = 0;// <>内部分隔空格个数 - for (int i = 0; i < vector.size(); i++) { - Object ob = vector.get(i); - // <>内部 - if (!leftQ.isEmpty() && leftS.isEmpty()) { - if (i > 0) { - if (isLeftWaitSeperate(vector.get(i - 1)) && isRightWaitSeperate(vector.get(i))) { - vector.insertElementAt(new Character('*'), i); - starNum++; - continue; - } else if (i > 1 && isLeftWaitSeperate(vector.get(i - 2)) - && isRightWaitSeperate(vector.get(i - 1))) { - vector.insertElementAt(new Character('*'), i - 1); - starNum++; - continue; - } - } - } - if (ob instanceof Character) { - if ((char) ob == '“' || ((char) ob == '"' && leftS.size() % 2 == 0)) { - leftS.push(new HalfQuartChar('“', i)); - vector.remove(ob); - i--; - } else if ((char) ob == '”' || ((char) ob == '"' && leftS.size() % 2 == 1)) { - if (!leftS.isEmpty()) { - leftS.pop(); - } else { - throw new Exception(" “” not match Exception,“ expected "); - } - vector.remove(ob); - i--; - } else if ((char) ob == '<' || (char) ob == '《') { - leftQ.push(new HalfQuartChar('<', i)); - qNum++; - } else if ((char) ob == '>' || (char) ob == '》') { - if (!leftQ.isEmpty()) { - leftQ.pop(); - } else { - throw new Exception("> not match Exception,< expected "); - } - } - } else if (ob instanceof String) { - if (i > 0) { - Object prv = vector.get(i - 1); - if (prv instanceof String && onlyKeyWord) {// 连续String - String ns = null; - if (!leftS.isEmpty()) {// “”内部连续String=>合并 - ns = ((String) prv).trim() + " " + ((String) ob).trim(); - } else if (leftQ.isEmpty()) {// 不在<>内部 - ns = ((String) prv).trim() + ((String) ob).trim(); - } - if (!Strings.isNullOrEmpty(ns)) { - vector.set(i - 1, ns); - vector.remove(i--); - } - } - } - } - } - // “”不匹配 - if (!leftS.isEmpty()) { - throw new Exception(" “” not match Exception!"); - } else if (!leftQ.isEmpty()) { - throw new Exception("<> not match Exception!"); - } else if (starNum != qNum) { - throw new Exception("<> phrase Exception,riht format should like: ~10"); - } - // 封装成OP或者ExpObject - for (int j = 0; j < vector.size(); j++) { - Object obj = vector.get(j); - if (obj instanceof String) { - if (j > 0) { - Object prv = vector.get(j - 1); - // 处理~10类型后面的数字 - if (prv instanceof Character && (char) prv == '~') { - Integer va = Integer.parseInt((String) obj); - if (null != va) { - vector.set(j, va); - } - continue; - } - } - // title:表达式 - ExpObject ebj = null; - if (((String) obj).equals("title:") || ((String) obj).equals("content:")) { - vector.remove(j); - if (j < vector.size() - 1) { - Object next = vector.get(j);// next now is j - if (next instanceof Character && (char) next == '(' || (char) next == '(') { - Stack stk = new Stack(); - stk.push(new HalfQuartChar('(', j)); - for (int k = j + 1; k < vector.size(); k++) { - Object kv = vector.get(k); - if (kv instanceof Character) { - if ((char) kv == '(' || (char) kv == '(') { - stk.push(new HalfQuartChar('(', k)); - } else if ((char) kv == ')' || (char) kv == ')') { - if (!stk.isEmpty()) { - stk.pop(); - } - if (stk.isEmpty()) { - break; - } - } - } else if (kv instanceof String) {// 括号内部普通字符串 - // 类似 title:(A|title:B)==》去掉title:B之前的title: - while (((String) kv).startsWith("title:") || ((String) kv).startsWith("content:")) { - int begin = ((String) kv).indexOf(':'); - kv = ((String) kv).substring(begin + 1).trim(); - } - if (((String) kv).startsWith("\\title:")// 使用转义符==>当做普通字符串处理 - || ((String) kv).startsWith("\\content:")) { - kv = ((String) kv).substring(2).trim(); - } - if (((String) obj).equals("title:")) { - ebj = new ExpObject((String) kv, new String[]{"title"}); - } else { - ebj = new ExpObject((String) kv, fields); - } - vector.set(k, ebj); - } - } - } - } - } else if (((String) obj).startsWith("title:") || ((String) obj).startsWith("content:")) { - String value = (String) obj; - while (value.startsWith("title:") || value.startsWith("content:")) { - value = value.substring(value.indexOf(":") + 1); - } - if (((String) obj).startsWith("title:")) { - ebj = new ExpObject((String) value, new String[]{"title"}); - } else { - ebj = new ExpObject((String) value, fields); - } - vector.set(j, ebj); - } else {// 普通字符串 - if (((String) obj).startsWith("\\title:")// 使用转义符==>当做普通字符串处理 - || ((String) obj).startsWith("\\content:")) { - obj = ((String) obj).substring(2).trim(); - } - List fieldsNew = new ArrayList<>(); - //fieldsNew.add(ESConstant.TITLE); - List fieldsList = Arrays.asList(fields); - fieldsNew.addAll(fieldsList); - ebj = new ExpObject((String) obj, fieldsNew.toArray(new String[fieldsNew.size()])); - vector.set(j, ebj); - } - } - } - return vector; - } - - /** - * 处理消歧语句表达式 - */ - public static HashSet handleMinusExpression(String minus, String[] fields) throws Exception { - if (Strings.isNullOrEmpty(minus)) { - return null; - } - minus = minus.replaceAll(",", ","); - minus = minus.replaceAll(" ", " "); - Vector v = preProcessExpression(minus, false, fields); - HashSet set = new HashSet(); - if (null != v && !v.isEmpty()) { - for (Object obj : v) { - if (obj instanceof ExpObject) { - set.add((ExpObject) obj); - } - } - } - return set; - } - - private static boolean isCharWaitSepLeft(char charC) { - if (!OperatorUtil.isSeperator(charC) || charC == ')' || charC == ')' || charC == '>' || charC == '》' - || charC == '"' || charC == '”') { - return true; - } - return false; - } - - private static boolean isCharWaitSepRight(char charC) { - if (!OperatorUtil.isSeperator(charC) || charC == '(' || charC == '(' || charC == '<' || charC == '《' - || charC == '"' || charC == '“') { - return true; - } - return false; - } - - private static boolean isLeftWaitSeperate(Object obj) { - if (null == obj) { - return false; - } - if (obj instanceof Character) { - return (char) obj == ')' || (char) obj == ')' || (char) obj == '"' || (char) obj == '”'; - } else if (obj instanceof String) { - return !Strings.isNullOrEmpty((String) obj); - } else if (obj instanceof Integer) {// ~20 - return true; - } - return false; - } - - private static boolean isRightWaitSeperate(Object obj) { - if (null == obj) { - return false; - } - if (obj instanceof Character) { - return (char) obj == '(' || (char) obj == '(' || (char) obj == '<' || (char) obj == '《' || (char) obj == '"' - || (char) obj == '“'; - } else if (obj instanceof String) { - return !Strings.isNullOrEmpty((String) obj); - } - return false; - } -} \ No newline at end of file diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/QueryBuilderUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/QueryBuilderUtil.java deleted file mode 100644 index 2f37e2a..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/subject/QueryBuilderUtil.java +++ /dev/null @@ -1,669 +0,0 @@ -package com.bfd.mf.common.util.subject; - -import com.alibaba.fastjson.JSONArray; -import com.alibaba.fastjson.JSONObject; -import com.bfd.mf.common.util.constants.ESConstant; -import com.bfd.mf.common.util.ESServerUtils; -import com.bfd.mf.common.web.repository.mysql.cache.ClusterRepository; -import com.bfd.mf.common.web.vo.params.expression.ExpNode; -import com.bfd.mf.common.web.vo.params.expression.ExpObject; -import com.bfd.nlp.common.util.object.TObjectUtils; -import com.bfd.nlp.common.util.string.TStringUtils; -import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; -import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken; -import org.elasticsearch.client.IndicesAdminClient; -import org.elasticsearch.common.Strings; -import org.elasticsearch.index.query.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; - -import java.util.*; - -import static com.bfd.mf.common.util.constants.ESConstant.EXCLUDE_KEYWORDS_SPLIT_CN_SYSBOL; -import static com.bfd.mf.common.util.constants.ESConstant.EXCLUDE_KEYWORDS_SPLIT_SYSBOL; - - -public class QueryBuilderUtil { - private static final Logger logger = LoggerFactory.getLogger(QueryBuilderUtil.class); - - @Autowired - private ClusterRepository clusterRepository; - - public static BoolQueryBuilder getQueryBuilderFromExpNodeTree(ExpNode root, HashSet set) throws Exception { - if (null == root || null == root.getValue()) { - return null; - } - /* - * @2016-12-7 - * @author lei.bao - * format之后: - * 1:distance运算拆成全部单个(左右子树全部是ExpObject) - * 2:所有ExpObject都是叶子节点且规范 - */ - ExpNode rst = ExpNodeUtil.visitAndFormatTree(root, set); - // - Object rt = ExpNodeUtil.ComputeNodeTree(rst, set); - - if (rt instanceof BoolQueryBuilder) { - return (BoolQueryBuilder) rt; - } else if (rt instanceof MatchQueryBuilder) { - BoolQueryBuilder bq = QueryBuilders.boolQuery(); - return bq.must((MatchQueryBuilder) rt); - }else if(rt instanceof MatchPhraseQueryBuilder){ - BoolQueryBuilder bq = QueryBuilders.boolQuery(); - return bq.must((MatchPhraseQueryBuilder) rt); - }else if(rt instanceof AbstractQueryBuilder){ - BoolQueryBuilder bq = QueryBuilders.boolQuery(); - return bq.must((AbstractQueryBuilder) rt); - } - return null; - } - - public static BoolQueryBuilder handleExcludeExpression(String exclude, String[] fields) { - Vector v = ExpressionParser.preTreatExpression(exclude, fields); - if (null != v && !v.isEmpty()) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - for (Object obj : v) { - if (obj instanceof ExpObject) { - BoolQueryBuilder innerQb = QueryBuilders.boolQuery(); - for (String fd : ((ExpObject) obj).getFields()) { - innerQb.should(QueryBuilders.matchPhraseQuery(fd, ((ExpObject) obj).getExpression()).slop(0)); - } - qb.mustNot(innerQb); - } - } - return qb; - } - return null; - } - - public static BoolQueryBuilder handleExcludeExpressionNoTitleField(String exclude, String[] fields) { - Vector v = ExpressionParser.preTreatExpressionNoTitleField(exclude, fields); - if (null != v && !v.isEmpty()) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - for (Object obj : v) { - if (obj instanceof ExpObject) { - BoolQueryBuilder innerQb = QueryBuilders.boolQuery(); - for (String fd : ((ExpObject) obj).getFields()) { - innerQb.should(QueryBuilders.matchPhraseQuery(fd, ((ExpObject) obj).getExpression()).slop(0)); - } - qb.mustNot(innerQb); - } - } - return qb; - } - return null; - } - - public static BoolQueryBuilder handleTitleExcludeExpression(String exclude) { - if (TStringUtils.isNotEmpty(exclude)) { - // 标点符号修正 - exclude = exclude.replaceAll(EXCLUDE_KEYWORDS_SPLIT_CN_SYSBOL, - EXCLUDE_KEYWORDS_SPLIT_SYSBOL); - exclude = "title:(" + exclude + ")"; - Vector v = ExpressionParser.preTreatExpression(exclude, new String[]{ESConstant.TITLE}); - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - BoolQueryBuilder titleExcludeBuilder = null; - if (null != v && !v.isEmpty()) { - titleExcludeBuilder = QueryBuilders.boolQuery(); - for (Object obj : v) { - if (obj instanceof ExpObject) { - QueryBuilder wordQueryBuilder = QueryBuilders.matchPhraseQuery(ESConstant.SEARCH_SCOPE_TYPE_TITLE, - ((ExpObject) obj).getExpression()).slop(0); - titleExcludeBuilder.should(wordQueryBuilder); - } - } - qb.mustNot(titleExcludeBuilder); - return qb; - } - } - return null; - } - - public static List getSpanNotQueryBuilder(String field, String shortStr, String longerStr) { - if (Strings.isNullOrEmpty(shortStr) || Strings.isNullOrEmpty(longerStr)) { - logger.error("empty short term or longer term,return NULL"); - return null; - } - int start = longerStr.indexOf(shortStr); - if (start < 0 || start >= longerStr.length()) { - return null; - } - int lenS = shortStr.length(); - if ((lenS + start) > longerStr.length()) { - return null; - } - List subs = new ArrayList(); - if (start > 0) { - subs.add(longerStr.substring(0, start)); - } - subs.add(longerStr.substring(start, start + lenS)); - if ((start + lenS) < longerStr.length()) { - subs.add(longerStr.substring(start + lenS)); - } - // call API to get include clause - AnalyzeResult anaLyRst = getAnalyzeResult(shortStr); - if (null == anaLyRst) { - return null; - } - List rst = anaLyRst.getList(); - // include clause - List spans = new ArrayList<>(); - SpanNearQueryBuilder span; - if (null != rst && rst.size() > 1) { - for (String tk : rst) { - span = new SpanNearQueryBuilder( - new SpanTermQueryBuilder(field, tk), anaLyRst.getSlop()).inOrder(true); - spans.add(span); - } - } else { - span = new SpanNearQueryBuilder( - new SpanTermQueryBuilder(field, shortStr), anaLyRst.getSlop()).inOrder(true); - spans.add(span); - } - List lst = new LinkedList<>(); - int maxDis = 0; - for (String st : subs) { - AnalyzeResult anLyRst = getAnalyzeResult(st); - if (null == anLyRst) { - continue; - } - if (anLyRst.getSlop() > maxDis) { - maxDis = anLyRst.getSlop(); - } - List rt = anLyRst.getList(); - for (String s : rt) { - if (!listContanins(lst, s)) { - lst.add(s); - } - } - } - List sns = new ArrayList<>(); - SpanNotQueryBuilder sn = null; - if (!lst.isEmpty()) { - List sners = new ArrayList<>(); - SpanNearQueryBuilder sner = null; - for (String st0 : lst) { - sner = new SpanNearQueryBuilder(new SpanTermQueryBuilder(field, st0), maxDis).inOrder(true); - sners.add(sner); - } - if (spans.size() < sners.size()) { - for (SpanNearQueryBuilder spanNearQueryBuilder : sners) { - for (SpanNearQueryBuilder span1 : spans) { - sn = new SpanNotQueryBuilder(span1, spanNearQueryBuilder); - sns.add(sn); - } - } - } - } - return sns; - } - - public static String getBoolQueryBuilderStr(BoolQueryBuilder qb) { - String st = qb.toString(); - char[] cs = st.toCharArray(); - StringBuilder sb = new StringBuilder(); - for (char c : cs) { - if (c != '\u0000' && c != '\n' && c != '\t') { - sb.append(c); - } - } - String sb1 = sb.toString().replaceAll(" ", " "); - while (sb1.contains(" ")) { - sb1 = sb1.replaceAll(" ", " "); - } - String[] st0 = sb1.toString().split(" "); - sb = new StringBuilder(); - int num = 0; - for (String s0 : st0) { - if (!Strings.isNullOrEmpty(s0)) { - while (s0.startsWith(" ") || s0.endsWith(" ")) { - s0 = s0.trim(); - } - if (s0.startsWith("\"") || s0.startsWith("\\“")) { - num++; - } else if (s0.endsWith("\"") || s0.endsWith("\\”")) { - num--; - } - if (!"".equals(s0)) { - sb.append(s0); - } - if (num > 0) { - sb.append(" "); - } - } - } - st = "BoolQueryBuilder=" + sb.toString(); - return st; - } - - public static BoolQueryBuilder getBoolQueryBuilderFromSqlStr(String str) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - if (Strings.isNullOrEmpty(str) || !str.startsWith("BoolQueryBuilder=")) { - return qb; - } - // str = str.replaceAll("\t", " "); - int start = str.indexOf("{"); - String jsonStr = str.substring(start); - JSONObject json = (JSONObject) JSONObject.parse(jsonStr); - if (null == json) { - return qb; - } -// QueryBuilder pqT = getQueryBuilderFromJSON(json); -// if (null != pqT && pqT instanceof BoolQueryBuilder) { -// return (BoolQueryBuilder) pqT; -// } - WrapperQueryBuilder wrapper = QueryBuilders.wrapperQuery(jsonStr); - qb.must(wrapper); - return qb; - } - - private static List getQueryBuilderFromJSON(JSONObject json) { - if (null == json) { - return null; - } - JSONObject bool = (JSONObject) json.get("bool"); - if (TObjectUtils.isNull(bool)) { - // add term and so on - JSONObject query = (JSONObject) json.get("query_string"); - if (null != query) { - String opr = query.getString("default_operator"); - Operator op = null; - if (null != opr && opr.equalsIgnoreCase("and")) { - op = Operator.AND; - } else { - op = Operator.OR; - } - List tqbs = new ArrayList<>(); - QueryBuilder tqb = new QueryStringQueryBuilder(query.getString("query")) - .defaultField(query.getString("default_field")).defaultOperator(op); - tqbs.add(tqb); - return tqbs; - } - JSONObject term = (JSONObject) json.get("term"); - if (null != term) { - List tqbs = new ArrayList<>(); - QueryBuilder tqb = null; - if (null != term.get("content")) { - tqb = new TermQueryBuilder("content", term.get("content")); - } else if (null != term.get("title")) { - tqb = new TermQueryBuilder("title", term.get("title")); - } else if (null != term.get("source")) { - tqb = new TermQueryBuilder("source", term.get("source")); - } - tqbs.add(tqb); - return tqbs; - } - // match - JSONObject match = (JSONObject) json.get("match"); - if (null != match) { - JSONObject content = match.getJSONObject("content"); - if (null != content) { - List list = new ArrayList<>(); - QueryBuilder slop = QueryBuilders - .matchPhraseQuery("content", content.getString("query")) - .slop(content.getIntValue("slop")); - list.add(slop); - return list; - } else { - JSONObject title = match.getJSONObject("title"); - if (null != title) { - List list = new ArrayList<>(); - QueryBuilder slop = QueryBuilders.matchPhraseQuery("title", title.getString("query")) - .slop(title.getIntValue("slop")); - list.add(slop); - return list; - } - } - } - // multi_match - JSONObject multi_match = json.getJSONObject("multi_match"); - if (null != multi_match) { - String qry = multi_match.getString("query"); - JSONArray fields = (JSONArray) multi_match.get("fields"); - String[] arr = new String[fields.size()];// - for (int i = 0; i < fields.size(); i++) { - Object f = fields.get(i); - if (f instanceof String) { - arr[i] = (String) f; - } - } - int slop = multi_match.getIntValue("slop"); - // MultiMatchQueryBuilder mt = new MultiMatchQueryBuilder(); - List list = new ArrayList<>(); - QueryBuilder multiQuery = QueryBuilders.multiMatchQuery(qry, arr).slop(slop); - list.add(multiQuery); - return list; - } - // span_term - JSONObject span_term = (JSONObject) json.getJSONObject("span_term"); - if (null != span_term) { - JSONObject content = span_term.getJSONObject("content"); - if (null != content) { - List list = new ArrayList<>(); - QueryBuilder span_termQ = new SpanTermQueryBuilder("content", content.getString("value")); - // spanNotQ.include(span_termQ); - list.add(span_termQ); - return list; - } - JSONObject title = span_term.getJSONObject("title"); - if (null != title) { - List list = new ArrayList<>(); - QueryBuilder span_termQ = new SpanTermQueryBuilder("title", title.getString("value")); - list.add(span_termQ); - return list; - } - } - // span_not - JSONObject span_not = (JSONObject) json.get("span_not"); - List span_termQ = null; - if (null != span_not) { - List spanNotQs = new ArrayList<>(); - SpanNotQueryBuilder spanNotQ; - JSONObject include = span_not.getJSONObject("include"); - SpanNearQueryBuilder sner = null; - if (null != include) { - span_termQ = getQueryBuilderFromJSON(include); - JSONObject span_near = include.getJSONObject("span_near"); - if (null != span_near && span_near.size() > 0) { - Object array = span_near.get("clauses"); - if (null != array) { - if (array instanceof JSONObject) { - List span_termQ2 = getQueryBuilderFromJSON((JSONObject) array); - if (null != span_termQ2 && span_termQ2.size() > 0 && - span_termQ2 instanceof SpanTermQueryBuilder) { - sner = new SpanNearQueryBuilder((SpanQueryBuilder) span_termQ2.get(0), 0).inOrder(true); - } - } else if (array instanceof JSONArray) { - ListIterator arr = ((JSONArray) array).listIterator(); - while (arr.hasNext()) { - Object obj = arr.next(); - if (null == obj) - continue; - JSONObject jsn = (JSONObject) obj; - List span_termQ2 = getQueryBuilderFromJSON((JSONObject) jsn); - if (null != span_termQ2 && span_termQ2.size() > 0 && span_termQ2 instanceof SpanTermQueryBuilder) { - sner = new SpanNearQueryBuilder((SpanQueryBuilder) span_termQ2.get(0), 0) - .inOrder(true); - } - } - } - } - } - } - JSONObject exclude = span_not.getJSONObject("exclude"); - SpanNearQueryBuilder snerExclude = null; - if (null != exclude) { - JSONObject span_near = exclude.getJSONObject("span_near"); - if (null != span_near) { - Object array = span_near.get("clauses"); - if (null != array) { - if (array instanceof JSONObject) { - List span_termQs = getQueryBuilderFromJSON((JSONObject) array); - if (null != span_termQs && span_termQs.size() > 0 && span_termQs instanceof SpanTermQueryBuilder) { - // spanNotQ..exclude((SpanTermQueryBuilder) - // span_termQ); - snerExclude = new SpanNearQueryBuilder((SpanQueryBuilder) span_termQs.get(0), 0).inOrder(true); - } - } else if (array instanceof JSONArray) { - ListIterator arr = ((JSONArray) array).listIterator(); - while (arr.hasNext()) { - Object obj = arr.next(); - if (null == obj) - continue; - JSONObject jsn = (JSONObject) obj; - List span_termQs = getQueryBuilderFromJSON((JSONObject) jsn); - if (null != span_termQs && span_termQs.size() > 0 && span_termQs instanceof SpanTermQueryBuilder) { - // spanNotQ.exclude((SpanTermQueryBuilder) - // span_termQ); - snerExclude = new SpanNearQueryBuilder((SpanQueryBuilder) span_termQs.get(0), 0).inOrder(true); - } - } - } - } - } - } - if (span_termQ != null && snerExclude != null) { - spanNotQ = new SpanNotQueryBuilder((SpanQueryBuilder) span_termQ, snerExclude); - spanNotQs.add(spanNotQ); - } - if (sner != null && snerExclude != null) { - spanNotQ = new SpanNotQueryBuilder(sner, snerExclude); - spanNotQs.add(spanNotQ); - } - return spanNotQs; - } - return null; - } - // add child BoolQueryBuilder - List pqbs = new ArrayList<>(); - BoolQueryBuilder pqb = new BoolQueryBuilder(); - // - Object filter = bool.get("filter"); - if (filter instanceof JSONObject) { - JSONObject obj = (JSONObject) filter; - if (null != obj) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.filter(qb.get(0)); - } - } - } else if (filter instanceof JSONArray) { - JSONArray array = (JSONArray) filter; - JSONObject[] objs = new JSONObject[array.size()]; - array.toArray(objs); - for (JSONObject obj : objs) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.filter(qb.get(0)); - } - } - } - pqbs.add(pqb); - // - Object mst = bool.get("must"); - if (mst instanceof JSONObject) { - JSONObject obj = (JSONObject) mst; - if (null != obj) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.must(qb.get(0)); - } - } - } else if (mst instanceof JSONArray) { - JSONArray array = (JSONArray) mst; - JSONObject[] objs = new JSONObject[array.size()]; - array.toArray(objs); - for (JSONObject obj : objs) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.must(qb.get(0)); - } - } - } - pqbs.add(pqb); - Object mstNot = bool.get("must_not"); - if (mstNot instanceof JSONObject) { - JSONObject obj = (JSONObject) mstNot; - if (null != obj) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.mustNot(qb.get(0)); - } - } - } else if (mstNot instanceof JSONArray) { - JSONArray array = (JSONArray) mstNot; - JSONObject[] objs = new JSONObject[array.size()]; - array.toArray(objs); - for (JSONObject obj : objs) { - if (null != obj) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.mustNot(qb.get(0)); - } - } - } - } - pqbs.add(pqb); - Object should = bool.get("should"); - if (should instanceof JSONObject) { - JSONObject obj = (JSONObject) should; - if (null != obj) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.mustNot(qb.get(0)); - } - } - } else if (should instanceof JSONArray) { - JSONArray array = (JSONArray) should; - JSONObject[] objs = new JSONObject[array.size()]; - array.toArray(objs); - for (JSONObject obj : objs) { - if (null != obj) { - List qb = getQueryBuilderFromJSON(obj); - if (null != qb) { - pqb.mustNot(qb.get(0)); - } - } - } - } - pqbs.add(pqb); - return pqbs; - } - - private static class AnalyzeResult { - private int slop = 0; - private List list; - - public AnalyzeResult() { - } - - public AnalyzeResult(int slop, List lst) { - this.slop = slop; - this.list = lst; - } - - public int getSlop() { - return slop; - } - - public void setSlop(int slop) { - this.slop = slop; - } - - public List getList() { - return list; - } - - public void setList(List list) { - this.list = list; - } - } - - private static AnalyzeResult getAnalyzeResult(String termStr) { - if (Strings.isNullOrEmpty(termStr)) { - return null; - } - AnalyzeResult rst = new AnalyzeResult(); - List list = new ArrayList<>(); - if (termStr.trim().length() == 1) { - list.add(termStr); - rst.setList(list); - rst.setSlop(0); - return rst; - } - ESServerUtils esServerUtil = new ESServerUtils(); - // todo - if (null == esServerUtil.getClientByClusterId(1L)) { - esServerUtil.initEsServer(); - } - // TODO - IndicesAdminClient adminClient = esServerUtil.getClientByClusterId(1L).admin().indices(); -// ESHandler esHandler = ESHandler.getESHandler("bfd_mediaforce", "192.168.189.84:9300"); -// IndicesAdminClient adminClient = esHandler.getClient().admin().indices(); - // - AnalyzeResponse analyzeResponse = adminClient.prepareAnalyze(termStr).setAnalyzer("ik").execute().actionGet(); - List lst = analyzeResponse.getTokens(); - // 分词失败 - if (null == lst || lst.isEmpty()) { - list.add(termStr); - rst.setList(list); - rst.setSlop(0); - return rst; - } else { - HashSet exclude = new HashSet<>(); - for (AnalyzeToken tk : lst) { - for (AnalyzeToken at : lst) { - if (!tk.getTerm().equals(at.getTerm()) && at.getTerm().contains(tk.getTerm())) { - exclude.add(tk); - } - } - } - List waitLst = new ArrayList<>(); - for (AnalyzeToken tk : lst) { - if (!exclude.contains(tk)) { - waitLst.add(tk); - } - } - if (waitLst.size() == 1) { - list.add(waitLst.get(0).getTerm()); - rst.setList(list); - rst.setSlop(0); - return rst; - } - // multi-term - int maxDistance = 0; - for (int i = 0; i < waitLst.size(); i++) { - AnalyzeToken tk = waitLst.get(i); - list.add(tk.getTerm()); - if (i > 0) { - AnalyzeToken prv = waitLst.get(i - 1); - int dis = tk.getPosition() - prv.getPosition() - 1; - if (dis > maxDistance) { - maxDistance = dis; - } - } - } - rst.setList(list); - rst.setSlop(maxDistance); - } - // list = dropShorterStr(list); - return rst; - } - - private static List dropShorterStr(List lst) { - if (null == lst || lst.isEmpty()) { - return null; - } - HashSet set = new HashSet<>();// 要排除的term - for (String st : lst) { - // 删除较短的分词term - for (String s : lst) { - if (!st.equals(s) && s.contains(st)) { - // lst.remove(st); - set.add(st); - } - } - } - List rst = new ArrayList<>(); - for (String st0 : lst) { - if (!set.contains(st0)) { - rst.add(st0); - } - } - return rst; - } - - private static boolean listContanins(List lst, String str) { - if (null == lst || lst.isEmpty()) { - return false; - } - for (String st : lst) { - if (st.equals(str)) { - return true; - } - } - return false; - } -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java index 5ef43d6..7310cc9 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java @@ -19,7 +19,6 @@ package com.bfd.mf.common.web.vo.params; import io.swagger.annotations.ApiModel; import io.swagger.annotations.ApiModelProperty; - import java.io.Serializable; import java.util.ArrayList; import java.util.List; @@ -84,7 +83,7 @@ public class QueryRequest implements Serializable { // 是否导出 private String scrollId; - + // 崔老师项目增加的字段 private String valueLabel; private String categoryLabel; diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java index e327c2a..c44abcc 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java @@ -3,35 +3,23 @@ package com.bfd.mf.controller; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.service.SearchAnalysisService; -import com.bfd.mf.service.SearchKeywordsCouldService; -import com.bfd.mf.common.util.analysis.DataAnalysisUtils; import com.bfd.mf.common.util.enums.RTCodeEnum; -import com.bfd.mf.common.util.slice.SliceScrollUtil; import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; import com.bfd.nlp.common.util.constants.MediaTypes; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; -import org.apache.catalina.servlet4preview.http.HttpServletRequest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Controller; -import org.springframework.web.bind.WebDataBinder; import org.springframework.web.bind.annotation.*; -import javax.naming.ldap.PagedResultsControl; -import javax.validation.ConstraintViolation; -import javax.validation.Validation; -import javax.validation.Validator; -import javax.validation.ValidatorFactory; -import java.util.List; -import java.util.Set; @Controller @RequestMapping("/analysis") @Api(value="数据分析结果&词云查询") public class SearchAnalysisController { - private static final Logger logger = LoggerFactory.getLogger(SearchAnalysisController.class); + private static final Logger logger = LoggerFactory.getLogger(SearchAnalysisController.class); @Autowired private SearchAnalysisService searchAnalysisService; @@ -43,31 +31,14 @@ public class SearchAnalysisController { @ApiOperation(value = "查询总体分析结果") @RequestMapping(value = "/trend/lineAll", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject queryAll(@RequestBody QueryRequest queryRequest) { - logger.info("[queryAll] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject jsonObject; + logger.info("[queryAll] partial / Params: {}" ,JSONObject.toJSONString(queryRequest)); try { - jsonObject = searchAnalysisService.getAnalysisResponse(queryRequest); - //JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest); - // jsonObject.put("cloudCounts",cloudCounts); + JSONObject jsonObject = searchAnalysisService.getAnalysisResponse(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, jsonObject); } catch (Exception e) { logger.error("[queryAll error = ]", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_FAIL); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, jsonObject); } -// @ResponseBody -// @ApiOperation(value = "查询词云结果") -// @RequestMapping(value = "/cloudCrawl", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) -// public JSONObject queryWordCloudCountCrawl(@RequestBody QueryRequest queryRequest) { -// logger.info("[queryWordCloudCountCrawl] partial / Params: {}", JSONObject.toJSONString(queryRequest)); -// JSONObject cloudCounts; -// try { -// cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest); -// } catch (Exception e) { -// logger.error("[queryWordCloudCountCrawl] Failed,The error message is :{}", e); -// return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); -// } -// return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, cloudCounts); -// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java index 7e53e1d..6404291 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java @@ -1,5 +1,6 @@ package com.bfd.mf.controller; +import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.enums.RTCodeEnum; @@ -15,9 +16,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.*; -import java.util.ArrayList; -import java.util.List; - @Controller @RequestMapping("/author") @ResponseBody @@ -36,9 +34,9 @@ public class SearchAuthorController { @PostMapping(value = "/subject/queryAuthors", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryAuthors(@RequestBody QueryRequest queryRequest) { - logger.info("[queryAuthors] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; + logger.info("[queryAuthors] partial / Params: {}" ,JSON.toJSONString(queryRequest)); try { + JSONObject result; String scorllId = queryRequest.getScrollId(); if(null != scorllId){ // 这个是导出要用的 result = searchDataService.exportDataInSubjectIndex(queryRequest); @@ -58,32 +56,40 @@ public class SearchAuthorController { if(page >0 && queryRequest.getPage() > page){ return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配"); } + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryAuthors] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } /** - * 查询一个用户信息列表 + * 查询一个用户信息列表 POST 请求 */ @ApiOperation(value = "查询一个用户信息列表", httpMethod = "POST") @PostMapping(value = "/subject/queryOneAuthor", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryAuthorByAuthorId(@RequestBody QueryRequest queryRequest) { - logger.info("[queryAuthorByAuthorId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; + logger.info("[queryAuthorByAuthorId] partial / Params: {}" ,JSONObject.toJSONString(queryRequest)); try { - result = searchAuthorService.queryAuthorByAuthorId(queryRequest); + JSONObject result = searchAuthorService.queryAuthorByAuthorId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryAuthorByAuthorId] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } + /** + * 查询一个用户信息 GET 请求 + * @param subjectId + * @param authorId + * @param siteId + * @return + */ @RequestMapping(value="/subject/queryOneAuthor",method= RequestMethod.GET) @ResponseBody public JSONObject queryAuthor(String subjectId,String authorId,String siteId) { @@ -91,32 +97,32 @@ public class SearchAuthorController { queryRequest.setSubjectId(subjectId); queryRequest.setAuthorId(authorId); queryRequest.setSiteId(siteId); - logger.info("[queryAuthorByAuthorId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; + logger.info("[queryAuthor] partial / Params: {}" + JSONObject.toJSONString(queryRequest)); try { - result = searchAuthorService.queryAuthorByAuthorId(queryRequest); + JSONObject result = searchAuthorService.queryAuthorByAuthorId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { - logger.error("[queryAuthorByAuthorId] Failed,The error message is :{}", e); + logger.error("[queryAuthor] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } /** - * 查询某个用户发表的主贴列表 + * 查询某个用户发表的主贴列表 这个方法不用了,现在没有这样查询的页面了 */ @ApiOperation(value = "查询一个用户发表的主贴列表", httpMethod = "POST") @PostMapping(value = "/subject/queryOneAuthorContents", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryContentsByAuthorId(@RequestBody QueryRequest queryRequest) { - logger.info("[queryContentsByAuthorId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; + logger.info("[queryContentsByAuthorId] partial / Params: {}" , JSONObject.toJSONString(queryRequest)); try { - result = searchAuthorService.queryContentsByAuthorId(queryRequest); + JSONObject result = searchAuthorService.queryContentsByAuthorId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryContentsByAuthorId] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java index 0cc9781..b098c9f 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java @@ -2,17 +2,14 @@ package com.bfd.mf.controller; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.util.constants.ESConstant; -import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.service.SearchDataService; import com.bfd.mf.common.util.enums.RTCodeEnum; import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; import com.bfd.mf.service.UpdateService; -import com.bfd.nlp.common.util.encryption.MD5; import io.swagger.annotations.Api; import io.swagger.annotations.ApiImplicitParam; import io.swagger.annotations.ApiImplicitParams; -import org.apache.tomcat.util.security.MD5Encoder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -22,9 +19,6 @@ import io.swagger.annotations.ApiOperation; import com.bfd.nlp.common.util.constants.MediaTypes; import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; @Controller @RequestMapping("/crawl") @@ -36,7 +30,6 @@ public class SearchDataController { @Autowired private UpdateService updateService; - /** * 查询数据列表 */ @@ -45,8 +38,8 @@ public class SearchDataController { @ResponseBody public JSONObject queryDataList(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result = new JSONObject(); try { + JSONObject result = new JSONObject(); long start = System.currentTimeMillis(); String scorllId = queryRequest.getScrollId(); String subjectId = queryRequest.getSubjectId(); @@ -88,11 +81,12 @@ public class SearchDataController { } long end = System.currentTimeMillis(); logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start) + " ; count = "+result.get(ESConstant.ALLDOCNUMBER)); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryData] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } /** @@ -112,14 +106,14 @@ public class SearchDataController { queryRequest.setDocId(docId); queryRequest.setSiteId(siteId); logger.info("[getInfo] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; try { - result = searchDataService.queryOneDataByDocId(queryRequest); + JSONObject result = searchDataService.queryOneDataByDocId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[getInfo] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } @@ -131,14 +125,13 @@ public class SearchDataController { @RequestMapping(value = "/subject/getInfoByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getInfoByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getInfoByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; try { - result = searchDataService.queryOneDataByDocId(queryRequest); + JSONObject result = searchDataService.queryOneDataByDocId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[getInfoByDocId] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } @@ -150,14 +143,14 @@ public class SearchDataController { @RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getCommentsByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; try { - result = searchDataService.queryComments(queryRequest); + JSONObject result = searchDataService.queryComments(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[getCommentsByDocId] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } @@ -167,30 +160,35 @@ public class SearchDataController { @ResponseBody public JSONObject queryDataCounts(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataCounts] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result = new JSONObject(); try { - result = searchDataService.queryDataCountsInOneIndex(queryRequest); + JSONObject result = searchDataService.queryDataCountsInOneIndex(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[queryDataCounts] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } + /** + * 崔老师版本使修改标签调用的接口,其他版本不调用该接口 + * @param queryRequest + * @return + */ @ApiOperation(value = "修改标签") @RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject updateLabel(@RequestBody QueryRequest queryRequest) { logger.info("[updateLabel] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result = new JSONObject(); try { - result = updateService.updateByDocId(queryRequest); + JSONObject result = updateService.updateByDocId(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } catch (Exception e) { logger.error("[updateLabel] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java index fdc18d2..aa5858d 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java @@ -2,6 +2,9 @@ package com.bfd.mf.controller; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.util.ZipUtils; +import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.common.util.enums.RTCodeEnum; +import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; import com.bfd.mf.config.BFDApiConfig; import com.bfd.mf.service.UploadExcelService; import io.swagger.annotations.ApiOperation; @@ -28,7 +31,6 @@ public class UploadExcelController { @Autowired private UploadExcelService uploadExcelService; - /** * 上传Excel */ @@ -38,22 +40,22 @@ public class UploadExcelController { public JSONObject insertExcelTask(@RequestParam("file") MultipartFile file, @RequestParam("userId") String userId) { logger.info("[insertExcelTask] partial / Params: {}", userId); - JSONObject jsonObject =new JSONObject(); try { + JSONObject jsonObject = new JSONObject(); Map userinfo = new HashMap<>(); - userinfo.put("user","user"); - userinfo.put("userId",userId); + userinfo.put(ESConstant.USER , ESConstant.USER ); + userinfo.put(ESConstant.USERID , userId); // MultipartFile file = request.getFile("file"); // 先查询一下Excel 名是不是已经存在,要是已经存在的话就提示让修改; String excelName = file.getOriginalFilename(); if(excelName.contains("xlsx")){ // 传的是Excel - jsonObject = aboutExcel(excelName,file,userinfo,jsonObject); + jsonObject = aboutExcel(excelName,file,userinfo); } + return jsonObject; } catch (Exception e) { - e.printStackTrace(); - jsonObject.put("message","lalalalaal 报错了"); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed"); } - return jsonObject; + } // private JSONObject aboutTxt(String excelName, MultipartFile file, Map userinfo, JSONObject jsonObject) { @@ -76,16 +78,16 @@ public class UploadExcelController { // return jsonObject; // } - private JSONObject aboutExcel(String excelName, MultipartFile file, Map userinfo, JSONObject jsonObject) { + private JSONObject aboutExcel(String excelName, MultipartFile file, Map userinfo) { try{ boolean isExist = uploadExcelService.queryByExcelName(excelName); boolean isTaskExist = uploadExcelService.queryByStatus(); if(isExist){ - jsonObject.put("code", 202); //同名Excel已存在,请改名后重新上传,谢谢! + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_EXIST, "Upload Exist"); }else if(!isTaskExist){ - jsonObject.put("code", 203); //当前正在运行任务数超过5个,请稍后再尝试上传,谢谢! + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_BUSY, "Upload Busy"); }else{ - String filePath = "/opt/nfsdata/excelTask/"; + String filePath = bfdApiConfig.getUploadOLYExcelPath(); boolean flag = uploadExcelService.uploadExcel(file, filePath); if (flag) { // 上传成功后,在 cl_parse_excel_task 表中添加对应的记录,每个表格一条记录 uploadExcelService.insertParseExcelTask(excelName, userinfo); @@ -96,16 +98,14 @@ public class UploadExcelController { if(taskUploadSuccess) { // 既然插入成功,那就获取对应的 专题ID ,插入 cl_subject_count; List subjectIds = uploadExcelService.getSubjectIdsByExcelName(excelName); - jsonObject.put("code", 200); //恭喜你,上传成功啦~ }else{ - jsonObject.put("code", 201); //Excel解析失败,请检查Excel + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_PARSE_FAIL, "Upload Parse Fail"); } + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_OK, "Upload Success"); } - }catch (Exception e){ - e.printStackTrace(); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed"); } - return jsonObject; } @@ -121,12 +121,8 @@ public class UploadExcelController { @RequestParam("subjectId") String subjectId, @RequestParam("fileRemak") String fileRemak) { logger.info("[insertExcelTask] partial / Params: {}", subjectId+" , "+fileRemak); - JSONObject jsonObject =new JSONObject(); String zipPath = bfdApiConfig.getUploadZipPath(); try { - Map userinfo = new HashMap<>(); - userinfo.put("user",user); - userinfo.put("userId",userId); String zipName = file.getOriginalFilename(); // 将文件上传到指定路径下,并返回是否上传成功的状态位。 boolean flag = uploadExcelService.uploadExcel(file, zipPath); @@ -134,97 +130,32 @@ public class UploadExcelController { if (flag) { long fileSize = ZipUtils.getFileSize(zipPath+zipName); if(fileSize < 1024){ - jsonObject.put("code", 205); - jsonObject.put("desc", "上传的文件为空,请核查文件。"); - return jsonObject; + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_EMPTY, "File Empty"); } // // 解压zip ,校验数据,非excel 的,非txt 的都需要提示 Map> fileNameMap = ZipUtils.unZip(new File(zipPath+zipName),zipPath+zipFileName); - if(fileNameMap.containsKey("fileName")) { - String fileName = fileNameMap.get("fileName").get(0); + if(fileNameMap.containsKey(ESConstant.FILENAME)) { + String fileName = fileNameMap.get(ESConstant.FILENAME).get(0); logger.info("The FileName :" + fileName); + fileNameMap.remove(ESConstant.FILENAME); // 获取一下文件的大小 if (!fileName.contains(".xlsx") && !fileName.contains(".txt")) { - jsonObject.put("code", 204); - jsonObject.put("desc", "请上传 Excel 或 txt 文件"); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_ERROR, "File Error"); } else { // 需要在 cl_task 表中添加一个任务 boolean insertSuccess = uploadExcelService.insertTask(subjectId, user, userId, fileRemak, zipName); - jsonObject.put("code", 200); - jsonObject.put("desc","OK"); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_OK, "Upload Success"); } - fileNameMap.remove("fileName"); }else{ - jsonObject.put("code", 204); - jsonObject.put("desc", "请上传 Excel 或 txt 文件"); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_ERROR, "File Error"); } - }else{ - jsonObject.put("code",206); - jsonObject.put("desc","上传失败"); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed"); } - } catch (Exception e) { - e.printStackTrace(); - jsonObject.put("code",206); - jsonObject.put("desc","上传失败"); + return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed"); } - return jsonObject; } +} - -// /** -// * 上传Excel相关 -// */ -// private boolean queryByExcelName(String excelName) { -// try{ -// String newExcelName = excelName.replace(".xlsx",""); -// boolean success = uploadExcelService.isTaskSucess(newExcelName); -// if(success){ -// return true; -// } -// return false; -// }catch (Exception e){ -// e.printStackTrace(); -// return false; -// } -// } -// -// /** -// * 上传Excel相关 -// */ -// private boolean queryByStatus() { -// try{ -// boolean isExist = uploadExcelService.isTaskExist(); -// if(isExist){ // 如果任务为空,就说明可以添加新的任务进来,如果不为空,就不要添加新的任务进来啦~ -// return true; -// }else{ -// return false; -// } -// }catch (Exception e){ -// e.printStackTrace(); -// return false; -// } -// } -// /** -// * 上传Excel相关 -// */ -// private boolean uploadExcel(MultipartFile file,String filePath) { -// try{ -// InputStream inputStream = file.getInputStream(); -// byte[] buffer = new byte[inputStream.available()]; -// inputStream.read(buffer); -// File targetFile = new File(filePath+file.getOriginalFilename()); -// OutputStream outStream = new FileOutputStream(targetFile); -// outStream.write(buffer); -// inputStream.close(); -// outStream.close(); -// return true; -// }catch (Exception e){ -// e.printStackTrace(); -// return false; -// } -// } - -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java index 0b4dc0d..e63b08e 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java @@ -1,9 +1,6 @@ package com.bfd.mf.service; import com.alibaba.fastjson.JSONObject; -import com.bfd.mf.common.service.es.ClusterService; -import com.bfd.mf.common.service.es.EsQueryServiceForSQMini; -import com.bfd.mf.common.service.es.SubjectQueryDataService; import com.bfd.mf.common.util.analysis.DataAnalysisUtils; import com.bfd.mf.common.util.constants.ConditionCommon; import com.bfd.mf.common.util.constants.ESConstant; @@ -18,7 +15,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import springfox.documentation.spring.web.json.Json; import java.util.*; import java.util.stream.Collectors; @@ -33,12 +29,13 @@ public class SearchAnalysisService { private SliceScrollUtil sliceScrollUtil; @Autowired private SearchKeywordsCouldService searchKeywordsCouldService; - @Autowired - private ClusterService clusterService; - @Autowired - private SubjectQueryDataService subjectQueryDataService; - @Autowired - private EsQueryServiceForSQMini esQueryServiceForSQMini; + +// @Autowired +// private ClusterService clusterService; +// @Autowired +// private SubjectQueryDataService subjectQueryDataService; +// @Autowired +// private EsQueryServiceForSQMini esQueryServiceForSQMini; public JSONObject getAnalysisResponse(QueryRequest queryRequest) { JSONObject jsonObject = new JSONObject(); @@ -94,7 +91,7 @@ public class SearchAnalysisService { Map> channelMaps = new HashMap<>(); Map> valueMaps = new HashMap<>(); Map> categoryMaps = new HashMap<>(); - logger.info("dataAnalysisChannelCounts : totalNumber = " + esMonitorEntityList.size()); + logger.info("dataAnalysisChannelCounts : totalNumber :{} " , esMonitorEntityList.size()); Map channelMap = new HashMap<>(); Map valueLabelMap = new HashMap<>(); Map categoryLabelMap = new HashMap<>(); @@ -175,7 +172,7 @@ public class SearchAnalysisService { private JSONObject dataAnalysisTrendByDayQueryTimes(QueryRequest queryRequest, List esMonitorEntityList) { JSONObject jsonResult = new JSONObject(); - logger.info("dataAnalysisTrendByDayQueryTimes : totalNumber = " + esMonitorEntityList.size()); + logger.info("dataAnalysisTrendByDayQueryTimes : totalNumber :{} " , esMonitorEntityList.size()); jsonResult.put("totalNumber", esMonitorEntityList.size()); // Map emotionEngMaps = MonitorConstant.emotionEngByThresholdMaps(); try { @@ -184,7 +181,7 @@ public class SearchAnalysisService { // 发布时间相同的 return o1.getPubTime().compareTo(o2.getPubTime()) == 0 ? o1.getCrawlTime().compareTo(o2.getCrawlTime()) : o1.getPubTime().compareTo(o2.getPubTime()); }); - logger.info("总数据条数: "+esMonitorEntityList.size()); + logger.info("总数据条数:{}",esMonitorEntityList.size()); Long startTime = queryRequest.getStartTime(); Long endTime = queryRequest.getEndTime(); Long time_difference = 0L; @@ -208,8 +205,8 @@ public class SearchAnalysisService { Map> yearChannelMaps = resultMap.get("yearChannelMap"); Map> dayEmoMaps = resultMap.get("dayEmoMap"); Map> yearEmoMaps = resultMap.get("yearEmoMap"); - System.out.println("dayChannelMaps"+JSONObject.toJSONString(dayChannelMaps)); - System.out.println("yearChannelMaps"+JSONObject.toJSONString(yearChannelMaps)); +// System.out.println("dayChannelMaps"+JSONObject.toJSONString(dayChannelMaps)); +// System.out.println("yearChannelMaps"+JSONObject.toJSONString(yearChannelMaps)); List dayList = new ArrayList<>(); List yearList = new ArrayList<>(); for (Long l:timeList) { @@ -226,11 +223,11 @@ public class SearchAnalysisService { List channelTrendList = new ArrayList<>(); List emoTrendList = new ArrayList<>(); if(time_difference > ONEYEAR){ - System.out.println("按年拆"); + // System.out.println("按年拆"); channelTrendList = parseChannleMapsResult(docTypeMap, yearChannelMaps, newYearList); emoTrendList = parseEmoMapsResult(sentimentMap, yearEmoMaps, newYearList); }else{ - System.out.println(" 按天拆"); + // System.out.println(" 按天拆"); channelTrendList = parseChannleMapsResult(docTypeMap, dayChannelMaps, newDayList); emoTrendList = parseEmoMapsResult(sentimentMap, dayEmoMaps, newDayList); } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java index a157e5f..396c881 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java @@ -26,7 +26,6 @@ import java.util.*; @Service public class SearchAuthorService extends CrudService implements Serializable { private static Logger logger = LoggerFactory.getLogger(SearchAuthorService.class); - private static SimpleDateFormat format = new SimpleDateFormat("YYYY-MM-DD HH:mm:SS"); @Autowired private ClusterService clusterService; @Autowired @@ -57,7 +56,7 @@ public class SearchAuthorService extends CrudServicedataList = esQueryAuthorService.queryAuthorListByKeyword(indexNames,queryRequest); - logger.info("[SearchAuthorService] queryAuthorList: TotalCount = " + totalCount); + logger.info("[SearchAuthorService] queryAuthorList: TotalCount :{} " , totalCount); jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount); jsonObject.put(ESConstant.MONITORLISTS, dataList); }catch (Exception e){ @@ -142,13 +141,12 @@ public class SearchAuthorService extends CrudService esMonitorEntityLists = new ArrayList<>(); parseQueryResult(dataList, esMonitorEntityLists, indexName); Long totalCount = esQueryAuthorCountService.queryContentsCountByAuthorId(indexNames, queryRequest); - logger.info("[SearchAuthorService] queryContentsByAuthorId: TotalCount = " + totalCount); - jsonObject.put("foldDocAllNumber", totalCount); - jsonObject.put("monitorLists", esMonitorEntityLists); + logger.info("[SearchAuthorService] queryContentsByAuthorId: TotalCount :{} " ,totalCount); + jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount); + jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists); }catch (Exception e){ - e.printStackTrace(); logger.error("[SearchAuthorService] queryContentsByAuthorId error !"); - jsonObject.put("foldDocAllNumber",0); + jsonObject.put(ESConstant.ALLDOCNUMBER,0); jsonObject.put(ESConstant.MONITORLISTS, new ArrayList<>()); } return jsonObject; @@ -159,8 +157,8 @@ public class SearchAuthorService extends CrudService siteIdsMap = new HashMap<>(); Map siteIconMap = new HashMap<>(); for (Map map: site) { - siteIdsMap.put(map.get("cid").toString().toLowerCase(),map.get("site_id").toString()); - siteIconMap.put(map.get("cid").toString().toLowerCase(),map.get("site_icon").toString()); + siteIdsMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map.get("site_id").toString()); + siteIconMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map.get("site_icon").toString()); } if(null != dataList && dataList.size() > 0) { for (JSONObject json : dataList) { @@ -235,8 +233,8 @@ public class SearchAuthorService extends CrudService> videoList = (List>) newjsonObject.get("videoList"); + //JSONObject newjsonObject = getVideoPathList(jsonObject); + //List> videoList = (List>) newjsonObject.get(ESConstant.VIDEOLIST); ESMonitorEntity esMonitorEntity = new ESMonitorEntity(); try { @@ -280,52 +278,54 @@ public class SearchAuthorService extends CrudService> videoList = new ArrayList<>(); - if(videoPath.size() > 0) { - videoUrl = videoPath.get(0).toString(); - if (jsonObject.get(ESConstant.VIDEOPATHSIZE).toString().contains("http")) { - Map videoSizeMap = (Map) JSONUtils.parse((String) jsonObject.get(ESConstant.VIDEOPATHSIZE)); - if (videoSizeMap.containsKey(videoUrl)) { - size = videoSizeMap.get(videoUrl); - } - } - Map videoMap = new HashMap<>(); - videoMap.put(ESConstant.URL, videoUrl); - videoMap.put("size", size); - videoMap.put(ESConstant.RESOLUTION, resolution); - videoMap.put(ESConstant.VIDEOTIME, videoTime); - videoList.add(videoMap); - } - jsonObject.put("videoList", videoList); - } - }catch (Exception e){ - e.printStackTrace(); - } - jsonObject.remove(ESConstant.VIDEOPATHSIZE); - jsonObject.remove(ESConstant.RESOLUTION); - jsonObject.remove(ESConstant.VIDEOTIME); - return jsonObject; - } - - private String getIndexName(QueryRequest queryRequest) { - Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 - List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); - String indexName = currentIndexList.get(0); - return indexName; - } - private String[] getIndexNames(QueryRequest queryRequest) { Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); String[] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); return indexName; } + +// private JSONObject getVideoPathList(JSONObject jsonObject) { +// try { +// if (jsonObject.containsKey(ESConstant.ISDOWNLOAD) && jsonObject.get(ESConstant.ISDOWNLOAD).toString().equals("true")) { +// String videoTime = jsonObject.getString(ESConstant.VIDEOTIME); +// String resolution = jsonObject.getString(ESConstant.RESOLUTION); +// List videoPath = JSONObject.parseArray(jsonObject.get(ESConstant.VIDEOPATH).toString()); +// String videoUrl = ""; +// String size = ""; +// List> videoList = new ArrayList<>(); +// if(videoPath.size() > 0) { +// videoUrl = videoPath.get(0).toString(); +// if (jsonObject.get(ESConstant.VIDEOPATHSIZE).toString().contains("http")) { +// Map videoSizeMap = (Map) JSONUtils.parse((String) jsonObject.get(ESConstant.VIDEOPATHSIZE)); +// if (videoSizeMap.containsKey(videoUrl)) { +// size = videoSizeMap.get(videoUrl); +// } +// } +// Map videoMap = new HashMap<>(); +// videoMap.put(ESConstant.URL, videoUrl); +// videoMap.put(ESConstant.SIZE, size); +// videoMap.put(ESConstant.RESOLUTION, resolution); +// videoMap.put(ESConstant.VIDEOTIME, videoTime); +// videoList.add(videoMap); +// } +// jsonObject.put(ESConstant.VIDEOLIST, videoList); +// } +// }catch (Exception e){ +// e.printStackTrace(); +// } +// jsonObject.remove(ESConstant.VIDEOPATHSIZE); +// jsonObject.remove(ESConstant.RESOLUTION); +// jsonObject.remove(ESConstant.VIDEOTIME); +// return jsonObject; +// } +// +// private String getIndexName(QueryRequest queryRequest) { +// Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 +// List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); +// String indexName = currentIndexList.get(0); +// return indexName; +// } + + } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java index 628613a..eb4a63d 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java @@ -60,8 +60,8 @@ public class SearchDataService extends CrudService dataIdList, String orderFlag,String sortFlag, List currentIndexList, Cluster cluster) { - if(sortFlag.equals("comment")){ - sortFlag = "commentsCount"; + if(sortFlag.equals(ESConstant.COMMENT)){ + sortFlag = ESConstant.COMMENTS_COUNT; } if(sortFlag.equals("")){ sortFlag = ESConstant.PUBTIME; @@ -83,11 +83,15 @@ public class SearchDataService extends CrudService esMonitorListEntity) throws Exception { - SearchHit[] hits = response.getHits().getHits(); - for (SearchHit mainMessageHit : hits) { - ESMonitorEntity mainMonitorEntity = parseMainMessage(mainMessageHit); - esMonitorListEntity.add(mainMonitorEntity); + private void parseQueryResult(SearchResponse response, List esMonitorListEntity) { + try { + SearchHit[] hits = response.getHits().getHits(); + for (SearchHit mainMessageHit : hits) { + ESMonitorEntity mainMonitorEntity = parseMainMessage(mainMessageHit); + esMonitorListEntity.add(mainMonitorEntity); + } + }catch (Exception e){ + logger.info("[SearchDataService] parseQueryResult ERROR !"); } } /** @@ -98,7 +102,7 @@ public class SearchDataService extends CrudService> site = siteRepository.findsiteByDel(0); Map> siteMap = new HashMap<>(); for (Map map : site) { - siteMap.put(map.get("cid").toString().toLowerCase(),map); + siteMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map); } if (null != dataList && dataList.size() > 0) { for (JSONObject json : dataList) { @@ -107,7 +111,7 @@ public class SearchDataService extends CrudService siteOtherMap = siteMap.get(enSource); if (siteOtherMap.containsKey("site_id")) { @@ -265,9 +267,9 @@ public class SearchDataService extends CrudService> site = siteRepository.findSiteByEnSource(enSource); Map> siteMap = new HashMap<>(); for (Map map : site) { - siteMap.put(map.get("cid").toString().toLowerCase(),map); + siteMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map); } String siteId = ""; String icon = ""; String siteType = ""; Map siteOtherMap = siteMap.get(enSource); - if(enSource.equals("sina")){ + if(enSource.equals(ESConstant.SINA)){ siteId = "183"; - icon = ""; - siteType = ""; }else { if (siteOtherMap.containsKey("site_id")) { siteId = siteMap.get(enSource).get("site_id").toString(); @@ -719,15 +719,15 @@ public class SearchDataService extends CrudService smallImgs = (List) jsonObject.get(ESConstant.SMALLIMGS); List> imagePathSize = new ArrayList<>(); - if(null != smallImgs && smallImgs.size() > 0) { + if(null != smallImgs && smallImgs.size() > 0) { // 由于电商的图片不做下载,因此输出到页面的时候写死了大小和分辨率 for (Object img : smallImgs) { if (!img.toString().contains("http")) { Map imagePathMap = new HashMap<>(); String url = "http:" + img ; - imagePathMap.put(ESConstant.URL,url); - imagePathMap.put("size","4KB"); - imagePathMap.put("videoTime",""); - imagePathMap.put("resolution","50×50"); + imagePathMap.put(ESConstant.URL ,url); + imagePathMap.put(ESConstant.SIZE ,"4KB"); + imagePathMap.put(ESConstant.VIDEOTIME ,""); + imagePathMap.put(ESConstant.RESOLUTION ,"50×50"); imagePathSize.add(imagePathMap); } } @@ -795,15 +795,15 @@ public class SearchDataService extends CrudService()); } } else if(entry.getKey().equals(ESConstant.ATTITUDES_COUNT)){ - if(entry.getValue().toString().contains("totalCount")){ + if(entry.getValue().toString().contains(ESConstant.TOTALCOUNT)){ JSONObject totalCount = JSONObject.parseObject(entry.getValue().toString()); - jsonObject.put(entry.getKey(),totalCount.get("totalCount")); + jsonObject.put(entry.getKey(),totalCount.get(ESConstant.TOTALCOUNT)); } } else { jsonObject.put(entry.getKey(), entry.getValue()); @@ -828,7 +828,7 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); Long clusterId = cluster.getId(); - logger.info("[SearchDataService] queryDataInOneIndex: clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); + logger.info("[SearchDataService] queryDataInOneIndex: clusterId :{} ; currentIndexList : {}", clusterId,currentIndexList.toString()); // String indexName = currentIndexList.get(0); String indexNames [] = currentIndexList.toArray(new String [currentIndexList.size()]); List dataList = esQueryServiceForSQMini.queryDataFromOneSubject(indexNames, queryRequest); @@ -920,7 +920,7 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); Long clusterId = cluster.getId(); String [] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); - logger.info("[SearchDataService] exportDataInOneIndex: IndexName = " +indexName[0] +" ; clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); + logger.info("[SearchDataService] exportDataInSubjectIndex : IndexName :{} ; clusterId :{} ; currentIndexList :{}",indexName[0], clusterId , currentIndexList.toString()); // 开始查询 jsonObject= esQueryServiceForSQMini.exportDataFromOneSubject(indexName, queryRequest); - List dataList = (List) jsonObject.get("monitorLists"); + List dataList = (List) jsonObject.get(ESConstant.MONITORLISTS); List esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); parseQueryResult(dataList, esMonitorEntityLists,searchType); @@ -967,10 +967,10 @@ public class SearchDataService extends CrudService dataList = (List) jsonObject.get("monitorLists"); + List dataList = (List) jsonObject.get(ESConstant.MONITORLISTS); List esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); parseQueryResult(dataList, esMonitorEntityLists,searchType); @@ -987,7 +987,7 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); Long clusterId = cluster.getId(); - logger.info("[SearchDataService] queryDataCountsInOneIndex: clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); + logger.info("[SearchDataService] queryDataCountsInOneIndex: clusterId : {}; currentIndexList : {}" , clusterId ,currentIndexList.toString()); // String indexName = currentIndexList.get(0); String indexNames [] = currentIndexList.toArray(new String [currentIndexList.size()]); Long contentCount = 0L; @@ -1003,36 +1003,12 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); -// Long clusterId = cluster.getId(); -// String [] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); -// logger.info("[SearchDataService] exportDataInOneIndex: IndexName = " +indexName[0] +" ; clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); -// // 开始查询 -// jsonObject= esQueryServiceForSQMini.exportDataFromOneSubjectTestGroupBy(indexName, queryRequest); -// -// List dataList = (List) jsonObject.get("monitorLists"); -// List esMonitorEntityLists = new ArrayList<>(); -// Integer searchType = queryRequest.getSearchType(); -// parseQueryResult(dataList, esMonitorEntityLists,searchType); -// logger.info("Query Finish exportDataInSubjectIndexTestGroupBy size: " + esMonitorEntityLists.size()); -// jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists); -// }catch (Exception e){ -// e.printStackTrace(); -// } -// return jsonObject; -// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java index d70e2a8..db7a7c5 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java @@ -3,10 +3,7 @@ package com.bfd.mf.service; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.service.text.TextService; import com.bfd.mf.common.util.constants.ConditionCommon; -import com.bfd.mf.common.util.constants.ESConstant; -import com.bfd.mf.common.util.slice.SliceScrollUtil; import com.bfd.mf.common.util.utility.CollectionUtils; -import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity; import com.bfd.nlp.common.util.object.TObjectUtils; import com.bfd.nlp.common.util.string.TStringUtils; diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java b/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java index ca48c99..e540e23 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java @@ -4,20 +4,12 @@ import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.service.common.CrudService; import com.bfd.mf.common.service.es.ClusterService; import com.bfd.mf.common.util.ESServerUtils; -import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.es.EsUtils; import com.bfd.mf.common.web.entity.mysql.SentimentModify; import com.bfd.mf.common.web.entity.mysql.cache.Cluster; import com.bfd.mf.common.web.repository.mysql.SentimentRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.config.BFDApiConfig; -import org.elasticsearch.action.search.SearchRequestBuilder; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.ScriptQueryBuilder; -import org.elasticsearch.script.Script; -import org.elasticsearch.script.ScriptType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -74,7 +66,7 @@ public class UpdateService extends CrudService 0) { diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java b/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java index c371e1f..ccf8fb0 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java @@ -1,6 +1,7 @@ package com.bfd.mf.service; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.es.EsUtils2; import com.bfd.mf.common.web.entity.mysql.topic.ParseExcelTask; import com.bfd.mf.common.web.repository.mysql.topic.ParseExcelTaskRepository; @@ -45,7 +46,7 @@ public class UploadExcelService { public void insertParseExcelTask(String excelName,Map userinfo)throws Exception{ ParseExcelTask excelDetail = convertExcelTaskDetailEntity(excelName,userinfo); excelDetail = parseExcelTaskRepository.save(excelDetail); - logger.info("[UserNodeService] insertExcelTask : "+ JSONObject.toJSONString(excelDetail)); + logger.info("[UserNodeService] insertExcelTask : {}",JSONObject.toJSONString(excelDetail)); } // public boolean isExcelExist(String excelName) throws Exception{ @@ -87,8 +88,8 @@ public class UploadExcelService { if (null == excelName) { throw new IllegalArgumentException(" request node is null"); } - String user = (String) userinfo.get("user"); - String userId = (String) userinfo.get("userId"); + String user = (String) userinfo.get(ESConstant.USER); + String userId = (String) userinfo.get(ESConstant.USERID); ParseExcelTask excelTaskDetail = new ParseExcelTask(); excelTaskDetail.setExcelName(excelName.replace(".xlsx","")); excelTaskDetail.setCreateUser(user); @@ -166,6 +167,19 @@ public class UploadExcelService { } } + public boolean insertTask(String subjectId, String user, String userId, String fileRemak, String zipName) { + boolean flag = true; + try { + BigInteger id = BigInteger.valueOf(Long.valueOf(subjectId)); + String crawlDataFlag = "keyword:" + fileRemak; + parseExcelTaskRepository.insertTask(id,user,userId,fileRemak,zipName,crawlDataFlag); + }catch (Exception e){ + e.printStackTrace(); + return false; + } + return flag; + } + // public BigInteger getOneSubjectId() { // int subjectId = parseExcelTaskRepository.findOneSubjectId(); // BigInteger newSubjectId = new BigInteger((subjectId+1)+""); @@ -430,19 +444,6 @@ public class UploadExcelService { // return resultMap; // } - public boolean insertTask(String subjectId, String user, String userId, String fileRemak, String zipName) { - boolean flag = true; - try { - BigInteger id = BigInteger.valueOf(Long.valueOf(subjectId)); - String crawlDataFlag = "keyword:" + fileRemak; - parseExcelTaskRepository.insertTask(id,user,userId,fileRemak,zipName,crawlDataFlag); - }catch (Exception e){ - e.printStackTrace(); - return false; - } - return flag; - } - // public static void main(String[] args) { // List line = ReadLine.readLine(new File("E:\\100.txt")); // for (String l:line) { diff --git a/cl_search_api/src/main/resources/application.yml b/cl_search_api/src/main/resources/application.yml index 8fc57eb..8dac74d 100644 --- a/cl_search_api/src/main/resources/application.yml +++ b/cl_search_api/src/main/resources/application.yml @@ -7,6 +7,11 @@ server: http2: enabled: true +logging: + config: ../etc/logback.xml + level: + com.bfd.mf.controller: trace + spring: datasource: driver-class-name: com.mysql.jdbc.Driver @@ -49,6 +54,8 @@ bfd.api.mf: address: 172.18.1.134:9301 upper: 2018-09-01 standby: cl_index_* + + # es-mini: # name: SQ_Mini # address: 172.26.11.111:9301 diff --git a/cl_search_api/src/main/resources/log4j.properties b/cl_search_api/src/main/resources/log4j.properties deleted file mode 100644 index 8ca672a..0000000 --- a/cl_search_api/src/main/resources/log4j.properties +++ /dev/null @@ -1,18 +0,0 @@ -log4j.rootLogger=INFO, error - -###### error appender definition ####### -log4j.appender.error=org.apache.log4j.DailyRollingFileAppender -log4j.appender.error.File=logs/sdkclient_error.log -log4j.appender.error.Append=true -log4j.appender.error.DatePattern='.'yyyy-MM-dd-HH -log4j.appender.error.layout=org.apache.log4j.PatternLayout -log4j.appender.error.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c] %m%n - -#error only -log4j.appender.error.filter.F1=org.apache.log4j.varia.LevelRangeFilter -log4j.appender.error.filter.F1.LevelMin=ERROR -log4j.appender.error.filter.F1.LevelMax=ERROR -# -log4j.appender.error.filter.F2=org.apache.log4j.varia.LevelMatchFilter -log4j.appender.error.filter.F2.levelToMatch=WARN -log4j.appender.error.filter.F2.acceptOnMatch=false \ No newline at end of file diff --git a/cl_search_api/src/main/resources/log4j2.properties b/cl_search_api/src/main/resources/log4j2.properties deleted file mode 100644 index 07964d7..0000000 --- a/cl_search_api/src/main/resources/log4j2.properties +++ /dev/null @@ -1,32 +0,0 @@ -#### ����### -#log4j.rootLogger = stdout,D,E,I -# -#### �����Ϣ������̧ ### -#log4j.appender.stdout = org.apache.log4j.ConsoleAppender -#log4j.appender.stdout.Target = System.out -#log4j.appender.stdout.layout = org.apache.log4j.PatternLayout -#log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n -# -#### ���INFO �������ϵ���־�ļ����� ### -#log4j.appender.I = org.apache.log4j.DailyRollingFileAppender -#log4j.appender.I.File = log_info.log -#log4j.appender.I.Append = true -#log4j.appender.I.Threshold = INFO -#log4j.appender.I.layout = org.apache.log4j.PatternLayout -#log4j.appender.I.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n -# -#### ���DEBUG �������ϵ���־�ļ����� ### -#log4j.appender.D = org.apache.log4j.DailyRollingFileAppender -#log4j.appender.D.File = log_debug.log -#log4j.appender.D.Append = true -#log4j.appender.D.Threshold = INFO -#log4j.appender.D.layout = org.apache.log4j.PatternLayout -#log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n -# -#### ���ERROR �������ϵ���־�ļ����� ### -#log4j.appender.E = org.apache.log4j.DailyRollingFileAppender -#log4j.appender.E.File = log_error.log -#log4j.appender.E.Append = true -#log4j.appender.E.Threshold = ERROR -#log4j.appender.E.layout = org.apache.log4j.PatternLayout -#log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n