Browse Source

release-3.1.4(添加了日志管理,修改了一些日志输出语句)

release-1.0
杜静 5 years ago
parent
commit
f1307cbec6
  1. 6
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java
  2. 14
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java
  3. 2
      cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java
  4. 22
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java
  5. 13
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java
  6. 7
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java
  7. 29
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java
  8. 206
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java
  9. 12
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java
  10. 7
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java
  11. 105
      cl_query_data_job/src/main/resources/application-0827.yml
  12. 17
      cl_query_data_job/src/main/resources/application-113.yml
  13. 23
      cl_query_data_job/src/main/resources/application.yml
  14. 4
      cl_search_api/cl_search_api.iml
  15. 14
      cl_search_api/pom.xml
  16. 6
      cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java
  17. 2
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java
  18. 103
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java
  19. 3
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java
  20. 64
      cl_search_api/src/main/java/com/bfd/mf/common/util/OperatorUtil.java
  21. 18
      cl_search_api/src/main/java/com/bfd/mf/common/util/ZipUtils.java
  22. 26
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  23. 8
      cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java
  24. 22
      cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java
  25. 2
      cl_search_api/src/main/java/com/bfd/mf/common/util/es/MonitorUtils.java
  26. 12
      cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java
  27. 360
      cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java
  28. 1042
      cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpNodeUtil.java
  29. 911
      cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpressionParser.java
  30. 669
      cl_search_api/src/main/java/com/bfd/mf/common/util/subject/QueryBuilderUtil.java
  31. 3
      cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java
  32. 37
      cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java
  33. 48
      cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java
  34. 46
      cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java
  35. 123
      cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java
  36. 31
      cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java
  37. 106
      cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java
  38. 128
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java
  39. 3
      cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java
  40. 10
      cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java
  41. 33
      cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java
  42. 7
      cl_search_api/src/main/resources/application.yml
  43. 18
      cl_search_api/src/main/resources/log4j.properties
  44. 32
      cl_search_api/src/main/resources/log4j2.properties

6
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java

@ -45,7 +45,8 @@ public interface SubjectCountRepository extends CrudRepository<SubjectCount, Lon
"url_type_total_count =?4,keyword_type_total_count=?5,account_type_total_count=?6," +
"url_type_count=?7,keyword_type_count=?8,account_type_count=?9," +
"social_total_count=?10,social_count=?11,bbs_total_count=?12,bbs_count=?13,blog_total_count=?14,blog_count =?15,news_total_count=?16,news_count=?17," +
"search_total_count=?18,search_count=?19,item_total_count=?20,item_count=?21,video_total_count=?22,video_count=?23,life_total_count=?24,life_count=?25 " +
"search_total_count=?18,search_count=?19,item_total_count=?20,item_count=?21,video_total_count=?22,video_count=?23,life_total_count=?24,life_count=?25," +
"has_image_count=?26,has_video_count=?27,has_file_count=?28,has_text_count=?29 " +
"where id=?1", nativeQuery = true)
void updateBySubjectId(BigInteger id,
BigInteger sumToday, BigInteger sum,
@ -54,7 +55,8 @@ public interface SubjectCountRepository extends CrudRepository<SubjectCount, Lon
BigInteger social_total_count, BigInteger social_count, BigInteger bbs_total_count, BigInteger bbs_count,
BigInteger blog_total_count, BigInteger blog_count, BigInteger news_total_count, BigInteger news_count,
BigInteger search_total_count, BigInteger search_count, BigInteger item_total_count, BigInteger item_count,
BigInteger video_total_count, BigInteger video_count, BigInteger life_total_count, BigInteger life_count);
BigInteger video_total_count, BigInteger video_count, BigInteger life_total_count, BigInteger life_count,
BigInteger has_image_count,BigInteger has_video_count, BigInteger has_file_count,BigInteger has_text_count);
@Query(value = "SELECT update_time FROM cl_subject_count WHERE subject_id = ?1 AND create_time = ?2", nativeQuery = true)

14
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java

@ -12,13 +12,15 @@ import java.util.Map;
public interface TaskRepository extends CrudRepository<Task, Long> {
@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true)
@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 3 AND app_id = '61qb' AND subject_id = 12094 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true)
// @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.subject_id = 12094 AND ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC ;",nativeQuery = true)
List<Task> findAllNewTask();
/**
 * Returns the non-deleted tasks that should be included in the statistics run.
 *
 * Selection criteria, as written in the native query below: ct.del = 0 and either
 * crawl_status is 0 or 1, or crawl_status is 3 and end_time is later than two days
 * before the current date. Each task row is joined with intelligent_crawl.cl_site
 * on cid so that site_type can be selected alongside the task columns.
 *
 * @return the matching tasks
 */
@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ((ct.crawl_status = 1 OR ct.crawl_status = 0) OR (ct.crawl_status = 3 AND ct.end_time > date_sub(curdate(),interval 2 day))); ",nativeQuery = true)
// Earlier query variants, kept commented out: a single-subject filter and two unfiltered "del = 0" versions.
// @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.subject_id = 12505 ; ",nativeQuery = true)
// @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 ; ",nativeQuery = true)
//@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 ; ",nativeQuery = true)
List<Task> findAllBydel0();
@Query(value = "SELECT sum(data_total) FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid=cs.cid WHERE ct.del =0 AND ct.subject_id = ?1 AND cs.site_type = ?2",nativeQuery = true)
@ -37,12 +39,15 @@ public interface TaskRepository extends CrudRepository<Task, Long> {
/**
 * Sums data_total over cl_task rows whose end_time lies strictly between the two
 * bounds and whose crawl_status is 3 and task_type is not 3.
 *
 * @param taskStartTime exclusive lower bound compared against end_time
 * @param taskEndTime exclusive upper bound compared against end_time
 * @return the summed data_total; NOTE(review): SQL SUM yields NULL when no row
 *         matches, so callers presumably have to handle a null result - confirm
 */
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time <?2 AND crawl_status = 3 AND task_type <>3 ",nativeQuery = true)
Long findTodayDataTotal(String taskStartTime ,String taskEndTime);
/**
 * Sums the per-task attachment counters (has_image_total, has_video_total,
 * has_file_total, has_text_total) over every cl_task row of the given subject.
 * The four sums are selected under the aliases image, video, file and text.
 *
 * NOTE(review): this query does not filter on del = 0, so deleted tasks are
 * included in the sums - confirm that this is intended.
 * NOTE(review): SUM yields NULL when the subject has no task rows, and the JDBC
 * driver may return a numeric type other than Long for SUM; callers should not
 * assume non-null Long values - verify against the caller.
 *
 * @param subjectId value bound to subject_id in the WHERE clause
 * @return one row of sums; presumably keyed by the column aliases - TODO confirm
 */
@Query(value = "SELECT sum(has_image_total) image,sum(has_video_total) video,sum(has_file_total) file,SUM(has_text_total) text FROM `cl_task` WHERE subject_id = ?1 ;",nativeQuery = true)
Map<String,Long> findTotalByHas(BigInteger subjectId);
/**
* 更新每个任务 拉数据次数
*/
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set cache_num=?1 where id=?2", nativeQuery = true)
@Query(value = "update cl_task set cache_num=?1 ,update_time = now() where id=?2", nativeQuery = true)
Integer updateStatus(int cache_num,long id);
/**
@ -63,6 +68,11 @@ public interface TaskRepository extends CrudRepository<Task, Long> {
/**
 * Overwrites the total, today and per-attachment-type counters of a single task.
 *
 * @param id         id of the cl_task row to update (bound to ?1)
 * @param totalCount value written to data_total
 * @param todayCount value written to today_data_total
 * @param imageCount value written to has_image_total
 * @param videoCount value written to has_video_total
 * @param fileCount  value written to has_file_total
 * @param textCount  value written to has_text_total
 */
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set data_total =?2 , today_data_total =?3 ,has_image_total = ?4,has_video_total = ?5, has_file_total = ?6,has_text_total = ?7 where id =?1 ", nativeQuery = true)
void updateTaskCountAll(Long id, Long totalCount, Long todayCount,Long imageCount,Long videoCount,Long fileCount,Long textCount);
/**
 * Updates crawl_status of the cl_task row whose id equals the given task id.
 *
 * NOTE(review): the query binds positional parameter ?4 for the new crawl_status,
 * but this method declares only one parameter (taskId, bound to ?1). As written
 * there is no argument for ?4, so the query presumably cannot be bound - either
 * pass the status as an argument or inline the intended constant. TODO confirm
 * the intended status value with the author.
 *
 * @param taskId id of the cl_task row to update
 */
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set crawl_status =?4 where id =?1 ", nativeQuery = true)
void updateCrawlStatus(long taskId);

2
cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java

@ -76,7 +76,6 @@ public class DownLoadFile {
}
public static String imagesize(String getUrl) throws IOException{
String realUrl = "";Integer size;
String realresult="";
try{
InputStream murl = new URL(getUrl).openStream();
@ -84,7 +83,6 @@ public class DownLoadFile {
int srcWidth = sourceImg .getWidth(); // 源图宽度
int srcHeight = sourceImg .getHeight(); // 源图高度
realresult=Integer.toString(srcWidth)+"×"+ Integer.toString(srcHeight);
}catch (Exception e){
e.printStackTrace();
}

22
cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java

@ -4,7 +4,6 @@ import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.EmailGroup;
import com.bfd.mf.job.domain.entity.TaskCount;
import com.bfd.mf.job.domain.repository.EmailGroupRepository;
import com.bfd.mf.job.domain.repository.ServiceLoadRepository;
import com.bfd.mf.job.domain.repository.TaskCountRepository;
@ -71,7 +70,7 @@ public class AlarmService {
// System.out.println(index); //logstash-2021.05.20 logstash-2021.05.21
String startTime = DateUtil.getDateTime(System.currentTimeMillis());
String endTime = DateUtil.getDateTime(System.currentTimeMillis() - 60 * 30 * 1000);
String endTime = DateUtil.getDateTime(System.currentTimeMillis() - 480 * 60 * 1000);
String type = "datasave";
QueryBuilder queryBuilder = getQueryBuilder(startTime,endTime,type);
@ -106,7 +105,7 @@ public class AlarmService {
}
// System.out.println(cid);
if(null == cid){
System.out.println(data);
// System.out.println(data);
}
if(errorCid.containsKey(cid)){
Integer errorNum = errorCid.get(cid);
@ -114,14 +113,13 @@ public class AlarmService {
}else{
errorCid.put(cid,1);
}
}
} catch (Exception e) {
e.printStackTrace();
}
});
System.out.println(JSONObject.toJSONString(errorCid));
// System.out.println(JSONObject.toJSONString(errorCid));
// 遍历统计的map value> 10 的报警
for(Map.Entry<String, Integer> entry : errorCid.entrySet()){
@ -170,7 +168,7 @@ public class AlarmService {
*/
Integer alarm_tag = 3;
Integer alarm_reason = 1;
String alarm_message = "[chenrui.li]这个站点解析失败次数为:"+errorNum;
String alarm_message = "这个站点解析失败次数为:"+errorNum;
String alarm_task_url = ""; // 无法确认是哪个任务
String alarm_task_content = ""; // 无法确认是哪个任务
String alarm_cid = cid;
@ -182,7 +180,12 @@ public class AlarmService {
List<String> emailList = new ArrayList<>();
emailList.add(alarm_handler);
// 根据站点查询站点的处理人
String email_addr = getEmailByCid(cid);
System.out.println("**************** " + cid);
if(null != cid) {
String email_addr = getEmailByCid(cid);
}else{
System.out.println("没查到邮箱,不报警");
}
String opinion = "";
Integer status = 2;
String create_time = DateUtil.getDateTime(new Date().getTime());
@ -203,6 +206,11 @@ public class AlarmService {
EMailUtils.getInstance().sendEmail(6, siteMessage, emailList,"30");
}
/**
* 根据数据库中的站点配置用站点的cid 获取这个站点负责人的邮箱地址
* @param cid
* @return
*/
private String getEmailByCid(String cid) {
List<EmailGroup> emails = emailGroupRepository.getEmailGroupsByCid(cid);
String alarmEmail = "";

13
cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java

@ -161,21 +161,28 @@ public class EsQueryMiniService {
TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC,1);
TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC,1);
TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC,1);
TermQueryBuilder textTermQueryBuilder = QueryBuilders.termQuery(ESConstants.ISDOWNLOAD,false);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(pgcTermQueryBuilder);
logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long imageCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put("todayCount", todayCount);
countMap.put(ESConstants.IMAGECOUNT, imageCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(egcTermQueryBuilder);
logger.info("QB4 查询有视频的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long videoCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put("todayCount", todayCount);
countMap.put(ESConstants.VIDEOCOUNT, videoCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(ugcTermQueryBuilder);
logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long fileCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put("todayCount", todayCount);
countMap.put(ESConstants.FILECOUNT, fileCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(textTermQueryBuilder);
logger.info("QB6 查询纯文本的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long textCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put(ESConstants.TEXTCOUNT, textCount);
logger.info("含图片的数据量:" + imageCount + " ; 含视频的数据量:" + videoCount + " ; 含文件的数据量:" + fileCount + " ; 纯文本的数据量:" + textCount);
}
}
}

7
cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java

@ -89,7 +89,7 @@ public class QueryService {
List<Task> taskList2 = taskRepository.findAllNewTask();
//taskList2.addAll(taskList1);
for (Task task : taskList2) {
// LOGGER.info("Executing task:{}.", JSON.toJSONString(task));
LOGGER.info("Executing task:{}.", JSON.toJSONString(task));
Long totalSegment = 1L;//(task.getDateEnd() - task.getDateStart()) / PERIOD_MILLS; // 3600000
Long segment = 1L;
Double progressFactor = 1.0 / totalSegment;
@ -142,7 +142,8 @@ public class QueryService {
String crawlContentKey = task.getCrawlContentKey(); // 要拉取的字段主要看是否需要拉评论
// BigInteger subjectId = task.getSubjectId();
// Subject subject = subjectRepository.getSubjectBySubjectId(subjectId.longValue());
String indexName = "cl_major_" + task.getSubjectId(); // 索引名称
// String indexName = "cl_major_" + task.getSubjectId(); // 索引名称
String indexName = "cl_major_61qb_12094";
Integer cacheNum = task.getCacheNum(); // 拉取数据的次数
// 当拉数据的次数 大于1 次的时候再拉数据的开始时间就不用是任务设置的开始时间了同时可以再加个采集时间范围限制一下确保拉的数据都是任务添加之后才采集的就行
QueryBuilder queryBuilder; // 根据条件组装查询用具
@ -438,7 +439,7 @@ public class QueryService {
}
// 当三个 pathSize 都为 0 的时候表示三个下载结果都为空为了保持页面和实际结果的统一这块改成 false
if(filePathSize.size() == 0 && videoPathSize.size() == 0 && imagePathSize.size() == 0){
data.put(ESConstants.ISDOWNLOAD,false);
data.put(ESConstants.ISDOWNLOAD,"false");
}
return data;
}

29
cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java

@ -15,6 +15,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.HashMap;
import java.util.List;
@ -135,7 +136,7 @@ public class StatisticsService {
break;
}
}
// 按采集方式统计数据量
// 按采集方式统计数据量 account url keyword
Map<String,Long> subjectCrawlDatFlagMap = new HashMap<>();
Map<String,Long> subjectCrawlDataFlagTodayMap = new HashMap<>();
long siteCount = 0L;
@ -170,7 +171,13 @@ public class StatisticsService {
break;
}
}
totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap);
// 按附件统计
Map<String, Long> hasTotalMap = taskRepository.findTotalByHas(subjectId);
// totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap);
totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap,hasTotalMap);
}catch (Exception e){
result = false;
LOGGER.error("[StatisticsService] statisticsSubject ERROR... subjectId : " + subjectId + "error : " );
@ -193,7 +200,8 @@ public class StatisticsService {
// 统计这个专题下每种采集类型的增量
Map<String,Long> subjectCrawlDataFlagTodayMap = esQueryMiniService.getSubjectCrawlDataFlagTodayStatistics(miniName,indexName);
// 查入或修改表
totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap);
Map<String, Long> hasTotalMap = new HashMap<>();
totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap,hasTotalMap);
totalCountService.updateResultDetil(subjectId,subjectChannelMap);
}catch (Exception e){
result = false;
@ -214,7 +222,8 @@ public class StatisticsService {
Map<String,Long> crawlDataFlagMap = esQueryNormalService.getCrawlDataFlagStatistics(normalName);
Map<String,Long> crawlDataFlagTodayMap = esQueryNormalService.getCrawlDataFlagTodayStatistics(normalName);
BigInteger subjectId = new BigInteger("0");
totalCountService.updateSubjectCount(subjectId,channelMap,channelTodayMap,crawlDataFlagMap,crawlDataFlagTodayMap);
Map<String, Long> hasTotalMap = new HashMap<>();
totalCountService.updateSubjectCount(subjectId,channelMap,channelTodayMap,crawlDataFlagMap,crawlDataFlagTodayMap,hasTotalMap);
}catch (Exception e){
result = false;
LOGGER.error("[StatisticsService] statisticsTotal ERROR... ");
@ -254,13 +263,15 @@ public class StatisticsService {
if(countMap.containsKey(ESConstants.TOTALCOUNT) && countMap.containsKey(ESConstants.TODAYCOUNT)) {
totalCount = countMap.get(ESConstants.TOTALCOUNT);
todayCount = countMap.get(ESConstants.TODAYCOUNT);
System.out.println("******* " + totalCount);
// imageCount = countMap.get(ESConstants.IMAGECOUNT);
// videoCount = countMap.get(ESConstants.VIDEOCOUNT);
// fileCount = countMap.get(ESConstants.FILECOUNT);
// textCount = countMap.get(ESConstants.TEXTCOUNT);
System.out.println("totalCount : " + totalCount);
imageCount = countMap.get(ESConstants.IMAGECOUNT);
videoCount = countMap.get(ESConstants.VIDEOCOUNT);
fileCount = countMap.get(ESConstants.FILECOUNT);
textCount = countMap.get(ESConstants.TEXTCOUNT);
}
taskRepository.updateTaskCount(taskId,totalCount,todayCount);
taskRepository.updateTaskCountAll(taskId,totalCount,todayCount,imageCount,videoCount,fileCount,textCount);
}
}

206
cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java

@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.text.SimpleDateFormat;
import java.util.*;
@ -31,7 +32,8 @@ public class TotalCountService {
Map<String, Long> subjectChannelMap,
Map<String, Long> subjectChannelTodayMap,
Map<String, Long> subjectCrawlDataFlagMap,
Map<String, Long> subjectCrawlDataFlagTodayMap) {
Map<String, Long> subjectCrawlDataFlagTodayMap,
Map<String, Long> hasMap) {
try {
List<String> channels = getAllChannels();
Collection<Long> value = subjectChannelMap.values();
@ -77,6 +79,11 @@ public class TotalCountService {
BigInteger account_type_total_count = new BigInteger("0");
BigInteger account_type_count = new BigInteger("0");
BigInteger has_image_count = new BigInteger("0");
BigInteger has_video_count = new BigInteger("0");
BigInteger has_file_count = new BigInteger("0");
BigInteger has_text_count = new BigInteger("0");
if (subjectCrawlDataFlagMap.size() > 0) {
if(null != subjectCrawlDataFlagMap.get(ESConstants.URL)) {
url_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.URL));
@ -191,10 +198,18 @@ public class TotalCountService {
id = subjectCountRepository.findIdBySubjectId(subjectId, today);
}
if (null != id && !id.equals("")) {
if(hasMap.containsKey("image")) {
has_image_count = BigInteger.valueOf(Long.valueOf(hasMap.get("image") + ""));
has_video_count = BigInteger.valueOf(Long.valueOf(hasMap.get("video") + ""));
has_file_count = BigInteger.valueOf(Long.valueOf(hasMap.get("file") + ""));
has_text_count = BigInteger.valueOf(Long.valueOf(hasMap.get("text") + ""));
}
subjectCountRepository.updateBySubjectId(id, BigInteger.valueOf(sumToday), BigInteger.valueOf(sum),
url_type_total_count, keyword_type_total_count, account_type_total_count, url_type_count, keyword_type_count, account_type_count,
social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count,
search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count);
search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count,
has_image_count,has_video_count,has_file_count,has_text_count);
} else {
// 先获取这个专题昨天的 update_time ,然后添加到 UpdateTime 字段中
// 获取昨天日期
@ -214,6 +229,193 @@ public class TotalCountService {
LOGGER.debug("[TotalCountService] updateSubjectCount finish ... subjectId = " + subjectId);
}
// public void updateSubjectCount(BigInteger subjectId,
// Map<String, Long> subjectChannelMap,
// Map<String, Long> subjectChannelTodayMap,
// Map<String, Long> subjectCrawlDataFlagMap,
// Map<String, Long> subjectCrawlDataFlagTodayMap) {
// try {
// List<String> channels = getAllChannels();
// Collection<Long> value = subjectChannelMap.values();
// SubjectCount subjectCount = new SubjectCount();
// Long sum = 0L;
// for (Long v : value) {
// if(null != v) {
// sum = sum + v;
// }
// }
// Long sumToday = 0L;
// for (Long v : subjectChannelTodayMap.values()) {
// if(null != v) {
// sumToday = sumToday + v;
// }
// }
// subjectCount.setTodayTotalCount(BigInteger.valueOf(sumToday));
// subjectCount.setTotalCount(BigInteger.valueOf(sum));
// Date date = new Date();
// subjectCount.setCreateTime(date);
//
// BigInteger social_total_count = new BigInteger("0");
// BigInteger social_count = new BigInteger("0");
// BigInteger bbs_total_count = new BigInteger("0");
// BigInteger bbs_count = new BigInteger("0");
// BigInteger blog_total_count = new BigInteger("0");
// BigInteger blog_count = new BigInteger("0");
// BigInteger news_total_count = new BigInteger("0");
// BigInteger news_count = new BigInteger("0");
// BigInteger search_total_count = new BigInteger("0");
// BigInteger search_count = new BigInteger("0");
// BigInteger item_total_count = new BigInteger("0");
// BigInteger item_count = new BigInteger("0");
// BigInteger video_total_count = new BigInteger("0");
// BigInteger video_count = new BigInteger("0");
// BigInteger life_total_count = new BigInteger("0");
// BigInteger life_count = new BigInteger("0");
//
// BigInteger url_type_total_count = new BigInteger("0");
// BigInteger url_type_count = new BigInteger("0");
// BigInteger keyword_type_total_count = new BigInteger("0");
// BigInteger keyword_type_count = new BigInteger("0");
// BigInteger account_type_total_count = new BigInteger("0");
// BigInteger account_type_count = new BigInteger("0");
//
// if (subjectCrawlDataFlagMap.size() > 0) {
// if(null != subjectCrawlDataFlagMap.get(ESConstants.URL)) {
// url_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.URL));
// }
// if(null != subjectCrawlDataFlagMap.get(ESConstants.KEYWORD)) {
// keyword_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.KEYWORD));
// }
// if(null != subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)) {
// account_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT));
// }
// if(null != subjectCrawlDataFlagMap.get("upload")){
// System.out.println("上传的任务的数据量对应的专题 " + subjectId +" == "+ subjectCrawlDataFlagMap.get("upload"));
// if(subjectCrawlDataFlagMap.get("upload").compareTo(0L) >0)
// sum = subjectCrawlDataFlagMap.get("upload");
// }
// }
// subjectCount.setUrlTypeTotalCount(url_type_total_count);
// subjectCount.setKeywordTypeTotalCount(keyword_type_total_count);
// subjectCount.setAccountTypeTotalCount(account_type_total_count);
//
// if (subjectCrawlDataFlagTodayMap.size() > 0) {
// if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.URL)) {
// url_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.URL));
// }
// if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.KEYWORD)) {
// keyword_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.KEYWORD));
// }
// if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.ACCOUNT)) {
// account_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.ACCOUNT));
// }
// }
// subjectCount.setUrlTypeCount(url_type_count);
// subjectCount.setKeywordTypeCount(keyword_type_count);
// subjectCount.setAccountTypeCount(account_type_count);
//
//
// for (String channel : channels) {
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.SOCIAL)) {
// social_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.SOCIAL));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.SOCIAL)) {
// social_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.SOCIAL));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.BBS)) {
// bbs_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.BBS));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.BBS)) {
// bbs_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.BBS));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.BLOG)) {
// blog_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.BLOG));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.BLOG)) {
// blog_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.BLOG));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.NEWS)) {
// news_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.NEWS));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.NEWS)) {
// news_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.NEWS));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.SEARCH)) {
// search_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.SEARCH));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.SEARCH)) {
// search_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.SEARCH));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.ITEM)) {
// item_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.ITEM));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.ITEM)) {
// item_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.ITEM));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.VIDEO)) {
// video_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.VIDEO));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.VIDEO)) {
// video_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.VIDEO));
// }
// if (subjectChannelMap.containsKey(channel) && channel.equals(ESConstants.LIFE)) {
// life_total_count = BigInteger.valueOf(subjectChannelMap.get(ESConstants.LIFE));
// }
// if (subjectChannelTodayMap.containsKey(channel) && channel.equals(ESConstants.LIFE)) {
// life_count = BigInteger.valueOf(subjectChannelTodayMap.get(ESConstants.LIFE));
// }
// subjectCount.setSocialTotalCount(social_total_count);
// subjectCount.setSocialCount(social_count);
// subjectCount.setBbsTotalCount(bbs_total_count);
// subjectCount.setBbsCount(bbs_count);
// subjectCount.setBlogTotalCount(blog_total_count);
// subjectCount.setBlogCount(blog_count);
// subjectCount.setNewsTotalCount(news_total_count);
// subjectCount.setNewsCount(news_count);
// subjectCount.setSearchTotalCount(search_total_count);
// subjectCount.setSearchCount(search_count);
// subjectCount.setItemTotalCount(item_total_count);
// subjectCount.setItemCount(item_count);
// subjectCount.setVideoTotalCount(video_total_count);
// subjectCount.setVideoCount(video_count);
// subjectCount.setLifeTotalCount(life_total_count);
// subjectCount.setLifeCount(life_count);
// }
// subjectCount.setSubjectId(subjectId);
// // 如果不存在就调用这个方法;
//
// String today = formatter.format(date);
// BigInteger id = new BigInteger("0");
// if (subjectId.longValue() == 0) {
// id = subjectCountRepository.findIdBySubjectDate(today);
// subjectCount.setSubjectId(null);
// } else {
// id = subjectCountRepository.findIdBySubjectId(subjectId, today);
// }
// if (null != id && !id.equals("")) {
// subjectCountRepository.updateBySubjectId(id, BigInteger.valueOf(sumToday), BigInteger.valueOf(sum),
// url_type_total_count, keyword_type_total_count, account_type_total_count, url_type_count, keyword_type_count, account_type_count,
// social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count,
// search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count);
// } else {
// // 先获取这个专题昨天的 update_time ,然后添加到 UpdateTime 字段中
// // 获取昨天日期
// String yesterday = DateUtil.parseDateByday(System.currentTimeMillis() - 1000 * 60 * 60 * 24);
// Date updateTime = subjectCountRepository.getUpdateTimeBySubjectId(subjectId,yesterday);
// if(updateTime != null){
// subjectCount.setUpdateTime(updateTime);
// }else {
// subjectCount.setUpdateTime(new Date());
// }
// subjectCountRepository.save(subjectCount);
// }
// }catch ( Exception e){
// e.printStackTrace();
// LOGGER.error("[TotalCountService] updateSubjectCount ERROR ... subjectId = " + subjectId);
// }
// LOGGER.debug("[TotalCountService] updateSubjectCount finish ... subjectId = " + subjectId);
// }
private static List<String> getAllChannels() {
List channels = new ArrayList();
channels.add(ESConstants.SOCIAL);

12
cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java

@ -44,7 +44,7 @@ public class EMailUtils {
// 收件人邮箱替换为自己知道的有效邮箱
public static String receiveMailAccount = "chaofan.tan@baifendian.com";
private String confPath = "../etc/config.properties";
//private String confPath = "../etc/config.properties";
private static EMailUtils instance = null;
private String protocol = "smtp";
@ -78,7 +78,7 @@ public class EMailUtils {
LOG.info("EMailUtils protocol:" + protocol + " myEmailSMTPHost:" + myEmailSMTPHost
+ " smtpAuth: " + smtpAuth + " myEmailAccount: " + myEmailAccount
+ " emailEncode: " + emailEncode + " config path: " + confPath);
+ " emailEncode: " + emailEncode + " config path: " ); //+ confPath
}
public static EMailUtils getInstance() {
@ -92,9 +92,9 @@ public class EMailUtils {
return instance;
}
public void setConfigPath (String confPath) {
this.confPath = confPath;
}
// public void setConfigPath (String confPath) {
// this.confPath = confPath;
// }
public void sendWechat(List<String> emailList, String message) {
@ -164,7 +164,7 @@ public class EMailUtils {
content = content.replace("{type}", "数据关键字段值为空");
break;
case 6:
content = confPath.replace("{type}","解析失败次数超过100次");
content = content.replace("{type}","解析失败次数超过100次");
break;
default:
return ;

7
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java

@ -1,16 +1,15 @@
package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.service.alarm.AlarmService;
import com.bfd.mf.job.service.taskCount.TaskCountService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@Component
public class AlarmProducer extends AbstractWorker {
private static final Logger LOGGER = LoggerFactory.getLogger(AlarmProducer.class);
private static final Logger LOGGER =Logger.getLogger(AlarmProducer.class);
@Autowired
private AppConfig config;

105
cl_query_data_job/src/main/resources/application-0827.yml

@ -0,0 +1,105 @@
debug: false
logging:
level:
com.bfd.mf: debug
spring:
datasource:
driver-class-name: com.mysql.jdbc.Driver
username: root
password: bfd123
url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
hikari:
maximum-pool-size: 10
minimum-idle: 1
#spring:
# datasource:
# driver-class-name: com.mysql.jdbc.Driver
# username: root
# password: Bfd123!@#
# url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
# hikari:
# maximum-pool-size: 10
# minimum-idle: 1
#spring:
# datasource:
# driver-class-name: com.mysql.jdbc.Driver
# username: root
# password: Bfd123!@#
# url: jdbc:mysql://172.18.1.134:3306/all_task?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
# hikari:
# maximum-pool-size: 10
# minimum-idle: 1
worker:
version: 3.0.1
enable-test: false
test-thread-count: 10
test-task-id: 180
## 数据默认要写的 kafka
broker-list: 172.18.1.113:9092
send-topic : databasestokafka
analysis-topic:
- sq_topic_cl_query_analysis_1
analysis-group: sq_group_cl_analysis_1
## 服务的状态,true 为启动
enable-analysis-producer: false # 查ES写kafka
enable-analysis-consumer: false # 读kafka写ES
enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台)
enable-query-producer: false # 离线拉数(采集平台)
enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用)
enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用
enable-up-load-producer: false # 上传(采集平台)
enable-output-producer: false #未开发,暂留
enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台)
enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台)
## 启动服务的线程数
statistics-producer-thread-count: 1
query-producer-thread-count: 10
backtrace-producer-thread-count: 1
rw-oly-producer-thread-count: 1
up-load-producer-thread-count: 1
output-producer-thread-count: 1
taskcount-producer-thread-count: 1
alarm-producer-thread-count: 1
period-s: 5
interval-time: 1800000
# 拉数年份查询的开始时间,现在是19年之前的合并成年了,这个时间就是:2019-01-01 00:00:00
query-data-year-starttime: 1546272000000
rule-rest: http://rule.sq.baifendian.com/data_match/content/
comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask
rule-rest-concurrency: 500
content-limit: 2000
failure-upper: 2000
goFastPostUrl : http://172.18.1.113:8080/upload
goFastDomain : http://172.18.1.113:8080
uploadOLYExcelPath : /opt/nfsdata/excelTask/
uploadZipPath : /opt/nfsdata/uploadFiles/
indexNamePre : cl_major_
es-normal:
name: SQ_Normal_new
address: 172.18.1.134:9301
upper: 2000-01-01
standby: cl_index_0
es-reply-source:
name: SQ_Mini_new
address: 172.18.1.148:9303
upper: 2000-01-01
standby: cl_index_0
es-mini:
name: SQ_Mini
address: 172.18.1.147:9313
bulk-thread-count: 5
bulk-rate: 3
bulk-size: 100
es-logstash:
name: SQ_Log
address: 172.26.11.111:9301
upper: 2021-01-01
standby: logstash-2021.05.13

17
cl_query_data_job/src/main/resources/application-113.yml

@ -26,13 +26,16 @@ worker:
analysis-group: sq_group_cl_analysis_1
## 服务的状态,true 为启动
enable-analysis-producer: false
enable-analysis-consumer: false
enable-statistics-producer: true
enable-query-producer: false
enable-backtrace-producer: false
enable-rw-oly-producer: false
enable-up-load-producer: false
enable-analysis-producer: false # 查ES写kafka
enable-analysis-consumer: false # 读kafka写ES
enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台)
enable-query-producer: false # 离线拉数(采集平台)
enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用)
enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用
enable-up-load-producer: false # 上传(采集平台)
enable-output-producer: false #未开发,暂留
enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台)
enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台)
## 启动服务的线程数
statistics-producer-thread-count: 1
query-producer-thread-count: 5

23
cl_query_data_job/src/main/resources/application.yml

@ -47,14 +47,14 @@ worker:
## 服务的状态,true 为启动
enable-analysis-producer: false # 查ES写kafka
enable-analysis-consumer: false # 读kafka写ES
enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台)
enable-statistics-producer: false # 统计 taskCount 和 subjectCount (采集平台)
enable-query-producer: false # 离线拉数(采集平台)
enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用)
enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用
enable-up-load-producer: false # 上传(采集平台)
enable-output-producer: false #未开发,暂留
enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台)
enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台)
enable-alarm-producer: true # 报警,查ES统计报警发邮件写数据库(运营后台)
## 启动服务的线程数
statistics-producer-thread-count: 1
query-producer-thread-count: 10
@ -103,3 +103,22 @@ worker:
address: 172.26.11.111:9301
upper: 2021-01-01
standby: logstash-2021.05.13
# es-mini:
# name: SQ_Log
# address: 172.26.11.111:9301
# upper: 2018-09-01
# standby: cl_major_
# bulk-thread-count: 5
# bulk-rate: 3
# bulk-size: 100
# es-normal:
# name: SQ_Log
# address: 172.26.11.111:9301
# upper: 2018-09-01
# standby: cl_index_*
# es-logstash:
# name: SQ_Log
# address: 172.26.11.111:9301
# upper: 2021-01-01
# standby: logstash-2021.05.13

4
cl_search_api/cl_search_api.iml

@ -171,14 +171,14 @@
<orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.14" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.12" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.1.2" level="project" />
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.9.2" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.9.6" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.9.0" level="project" />
<orderEntry type="library" name="Maven: net.logstash.logback:logstash-logback-encoder:4.4" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.1.3" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.1.7" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.1.7" level="project" />
<orderEntry type="library" name="Maven: io.springfox:springfox-swagger-ui:2.9.2" level="project" />
<orderEntry type="library" name="Maven: io.springfox:springfox-spring-web:2.9.2" level="project" />
<orderEntry type="library" name="Maven: io.springfox:springfox-swagger2:2.9.2" level="project" />

14
cl_search_api/pom.xml

@ -190,7 +190,16 @@
<artifactId>logstash-logback-encoder</artifactId>
<version>4.4</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
<version>1.1.7</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.1.7</version>
</dependency>
<!--<dependency>-->
<!--<groupId>com.swagger.ui</groupId>-->
<!--<artifactId>swagger-bootstrap-ui</artifactId>-->
@ -235,12 +244,13 @@
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>

6
cl_search_api/src/main/java/com/bfd/mf/common/service/cache/TopicQueryService.java

@ -1,5 +1,6 @@
package com.bfd.mf.common.service.cache;
import com.bfd.mf.common.service.es.EsCommonService;
import com.bfd.mf.common.service.es.ParseSearchScopeService;
import com.bfd.mf.common.util.constants.ESConstant;
@ -8,9 +9,8 @@ import com.bfd.mf.common.web.repository.mysql.base.SiteRepository;
import com.bfd.mf.common.web.repository.mysql.topic.TaskRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.nlp.common.util.object.TObjectUtils;
import org.apache.log4j.Logger;
import org.elasticsearch.index.query.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@ -19,7 +19,7 @@ import java.util.stream.Collectors;
@Service
public class TopicQueryService {
private static Logger logger = LoggerFactory.getLogger(TopicQueryService.class);
private static Logger logger = Logger.getLogger(TopicQueryService.class);
@Autowired
private EsCommonService esCommonService;
@Autowired

2
cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryAuthorCountService.java

@ -48,7 +48,7 @@ public class EsQueryAuthorCountService {
logger.debug("[EsQueryAuthorService] queryAuthorCount ...");
Integer searchType = queryRequest.getSearchType();
BoolQueryBuilder boolQueryBuilder = getQueryBuilder(queryRequest);
logger.info("[EsQueryAuthorService] queryAuthorCount indexName = " + indexName[0] + "; qb:" + "{}.", boolQueryBuilder.toString());
logger.info("[EsQueryAuthorService] queryAuthorCount indexName :{} ; qb:{}.",indexName[0] , boolQueryBuilder.toString());
Long totalCount = EsUtils.queryTotalCount(clusterName, indexName, boolQueryBuilder,searchType);
return totalCount;
}catch (Exception e){

103
cl_search_api/src/main/java/com/bfd/mf/common/service/es/EsQueryServiceForSQMini.java

@ -1,15 +1,10 @@
package com.bfd.mf.common.service.es;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.WriteMethod;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.es.EsUtils;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.config.BFDApiConfig;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.slf4j.Logger;
@ -44,7 +39,7 @@ public class EsQueryServiceForSQMini {
logger.debug("[EsQueryServiceForSQMini - 专题] queryDataFromOneSubject ...");
BoolQueryBuilder boolQueryBuilder = getQueryBuilder.getQueryBuilder(queryRequest);
//logger.info("[EsQueryService] queryDataFromOneSubject: indexName = " + indexName[0] + "; qb:\n" + "{}.", boolQueryBuilder.toString());
logger.info("[EsQueryService] queryDataFromOneSubject: indexName = " + indexName[0]);
logger.info("[EsQueryService] queryDataFromOneSubject: indexName : {}" ,indexName[0]);
Integer limit = queryRequest.getLimit(); //每页的数量
Integer start = (queryRequest.getPage() - 1) * limit; //起始页(0,20,40....)
String orderFlag = queryRequest.getOrder(); // 排序方式 asc/desc
@ -87,53 +82,53 @@ public class EsQueryServiceForSQMini {
}
}
public JSONObject exportDataFromOneSubjectTestGroupBy(String[] indexName, QueryRequest queryRequest) {
try {
logger.info("[exportDataFromOneSubjectTestGroupBy - 专题] exportDataFromOneSubject start ...");
BoolQueryBuilder boolQueryBuilder =getQueryBuilder.getQueryBuilder(queryRequest);
Integer searchType = queryRequest.getSearchType();
String filter = "docId";
Long totalCount = EsUtils.queryTotalCount(clusterName,indexName,boolQueryBuilder,searchType);
Integer count = Integer.parseInt(totalCount+"");
// public JSONObject exportDataFromOneSubjectTestGroupBy(String[] indexName, QueryRequest queryRequest) {
// try {
// logger.info("[exportDataFromOneSubjectTestGroupBy - 专题] exportDataFromOneSubject start ...");
// BoolQueryBuilder boolQueryBuilder =getQueryBuilder.getQueryBuilder(queryRequest);
//
// Integer searchType = queryRequest.getSearchType();
// String filter = "docId";
// Long totalCount = EsUtils.queryTotalCount(clusterName,indexName,boolQueryBuilder,searchType);
// Integer count = Integer.parseInt(totalCount+"");
//
// AggregationBuilder ab = getQueryBuilder.getAggregationBuilder(queryRequest, filter,count);
// Terms terms = EsUtils.queryByTag(clusterName,indexName[0],filter,boolQueryBuilder,ab);
// // 组装返回结果
// List<String> docIds = parseRankData(terms);
// JSONObject result = new JSONObject();
// if(docIds.size() > 0){
// List<JSONObject> monitorLists = new ArrayList<>();
// for (String docId: docIds) {
// JSONObject jsonObject = EsUtils.queryOneDataForExport(clusterName, indexName, docId);
// monitorLists.add(jsonObject);
// }
// result.put("monitorLists",monitorLists);
// result.put("foldDocAllNumber",monitorLists.size());
// }
// return result;
// }catch (Exception e){
// e.printStackTrace();
// return new JSONObject();
// }
// }
AggregationBuilder ab = getQueryBuilder.getAggregationBuilder(queryRequest, filter,count);
Terms terms = EsUtils.queryByTag(clusterName,indexName[0],filter,boolQueryBuilder,ab);
// 组装返回结果
List<String> docIds = parseRankData(terms);
JSONObject result = new JSONObject();
if(docIds.size() > 0){
List<JSONObject> monitorLists = new ArrayList<>();
for (String docId: docIds) {
JSONObject jsonObject = EsUtils.queryOneDataForExport(clusterName, indexName, docId);
monitorLists.add(jsonObject);
}
result.put("monitorLists",monitorLists);
result.put("foldDocAllNumber",monitorLists.size());
}
return result;
}catch (Exception e){
e.printStackTrace();
return new JSONObject();
}
}
/**
 * Collects the keys of the term buckets that should be returned as document ids.
 *
 * <p>A bucket is kept when its key (converted with {@code toString()}) contains
 * the substring {@code "bfd"} and its document count is greater than zero.
 *
 * @param terms the terms aggregation whose buckets are inspected
 * @return the matching bucket keys in bucket order; an empty or partially filled
 *         list when reading the aggregation fails (the failure is logged, not rethrown)
 */
private List<String> parseRankData(Terms terms) {
    List<String> docIds = new ArrayList<>();
    try {
        // Iterating an empty bucket list is a no-op, so no explicit size check is needed.
        for (Terms.Bucket bucket : terms.getBuckets()) {
            String labelName = bucket.getKey().toString();
            if (labelName.contains("bfd") && bucket.getDocCount() > 0) {
                docIds.add(labelName);
            }
        }
    } catch (Exception e) {
        // Deliberately best-effort: callers get whatever was collected so far.
        // Report through the class logger instead of printStackTrace() so the
        // failure ends up in the managed log output together with its stack trace.
        logger.error("[EsQueryServiceForSQMini] parseRankData failed", e);
    }
    return docIds;
}
// private List<String> parseRankData(Terms terms) {
// List<String> docIds = new ArrayList<>();
// try{
// if(terms.getBuckets().size() > 0) {
// for (Terms.Bucket bucket : terms.getBuckets()) {
// if (bucket.getKey().toString().contains("bfd")) {
// String labelName = bucket.getKey().toString();
// if(bucket.getDocCount() >0){
// docIds.add(labelName);
// }
// }
// }
// }
// }catch (Exception e){
// e.printStackTrace();
// }
// return docIds;
// }
}

3
cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java

@ -35,7 +35,6 @@ public class SubjectQueryDataService {
private Logger logger = LoggerFactory.getLogger(SubjectQueryDataService.class);
private static final Long ONE_DAY = 60 * 60 * 1000L * 24;
private static final Long clusterId = 3L;
@Autowired
private TopicQueryService topicQueryService;
@Autowired
@ -102,7 +101,7 @@ public class SubjectQueryDataService {
.addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC)
.setSearchType(SearchType.DEFAULT);
System.out.println(" 组装后的查询语句: "+searchRequestBuilder);
// System.out.println(" 组装后的查询语句: "+searchRequestBuilder);
executorService.submit(new SubjectDataQueryThread(
searchResponseList,

64
cl_search_api/src/main/java/com/bfd/mf/common/util/OperatorUtil.java

@ -1,64 +0,0 @@
package com.bfd.mf.common.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Character-level helpers for a boolean expression syntax built from the
 * operators {@code &}, {@code |} and {@code ~} plus bracket, quote and comma
 * separators.
 *
 * <p>NOTE(review): several literals in this class appeared twice in a row
 * (for example two identical {@code case '('} labels), which does not compile.
 * The second literal of each pair is assumed to have been the full-width
 * (CJK) form of the same symbol, matching the half-width/CJK pairs that are
 * still distinct here (angle brackets and curly quotes). They are written as
 * Unicode escapes below so they cannot be collapsed again; confirm against
 * the version-control history.
 */
public class OperatorUtil {
    // Separators: they split the whole expression into substrings for processing.
    // Escapes: U+FF08 / U+FF09 full-width parentheses, U+FF0C full-width comma.
    private static final Character[] seps = new Character[] { '&', '|', '~', '<', '《', '>', '》',
            '(', '\uFF08', ')', '\uFF09', ',', '\uFF0C', '"', '“', '”' };
    // Operators; '!' has intentionally been left out.
    private static final Character[] ops = new Character[] { '&', '|', '~' };

    public static final List<Character> operators = new ArrayList<Character>();
    public static final List<Character> seperators = new ArrayList<Character>();

    static {
        operators.addAll(Arrays.asList(ops));
        seperators.addAll(Arrays.asList(seps));
    }

    /**
     * @param c the character to classify
     * @return {@code true} when {@code c} is one of the operators in {@link #operators}
     */
    public static boolean isOperator(char c) {
        return operators.contains(c);
    }

    /**
     * @param c the character to classify
     * @return {@code true} when {@code c} is one of the separators in {@link #seperators}
     */
    public static boolean isSeperator(char c) {
        return seperators.contains(c);
    }

    /**
     * Compares two operators using the precedence order
     * {@code ()} &gt; {@code !} &gt; {@code &} &gt; {@code |}.
     *
     * @param op1 the first operator
     * @param op2 the second operator
     * @return for {@code !} (half- or full-width): -1 when {@code op2} is a
     *         parenthesis, otherwise 1; for {@code &}: -1 against {@code !},
     *         1 against {@code |}, otherwise 0; for {@code |}: 0 against
     *         {@code |}, otherwise -1; 0 for any other {@code op1}
     */
    public static int priorityCompare(char op1, char op2) {
        switch (op1) {
        case '!':
        case '\uFF01': // full-width exclamation mark
            return op2 == '(' || op2 == ')' ? -1 : 1;
        case '&':
            return op2 == '!' ? -1 : (op2 == '|' ? 1 : 0);
        case '|':
            return op2 == '|' ? 0 : -1;
        default:
            return 0;
        }
    }

    /**
     * Tells whether {@code c2} is the counterpart of the bracket {@code c}.
     * Half-width and full-width/CJK forms of the same bracket are interchangeable.
     *
     * @param c  an opening or closing bracket
     * @param c2 the candidate counterpart
     * @return {@code true} when {@code c} is a bracket and {@code c2} is the
     *         opposite bracket of the same family; {@code false} otherwise
     */
    public static boolean matchChar(char c, char c2) {
        switch (c) {
        case '(':
        case '\uFF08': // full-width left parenthesis
            return c2 == ')' || c2 == '\uFF09';
        case ')':
        case '\uFF09': // full-width right parenthesis
            return c2 == '(' || c2 == '\uFF08';
        case '>':
        case '》':
            return c2 == '<' || c2 == '《';
        case '<':
        case '《':
            return c2 == '>' || c2 == '》';
        default:
            return false;
        }
    }
}

18
cl_search_api/src/main/java/com/bfd/mf/common/util/ZipUtils.java

@ -117,13 +117,13 @@ public class ZipUtils {
}
/**
 * Derives the extraction folder name for a zip archive and makes sure that
 * folder exists.
 *
 * <p>The folder name is {@code zipName} with every occurrence of
 * {@code ".zip"} removed. The folder path is the plain concatenation
 * {@code zipPath + folderName}, so {@code zipPath} is expected to end with a
 * path separator.
 *
 * @param zipName file name of the archive, e.g. {@code "data.zip"}
 * @param zipPath directory prefix under which the folder is created
 * @return the folder name (not the full path)
 */
public static String getZipFileName(String zipName, String zipPath) {
    String zipFileName = zipName.replace(".zip", "");
    File zipFolder = new File(zipPath + zipFileName);
    if (!zipFolder.exists()) {
        // mkdirs() instead of mkdir(): mkdir() silently does nothing when a
        // parent directory is missing, leaving callers without a target folder.
        zipFolder.mkdirs();
    }
    return zipFileName;
}
// public static String getZipFileName(String zipName, String zipPath) {
// String zipFileName = zipName.replace(".zip","");
// // 判断zip这个文件夹是否存在不存在则创建
// File zipFile=new File(zipPath+zipFileName);
// if(!zipFile.exists()){//如果文件夹不存在
// zipFile.mkdir();//创建文件夹
// }
// return zipFileName;
// }
}

26
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -63,8 +63,10 @@ public class ESConstant {
public static final String ISDOWNLOAD = "isDownload";
public static final String VIDEOURL = "videoUrl";
public static final String RESOLUTION = "resolution";
public static final String VIDEOTIME = "videoTime";
public static String SIZE = "size";
public static String VIDEOTIME = "videoTime";
public static String RESOLUTION = "resolution";
public static String VIDEOLIST = "videoList";
public static final String OCRTEXT = "ocrText";
public static final String ASRTEXT = "asrText";
@ -73,6 +75,7 @@ public class ESConstant {
public static final String CATEGORYLABEL= "categoryLabel";
public static final String TAG= "tag";
public static final String OTHERSOURCEJSON = "otherSourceJson";
/**
* *
*
@ -362,13 +365,17 @@ public class ESConstant {
public static String CONTENT_SIMHASH = "contentSimHash";
public static String QUOTE_COUNT = "quoteCount";
public static String COLLE_CTCOUNT = "collectCount";
// 左侧检索内容那块要显示的数字
public static String CONTENTCOUNT = "contentCount";
public static String COMMENTCOUNT = "commentCount";
public static String AUTHORCOUNT = "authorCount";
/**
* 内容
*/
public static String CONTENT_TAG = "contentTag";
public static String DT_EN_2 = "dtEn2";
public static String DT_EN_1 = "dtEn1";
public static String COMMENTS_COUNT = "commentsCount";
public static String COMMENTS_COUNT = "commentsCount"; //commentsCount
public static String PAGE_COMMENTS_COUNT = "pageCommentCount";
public static String ATTITUDES_COUNT = "attitudesCount";
public static String TITLE_SIMHASH = "titleSimHash";
@ -382,6 +389,8 @@ public class ESConstant {
public static String SOURCE = "source";
public static String CRAWLDATAFLAG= "crawlDataFlag";
public static final String TOTALCOUNT = "totalCount";
//微信专属字段内容固定 value="微信"
public static String WEIXIN_SOURCE = "weixinSource";
public static String CHANNEL = "channel";
@ -435,6 +444,9 @@ public class ESConstant {
/**
* _all字段
*/
@ -470,6 +482,11 @@ public class ESConstant {
public static final String SHOP_LABELS = "shopLabels";
public static final String COMMENT_IMG = "commentImg";
public static final String USER = "user";
public static final String USERID = "userId";
public static final String FILENAME = "fileName";
/**
* 用户头像
*/
@ -495,6 +512,7 @@ public class ESConstant {
public static String FACEBOOK = "facebook";
public static String TWITTER = "twitter";
public static String CID = "cid";
public static String SITEID = "siteId";
public static String SITETYPE = "siteType";
public static String SITEICON = "icon";
@ -502,6 +520,7 @@ public class ESConstant {
// public static String FOLDDOCALLNUMBER = "foldDocAllNumber";
public static String ALLDOCNUMBER = "allDocNumber";
public static String SCROLLID = "scrollId";
public static String COMMENTLISTS = "commentLists";
public static final String BAIDUKOUBEI = "baidukoubei";
@ -630,6 +649,7 @@ public class ESConstant {
public static final String ID = "id";
public static final String COMMENTS = "comments";
public static final String COMMENT = "comment";
public static final String REPOSTS = "reposts";

8
cl_search_api/src/main/java/com/bfd/mf/common/util/enums/RTCodeEnum.java

@ -13,6 +13,13 @@ public enum RTCodeEnum {
C_INDEX_EXISTS(-3, "Index Exists"),
// 自定义状态码
C_TOPIC_DISABLED(-2, "话题渠道为空,请运营人员确注意操作!!!"),
C_UPLOAD_OK(200,"上传成功"),
C_UPLOAD_PARSE_FAIL(201,"Excel解析失败,请检查Excel"),
C_UPLOAD_EXIST(202,"同名Excel已存在,请改名后重新上传,谢谢!"),
C_UPLOAD_BUSY(203,"当前正在运行任务数超过5个,请稍后再尝试上传,谢谢!"),
C_UPLOAD_FAIL(206,"上传失败") ,
C_UPLOAD_ERROR(204,"请上传 Excel 或 txt 文件"),
C_UPLOAD_EMPTY(205,"上传的文件为空,请核查文件。"),
// Param Issue: 3**
C_PARAM_ERROR(300, "Input Param Error"),
@ -86,6 +93,7 @@ public enum RTCodeEnum {
C_ACCOUNT_NO_NICK_NAME(912, "该用户昵称不存在");
private int code;
private String desc;

22
cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java

@ -2,10 +2,7 @@ package com.bfd.mf.common.util.es;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.config.BFDApiConfig;
import com.google.common.collect.Maps;
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NStringEntity;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
@ -28,13 +25,11 @@ import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;
import springfox.documentation.spring.web.json.Json;
import java.net.InetAddress;
import java.util.*;
@ -148,9 +143,9 @@ public abstract class EsUtils {
// }
Long size = response.getHits().getTotalHits();
logger.info("[queryTotalCount] 聚合前的总量 : "+ size+ " 这个是聚合前的数据量");
logger.info("[queryTotalCount] 聚合前的总量: {} 这个是聚合前的数据量 ",size);
Long aggrSize = Long.valueOf(response.getHits().getHits().length);
logger.info("[queryTotalCount] 聚合后的数据量 : " + aggrSize);
logger.info("[queryTotalCount] 聚合后的数据量: {} ",aggrSize);
// 当聚合后的结果为 10000 或者 是评论数据的时候就不用聚合了
// if(aggrSize == 10000 || searchType == 1 || searchType == 2){
// size = size;
@ -188,7 +183,7 @@ public abstract class EsUtils {
.actionGet();//注意:首次搜索并不包含数据
}catch (Exception e){
e.printStackTrace();
logger.error(" queryForExport 首次 查询报错!!!" + indexName[0]);
logger.error(" queryForExport 首次 查询报错!!!IndexName : {} " , indexName[0]);
}
} else {
System.out.println("翻页查询");
@ -200,14 +195,14 @@ public abstract class EsUtils {
.actionGet();
} catch (Exception e) {
e.printStackTrace();
logger.error(" queryForExport 翻页 查询报错!!!" + indexName[0]);
logger.error(" queryForExport 翻页 查询报错!!!IndexName : {} ", indexName[0]);
}
}
//获取总数量
long totalCount = searchResponse.getHits().getTotalHits();
int page=(int)totalCount/( 2 * limit);//计算总页数,每次搜索数量为分片数*设置的size大小
//int page = 2 * limit;
System.out.println("queryForExport : " + totalCount + " ; page = " + page + " ; scrollId = " + searchResponse.getScrollId());
// System.out.println("queryForExport : " + totalCount + " ; page = " + page + " ; scrollId = " + searchResponse.getScrollId());
for (int i = 0; i <= page; i++) {
//再次发送请求,并使用上次搜索结果的ScrollId
List<JSONObject> monitorLists = parseSearchResponse(searchResponse);
@ -285,9 +280,8 @@ public abstract class EsUtils {
.setQuery(queryBuilder)
.addAggregation(aggregationBuilder);
logger.info("requestBuilder: "+requestBuilder.toString().replace("\n","").replace("\r","").replace(" ",""));
// The "{}" placeholder is required: SLF4J drops arguments that have no placeholder,
// so without it the request body would never appear in the log.
logger.info("requestBuilder: {}", requestBuilder.toString().replace("\n","").replace("\r","").replace(" ",""));
SearchResponse response = requestBuilder.execute().actionGet();
Terms aggregation = response.getAggregations().get(filter+"Tag");
return aggregation;
}
@ -304,7 +298,7 @@ public abstract class EsUtils {
.actionGet();//注意:首次搜索并不包含数据
} catch (Exception e) {
e.printStackTrace();
logger.error(" queryForExport 首次 查询报错!!!" + indexName[0]);
logger.error(" queryForExport 首次 查询报错!!!IndexName : {} " ,indexName[0]);
}
//再次发送请求,并使用上次搜索结果的ScrollId
List<JSONObject> monitorLists = parseSearchResponse(searchResponse);
@ -328,9 +322,7 @@ public abstract class EsUtils {
updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
updateRequest.setRefreshPolicy("true");
UpdateResponse response = client.update(updateRequest).get();
return response.getVersion();
}catch (Exception e){
e.printStackTrace();
return 0L;

2
cl_search_api/src/main/java/com/bfd/mf/common/util/es/MonitorUtils.java

@ -45,7 +45,7 @@ public class MonitorUtils {
esMonitorEntityMap.put(dataId, esMonitorBaseEntity);
// testSort.put(dataId,pubTimeStr);
}else{
System.out.println("[MonitorUtils] ??? : "+searchHit.getSourceAsMap().toString());
logger.info("[MonitorUtils] {}",searchHit.getSourceAsMap().toString());
}
}
}

12
cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java

@ -27,14 +27,6 @@ public class SliceScrollUtil {
@Autowired
private SubjectQueryDataService subjectQueryDataService;
/**
 * Returns {@code str} itself when it is non-empty (per
 * {@code TStringUtils.isNotEmpty}) and contains an {@code <em>} or
 * {@code </em>} tag; otherwise returns the empty string.
 */
private Object existHighEm(String str) {
    boolean hasEmTag = TStringUtils.isNotEmpty(str)
            && (str.contains("<em>") || str.contains("</em>"));
    return hasEmTag ? str : "";
}
/**
* 解析数据
*/
@ -136,7 +128,7 @@ public class SliceScrollUtil {
Cluster cluster = null;
List<String> currentIndexList = null;
if(null != queryRequest.getSubjectId() && !("all").equals(queryRequest.getSubjectId())){
logger.info("查询 【专题数据】 subjectId = " + queryRequest.getSubjectId());
logger.info("查询 【专题数据】 subjectId :{}" ,queryRequest.getSubjectId());
cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
}else{
@ -146,7 +138,7 @@ public class SliceScrollUtil {
}
Long clusterId = cluster.getId();
logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
logger.info("[SliceScrollUtil] dataAnalysisCloud : queryDataList clusterId :{}; currentIndexList :{}", clusterId , currentIndexList.toString());
logger.info("==========进入数据分析Es and Cache,计算开始执行============");
String sortFlag = "";
String orderFlag = "desc";

360
cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java

@ -1,360 +0,0 @@
package com.bfd.mf.common.util.spread;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.enums.SpreadEnums;
import com.bfd.mf.common.util.utility.DateUtil;
import com.bfd.nlp.common.util.object.TObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class SpreadServiceUtil {
private static final Logger logger = LoggerFactory.getLogger(SpreadServiceUtil.class);
private static final Long ONE_DAY = 24 * 60 * 60 * 1000L;
private static final Long ONE_HOUR = 60 * 60 * 1000L;
// static {
// try {
// while (!ApplicationUtil.initialFinsih) {
// logger.warn("[SpreadServiceUtil Initial] AbsSpringServiceHolder: Wait spring content initialization, sleep 1s...");
// Thread.sleep(1000);
// }
//
// oemUrlCommonService = ApplicationUtil.getApplicationContext().getBean(OemUrlCommonService.class);
// bfdApiConfig = ApplicationUtil.getApplicationContext().getBean(BFDApiConfig.class);
//
// if (TObjectUtils.isNull(oemUrlCommonService)) {
// throw new Exception("oemUrlCommonService is null");
// }
//
// } catch (Exception e) {
// logger.error("[SpreadServiceUtil Initial] Object {} is NULL", e.getMessage(), e);
// System.exit(-1);
// }
// }
/**
 * Returns the start of the month that lies three months before the given time.
 *
 * <p>The reference time is moved back three calendar months and then snapped
 * to day 1, 00:00:00.000 using the default {@link Calendar}.
 *
 * @param time reference time in epoch milliseconds; the current system time
 *             is used when {@code TObjectUtils.isNull(time)} is true
 * @return the resulting instant in epoch milliseconds
 */
public static Long getTimeOfThreeMonthsAgo(Long time) {
    long reference = TObjectUtils.isNull(time) ? System.currentTimeMillis() : time;

    Calendar monthStart = Calendar.getInstance();
    monthStart.setTimeInMillis(reference);
    // Step back first, then truncate to the first millisecond of that month.
    monthStart.add(Calendar.MONTH, -3);
    monthStart.set(Calendar.DAY_OF_MONTH, 1);
    monthStart.set(Calendar.HOUR_OF_DAY, 0);
    monthStart.set(Calendar.MINUTE, 0);
    monthStart.set(Calendar.SECOND, 0);
    monthStart.set(Calendar.MILLISECOND, 0);

    logger.info("[SpreadServiceUtil] getTimeOfThreeMonthsAgo: result is {}", monthStart.getTime());
    return monthStart.getTimeInMillis();
}
/**
 * Returns the end of the period containing {@code time}: the end of the hour when
 * {@code type} equals {@code SpreadEnums.TREND.UNIT_HOUR}, otherwise the end of the day.
 */
private static Long getEndTime(Long time, Integer type) {
    return type.equals(SpreadEnums.TREND.UNIT_HOUR)
            ? getEndTimeOfHour(time)
            : getEndTimeOfDay(time);
}
/**
 * Moves {@code time} to 23:59:59.999 of the same calendar day
 * (JVM default time zone and locale).
 */
private static Long getEndTimeOfDay(Long time) {
    Calendar dayEnd = Calendar.getInstance();
    dayEnd.setTime(new Date(time));
    dayEnd.set(Calendar.MILLISECOND, 999);
    dayEnd.set(Calendar.SECOND, 59);
    dayEnd.set(Calendar.MINUTE, 59);
    dayEnd.set(Calendar.HOUR_OF_DAY, 23);
    return dayEnd.getTimeInMillis();
}
/**
 * Moves {@code time} to minute 59, second 59, millisecond 999 of the same hour
 * (JVM default time zone and locale).
 */
private static Long getEndTimeOfHour(Long time) {
    Calendar hourEnd = Calendar.getInstance();
    hourEnd.setTime(new Date(time));
    hourEnd.set(Calendar.MILLISECOND, 999);
    hourEnd.set(Calendar.SECOND, 59);
    hourEnd.set(Calendar.MINUTE, 59);
    return hourEnd.getTimeInMillis();
}
/**
 * Builds an evenly spaced series of timestamps from {@code startTime} in steps of one
 * hour or one day, always terminated by {@code endTime} itself.
 *
 * @param startTime first point of the series (epoch milliseconds)
 * @param endTime   last point of the series (epoch milliseconds)
 * @param type      {@code SpreadEnums.TREND.UNIT_HOUR} or {@code SpreadEnums.TREND.UNIT_DAY}
 * @return the series; any failure (illegal type, null argument, ...) is logged and
 *         whatever was built so far is returned, which is {@code null} when the
 *         failure happened before the array was allocated
 */
public static Long[] getTimeList1(Long startTime, Long endTime, Integer type) {
    Long[] timeList = null;
    try {
        final long step;
        if (type.equals(SpreadEnums.TREND.UNIT_HOUR)) {
            step = ONE_HOUR;
        } else if (type.equals(SpreadEnums.TREND.UNIT_DAY)) {
            step = ONE_DAY;
        } else {
            throw new Exception(" !!! type is illegal !!!");
        }

        // Number of whole-or-partial steps between start and end.
        int stepCount = (int) Math.ceil((endTime - startTime) / (double) step);
        timeList = new Long[stepCount + 1];
        for (int idx = 0; idx < stepCount; idx++) {
            timeList[idx] = startTime + idx * step;
        }
        // The final slot always carries the exact end time.
        timeList[stepCount] = endTime;
        logger.info("[SpreadServiceUtil] getTimeList: startTime is {}, endTime is {}, timeList is {}", startTime, endTime, timeList);
    } catch (Exception e) {
        logger.error("[SpreadServiceUtil] getTimeList: failed, startTime is {}, endTime is {}, timeList is {}, error is ", startTime, endTime, timeList, e);
    }
    return timeList;
}
/**
 * Builds a series of timestamps that starts with {@code startTime} and ends with
 * {@code endTime}, with intermediate points aligned to end-of-hour / end-of-day boundaries.
 *
 * NOTE(review): the alignment boundary is derived from {@code endTime} (not
 * {@code startTime}); the end of the period containing {@code endTime} would normally be
 * &gt;= {@code endTime}, so the intermediate-point loop looks unreachable and the result
 * is effectively {@code [startTime, endTime]} - confirm the intended argument.
 *
 * @param startTime first point of the series (epoch milliseconds)
 * @param endTime   last point of the series (epoch milliseconds)
 * @param type      {@code SpreadEnums.TREND.UNIT_HOUR} or {@code SpreadEnums.TREND.UNIT_DAY}
 * @return the series, or {@code null} when an error occurred (the error is logged)
 */
public static Long[] getTimeList2(Long startTime, Long endTime, Integer type) {
    Long[] timeList = null;
    try {
        final long step;
        if (type.equals(SpreadEnums.TREND.UNIT_HOUR)) {
            step = ONE_HOUR;
        } else if (type.equals(SpreadEnums.TREND.UNIT_DAY)) {
            step = ONE_DAY;
        } else {
            throw new Exception(" !!! type is illegal !!!");
        }

        List<Long> points = new ArrayList<>();
        points.add(startTime);

        if (!startTime.equals(endTime)) {
            Long boundary = getEndTime(endTime, type);
            if (boundary < endTime) {
                for (long t = boundary + step; t < endTime; t += step) {
                    points.add(t);
                }
                points.add(endTime);
                timeList = points.toArray(new Long[points.size()]);
                logger.info("[SpreadServiceUtil] getTimeList: startTime is {}, endTime is {}, timeList is {}", startTime, endTime, timeList);
                return timeList;
            }
            // Start and end fall inside the same period: only the two end points.
            points.add(endTime);
        }
        return points.toArray(new Long[points.size()]);
    } catch (Exception e) {
        logger.error("[SpreadServiceUtil] getTimeList: failed, startTime is {}, endTime is {}, timeList is {}, error is ", startTime, endTime, timeList, e);
    }
    return timeList;
}
/**
 * Converts a series of epoch-millisecond timestamps into formatted strings.
 *
 * The format is {@code DateUtil.TIME_FORMAT} for {@code SpreadEnums.TREND.UNIT_HOUR} and
 * {@code DateUtil.DATE_FORMAT} for {@code SpreadEnums.TREND.UNIT_DAY}. This method is
 * best-effort: on an illegal type or a formatting failure the error is logged and the
 * (possibly partially filled) array is returned.
 *
 * @param timeList timestamps to format; {@code null} yields an empty array
 * @param type     trend unit selecting the format
 * @return array of the same length as {@code timeList}; entries not converted stay {@code null}
 */
public static String[] convertToTimeStringList(Long[] timeList, Integer type) {
    if (timeList == null) {
        // Previously this threw a NullPointerException outside the try block.
        logger.error("[SpreadServiceUtil] getTimeStringList: failed, timeList is null");
        return new String[0];
    }
    String[] timeStringList = new String[timeList.length];
    try {
        String dateFormat;
        if (Objects.equals(type, SpreadEnums.TREND.UNIT_HOUR)) {
            dateFormat = DateUtil.TIME_FORMAT;
        } else if (Objects.equals(type, SpreadEnums.TREND.UNIT_DAY)) {
            dateFormat = DateUtil.DATE_FORMAT;
        } else {
            throw new Exception(" !!! type is illegal !!!");
        }
        for (int i = 0; i < timeList.length; i++) {
            timeStringList[i] = DateUtil.parseDateByFormat(timeList[i], dateFormat);
        }
    } catch (Exception e) {
        // Log the INPUT series: the message says "timeList", and the half-filled output
        // array that used to be logged here carries no diagnostic value.
        logger.error("[SpreadServiceUtil] getTimeStringList: failed, timeList is {}, error is ", Arrays.asList(timeList), e);
    }
    return timeStringList;
}
// public static List<Map<String,Object>> convertToTimeStringList(List<Map<String,Object>> trendList,Long[] timeList, Integer type) {
// String[] timeStringList = new String[timeList.length];
// try {
// String dateFormat;
// if (Objects.equals(type, SpreadEnums.TREND.UNIT_HOUR)) {
// dateFormat = DateUtil.TIME_FORMAT;
// } else if (Objects.equals(type, SpreadEnums.TREND.UNIT_DAY)) {
// dateFormat = DateUtil.DATE_FORMAT;
// } else {
// throw new Exception(" !!! type is illegal !!!");
// }
//
// for (int i = 0; i < timeList.length; i++) {
// Map<String,Object> map = new HashMap<>();
// timeStringList[i] = DateUtil.parseDateByFormat(timeList[i], dateFormat);
// map.put("name",timeStringList[i]);
// trendList.add(map);
// }
// } catch (Exception e) {
// logger.error("[SpreadServiceUtil] getTimeStringList: failed, timeList is {}, error is ", Arrays.asList(timeStringList), e);
// }
// // return timeStringList;
// return trendList;
// }
/**
 * Builds the response payload for a paged list search.
 *
 * The payload has three keys: {@code totalNum} (as passed in), {@code pageNum}
 * ({@code ceil(totalNum / limit)}) and {@code spreadList} (as passed in).
 *
 * @param spreadList rows of the current page
 * @param totalNum   total number of matching rows
 * @param limit      page size
 * @return the payload; {@code pageNum} is 0 when {@code totalNum} or {@code limit} is
 *         {@code null} or when {@code limit} is not positive
 */
public static JSONObject getSearchListData(List<JSONObject> spreadList, Integer totalNum, Integer limit) {
    int pageNum = 0;
    if (totalNum != null && limit != null && limit > 0) {
        pageNum = (int) Math.ceil((double) totalNum / limit);
    } else {
        // A zero limit used to produce Infinity -> Integer.MAX_VALUE pages, and null
        // inputs relied on catching a NullPointerException; both now yield 0 pages.
        logger.warn("[SpreadServiceUtil] getSearchListData: invalid paging input, totalNum is {}, limit is {}",
                totalNum, limit);
    }
    JSONObject data = new JSONObject();
    data.put("totalNum", totalNum);
    data.put("pageNum", pageNum);
    data.put("spreadList", spreadList);
    return data;
}
/**
 * Exact-match filter: tells whether {@code spread} has {@code field} and its string
 * value equals {@code text}.
 *
 * @param spread record to inspect
 * @param field  key to look up
 * @param text   expected value
 * @return {@code true} only when the key is present, its value is non-null and equals
 *         {@code text}; a key stored with a null value no longer throws
 */
public static Boolean accurateSearchResult(JSONObject spread, String field, String text) {
    if (!spread.containsKey(field)) {
        return false;
    }
    String value = spread.getString(field);
    return value != null && value.equals(text);
}
/**
 * Keyword filter: tells whether {@code spread} has {@code field} and its string value
 * contains {@code text}.
 *
 * @param spread record to inspect
 * @param field  key to look up
 * @param text   substring to search for
 * @return {@code true} only when the key is present and its non-null value contains the
 *         non-null {@code text}; null values / null text no longer throw
 */
public static Boolean searchResult(JSONObject spread, String field, String text) {
    if (text == null || !spread.containsKey(field)) {
        return false;
    }
    String value = spread.getString(field);
    return value != null && value.contains(text);
}
/**
 * Sorts {@code spreadList} in place according to {@code sortFlag}.
 *
 * Rows are ordered by publish time; rows with equal publish time are ordered by crawl
 * time. {@code PUBTIME_DESC_SORT_FLAG} sorts newest first, {@code PUBTIME_ASC_SORT_FLAG}
 * oldest first; any other flag leaves the list untouched. Failures are logged and
 * swallowed.
 *
 * @param spreadList list to sort (modified in place)
 * @param sortFlag   sort rule identifier
 * @return the same list instance
 */
public static List<JSONObject> sortBySortFlag(List<JSONObject> spreadList, String sortFlag) {
    logger.info("[SpreadServiceUtil] sortBySortFlag: -排序前-spreadList is {}, sortFlag is {}", spreadList, sortFlag);
    try {
        // Ascending order: publish time first, crawl time as the tie-breaker.
        final Comparator<JSONObject> oldestFirst = new Comparator<JSONObject>() {
            @Override
            public int compare(JSONObject left, JSONObject right) {
                int byPubTime = left.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME)
                        .compareTo(right.getLong(SpreadEnums.LIST.SPREAD_FIELD_PUBTIME));
                if (byPubTime != 0) {
                    return byPubTime;
                }
                return left.getLong(SpreadEnums.LIST.SPREAD_FIELD_CRAWLTIME)
                        .compareTo(right.getLong(SpreadEnums.LIST.SPREAD_FIELD_CRAWLTIME));
            }
        };

        if (sortFlag.equals(SpreadEnums.SORT_TYPE.PUBTIME_DESC_SORT_FLAG.getValue())) {
            // Newest first: the same comparison with the operands swapped.
            Collections.sort(spreadList, Collections.reverseOrder(oldestFirst));
        }
        if (sortFlag.equals(SpreadEnums.SORT_TYPE.PUBTIME_ASC_SORT_FLAG.getValue())) {
            Collections.sort(spreadList, oldestFirst);
        }
        logger.info("[SpreadServiceUtil] sortBySortFlag: -排序后-spreadList is {}, sortFlag is {}", spreadList, sortFlag);
    } catch (Exception e) {
        logger.error("[SpreadServiceUtil] sortBySortFlag: failed, spreadList is {}, error is ", spreadList, e);
    }
    return spreadList;
}
/**
 * Returns one page of {@code spreadList}.
 *
 * @param spreadList full list
 * @param start      index of the first row of the page
 * @param limit      maximum number of rows in the page
 * @return a {@code subList} view covering {@code [start, min(start + limit, size))}, or
 *         an empty list when {@code start} is past the end or an error occurred (the
 *         error is logged)
 */
public static List<JSONObject> getPage(List<JSONObject> spreadList, Integer start, Integer limit) {
    List<JSONObject> pageList = new ArrayList<>();
    try {
        int total = spreadList.size();
        if (start < total) {
            pageList = spreadList.subList(start, Math.min(start + limit, total));
        }
    } catch (Exception e) {
        logger.error("[SpreadServiceUtil] getPage: failed, spreadList is {}, " +
                "start is {}, limit is {}, error is ", spreadList, start, limit, e);
    }
    return pageList;
}
/**
 * Removes every parenthesised segment (brackets and their content) from {@code value}.
 *
 * Full-width brackets (U+FF08 / U+FF09) are first normalised to ASCII ones, then each
 * shortest {@code (...)} segment is deleted, and finally any unmatched bracket left
 * behind is stripped.
 *
 * Fixes over the previous version:
 * - the normalisation used {@code replaceAll("(", ...)}, where a bare "(" is an invalid
 *   regular expression, so the method always fell into its catch block and returned the
 *   input unchanged; literal {@code replace} is used instead;
 * - each matched segment was re-used as a regular expression, which removed the bracket
 *   content everywhere in the string (e.g. "ab(b)" became "a") and could throw on regex
 *   metacharacters; segments are now removed by position.
 *
 * @param value text to clean; {@code null} is returned unchanged
 * @return the text without bracketed segments
 */
private static String removeBrackets(String value) {
    logger.info("[SpreadServiceUtil] removeBrackets: value is {}", value);
    if (value == null) {
        return null;
    }
    // Normalise full-width (Chinese) brackets to ASCII brackets.
    String result = value.trim().replace('\uFF08', '(').replace('\uFF09', ')');
    // Drop every shortest "(...)" segment, brackets included.
    result = result.replaceAll("\\((.*?)\\)", "");
    // Strip brackets that had no partner.
    result = result.trim().replace("(", "").replace(")", "");
    logger.info("[SpreadServiceUtil] removeBrackets: result value is {}", result);
    return result;
}
/**
 * Strips every tag-like segment, i.e. anything that starts with "&lt;" and ends with the
 * next "&gt;", from {@code value}.
 *
 * @param value text to clean
 * @return the cleaned text; on failure the error is logged and {@code value} is
 *         returned as-is
 */
private static String filterHtml(String value) {
    logger.info("[SpreadServiceUtil] filterHtml: value is {}", value);
    try {
        // Matches "<", any run of characters other than ">", then ">".
        String stripped = Pattern.compile("<([^>]*)>").matcher(value).replaceAll("");
        logger.info("[SpreadServiceUtil] filterHtml: result value is {}", stripped);
        return stripped;
    } catch (Exception e) {
        logger.error("[SpreadServiceUtil] filterHtml: failed, value is {}, error is ", value, e);
    }
    return value;
}
/**
 * Sanitises a source string: rejects empty or black-listed values, then removes
 * bracketed segments and tag-like segments.
 *
 * @param value raw text
 * @return "" when {@code value} is empty or its lower-case form is contained in
 *         {@code SpreadEnums.OriginalSource.illegal}; otherwise the cleaned text
 */
public static String filter(String value) {
    boolean rejected = StringUtils.isEmpty(value)
            || SpreadEnums.OriginalSource.illegal.contains(value.toLowerCase());
    if (rejected) {
        return "";
    }
    // Brackets first, then tags.
    return filterHtml(removeBrackets(value));
}
}

1042
cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpNodeUtil.java
File diff suppressed because it is too large
View File

911
cl_search_api/src/main/java/com/bfd/mf/common/util/subject/ExpressionParser.java

@ -1,911 +0,0 @@
package com.bfd.mf.common.util.subject;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.OperatorUtil;
import com.bfd.mf.common.web.vo.params.expression.ExpObject;
import com.bfd.mf.common.web.vo.params.expression.HalfQuartChar;
import org.elasticsearch.common.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
public class ExpressionParser {
private static final Logger logger = LoggerFactory.getLogger(ExpressionParser.class);
/**
 * Pre-processes a search expression into a token vector.
 *
 * The resulting vector holds {@code Character} separators/operators, {@code Integer}
 * distances (the number following '~') and {@code ExpObject} keywords. Bare keywords are
 * searched in the title field plus {@code fields}; {@code title:} keywords in "title"
 * only; {@code content:} keywords in {@code fields} only.
 *
 * @param exp    raw expression
 * @param fields fields searched by default
 * @return the token vector, or {@code null} when {@code exp} is null or empty
 * @throws NumberFormatException when the token following '~' is not an integer
 */
public static Vector<Object> preTreatExpression(String exp, String[] fields) {
    return preTreat(exp, fields, true);
}

/**
 * Same as {@link #preTreatExpression(String, String[])} except that bare keywords are
 * searched in {@code fields} only (the title field is not added automatically).
 *
 * @param exp    raw expression
 * @param fields fields searched by default
 * @return the token vector, or {@code null} when {@code exp} is null or empty
 * @throws NumberFormatException when the token following '~' is not an integer
 */
public static Vector<Object> preTreatExpressionNoTitleField(String exp, String[] fields) {
    return preTreat(exp, fields, false);
}

/** Shared implementation of the two preTreatExpression variants. */
private static Vector<Object> preTreat(String exp, String[] fields, boolean includeTitleField) {
    if (Strings.isNullOrEmpty(exp)) {
        return null;
    }
    Vector<Object> tokens = splitPreTreatTokens(exp.trim());
    expandSpacedPhrases(tokens);
    wrapPreTreatedTokens(tokens, fields, includeTitleField);
    return tokens;
}

/** Splits the expression into String operands and Character separators. */
private static Vector<Object> splitPreTreatTokens(String exp) {
    Vector<Object> v = new Vector<Object>();
    Stack<HalfQuartChar> openAngles = new Stack<>(); // unmatched '<' / '《'
    Stack<HalfQuartChar> openQuotes = new Stack<>(); // unmatched opening quotes
    int begin = 0, end = 0;
    while (end < exp.length()) {
        char c = exp.charAt(end);
        if (c == '<' || c == '《') {
            openAngles.push(new HalfQuartChar(c, end));
        } else if (c == '》' || c == '>') {
            if (!openAngles.isEmpty()) {
                openAngles.pop();
            }
        } else if (c == '“' || (c == '"' && openQuotes.isEmpty())) {
            // An ASCII quote opens only when no quote is open; previously it always
            // opened and the closing branch below could never see it.
            openQuotes.push(new HalfQuartChar(c, end));
        } else if (c == '"' || c == '”') {
            if (!openQuotes.isEmpty()) {
                openQuotes.pop();
            }
        }
        // A space between two operands inside <...> (and outside quotes), e.g. the one
        // in "<(A) (B)>", is turned into '*' so later stages can treat it as an operator.
        if (c == ' ' && openQuotes.isEmpty()
                && !openAngles.isEmpty() && end - openAngles.peek().getPosition() > 1
                && end > 0 && end < exp.length() - 1
                && isCharWaitSepLeft(exp.charAt(end - 1))
                && isCharWaitSepRight(exp.charAt(end + 1))) {
            c = '*';
        }
        if (!OperatorUtil.isSeperator(c)) {
            end++;
            continue;
        }
        if (end > 0 && exp.charAt(end - 1) == '\\' && OperatorUtil.isOperator(c)) {
            // Escaped operator: blank out THIS backslash only. The previous
            // exp.replace(char, ' ') wiped every backslash in the expression, which
            // broke any later escape.
            exp = exp.substring(0, end - 1) + ' ' + exp.substring(end);
            end++;
            continue;
        }
        if (end > begin) {
            String operand = exp.substring(begin, end).trim();
            if (!Strings.isNullOrEmpty(operand)) {
                v.add(operand);
            }
        }
        // Commas (ASCII / full-width U+FF0C) and quotes only delimit; they are not kept.
        if (c != ',' && c != '\uFF0C' && c != '"' && c != '“' && c != '”') {
            v.add(Character.valueOf(c));
        } else {
            logger.warn("miss ',' at:{}", end);
        }
        end++;
        begin = end;
    }
    // Trailing text after the last separator.
    if (end == exp.length() && begin <= end - 1) {
        if (begin == end - 1 && OperatorUtil.isSeperator(exp.charAt(begin))) {
            v.add(Character.valueOf(exp.charAt(begin)));
        } else {
            String tail = exp.substring(begin).trim();
            if (!Strings.isNullOrEmpty(tail)) {
                v.add(tail);
            }
        }
    }
    return v;
}

/** Splits a two-word operand that sits directly between '<' and '>' around a '*'. */
private static void expandSpacedPhrases(Vector<Object> v) {
    for (int i = 0; i < v.size(); i++) {
        Object o = v.get(i);
        boolean blank = (o instanceof String && ((String) o).trim().equals(""))
                || (o instanceof Character
                && ((char) o == '\u0000' || (char) o == '\n' || (char) o == '\t'));
        if (blank || i == 0 || i >= v.size() - 1) {
            continue;
        }
        if (!(o instanceof String) || !((String) o).trim().contains(" ")) {
            continue;
        }
        Object prev = v.get(i - 1);
        Object next = v.get(i + 1);
        if (prev instanceof Character && (char) prev == '<'
                && next instanceof Character && (char) next == '>') {
            String[] words = ((String) o).split(" ");
            if (words.length == 2) {
                // Resulting order is words[1], '*', words[0] (unchanged from the
                // original). NOTE(review): the operands end up swapped - confirm that
                // the phrase operator is order-insensitive.
                v.set(i, words[0]);
                v.add(i, '*');
                v.add(i, words[1]);
            }
        }
    }
}

/** Replaces String tokens by Integer distances or ExpObject keywords. */
private static void wrapPreTreatedTokens(Vector<Object> v, String[] fields, boolean includeTitleField) {
    for (int j = 0; j < v.size(); j++) {
        Object obj = v.get(j);
        if (!(obj instanceof String)) {
            continue;
        }
        String token = (String) obj;
        if (j > 0) {
            Object prv = v.get(j - 1);
            // The number following '~' (as in "~10").
            if (prv instanceof Character && (char) prv == '~') {
                v.set(j, Integer.valueOf(token));
                continue;
            }
        }
        if (token.equals("title:") || token.equals("content:")) {
            // Stand-alone prefix: it applies to the parenthesised group that follows.
            v.remove(j);
            if (j < v.size() - 1) {
                Object next = v.get(j); // the element that followed the prefix
                // Parenthesised so that a non-Character token no longer reaches the
                // (char) cast; the old "a && b || c" form threw ClassCastException.
                if (next instanceof Character
                        && ((char) next == '(' || (char) next == '\uFF08')) {
                    int depth = 1;
                    for (int k = j + 1; k < v.size(); k++) {
                        Object kv = v.get(k);
                        if (kv instanceof Character) {
                            char ch = (char) kv;
                            if (ch == '(' || ch == '\uFF08') {
                                depth++;
                            } else if (ch == ')' || ch == '\uFF09') {
                                if (depth > 0) {
                                    depth--;
                                }
                                if (depth == 0) {
                                    break;
                                }
                            }
                        } else if (kv instanceof String) {
                            String[] target = token.equals("title:") ? new String[]{"title"} : fields;
                            v.set(k, new ExpObject((String) kv, target));
                        }
                    }
                }
            }
        } else if (token.startsWith("title:")) {
            v.set(j, new ExpObject(token.substring("title:".length()), new String[]{"title"}));
        } else if (token.startsWith("content:")) {
            v.set(j, new ExpObject(token.substring("content:".length()), fields));
        } else {
            List<String> searchFields = new ArrayList<>();
            if (includeTitleField) {
                searchFields.add(ESConstant.TITLE);
            }
            searchFields.addAll(Arrays.asList(fields));
            v.set(j, new ExpObject(token, searchFields.toArray(new String[searchFields.size()])));
        }
    }
}
/**
 * Tokenises a keyword expression and hands the tokens to {@code checkAndFormatVector}.
 *
 * @param exp         raw expression
 * @param onlyKeyWord forwarded to {@code checkAndFormatVector}
 * @param fields      fields searched by default
 * @return the formatted token vector, or {@code null} when {@code exp} is null/empty or
 *         no token was produced
 * @throws Exception when {@code checkAndFormatVector} rejects the expression
 */
public static Vector<Object> preProcessExpression(String exp, boolean onlyKeyWord, String[] fields) throws Exception {
    return preProcess(exp, onlyKeyWord, fields, true);
}

/**
 * Same as {@link #preProcessExpression(String, boolean, String[])} but formats the tokens
 * with {@code checkAndFormatVectorNoTitleField}.
 *
 * @param exp         raw expression
 * @param onlyKeyWord forwarded to {@code checkAndFormatVectorNoTitleField}
 * @param fields      fields searched by default
 * @return the formatted token vector, or {@code null} when {@code exp} is null/empty or
 *         no token was produced
 * @throws Exception when {@code checkAndFormatVectorNoTitleField} rejects the expression
 */
public static Vector<Object> preProcessExpressionNoTitleField(String exp, boolean onlyKeyWord, String[] fields) throws Exception {
    return preProcess(exp, onlyKeyWord, fields, false);
}

/** Shared implementation of the two preProcessExpression variants. */
private static Vector<Object> preProcess(String exp, boolean onlyKeyWord, String[] fields,
                                         boolean includeTitleField) throws Exception {
    if (Strings.isNullOrEmpty(exp)) {
        return null;
    }
    exp = exp.trim();
    Vector<Object> v = new Vector<>();
    int quartNum = 0; // number of currently open quotes, carried across words
    for (String word : exp.split(" ")) {
        String st = word.trim();
        if (Strings.isNullOrEmpty(st)) {
            continue;
        }
        int begin = 0, end = 0;
        while (end < st.length()) {
            char c = st.charAt(end);
            // Advance to the next separator.
            if (!OperatorUtil.isSeperator(c)) {
                end++;
                continue;
            }
            if (end > 0 && st.charAt(end - 1) == '\\') {
                // Escaped separator: blank out THIS backslash only. The previous
                // st.replace(char, ' ') wiped every backslash in the word, which broke
                // any later escape.
                st = st.substring(0, end - 1) + ' ' + st.substring(end);
                end++;
                continue;
            } else if (c == '“' || (c == '"' && quartNum % 2 == 0)) {
                quartNum++;
            } else if (c == '”' || (c == '"' && quartNum % 2 == 1)) {
                quartNum--;
            } else if (quartNum > 0) {
                // Separators inside quotes are ordinary text.
                end++;
                continue;
            }
            if (end > begin) {
                String operand = st.substring(begin, end).trim();
                if (!Strings.isNullOrEmpty(operand)) {
                    v.add(operand);
                }
            }
            // Commas (ASCII / full-width U+FF0C) only delimit; they are not kept.
            if (c != ',' && c != '\uFF0C') {
                v.add(Character.valueOf(c));
            } else {
                logger.warn("miss ',' at:{}", end);
            }
            end++;
            begin = end;
        }
        // Trailing text after the last separator of this word.
        if (end == st.length() && begin <= end - 1) {
            if (begin == end - 1 && OperatorUtil.isSeperator(st.charAt(begin)) && quartNum == 0) {
                v.add(Character.valueOf(st.charAt(begin)));
            } else {
                String tail = st.substring(begin).trim();
                if (!Strings.isNullOrEmpty(tail)) {
                    v.add(tail);
                }
            }
        }
    }
    logger.debug("after split,vector now is:{}", v);
    visitVector(v);
    logger.info("=====================endLess loop=============");
    logger.info("keyWord is:{}", exp);
    v = includeTitleField
            ? checkAndFormatVector(v, onlyKeyWord, fields)
            : checkAndFormatVectorNoTitleField(v, onlyKeyWord, fields);
    logger.debug("after checkAndFormatVector,vector now is:{}", v);
    visitVector(v);
    return v;
}
/**
 * Dumps every element of {@code vector} to the debug log, one line per element, or a
 * single "empty" line when the vector is null or has no elements.
 */
private static void visitVector(Vector<Object> vector) {
    if (vector == null || vector.isEmpty()) {
        logger.debug("vector is empty!");
        return;
    }
    int position = 0;
    while (position < vector.size()) {
        Object element = vector.get(position);
        logger.debug("i=" + position + ",value=" + element);
        position++;
    }
}
/**
 * Validates the token vector (quotes and angle brackets must match, every {@code <...>}
 * phrase must contain exactly one inner separator) and converts its String tokens into
 * {@code Integer} distances / {@code ExpObject} keywords. Bare keywords are searched in
 * the title field plus {@code fields}.
 *
 * @param vector      tokens produced by the splitter; modified in place
 * @param onlyKeyWord when true, adjacent String tokens are merged
 * @param fields      fields searched by default
 * @return the same vector, or {@code null} when it is null or empty
 * @throws Exception when quotes / angle brackets do not match or a phrase is malformed
 */
private static Vector<Object> checkAndFormatVector(Vector<Object> vector, boolean onlyKeyWord, String[] fields) throws Exception {
    return checkAndFormat(vector, onlyKeyWord, fields, true);
}

/**
 * Same as {@code checkAndFormatVector} except that bare keywords are searched in
 * {@code fields} only (the title field is not added automatically).
 *
 * @param vector      tokens produced by the splitter; modified in place
 * @param onlyKeyWord when true, adjacent String tokens are merged
 * @param fields      fields searched by default
 * @return the same vector, or {@code null} when it is null or empty
 * @throws Exception when quotes / angle brackets do not match or a phrase is malformed
 */
private static Vector<Object> checkAndFormatVectorNoTitleField(Vector<Object> vector, boolean onlyKeyWord, String[] fields) throws Exception {
    return checkAndFormat(vector, onlyKeyWord, fields, false);
}

/** Shared implementation of the two checkAndFormatVector variants. */
private static Vector<Object> checkAndFormat(Vector<Object> vector, boolean onlyKeyWord, String[] fields,
                                             boolean includeTitleField) throws Exception {
    if (null == vector || vector.isEmpty()) {
        return null;
    }
    // Passing the String[] directly made SLF4J treat it as the varargs array, so only
    // the first two fields were printed; render it explicitly instead.
    logger.warn("[checkAndFormatVector] fields: {}", Arrays.toString(fields));
    // Snapshot, because the vector is mutated below.
    logger.info("vector list is:{}", new ArrayList<Object>(vector));
    mergeAndValidateTokens(vector, onlyKeyWord);
    wrapCheckedTokens(vector, fields, includeTitleField);
    return vector;
}

/** Removes quote tokens, inserts '*' inside <...> phrases, merges adjacent keywords and checks pairing. */
private static void mergeAndValidateTokens(Vector<Object> vector, boolean onlyKeyWord) throws Exception {
    Stack<HalfQuartChar> openQuotes = new Stack<HalfQuartChar>(); // unmatched opening quotes
    Stack<HalfQuartChar> openAngles = new Stack<HalfQuartChar>(); // unmatched '<'
    int angleCount = 0; // number of <...> phrases
    int starCount = 0;  // number of '*' inserted inside <...> phrases
    for (int i = 0; i < vector.size(); i++) {
        Object ob = vector.get(i);
        // Inside <...> and outside quotes: separate two adjacent operands with '*'.
        if (!openAngles.isEmpty() && openQuotes.isEmpty() && i > 0) {
            if (isLeftWaitSeperate(vector.get(i - 1)) && isRightWaitSeperate(ob)) {
                vector.insertElementAt(Character.valueOf('*'), i);
                starCount++;
                continue;
            } else if (i > 1 && isLeftWaitSeperate(vector.get(i - 2))
                    && isRightWaitSeperate(vector.get(i - 1))) {
                vector.insertElementAt(Character.valueOf('*'), i - 1);
                starCount++;
                continue;
            }
        }
        if (ob instanceof Character) {
            char ch = (char) ob;
            if (ch == '“' || (ch == '"' && openQuotes.size() % 2 == 0)) {
                openQuotes.push(new HalfQuartChar('“', i));
                // Quote tokens are dropped; stay on the same index.
                vector.remove(ob);
                i--;
            } else if (ch == '”' || (ch == '"' && openQuotes.size() % 2 == 1)) {
                if (!openQuotes.isEmpty()) {
                    openQuotes.pop();
                } else {
                    throw new Exception(" “” not match Exception,“ expected ");
                }
                vector.remove(ob);
                i--;
            } else if (ch == '<' || ch == '《') {
                openAngles.push(new HalfQuartChar('<', i));
                angleCount++;
            } else if (ch == '>' || ch == '》') {
                if (!openAngles.isEmpty()) {
                    openAngles.pop();
                } else {
                    throw new Exception("> not match Exception,< expected ");
                }
            }
        } else if (ob instanceof String && i > 0) {
            Object prv = vector.get(i - 1);
            if (prv instanceof String && onlyKeyWord) {
                // Two consecutive keywords.
                String merged = null;
                if (!openQuotes.isEmpty()) {
                    // Inside quotes: join with a space.
                    merged = ((String) prv).trim() + " " + ((String) ob).trim();
                } else if (openAngles.isEmpty()) {
                    // Outside <...>: concatenate.
                    merged = ((String) prv).trim() + ((String) ob).trim();
                }
                if (!Strings.isNullOrEmpty(merged)) {
                    vector.set(i - 1, merged);
                    vector.remove(i--);
                }
            }
        }
    }
    if (!openQuotes.isEmpty()) {
        throw new Exception(" “” not match Exception!");
    } else if (!openAngles.isEmpty()) {
        throw new Exception("<> not match Exception!");
    } else if (starCount != angleCount) {
        throw new Exception("<> phrase Exception,riht format should like: <A B>~10");
    }
}

/** Replaces String tokens by Integer distances or ExpObject keywords. */
private static void wrapCheckedTokens(Vector<Object> vector, String[] fields, boolean includeTitleField) {
    for (int j = 0; j < vector.size(); j++) {
        Object obj = vector.get(j);
        if (!(obj instanceof String)) {
            continue;
        }
        String token = (String) obj;
        if (j > 0) {
            Object prv = vector.get(j - 1);
            // The number following '~' (as in "~10").
            if (prv instanceof Character && (char) prv == '~') {
                vector.set(j, Integer.valueOf(token));
                continue;
            }
        }
        if (token.equals("title:") || token.equals("content:")) {
            // Stand-alone prefix: it applies to the parenthesised group that follows.
            vector.remove(j);
            if (j < vector.size() - 1) {
                Object next = vector.get(j); // the element that followed the prefix
                // Parenthesised so that a non-Character token no longer reaches the
                // (char) cast; the old "a && b || c" form threw ClassCastException.
                if (next instanceof Character
                        && ((char) next == '(' || (char) next == '\uFF08')) {
                    int depth = 1;
                    for (int k = j + 1; k < vector.size(); k++) {
                        Object kv = vector.get(k);
                        if (kv instanceof Character) {
                            char ch = (char) kv;
                            if (ch == '(' || ch == '\uFF08') {
                                depth++;
                            } else if (ch == ')' || ch == '\uFF09') {
                                if (depth > 0) {
                                    depth--;
                                }
                                if (depth == 0) {
                                    break;
                                }
                            }
                        } else if (kv instanceof String) {
                            // Plain keyword inside the group. For "title:(A|title:B)"
                            // the inner prefix is redundant and is removed.
                            String keyword = (String) kv;
                            while (keyword.startsWith("title:") || keyword.startsWith("content:")) {
                                keyword = keyword.substring(keyword.indexOf(':') + 1).trim();
                            }
                            if (keyword.startsWith("\\title:") || keyword.startsWith("\\content:")) {
                                // Escaped prefix: keep it as plain text. Only the single
                                // backslash is dropped; substring(2) also ate the 't'/'c'.
                                keyword = keyword.substring(1).trim();
                            }
                            String[] target = token.equals("title:") ? new String[]{"title"} : fields;
                            vector.set(k, new ExpObject(keyword, target));
                        }
                    }
                }
            }
        } else if (token.startsWith("title:") || token.startsWith("content:")) {
            String value = token;
            while (value.startsWith("title:") || value.startsWith("content:")) {
                value = value.substring(value.indexOf(":") + 1);
            }
            String[] target = token.startsWith("title:") ? new String[]{"title"} : fields;
            vector.set(j, new ExpObject(value, target));
        } else {
            // Plain keyword.
            if (token.startsWith("\\title:") || token.startsWith("\\content:")) {
                // Escaped prefix: keep it as plain text (drop the backslash only).
                token = token.substring(1).trim();
            }
            List<String> searchFields = new ArrayList<>();
            if (includeTitleField) {
                searchFields.add(ESConstant.TITLE);
            }
            searchFields.addAll(Arrays.asList(fields));
            vector.set(j, new ExpObject(token, searchFields.toArray(new String[searchFields.size()])));
        }
    }
}
/**
 * Parses a disambiguation ("minus") expression and collects the terms it contains.
 *
 * <p>The text is passed to {@code preProcessExpression(text, false, fields)} and every
 * {@code ExpObject} found in the returned vector is gathered into a set; other elements are ignored.
 *
 * @param minus  raw expression text
 * @param fields field names forwarded unchanged to {@code preProcessExpression}
 * @return {@code null} when {@code minus} is null or empty, otherwise a (possibly empty) set
 * @throws Exception declared by the signature; presumably raised by {@code preProcessExpression}
 */
public static HashSet<ExpObject> handleMinusExpression(String minus, String[] fields) throws Exception {
    if (Strings.isNullOrEmpty(minus)) {
        return null;
    }
    // NOTE(review): as written both replacements map a character onto itself; they presumably
    // targeted full-width variants of the comma and the space - confirm against the repository copy.
    String normalized = minus.replaceAll(",", ",").replaceAll(" ", " ");
    Vector<Object> parsed = preProcessExpression(normalized, false, fields);
    HashSet<ExpObject> collected = new HashSet<ExpObject>();
    if (parsed == null || parsed.isEmpty()) {
        return collected;
    }
    for (Object element : parsed) {
        if (element instanceof ExpObject) {
            collected.add((ExpObject) element);
        }
    }
    return collected;
}
/**
 * Returns {@code true} when {@code charC} is not a separator according to
 * {@code OperatorUtil.isSeperator}, or is one of the closing marks {@code ) > 》 " ”}.
 *
 * NOTE(review): the comparison with ')' appears twice; one of them presumably used a full-width
 * parenthesis - confirm against the repository copy.
 */
private static boolean isCharWaitSepLeft(char charC) {
    return !OperatorUtil.isSeperator(charC)
            || charC == ')' || charC == ')'
            || charC == '>' || charC == '》'
            || charC == '"' || charC == '”';
}
/**
 * Returns {@code true} when {@code charC} is not a separator according to
 * {@code OperatorUtil.isSeperator}, or is one of the opening marks {@code ( < 《 " “}.
 *
 * NOTE(review): the comparison with '(' appears twice; one of them presumably used a full-width
 * parenthesis - confirm against the repository copy.
 */
private static boolean isCharWaitSepRight(char charC) {
    return !OperatorUtil.isSeperator(charC)
            || charC == '(' || charC == '('
            || charC == '<' || charC == '《'
            || charC == '"' || charC == '“';
}
/**
 * Classifies a parsed element as a valid left-hand neighbour.
 *
 * @param obj a parsed element: a Character, a String, an Integer (e.g. the number in {@code ~20}) or null
 * @return {@code true} for the characters {@code ) " ”}, for any non-empty String and for any Integer;
 *         {@code false} for null and everything else
 */
private static boolean isLeftWaitSeperate(Object obj) {
    if (obj instanceof Character) {
        char mark = (Character) obj;
        // NOTE(review): ')' is compared twice; one was presumably a full-width parenthesis.
        return mark == ')' || mark == ')' || mark == '"' || mark == '”';
    }
    if (obj instanceof String) {
        return !Strings.isNullOrEmpty((String) obj);
    }
    // Integers are accepted as-is; null and any other type are rejected.
    return obj instanceof Integer;
}
/**
 * Classifies a parsed element as a valid right-hand neighbour.
 *
 * @param obj a parsed element: a Character, a String or null
 * @return {@code true} for the characters {@code ( < 《 " “} and for any non-empty String;
 *         {@code false} for null and everything else
 */
private static boolean isRightWaitSeperate(Object obj) {
    if (obj instanceof Character) {
        char mark = (Character) obj;
        // NOTE(review): '(' is compared twice; one was presumably a full-width parenthesis.
        return mark == '(' || mark == '(' || mark == '<' || mark == '《' || mark == '"'
                || mark == '“';
    }
    if (obj instanceof String) {
        return !Strings.isNullOrEmpty((String) obj);
    }
    return false;
}
}

669
cl_search_api/src/main/java/com/bfd/mf/common/util/subject/QueryBuilderUtil.java

@ -1,669 +0,0 @@
package com.bfd.mf.common.util.subject;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.ESServerUtils;
import com.bfd.mf.common.web.repository.mysql.cache.ClusterRepository;
import com.bfd.mf.common.web.vo.params.expression.ExpNode;
import com.bfd.mf.common.web.vo.params.expression.ExpObject;
import com.bfd.nlp.common.util.object.TObjectUtils;
import com.bfd.nlp.common.util.string.TStringUtils;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.client.IndicesAdminClient;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.query.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import java.util.*;
import static com.bfd.mf.common.util.constants.ESConstant.EXCLUDE_KEYWORDS_SPLIT_CN_SYSBOL;
import static com.bfd.mf.common.util.constants.ESConstant.EXCLUDE_KEYWORDS_SPLIT_SYSBOL;
public class QueryBuilderUtil {
private static final Logger logger = LoggerFactory.getLogger(QueryBuilderUtil.class);
@Autowired
private ClusterRepository clusterRepository;
/**
 * Turns an expression tree into a bool query.
 *
 * <p>The tree is first normalised with {@code ExpNodeUtil.visitAndFormatTree} and then evaluated with
 * {@code ExpNodeUtil.ComputeNodeTree}. A {@code BoolQueryBuilder} result is returned as is; a match,
 * match-phrase or any other {@code AbstractQueryBuilder} result is wrapped as the single
 * {@code must} clause of a new bool query.
 *
 * @param root expression tree root
 * @param set  expression objects handed to both ExpNodeUtil calls
 * @return the bool query, or {@code null} when {@code root} or its value is null or the evaluation
 *         result is of an unsupported type
 * @throws Exception declared by the signature; presumably raised by the ExpNodeUtil calls
 */
public static BoolQueryBuilder getQueryBuilderFromExpNodeTree(ExpNode root, HashSet<ExpObject> set) throws Exception {
    if (root == null || root.getValue() == null) {
        return null;
    }
    // Per the original note (2016-12-7, lei.bao), after formatting:
    //   1. distance operations are split into single ones (both subtrees are ExpObject);
    //   2. every ExpObject is a leaf node and normalised.
    ExpNode formatted = ExpNodeUtil.visitAndFormatTree(root, set);
    Object computed = ExpNodeUtil.ComputeNodeTree(formatted, set);
    if (computed instanceof BoolQueryBuilder) {
        return (BoolQueryBuilder) computed;
    }
    boolean wrappable = computed instanceof MatchQueryBuilder
            || computed instanceof MatchPhraseQueryBuilder
            || computed instanceof AbstractQueryBuilder;
    if (!wrappable) {
        return null;
    }
    return QueryBuilders.boolQuery().must((QueryBuilder) computed);
}
/**
 * Builds the exclusion query for an exclude expression.
 *
 * <p>The expression is parsed with {@code ExpressionParser.preTreatExpression}; see
 * {@link #buildExcludeQuery(Vector)} for how the parsed terms become {@code must_not} clauses.
 *
 * @param exclude raw exclude expression
 * @param fields  field names forwarded to the parser
 * @return the exclusion query, or {@code null} when parsing yields nothing
 */
public static BoolQueryBuilder handleExcludeExpression(String exclude, String[] fields) {
    return buildExcludeQuery(ExpressionParser.preTreatExpression(exclude, fields));
}

/**
 * Same as {@link #handleExcludeExpression(String, String[])} but parses with
 * {@code ExpressionParser.preTreatExpressionNoTitleField}.
 *
 * @param exclude raw exclude expression
 * @param fields  field names forwarded to the parser
 * @return the exclusion query, or {@code null} when parsing yields nothing
 */
public static BoolQueryBuilder handleExcludeExpressionNoTitleField(String exclude, String[] fields) {
    return buildExcludeQuery(ExpressionParser.preTreatExpressionNoTitleField(exclude, fields));
}

/**
 * Shared body of the two exclude handlers: every {@code ExpObject} in {@code parsed} becomes one
 * {@code must_not} clause that matches the term as an exact phrase (slop 0) in any of its fields.
 *
 * @param parsed parser output; elements that are not {@code ExpObject} are ignored
 * @return {@code null} when {@code parsed} is null or empty, otherwise the bool query
 */
private static BoolQueryBuilder buildExcludeQuery(Vector<Object> parsed) {
    if (null == parsed || parsed.isEmpty()) {
        return null;
    }
    BoolQueryBuilder qb = QueryBuilders.boolQuery();
    for (Object obj : parsed) {
        if (!(obj instanceof ExpObject)) {
            continue;
        }
        ExpObject exp = (ExpObject) obj;
        BoolQueryBuilder innerQb = QueryBuilders.boolQuery();
        for (String fd : exp.getFields()) {
            innerQb.should(QueryBuilders.matchPhraseQuery(fd, exp.getExpression()).slop(0));
        }
        // A bool query without clauses matches every document, so negating it would exclude
        // everything; skip terms that produced no field clause.
        if (innerQb.hasClauses()) {
            qb.mustNot(innerQb);
        }
    }
    return qb;
}
/**
 * Builds a query that excludes documents whose title contains any of the given keywords.
 *
 * <p>The keyword separators are normalised, the text is wrapped as {@code title:(...)} and parsed
 * with {@code ExpressionParser.preTreatExpression}. Each resulting {@code ExpObject} becomes an
 * exact-phrase (slop 0) {@code should} clause on {@code ESConstant.SEARCH_SCOPE_TYPE_TITLE}; the
 * group is attached as one {@code must_not} clause.
 *
 * @param exclude raw keyword list
 * @return {@code null} when {@code exclude} is empty or parsing yields nothing; otherwise the bool
 *         query, which carries no clause at all when parsing produced no {@code ExpObject}
 */
public static BoolQueryBuilder handleTitleExcludeExpression(String exclude) {
    if (!TStringUtils.isNotEmpty(exclude)) {
        return null;
    }
    // Normalise punctuation: replace the Chinese separator symbol with the standard one.
    String normalized = exclude.replaceAll(EXCLUDE_KEYWORDS_SPLIT_CN_SYSBOL,
            EXCLUDE_KEYWORDS_SPLIT_SYSBOL);
    Vector<Object> parsed = ExpressionParser.preTreatExpression(
            "title:(" + normalized + ")", new String[]{ESConstant.TITLE});
    if (null == parsed || parsed.isEmpty()) {
        return null;
    }
    BoolQueryBuilder titleExcludeBuilder = QueryBuilders.boolQuery();
    for (Object obj : parsed) {
        if (obj instanceof ExpObject) {
            titleExcludeBuilder.should(QueryBuilders.matchPhraseQuery(
                    ESConstant.SEARCH_SCOPE_TYPE_TITLE, ((ExpObject) obj).getExpression()).slop(0));
        }
    }
    BoolQueryBuilder qb = QueryBuilders.boolQuery();
    // A bool query without clauses matches every document; negating it would exclude everything,
    // so the must_not clause is only attached when at least one keyword was found.
    if (titleExcludeBuilder.hasClauses()) {
        qb.mustNot(titleExcludeBuilder);
    }
    return qb;
}
/**
 * Builds span-not queries that match the tokens of {@code shortStr} but not the tokens of the
 * longer phrase {@code longerStr} that contains it.
 *
 * <p>{@code longerStr} is cut into prefix / match / suffix around the first occurrence of
 * {@code shortStr}. The short string and each segment are tokenised through
 * {@code getAnalyzeResult}. One include span is built per token of the short string (or one for the
 * whole short string when it yields at most one token), one exclude span per distinct segment token,
 * and every include/exclude pair becomes a {@code SpanNotQueryBuilder} - but only when there are
 * fewer include spans than exclude spans.
 *
 * @param field     field the span terms are built on
 * @param shortStr  the term to keep
 * @param longerStr the longer phrase to rule out
 * @return {@code null} when either string is empty, {@code shortStr} does not occur in
 *         {@code longerStr}, or the short string cannot be analysed; otherwise a possibly empty list
 */
public static List<SpanNotQueryBuilder> getSpanNotQueryBuilder(String field, String shortStr, String longerStr) {
    if (Strings.isNullOrEmpty(shortStr) || Strings.isNullOrEmpty(longerStr)) {
        logger.error("empty short term or longer term,return NULL");
        return null;
    }
    final int matchStart = longerStr.indexOf(shortStr);
    final int matchEnd = matchStart + shortStr.length();
    final int total = longerStr.length();
    if (matchStart < 0 || matchStart >= total || matchEnd > total) {
        return null;
    }

    // prefix, the match itself, suffix - empty pieces are left out
    List<String> segments = new ArrayList<String>();
    if (matchStart > 0) {
        segments.add(longerStr.substring(0, matchStart));
    }
    segments.add(longerStr.substring(matchStart, matchEnd));
    if (matchEnd < total) {
        segments.add(longerStr.substring(matchEnd));
    }

    // include clause: tokens of the short string
    AnalyzeResult shortAnalysis = getAnalyzeResult(shortStr);
    if (shortAnalysis == null) {
        return null;
    }
    List<String> shortTokens = shortAnalysis.getList();
    List<String> includeTerms = (shortTokens != null && shortTokens.size() > 1)
            ? shortTokens
            : Collections.singletonList(shortStr);
    List<SpanNearQueryBuilder> includeSpans = new ArrayList<>();
    for (String term : includeTerms) {
        includeSpans.add(new SpanNearQueryBuilder(
                new SpanTermQueryBuilder(field, term), shortAnalysis.getSlop()).inOrder(true));
    }

    // exclude clause: distinct tokens over all segments, with the largest slop seen
    List<String> segmentTokens = new LinkedList<>();
    int widestSlop = 0;
    for (String segment : segments) {
        AnalyzeResult segmentAnalysis = getAnalyzeResult(segment);
        if (segmentAnalysis == null) {
            continue;
        }
        widestSlop = Math.max(widestSlop, segmentAnalysis.getSlop());
        for (String token : segmentAnalysis.getList()) {
            if (!listContanins(segmentTokens, token)) {
                segmentTokens.add(token);
            }
        }
    }

    List<SpanNotQueryBuilder> result = new ArrayList<>();
    if (segmentTokens.isEmpty() || includeSpans.size() >= segmentTokens.size()) {
        return result;
    }
    for (String token : segmentTokens) {
        SpanNearQueryBuilder excludeSpan =
                new SpanNearQueryBuilder(new SpanTermQueryBuilder(field, token), widestSlop).inOrder(true);
        for (SpanNearQueryBuilder includeSpan : includeSpans) {
            result.add(new SpanNotQueryBuilder(includeSpan, excludeSpan));
        }
    }
    return result;
}
/**
 * Serialises a bool query into the compact one-line form {@code "BoolQueryBuilder=" + json}.
 *
 * <p>NUL, newline and tab characters are removed from {@code qb.toString()}, runs of spaces are
 * collapsed, and the space-separated tokens are concatenated again. A space is re-inserted after a
 * token while the running count of "opened" quotes is positive: the count goes up for a token that
 * starts with {@code "} or {@code \“} and down for one that only ends with {@code "} or {@code \”}.
 *
 * <p>Fix: the previous loop {@code while (s.contains(" ")) s = s.replaceAll(" ", " ")} could never
 * terminate once the text contained a space; runs of spaces are now collapsed with a single regex.
 * NOTE(review): the earlier code also had a {@code replaceAll(" ", " ")} call that was a no-op as
 * written and presumably targeted a non-ASCII space character - confirm whether that is still needed.
 *
 * @param qb the query to serialise; must not be null
 * @return the prefixed one-line representation
 */
public static String getBoolQueryBuilderStr(BoolQueryBuilder qb) {
    String rendered = qb.toString();
    StringBuilder stripped = new StringBuilder(rendered.length());
    for (int i = 0; i < rendered.length(); i++) {
        char c = rendered.charAt(i);
        if (c != '\u0000' && c != '\n' && c != '\t') {
            stripped.append(c);
        }
    }
    // Collapse every run of two or more spaces into one.
    String collapsed = stripped.toString().replaceAll(" {2,}", " ");

    StringBuilder out = new StringBuilder("BoolQueryBuilder=");
    int openQuotes = 0;
    for (String token : collapsed.split(" ")) {
        if (token.isEmpty()) {
            continue;
        }
        if (token.startsWith("\"") || token.startsWith("\\“")) {
            openQuotes++;
        } else if (token.endsWith("\"") || token.endsWith("\\”")) {
            openQuotes--;
        }
        out.append(token);
        if (openQuotes > 0) {
            out.append(" ");
        }
    }
    return out.toString();
}
/**
 * Restores a bool query from the text form that starts with {@code "BoolQueryBuilder="}.
 *
 * <p>Everything from the first <code>{</code> onwards is taken as the query JSON. The JSON is parsed
 * only to check that it is non-empty; the query itself is passed through verbatim as a wrapper query
 * inside a new bool query's {@code must} clause.
 *
 * <p>Fix: text without any <code>{</code> used to raise {@code StringIndexOutOfBoundsException}; it
 * now yields the empty bool query like the other unusable inputs.
 *
 * @param str stored text, may be null
 * @return a bool query wrapping the stored JSON, or an empty bool query when {@code str} is null,
 *         empty, lacks the prefix, contains no JSON object or parses to null
 */
public static BoolQueryBuilder getBoolQueryBuilderFromSqlStr(String str) {
    BoolQueryBuilder qb = QueryBuilders.boolQuery();
    if (Strings.isNullOrEmpty(str) || !str.startsWith("BoolQueryBuilder=")) {
        return qb;
    }
    int start = str.indexOf("{");
    if (start < 0) {
        return qb;
    }
    String jsonStr = str.substring(start);
    JSONObject json = (JSONObject) JSONObject.parse(jsonStr);
    if (null == json) {
        return qb;
    }
    qb.must(QueryBuilders.wrapperQuery(jsonStr));
    return qb;
}
/**
 * Rebuilds query builders from the JSON rendering of a query.
 *
 * <p>Top-level keys are checked in this order: {@code bool}, {@code query_string}, {@code term}
 * (fields content / title / source), {@code match} (content / title, rebuilt as a phrase query with
 * the stored slop), {@code multi_match}, {@code span_term} (content / title) and {@code span_not}.
 * Inside {@code bool} only {@code filter}, {@code must}, {@code must_not} and {@code should} are read.
 *
 * <p>Fixes over the previous implementation:
 * <ul>
 *   <li>{@code should} clauses were added with {@code mustNot}; they are now added with {@code should};</li>
 *   <li>span clauses were type-checked against the result list instead of its first element, so no
 *       span-near query was ever built, and the include list was cast to {@code SpanQueryBuilder};</li>
 *   <li>a {@code term} on an unsupported field no longer yields a list holding {@code null};</li>
 *   <li>{@code multi_match} tolerates a missing {@code fields} array and skips non-string entries;</li>
 *   <li>empty child results and non-object array entries are skipped instead of throwing.</li>
 * </ul>
 *
 * @param json one query object, may be {@code null}
 * @return the parsed builders (callers use the first element); {@code null} when {@code json} is
 *         {@code null} or holds no recognised, rebuildable key; an empty list for a {@code span_not}
 *         whose include or exclude part cannot be rebuilt
 */
private static List<QueryBuilder> getQueryBuilderFromJSON(JSONObject json) {
    if (null == json) {
        return null;
    }
    JSONObject bool = (JSONObject) json.get("bool");
    if (!TObjectUtils.isNull(bool)) {
        return jsonToBoolQuery(bool);
    }
    JSONObject queryString = (JSONObject) json.get("query_string");
    if (null != queryString) {
        Operator op = "and".equalsIgnoreCase(queryString.getString("default_operator"))
                ? Operator.AND : Operator.OR;
        return singleQuery(new QueryStringQueryBuilder(queryString.getString("query"))
                .defaultField(queryString.getString("default_field")).defaultOperator(op));
    }
    JSONObject term = (JSONObject) json.get("term");
    if (null != term) {
        return singleQuery(jsonToTermQuery(term));
    }
    JSONObject match = (JSONObject) json.get("match");
    if (null != match) {
        QueryBuilder phrase = jsonToPhraseQuery(match);
        if (null != phrase) {
            return singleQuery(phrase);
        }
    }
    JSONObject multiMatch = json.getJSONObject("multi_match");
    if (null != multiMatch) {
        return singleQuery(jsonToMultiMatchQuery(multiMatch));
    }
    JSONObject spanTerm = json.getJSONObject("span_term");
    if (null != spanTerm) {
        QueryBuilder span = jsonToSpanTermQuery(spanTerm);
        if (null != span) {
            return singleQuery(span);
        }
    }
    JSONObject spanNot = (JSONObject) json.get("span_not");
    if (null != spanNot) {
        return jsonToSpanNotQueries(spanNot);
    }
    return null;
}

/** Wraps one builder in a fresh list; returns {@code null} for a {@code null} builder. */
private static List<QueryBuilder> singleQuery(QueryBuilder query) {
    if (null == query) {
        return null;
    }
    List<QueryBuilder> list = new ArrayList<>();
    list.add(query);
    return list;
}

/** Returns the first element of {@code parsed} when it is an instance of {@code type}, else {@code null}. */
private static <T> T firstOfType(List<QueryBuilder> parsed, Class<T> type) {
    if (null == parsed || parsed.isEmpty()) {
        return null;
    }
    QueryBuilder head = parsed.get(0);
    return type.isInstance(head) ? type.cast(head) : null;
}

/** Views a JSON node as a list of items: a single object becomes one item, an array its elements, anything else none. */
private static List<Object> jsonItems(Object node) {
    if (node instanceof JSONObject) {
        return Collections.singletonList(node);
    }
    if (node instanceof JSONArray) {
        return (JSONArray) node;
    }
    return Collections.emptyList();
}

/** Builds a term query on the first of content / title / source that has a value; {@code null} if none has. */
private static QueryBuilder jsonToTermQuery(JSONObject term) {
    for (String field : new String[]{"content", "title", "source"}) {
        Object value = term.get(field);
        if (null != value) {
            return new TermQueryBuilder(field, value);
        }
    }
    return null;
}

/** Builds a phrase query (with the stored slop) for the first of content / title present; {@code null} if neither is. */
private static QueryBuilder jsonToPhraseQuery(JSONObject match) {
    for (String field : new String[]{"content", "title"}) {
        JSONObject spec = match.getJSONObject(field);
        if (null != spec) {
            return QueryBuilders.matchPhraseQuery(field, spec.getString("query"))
                    .slop(spec.getIntValue("slop"));
        }
    }
    return null;
}

/** Builds a multi-match query from {@code query}, the string entries of {@code fields}, and {@code slop}. */
private static QueryBuilder jsonToMultiMatchQuery(JSONObject multiMatch) {
    List<String> names = new ArrayList<>();
    for (Object field : jsonItems(multiMatch.get("fields"))) {
        if (field instanceof String) {
            names.add((String) field);
        }
    }
    return QueryBuilders.multiMatchQuery(multiMatch.getString("query"),
            names.toArray(new String[names.size()])).slop(multiMatch.getIntValue("slop"));
}

/** Builds a span-term query for the first of content / title present; {@code null} if neither is. */
private static QueryBuilder jsonToSpanTermQuery(JSONObject spanTerm) {
    for (String field : new String[]{"content", "title"}) {
        JSONObject spec = spanTerm.getJSONObject(field);
        if (null != spec) {
            return new SpanTermQueryBuilder(field, spec.getString("value"));
        }
    }
    return null;
}

/**
 * Builds an in-order span-near query (slop 0) from the {@code clauses} of a {@code span_near} object.
 * NOTE(review): as in the earlier code, when several clauses are span terms only the last one is
 * kept and the stored slop is not read - confirm whether all clauses should be combined.
 */
private static SpanNearQueryBuilder jsonToSpanNear(JSONObject spanNear) {
    if (null == spanNear) {
        return null;
    }
    SpanNearQueryBuilder near = null;
    for (Object clause : jsonItems(spanNear.get("clauses"))) {
        if (!(clause instanceof JSONObject)) {
            continue;
        }
        SpanTermQueryBuilder spanTerm =
                firstOfType(getQueryBuilderFromJSON((JSONObject) clause), SpanTermQueryBuilder.class);
        if (null != spanTerm) {
            near = new SpanNearQueryBuilder(spanTerm, 0).inOrder(true);
        }
    }
    return near;
}

/**
 * Rebuilds span-not queries: the exclude side must be a {@code span_near}; the include side may be
 * a directly parseable span query and/or a {@code span_near}. One query is produced per include found.
 */
private static List<QueryBuilder> jsonToSpanNotQueries(JSONObject spanNot) {
    List<QueryBuilder> result = new ArrayList<>();
    SpanQueryBuilder includeDirect = null;
    SpanNearQueryBuilder includeNear = null;
    JSONObject include = spanNot.getJSONObject("include");
    if (null != include) {
        includeDirect = firstOfType(getQueryBuilderFromJSON(include), SpanQueryBuilder.class);
        includeNear = jsonToSpanNear(include.getJSONObject("span_near"));
    }
    JSONObject exclude = spanNot.getJSONObject("exclude");
    SpanNearQueryBuilder excludeNear =
            (null == exclude) ? null : jsonToSpanNear(exclude.getJSONObject("span_near"));
    if (null == excludeNear) {
        return result;
    }
    if (null != includeDirect) {
        result.add(new SpanNotQueryBuilder(includeDirect, excludeNear));
    }
    if (null != includeNear) {
        result.add(new SpanNotQueryBuilder(includeNear, excludeNear));
    }
    return result;
}

/** Rebuilds a bool query from its filter / must / must_not / should entries. */
private static List<QueryBuilder> jsonToBoolQuery(JSONObject bool) {
    BoolQueryBuilder pqb = new BoolQueryBuilder();
    for (String kind : new String[]{"filter", "must", "must_not", "should"}) {
        addBoolClauses(pqb, kind, bool.get(kind));
    }
    return singleQuery(pqb);
}

/** Parses each child object of {@code node} and adds its first builder to {@code target} under {@code kind}. */
private static void addBoolClauses(BoolQueryBuilder target, String kind, Object node) {
    for (Object item : jsonItems(node)) {
        if (!(item instanceof JSONObject)) {
            continue;
        }
        QueryBuilder child = firstOfType(getQueryBuilderFromJSON((JSONObject) item), QueryBuilder.class);
        if (null == child) {
            continue;
        }
        switch (kind) {
            case "filter":
                target.filter(child);
                break;
            case "must":
                target.must(child);
                break;
            case "must_not":
                target.mustNot(child);
                break;
            case "should":
                target.should(child);
                break;
            default:
                break;
        }
    }
}
/**
 * Holder for the outcome of analysing a piece of text: the token strings and a slop value.
 * A freshly created instance without arguments has slop 0 and no list.
 */
private static class AnalyzeResult {
    /** Token strings; {@code null} until set. */
    private List<String> list;
    /** Slop value associated with the tokens. */
    private int slop;

    public AnalyzeResult() {
        this(0, null);
    }

    public AnalyzeResult(int slop, List<String> lst) {
        this.list = lst;
        this.slop = slop;
    }

    public List<String> getList() {
        return this.list;
    }

    public void setList(List<String> list) {
        this.list = list;
    }

    public int getSlop() {
        return this.slop;
    }

    public void setSlop(int slop) {
        this.slop = slop;
    }
}
/**
 * Tokenises {@code termStr} with the "ik" analyzer of the Elasticsearch cluster with id 1 and
 * reports the surviving tokens together with the largest position gap between neighbours.
 *
 * <p>Tokens whose text is contained in a different, longer token of the same response are dropped.
 * The slop is 0 whenever a single term is returned; otherwise it is the largest value of
 * {@code position(current) - position(previous) - 1} over neighbouring kept tokens (never below 0).
 *
 * @param termStr text to analyse
 * @return {@code null} for null or empty input; a single-term result holding {@code termStr} itself
 *         when its trimmed length is 1 or the analyzer returns no tokens; otherwise the kept tokens
 */
private static AnalyzeResult getAnalyzeResult(String termStr) {
    if (Strings.isNullOrEmpty(termStr)) {
        return null;
    }
    if (termStr.trim().length() == 1) {
        // a single character is not sent to the analyzer
        List<String> only = new ArrayList<>();
        only.add(termStr);
        return new AnalyzeResult(0, only);
    }

    // TODO: the cluster id (1L) is hard-coded
    ESServerUtils esServerUtil = new ESServerUtils();
    if (esServerUtil.getClientByClusterId(1L) == null) {
        esServerUtil.initEsServer();
    }
    IndicesAdminClient adminClient = esServerUtil.getClientByClusterId(1L).admin().indices();
    AnalyzeResponse response = adminClient.prepareAnalyze(termStr).setAnalyzer("ik").execute().actionGet();
    List<AnalyzeToken> tokens = response.getTokens();

    if (tokens == null || tokens.isEmpty()) {
        // tokenisation failed: fall back to the raw text
        List<String> raw = new ArrayList<>();
        raw.add(termStr);
        return new AnalyzeResult(0, raw);
    }

    // tokens that are contained in another, different token
    HashSet<AnalyzeToken> swallowed = new HashSet<>();
    for (AnalyzeToken candidate : tokens) {
        for (AnalyzeToken other : tokens) {
            if (!candidate.getTerm().equals(other.getTerm())
                    && other.getTerm().contains(candidate.getTerm())) {
                swallowed.add(candidate);
            }
        }
    }
    List<AnalyzeToken> kept = new ArrayList<>();
    for (AnalyzeToken token : tokens) {
        if (!swallowed.contains(token)) {
            kept.add(token);
        }
    }

    List<String> terms = new ArrayList<>();
    if (kept.size() == 1) {
        terms.add(kept.get(0).getTerm());
        return new AnalyzeResult(0, terms);
    }

    // several terms: track the widest gap between neighbouring positions
    int widestGap = 0;
    AnalyzeToken previous = null;
    for (AnalyzeToken current : kept) {
        terms.add(current.getTerm());
        if (previous != null) {
            widestGap = Math.max(widestGap, current.getPosition() - previous.getPosition() - 1);
        }
        previous = current;
    }
    return new AnalyzeResult(widestGap, terms);
}
/**
 * Removes every string that is contained in a different string of the same list.
 *
 * @param lst candidate strings
 * @return {@code null} when {@code lst} is null or empty; otherwise a new list with the surviving
 *         strings in their original order (equal duplicates are all kept)
 */
private static List<String> dropShorterStr(List<String> lst) {
    if (lst == null || lst.isEmpty()) {
        return null;
    }
    List<String> survivors = new ArrayList<>(lst.size());
    nextCandidate:
    for (String candidate : lst) {
        for (String other : lst) {
            // a strictly different string that contains the candidate rules it out
            if (!candidate.equals(other) && other.contains(candidate)) {
                continue nextCandidate;
            }
        }
        survivors.add(candidate);
    }
    return survivors;
}
/**
 * Tells whether {@code lst} holds an element equal to {@code str}.
 *
 * @param lst strings to search; may be null
 * @param str value compared via {@code element.equals(str)}
 * @return {@code true} when an equal element exists; {@code false} otherwise, including for a null
 *         or empty list
 */
private static boolean listContanins(List<String> lst, String str) {
    Iterator<String> cursor = (lst == null) ? Collections.<String>emptyIterator() : lst.iterator();
    while (cursor.hasNext()) {
        if (cursor.next().equals(str)) {
            return true;
        }
    }
    return false;
}
}

3
cl_search_api/src/main/java/com/bfd/mf/common/web/vo/params/QueryRequest.java

@ -19,7 +19,6 @@ package com.bfd.mf.common.web.vo.params;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
@ -84,7 +83,7 @@ public class QueryRequest implements Serializable {
// 是否导出
private String scrollId;
// 崔老师项目增加的字段
private String valueLabel;
private String categoryLabel;

37
cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java

@ -3,35 +3,23 @@ package com.bfd.mf.controller;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.service.SearchAnalysisService;
import com.bfd.mf.service.SearchKeywordsCouldService;
import com.bfd.mf.common.util.analysis.DataAnalysisUtils;
import com.bfd.mf.common.util.enums.RTCodeEnum;
import com.bfd.mf.common.util.slice.SliceScrollUtil;
import com.bfd.mf.common.web.component.wrapper.ResponseWrapper;
import com.bfd.nlp.common.util.constants.MediaTypes;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.apache.catalina.servlet4preview.http.HttpServletRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.WebDataBinder;
import org.springframework.web.bind.annotation.*;
import javax.naming.ldap.PagedResultsControl;
import javax.validation.ConstraintViolation;
import javax.validation.Validation;
import javax.validation.Validator;
import javax.validation.ValidatorFactory;
import java.util.List;
import java.util.Set;
@Controller
@RequestMapping("/analysis")
@Api(value="数据分析结果&词云查询")
public class SearchAnalysisController {
private static final Logger logger = LoggerFactory.getLogger(SearchAnalysisController.class);
private static final Logger logger = LoggerFactory.getLogger(SearchAnalysisController.class);
@Autowired
private SearchAnalysisService searchAnalysisService;
@ -43,31 +31,14 @@ public class SearchAnalysisController {
@ApiOperation(value = "查询总体分析结果")
@RequestMapping(value = "/trend/lineAll", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
public JSONObject queryAll(@RequestBody QueryRequest queryRequest) {
logger.info("[queryAll] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject jsonObject;
logger.info("[queryAll] partial / Params: {}" ,JSONObject.toJSONString(queryRequest));
try {
jsonObject = searchAnalysisService.getAnalysisResponse(queryRequest);
//JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest);
// jsonObject.put("cloudCounts",cloudCounts);
JSONObject jsonObject = searchAnalysisService.getAnalysisResponse(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, jsonObject);
} catch (Exception e) {
logger.error("[queryAll error = ]", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_FAIL);
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, jsonObject);
}
// @ResponseBody
// @ApiOperation(value = "查询词云结果")
// @RequestMapping(value = "/cloudCrawl", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
// public JSONObject queryWordCloudCountCrawl(@RequestBody QueryRequest queryRequest) {
// logger.info("[queryWordCloudCountCrawl] partial / Params: {}", JSONObject.toJSONString(queryRequest));
// JSONObject cloudCounts;
// try {
// cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest);
// } catch (Exception e) {
// logger.error("[queryWordCloudCountCrawl] Failed,The error message is :{}", e);
// return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
// }
// return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, cloudCounts);
// }
}

48
cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java

@ -1,5 +1,6 @@
package com.bfd.mf.controller;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.enums.RTCodeEnum;
@ -15,9 +16,6 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.*;
import java.util.ArrayList;
import java.util.List;
@Controller
@RequestMapping("/author")
@ResponseBody
@ -36,9 +34,9 @@ public class SearchAuthorController {
@PostMapping(value = "/subject/queryAuthors", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject queryAuthors(@RequestBody QueryRequest queryRequest) {
logger.info("[queryAuthors] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject result;
logger.info("[queryAuthors] partial / Params: {}" ,JSON.toJSONString(queryRequest));
try {
JSONObject result;
String scorllId = queryRequest.getScrollId();
if(null != scorllId){ // 这个是导出要用的
result = searchDataService.exportDataInSubjectIndex(queryRequest);
@ -58,32 +56,40 @@ public class SearchAuthorController {
if(page >0 && queryRequest.getPage() > page){
return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryAuthors] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
/**
* 查询一个用户信息列表
* 查询一个用户信息列表 POST 请求
*/
@ApiOperation(value = "查询一个用户信息列表", httpMethod = "POST")
@PostMapping(value = "/subject/queryOneAuthor", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject queryAuthorByAuthorId(@RequestBody QueryRequest queryRequest) {
logger.info("[queryAuthorByAuthorId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject result;
logger.info("[queryAuthorByAuthorId] partial / Params: {}" ,JSONObject.toJSONString(queryRequest));
try {
result = searchAuthorService.queryAuthorByAuthorId(queryRequest);
JSONObject result = searchAuthorService.queryAuthorByAuthorId(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryAuthorByAuthorId] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
/**
* 查询一个用户信息 GET 请求
* @param subjectId
* @param authorId
* @param siteId
* @return
*/
@RequestMapping(value="/subject/queryOneAuthor",method= RequestMethod.GET)
@ResponseBody
public JSONObject queryAuthor(String subjectId,String authorId,String siteId) {
@ -91,32 +97,32 @@ public class SearchAuthorController {
queryRequest.setSubjectId(subjectId);
queryRequest.setAuthorId(authorId);
queryRequest.setSiteId(siteId);
logger.info("[queryAuthorByAuthorId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject result;
logger.info("[queryAuthor] partial / Params: {}" + JSONObject.toJSONString(queryRequest));
try {
result = searchAuthorService.queryAuthorByAuthorId(queryRequest);
JSONObject result = searchAuthorService.queryAuthorByAuthorId(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryAuthorByAuthorId] Failed,The error message is :{}", e);
logger.error("[queryAuthor] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
/**
* 查询某个用户发表的主贴列表
* 查询某个用户发表的主贴列表 这个方法不用了现在没有这样查询的页面了
*/
@ApiOperation(value = "查询一个用户发表的主贴列表", httpMethod = "POST")
@PostMapping(value = "/subject/queryOneAuthorContents", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject queryContentsByAuthorId(@RequestBody QueryRequest queryRequest) {
logger.info("[queryContentsByAuthorId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject result;
logger.info("[queryContentsByAuthorId] partial / Params: {}" , JSONObject.toJSONString(queryRequest));
try {
result = searchAuthorService.queryContentsByAuthorId(queryRequest);
JSONObject result = searchAuthorService.queryContentsByAuthorId(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryContentsByAuthorId] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
}

46
cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java

@ -2,17 +2,14 @@ package com.bfd.mf.controller;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.web.repository.mysql.base.SiteRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.service.SearchDataService;
import com.bfd.mf.common.util.enums.RTCodeEnum;
import com.bfd.mf.common.web.component.wrapper.ResponseWrapper;
import com.bfd.mf.service.UpdateService;
import com.bfd.nlp.common.util.encryption.MD5;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiImplicitParam;
import io.swagger.annotations.ApiImplicitParams;
import org.apache.tomcat.util.security.MD5Encoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -22,9 +19,6 @@ import io.swagger.annotations.ApiOperation;
import com.bfd.nlp.common.util.constants.MediaTypes;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Controller
@RequestMapping("/crawl")
@ -36,7 +30,6 @@ public class SearchDataController {
@Autowired
private UpdateService updateService;
/**
* 查询数据列表
*/
@ -45,8 +38,8 @@ public class SearchDataController {
@ResponseBody
public JSONObject queryDataList(@RequestBody QueryRequest queryRequest) {
logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject result = new JSONObject();
try {
JSONObject result = new JSONObject();
long start = System.currentTimeMillis();
String scorllId = queryRequest.getScrollId();
String subjectId = queryRequest.getSubjectId();
@ -88,11 +81,12 @@ public class SearchDataController {
}
long end = System.currentTimeMillis();
logger.info("接口查询时长:statr:"+ start +" ; end:"+end + " ; time = " + (end - start) + " ; count = "+result.get(ESConstant.ALLDOCNUMBER));
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[queryData] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
/**
@ -112,14 +106,14 @@ public class SearchDataController {
queryRequest.setDocId(docId);
queryRequest.setSiteId(siteId);
logger.info("[getInfo] partial / Params: {}", JSONObject.toJSONString(queryRequest));
JSONObject result;
try {
result = searchDataService.queryOneDataByDocId(queryRequest);
JSONObject result = searchDataService.queryOneDataByDocId(queryRequest);
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
} catch (Exception e) {
logger.error("[getInfo] Failed,The error message is :{}", e);
return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
}
@ -131,14 +125,13 @@ public class SearchDataController {
@RequestMapping(value = "/subject/getInfoByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
// Fetches a single document for the posted query via
// searchDataService.queryOneDataByDocId and wraps it in the standard response
// envelope. Returns a C_OK envelope carrying the service result, or a
// C_SERVICE_NOT_AVAILABLE envelope ("Query failed") when the service call throws.
public JSONObject getInfoByDocId(@RequestBody QueryRequest queryRequest) {
    logger.info("[getInfoByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
    try {
        JSONObject result = searchDataService.queryOneDataByDocId(queryRequest);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
    } catch (Exception e) {
        // A trailing Throwable is logged with its stack trace by SLF4J; it is never
        // substituted into a "{}" placeholder, so the message carries none.
        logger.error("[getInfoByDocId] Failed,The error message is :", e);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
    }
}
@ -150,14 +143,14 @@ public class SearchDataController {
@RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
// Fetches the comments for the posted query via searchDataService.queryComments
// and wraps them in the standard response envelope. Returns a C_OK envelope
// carrying the service result, or a C_SERVICE_NOT_AVAILABLE envelope
// ("Query failed") when the service call throws.
public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) {
    logger.info("[getCommentsByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest));
    try {
        JSONObject result = searchDataService.queryComments(queryRequest);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
    } catch (Exception e) {
        // A trailing Throwable is logged with its stack trace by SLF4J; it is never
        // substituted into a "{}" placeholder, so the message carries none.
        logger.error("[getCommentsByDocId] Failed,The error message is :", e);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
    }
}
@ -167,30 +160,35 @@ public class SearchDataController {
@ResponseBody
// Runs searchDataService.queryDataCountsInOneIndex for the posted query and wraps
// the counts in the standard response envelope. Returns a C_OK envelope carrying
// the service result, or a C_SERVICE_NOT_AVAILABLE envelope ("Query failed") when
// the service call throws.
public JSONObject queryDataCounts(@RequestBody QueryRequest queryRequest) {
    logger.info("[queryDataCounts] partial / Params: {}", JSONObject.toJSONString(queryRequest));
    try {
        JSONObject result = searchDataService.queryDataCountsInOneIndex(queryRequest);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
    } catch (Exception e) {
        // A trailing Throwable is logged with its stack trace by SLF4J; it is never
        // substituted into a "{}" placeholder, so the message carries none.
        logger.error("[queryDataCounts] Failed,The error message is :", e);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
    }
}
/**
 * Label-modification endpoint. It is called only by Mr. Cui's edition of the
 * product; other editions do not call this endpoint.
 *
 * @param queryRequest request body, passed unchanged to
 *                     {@code updateService.updateByDocId}
 * @return a {@code C_OK} envelope carrying the service result, or a
 *         {@code C_SERVICE_NOT_AVAILABLE} envelope ("Query failed") when the
 *         service call throws
 */
@ApiOperation(value = "修改标签")
@RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8)
@ResponseBody
public JSONObject updateLabel(@RequestBody QueryRequest queryRequest) {
    logger.info("[updateLabel] partial / Params: {}", JSONObject.toJSONString(queryRequest));
    try {
        JSONObject result = updateService.updateByDocId(queryRequest);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result);
    } catch (Exception e) {
        // A trailing Throwable is logged with its stack trace by SLF4J; it is never
        // substituted into a "{}" placeholder, so the message carries none.
        logger.error("[updateLabel] Failed,The error message is :", e);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed");
    }
}
}

123
cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java

@ -2,6 +2,9 @@ package com.bfd.mf.controller;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.ZipUtils;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.enums.RTCodeEnum;
import com.bfd.mf.common.web.component.wrapper.ResponseWrapper;
import com.bfd.mf.config.BFDApiConfig;
import com.bfd.mf.service.UploadExcelService;
import io.swagger.annotations.ApiOperation;
@ -28,7 +31,6 @@ public class UploadExcelController {
@Autowired
private UploadExcelService uploadExcelService;
/**
* 上传Excel
*/
@ -38,22 +40,22 @@ public class UploadExcelController {
// Accepts an uploaded file on behalf of userId. When the original file name
// contains "xlsx" the upload is delegated to aboutExcel and its response is
// returned; for any other name an empty JSONObject is returned unchanged.
// Any exception (including a null original file name) yields a C_UPLOAD_FAIL
// envelope ("Upload failed").
public JSONObject insertExcelTask(@RequestParam("file") MultipartFile file,
                                  @RequestParam("userId") String userId) {
    logger.info("[insertExcelTask] partial / Params: {}", userId);
    try {
        JSONObject jsonObject = new JSONObject();
        Map<String, Object> userinfo = new HashMap<>();
        userinfo.put(ESConstant.USER, ESConstant.USER);
        userinfo.put(ESConstant.USERID, userId);
        // aboutExcel first checks whether an Excel with this name already exists and,
        // if so, asks the caller to rename it.
        String excelName = file.getOriginalFilename();
        if (excelName.contains("xlsx")) { // an Excel file was uploaded
            jsonObject = aboutExcel(excelName, file, userinfo);
        }
        return jsonObject;
    } catch (Exception e) {
        // Route the failure through the class logger instead of stderr so it is
        // captured by the configured log management.
        logger.error("[insertExcelTask] Failed,The error message is :", e);
        return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed");
    }
}
// private JSONObject aboutTxt(String excelName, MultipartFile file, Map<String, Object> userinfo, JSONObject jsonObject) {
@ -76,16 +78,16 @@ public class UploadExcelController {
// return jsonObject;
// }
private JSONObject aboutExcel(String excelName, MultipartFile file, Map<String, Object> userinfo, JSONObject jsonObject) {
private JSONObject aboutExcel(String excelName, MultipartFile file, Map<String, Object> userinfo) {
try{
boolean isExist = uploadExcelService.queryByExcelName(excelName);
boolean isTaskExist = uploadExcelService.queryByStatus();
if(isExist){
jsonObject.put("code", 202); //同名Excel已存在请改名后重新上传谢谢
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_EXIST, "Upload Exist");
}else if(!isTaskExist){
jsonObject.put("code", 203); //当前正在运行任务数超过5个请稍后再尝试上传谢谢
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_BUSY, "Upload Busy");
}else{
String filePath = "/opt/nfsdata/excelTask/";
String filePath = bfdApiConfig.getUploadOLYExcelPath();
boolean flag = uploadExcelService.uploadExcel(file, filePath);
if (flag) { // 上传成功后 cl_parse_excel_task 表中添加对应的记录每个表格一条记录
uploadExcelService.insertParseExcelTask(excelName, userinfo);
@ -96,16 +98,14 @@ public class UploadExcelController {
if(taskUploadSuccess) {
// 既然插入成功那就获取对应的 专题ID 插入 cl_subject_count;
List<BigInteger> subjectIds = uploadExcelService.getSubjectIdsByExcelName(excelName);
jsonObject.put("code", 200); //恭喜你上传成功啦~
}else{
jsonObject.put("code", 201); //Excel解析失败请检查Excel
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_PARSE_FAIL, "Upload Parse Fail");
}
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_OK, "Upload Success");
}
}catch (Exception e){
e.printStackTrace();
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed");
}
return jsonObject;
}
@ -121,12 +121,8 @@ public class UploadExcelController {
@RequestParam("subjectId") String subjectId,
@RequestParam("fileRemak") String fileRemak) {
logger.info("[insertExcelTask] partial / Params: {}", subjectId+" , "+fileRemak);
JSONObject jsonObject =new JSONObject();
String zipPath = bfdApiConfig.getUploadZipPath();
try {
Map<String,Object> userinfo = new HashMap<>();
userinfo.put("user",user);
userinfo.put("userId",userId);
String zipName = file.getOriginalFilename();
// 将文件上传到指定路径下并返回是否上传成功的状态位
boolean flag = uploadExcelService.uploadExcel(file, zipPath);
@ -134,97 +130,32 @@ public class UploadExcelController {
if (flag) {
long fileSize = ZipUtils.getFileSize(zipPath+zipName);
if(fileSize < 1024){
jsonObject.put("code", 205);
jsonObject.put("desc", "上传的文件为空,请核查文件。");
return jsonObject;
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_EMPTY, "File Empty");
}
// // 解压zip ,校验数据非excel 非txt 的都需要提示
Map<String, List<String>> fileNameMap = ZipUtils.unZip(new File(zipPath+zipName),zipPath+zipFileName);
if(fileNameMap.containsKey("fileName")) {
String fileName = fileNameMap.get("fileName").get(0);
if(fileNameMap.containsKey(ESConstant.FILENAME)) {
String fileName = fileNameMap.get(ESConstant.FILENAME).get(0);
logger.info("The FileName :" + fileName);
fileNameMap.remove(ESConstant.FILENAME);
// 获取一下文件的大小
if (!fileName.contains(".xlsx") && !fileName.contains(".txt")) {
jsonObject.put("code", 204);
jsonObject.put("desc", "请上传 Excel 或 txt 文件");
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_ERROR, "File Error");
} else { // 需要在 cl_task 表中添加一个任务
boolean insertSuccess = uploadExcelService.insertTask(subjectId, user, userId, fileRemak, zipName);
jsonObject.put("code", 200);
jsonObject.put("desc","OK");
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_OK, "Upload Success");
}
fileNameMap.remove("fileName");
}else{
jsonObject.put("code", 204);
jsonObject.put("desc", "请上传 Excel 或 txt 文件");
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_ERROR, "File Error");
}
}else{
jsonObject.put("code",206);
jsonObject.put("desc","上传失败");
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed");
}
} catch (Exception e) {
e.printStackTrace();
jsonObject.put("code",206);
jsonObject.put("desc","上传失败");
return ResponseWrapper.buildResponse(RTCodeEnum.C_UPLOAD_FAIL, "Upload failed");
}
return jsonObject;
}
}
// /**
// * 上传Excel相关
// */
// private boolean queryByExcelName(String excelName) {
// try{
// String newExcelName = excelName.replace(".xlsx","");
// boolean success = uploadExcelService.isTaskSucess(newExcelName);
// if(success){
// return true;
// }
// return false;
// }catch (Exception e){
// e.printStackTrace();
// return false;
// }
// }
//
// /**
// * 上传Excel相关
// */
// private boolean queryByStatus() {
// try{
// boolean isExist = uploadExcelService.isTaskExist();
// if(isExist){ // 如果任务为空就说明可以添加新的任务进来如果不为空就不要添加新的任务进来啦~
// return true;
// }else{
// return false;
// }
// }catch (Exception e){
// e.printStackTrace();
// return false;
// }
// }
// /**
// * 上传Excel相关
// */
// private boolean uploadExcel(MultipartFile file,String filePath) {
// try{
// InputStream inputStream = file.getInputStream();
// byte[] buffer = new byte[inputStream.available()];
// inputStream.read(buffer);
// File targetFile = new File(filePath+file.getOriginalFilename());
// OutputStream outStream = new FileOutputStream(targetFile);
// outStream.write(buffer);
// inputStream.close();
// outStream.close();
// return true;
// }catch (Exception e){
// e.printStackTrace();
// return false;
// }
// }
}

31
cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java

@ -1,9 +1,6 @@
package com.bfd.mf.service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.service.es.ClusterService;
import com.bfd.mf.common.service.es.EsQueryServiceForSQMini;
import com.bfd.mf.common.service.es.SubjectQueryDataService;
import com.bfd.mf.common.util.analysis.DataAnalysisUtils;
import com.bfd.mf.common.util.constants.ConditionCommon;
import com.bfd.mf.common.util.constants.ESConstant;
@ -18,7 +15,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import springfox.documentation.spring.web.json.Json;
import java.util.*;
import java.util.stream.Collectors;
@ -33,12 +29,13 @@ public class SearchAnalysisService {
private SliceScrollUtil sliceScrollUtil;
@Autowired
private SearchKeywordsCouldService searchKeywordsCouldService;
@Autowired
private ClusterService clusterService;
@Autowired
private SubjectQueryDataService subjectQueryDataService;
@Autowired
private EsQueryServiceForSQMini esQueryServiceForSQMini;
// @Autowired
// private ClusterService clusterService;
// @Autowired
// private SubjectQueryDataService subjectQueryDataService;
// @Autowired
// private EsQueryServiceForSQMini esQueryServiceForSQMini;
public JSONObject getAnalysisResponse(QueryRequest queryRequest) {
JSONObject jsonObject = new JSONObject();
@ -94,7 +91,7 @@ public class SearchAnalysisService {
Map<String, List<DataPieCount>> channelMaps = new HashMap<>();
Map<String, List<DataPieCount>> valueMaps = new HashMap<>();
Map<String, List<DataPieCount>> categoryMaps = new HashMap<>();
logger.info("dataAnalysisChannelCounts : totalNumber = " + esMonitorEntityList.size());
logger.info("dataAnalysisChannelCounts : totalNumber :{} " , esMonitorEntityList.size());
Map<String,String> channelMap = new HashMap<>();
Map<String,String> valueLabelMap = new HashMap<>();
Map<String,String> categoryLabelMap = new HashMap<>();
@ -175,7 +172,7 @@ public class SearchAnalysisService {
private JSONObject dataAnalysisTrendByDayQueryTimes(QueryRequest queryRequest, List<ESMonitorEntity> esMonitorEntityList) {
JSONObject jsonResult = new JSONObject();
logger.info("dataAnalysisTrendByDayQueryTimes : totalNumber = " + esMonitorEntityList.size());
logger.info("dataAnalysisTrendByDayQueryTimes : totalNumber :{} " , esMonitorEntityList.size());
jsonResult.put("totalNumber", esMonitorEntityList.size());
// Map<String, String> emotionEngMaps = MonitorConstant.emotionEngByThresholdMaps();
try {
@ -184,7 +181,7 @@ public class SearchAnalysisService {
// 发布时间相同的
return o1.getPubTime().compareTo(o2.getPubTime()) == 0 ? o1.getCrawlTime().compareTo(o2.getCrawlTime()) : o1.getPubTime().compareTo(o2.getPubTime());
});
logger.info("总数据条数: "+esMonitorEntityList.size());
logger.info("总数据条数:{}",esMonitorEntityList.size());
Long startTime = queryRequest.getStartTime();
Long endTime = queryRequest.getEndTime();
Long time_difference = 0L;
@ -208,8 +205,8 @@ public class SearchAnalysisService {
Map<String, List<DataCount>> yearChannelMaps = resultMap.get("yearChannelMap");
Map<String, List<DataCount>> dayEmoMaps = resultMap.get("dayEmoMap");
Map<String, List<DataCount>> yearEmoMaps = resultMap.get("yearEmoMap");
System.out.println("dayChannelMaps"+JSONObject.toJSONString(dayChannelMaps));
System.out.println("yearChannelMaps"+JSONObject.toJSONString(yearChannelMaps));
// System.out.println("dayChannelMaps"+JSONObject.toJSONString(dayChannelMaps));
// System.out.println("yearChannelMaps"+JSONObject.toJSONString(yearChannelMaps));
List<String> dayList = new ArrayList<>();
List<String> yearList = new ArrayList<>();
for (Long l:timeList) {
@ -226,11 +223,11 @@ public class SearchAnalysisService {
List<DataCount> channelTrendList = new ArrayList<>();
List<DataCount> emoTrendList = new ArrayList<>();
if(time_difference > ONEYEAR){
System.out.println("按年拆");
// System.out.println("按年拆");
channelTrendList = parseChannleMapsResult(docTypeMap, yearChannelMaps, newYearList);
emoTrendList = parseEmoMapsResult(sentimentMap, yearEmoMaps, newYearList);
}else{
System.out.println(" 按天拆");
// System.out.println(" 按天拆");
channelTrendList = parseChannleMapsResult(docTypeMap, dayChannelMaps, newDayList);
emoTrendList = parseEmoMapsResult(sentimentMap, dayEmoMaps, newDayList);
}

106
cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java

@ -26,7 +26,6 @@ import java.util.*;
@Service
public class SearchAuthorService extends CrudService<SentimentModify, SentimentRepository> implements Serializable {
private static Logger logger = LoggerFactory.getLogger(SearchAuthorService.class);
private static SimpleDateFormat format = new SimpleDateFormat("YYYY-MM-DD HH:mm:SS");
@Autowired
private ClusterService clusterService;
@Autowired
@ -57,7 +56,7 @@ public class SearchAuthorService extends CrudService<SentimentModify, SentimentR
String[] indexNames = getIndexNames(queryRequest);
Long totalCount = esQueryAuthorCountService.queryAuthorCount(indexNames, queryRequest);
List<JSONObject>dataList = esQueryAuthorService.queryAuthorListByKeyword(indexNames,queryRequest);
logger.info("[SearchAuthorService] queryAuthorList: TotalCount = " + totalCount);
logger.info("[SearchAuthorService] queryAuthorList: TotalCount :{} " , totalCount);
jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount);
jsonObject.put(ESConstant.MONITORLISTS, dataList);
}catch (Exception e){
@ -142,13 +141,12 @@ public class SearchAuthorService extends CrudService<SentimentModify, SentimentR
List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
parseQueryResult(dataList, esMonitorEntityLists, indexName);
Long totalCount = esQueryAuthorCountService.queryContentsCountByAuthorId(indexNames, queryRequest);
logger.info("[SearchAuthorService] queryContentsByAuthorId: TotalCount = " + totalCount);
jsonObject.put("foldDocAllNumber", totalCount);
jsonObject.put("monitorLists", esMonitorEntityLists);
logger.info("[SearchAuthorService] queryContentsByAuthorId: TotalCount :{} " ,totalCount);
jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount);
jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
}catch (Exception e){
e.printStackTrace();
logger.error("[SearchAuthorService] queryContentsByAuthorId error !");
jsonObject.put("foldDocAllNumber",0);
jsonObject.put(ESConstant.ALLDOCNUMBER,0);
jsonObject.put(ESConstant.MONITORLISTS, new ArrayList<>());
}
return jsonObject;
@ -159,8 +157,8 @@ public class SearchAuthorService extends CrudService<SentimentModify, SentimentR
Map<String,String> siteIdsMap = new HashMap<>();
Map<String,String> siteIconMap = new HashMap<>();
for (Map<String,Object> map: site) {
siteIdsMap.put(map.get("cid").toString().toLowerCase(),map.get("site_id").toString());
siteIconMap.put(map.get("cid").toString().toLowerCase(),map.get("site_icon").toString());
siteIdsMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map.get("site_id").toString());
siteIconMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map.get("site_icon").toString());
}
if(null != dataList && dataList.size() > 0) {
for (JSONObject json : dataList) {
@ -235,8 +233,8 @@ public class SearchAuthorService extends CrudService<SentimentModify, SentimentR
Integer primary = (Integer) sourceAsMap.get(ESConstant.PRIMARY);
JSONObject newjsonObject = getVideoPathList(jsonObject);
List<Map<String,String>> videoList = (List<Map<String, String>>) newjsonObject.get("videoList");
//JSONObject newjsonObject = getVideoPathList(jsonObject);
//List<Map<String,String>> videoList = (List<Map<String, String>>) newjsonObject.get(ESConstant.VIDEOLIST);
ESMonitorEntity esMonitorEntity = new ESMonitorEntity();
try {
@ -280,52 +278,54 @@ public class SearchAuthorService extends CrudService<SentimentModify, SentimentR
return esMonitorEntity;
}
/**
 * Adds a "videoList" entry to a document flagged as downloaded and strips the
 * raw video fields from it.
 *
 * <p>When the document has {@code isDownload == "true"}, the first element of its
 * video-path array (if any) becomes a single map holding the URL, size,
 * resolution and video time. The size is looked up in the video-path-size map
 * only when that field's text contains "http". An empty video-path array
 * produces an empty "videoList".
 *
 * <p>Failures while building the list are logged and otherwise ignored; the
 * video-path-size, resolution and video-time fields are removed in every case.
 *
 * @param jsonObject the document to modify in place
 * @return the same {@code jsonObject} instance
 */
private JSONObject getVideoPathList(JSONObject jsonObject) {
    try {
        if (jsonObject.containsKey("isDownload") && jsonObject.get("isDownload").toString().equals("true")) {
            String videoTime = jsonObject.getString(ESConstant.VIDEOTIME);
            String resolution = jsonObject.getString(ESConstant.RESOLUTION);
            List<?> videoPath = JSONObject.parseArray(jsonObject.get(ESConstant.VIDEOPATH).toString());
            List<Map<String, String>> videoList = new ArrayList<>();
            if (videoPath.size() > 0) {
                String videoUrl = videoPath.get(0).toString();
                String size = "";
                if (jsonObject.get(ESConstant.VIDEOPATHSIZE).toString().contains("http")) {
                    Map<String, String> videoSizeMap = (Map<String, String>) JSONUtils.parse((String) jsonObject.get(ESConstant.VIDEOPATHSIZE));
                    if (videoSizeMap.containsKey(videoUrl)) {
                        size = videoSizeMap.get(videoUrl);
                    }
                }
                Map<String, String> videoMap = new HashMap<>();
                videoMap.put(ESConstant.URL, videoUrl);
                videoMap.put("size", size);
                videoMap.put(ESConstant.RESOLUTION, resolution);
                videoMap.put(ESConstant.VIDEOTIME, videoTime);
                videoList.add(videoMap);
            }
            jsonObject.put("videoList", videoList);
        }
    } catch (Exception e) {
        // Best-effort enrichment: record the failure through the class logger
        // (not stderr) and fall through to the field clean-up below.
        logger.error("[SearchAuthorService] getVideoPathList error !", e);
    }
    jsonObject.remove(ESConstant.VIDEOPATHSIZE);
    jsonObject.remove(ESConstant.RESOLUTION);
    jsonObject.remove(ESConstant.VIDEOTIME);
    return jsonObject;
}
/**
 * Resolves the first index name for the request's subject id on the
 * mini-type cluster.
 *
 * @param queryRequest request whose subject id selects the indices
 * @return the first element of the list returned by
 *         {@code subjectQueryDataService.getIndexBySubjectIds}
 */
private String getIndexName(QueryRequest queryRequest) {
    Cluster miniCluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
    List<String> indices = subjectQueryDataService.getIndexBySubjectIds(miniCluster, queryRequest.getSubjectId());
    return indices.get(0);
}
/**
 * Resolves every index name for the request's subject id on the mini-type
 * cluster.
 *
 * @param queryRequest request whose subject id selects the indices
 * @return the list returned by
 *         {@code subjectQueryDataService.getIndexBySubjectIds}, copied into an array
 */
private String[] getIndexNames(QueryRequest queryRequest) {
    Cluster miniCluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
    List<String> indices = subjectQueryDataService.getIndexBySubjectIds(miniCluster, queryRequest.getSubjectId());
    return indices.toArray(new String[0]);
}
// private JSONObject getVideoPathList(JSONObject jsonObject) {
// try {
// if (jsonObject.containsKey(ESConstant.ISDOWNLOAD) && jsonObject.get(ESConstant.ISDOWNLOAD).toString().equals("true")) {
// String videoTime = jsonObject.getString(ESConstant.VIDEOTIME);
// String resolution = jsonObject.getString(ESConstant.RESOLUTION);
// List videoPath = JSONObject.parseArray(jsonObject.get(ESConstant.VIDEOPATH).toString());
// String videoUrl = "";
// String size = "";
// List<Map<String, String>> videoList = new ArrayList<>();
// if(videoPath.size() > 0) {
// videoUrl = videoPath.get(0).toString();
// if (jsonObject.get(ESConstant.VIDEOPATHSIZE).toString().contains("http")) {
// Map<String, String> videoSizeMap = (Map<String, String>) JSONUtils.parse((String) jsonObject.get(ESConstant.VIDEOPATHSIZE));
// if (videoSizeMap.containsKey(videoUrl)) {
// size = videoSizeMap.get(videoUrl);
// }
// }
// Map<String, String> videoMap = new HashMap<>();
// videoMap.put(ESConstant.URL, videoUrl);
// videoMap.put(ESConstant.SIZE, size);
// videoMap.put(ESConstant.RESOLUTION, resolution);
// videoMap.put(ESConstant.VIDEOTIME, videoTime);
// videoList.add(videoMap);
// }
// jsonObject.put(ESConstant.VIDEOLIST, videoList);
// }
// }catch (Exception e){
// e.printStackTrace();
// }
// jsonObject.remove(ESConstant.VIDEOPATHSIZE);
// jsonObject.remove(ESConstant.RESOLUTION);
// jsonObject.remove(ESConstant.VIDEOTIME);
// return jsonObject;
// }
//
// private String getIndexName(QueryRequest queryRequest) {
// Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
// List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
// String indexName = currentIndexList.get(0);
// return indexName;
// }
}

128
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -60,8 +60,8 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
private SearchResponse buildDataIdQueryCrawl(Integer from,Integer searchSize,List<String> dataIdList,
String orderFlag,String sortFlag,
List<String> currentIndexList, Cluster cluster) {
if(sortFlag.equals("comment")){
sortFlag = "commentsCount";
if(sortFlag.equals(ESConstant.COMMENT)){
sortFlag = ESConstant.COMMENTS_COUNT;
}
if(sortFlag.equals("")){
sortFlag = ESConstant.PUBTIME;
@ -83,11 +83,15 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
/**
* 遍历查询结果调用解析组装方法 1
*/
private void parseQueryResult(SearchResponse response, List<ESMonitorEntity> esMonitorListEntity) throws Exception {
SearchHit[] hits = response.getHits().getHits();
for (SearchHit mainMessageHit : hits) {
ESMonitorEntity mainMonitorEntity = parseMainMessage(mainMessageHit);
esMonitorListEntity.add(mainMonitorEntity);
private void parseQueryResult(SearchResponse response, List<ESMonitorEntity> esMonitorListEntity) {
try {
SearchHit[] hits = response.getHits().getHits();
for (SearchHit mainMessageHit : hits) {
ESMonitorEntity mainMonitorEntity = parseMainMessage(mainMessageHit);
esMonitorListEntity.add(mainMonitorEntity);
}
}catch (Exception e){
logger.info("[SearchDataService] parseQueryResult ERROR !");
}
}
/**
@ -98,7 +102,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
List<Map<String, Object>> site = siteRepository.findsiteByDel(0);
Map<String,Map<String,Object>> siteMap = new HashMap<>();
for (Map<String, Object> map : site) {
siteMap.put(map.get("cid").toString().toLowerCase(),map);
siteMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map);
}
if (null != dataList && dataList.size() > 0) {
for (JSONObject json : dataList) {
@ -107,7 +111,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
}
}
}catch (Exception e){
e.printStackTrace();
logger.info("[SearchDataService] parseQueryResult ERROR !");
}
}
/**
@ -179,7 +183,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
// 返回索引名称查询详情时使用
esMonitorEntity.setSubjectId(indexName);
} catch (Exception e) {
e.printStackTrace();
logger.info("[SearchDataService] parseMainMessage ERROR !");
}
return esMonitorEntity;
}
@ -222,10 +226,8 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
String siteId = "";
String icon = "";
String siteType = "";
if(enSource.equals("sina")){
if(enSource.equals(ESConstant.SINA)){
siteId = "183";
icon = "";
siteType = "";
}else {
Map<String, Object> siteOtherMap = siteMap.get(enSource);
if (siteOtherMap.containsKey("site_id")) {
@ -265,9 +267,9 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
author = sourceAsMap.get(ESConstant.AUTHOR).toString();
quoteCount = sourceAsMap.get(ESConstant.QUOTE_COUNT).toString();
collentCount = sourceAsMap.get(ESConstant.COLLE_CTCOUNT).toString();
if(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString().contains("totalCount")) {
if(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString().contains(ESConstant.TOTALCOUNT)) {
JSONObject countMap = JSONObject.parseObject(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString());
attitudeCount = countMap.getString("totalCount");
attitudeCount = countMap.getString(ESConstant.TOTALCOUNT);
}else{
attitudeCount = sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString();
}
@ -335,7 +337,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
if (null != sourceAsMap.get(ESConstant.VIDEOPATHSIZE)
&& !("[]").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE))
&& !("{\"\":null}").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString())) {
if(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString().contains("url")) {
if(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString().contains(ESConstant.URL)) {
videoPathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString());
}
}
@ -393,22 +395,22 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
String valueLabel = "";
String categoryLabel = "";
String tag = "";
if (sourceAsMap.containsKey("valueLabel") && null != sourceAsMap.get(ESConstant.VALUELABEL)) {
// System.out.println("11111 "+sourceAsMap.get(ESConstant.VALUELABEL));
valueLabel = sourceAsMap.get("valueLabel").toString();
if (sourceAsMap.containsKey(ESConstant.VALUELABEL) && null != sourceAsMap.get(ESConstant.VALUELABEL)) {
valueLabel = sourceAsMap.get(ESConstant.VALUELABEL).toString();
}
if(sourceAsMap.containsKey("categoryLabel")){
categoryLabel = sourceAsMap.get("categoryLabel").toString();
if(sourceAsMap.containsKey(ESConstant.CATEGORYLABEL)){
categoryLabel = sourceAsMap.get(ESConstant.CATEGORYLABEL).toString();
}
if(sourceAsMap.containsKey("tag")){
tag = sourceAsMap.get("tag").toString();
if(sourceAsMap.containsKey(ESConstant.TAG)){
tag = sourceAsMap.get(ESConstant.TAG).toString();
}
String otherSourceJson = "";
if(sourceAsMap.containsKey("otherSourceJson")){
otherSourceJson = sourceAsMap.get("otherSourceJson").toString();
if(sourceAsMap.containsKey(ESConstant.OTHERSOURCEJSON)){
otherSourceJson = sourceAsMap.get(ESConstant.OTHERSOURCEJSON).toString();
}
try {
esMonitorEntity.setDataId(dataId);
esMonitorEntity.setDocId(docId);
@ -519,7 +521,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
.must(queryBuilder);
SortOrder flag ;
if(null !=queryRequest.getOrder() && queryRequest.getOrder().equals("desc")) {
if(null !=queryRequest.getOrder() && queryRequest.getOrder().equals(ESConstant.DESC)) {
flag = SortOrder.DESC;
}else{
flag = SortOrder.ASC;
@ -563,9 +565,9 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
comments.add(jsonObject);
}
json.put("commentLists",comments);
json.put(ESConstant.COMMENTLISTS,comments);
size = size + searchResponse.getHits().getTotalHits();
json.put("allDocNumber",size);
json.put(ESConstant.ALLDOCNUMBER,size);
}catch (Exception e){
e.printStackTrace();
}
@ -615,7 +617,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
subjectId = cluster.getPrefixIndexPattern() +"_"+ subjectId;
}else{ // 如果是全部数据就直接去 渠道对应的索引查渠道可以从 docId 中截取出来
logger.info("[SearchDataService] queryOneDataByDocId 查询 全局数据 : " + subjectId);
logger.info("[SearchDataService] queryOneDataByDocId 查询 全局数据 : {}" , subjectId);
cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.normal_cluster_type); // 109
}
currentIndexList.add(subjectId);
@ -680,16 +682,14 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
List<Map<String, Object>> site = siteRepository.findSiteByEnSource(enSource);
Map<String,Map<String,Object>> siteMap = new HashMap<>();
for (Map<String, Object> map : site) {
siteMap.put(map.get("cid").toString().toLowerCase(),map);
siteMap.put(map.get(ESConstant.CID).toString().toLowerCase(),map);
}
String siteId = "";
String icon = "";
String siteType = "";
Map<String,Object> siteOtherMap = siteMap.get(enSource);
if(enSource.equals("sina")){
if(enSource.equals(ESConstant.SINA)){
siteId = "183";
icon = "";
siteType = "";
}else {
if (siteOtherMap.containsKey("site_id")) {
siteId = siteMap.get(enSource).get("site_id").toString();
@ -719,15 +719,15 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
if(jsonObject.containsKey(ESConstant.SMALLIMGS) && jsonObject.get(ESConstant.SMALLIMGS) instanceof List){
List<String> smallImgs = (List<String>) jsonObject.get(ESConstant.SMALLIMGS);
List<Map<String,String>> imagePathSize = new ArrayList<>();
if(null != smallImgs && smallImgs.size() > 0) {
if(null != smallImgs && smallImgs.size() > 0) { // 由于电商的图片不做下载因此输出到页面的时候写死了大小和分辨率
for (Object img : smallImgs) {
if (!img.toString().contains("http")) {
Map<String,String> imagePathMap = new HashMap<>();
String url = "http:" + img ;
imagePathMap.put(ESConstant.URL,url);
imagePathMap.put("size","4KB");
imagePathMap.put("videoTime","");
imagePathMap.put("resolution","50×50");
imagePathMap.put(ESConstant.URL ,url);
imagePathMap.put(ESConstant.SIZE ,"4KB");
imagePathMap.put(ESConstant.VIDEOTIME ,"");
imagePathMap.put(ESConstant.RESOLUTION ,"50×50");
imagePathSize.add(imagePathMap);
}
}
@ -795,15 +795,15 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
} else if (entry.getKey().equals(ESConstant.FILEPATHSIZE) ||
entry.getKey().equals(ESConstant.IMAGEPATHSIZE) ||
entry.getKey().equals(ESConstant.VIDEOPATHSIZE)) {
if(entry.getValue().toString() .contains("url")) {
if(entry.getValue().toString() .contains(ESConstant.URL)) {
jsonObject.put(entry.getKey(), JSONObject.parseArray(entry.getValue().toString()));
}else{
jsonObject.put(entry.getKey(),new ArrayList<>());
}
} else if(entry.getKey().equals(ESConstant.ATTITUDES_COUNT)){
if(entry.getValue().toString().contains("totalCount")){
if(entry.getValue().toString().contains(ESConstant.TOTALCOUNT)){
JSONObject totalCount = JSONObject.parseObject(entry.getValue().toString());
jsonObject.put(entry.getKey(),totalCount.get("totalCount"));
jsonObject.put(entry.getKey(),totalCount.get(ESConstant.TOTALCOUNT));
}
} else {
jsonObject.put(entry.getKey(), entry.getValue());
@ -828,7 +828,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster,subjectId);
Long clusterId = cluster.getId();
logger.info("[SearchDataService] queryDataList clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
logger.info("[SearchDataService] queryDataList clusterId : {}; currentIndexList : {}" ,clusterId, currentIndexList.toString());
String orderFlag = queryRequest.getOrder(); // 排序方式 asc/desc
String sortFlag = queryRequest.getSidx(); // 排序字段
@ -911,7 +911,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
Long clusterId = cluster.getId();
logger.info("[SearchDataService] queryDataInOneIndex: clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
logger.info("[SearchDataService] queryDataInOneIndex: clusterId :{} ; currentIndexList : {}", clusterId,currentIndexList.toString());
// String indexName = currentIndexList.get(0);
String indexNames [] = currentIndexList.toArray(new String [currentIndexList.size()]);
List<JSONObject> dataList = esQueryServiceForSQMini.queryDataFromOneSubject(indexNames, queryRequest);
@ -920,7 +920,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
Integer searchType = queryRequest.getSearchType();
parseQueryResult(dataList, esMonitorEntityLists, searchType);
Long totalCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames,queryRequest);
logger.info("[SearchDataService] queryDataInOneIndex: "+totalCount);
logger.info("[SearchDataService] queryDataInOneIndex: {}",totalCount);
jsonObject.put(ESConstant.ALLDOCNUMBER,totalCount);
jsonObject.put(ESConstant.MONITORLISTS,esMonitorEntityLists);
@ -938,10 +938,10 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
Long clusterId = cluster.getId();
String [] indexName = currentIndexList.toArray(new String[currentIndexList.size()]);
logger.info("[SearchDataService] exportDataInOneIndex: IndexName = " +indexName[0] +" ; clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
logger.info("[SearchDataService] exportDataInSubjectIndex : IndexName :{} ; clusterId :{} ; currentIndexList :{}",indexName[0], clusterId , currentIndexList.toString());
// 开始查询
jsonObject= esQueryServiceForSQMini.exportDataFromOneSubject(indexName, queryRequest);
List<JSONObject> dataList = (List<JSONObject>) jsonObject.get("monitorLists");
List<JSONObject> dataList = (List<JSONObject>) jsonObject.get(ESConstant.MONITORLISTS);
List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
Integer searchType = queryRequest.getSearchType();
parseQueryResult(dataList, esMonitorEntityLists,searchType);
@ -967,10 +967,10 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
}
Long clusterId = cluster.getId();
logger.info("[SearchDataService] exportDataInOneIndex: clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
logger.info("[SearchDataService] exportDataInOneIndex: clusterId :{}; currentIndexList : {}" , clusterId , currentIndexList.toString());
// 开始查询
jsonObject= esQueryServiceForSQNormal.exportDataFromIndexs(currentIndexList, queryRequest);
List<JSONObject> dataList = (List<JSONObject>) jsonObject.get("monitorLists");
List<JSONObject> dataList = (List<JSONObject>) jsonObject.get(ESConstant.MONITORLISTS);
List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
Integer searchType = queryRequest.getSearchType();
parseQueryResult(dataList, esMonitorEntityLists,searchType);
@ -987,7 +987,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
Long clusterId = cluster.getId();
logger.info("[SearchDataService] queryDataCountsInOneIndex: clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
logger.info("[SearchDataService] queryDataCountsInOneIndex: clusterId : {}; currentIndexList : {}" , clusterId ,currentIndexList.toString());
// String indexName = currentIndexList.get(0);
String indexNames [] = currentIndexList.toArray(new String [currentIndexList.size()]);
Long contentCount = 0L;
@ -1003,36 +1003,12 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
}catch (Exception e){
e.printStackTrace();
}
jsonObject.put("contentCount",contentCount);
jsonObject.put("commentCount",commentCount);
jsonObject.put("authorCount",authorCount);
jsonObject.put(ESConstant.CONTENTCOUNT,contentCount);
jsonObject.put(ESConstant.COMMENTCOUNT,commentCount);
jsonObject.put(ESConstant.AUTHORCOUNT,authorCount);
}catch (Exception e){
e.printStackTrace();
}
return jsonObject;
}
// public JSONObject exportDataInSubjectIndexTestGroupBy(QueryRequest queryRequest) {
// JSONObject jsonObject = new JSONObject();
// try {
// // 获取 ES 的连接方式及要查询的索引列表
// Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111
// List<String> currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId());
// Long clusterId = cluster.getId();
// String [] indexName = currentIndexList.toArray(new String[currentIndexList.size()]);
// logger.info("[SearchDataService] exportDataInOneIndex: IndexName = " +indexName[0] +" ; clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString());
// // 开始查询
// jsonObject= esQueryServiceForSQMini.exportDataFromOneSubjectTestGroupBy(indexName, queryRequest);
//
// List<JSONObject> dataList = (List<JSONObject>) jsonObject.get("monitorLists");
// List<ESMonitorEntity> esMonitorEntityLists = new ArrayList<>();
// Integer searchType = queryRequest.getSearchType();
// parseQueryResult(dataList, esMonitorEntityLists,searchType);
// logger.info("Query Finish exportDataInSubjectIndexTestGroupBy size: " + esMonitorEntityLists.size());
// jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists);
// }catch (Exception e){
// e.printStackTrace();
// }
// return jsonObject;
// }
}

3
cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java

@ -3,10 +3,7 @@ package com.bfd.mf.service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.service.text.TextService;
import com.bfd.mf.common.util.constants.ConditionCommon;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.slice.SliceScrollUtil;
import com.bfd.mf.common.util.utility.CollectionUtils;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity;
import com.bfd.nlp.common.util.object.TObjectUtils;
import com.bfd.nlp.common.util.string.TStringUtils;

10
cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java

@ -4,20 +4,12 @@ import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.service.common.CrudService;
import com.bfd.mf.common.service.es.ClusterService;
import com.bfd.mf.common.util.ESServerUtils;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.es.EsUtils;
import com.bfd.mf.common.web.entity.mysql.SentimentModify;
import com.bfd.mf.common.web.entity.mysql.cache.Cluster;
import com.bfd.mf.common.web.repository.mysql.SentimentRepository;
import com.bfd.mf.common.web.vo.params.QueryRequest;
import com.bfd.mf.config.BFDApiConfig;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.ScriptQueryBuilder;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@ -74,7 +66,7 @@ public class UpdateService extends CrudService<SentimentModify, SentimentReposit
String script ="ctx._source['valueLabel']='"+valueLabel+"';ctx._source['categoryLabel']='"+categoryLabel+"'";
String index = subjectId;
docId = docId.split("_")[2];
System.out.println(docId);
// System.out.println(docId);
// 先根据docId 查到 _id_
long updateVersion = EsUtils.updateByDocId(clusterName,index,script,docId,params);
if(updateVersion> 0) {

33
cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java

@ -1,6 +1,7 @@
package com.bfd.mf.service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.common.util.constants.ESConstant;
import com.bfd.mf.common.util.es.EsUtils2;
import com.bfd.mf.common.web.entity.mysql.topic.ParseExcelTask;
import com.bfd.mf.common.web.repository.mysql.topic.ParseExcelTaskRepository;
@ -45,7 +46,7 @@ public class UploadExcelService {
/**
 * Builds a {@code ParseExcelTask} from the given excel name and user info,
 * saves it through the repository and logs the saved entity as JSON.
 *
 * @param excelName name of the excel file, passed to {@code convertExcelTaskDetailEntity}
 * @param userinfo  user attributes, passed to {@code convertExcelTaskDetailEntity}
 * @throws Exception propagated from the conversion or from the repository save
 */
public void insertParseExcelTask(String excelName,Map<String,Object> userinfo)throws Exception{
    ParseExcelTask excelDetail = convertExcelTaskDetailEntity(excelName,userinfo);
    excelDetail = parseExcelTaskRepository.save(excelDetail);
    // Log the saved entity exactly once, using a parameterized message.
    logger.info("[UserNodeService] insertExcelTask : {}",JSONObject.toJSONString(excelDetail));
}
// public boolean isExcelExist(String excelName) throws Exception{
@ -87,8 +88,8 @@ public class UploadExcelService {
if (null == excelName) {
throw new IllegalArgumentException("<IllegalArgumentException> request node is null");
}
String user = (String) userinfo.get("user");
String userId = (String) userinfo.get("userId");
String user = (String) userinfo.get(ESConstant.USER);
String userId = (String) userinfo.get(ESConstant.USERID);
ParseExcelTask excelTaskDetail = new ParseExcelTask();
excelTaskDetail.setExcelName(excelName.replace(".xlsx",""));
excelTaskDetail.setCreateUser(user);
@ -166,6 +167,19 @@ public class UploadExcelService {
}
}
/**
 * Inserts a task row through {@code parseExcelTaskRepository.insertTask}.
 *
 * <p>The subject id is parsed as a decimal {@code long} and wrapped in a
 * {@link BigInteger}; the crawl-data flag is built as {@code "keyword:" + fileRemak}.
 *
 * @param subjectId subject id as a decimal string
 * @param user      passed through to the repository unchanged
 * @param userId    passed through to the repository unchanged
 * @param fileRemak passed through to the repository and used to build the crawl-data flag
 * @param zipName   passed through to the repository unchanged
 * @return {@code true} if the repository call completed without throwing;
 *         {@code false} if parsing the id or the repository call threw
 */
public boolean insertTask(String subjectId, String user, String userId, String fileRemak, String zipName) {
    try {
        BigInteger id = BigInteger.valueOf(Long.parseLong(subjectId));
        String crawlDataFlag = "keyword:" + fileRemak;
        parseExcelTaskRepository.insertTask(id,user,userId,fileRemak,zipName,crawlDataFlag);
        return true;
    }catch (Exception e){
        // Route the failure through the class logger (with stack trace) instead of stderr.
        logger.error("[UploadExcelService] insertTask failed, subjectId : {}", subjectId, e);
        return false;
    }
}
// public BigInteger getOneSubjectId() {
// int subjectId = parseExcelTaskRepository.findOneSubjectId();
// BigInteger newSubjectId = new BigInteger((subjectId+1)+"");
@ -430,19 +444,6 @@ public class UploadExcelService {
// return resultMap;
// }
/**
 * Inserts a task row through {@code parseExcelTaskRepository.insertTask}.
 *
 * <p>The subject id is parsed as a decimal {@code long} and wrapped in a
 * {@link BigInteger}; the crawl-data flag is built as {@code "keyword:" + fileRemak}.
 *
 * @param subjectId subject id as a decimal string
 * @param user      passed through to the repository unchanged
 * @param userId    passed through to the repository unchanged
 * @param fileRemak passed through to the repository and used to build the crawl-data flag
 * @param zipName   passed through to the repository unchanged
 * @return {@code true} if the repository call completed without throwing;
 *         {@code false} if parsing the id or the repository call threw
 */
public boolean insertTask(String subjectId, String user, String userId, String fileRemak, String zipName) {
    try {
        BigInteger id = BigInteger.valueOf(Long.parseLong(subjectId));
        String crawlDataFlag = "keyword:" + fileRemak;
        parseExcelTaskRepository.insertTask(id,user,userId,fileRemak,zipName,crawlDataFlag);
        return true;
    }catch (Exception e){
        // Route the failure through the class logger (with stack trace) instead of stderr.
        logger.error("[UploadExcelService] insertTask failed, subjectId : {}", subjectId, e);
        return false;
    }
}
// public static void main(String[] args) {
// List<String> line = ReadLine.readLine(new File("E:\\100.txt"));
// for (String l:line) {

7
cl_search_api/src/main/resources/application.yml

@ -7,6 +7,11 @@ server:
http2:
enabled: true
logging:
config: ../etc/logback.xml
level:
com.bfd.mf.controller: trace
spring:
datasource:
driver-class-name: com.mysql.jdbc.Driver
@ -49,6 +54,8 @@ bfd.api.mf:
address: 172.18.1.134:9301
upper: 2018-09-01
standby: cl_index_*
# es-mini:
# name: SQ_Mini
# address: 172.26.11.111:9301

18
cl_search_api/src/main/resources/log4j.properties

@ -1,18 +0,0 @@
log4j.rootLogger=INFO, error
###### error appender definition #######
log4j.appender.error=org.apache.log4j.DailyRollingFileAppender
log4j.appender.error.File=logs/sdkclient_error.log
log4j.appender.error.Append=true
log4j.appender.error.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.error.layout=org.apache.log4j.PatternLayout
log4j.appender.error.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c] %m%n
#error only
log4j.appender.error.filter.F1=org.apache.log4j.varia.LevelRangeFilter
log4j.appender.error.filter.F1.LevelMin=ERROR
log4j.appender.error.filter.F1.LevelMax=ERROR
#
log4j.appender.error.filter.F2=org.apache.log4j.varia.LevelMatchFilter
log4j.appender.error.filter.F2.levelToMatch=WARN
log4j.appender.error.filter.F2.acceptOnMatch=false

32
cl_search_api/src/main/resources/log4j2.properties

@ -1,32 +0,0 @@
#### Settings ###
#log4j.rootLogger = stdout,D,E,I
#
#### Output messages to the console ###
#log4j.appender.stdout = org.apache.log4j.ConsoleAppender
#log4j.appender.stdout.Target = System.out
#log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
#log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
#
#### Log file settings for output at INFO level and above ###
#log4j.appender.I = org.apache.log4j.DailyRollingFileAppender
#log4j.appender.I.File = log_info.log
#log4j.appender.I.Append = true
#log4j.appender.I.Threshold = INFO
#log4j.appender.I.layout = org.apache.log4j.PatternLayout
#log4j.appender.I.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
#
#### Log file settings for output at DEBUG level and above ###
#log4j.appender.D = org.apache.log4j.DailyRollingFileAppender
#log4j.appender.D.File = log_debug.log
#log4j.appender.D.Append = true
#log4j.appender.D.Threshold = INFO
#log4j.appender.D.layout = org.apache.log4j.PatternLayout
#log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
#
#### Log file settings for output at ERROR level and above ###
#log4j.appender.E = org.apache.log4j.DailyRollingFileAppender
#log4j.appender.E.File = log_error.log
#log4j.appender.E.Append = true
#log4j.appender.E.Threshold = ERROR
#log4j.appender.E.layout = org.apache.log4j.PatternLayout
#log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
Loading…
Cancel
Save