diff --git a/.idea/compiler.xml b/.idea/compiler.xml index 88f2527..feda9b0 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -13,7 +13,8 @@ - + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index e8942bd..273b71e 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -7,7 +7,7 @@ - - + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index d6ff43e..c7f9e39 100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -4,7 +4,7 @@ - + \ No newline at end of file diff --git a/cl_query_data_job/pom.xml b/cl_query_data_job/pom.xml index 1fa826d..0e37dbd 100644 --- a/cl_query_data_job/pom.xml +++ b/cl_query_data_job/pom.xml @@ -4,9 +4,9 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - cl_stream_3.1.1 + cl_stream_3.1.2 com.bfd.mf - 3.1.1-SNAPSHOT + 3.1.2-SNAPSHOT cl_query_data_job diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java index ea623fc..d7bb47e 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java @@ -724,6 +724,8 @@ public class ESConstants { public static final String DATA_COUNT = "dataCount"; + public static final String PAGETYPR = "pageType"; + /** * 回溯开始时间 */ @@ -964,6 +966,13 @@ public class ESConstants { public static final String OTHER = "other"; public static final String LIFE = "life"; + public static final String TOTALCOUNT = "totalCount"; + public static final String TODAYCOUNT = "todayCount"; + public static final String IMAGECOUNT = "imageCount"; + public static final String VIDEOCOUNT = "videoCount"; + public static final String FILECOUNT = "fileCount"; + public static final String TEXTCOUNT = "textCount"; + /** * 微博信息 added by Eric 2016-10-28 15:03:31 diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Alarm.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Alarm.java new file mode 100644 index 0000000..5a6d884 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Alarm.java @@ -0,0 +1,153 @@ +package com.bfd.mf.job.domain.entity; + +import javax.persistence.Entity; +import javax.persistence.Table; + +@Entity +@Table(name = "cl_alarm") +public class Alarm extends AbstractEntity { + private int alarmTag; + private int alarmReason; + private String alarmMessage; + private String alarmTaskUrl; + private String alarmTaskContent; + private String alarmCid; + private String alarmConfig; + private String alarmTriggerTime; + private String alarmAssignTime; + private String alarmFinishTime; + private String alarmHandler; + private String opinion; + private int status; + private String createTime; + private String updateTime; + private int del; + + public int getAlarmTag() { + return alarmTag; + } + + public void setAlarmTag(int alarmTag) { + this.alarmTag = alarmTag; + } + + public int getAlarmReason() { + return alarmReason; + } + + public void setAlarmReason(int alarmReason) { + this.alarmReason = alarmReason; + } + + public String getAlarmMessage() { + return alarmMessage; + } + + public void setAlarmMessage(String alarmMessage) { + this.alarmMessage = alarmMessage; + } + + public String getAlarmTaskUrl() { + return alarmTaskUrl; + } + + public void setAlarmTaskUrl(String alarmTaskUrl) { + this.alarmTaskUrl = alarmTaskUrl; + } + + public String getAlarmTaskContent() { + return alarmTaskContent; + } + + public void setAlarmTaskContent(String alarmTaskContent) { + this.alarmTaskContent = alarmTaskContent; + } + + public String getAlarmCid() { + return alarmCid; + } + + public void setAlarmCid(String alarmCid) { + this.alarmCid = alarmCid; + } + + public String getAlarmConfig() { + return alarmConfig; + } + + public void setAlarmConfig(String alarmConfig) { + this.alarmConfig = alarmConfig; + } + + public String getAlarmTriggerTime() { + return alarmTriggerTime; + } + + public void setAlarmTriggerTime(String alarmTriggerTime) { + this.alarmTriggerTime = alarmTriggerTime; + } + + public String getAlarmAssignTime() { + return alarmAssignTime; + } + + public void setAlarmAssignTime(String alarmAssignTime) { + this.alarmAssignTime = alarmAssignTime; + } + + public String getAlarmFinishTime() { + return alarmFinishTime; + } + + public void setAlarmFinishTime(String alarmFinishTime) { + this.alarmFinishTime = alarmFinishTime; + } + + public String getAlarmHandler() { + return alarmHandler; + } + + public void setAlarmHandler(String alarmHandler) { + this.alarmHandler = alarmHandler; + } + + public String getOpinion() { + return opinion; + } + + public void setOpinion(String opinion) { + this.opinion = opinion; + } + + public int getStatus() { + return status; + } + + public void setStatus(int status) { + this.status = status; + } + + public String getCreateTime() { + return createTime; + } + + public void setCreateTime(String createTime) { + this.createTime = createTime; + } + + public String getUpdateTime() { + return updateTime; + } + + public void setUpdateTime(String updateTime) { + this.updateTime = updateTime; + } + + public int getDel() { + return del; + } + + public void setDel(int del) { + this.del = del; + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/AlarmRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/AlarmRepository.java new file mode 100644 index 0000000..75b2190 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/AlarmRepository.java @@ -0,0 +1,9 @@ +package com.bfd.mf.job.domain.repository; + +import com.bfd.mf.job.domain.entity.Alarm; +import org.springframework.data.repository.CrudRepository; + +public interface AlarmRepository extends CrudRepository { + + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java index 6fd0539..f1dfb86 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java @@ -12,34 +12,18 @@ import java.util.Map; public interface TaskRepository extends CrudRepository { -// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true) -// List findAllTask(); - - @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM cl_task WHERE task_type <> 3 AND crawl_status = 1 AND cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true) + @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true) List findAllNewTask(); - // 统计服务查询 要统计的任务,之前由于所有任务都要半小时统计,任务太多会把E搞挂,就只统计 update_time 近一天的吧! -// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark FROM cl_task WHERE del = 0 AND crawl_status <> 3",nativeQuery = true) // AND crawl_status <> 3 - // 每天只统计两种情况的任务 - //1、当天完成的任务:crawl_status=3 and end_time > 前天 - //2、状态为采集中或者 暂停的任务 crawl_status=0 or crawl_tatus=1 - // 其他的任务就不用每天都统计了!! - @Query(value = " SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM `cl_task` WHERE del = 0 AND ((crawl_status = 1 OR crawl_status = 0) OR (crawl_status = 3 AND end_time > date_sub(curdate(),interval 2 day))) ;",nativeQuery = true) + // 需要统计的任务的查询条件 1、 状态为 1 OR 0;2、状态为3,且任务完成时间再2天前的。 + @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ((ct.crawl_status = 1 OR ct.crawl_status = 0) OR (ct.crawl_status = 3 AND ct.end_time > date_sub(curdate(),interval 2 day))); ",nativeQuery = true) + // @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.subject_id = 12273 ; ",nativeQuery = true) List findAllBydel0(); -// -// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true) -// List findAllNewTask(); -// -// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE del=0 ",nativeQuery = true) -// List findAllBydel0(); - -// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true) -// List findTasksBySbujectIdAndDel0(BigInteger subjectId); - @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true) + @Query(value = "SELECT sum(data_total) FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid=cs.cid WHERE ct.del =0 AND ct.subject_id = ?1 AND cs.site_type = ?2",nativeQuery = true) Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); - @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true) + @Query(value = "SELECT sum(today_data_total) FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid=cs.cid WHERE ct.del =0 AND ct.subject_id = ?1 AND cs.site_type = ?2 ",nativeQuery = true) Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true) @@ -52,17 +36,6 @@ public interface TaskRepository extends CrudRepository { @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time 3 ",nativeQuery = true) Long findTodayDataTotal(String taskStartTime ,String taskEndTime); -// @Query(value = " SELECT count(*) FROM cl_task WHERE today_data_total > 0 AND task_level < 2 AND crawl_status = 3 ",nativeQuery = true) -// Long findTodayDataTotalTaskNum(); -// @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true) -// List getTaskNeedUpLoad(); -// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) -// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); -// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) -// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); - - - /** * 更新每个任务 拉数据次数 */ @@ -104,7 +77,108 @@ public interface TaskRepository extends CrudRepository { @Query(value = "SELECT TIMESTAMPDIFF(MINUTE, start_time,end_time) FROM cl_task WHERE del = 0 AND task_type <>3 AND crawl_status = 3 AND data_total > 0 AND end_time > ?1 AND end_time < ?2 ",nativeQuery = true) List findTaskByCrawlTime(String taskStartTime, String taskEndTime); + // @Query(value = "SELECT ct.id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.file_name,ct.file_remark,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.id = ?1",nativeQuery = true) + + @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.id = ?1 ;",nativeQuery = true) + List findOneTaskByIdAndAppId(long taskId); + + + + + +// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true) +// List findAllTask(); + +// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM cl_task WHERE task_type <> 3 AND crawl_status = 1 AND cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true) +// List findAllNewTask(); +// +// // 统计服务查询 要统计的任务,之前由于所有任务都要半小时统计,任务太多会把E搞挂,就只统计 update_time 近一天的吧! +//// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark FROM cl_task WHERE del = 0 AND crawl_status <> 3",nativeQuery = true) // AND crawl_status <> 3 +// // 每天只统计两种情况的任务 +// //1、当天完成的任务:crawl_status=3 and end_time > 前天 +// //2、状态为采集中或者 暂停的任务 crawl_status=0 or crawl_tatus=1 +// // 其他的任务就不用每天都统计了!! +// @Query(value = " SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM `cl_task` WHERE del = 0 AND subject_id = 12273 AND ((crawl_status = 1 OR crawl_status = 0) OR (crawl_status = 3 AND end_time > date_sub(curdate(),interval 2 day))) ;",nativeQuery = true) +// List findAllBydel0(); +//// +//// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true) +//// List findAllNewTask(); +//// +//// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE del=0 ",nativeQuery = true) +//// List findAllBydel0(); +// +//// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true) +//// List findTasksBySbujectIdAndDel0(BigInteger subjectId); +// +// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true) +// Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); +// +// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true) +// Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); +// +// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true) +// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); +// +// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true) +// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); +// +// // 统计任务的抓取量! 任务质量 任务状态为“已完成” 的 今天入库的总数据量 / 总任务数 +// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time 3 ",nativeQuery = true) +// Long findTodayDataTotal(String taskStartTime ,String taskEndTime); +// +//// @Query(value = " SELECT count(*) FROM cl_task WHERE today_data_total > 0 AND task_level < 2 AND crawl_status = 3 ",nativeQuery = true) +//// Long findTodayDataTotalTaskNum(); +//// @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true) +//// List getTaskNeedUpLoad(); +//// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) +//// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); +//// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) +//// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); +// +// +// +// /** +// * 更新每个任务 拉数据次数 +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_task set cache_num=?1 where id=?2", nativeQuery = true) +// Integer updateStatus(int cache_num,long id); +// +// /** +// * 乐观锁 +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_task set cache_num=?1 where id=?2 and cache_num=?3", nativeQuery = true) +// Integer tryLock(Integer newStatus, long id, Integer oldStatus); +// +// /** +// * 修改每个任务的统计结果 +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true) +// void updateTaskCount(Long id, Long totalCount, Long todayCount); +// +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_task set crawl_status =?4 where id =?1 ", nativeQuery = true) +// void updateCrawlStatus(long taskId); +// +// @Query(value = " SELECT id,start_time,end_time FROM cl_task WHERE del=0 AND crawl_status = 3 AND start_time >?1 AND end_time > findByCrawlTime(String taskStartTime, String taskEndTime); +// +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "UPDATE cl_task SET today_data_total=0 WHERE end_time 0 ", nativeQuery = true) +// void updateTodayTotalCount(String updateTime); +// +// // 获得前一天完成的任务的时间差(除欧莱雅的任务和上传的任务) +// @Query(value = "SELECT TIMESTAMPDIFF(MINUTE, start_time,end_time) FROM cl_task WHERE del = 0 AND task_type <>3 AND crawl_status = 3 AND data_total > 0 AND end_time > ?1 AND end_time < ?2 ",nativeQuery = true) +// List findTaskByCrawlTime(String taskStartTime, String taskEndTime); // +//// // /** // * 更新进度 // */ diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java index dd178ed..a78893b 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java @@ -125,6 +125,9 @@ public class EsQueryMiniService { } + /** + * 查询每个任务 的总量和当天的量 以及 包含图片的量、包含视频的量、包含附件的量 + */ public Map getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag,String indexNamePre) { Map countMap = new HashMap<>(); String indexName = indexNamePre + task.getSubjectId();//subject_id @@ -136,27 +139,24 @@ public class EsQueryMiniService { if (indexName.contains(indexNamePre)) { boolean isExists = EsUtils.indexExists(clusterName, indexName); if (isExists) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - // 任务ID 筛选 - TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid); - TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag); - qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder); - // 时间范围筛选 - BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); - RangeQueryBuilder rangeQueryBuilder = QueryBuilders - .rangeQuery(ESConstants.PUBTIME) - .gte(crawlStartTime) - .lt(crawlEndTime); - // 用户数据 - BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); - TermQueryBuilder primartTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PRIMARY, 2); -// TermQueryBuilder pubTimeTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PUBTIME,0); - boolQueryBuilder.must(primartTermQueryBuilder); - shouldbq.should(boolQueryBuilder).should(rangeQueryBuilder); - qb.must(shouldbq); - logger.info("QB1 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); +// // 任务ID 筛选 +// TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid); +// TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag); +// qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder); +// // 时间范围筛选 +// // BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); +// RangeQueryBuilder rangeQueryBuilder = QueryBuilders +// .rangeQuery(ESConstants.PUBTIME) +// .gte(crawlStartTime) +// .lt(crawlEndTime); +// // 不用统计FB 的这种粉丝的量 +// TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR,"socialFans"); +// qb.mustNot(pageTypeQueryBuilder).must(rangeQueryBuilder); + BoolQueryBuilder qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long count = EsUtils.queryCount(clusterName, indexName, qb); countMap.put("totalCount", count); + // 上面的语句是查询 该任务的 总数据量:totalCount,下面的语句是查询 该任务当天的数据量:todayCount long current = System.currentTimeMillis(); long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset(); @@ -165,39 +165,52 @@ public class EsQueryMiniService { .rangeQuery(ESConstants.CRAWLTIME) .gte(startTime).lt(current); qb.must(rangeQueryBuilder2); - logger.info("QB2 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + logger.info("QB2 查询今日总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); Long todayCount = EsUtils.queryCount(clusterName, indexName, qb); countMap.put("todayCount", todayCount); + + // 查询包含图片的数据的量 + //videoPath == egc filePath == ugc imagePath == pgc + TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC,1); + TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC,1); + TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC,1); + qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb.must(pgcTermQueryBuilder); + logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long imageCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("todayCount", todayCount); + qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb.must(egcTermQueryBuilder); + logger.info("QB4 查询有视频的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long videoCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("todayCount", todayCount); + qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime); + qb.must(ugcTermQueryBuilder); + logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long fileCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("todayCount", todayCount); } } } return countMap; } -// public Long getTaskTodayCount(String clusterName,Integer id, Map task) { -// Long count = 0L; -// String indexName = clSubject + (String) task.get("subject_id"); -// String cid = (String) task.get(ESConstants.CID); -// Long crawlStartTime = (Long) task.get("crawl_start_time"); -// Long crawlEndTime = (Long) task.get("crawl_end_time"); -// String crawlDataFlag = (String) task.get("crawl_data_flag"); -// -// if(indexName.contains(subjectPre)) { -// boolean isExists = EsUtils.indexExists(clusterName, indexName); -// if (isExists) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// long current=System.currentTimeMillis(); -// long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); -// Long startTime = new Timestamp(zero).getTime(); -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(ESConstants.CRAWLTIME) -// .gte(startTime) -// .lt(current); -// qb.must(rangeQueryBuilder); -//// Terms result = EsUtils.queryTag(clusterName, indexName, qb, ab, ESConstant.DOC_TYPE + "Tag"); -//// resultMap = parseTerms(result); -// } -// } -// return count; -// } + // 组装最基础的查询语句 + private BoolQueryBuilder getQueryBuilder(String cid, String crawlDataFlag, Long crawlStartTime, Long crawlEndTime) { + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + // 任务ID 筛选 + TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid); + TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag); + qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder); + // 时间范围筛选 + // BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); + RangeQueryBuilder rangeQueryBuilder = QueryBuilders + .rangeQuery(ESConstants.PUBTIME) + .gte(crawlStartTime) + .lt(crawlEndTime); + // 不用统计FB 的这种粉丝的量 + TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR,"socialFans"); + qb.mustNot(pageTypeQueryBuilder).must(rangeQueryBuilder); + return qb; + } } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java index 62b9d65..0b1e6e4 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java @@ -38,7 +38,7 @@ public class StatisticsService { @PostConstruct public void init() { // 注册数据查询来源 -// EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source + EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target } @@ -49,35 +49,35 @@ public class StatisticsService { LOGGER.info("------------------------------------------------------------------ StatisticsService ------------------------------------------------------"); long start = System.currentTimeMillis(); //-------统计134上的总量------------------------------------------------------------------------------------ - // String clusterName = config.esNormalClusterName(); // 获得 134 的 clusterName - //statisticsTotal(clusterName); + String clusterName = config.esNormalClusterName(); // 获得 134 的 clusterName + statisticsTotal(clusterName); long end = System.currentTimeMillis(); LOGGER.info("Statistics Total, took:{} ms.",(end - start)); //-------统计147上的 每个任务的总量------------------------------------------------------------------------- start = System.currentTimeMillis(); - String clusterName = config.esMiniClusterName(); // 获得 147 的 clusterName + clusterName = config.esMiniClusterName(); // 获得 147 的 clusterName statisticsTask(clusterName); end = System.currentTimeMillis(); LOGGER.info("Statistics Task, took:{} ms.",(end - start)); //-------统计每个专题的量------------------------------------------------------------------------------------ - start = System.currentTimeMillis(); - // 如果是正常任务的,用这种方式统计 - List subjectIds = subjectRepository.findAllSubjectIds(); - for (BigInteger subjectId: subjectIds) { - statisticsSubjectBySumTask(subjectId); - } - end = System.currentTimeMillis(); - LOGGER.info("Statistics Subject Normal, took:{} ms.",(end - start)); - // 如果是【欧莱雅】任务的,得用这个方式统计呀 +// start = System.currentTimeMillis(); +// // 如果是正常任务的,用这种方式统计 +// List subjectIds = subjectRepository.findAllSubjectIds(); +// for (BigInteger subjectId: subjectIds) { +// statisticsSubjectBySumTask(subjectId); +// } +// end = System.currentTimeMillis(); +// LOGGER.info("Statistics Subject Normal, took:{} ms.",(end - start)); +// // 如果是【欧莱雅】任务的,得用这个方式统计呀 // start = System.currentTimeMillis(); // List subjectIds1 = subjectRepository.findAllOlySubjectIds(); // for (BigInteger subjectId: subjectIds1) { // statisticsSubject(subjectId,clusterName); // } -// end = System.currentTimeMillis(); -// LOGGER.info("Statistics Subject OLY, took:{} ms.",(end - start)); + end = System.currentTimeMillis(); + LOGGER.info("Statistics Subject OLY, took:{} ms.",(end - start)); } @@ -151,18 +151,18 @@ public class StatisticsService { }else{ siteTodayCount = 0; } - switch (i) { // + switch (i) { case 0: - subjectCrawlDatFlagMap.put("keyword", siteCount); - subjectCrawlDataFlagTodayMap.put("keyword", siteTodayCount); + subjectCrawlDatFlagMap.put(ESConstants.KEYWORD, siteCount); + subjectCrawlDataFlagTodayMap.put(ESConstants.KEYWORD, siteTodayCount); break; case 1: - subjectCrawlDatFlagMap.put("account", siteCount); - subjectCrawlDataFlagTodayMap.put("account", siteTodayCount); + subjectCrawlDatFlagMap.put(ESConstants.ACCOUNT, siteCount); + subjectCrawlDataFlagTodayMap.put(ESConstants.ACCOUNT, siteTodayCount); break; case 2: - subjectCrawlDatFlagMap.put("url", siteCount); - subjectCrawlDataFlagTodayMap.put("url", siteTodayCount); + subjectCrawlDatFlagMap.put(ESConstants.URL, siteCount); + subjectCrawlDataFlagTodayMap.put(ESConstants.URL, siteTodayCount); break; case 3: subjectCrawlDatFlagMap.put("upload", siteCount); @@ -230,7 +230,6 @@ public class StatisticsService { long current = System.currentTimeMillis(); long zero = current/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset(); String updateTime = DateUtil.parseDateByTime(zero); - System.out.println("----- "+ updateTime); taskRepository.updateTodayTotalCount(updateTime); EsQueryMiniService esQueryMiniService = new EsQueryMiniService(); @@ -238,7 +237,6 @@ public class StatisticsService { List taskList = taskRepository.findAllBydel0(); // 遍历任务List ,根据条件组装ES查询语句去对应的索引下查结果,然后回写到任务表中 for (Task task: taskList) { - System.out.println(" 任务ID ===== " + task); Long taskId = task.getId().longValue(); String crawlDataFlag = task.getCrawlDataFlag(); String indexNamePre = config.getIndexNamePre(); @@ -248,9 +246,17 @@ public class StatisticsService { // 直接更新 cl_task 表中的 data_total 和 today_data_total long totalCount = 0L; long todayCount = 0L; - if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) { - totalCount = countMap.get("totalCount"); - todayCount = countMap.get("todayCount"); + long imageCount = 0L; + long videoCount = 0L; + long fileCount = 0L; + long textCount = 0L; + if(countMap.containsKey(ESConstants.TOTALCOUNT) && countMap.containsKey(ESConstants.TODAYCOUNT)) { + totalCount = countMap.get(ESConstants.TOTALCOUNT); + todayCount = countMap.get(ESConstants.TODAYCOUNT); +// imageCount = countMap.get(ESConstants.IMAGECOUNT); +// videoCount = countMap.get(ESConstants.VIDEOCOUNT); +// fileCount = countMap.get(ESConstants.FILECOUNT); +// textCount = countMap.get(ESConstants.TEXTCOUNT); } taskRepository.updateTaskCount(taskId,totalCount,todayCount); } diff --git a/cl_query_data_job/src/main/resources/application.yml b/cl_query_data_job/src/main/resources/application.yml index 48f15d5..6805a53 100644 --- a/cl_query_data_job/src/main/resources/application.yml +++ b/cl_query_data_job/src/main/resources/application.yml @@ -3,15 +3,33 @@ debug: false logging: level: com.bfd.mf: debug +#spring: +# datasource: +# driver-class-name: com.mysql.jdbc.Driver +# username: root +# password: bfd123 +# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round +# hikari: +# maximum-pool-size: 10 +# minimum-idle: 1 spring: datasource: driver-class-name: com.mysql.jdbc.Driver - username: crawl - password: crawl - url: jdbc:mysql://172.18.1.181:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + username: root + password: Bfd123!@# + url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round hikari: maximum-pool-size: 10 minimum-idle: 1 +#spring: +# datasource: +# driver-class-name: com.mysql.jdbc.Driver +# username: root +# password: Bfd123!@# +# url: jdbc:mysql://172.18.1.134:3306/all_task?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round +# hikari: +# maximum-pool-size: 10 +# minimum-idle: 1 worker: @@ -27,16 +45,16 @@ worker: analysis-group: sq_group_cl_analysis_1 ## 服务的状态,true 为启动 - enable-analysis-producer: false - enable-analysis-consumer: false - enable-statistics-producer: true - enable-query-producer: false - enable-backtrace-producer: false - enable-rw-oly-producer: false - enable-up-load-producer: false - enable-output-producer: false - enable-taskcount-producer: false - enable-alarm-producer: false + enable-analysis-producer: false # 查ES写kafka + enable-analysis-consumer: false # 读kafka写ES + enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台) + enable-query-producer: false # 离线拉数(采集平台) + enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用) + enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用 + enable-up-load-producer: false # 上传(采集平台) + enable-output-producer: false #未开发,暂留 + enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台) + enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台) ## 启动服务的线程数 statistics-producer-thread-count: 1 query-producer-thread-count: 10 @@ -64,29 +82,6 @@ worker: uploadZipPath : /opt/nfsdata/uploadFiles/ indexNamePre : cl_major_ - -# es-normal: -# name: SQ_Normal -# address: 172.16.10.61:9301 -# upper: 2000-01-01 -# standby: cl_major_* -# es-reply-source: -# name: SQ_Normal -# address: 172.16.10.61:9301 -# upper: 2000-01-01 -# standby: cl_major_* -# es-mini: -# name: SQ_Normal -# address: 172.16.10.61:9301 -# bulk-thread-count: 5 -# bulk-rate: 3 -# bulk-size: 100 -# es-logstash: -# name: SQ_Normal -# address: 172.16.10.61:9301 -# upper: 2021-01-01 -# standby: logstash-2021.05.13 - es-normal: name: SQ_Normal_new address: 172.18.1.134:9301 diff --git a/cl_search_api/cl_search_api.iml b/cl_search_api/cl_search_api.iml index ef86ae0..378383a 100644 --- a/cl_search_api/cl_search_api.iml +++ b/cl_search_api/cl_search_api.iml @@ -200,6 +200,5 @@ - \ No newline at end of file diff --git a/cl_search_api/pom.xml b/cl_search_api/pom.xml index d87e69e..f318fd8 100644 --- a/cl_search_api/pom.xml +++ b/cl_search_api/pom.xml @@ -5,15 +5,15 @@ 4.0.0 - cl_stream_3.1.1 + cl_stream_3.1.2 com.bfd.mf - 3.1.1-SNAPSHOT + 3.1.2-SNAPSHOT cl_search_api - Search V3.1.1 API + Search V3.1.2 API cl_search_api - 3.1.1-SNAPSHOT + 3.1.2-SNAPSHOT @@ -247,11 +247,11 @@ 3.6.0 - - it.sauronsoftware - jave - 1.0.2 - + + + + + diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/ReadLine.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/ReadLine.java deleted file mode 100644 index 2912c01..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/ReadLine.java +++ /dev/null @@ -1,243 +0,0 @@ -package com.bfd.mf.common.util; - -import it.sauronsoftware.jave.Encoder; - -import javax.imageio.ImageIO; -import javax.imageio.ImageReader; -import javax.imageio.stream.FileImageInputStream; -import javax.imageio.stream.ImageInputStream; -import java.awt.image.BufferedImage; -import java.io.*; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; - - - -/** - * Created by BFD-229 on 2017/7/6. - */ -public class ReadLine { - - public static List readLine( File fileName){ - List list = new ArrayList (); - String line; - try { - InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8"); - BufferedReader reader = new BufferedReader(read); - while ((line = reader.readLine()) != null) { - try { - if (line.length() > 0) { - list.add(line); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - return list; - }catch (UnsupportedEncodingException e) { - e.printStackTrace(); - return null; - } catch (FileNotFoundException e) { - e.printStackTrace(); - return null; - } catch (IOException e) { - e.printStackTrace(); - return null; - } - } - - -// public static List readLine(File fileName){ -// List list = new ArrayList (); -// String line; -// try { -// InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8"); -// BufferedReader reader = new BufferedReader(read); -// while ((line = reader.readLine()) != null) { -// try { -// if (line.length() > 0) { -// list.add(line); -// } -// } catch (Exception e) { -// e.printStackTrace(); -// } -// } -// return list; -// }catch (UnsupportedEncodingException e) { -// e.printStackTrace(); -// return null; -// } catch (FileNotFoundException e) { -// e.printStackTrace(); -// return null; -// } catch (IOException e) { -// e.printStackTrace(); -// return null; -// } -// } - - // 读取文件内容 - public static String readFile(String path){ - File file = new File(path); - StringBuilder result = new StringBuilder(); - try{ - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));//构造一个BufferedReader类来读取文件 - String s = null; - while((s = br.readLine())!=null){//使用readLine方法,一次读一行 - result.append( System.lineSeparator() + s); - } - br.close(); - }catch(Exception e){ - e.printStackTrace(); - } - return result.toString(); - } - - - public static void readFiles(File file){ - if (file.exists()) { - System.err.println("exist"); - try { - FileInputStream fis = new FileInputStream(file); - InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - String line; - while((line = br.readLine()) != null){ - System.out.println(line); - } - br.close(); - isr.close(); - fis.close(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - - public static String getResolution1(File file) throws IOException { - BufferedImage image = ImageIO.read(file); - return image.getWidth() + "x" + image.getHeight(); - } - - -// public static String getResolution(File file){ -// Encoder encoder = new Encoder(); -// try { -// MultimediaInfo m = encoder.getInfo(file); -// int height = m.getVideo().getSize().getHeight(); -// int width = m.getVideo().getSize().getWidth(); -// System.out.println("width:"+width); -// System.out.println("height:" + height); -// FileInputStream fis = new FileInputStream(source); -// FileChannel fc = fis.getChannel(); -// BigDecimal fileSize = new BigDecimal(fc.size()); -// String size = fileSize.divide(new BigDecimal(1048576), 2, RoundingMode.HALF_UP) + "MB"; -// System.out.println("size:" + size); -// long duration = m.getDuration()/1000; -// System.out.println("duration:" + duration + "s"); -// } catch (Exception e) { -// e.printStackTrace(); -// } -// } - - public static String getImageDim(String path) { - String result = null; - String suffix = getFileSuffix(path); - //解码具有给定后缀的文件 - Iterator iter = ImageIO.getImageReadersBySuffix(suffix); - // System.out.println(ImageIO.getImageReadersBySuffix(suffix)); - if (iter.hasNext()) { - ImageReader reader = iter.next(); - try { - ImageInputStream stream = new FileImageInputStream(new File(path)); - reader.setInput(stream); - int width = reader.getWidth(reader.getMinIndex()); - int height = reader.getHeight(reader.getMinIndex()); - result = width + "×" + height; - } catch (IOException e) { - e.printStackTrace(); - } finally { - reader.dispose(); - } - } - // System.out.println("getImageDim:" + result); - return result; - } - - private static String getFileSuffix(final String path) { - String result = null; - if (path != null) { - result = ""; - if (path.lastIndexOf('.') != -1) { - result = path.substring(path.lastIndexOf('.')); - if (result.startsWith(".")) { - result = result.substring(1); - } - } - } - // System.out.println("getFileSuffix:" + result); - return result; - } - - - public static String videosize(String video) { - File source = new File(video); - Encoder encoder = new Encoder(); - try { - it.sauronsoftware.jave.MultimediaInfo m = encoder.getInfo(source); - return m.getVideo().getSize().getHeight() + "×" + m.getVideo().getSize().getWidth(); - } catch (Exception e) { - e.printStackTrace(); - return null; - } - } - - - -// public static String getVideoTime (String path){ -// File source = new File(path); -// Encoder encoder = new Encoder(); -// File[] file = source.listFiles(); -// long sum =0; -// for (File file2 : file) { -// try { -// MultimediaInfo m = encoder.getInfo(file2); -// long ls = m.getDuration()/1000; //ls是获取到的秒数 -// sum += ls; -// } catch (Exception e) { -// e.printStackTrace(); -// } -// } -// double sum1 = (double)sum; -// double sum2 =sum1/3600;// 转换成为了小时 -// System.out.println(sum2); -// return sum2+""; -// } -// - - -// public static byte[] readFile(String path){ -// try { -// FileInputStream fileInputStream = new FileInputStream(path); -// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileInputStream)); -// String line = null; -// while ((line = bufferedReader.readLine()) != null) { -// System.out.println(line); -// } -// fileInputStream.close(); -// }catch (Exception e){ -// e.printStackTrace(); -// } -// } - - - -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java index 8f1333b..fddf824 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java @@ -361,6 +361,7 @@ public class ESConstant { public static String ORIGINAL_SOURCE = "originalSource"; public static String CONTENT_SIMHASH = "contentSimHash"; public static String QUOTE_COUNT = "quoteCount"; + public static String COLLE_CTCOUNT = "collectCount"; /** * 内容 */ diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java index e8f867c..76b8aa7 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java @@ -71,6 +71,7 @@ public class ESMonitorBaseEntity implements Comparable, Ser private String quoteCount ; private String attitudesCount; private Integer commentsCount = 0; + private String collectCount; // 词云 private List hlKeyWords; private List places; @@ -119,6 +120,15 @@ public class ESMonitorBaseEntity implements Comparable, Ser private String otherSourceJson; + + public String getCollectCount() { + return collectCount; + } + + public void setCollectCount(String collectCount) { + this.collectCount = collectCount; + } + public String getOtherSourceJson() { return otherSourceJson; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java index 6e48eb4..0ea50ef 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java @@ -215,6 +215,7 @@ public class SearchDataService extends CrudService - - - - - - - - - - - \ No newline at end of file diff --git a/pom.xml b/pom.xml index fd8edc1..441f0de 100644 --- a/pom.xml +++ b/pom.xml @@ -5,8 +5,8 @@ 4.0.0 com.bfd.mf - cl_stream_3.1.1 - 3.1.1-SNAPSHOT + cl_stream_3.1.2 + 3.1.2-SNAPSHOT pom