Browse Source

release-3.1.1(20210727,导出字段添加 collentCount)

release-1.0
杜静 4 years ago
parent
commit
5841a038e6
  1. 3
      .idea/compiler.xml
  2. 4
      .idea/misc.xml
  3. 2
      .idea/modules.xml
  4. 4
      cl_query_data_job/pom.xml
  5. 9
      cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java
  6. 153
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Alarm.java
  7. 9
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/AlarmRepository.java
  8. 140
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java
  9. 105
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java
  10. 60
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java
  11. 67
      cl_query_data_job/src/main/resources/application.yml
  12. 1
      cl_search_api/cl_search_api.iml
  13. 18
      cl_search_api/pom.xml
  14. 243
      cl_search_api/src/main/java/com/bfd/mf/common/util/ReadLine.java
  15. 1
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  16. 10
      cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
  17. 5
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java
  18. 0
      cl_stream_3.1.2.iml
  19. 12
      cl_stream_3.1.iml
  20. 4
      pom.xml

3
.idea/compiler.xml

@ -13,7 +13,8 @@
<bytecodeTargetLevel>
<module name="cl_query_data_job" target="1.8" />
<module name="cl_search_api" target="1.8" />
<module name="cl_stream_3.1" target="1.8" />
<module name="cl_stream_3.1.1" target="1.8" />
<module name="cl_stream_3.1.2" target="1.8" />
</bytecodeTargetLevel>
</component>
</project>

4
.idea/misc.xml

@ -7,7 +7,7 @@
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/classes" />
</component>
</project>

2
.idea/modules.xml

@ -4,7 +4,7 @@
<modules>
<module fileurl="file://$PROJECT_DIR$/cl_query_data_job/cl_query_data_job.iml" filepath="$PROJECT_DIR$/cl_query_data_job/cl_query_data_job.iml" />
<module fileurl="file://$PROJECT_DIR$/cl_search_api/cl_search_api.iml" filepath="$PROJECT_DIR$/cl_search_api/cl_search_api.iml" />
<module fileurl="file://$PROJECT_DIR$/cl_stream_3.1.iml" filepath="$PROJECT_DIR$/cl_stream_3.1.iml" />
<module fileurl="file://$PROJECT_DIR$/cl_stream_3.1.2.iml" filepath="$PROJECT_DIR$/cl_stream_3.1.2.iml" />
</modules>
</component>
</project>

4
cl_query_data_job/pom.xml

@ -4,9 +4,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>cl_stream_3.1.1</artifactId>
<artifactId>cl_stream_3.1.2</artifactId>
<groupId>com.bfd.mf</groupId>
<version>3.1.1-SNAPSHOT</version>
<version>3.1.2-SNAPSHOT</version>
</parent>
<artifactId>cl_query_data_job</artifactId>

9
cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java

@ -724,6 +724,8 @@ public class ESConstants {
public static final String DATA_COUNT = "dataCount";
public static final String PAGETYPR = "pageType";
/**
* 回溯开始时间
*/
@ -964,6 +966,13 @@ public class ESConstants {
public static final String OTHER = "other";
public static final String LIFE = "life";
public static final String TOTALCOUNT = "totalCount";
public static final String TODAYCOUNT = "todayCount";
public static final String IMAGECOUNT = "imageCount";
public static final String VIDEOCOUNT = "videoCount";
public static final String FILECOUNT = "fileCount";
public static final String TEXTCOUNT = "textCount";
/**
* 微博信息 added by Eric 2016-10-28 15:03:31

153
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Alarm.java

@ -0,0 +1,153 @@
package com.bfd.mf.job.domain.entity;
import javax.persistence.Entity;
import javax.persistence.Table;
/**
 * JPA entity mapped to the {@code cl_alarm} table.
 *
 * <p>A plain mutable bean: every column is exposed through a getter/setter pair
 * and no validation or conversion is performed. All time-related values are held
 * as {@code String}; their expected format is not defined in this class.
 */
@Entity
@Table(name = "cl_alarm")
public class Alarm extends AbstractEntity {

    // ---- alarm classification ----
    private int alarmTag;
    private int alarmReason;
    private String alarmMessage;

    // ---- task the alarm refers to ----
    private String alarmTaskUrl;
    private String alarmTaskContent;
    private String alarmCid;
    private String alarmConfig;

    // ---- alarm lifecycle timestamps (kept as strings) ----
    private String alarmTriggerTime;
    private String alarmAssignTime;
    private String alarmFinishTime;

    // ---- handling ----
    private String alarmHandler;
    private String opinion;
    private int status;

    // ---- row bookkeeping ----
    private String createTime;
    private String updateTime;
    // NOTE(review): presumably a soft-delete flag, as with the del columns queried elsewhere — confirm.
    private int del;

    /** @return the alarm tag value */
    public int getAlarmTag() {
        return this.alarmTag;
    }

    /** @param tag the alarm tag value to store */
    public void setAlarmTag(int tag) {
        this.alarmTag = tag;
    }

    /** @return the alarm reason code */
    public int getAlarmReason() {
        return this.alarmReason;
    }

    /** @param reason the alarm reason code to store */
    public void setAlarmReason(int reason) {
        this.alarmReason = reason;
    }

    /** @return the alarm message text */
    public String getAlarmMessage() {
        return this.alarmMessage;
    }

    /** @param message the alarm message text to store */
    public void setAlarmMessage(String message) {
        this.alarmMessage = message;
    }

    /** @return the task URL associated with the alarm */
    public String getAlarmTaskUrl() {
        return this.alarmTaskUrl;
    }

    /** @param taskUrl the task URL to store */
    public void setAlarmTaskUrl(String taskUrl) {
        this.alarmTaskUrl = taskUrl;
    }

    /** @return the task content associated with the alarm */
    public String getAlarmTaskContent() {
        return this.alarmTaskContent;
    }

    /** @param taskContent the task content to store */
    public void setAlarmTaskContent(String taskContent) {
        this.alarmTaskContent = taskContent;
    }

    /** @return the cid associated with the alarm */
    public String getAlarmCid() {
        return this.alarmCid;
    }

    /** @param cid the cid to store */
    public void setAlarmCid(String cid) {
        this.alarmCid = cid;
    }

    /** @return the alarm configuration string */
    public String getAlarmConfig() {
        return this.alarmConfig;
    }

    /** @param config the alarm configuration string to store */
    public void setAlarmConfig(String config) {
        this.alarmConfig = config;
    }

    /** @return the trigger time, as stored */
    public String getAlarmTriggerTime() {
        return this.alarmTriggerTime;
    }

    /** @param triggerTime the trigger time to store */
    public void setAlarmTriggerTime(String triggerTime) {
        this.alarmTriggerTime = triggerTime;
    }

    /** @return the assign time, as stored */
    public String getAlarmAssignTime() {
        return this.alarmAssignTime;
    }

    /** @param assignTime the assign time to store */
    public void setAlarmAssignTime(String assignTime) {
        this.alarmAssignTime = assignTime;
    }

    /** @return the finish time, as stored */
    public String getAlarmFinishTime() {
        return this.alarmFinishTime;
    }

    /** @param finishTime the finish time to store */
    public void setAlarmFinishTime(String finishTime) {
        this.alarmFinishTime = finishTime;
    }

    /** @return the alarm handler value */
    public String getAlarmHandler() {
        return this.alarmHandler;
    }

    /** @param handler the alarm handler value to store */
    public void setAlarmHandler(String handler) {
        this.alarmHandler = handler;
    }

    /** @return the opinion text */
    public String getOpinion() {
        return this.opinion;
    }

    /** @param opinionText the opinion text to store */
    public void setOpinion(String opinionText) {
        this.opinion = opinionText;
    }

    /** @return the status code */
    public int getStatus() {
        return this.status;
    }

    /** @param statusCode the status code to store */
    public void setStatus(int statusCode) {
        this.status = statusCode;
    }

    /** @return the creation time, as stored */
    public String getCreateTime() {
        return this.createTime;
    }

    /** @param created the creation time to store */
    public void setCreateTime(String created) {
        this.createTime = created;
    }

    /** @return the last-update time, as stored */
    public String getUpdateTime() {
        return this.updateTime;
    }

    /** @param updated the last-update time to store */
    public void setUpdateTime(String updated) {
        this.updateTime = updated;
    }

    /** @return the {@code del} value */
    public int getDel() {
        return this.del;
    }

    /** @param delFlag the {@code del} value to store */
    public void setDel(int delFlag) {
        this.del = delFlag;
    }
}

9
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/AlarmRepository.java

@ -0,0 +1,9 @@
package com.bfd.mf.job.domain.repository;
import com.bfd.mf.job.domain.entity.Alarm;
import org.springframework.data.repository.CrudRepository;
/**
 * Spring Data repository for {@link Alarm} entities, keyed by a {@code Long} id.
 *
 * <p>Declares no custom queries; only the operations inherited from
 * {@link CrudRepository} are available.
 */
public interface AlarmRepository extends CrudRepository<Alarm, Long> {
}

140
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java

@ -12,34 +12,18 @@ import java.util.Map;
public interface TaskRepository extends CrudRepository<Task, Long> {
// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true)
// List<Task> findAllTask();
@Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM cl_task WHERE task_type <> 3 AND crawl_status = 1 AND cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true)
@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type,ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.task_type <> 3 AND ct.crawl_status = 1 AND ct.cache_num = 0 AND ct.data_total = 0 AND ct.del = 0 AND ct.subject_id in (SELECT id from cl_subject csu WHERE csu.del =0) ORDER BY ct.id DESC;",nativeQuery = true)
List<Task> findAllNewTask();
// 统计服务查询 要统计的任务之前由于所有任务都要半小时统计任务太多会把E搞挂就只统计 update_time 近一天的吧
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark FROM cl_task WHERE del = 0 AND crawl_status <> 3",nativeQuery = true) // AND crawl_status <> 3
// 每天只统计两种情况的任务
//1当天完成的任务crawl_status=3 and end_time > 前天
//2状态为采集中或者 暂停的任务 crawl_status=0 or crawl_tatus=1
// 其他的任务就不用每天都统计了
@Query(value = " SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM `cl_task` WHERE del = 0 AND ((crawl_status = 1 OR crawl_status = 0) OR (crawl_status = 3 AND end_time > date_sub(curdate(),interval 2 day))) ;",nativeQuery = true)
// 需要统计的任务的查询条件 1 状态为 1 OR 02状态为3且任务完成时间再2天前的
@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ((ct.crawl_status = 1 OR ct.crawl_status = 0) OR (ct.crawl_status = 3 AND ct.end_time > date_sub(curdate(),interval 2 day))); ",nativeQuery = true)
// @Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.subject_id = 12273 ; ",nativeQuery = true)
List<Task> findAllBydel0();
//
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true)
// List<Task> findAllNewTask();
//
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE del=0 ",nativeQuery = true)
// List<Task> findAllBydel0();
// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true)
// List<Task> findTasksBySbujectIdAndDel0(BigInteger subjectId);
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true)
@Query(value = "SELECT sum(data_total) FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid=cs.cid WHERE ct.del =0 AND ct.subject_id = ?1 AND cs.site_type = ?2",nativeQuery = true)
Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType);
@Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true)
@Query(value = "SELECT sum(today_data_total) FROM cl_task ct JOIN intelligent_crawl.cl_site cs ON ct.cid=cs.cid WHERE ct.del =0 AND ct.subject_id = ?1 AND cs.site_type = ?2 ",nativeQuery = true)
Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType);
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true)
@ -52,17 +36,6 @@ public interface TaskRepository extends CrudRepository<Task, Long> {
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time <?2 AND crawl_status = 3 AND task_type <>3 ",nativeQuery = true)
Long findTodayDataTotal(String taskStartTime ,String taskEndTime);
// @Query(value = " SELECT count(*) FROM cl_task WHERE today_data_total > 0 AND task_level < 2 AND crawl_status = 3 ",nativeQuery = true)
// Long findTodayDataTotalTaskNum();
// @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true)
// List<UploadTask> getTaskNeedUpLoad();
// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
/**
* 更新每个任务 拉数据次数
*/
@ -104,7 +77,108 @@ public interface TaskRepository extends CrudRepository<Task, Long> {
@Query(value = "SELECT TIMESTAMPDIFF(MINUTE, start_time,end_time) FROM cl_task WHERE del = 0 AND task_type <>3 AND crawl_status = 3 AND data_total > 0 AND end_time > ?1 AND end_time < ?2 ",nativeQuery = true)
List<BigInteger> findTaskByCrawlTime(String taskStartTime, String taskEndTime);
// @Query(value = "SELECT ct.id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.file_name,ct.file_remark,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.id = ?1",nativeQuery = true)
@Query(value = "SELECT ct.id,ct.app_id,ct.subject_id,ct.external_id,cs.site_type, ct.task_type,ct.cid,ct.crawl_status,ct.crawl_start_time,ct.crawl_end_time,ct.crawl_data_flag,ct.data_total,ct.today_data_total,ct.cache_num,ct.update_time,ct.del,ct.crawl_content_key FROM `cl_task` ct JOIN intelligent_crawl.cl_site cs ON ct.cid = cs.cid WHERE ct.del = 0 AND ct.id = ?1 ;",nativeQuery = true)
List<Task> findOneTaskByIdAndAppId(long taskId);
// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true)
// List<Task> findAllTask();
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM cl_task WHERE task_type <> 3 AND crawl_status = 1 AND cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true)
// List<Task> findAllNewTask();
//
// // 统计服务查询 要统计的任务之前由于所有任务都要半小时统计任务太多会把E搞挂就只统计 update_time 近一天的吧
//// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark FROM cl_task WHERE del = 0 AND crawl_status <> 3",nativeQuery = true) // AND crawl_status <> 3
// // 每天只统计两种情况的任务
// //1当天完成的任务crawl_status=3 and end_time > 前天
// //2状态为采集中或者 暂停的任务 crawl_status=0 or crawl_tatus=1
// // 其他的任务就不用每天都统计了
// @Query(value = " SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM `cl_task` WHERE del = 0 AND subject_id = 12273 AND ((crawl_status = 1 OR crawl_status = 0) OR (crawl_status = 3 AND end_time > date_sub(curdate(),interval 2 day))) ;",nativeQuery = true)
// List<Task> findAllBydel0();
////
//// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true)
//// List<Task> findAllNewTask();
////
//// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE del=0 ",nativeQuery = true)
//// List<Task> findAllBydel0();
//
//// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true)
//// List<Task> findTasksBySbujectIdAndDel0(BigInteger subjectId);
//
// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true)
// Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType);
//
// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true)
// Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType);
//
// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true)
// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
//
// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true)
// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
//
// // 统计任务的抓取量 任务质量 任务状态为已完成 今天入库的总数据量 / 总任务数
// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time <?2 AND crawl_status = 3 AND task_type <>3 ",nativeQuery = true)
// Long findTodayDataTotal(String taskStartTime ,String taskEndTime);
//
//// @Query(value = " SELECT count(*) FROM cl_task WHERE today_data_total > 0 AND task_level < 2 AND crawl_status = 3 ",nativeQuery = true)
//// Long findTodayDataTotalTaskNum();
//// @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true)
//// List<UploadTask> getTaskNeedUpLoad();
//// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
//// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
//// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
//// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
//
//
//
// /**
// * 更新每个任务 拉数据次数
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_task set cache_num=?1 where id=?2", nativeQuery = true)
// Integer updateStatus(int cache_num,long id);
//
// /**
// * 乐观锁
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_task set cache_num=?1 where id=?2 and cache_num=?3", nativeQuery = true)
// Integer tryLock(Integer newStatus, long id, Integer oldStatus);
//
// /**
// * 修改每个任务的统计结果
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true)
// void updateTaskCount(Long id, Long totalCount, Long todayCount);
//
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_task set crawl_status =?4 where id =?1 ", nativeQuery = true)
// void updateCrawlStatus(long taskId);
//
// @Query(value = " SELECT id,start_time,end_time FROM cl_task WHERE del=0 AND crawl_status = 3 AND start_time >?1 AND end_time <?2 ",nativeQuery = true)
// List<Map<String,String>> findByCrawlTime(String taskStartTime, String taskEndTime);
//
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "UPDATE cl_task SET today_data_total=0 WHERE end_time <?1 AND crawl_status = 3 AND today_data_total >0 ", nativeQuery = true)
// void updateTodayTotalCount(String updateTime);
//
// // 获得前一天完成的任务的时间差除欧莱雅的任务和上传的任务
// @Query(value = "SELECT TIMESTAMPDIFF(MINUTE, start_time,end_time) FROM cl_task WHERE del = 0 AND task_type <>3 AND crawl_status = 3 AND data_total > 0 AND end_time > ?1 AND end_time < ?2 ",nativeQuery = true)
// List<BigInteger> findTaskByCrawlTime(String taskStartTime, String taskEndTime);
//
////
// /**
// * 更新进度
// */

105
cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java

@ -125,6 +125,9 @@ public class EsQueryMiniService {
}
/**
* 查询每个任务 的总量和当天的量 以及 包含图片的量包含视频的量包含附件的量
*/
public Map<String,Long> getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag,String indexNamePre) {
Map<String,Long> countMap = new HashMap<>();
String indexName = indexNamePre + task.getSubjectId();//subject_id
@ -136,27 +139,24 @@ public class EsQueryMiniService {
if (indexName.contains(indexNamePre)) {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
// 任务ID 筛选
TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid);
TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag);
qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder);
// 时间范围筛选
BoolQueryBuilder shouldbq = QueryBuilders.boolQuery();
RangeQueryBuilder rangeQueryBuilder = QueryBuilders
.rangeQuery(ESConstants.PUBTIME)
.gte(crawlStartTime)
.lt(crawlEndTime);
// 用户数据
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
TermQueryBuilder primartTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PRIMARY, 2);
// TermQueryBuilder pubTimeTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PUBTIME,0);
boolQueryBuilder.must(primartTermQueryBuilder);
shouldbq.should(boolQueryBuilder).should(rangeQueryBuilder);
qb.must(shouldbq);
logger.info("QB1 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
// // 任务ID 筛选
// TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid);
// TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag);
// qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder);
// // 时间范围筛选
// // BoolQueryBuilder shouldbq = QueryBuilders.boolQuery();
// RangeQueryBuilder rangeQueryBuilder = QueryBuilders
// .rangeQuery(ESConstants.PUBTIME)
// .gte(crawlStartTime)
// .lt(crawlEndTime);
// // 不用统计FB 的这种粉丝的量
// TermQueryBuilder pageTypeQueryBuilder = QueryBuilders.termQuery(ESConstants.PAGETYPR,"socialFans");
// qb.mustNot(pageTypeQueryBuilder).must(rangeQueryBuilder);
BoolQueryBuilder qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
logger.info("QB1 查询总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long count = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put("totalCount", count);
// 上面的语句是查询 该任务的 总数据量totalCount下面的语句是查询 该任务当天的数据量todayCount
long current = System.currentTimeMillis();
long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset();
@ -165,39 +165,52 @@ public class EsQueryMiniService {
.rangeQuery(ESConstants.CRAWLTIME)
.gte(startTime).lt(current);
qb.must(rangeQueryBuilder2);
logger.info("QB2 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
logger.info("QB2 查询今日总量: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long todayCount = EsUtils.queryCount(clusterName, indexName, qb);
countMap.put("todayCount", todayCount);
// Count the documents of this task that carry images, videos and attachments.
// Field mapping: videoPath == egc, filePath == ugc, imagePath == pgc.
TermQueryBuilder pgcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PGC,1);
TermQueryBuilder egcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EGC,1);
TermQueryBuilder ugcTermQueryBuilder = QueryBuilders.termQuery(ESConstants.UGC,1);
// Each count starts from a fresh base query so the crawl-time filter added for
// todayCount and the other media filters do not leak into it.
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(pgcTermQueryBuilder);
logger.info("QB3 查询有图片的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long imageCount = EsUtils.queryCount(clusterName, indexName, qb);
// Store the image count under its own key so callers can read it.
countMap.put(ESConstants.IMAGECOUNT, imageCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(egcTermQueryBuilder);
logger.info("QB4 查询有视频的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long videoCount = EsUtils.queryCount(clusterName, indexName, qb);
// Store the video count under its own key.
countMap.put(ESConstants.VIDEOCOUNT, videoCount);
qb = getQueryBuilder(cid,crawlDataFlag,crawlStartTime,crawlEndTime);
qb.must(ugcTermQueryBuilder);
logger.info("QB5 查询有文件的任务数: indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
Long fileCount = EsUtils.queryCount(clusterName, indexName, qb);
// Store the attachment count under its own key.
countMap.put(ESConstants.FILECOUNT, fileCount);
}
}
}
return countMap;
}
// public Long getTaskTodayCount(String clusterName,Integer id, Map<String, Object> task) {
// Long count = 0L;
// String indexName = clSubject + (String) task.get("subject_id");
// String cid = (String) task.get(ESConstants.CID);
// Long crawlStartTime = (Long) task.get("crawl_start_time");
// Long crawlEndTime = (Long) task.get("crawl_end_time");
// String crawlDataFlag = (String) task.get("crawl_data_flag");
//
// if(indexName.contains(subjectPre)) {
// boolean isExists = EsUtils.indexExists(clusterName, indexName);
// if (isExists) {
// BoolQueryBuilder qb = QueryBuilders.boolQuery();
// long current=System.currentTimeMillis();
// long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();
// Long startTime = new Timestamp(zero).getTime();
// RangeQueryBuilder rangeQueryBuilder = QueryBuilders
// .rangeQuery(ESConstants.CRAWLTIME)
// .gte(startTime)
// .lt(current);
// qb.must(rangeQueryBuilder);
//// Terms result = EsUtils.queryTag(clusterName, indexName, qb, ab, ESConstant.DOC_TYPE + "Tag");
//// resultMap = parseTerms(result);
// }
// }
// return count;
// }
/**
 * Assembles the base bool query shared by the per-task count queries.
 *
 * <p>The resulting query requires a term match on {@code ESConstants.CRAWLDATAFLAG}
 * and on {@code ESConstants.EN_SOURCE}, a {@code ESConstants.PUBTIME} value in
 * {@code [crawlStartTime, crawlEndTime)}, and excludes documents whose
 * {@code ESConstants.PAGETYPR} is {@code "socialFans"}.
 *
 * @param cid            value matched against {@code ESConstants.EN_SOURCE}
 * @param crawlDataFlag  value matched against {@code ESConstants.CRAWLDATAFLAG}
 * @param crawlStartTime inclusive lower bound of the {@code PUBTIME} range
 * @param crawlEndTime   exclusive upper bound of the {@code PUBTIME} range
 * @return a new query builder; callers may add further clauses to it
 */
private BoolQueryBuilder getQueryBuilder(String cid, String crawlDataFlag, Long crawlStartTime, Long crawlEndTime) {
    // Task identification: source site plus crawl data flag.
    TermQueryBuilder sourceFilter = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid);
    TermQueryBuilder flagFilter = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag);
    // Time window filter.
    RangeQueryBuilder pubTimeWindow = QueryBuilders
            .rangeQuery(ESConstants.PUBTIME)
            .gte(crawlStartTime)
            .lt(crawlEndTime);
    // Facebook-style fan/follower records are not counted.
    TermQueryBuilder fanPages = QueryBuilders.termQuery(ESConstants.PAGETYPR, "socialFans");
    // Clause order is kept (flag, source, range) because callers log qb.toString().
    return QueryBuilders.boolQuery()
            .must(flagFilter)
            .must(sourceFilter)
            .mustNot(fanPages)
            .must(pubTimeWindow);
}
}

60
cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java

@ -38,7 +38,7 @@ public class StatisticsService {
@PostConstruct
public void init() {
// 注册数据查询来源
// EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source
EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source
EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target
}
@ -49,35 +49,35 @@ public class StatisticsService {
LOGGER.info("------------------------------------------------------------------ StatisticsService ------------------------------------------------------");
long start = System.currentTimeMillis();
//-------统计134上的总量------------------------------------------------------------------------------------
// String clusterName = config.esNormalClusterName(); // 获得 134 clusterName
//statisticsTotal(clusterName);
String clusterName = config.esNormalClusterName(); // 获得 134 clusterName
statisticsTotal(clusterName);
long end = System.currentTimeMillis();
LOGGER.info("Statistics Total, took:{} ms.",(end - start));
//-------统计147上的 每个任务的总量-------------------------------------------------------------------------
start = System.currentTimeMillis();
String clusterName = config.esMiniClusterName(); // 获得 147 clusterName
clusterName = config.esMiniClusterName(); // 获得 147 clusterName
statisticsTask(clusterName);
end = System.currentTimeMillis();
LOGGER.info("Statistics Task, took:{} ms.",(end - start));
//-------统计每个专题的量------------------------------------------------------------------------------------
start = System.currentTimeMillis();
// 如果是正常任务的用这种方式统计
List<BigInteger> subjectIds = subjectRepository.findAllSubjectIds();
for (BigInteger subjectId: subjectIds) {
statisticsSubjectBySumTask(subjectId);
}
end = System.currentTimeMillis();
LOGGER.info("Statistics Subject Normal, took:{} ms.",(end - start));
// 如果是欧莱雅任务的得用这个方式统计呀
// start = System.currentTimeMillis();
// // 如果是正常任务的用这种方式统计
// List<BigInteger> subjectIds = subjectRepository.findAllSubjectIds();
// for (BigInteger subjectId: subjectIds) {
// statisticsSubjectBySumTask(subjectId);
// }
// end = System.currentTimeMillis();
// LOGGER.info("Statistics Subject Normal, took:{} ms.",(end - start));
// // 如果是欧莱雅任务的得用这个方式统计呀
// start = System.currentTimeMillis();
// List<BigInteger> subjectIds1 = subjectRepository.findAllOlySubjectIds();
// for (BigInteger subjectId: subjectIds1) {
// statisticsSubject(subjectId,clusterName);
// }
// end = System.currentTimeMillis();
// LOGGER.info("Statistics Subject OLY, took:{} ms.",(end - start));
end = System.currentTimeMillis();
LOGGER.info("Statistics Subject OLY, took:{} ms.",(end - start));
}
@ -151,18 +151,18 @@ public class StatisticsService {
}else{
siteTodayCount = 0;
}
switch (i) { //
switch (i) {
case 0:
subjectCrawlDatFlagMap.put("keyword", siteCount);
subjectCrawlDataFlagTodayMap.put("keyword", siteTodayCount);
subjectCrawlDatFlagMap.put(ESConstants.KEYWORD, siteCount);
subjectCrawlDataFlagTodayMap.put(ESConstants.KEYWORD, siteTodayCount);
break;
case 1:
subjectCrawlDatFlagMap.put("account", siteCount);
subjectCrawlDataFlagTodayMap.put("account", siteTodayCount);
subjectCrawlDatFlagMap.put(ESConstants.ACCOUNT, siteCount);
subjectCrawlDataFlagTodayMap.put(ESConstants.ACCOUNT, siteTodayCount);
break;
case 2:
subjectCrawlDatFlagMap.put("url", siteCount);
subjectCrawlDataFlagTodayMap.put("url", siteTodayCount);
subjectCrawlDatFlagMap.put(ESConstants.URL, siteCount);
subjectCrawlDataFlagTodayMap.put(ESConstants.URL, siteTodayCount);
break;
case 3:
subjectCrawlDatFlagMap.put("upload", siteCount);
@ -230,7 +230,6 @@ public class StatisticsService {
long current = System.currentTimeMillis();
long zero = current/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset();
String updateTime = DateUtil.parseDateByTime(zero);
System.out.println("----- "+ updateTime);
taskRepository.updateTodayTotalCount(updateTime);
EsQueryMiniService esQueryMiniService = new EsQueryMiniService();
@ -238,7 +237,6 @@ public class StatisticsService {
List<Task> taskList = taskRepository.findAllBydel0();
// 遍历任务List 根据条件组装ES查询语句去对应的索引下查结果然后回写到任务表中
for (Task task: taskList) {
System.out.println(" 任务ID ===== " + task);
Long taskId = task.getId().longValue();
String crawlDataFlag = task.getCrawlDataFlag();
String indexNamePre = config.getIndexNamePre();
@ -248,9 +246,17 @@ public class StatisticsService {
// 直接更新 cl_task 表中的 data_total today_data_total
long totalCount = 0L;
long todayCount = 0L;
if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) {
totalCount = countMap.get("totalCount");
todayCount = countMap.get("todayCount");
long imageCount = 0L;
long videoCount = 0L;
long fileCount = 0L;
long textCount = 0L;
if(countMap.containsKey(ESConstants.TOTALCOUNT) && countMap.containsKey(ESConstants.TODAYCOUNT)) {
totalCount = countMap.get(ESConstants.TOTALCOUNT);
todayCount = countMap.get(ESConstants.TODAYCOUNT);
// imageCount = countMap.get(ESConstants.IMAGECOUNT);
// videoCount = countMap.get(ESConstants.VIDEOCOUNT);
// fileCount = countMap.get(ESConstants.FILECOUNT);
// textCount = countMap.get(ESConstants.TEXTCOUNT);
}
taskRepository.updateTaskCount(taskId,totalCount,todayCount);
}

67
cl_query_data_job/src/main/resources/application.yml

@ -3,15 +3,33 @@ debug: false
logging:
level:
com.bfd.mf: debug
#spring:
# datasource:
# driver-class-name: com.mysql.jdbc.Driver
# username: root
# password: bfd123
# url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
# hikari:
# maximum-pool-size: 10
# minimum-idle: 1
spring:
datasource:
driver-class-name: com.mysql.jdbc.Driver
username: crawl
password: crawl
url: jdbc:mysql://172.18.1.181:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
username: root
password: Bfd123!@#
url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
hikari:
maximum-pool-size: 10
minimum-idle: 1
#spring:
# datasource:
# driver-class-name: com.mysql.jdbc.Driver
# username: root
# password: Bfd123!@#
# url: jdbc:mysql://172.18.1.134:3306/all_task?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round
# hikari:
# maximum-pool-size: 10
# minimum-idle: 1
worker:
@ -27,16 +45,16 @@ worker:
analysis-group: sq_group_cl_analysis_1
## 服务的状态,true 为启动
enable-analysis-producer: false
enable-analysis-consumer: false
enable-statistics-producer: true
enable-query-producer: false
enable-backtrace-producer: false
enable-rw-oly-producer: false
enable-up-load-producer: false
enable-output-producer: false
enable-taskcount-producer: false
enable-alarm-producer: false
enable-analysis-producer: false # 查ES写kafka
enable-analysis-consumer: false # 读kafka写ES
enable-statistics-producer: true # 统计 taskCount 和 subjectCount (采集平台)
enable-query-producer: false # 离线拉数(采集平台)
enable-backtrace-producer: false # 欧莱雅查数(采集平台,欧莱雅项目独用)
enable-rw-oly-producer: false # 欧莱雅数据导出,暂时不用
enable-up-load-producer: false # 上传(采集平台)
enable-output-producer: false #未开发,暂留
enable-taskcount-producer: false # 任务数量的统计,任务量和任务平均时长(运营后台)
enable-alarm-producer: false # 报警,查ES统计报警发邮件写数据库(运营后台)
## 启动服务的线程数
statistics-producer-thread-count: 1
query-producer-thread-count: 10
@ -64,29 +82,6 @@ worker:
uploadZipPath : /opt/nfsdata/uploadFiles/
indexNamePre : cl_major_
# es-normal:
# name: SQ_Normal
# address: 172.16.10.61:9301
# upper: 2000-01-01
# standby: cl_major_*
# es-reply-source:
# name: SQ_Normal
# address: 172.16.10.61:9301
# upper: 2000-01-01
# standby: cl_major_*
# es-mini:
# name: SQ_Normal
# address: 172.16.10.61:9301
# bulk-thread-count: 5
# bulk-rate: 3
# bulk-size: 100
# es-logstash:
# name: SQ_Normal
# address: 172.16.10.61:9301
# upper: 2021-01-01
# standby: logstash-2021.05.13
es-normal:
name: SQ_Normal_new
address: 172.18.1.134:9301

1
cl_search_api/cl_search_api.iml

@ -200,6 +200,5 @@
<orderEntry type="library" name="Maven: org.codehaus.mojo:animal-sniffer-annotations:1.14" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:3.6.0" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.11.0" level="project" />
<orderEntry type="library" name="Maven: it.sauronsoftware:jave:1.0.2" level="project" />
</component>
</module>

18
cl_search_api/pom.xml

@ -5,15 +5,15 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>cl_stream_3.1.1</artifactId>
<artifactId>cl_stream_3.1.2</artifactId>
<groupId>com.bfd.mf</groupId>
<version>3.1.1-SNAPSHOT</version>
<version>3.1.2-SNAPSHOT</version>
</parent>
<name>cl_search_api</name>
<description>Search V3.1.1 API</description>
<description>Search V3.1.2 API</description>
<artifactId>cl_search_api</artifactId>
<version>3.1.1-SNAPSHOT</version>
<version>3.1.2-SNAPSHOT</version>
@ -247,11 +247,11 @@
<version>3.6.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/it.sauronsoftware/jave -->
<dependency>
<groupId>it.sauronsoftware</groupId>
<artifactId>jave</artifactId>
<version>1.0.2</version>
</dependency>
<!--<dependency>-->
<!--<groupId>it.sauronsoftware</groupId>-->
<!--<artifactId>jave</artifactId>-->
<!--<version>1.0.2</version>-->
<!--</dependency>-->
</dependencies>

243
cl_search_api/src/main/java/com/bfd/mf/common/util/ReadLine.java

@ -1,243 +0,0 @@
package com.bfd.mf.common.util;
import it.sauronsoftware.jave.Encoder;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream;
import javax.imageio.stream.ImageInputStream;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
* Created by BFD-229 on 2017/7/6.
*/
public class ReadLine {
/**
 * Reads a text file as UTF-8 and returns its non-empty lines in file order.
 *
 * @param fileName the file to read
 * @return the non-empty lines of the file, or {@code null} when the file cannot be
 *         opened or read (the {@code null} result is kept for backward compatibility)
 */
public static List<String> readLine( File fileName){
    List<String> list = new ArrayList<String>();
    // try-with-resources closes the stream and the reader on every exit path;
    // previously neither was ever closed, leaking one file handle per call.
    try (FileInputStream in = new FileInputStream(fileName);
         BufferedReader reader = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.length() > 0) {
                list.add(line);
            }
        }
        return list;
    } catch (IOException e) {
        // IOException also covers FileNotFoundException and UnsupportedEncodingException,
        // which were previously handled by separate but identical catch arms.
        e.printStackTrace();
        return null;
    }
}
// public static List<JSONObject> readLine(File fileName){
// List<JSONObject> list = new ArrayList<JSONObject> ();
// String line;
// try {
// InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8");
// BufferedReader reader = new BufferedReader(read);
// while ((line = reader.readLine()) != null) {
// try {
// if (line.length() > 0) {
// list.add(line);
// }
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
// return list;
// }catch (UnsupportedEncodingException e) {
// e.printStackTrace();
// return null;
// } catch (FileNotFoundException e) {
// e.printStackTrace();
// return null;
// } catch (IOException e) {
// e.printStackTrace();
// return null;
// }
// }
/**
 * Reads the file at {@code path} as UTF-8 text.
 *
 * <p>Each line is appended with the platform line separator placed <em>before</em> it,
 * so the result for a non-empty file starts with a line separator.
 *
 * @param path path of the file to read
 * @return the text read; an empty string when the file cannot be opened, or whatever
 *         was read so far when reading fails part-way through
 */
public static String readFile(String path){
    File file = new File(path);
    StringBuilder result = new StringBuilder();
    // try-with-resources guarantees the reader is closed even when readLine() throws;
    // the previous explicit close() was skipped on any exception.
    try (FileInputStream in = new FileInputStream(file);
         BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
        String s;
        while ((s = br.readLine()) != null) {
            // Two appends avoid building a temporary concatenated string per line.
            result.append(System.lineSeparator()).append(s);
        }
    } catch (Exception e) {
        // Best-effort read: report the problem and return what has been collected.
        e.printStackTrace();
    }
    return result.toString();
}
/**
 * Prints the content of {@code file} to standard output, one line per text line,
 * decoding it as UTF-8. Writes {@code exist} to standard error first when the file
 * exists; does nothing at all when it does not.
 *
 * @param file the file to dump
 */
public static void readFiles(File file){
    if (!file.exists()) {
        return;
    }
    System.err.println("exist");
    // try-with-resources closes all three resources in reverse order on every path;
    // the previous manual close() calls were skipped whenever reading threw.
    try (FileInputStream fis = new FileInputStream(file);
         InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
         BufferedReader br = new BufferedReader(isr)) {
        String line;
        while ((line = br.readLine()) != null) {
            System.out.println(line);
        }
    } catch (IOException e) {
        // Covers FileNotFoundException and UnsupportedEncodingException as well.
        e.printStackTrace();
    }
}
/**
 * Returns the pixel dimensions of an image file formatted as {@code WIDTHxHEIGHT}
 * (ASCII letter {@code x}).
 *
 * @param file the image file to inspect
 * @return the dimensions, for example {@code 640x480}
 * @throws IOException if the file cannot be read, or if no registered image reader
 *         can decode it
 */
public static String getResolution1(File file) throws IOException {
    BufferedImage image = ImageIO.read(file);
    // ImageIO.read returns null (rather than throwing) when no reader recognises the
    // content; report that as an IOException instead of a NullPointerException.
    if (image == null) {
        throw new IOException("Unsupported or unreadable image file: " + file);
    }
    return image.getWidth() + "x" + image.getHeight();
}
// public static String getResolution(File file){
// Encoder encoder = new Encoder();
// try {
// MultimediaInfo m = encoder.getInfo(file);
// int height = m.getVideo().getSize().getHeight();
// int width = m.getVideo().getSize().getWidth();
// System.out.println("width:"+width);
// System.out.println("height:" + height);
// FileInputStream fis = new FileInputStream(source);
// FileChannel fc = fis.getChannel();
// BigDecimal fileSize = new BigDecimal(fc.size());
// String size = fileSize.divide(new BigDecimal(1048576), 2, RoundingMode.HALF_UP) + "MB";
// System.out.println("size:" + size);
// long duration = m.getDuration()/1000;
// System.out.println("duration:" + duration + "s");
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
/**
 * Reads the dimensions of an image from its header, choosing the decoder by the
 * file name's suffix.
 *
 * @param path path of the image file
 * @return the dimensions as {@code WIDTH×HEIGHT} (multiplication sign, not the letter x),
 *         or {@code null} when no reader is registered for the suffix or the file
 *         cannot be read
 */
public static String getImageDim(String path) {
    String result = null;
    String suffix = getFileSuffix(path);
    // Look up the decoders registered for this file suffix.
    Iterator<ImageReader> iter = ImageIO.getImageReadersBySuffix(suffix);
    if (iter.hasNext()) {
        ImageReader reader = iter.next();
        // try-with-resources closes the image stream on every path; previously the
        // stream was never closed explicitly.
        try (ImageInputStream stream = new FileImageInputStream(new File(path))) {
            reader.setInput(stream);
            int width = reader.getWidth(reader.getMinIndex());
            int height = reader.getHeight(reader.getMinIndex());
            result = width + "×" + height;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            reader.dispose();
        }
    }
    return result;
}
/**
 * Extracts the text after the last {@code '.'} in {@code path}.
 *
 * @param path a file name or path; may be {@code null}
 * @return {@code null} for a {@code null} path, an empty string when the path has no dot
 *         (or ends with one), otherwise the characters following the last dot
 */
private static String getFileSuffix(final String path) {
    if (path == null) {
        return null;
    }
    final int dot = path.lastIndexOf('.');
    // No dot means no suffix; otherwise take everything after the last dot.
    return dot == -1 ? "" : path.substring(dot + 1);
}
/**
 * Queries the video file at the given path through the JAVE {@code Encoder} and
 * formats its frame size.
 *
 * @param video path of the video file
 * @return the size as {@code HEIGHT×WIDTH} (multiplication sign), or {@code null} when
 *         the media information cannot be obtained
 */
public static String videosize(String video) {
    final File source = new File(video);
    final Encoder encoder = new Encoder();
    String dimensions;
    try {
        final it.sauronsoftware.jave.MultimediaInfo info = encoder.getInfo(source);
        // NOTE(review): height is emitted before width, unlike getImageDim — confirm this
        // ordering is what callers expect.
        dimensions = info.getVideo().getSize().getHeight() + "×" + info.getVideo().getSize().getWidth();
    } catch (Exception e) {
        e.printStackTrace();
        dimensions = null;
    }
    return dimensions;
}
// public static String getVideoTime (String path){
// File source = new File(path);
// Encoder encoder = new Encoder();
// File[] file = source.listFiles();
// long sum =0;
// for (File file2 : file) {
// try {
// MultimediaInfo m = encoder.getInfo(file2);
// long ls = m.getDuration()/1000; //ls是获取到的秒数
// sum += ls;
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
// double sum1 = (double)sum;
// double sum2 =sum1/3600;// 转换成为了小时
// System.out.println(sum2);
// return sum2+"";
// }
//
// public static byte[] readFile(String path){
// try {
// FileInputStream fileInputStream = new FileInputStream(path);
// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileInputStream));
// String line = null;
// while ((line = bufferedReader.readLine()) != null) {
// System.out.println(line);
// }
// fileInputStream.close();
// }catch (Exception e){
// e.printStackTrace();
// }
// }
}

1
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -361,6 +361,7 @@ public class ESConstant {
public static String ORIGINAL_SOURCE = "originalSource";
public static String CONTENT_SIMHASH = "contentSimHash";
public static String QUOTE_COUNT = "quoteCount";
public static String COLLE_CTCOUNT = "collectCount";
/**
* 内容
*/

10
cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java

@ -71,6 +71,7 @@ public class ESMonitorBaseEntity implements Comparable<ESMonitorBaseEntity>, Ser
private String quoteCount ;
private String attitudesCount;
private Integer commentsCount = 0;
private String collectCount;
// 词云
private List<String> hlKeyWords;
private List<String> places;
@ -119,6 +120,15 @@ public class ESMonitorBaseEntity implements Comparable<ESMonitorBaseEntity>, Ser
private String otherSourceJson;
/**
 * Returns the collect count held by this entity, exactly as the string that was set.
 *
 * @return the stored {@code collectCount} value; {@code null} if it was never set
 */
public String getCollectCount() {
return collectCount;
}
/**
 * Stores the collect count on this entity. The value is kept as given, without
 * parsing or validation.
 *
 * @param collectCount the collect count, as a string
 */
public void setCollectCount(String collectCount) {
this.collectCount = collectCount;
}
/**
 * Returns the {@code otherSourceJson} value held by this entity.
 *
 * @return the stored string (presumably JSON text, judging by the field name — confirm
 *         against the code that populates it)
 */
public String getOtherSourceJson() {
return otherSourceJson;
}

5
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -215,6 +215,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
String enSource = sourceAsMap.get(ESConstant.EN_SOURCE).toString();
String source = "";
String price = "";
String collentCount = "0";
if (sourceAsMap.containsKey(ESConstant.SOURCE)) {
source = sourceAsMap.get(ESConstant.SOURCE).toString();
}
@ -263,6 +264,7 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
content = sourceAsMap.get(ESConstant.CONTENT).toString();
author = sourceAsMap.get(ESConstant.AUTHOR).toString();
quoteCount = sourceAsMap.get(ESConstant.QUOTE_COUNT).toString();
collentCount = sourceAsMap.get(ESConstant.COLLE_CTCOUNT).toString();
if(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString().contains("totalCount")) {
JSONObject countMap = JSONObject.parseObject(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString());
attitudeCount = countMap.getString("totalCount");
@ -426,10 +428,11 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
esMonitorEntity.setCrawlTimeStr(sourceAsMap.get(ESConstant.CRAWLTIMESTR).toString());
esMonitorEntity.setCrawlDataFlag(crawlDataFlag);
esMonitorEntity.setHlKeyWords(hlKeywords);
// 评论数转发数点赞数
// 评论数转发数点赞数收藏数
esMonitorEntity.setCommentsCount(Integer.valueOf(sourceAsMap.getOrDefault(ESConstant.COMMENTS_COUNT, 0).toString()));
esMonitorEntity.setQuoteCount(quoteCount);
esMonitorEntity.setAttitudesCount(attitudeCount);
esMonitorEntity.setCollectCount(collentCount);
esMonitorEntity.setOcrText(ocrText);
esMonitorEntity.setAsrText(asrText);
// 用户字段

0
cl_stream_3.1.1.iml → cl_stream_3.1.2.iml

12
cl_stream_3.1.iml

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

4
pom.xml

@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.bfd.mf</groupId>
<artifactId>cl_stream_3.1.1</artifactId>
<version>3.1.1-SNAPSHOT</version>
<artifactId>cl_stream_3.1.2</artifactId>
<version>3.1.2-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>

Loading…
Cancel
Save