Browse Source

20210714-v1

合并崔老师版本的代码跟采集平台的代码
可适配两个系统
release-1.0
杜静 4 years ago
parent
commit
0e25454182
  1. 23
      README.md
  2. 4
      cl_query_data_job/pom.xml
  3. 8
      cl_search_api/pom.xml
  4. 15
      cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java
  5. 2
      cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java
  6. 11
      cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/base/SiteRepository.java
  7. 12
      cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java
  8. 25
      cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java
  9. 63
      cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java
  10. 12
      cl_stream_3.1.1.iml
  11. 4
      pom.xml

23
README.md

@ -1,10 +1,13 @@
崔老师项目版本的代码
采集平台离线服务和查询接口:
cl_query_data_job:离线统计、离线拉数据、欧莱雅离线拉数
cl_search_api:查询接口
这一版的列表页有两个下拉标注选项:分类标签和价值标签
每条数据的详情页会显示 该数据的分类标签和价值标签
同时提供分类标签和价值标签的统计结果。
采集平台离线服务和查询接口&崔老师项目融合版:
cl_search_api:
1、数据查询接口
2、数据上传接口
3、数据分析接口
4、用户查询接口
cl_query_data_job:
1、统计服务
2、离线拉数
3、欧莱雅拉数
4、上传服务
5、任务统计服务
6、报警服务

4
cl_query_data_job/pom.xml

@ -4,9 +4,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>cl_stream_4.0</artifactId>
<artifactId>cl_stream_3.1.1</artifactId>
<groupId>com.bfd.mf</groupId>
<version>4.0-SNAPSHOT</version>
<version>3.1.1-SNAPSHOT</version>
</parent>
<artifactId>cl_query_data_job</artifactId>

8
cl_search_api/pom.xml

@ -5,15 +5,15 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>cl_stream_4.0</artifactId>
<artifactId>cl_stream_3.1.1</artifactId>
<groupId>com.bfd.mf</groupId>
<version>4.0-SNAPSHOT</version>
<version>3.1.1-SNAPSHOT</version>
</parent>
<name>cl_search_api</name>
<description>Search V4.0 API</description>
<description>Search V3.1.1 API</description>
<artifactId>cl_search_api</artifactId>
<version>4.0-SNAPSHOT</version>
<version>3.1.1-SNAPSHOT</version>

15
cl_search_api/src/main/java/com/bfd/mf/common/service/es/ParseSearchScopeService.java

@ -57,15 +57,14 @@ public class ParseSearchScopeService {
if(searchType == 0 ){ //0:主贴;1:评论;2:用户 || ES primary=1为主贴
searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 1));
} else if(searchType == 1){
searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 0));
// 之前电商主贴评论是一条数据因此需要下面这样组装查询
// searchScopeQuery = QueryBuilders.boolQuery()
// .should(QueryBuilders.termQuery(ESConstant.PRIMARY, 0))
// .should(QueryBuilders.boolQuery()
// .must(QueryBuilders.termQuery(ESConstant.PRIMARY,1))
// .must(QueryBuilders.termQuery(ESConstant.DOC_TYPE,ESConstant.ITEM)));
searchScopeQuery = QueryBuilders.boolQuery()
.should(QueryBuilders.termQuery(ESConstant.PRIMARY, 0))
.should(QueryBuilders.boolQuery()
.must(QueryBuilders.termQuery(ESConstant.PRIMARY,1))
.must(QueryBuilders.termQuery(ESConstant.DOC_TYPE,ESConstant.ITEM)));
} else if(searchType == 2){
searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2));
searchScopeQuery = QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstant.PRIMARY, 2))
.must(QueryBuilders.termsQuery(ESConstant.PAGETYPE,"userInfoPage"));
}
return searchScopeQuery;
}

2
cl_search_api/src/main/java/com/bfd/mf/common/util/constants/ESConstant.java

@ -288,6 +288,8 @@ public class ESConstant {
public static final String DATA_COUNT = "dataCount";
public static final String PAGETYPE = "pageType";
/**
* 回溯开始时间
*/

11
cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/base/SiteRepository.java

@ -14,17 +14,10 @@ public interface SiteRepository extends CrudRepository<Site, Integer> {
@Query(value = "SELECT cid,site_id,site_icon,site_type FROM cl_site WHERE site_id IS NOT NULL AND del = 0", nativeQuery = true)
List<Map<String,Object>> findsiteByDel(int del);
// @Query(value = "select cid,site_icon from cl_site WHERE del = 0", nativeQuery = true)
// List<Map<String,Object>> findsiteIconByDel(int del);
@Query(value = "SELECT cid FROM cl_site WHERE area =?1 ", nativeQuery = true)
@Query(value = "select cid from cl_site WHERE area =?1 ", nativeQuery = true)
List<String> findCidsByArea(String area);
// @Query(value = "select id,site_id,cid from cl_site WHERE del = 0", nativeQuery = true)
// Site findAllSiteIds();
@Query(value = "SELECT cid,site_id,site_icon,site_type FROM cl_site WHERE cid = ?1 AND del = 0", nativeQuery = true)
@Query(value = "SELECT cid,site_id,site_icon,site_type FROM cl_site WHERE cid = ?1 AND is_usable =0 AND del = 0", nativeQuery = true)
List<Map<String,Object>> findSiteByEnSource(String enSource);
}

12
cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java

@ -67,7 +67,7 @@ public class ESMonitorBaseEntity implements Comparable<ESMonitorBaseEntity>, Ser
private String content; // 内容
private String contentSimHash; //文章SimHash
private Integer contentSize; //正文长度
// 数字
// 数字
private String quoteCount ;
private String attitudesCount;
private Integer commentsCount = 0;
@ -117,6 +117,16 @@ public class ESMonitorBaseEntity implements Comparable<ESMonitorBaseEntity>, Ser
private String categoryLabel;
private String tag;
private String otherSourceJson;
/**
 * Returns the {@code otherSourceJson} value currently held by this entity.
 *
 * @return the stored string, exactly as it was set (no parsing or copying is done here)
 */
public String getOtherSourceJson() {
return otherSourceJson;
}
/**
 * Stores the given string as this entity's {@code otherSourceJson} value.
 *
 * @param otherSourceJson the value to keep; it is assigned as-is, without validation
 */
public void setOtherSourceJson(String otherSourceJson) {
this.otherSourceJson = otherSourceJson;
}
/**
 * Returns the {@code tag} value currently held by this entity.
 *
 * @return the stored tag string, exactly as it was set
 */
public String getTag() {
return tag;
}

25
cl_search_api/src/main/java/com/bfd/mf/service/SearchDataService.java

@ -183,8 +183,10 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
}
return esMonitorEntity;
}
/**
* 解析组装返回结果 2
* 导出时会用到这个字段解析解析组装返回结果 2
*/
private ESMonitorEntity parseMainMessage(JSONObject jsonObject,Integer searchType,
Map<String,Map<String,Object>> siteMap) throws Exception {
@ -261,7 +263,12 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
content = sourceAsMap.get(ESConstant.CONTENT).toString();
author = sourceAsMap.get(ESConstant.AUTHOR).toString();
quoteCount = sourceAsMap.get(ESConstant.QUOTE_COUNT).toString();
attitudeCount = sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString();
if(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString().contains("totalCount")) {
JSONObject countMap = JSONObject.parseObject(sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString());
attitudeCount = countMap.getString("totalCount");
}else{
attitudeCount = sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString();
}
price = sourceAsMap.get(ESConstant.PRICE).toString();
}
@ -395,7 +402,10 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
tag = sourceAsMap.get("tag").toString();
}
String otherSourceJson = "";
if(sourceAsMap.containsKey("otherSourceJson")){
otherSourceJson = sourceAsMap.get("otherSourceJson").toString();
}
try {
esMonitorEntity.setDataId(dataId);
@ -452,10 +462,12 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
esMonitorEntity.setPrimary(primary);
esMonitorEntity.setPubTime(pubTime);
esMonitorEntity.setCrawlTime(Long.valueOf(sourceAsMap.get(ESConstant.CRAWLTIME).toString()));
// 下面这三个字段是崔老师项目独有的字段分类标签价值标签tag
esMonitorEntity.setValueLabel(valueLabel);
esMonitorEntity.setCategoryLabel(categoryLabel);
esMonitorEntity.setTag(tag);
// 二次评论的字段
esMonitorEntity.setOtherSourceJson(otherSourceJson);
} catch (Exception e) {
e.printStackTrace();
@ -772,6 +784,11 @@ public class SearchDataService extends CrudService<SentimentModify, SentimentRep
}else{
jsonObject.put(entry.getKey(),new ArrayList<>());
}
} else if(entry.getKey().equals(ESConstant.ATTITUDES_COUNT)){
// The ATTITUDES_COUNT value may embed a "totalCount" member (JSON object form);
// in that case only the total is copied into the result.
if(entry.getValue().toString().contains("totalCount")){
    JSONObject totalCount = JSONObject.parseObject(entry.getValue().toString());
    jsonObject.put(entry.getKey(),totalCount.get("totalCount"));
} else {
    // No "totalCount" member: keep the value unchanged. Without this branch the
    // field was never put into the result and silently disappeared, unlike every
    // other key handled by the surrounding chain.
    jsonObject.put(entry.getKey(), entry.getValue());
}
} else {
jsonObject.put(entry.getKey(), entry.getValue());
}

63
cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java

@ -25,69 +25,6 @@ public class SearchKeywordsCouldService{
@Autowired
private TextService textService;
@Autowired
private SliceScrollUtil sliceScrollUtil;
/**
* 词云统计接口
*/
// public JSONObject dataAnalysisCloud(QueryRequest queryRequest){
// long start = System.currentTimeMillis();
// JSONObject jsonObject = new JSONObject();
// /**词云返回个数*/
// int topSize = queryRequest.getLimit();
// logger.info("[SearchKeywordsCouldService] dataAnalysisCloud : the top size is:{} ", topSize);
// try {
// List<ESMonitorEntity> cacheEsMonitorEntityList = sliceScrollUtil.fetchResultSubjectCache(queryRequest,ESConstant.FIELD_CLOUD_ANALYSIS);
//
// Map<String, Integer> keyWordsMaps = new HashMap<>();
// Map<String, Integer> placesWordsMaps = new HashMap<>(); // 地点
// Map<String, Integer> emojiWordsMaps = new HashMap<>(); // 表情
// Map<String, Integer> hashTagWordsMaps = new HashMap<>(); // 话题
// Map<String, Integer> opinionsWordsMaps = new HashMap<>(); // 评价
//// List<String> hlKeywordsList = new ArrayList<>();
// for (ESMonitorEntity esMonitorEntity : cacheEsMonitorEntityList) {
// List<String> hlKeyWords = esMonitorEntity.getHlKeyWords();
// getMapCloudKeyWords(placesWordsMaps, esMonitorEntity.getPlaces());
// // 表情
// getExpressionMapCloudKeyWords(emojiWordsMaps, esMonitorEntity.getExpression());
// // 话题
// getMapCloudKeyWords(hashTagWordsMaps, esMonitorEntity.getHashTag());
// // 评价
// getMapCloudKeyWords(opinionsWordsMaps, esMonitorEntity.getOpinions());
//
// Map<String, Integer> finalKeyWordsMaps = keyWordsMaps;
// hlKeyWords.forEach(el -> finalKeyWordsMaps.merge(el, 1, (a, b) -> a + b));
// Set<String> wordsSets = new HashSet<>(Arrays.asList(StringUtils.split(String.valueOf(hlKeyWords), " ")));
// for (String key : wordsSets) {
// if (TStringUtils.isNotEmpty(key) && key.length() > 1 && !key.equals("null")) {
// // 统计
// key = key.replace("[","").replace("]","");
// keyWordsMaps.merge(key, weightValue, (a, b) -> a + b);
// }
// }
// }
// Map<String, Integer> keyWordsResultMap = new HashMap<>();
// Map<String, Integer> placesWordsResultMap = new HashMap<>();
// CollectionUtils.sortByValueForListSubTopSize(keyWordsMaps, topSize * 2, keyWordsResultMap);
// CollectionUtils.sortByValueForListSubTopSize(placesWordsMaps, topSize * 2, placesWordsResultMap);
// //sortByValueForListSubTopSize
// Long keyWordsStart = System.currentTimeMillis();
// jsonObject.put(ConditionCommon.WORD_CLOUD, CollectionUtils.sortByValueForList(textService.post(keyWordsResultMap, 1), topSize));
// logger.info("[keysWords Execute Time one] the time used is {} ms", System.currentTimeMillis() - keyWordsStart);
// Long placesWordsStart = System.currentTimeMillis();
// jsonObject.put(ConditionCommon.PLACE_CLOUD, CollectionUtils.sortByValueForList(textService.post(placesWordsResultMap, 2), topSize));
// logger.info("[placeWords Exceute Time two] the time used is {} ms", System.currentTimeMillis() - placesWordsStart);
// jsonObject.put(ConditionCommon.HASH_TAG_CLOUD, CollectionUtils.sortByValueForList(hashTagWordsMaps, topSize));
// jsonObject.put(ConditionCommon.EMOJI_CLOUD, CollectionUtils.sortByValueForList(emojiWordsMaps, topSize));
// jsonObject.put(ConditionCommon.OPINION_CLOUD, CollectionUtils.sortByValueForList(opinionsWordsMaps, topSize));
//
// logger.info("[SearchKeywordsCouldService ] the time used is {} ms", (System.currentTimeMillis() - start));
// } catch (Exception e) {
// logger.error("[SearchKeywordsCouldService] Is Error", e);
// }
// return jsonObject;
// }
public JSONObject dataAnalysisCloud( List<ESMonitorEntity> esMonitorEntityList){

12
cl_stream_3.1.1.iml

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

4
pom.xml

@ -5,8 +5,8 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.bfd.mf</groupId>
<artifactId>cl_stream_4.0</artifactId>
<version>4.0-SNAPSHOT</version>
<artifactId>cl_stream_3.1.1</artifactId>
<version>3.1.1-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>

Loading…
Cancel
Save