Browse Source

datasave project

sitask
张志成 4 years ago
parent
commit
d80ec50bf2
  1. BIN
      cl_stream_datasave/cl_stream_datasave-2.0-SNAPSHOT.jar
  2. 98
      cl_stream_datasave/cl_stream_datasave.iml
  3. 10
      cl_stream_datasave/foreground
  4. 39
      cl_stream_datasave/pom.xml
  5. 557
      cl_stream_datasave/src/main/foreground
  6. 39
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java
  7. 278
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/NewsDownload.java
  8. 9
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/OkHttpUtils.java
  9. 768
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/DataSaveManager.java
  10. 10
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/ListenKafkaManager.java
  11. 44
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/testkongtianyuan.java
  12. 54
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/Constants.java
  13. 2
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DBUtil.java
  14. 299
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DataCheckUtil.java
  15. 24
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DataProcess.java
  16. 70
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DateUtil.java
  17. 58
      cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/ReadFile1125.java
  18. 38
      cl_stream_datasave/src/main/java/com/bfd/mf/entity/AllKeys.java
  19. 320
      cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/SubjectTask.java
  20. 65
      cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/Tasklimit.java
  21. 42
      cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/Userlimit.java
  22. 88
      cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/cl_task.java
  23. 74
      cl_stream_datasave/src/main/java/com/bfd/mf/runstart/RunStartDataSave.java
  24. 13
      cl_stream_datasave/src/main/main5.iml
  25. 60
      cl_stream_mybatis/cl_stream_mybatis.iml
  26. 27
      cl_stream_mybatis/pom.xml
  27. 2
      cl_stream_mybatis/src/main/java/com/bfd/mf/controller/CompanyController.java
  28. 4
      cl_stream_mybatis/src/main/java/com/bfd/mf/tools/ConnectionRmi.java
  29. 17
      cl_stream_mybatis/src/main/resources/application.properties
  30. 2
      cl_stream_mybatis/src/main/resources/com/bfd/mf/spring/applicationContext.xml
  31. 53
      cl_stream_service/cl_stream_service.iml
  32. 37
      cl_stream_service/src/main/java/com/bfd/mf/entity/AllKeys.java
  33. 3
      cl_stream_service/src/main/java/com/bfd/mf/entity/TypeEntity.java
  34. 46
      cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfChannelInfo.java
  35. 20
      cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfDoctypeInfo.java
  36. 18
      cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfFieldInfo.java
  37. 147
      cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/SubjectTask.java
  38. 16
      cl_stream_service/src/main/java/com/bfd/mf/runstart/RunStartService.java
  39. 366
      cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ForegroundExtendType.java
  40. 239
      cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ParentExctendType.java
  41. 224
      cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ParralleData.java
  42. 4
      cl_stream_service/src/main/java/com/bfd/mf/service/kafka/IKafka.java
  43. 21
      cl_stream_service/src/main/java/com/bfd/mf/service/kafka/ReadKafka.java
  44. 41
      cl_stream_service/src/main/java/com/bfd/mf/service/listen/ListenKafkaManager.java
  45. 10
      cl_stream_service/src/main/java/com/bfd/mf/service/listen/ListenTaskManager.java
  46. 3
      cl_stream_service/src/main/java/com/bfd/mf/service/tools/DataCheckUtil.java
  47. 38
      cl_stream_service/src/main/java/com/bfd/mf/service/tools/DateUtil.java
  48. 4
      cl_stream_service/src/main/java/com/bfd/mf/service/tools/HttpClientUtil.java
  49. 2
      cl_stream_service/src/main/java/com/bfd/mf/service/tools/RoundRobinJedisPool.java
  50. 6
      cl_stream_service/src/main/java/com/bfd/mf/service/utils/SentimentApiUtils.java
  51. 2
      cl_stream_service/src/main/java/com/bfd/mf/service/utils/WordCloudApiUtils.java
  52. 1036
      dataSaveManager/dataSaveManager.iml
  53. 1034
      serviceManager/serviceManager.iml

BIN
cl_stream_datasave/cl_stream_datasave-2.0-SNAPSHOT.jar

98
cl_stream_datasave/cl_stream_datasave.iml

@ -9,93 +9,22 @@
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="jdk" jdkName="1.8" jdkType="JavaSDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/kafka-0.10.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/kafka-utils/kafka/0.10/kafka-0.10.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../../jarlib/slf4j-api-1.7.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/bfd_harpc_service-0.0.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/BfdRedisTools-2.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/dataManager-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/hanlp-portable-1.6.8.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/nlp_common_util-1.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/taskManager-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/utils-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="library" name="Maven: bfd:utils:3.0.0" level="project" />
<orderEntry type="library" name="Maven: BfdRedisTools-2.0:BfdRedisTools-2.0:1.0.0" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.22" level="project" />
<orderEntry type="library" name="Maven: redis.clients:jedis:2.6.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.0" level="project" />
<orderEntry type="library" name="Maven: com.wandoulabs.jodis:jodis:0.1.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.6" level="project" />
<orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty:3.7.0.Final" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:14.0.1" level="project" />
<orderEntry type="library" name="Maven: commons-lang:commons-lang:2.4" level="project" />
<orderEntry type="library" name="Maven: com.bfd:elastiUtils:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: kafka-utils:kafka:0.10" level="project" />
<orderEntry type="library" name="Maven: org.apache.kafka:kafka_2.10:0.10.2.0" level="project" />
@ -107,7 +36,6 @@
<orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.10.6" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.21" level="project" />
<orderEntry type="library" name="Maven: com.101tec:zkclient:0.10" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.9" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.1.22" level="project" />
<orderEntry type="library" name="Maven: mysql:mysql-connector-java:5.1.29" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch:6.2.3" level="project" />

10
cl_stream_datasave/foreground

@ -543,5 +543,15 @@
},
"otherSourceJson":{
"type":"keyword"
},
"dns":{
"type":"keyword"
},
"asrText":{
"type":"keyword"
},
"ocrText":{
"type":"keyword"
}
}

39
cl_stream_datasave/pom.xml

@ -26,12 +26,51 @@
<!--<scope>system</scope>-->
<!--<systemPath>${project.basedir}/../../jarlib/utils-3.0.0.jar</systemPath>-->
</dependency>
<!--<dependency>-->
<!--<groupId>BfdRedisTools-2.0</groupId>-->
<!--<artifactId>BfdRedisTools-2.0</artifactId>-->
<!--<version>1.0.0</version>-->
<!--<scope>system</scope>-->
<!--<systemPath>${project.basedir}/../../jarlib/BfdRedisTools-2.0.jar</systemPath>-->
<dependency>
<groupId>BfdRedisTools-2.0</groupId>
<artifactId>BfdRedisTools-2.0</artifactId>
<version>1.0.0</version>
<!--<systemPath>BfdRedisTools-1.0.0.jar</systemPath>-->
</dependency>
<dependency>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
<version>1.7.22</version>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>com.wandoulabs.jodis</groupId>
<artifactId>jodis</artifactId>
<version>0.1.2</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>jedis</artifactId>
<groupId>redis.clients</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>com.bfd</groupId>
<artifactId>elastiUtils</artifactId>

557
cl_stream_datasave/src/main/foreground

@ -0,0 +1,557 @@
{
"commentUrl":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"channel":{
"type":"keyword"
},
"readCount":{
"type":"long"
},
"quoteCount":{
"type":"long"
},
"brand":{
"term_vector":"yes",
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields":{
"shingles":{
"type":"text",
"analyzer":"shingle_analyzer"
}
}
},
"brandId":{
"type":"keyword"
},
"createTimeStr":{
"type":"keyword"
},
"authornickname":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"contentSimHash":{
"type":"keyword"
},
"crawlDay":{
"type":"long"
},
"titleSimHash":{
"type":"keyword"
},
"commentId":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"originalPhrase":{
"type":"keyword"
},
"forwardContent":{
"analyzer":"ik_smart",
"type":"text",
"fields":{
"shingles":{
"analyzer":"shingle_analyzer",
"type":"text"
}
}
},
"finalPhrase":{
"type":"keyword"
},
"availability":{
"type":"integer"
},
"forwardUserId":{
"type":"keyword"
},
"forwardUserType":{
"type":"integer"
},
"forwardUserUrl":{
"type":"keyword"
},
"forwardAvatar":{
"type":"keyword"
},
"forwardImgs":{
"type":"keyword"
},
"forwardPostSource":{
"type":"keyword"
},
"forwardAttitudesCount":{
"type":"long"
},
"forwardCommentsCount":{
"type":"long"
},
"forwardQuoteCount":{
"type":"long"
},
"forwardPubTime":{
"type":"long"
},
"titleLength":{
"type":"long"
},
"forwardAuthor":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"sysAbstract":{
"analyzer":"ik_smart",
"type":"text"
},
"forwardUrl":{
"type":"keyword"
},
"createDate":{
"type":"date"
},
"docType":{
"type":"keyword"
},
"getSource":{
"type":"keyword"
},
"dataCount":{
"type":"integer"
},
"primary":{
"type":"integer"
},
"cate":{
"type":"keyword"
},
"sex":{
"type":"keyword"
},
"collectCount":{
"type":"long"
},
"crawlDate":{
"type":"date"
},
"avatar":{
"type":"keyword"
},
"url":{
"type":"keyword"
},
"skuProperties":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"expression":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"hashTag":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"places":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"opinions":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"hlKeywords":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"createTime":{
"type":"long"
},
"contentLength":{
"type":"integer"
},
"pubTime":{
"type":"long"
},
"fansCount":{
"type":"keyword"
},
"language":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"source":{
"type":"keyword"
},
"enSource":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"pictureList":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"userUrl":{
"type":"keyword"
},
"videoUrl":{
"type":"keyword"
},
"contentTag":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"author":{
"type":"keyword"
},
"authorId":{
"type":"keyword"
},
"authorLevel":{
"type":"keyword"
},
"sysSentiment":{
"type":"double"
},
"price":{
"type":"double"
},
"nomorprice":{
"type":"double"
},
"attitudesCount":{
"type":"keyword"
},
"createDay":{
"type":"long"
},
"postId":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"pubDate":{
"type":"date"
},
"sysKeywords":{
"type":"keyword"
},
"crawlTime":{
"type":"long"
},
"userType":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"projectName":{
"type":"keyword"
},
"lastModifiedTime":{
"type":"long"
},
"productParameter":{
"term_vector":"yes",
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields":{
"shingles":{
"type":"text",
"analyzer":"shingle_analyzer"
}
}
},
"docId":{
"type":"keyword"
},
"commentScore":{
"type":"long"
},
"urlHash":{
"type":"keyword"
},
"_id_":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"title":{
"term_vector":"yes",
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields":{
"shingles":{
"type":"text",
"analyzer":"shingle_analyzer"
}
}
},
"pageTranspondCount":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"pageCommentCount":{
"type":"text",
"fields":{
"keyword":{
"ignore_above":256,
"type":"keyword"
}
}
},
"content":{
"term_vector":"yes",
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields":{
"shingles":{
"type":"text",
"analyzer":"shingle_analyzer"
}
}
},
"pubDay":{
"type":"long"
},
"pubTimeStr":{
"type":"keyword"
},
"postSource":{
"type":"keyword"
},
"crawlTimeStr":{
"type":"keyword"
},
"postCount":{
"type":"keyword"
},
"friendsCount":{
"type":"keyword"
},
"commentsCount":{
"type":"long"
},
"favorCnt":{
"type":"long"
},
"viewCnt":{
"type":"long"
},
"downCnt":{
"type":"long"
},
"sign":{
"type":"keyword"
},
"isVip":{
"type":"integer"
},
"forumScore":{
"type":"keyword"
},
"impression":{
"type":"keyword"
},
"promotionInfo":{
"type":"keyword"
},
"smallImgs":{
"type":"keyword"
},
"listBrand":{
"term_vector":"yes",
"type":"text",
"analyzer":"ik_smart",
"search_analyzer":"ik_smart",
"fields":{
"shingles":{
"type":"text",
"analyzer":"shingle_analyzer"
}
}
},
"firstListBrand":{
"type":"keyword"
},
"secondListBrand":{
"type":"keyword"
},
"threeListBrand":{
"type":"keyword"
},
"fourListBrand":{
"type":"keyword"
},
"fiveListBrand":{
"type":"keyword"
},
"area":{
"type":"keyword"
},
"location":{
"type":"keyword"
},
"country":{
"type":"keyword"
},
"province":{
"type":"keyword"
},
"city":{
"type":"keyword"
},
"age":{
"type":"keyword"
},
"egc":{
"type":"integer"
},
"pgc":{
"type":"integer"
},
"ugc":{
"type":"integer"
},
"translateTitle":{
"type":"keyword"
},
"translateContent":{
"type":"keyword"
},
"filePath":{
"type":"keyword"
},
"resolution":{
"type":"keyword"
},
"extension":{
"type":"keyword"
},
"thumbnails":{
"type":"keyword"
},
"videoTime":{
"type":"keyword"
},
"isDownload":{
"type":"keyword"
},
"crawlDataFlag":{
"type":"keyword"
},
"attr":{
"type":"keyword"
},
"pageType":{
"type":"keyword"
},
"siteId":{
"type":"keyword"
},
"otherSourceJson":{
"type":"keyword"
},
"dns":{
"type":"keyword"
},
"asrText":{
"type":"keyword"
},
"ocrText":{
"type":"keyword"
}
}

39
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java

@ -3,32 +3,38 @@ package com.bfd.mf.datasave.download;
import com.alibaba.fastjson.JSONObject;
import okhttp3.*;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
public class DownLoadFile {
public static Map<String,Object> downloadAndSaveFile(String getUrl,String putUrl){
String realUrl = "";Integer size;
String realUrl = "";double size;
Map<String,Object> realresult= new HashMap<>();
try{
String files [] = getUrl.split("/");
String fileName = getUrl.split("/")[files.length-1];
Map<String,String> header = new HashMap<>();
header.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36");
// header.put("Connection","close");
header.put("Connection","keep-alive");
try {
Map<String,Object> downloadresult = OkHttpUtils.doGetBytes(getUrl,header);
size= (double) downloadresult.get("size");
if (downloadresult.containsKey("content")){
byte[] content = (byte[]) downloadresult.get("content");
size= (Integer) downloadresult.get("size");
size= (double) downloadresult.get("size");
Thread.sleep(4000);
String result = DownLoadFile.upload(putUrl,fileName,content);
realUrl = JSONObject.parseObject(result).getString("url");
Thread.sleep(4000);
realUrl = JSONObject.parseObject(result).getString("src");
realresult.put("realUrl",realUrl);
realresult.put("size",size);
}
} catch (IOException e) {
e.printStackTrace();
}
@ -66,4 +72,27 @@ public class DownLoadFile {
return result;
}
/**
 * Reads the image behind {@code getUrl} and reports its pixel dimensions.
 *
 * @param getUrl URL of the image to inspect
 * @return {@code "<width>×<height>"}; {@code "0×0"} when the stream could be opened but
 *         no image could be decoded from it; the empty string when the URL is malformed
 *         or the stream could not be opened/read
 * @throws IOException kept in the signature for existing callers; every failure is
 *         caught and logged inside the method
 */
public static String imagesize(String getUrl ) throws IOException{
    String realresult="";
    // try-with-resources: the stream opened from the URL is always closed, even on failure.
    try (InputStream murl = new URL(getUrl).openStream()) {
        BufferedImage sourceImg = ImageIO.read(murl);
        int srcWidth = 0; // source image width
        int srcHeight = 0; // source image height
        // ImageIO.read returns null when no registered reader can decode the stream;
        // the dimensions then stay at 0 instead of relying on a caught NullPointerException.
        if (sourceImg != null) {
            srcWidth = sourceImg.getWidth();
            srcHeight = sourceImg.getHeight();
        }
        realresult=Integer.toString(srcWidth)+"×"+ Integer.toString(srcHeight);
    }catch (Exception e){
        e.printStackTrace();
    }
    return realresult;
}
}

278
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/NewsDownload.java

@ -0,0 +1,278 @@
package com.bfd.mf.datasave.download;
import com.bfd.crawler.utils.JsonUtils;
import java.io.IOException;
import java.util.*;
public class NewsDownload {
private static String myGoFastAddr = "http://172.18.1.113:8080/upload";
/**
 * Downloads every URL listed under {@code "imagePath"} in {@code resultMap}, re-uploads it
 * to {@code myGoFastAddr} through {@link DownLoadFile#downloadAndSaveFile}, and rewrites the
 * image-related entries of {@code resultMap}.
 *
 * <p>For each successfully transferred image a map with the keys {@code size},
 * {@code videoTime}, {@code url} and {@code resolution} is appended to
 * {@code imagePathSizevalue}. URLs whose transfer failed are kept unchanged in the
 * resulting {@code "imagePath"} list.
 *
 * <p>Entries written to {@code resultMap}: {@code "imagePathSize"} (JSON string),
 * {@code "imagePath"}, and — only when {@code imagePathSizevalue} is non-empty —
 * {@code "pgc"} = 1 and, if available, {@code "srcimagePath"}.
 *
 * @param resultMap          record being assembled; must contain a list under {@code "imagePath"}
 * @param imagePathSizevalue output list receiving one descriptor per stored image
 */
public static void downloadAndSaveimage(Map<String, Object> resultMap,List<Map<String,String>> imagePathSizevalue){
    List<String> imagePath= (List<String>) resultMap.get("imagePath");
    String putUrl = myGoFastAddr;
    List<String> imagePathlist=new ArrayList<>();
    // original URL -> stored (host-stripped) URL; the value is null/empty for failed transfers
    Map<String,String> rerversemap =new HashMap<>();
    for (String geturl : imagePath) {
        String resolution= null;
        String resulturl= null;
        String size="";
        try {
            Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
            // A missing "realUrl" means the transfer failed; test for it explicitly instead
            // of relying on a NullPointerException being thrown and caught.
            Object realUrl = resultmap == null ? null : resultmap.get("realUrl");
            if (realUrl != null) {
                resulturl = ((String) realUrl).replace("http://172.18.1.113:8080","");
                Object rawSize = resultmap.get("size");
                if (rawSize != null) {
                    size = rawSize.toString()+"KB";
                }
                // The resolution is only used for stored images, so the image is not
                // fetched a second time when the transfer failed.
                if (resulturl.length() != 0) {
                    resolution = DownLoadFile.imagesize(geturl);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (resulturl!= null && resulturl.length()!= 0){
            Map<String,String> imagemap =new HashMap<>();
            imagemap.put("size",size);
            imagemap.put("videoTime","");
            imagemap.put("url",resulturl);
            imagemap.put("resolution",resolution);
            imagePathlist.add(resulturl);
            imagePathSizevalue.add(imagemap);
        }else{
            // transfer failed: keep the original URL in the path list
            imagePathlist.add(geturl);
        }
        rerversemap.put(geturl,resulturl);
    }
    resultMap.put("imagePathSize", JsonUtils.toJSONString(imagePathSizevalue));
    resultMap.put("imagePath", imagePathlist);
    // NOTE(review): unlike the file/video variants, "pgc" is not reset to 0 when nothing
    // was stored — confirm whether that asymmetry is intended.
    if(imagePathSizevalue.size()>0){
        resultMap.put("pgc", 1);
        Map<String, Object> repicturl=gofastswitch(rerversemap,resultMap);
        String picturl= (String) repicturl.get("srcimagePath");
        if(picturl !=null&&picturl.length()>0){
            resultMap.put("srcimagePath",picturl);
        }
    }
}
/**
 * Downloads every URL listed under {@code "filePath"} in {@code resultMap}, re-uploads it
 * to {@code myGoFastAddr} through {@link DownLoadFile#downloadAndSaveFile}, and rewrites the
 * file-related entries of {@code resultMap}.
 *
 * <p>For each successfully transferred file a map with the keys {@code size},
 * {@code videoTime}, {@code url} and {@code resolution} is appended to
 * {@code filePathSizevalueList}. URLs whose transfer failed are kept unchanged in the
 * resulting {@code "filePath"} list.
 *
 * <p>Entries written to {@code resultMap}: {@code "filePathSize"} (JSON string),
 * {@code "filePath"}, {@code "ugc"} (1 when {@code filePathSizevalueList} is non-empty,
 * otherwise 0) and, if available, {@code "srcfilePath"}.
 *
 * @param resultMap             record being assembled; must contain a list under {@code "filePath"}
 * @param filePathSizevalueList output list receiving one descriptor per stored file
 */
public static void downloadAndSaveFile(Map<String, Object> resultMap,List<Map<String,String>> filePathSizevalueList){
    List<String> filePath= (List<String>) resultMap.get("filePath");
    String putUrl = myGoFastAddr;
    List<String> filePathlist=new ArrayList<>();
    // original URL -> stored (host-stripped) URL; the value is null/empty for failed transfers
    Map<String,String> rerversemap =new HashMap<>();
    for (String geturl : filePath) {
        String resulturl= null;
        String size= null;
        try {
            Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
            // A missing "realUrl" means the transfer failed; test for it explicitly instead
            // of relying on a NullPointerException being thrown and caught.
            Object realUrl = resultmap == null ? null : resultmap.get("realUrl");
            if (realUrl != null) {
                resulturl = ((String) realUrl).replace("http://172.18.1.113:8080","");
                Object rawSize = resultmap.get("size");
                if (rawSize != null) {
                    size = rawSize.toString()+"KB";
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (resulturl!= null && resulturl.length()!= 0){
            Map<String,String> filemap =new HashMap<>();
            filemap.put("size",size);
            filemap.put("videoTime","");
            filemap.put("url",resulturl);
            filemap.put("resolution","");
            filePathlist.add(resulturl);
            filePathSizevalueList.add(filemap);
        }else {
            // transfer failed: keep the original URL in the path list
            filePathlist.add(geturl);
        }
        rerversemap.put(geturl,resulturl);
    }
    resultMap.put("filePathSize",JsonUtils.toJSONString(filePathSizevalueList));
    resultMap.put("filePath", filePathlist);
    if(filePathSizevalueList.size()>0){
        resultMap.put("ugc",1);
        Map<String, Object> forwardUrl=gofastswitch(rerversemap,resultMap);
        String reforwardUrl= (String) forwardUrl.get("srcfilePath");
        if(reforwardUrl !=null&&reforwardUrl.length()>0){
            resultMap.put("srcfilePath",reforwardUrl);
        }
    }
    else {
        resultMap.put("ugc",0);
    }
}
/**
 * Downloads every URL listed under {@code "videoPath"} in {@code resultMap}, re-uploads it
 * to {@code myGoFastAddr} through {@link DownLoadFile#downloadAndSaveFile}, and rewrites the
 * video-related entries of {@code resultMap}.
 *
 * <p>For each successfully transferred video a map with the keys {@code size},
 * {@code videoTime}, {@code url} and {@code resolution} is appended to
 * {@code videoPathSizevalueList}. URLs whose transfer failed are kept unchanged in the
 * resulting {@code "videoPath"} list.
 *
 * <p>Entries written to {@code resultMap}: {@code "egc"} (1 when
 * {@code videoPathSizevalueList} is non-empty, otherwise 0), {@code "srcvideoPath"} if
 * available, {@code "videoPathSize"} (JSON string) and {@code "videoPath"}.
 *
 * @param resultMap              record being assembled; must contain a list under {@code "videoPath"}
 * @param videoPathSizevalueList output list receiving one descriptor per stored video
 */
public static void downloadAndSavevideo(Map<String, Object> resultMap,List<Map<String,String>> videoPathSizevalueList){
    List<String> videoPath= (List<String>) resultMap.get("videoPath");
    String putUrl = myGoFastAddr;
    // A record without "videoTime" used to abort the whole method with a
    // NullPointerException; it is now treated as an empty duration string.
    Object rawVideoTime = resultMap.get("videoTime");
    String videoTime = rawVideoTime == null ? "" : rawVideoTime.toString();
    List<String> videoPathlist=new ArrayList<>();
    // original URL -> stored (host-stripped) URL; the value is null/empty for failed transfers
    Map<String,String> rerversemap =new HashMap<>();
    for (String geturl : videoPath) {
        String resulturl= null;
        String size="";
        try {
            Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
            // A missing "realUrl" means the transfer failed; test for it explicitly instead
            // of relying on a NullPointerException being thrown and caught.
            Object realUrl = resultmap == null ? null : resultmap.get("realUrl");
            if (realUrl != null) {
                resulturl = ((String) realUrl).replace("http://172.18.1.113:8080","");
                Object rawSize = resultmap.get("size");
                if (rawSize != null) {
                    size = rawSize.toString()+"KB";
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (resulturl!= null && resulturl.length()!= 0){ // check whether the download succeeded
            Map<String,String> videomap =new HashMap<>();
            videomap.put("size",size);
            videomap.put("videoTime",videoTime);
            videomap.put("url",resulturl);
            videomap.put("resolution","");
            videoPathlist.add(resulturl);
            videoPathSizevalueList.add(videomap);
        }else{
            // transfer failed: keep the original URL in the path list
            videoPathlist.add(geturl);
        }
        rerversemap.put(geturl,resulturl);
    }
    if(videoPathSizevalueList.size()>0){
        resultMap.put("egc",1);
        Map<String, Object> revideoUrl= null;
        try {
            revideoUrl = gofastswitch(rerversemap,resultMap);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // revideoUrl stays null when gofastswitch threw; dereferencing it unguarded would
        // skip the "videoPathSize"/"videoPath" writes below.
        String videoUrl = revideoUrl == null ? null : (String) revideoUrl.get("srcvideoPath");
        if(videoUrl !=null&&videoUrl.length()>0){
            resultMap.put("srcvideoPath",videoUrl);
        }
    }
    else {
        resultMap.put("egc",0);
    }
    resultMap.put("videoPathSize",JsonUtils.toJSONString(videoPathSizevalueList));
    resultMap.put("videoPath", videoPathlist);
}
/**
 * Builds the JSON strings that pair original media URLs with their stored ("gofast") URLs.
 *
 * <p>Depending on which of the flags {@code "pgc"} (images), {@code "ugc"} (files) and
 * {@code "egc"} (videos) in {@code responseMap} equal 1, the returned map may contain
 * {@code "srcimagePath"}, {@code "srcfilePath"} and {@code "srcvideoPath"}. A flag that is
 * absent or not the Integer 1 simply disables its section.
 *
 * <p>The {@code "pageType"} entry selects the data source: for {@code "storyDetailPage"}
 * the image/video sections are built from the {@code "imagePath"}/{@code "videoPath"} lists;
 * for {@code "socialComment"} nothing is parsed; otherwise the JSON held in
 * {@code "pictureList"}, {@code "forwardUrl"} and {@code "videoUrl"} is parsed and its
 * URLs are looked up in {@code rerversemap}.
 *
 * @param rerversemap original URL -> stored URL, as collected by the download methods
 * @param responseMap record being assembled
 * @return map holding the {@code src*Path} JSON strings that could be built
 */
private static Map<String, Object> gofastswitch(Map<String, String> rerversemap , Map<String, Object> responseMap) {
    // Compare from the constant side so that a missing flag reads as "off" instead of
    // throwing a NullPointerException.
    Integer enabled = Integer.valueOf(1);
    boolean hasImages = enabled.equals(responseMap.get("pgc")); // pictures
    boolean hasVideos = enabled.equals(responseMap.get("egc")); // videos
    boolean hasFiles = enabled.equals(responseMap.get("ugc"));  // files
    List<String> imagePath= (List<String>) responseMap.get("imagePath");
    List<String> videoPath= (List<String>) responseMap.get("videoPath");
    String pageType= (String) responseMap.get("pageType");
    boolean storyDetail = "storyDetailPage".equals(pageType);
    boolean socialComment = "socialComment".equals(pageType);
    Map<String,Object> resultmap=new HashMap<>();

    if (hasImages){
        try {
            List<Map<String, Object>> picturepath=new ArrayList<>();
            Object pictureList = responseMap.get("pictureList");
            if (storyDetail){
                for (String storedUrl : imagePath) {
                    Map<String,Object> revmap=new HashMap<>();
                    revmap.put("gofastUrl",storedUrl);
                    revmap.put("originalUrl","");
                    picturepath.add(revmap);
                }
            }else if (!socialComment && hasText(pictureList)){
                // A null parse result raises a NullPointerException below; it is caught
                // and logged and "srcimagePath" is then left unset.
                Map<String,Object> map=JsonUtils.parseObject((String) pictureList);
                if(!map.isEmpty()){
                    for (Map.Entry<String, Object> entry : map.entrySet()) {
                        Map<String,Object> gofastmap=new HashMap<>();
                        Map<String,Object> revmap= (Map<String, Object>) entry.getValue();
                        if(hasText(revmap.get("uploadImg"))){
                            gofastmap.put("gofastUrl",rerversemap.get(revmap.get("uploadImg")));
                            gofastmap.put("originalUrl",revmap.get("img"));
                        }
                        // NOTE(review): an empty map is appended for entries without
                        // "uploadImg" — kept as-is; confirm this is wanted.
                        picturepath.add(gofastmap);
                    }
                }
            }
            resultmap.put("srcimagePath",JsonUtils.toJSONString(picturepath));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if(hasFiles){
        Object forwardUrl = responseMap.get("forwardUrl");
        if(hasText(forwardUrl) && !storyDetail && !socialComment){
            try {
                List<Map<String, Object>> parsed= (List<Map<String, Object>>) JsonUtils.parseArray((String) forwardUrl);
                List<Map<String, Object>> anewforwardUrl=new ArrayList<>();
                remapGofastUrls(parsed, rerversemap, anewforwardUrl);
                resultmap.put("srcfilePath",JsonUtils.toJSONString(anewforwardUrl));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    if(hasVideos){
        List<Map<String, Object>> videoUrl=new ArrayList<>();
        Object rawVideoUrl = responseMap.get("videoUrl");
        if (storyDetail){
            String storyDetailPagevideoUrl= (String) rawVideoUrl;
            if (videoPath != null) {
                for (String storedUrl : videoPath) {
                    Map<String,Object> revmap=new HashMap<>();
                    revmap.put("gofastUrl",storedUrl);
                    revmap.put("originalUrl",storyDetailPagevideoUrl);
                    videoUrl.add(revmap);
                }
            }
        }else if (!socialComment && hasText(rawVideoUrl)){
            try {
                List<Map<String, Object>> parsed= JsonUtils.parseArray((String) rawVideoUrl);
                remapGofastUrls(parsed, rerversemap, videoUrl);
            } catch (Exception e) {
                // NOTE(review): the previous code stored the raw "videoUrl" value here,
                // but that value was always overwritten by the put below; the dead store
                // was dropped. Confirm whether the raw value should win on parse failure.
                e.printStackTrace();
            }
        }
        resultmap.put("srcvideoPath",JsonUtils.toJSONString(videoUrl));
    }
    return resultmap;
}

/** Returns true when {@code value} is neither null nor the empty string (compared by value, not identity). */
private static boolean hasText(Object value) {
    return value != null && !"".equals(value);
}

/**
 * Appends every entry of {@code entries} to {@code out}, first replacing the value under
 * {@code "gofastUrl"} (when that key is present) with its mapping from {@code rerversemap}.
 */
private static void remapGofastUrls(List<Map<String, Object>> entries, Map<String, String> rerversemap, List<Map<String, Object>> out) {
    for (Map<String, Object> entry : entries) {
        if (entry.containsKey("gofastUrl")) {
            entry.put("gofastUrl", rerversemap.get(entry.get("gofastUrl")));
        }
        out.add(entry);
    }
}
}

9
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/OkHttpUtils.java

@ -20,6 +20,7 @@ public class OkHttpUtils {
.connectTimeout(10, TimeUnit.MINUTES)
.readTimeout(10,TimeUnit.MINUTES)
.writeTimeout(10,TimeUnit.MINUTES)
// .connectionPool(new ConnectionPool(2,10,TimeUnit.SECONDS))
.build();
private static String doExecute(Request request, OkHttpClient client) throws Exception{
@ -169,18 +170,20 @@ public class OkHttpUtils {
try{
response = dClient.newCall(request).execute();
System.out.println(request.url() + " => get status code is " + response.code());
if (response.isSuccessful()) {
ResponseBody body = response.body();
if (body != null) {
byte[] content=response.body().bytes();
result.put("content",content);
int size=Integer.valueOf(response.header("Content-Length"))/1024;
// int size=Integer.valueOf(response.header("Content-Length"))/1024;
double size= new Double(response.header("Content-Length")) /1024;
result.put("size",size);
}
}
return result;
}finally {
}
finally {
if (response != null)
response.close();
System.gc();

768
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/DataSaveManager.java

@ -3,24 +3,24 @@ package com.bfd.mf.datasave.listen;
import com.alibaba.fastjson.JSONArray;
import com.bfd.crawler.elasti.ElastiProducer;
import com.bfd.crawler.kafka7.KfkProducer;
import com.bfd.crawler.kafka7.utils.PropertiesParser;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.datasave.download.DownLoadFile;
import com.bfd.mf.datasave.download.NewsDownload;
import com.bfd.mf.datasave.tools.DataCheckUtil;
import com.bfd.mf.datasave.tools.DateUtil;
import com.bfd.mf.datasave.tools.ReadLine;
import com.bfd.mf.datasave.tools.WriteMethod;
import com.bfd.mf.entity.AllKeys;
import com.bfd.mf.entity.FieldNormaliz;
import com.bfd.mf.entity.mysql.SubjectTask;
import crawler.open.util.RedisUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import scala.collection.generic.BitOperations;
import org.omg.Messaging.SYNC_WITH_TRANSPORT;
//import org.apache.logging.log4j.core.parser.ParseException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@ -28,6 +28,8 @@ import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.bfd.crawler.utils.DataUtil.calcMD5;
public class DataSaveManager implements Runnable{
private static Logger log = Logger.getLogger(DataSaveManager.class);
@ -36,12 +38,14 @@ public class DataSaveManager implements Runnable{
private static Map<String, List<Map<String,String>>> subject;
private static Map<Integer,Map<String,String>> tableInfoMap;
private static String preIndex = "cl_index_";
private static String preSubject = "cl_subject_";
//private static String preSubject = "cl_major_";
private static String preSubject = "cl_major_";
private static int subjectEsNum = 1;
private static int indexEsNum = 2;
private static String indexType = "docs";
private static int bussinessType = 1;
private static String kafkaTopic = "dataFromES_";
//private static String kafkaTopic = "dataFromES_";
private static String kafkaTopic = "databasestokafka";
private static String myGoFastAddr = "http://172.18.1.113:8080/upload";
private static Map<String, Object> resultMap = AllKeys.getMap();
private static String filePath = "../datasaveputkafka_file/";//
@ -56,218 +60,667 @@ public class DataSaveManager implements Runnable{
Map<Integer,Map<String,String>> tableInfoMap){
this.data = data ;
this.fieldNormaliz = fieldNormaliz ;
this.subject = subject;
//this.subject = subject;
this.tableInfoMap = tableInfoMap;
}
/**
 * Processes the raw message held in {@code data}.
 *
 * <p>The message is parsed as JSON, passed through {@code convertData} and {@code getResponse},
 * written to an index whose name is derived from its fields, and, when it carries a
 * {@code crawlDataFlag}, optionally has its attachments downloaded before being forwarded to
 * subject indices and Kafka topics. Timing marks are collected in {@code timetMap} and sent to
 * the {@code timelimit} topic.
 *
 * <p>Every exception is caught; the outermost handler prints the stack trace and logs the raw
 * message.
 *
 * NOTE(review): the body contains pairs of near-duplicate statements (for example two
 * consecutive {@code if} headers testing {@code primary}, and repeated local declarations)
 * that look like lines from two revisions merged together; confirm against the real source.
 */
public void excData(){
try{
Map<String, Object> timetMap =new HashMap<>();
int kafkaNum = fieldNormaliz.getKafkaSerName();
Map<String,Object> jsonData = JsonUtils.parseObject(data);
if(jsonData.containsKey("processtime")){
timetMap= JsonUtils.parseObject((String)jsonData.get("processtime"));
}
timetMap.put("dsbeginreadtime",System.currentTimeMillis());
Map<String,String> tableInfo = tableInfoMap.get(bussinessType) ;
String res = convertData(jsonData, tableInfo);
Map<String, Object> resultMap = getResponse(res); // resultMap is the record that will be written to ES / Kafka
System.out.println("The Message : "+JsonUtils.toJSONString(resultMap));
//System.out.println("The Message subject: "+JsonUtils.toJSONString(subject));
resultMap.remove("processtime");
//Map<String, Object> resultindexMap = new HashMap<String, Object>(resultMap);
// System.out.println("The Message : "+JsonUtils.toJSONString(resultMap));
// 1. Decide whether this is a main post or a comment: main posts go to the date index, replies/comments go to the channel index
String dateIndexName = getIndexName(resultMap);
System.out.println(dateIndexName);
int index= Integer.parseInt(dateIndexName.split("cl_index_")[1].split("-")[0]);
if(resultMap.containsKey("primary") && resultMap.get("primary").toString().equals("0")) {
String pubTime= resultMap.get("pubTime").toString();
if(resultMap.containsKey("primary") && resultMap.get("primary").toString().equals("0")&&!resultMap.get("docType").toString().equals("item")) {
dateIndexName = preIndex + resultMap.get("docType").toString();
}else if (resultMap.containsKey("primaryPost")&&resultMap.get("primaryPost").toString().equals("5")) {
dateIndexName="cl_index_item";
}
else if(resultMap.containsKey("primary") && resultMap.get("primary").toString().equals("2")){
dateIndexName="cl_index_user";
}
else if(index>=2000&&index<2020){
dateIndexName=preIndex+index;
}
System.out.println("切割后的索引名字"+index);
//writerToKafka(2, "dataFromES_10000tw", resultMap);
// writerToSubjectES("cl_subject_10429", resultMap);
if (index>2015){
else if(index<2000){
dateIndexName="cl_index_1990";
}
try {
resultMap.remove("primaryPost");
writerToIndexES(dateIndexName, resultMap);
} catch (Exception e) {
log.error("数据写入日期es有问题,data="+JsonUtils.toJSONString(resultMap));
e.printStackTrace();
}
try {
// Main-post data for news is stored so that detail pages expanded from list pages can use it
if(resultMap.containsKey("pageType")&&"newscontent".equals(resultMap.get("pageType").toString())){
newscontnetwriterToredis(resultMap,dateIndexName);
}
} catch (Exception e) {
e.printStackTrace();
}
//System.out.println("-----------------------继续后面的步骤哇--------------------: " + JsonUtils.toJSONString(resultMap));
// 2. Check whether the record carries a download flag; if so, download the files first, replace the stored paths, then save the data
if(resultMap.containsKey("crawlDataFlag") && resultMap.containsKey("isDownload")) { //resultMap.containsKey("isDownload")
// try {
// writerToKafka(5, "taskSign", resultMap);
// } catch (Exception e) {
// e.printStackTrace();
// }
timetMap.put("dsendreadtime",System.currentTimeMillis());
timetMap.put("dbeginsentes",System.currentTimeMillis());
// Handle news main posts
// For data that is neither news nor uploaded
if(resultMap.containsKey("crawlDataFlag")&&!resultMap.containsKey("subjectId")) {
//resultMap.containsKey("isDownload")
String key = getAllMapKey(resultMap);
//String getUrl = (String) resultMap.get("filePath");
List<String> filePathlist=new ArrayList<>();
List<String> imagePathlist=new ArrayList<>();
List<String> videoPathlist=new ArrayList<>();
List<String> filePath= (List<String>) resultMap.get("filePath");
List<String> imagePath= (List<String>) resultMap.get("imagePath");
List<String> videoPath= (List<String>) resultMap.get("videoPath");
String avatarPath=resultMap.get("avatarPath").toString();
// The subject info for this key can be looked up in subject
System.out.println("key == " + key + " **** " + JsonUtils.toJSONString(subject));
if(subject.containsKey(key)) {
List<Map<String, String>> subjectList = subject.get(key);
if(disposeCrawldataflag(key)) {
String getsubjectList=RedisUtil.get(key,10);
List<Map<String, String>> subjectList = (List<Map<String, String>>) JsonUtils.parseArray(getsubjectList);
for (Map<String, String> subjectMap : subjectList) {
String go_fast_addr = subjectMap.get("go_fast_addr");
List<Map<String,String>> imagePathSizevalue = new ArrayList<>();
List<Map<String,String>> videoPathSizevalueList = new ArrayList<>();
List<Map<String,String>> filePathSizevalueList = new ArrayList<>();
List<String> ocrText= (List<String>) resultMap.get("ocrText");
System.out.println(key+"=====");
long maxtime= Long.parseLong(subjectMap.get("maxtime"));
long mintme= Long.parseLong(subjectMap.get("mintime"));
long pubTimecomape= Long.parseLong(pubTime);
String subject_id = subjectMap.get("subject_id");
String isDownload = (String)resultMap.get("isDownload");
if (isDownload.equals("true")) {
String putUrl = myGoFastAddr;
if (!go_fast_addr.isEmpty()){
putUrl = go_fast_addr;
String appid = subjectMap.get("appid");
String crawl_content_key = subjectMap.get("crawl_content_key");
String primary = resultMap.get("primary").toString();
String docType = (String)resultMap.get("docType");
String pageType=(String)resultMap.get("pageType");
String asrText= (String) resultMap.get("asrText");
String hasTrans= resultMap.get("hasTrans").toString();
//String ocrText= (String) resultMap.get("ocrText");
if((pubTimecomape-maxtime<=0&&pubTimecomape-mintme>=0)||"eccontent".equals(pageType)||"2".equals(primary)||"socailFollow".equals(pageType)){
if("eccontent".equals(pageType)){
long pubtime=maxtime-1000*60*30;
//System.out.println(pubtime+"=======");
resultMap.put("pubDate",DataCheckUtil.getDate(pubtime));
resultMap.put("pubDay",DataCheckUtil.getDay(pubtime));
resultMap.put("pubTime",pubtime);
resultMap.put("pubTimeStr", DataCheckUtil.getCurrentTime(pubtime));
}
resultMap.remove("primaryPost");
if("1".equals(primary)){
writerToredis(resultMap,subject_id);
}
if (isDownload.equals("true")&&"".equals(asrText)&&ocrText.size()==0&&"0".equals(hasTrans)&&!"newscontent".equals(pageType)) {
timetMap.put("begindowloadtime",System.currentTimeMillis());
String putUrl = myGoFastAddr;// everything defaults to the gofast instance on 113
if (!avatarPath.equals("")){
String resulturl= null;
try {
Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(avatarPath, putUrl);
resulturl = (String) resultmap.get("realUrl");
// if (resulturl.contains("172.18.1.113")){
// resulturl=resulturl.replace("172.18.1.113:8080","crawl-files.pontoaplus.com");
// }
} catch (Exception e) {
e.printStackTrace();
}
if(resulturl!= null && resulturl.length()!= 0){
resultMap.put("avatarPath", resulturl);
}
else{
resultMap.put("avatarPath", avatarPath);
}
}
if (filePath.size()>0){
//List<Map<String,String>> filePathSizevalueList = new ArrayList<>();
List<String> filePathlist=new ArrayList<>();
Iterator<String> it = filePath.iterator();
List<Map<String,String>> valueList = new ArrayList<>();
Map<String,String> filemap =new HashMap<>();
Map<String,String> rerversemap =new HashMap<>();
while(it.hasNext()){
Map<String,String> filemap =new HashMap<>();
String geturl= it.next();
Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
String resulturl= (String) resultmap.get("realUrl");
String size= resultmap.get("size").toString()+"KB";
Map<String,Object> resultmap = null;
String resulturl= null;String size= null;
try {
resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
resulturl = (String) resultmap.get("realUrl");
resulturl =resulturl.replace("http://172.18.1.113:8080","");
size= resultmap.get("size").toString()+"KB";
} catch (Exception e) {
e.printStackTrace();
}
if (resulturl!= null && resulturl.length()!= 0){
filemap.put(resulturl,size);
filemap.put("size",size);
filemap.put("videoTime","");
filemap.put("url",resulturl);
filemap.put("resolution","");
filePathlist.add(resulturl);
filePathSizevalueList.add(filemap);
rerversemap.put(geturl,resulturl);// source URL mapped to the gofast URL it was stored under
}else {
System.out.print("很遗憾,怎么有下载失败了");
filePath.add(geturl);
filemap.put(geturl,size);
filePathlist.add(geturl);
rerversemap.put(geturl,resulturl);// source URL mapped to the gofast URL it was stored under
}
}
//valueList.add(filemap);
// if(videoPathlist.size()>0){
// resultMap.put("ugc",1);
// }
// else {
// resultMap.put("ugc",0);
// }
resultMap.put("filePathSize",JsonUtils.toJSONString(filemap));
resultMap.put("filePathSize",JsonUtils.toJSONString(filePathSizevalueList));
resultMap.put("filePath", filePathlist);
if(filePathSizevalueList.size()>0){
resultMap.put("ugc",1);
Map<String, Object> forwardUrl=gofastswitch(rerversemap,resultMap);
String reforwardUrl= (String) forwardUrl.get("srcfilePath");
if(reforwardUrl !=null&&reforwardUrl.length()>0){
resultMap.put("srcfilePath",reforwardUrl);
}
}
else {
resultMap.put("ugc",0);
}
}
if (imagePath.size()>0){
List<Map<String,String>> valueList = new ArrayList<>();
Map<String,String> imagemap =new HashMap<>();
// List<Map<String,String>> imagePathSizevalue = new ArrayList<>();//初始化图片
List<String> imagePathlist=new ArrayList<>();
Iterator<String> it = imagePath.iterator();
Map<String,String> rerversemap =new HashMap<>();
while(it.hasNext()){
Map<String,String> imagemap =new HashMap<>();
String geturl= it.next();
Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
String resulturl= (String) resultmap.get("realUrl");
String size= resultmap.get("size").toString()+"KB";
Map<String,Object> resultmap = null;
String resolution= null;String resulturl= null;String size="";
try {
try {
resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
resolution = DownLoadFile.imagesize(geturl);
resulturl= (String) resultmap.get("realUrl");
resulturl =resulturl.replace("http://172.18.1.113:8080","");
size= resultmap.get("size").toString()+"KB";
} catch (Exception e) {
//System.out.print(resulturl);
e.printStackTrace();
}
} catch (Exception e) {
e.printStackTrace();
}
if (resulturl!= null && resulturl.length()!= 0){
imagemap.put(resulturl,size); //url +size
imagemap.put("size",size);
imagemap.put("videoTime","");
imagemap.put("url",resulturl);
imagemap.put("resolution",resolution);
imagePathlist.add(resulturl);//url
imagePathSizevalue.add(imagemap);
rerversemap.put(geturl,resulturl);// source URL mapped to the gofast URL it was stored under
}else{
System.out.print("很遗憾,怎么有下载失败了");
imagePath.add(geturl);
imagemap.put(geturl,size);
imagePathlist.add(geturl);
System.out.print(resulturl+"=======");
rerversemap.put(geturl,resulturl);// source URL mapped to the gofast URL it was stored under
}
}
//valueList.add(imagemap);
// if(imagePath.size()>0){
// resultMap.put("pgc",1);
// }
// else {
// resultMap.put("pgc",0);
// }
resultMap.put("imagePathSize",JsonUtils.toJSONString(imagemap));
resultMap.put("imagePathSize",JsonUtils.toJSONString(imagePathSizevalue));
resultMap.put("imagePath", imagePathlist);
if(imagePathSizevalue.size()>0){
resultMap.put("pgc", 1);
Map<String, Object> repicturl=gofastswitch(rerversemap,resultMap);
String picturl= (String) repicturl.get("srcimagePath");
if(picturl !=null&&picturl.length()>0){
resultMap.put("srcimagePath",picturl);
}
}
else {
resultMap.put("pgc",0);
}
}
if (videoPath.size()>0){
List<Map<String,String>> valueList = new ArrayList<>();
Map<String,String> videomap =new HashMap<>();
// List<Map<String,String>> videoPathSizevalueList = new ArrayList<>();
String videoTime=resultMap.get("videoTime").toString();
List<String> videoPathlist=new ArrayList<>();
Map<String,String> rerversemap =new HashMap<>();
Iterator<String> it = videoPath.iterator();
while(it.hasNext()){
Map<String,String> videomap =new HashMap<>();
String geturl= it.next();
System.out.println(putUrl+"putUrl是哪个啊");
Map<String,Object> resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
String resulturl= (String) resultmap.get("realUrl");
String size= resultmap.get("size").toString()+"KB";
System.out.println("视频地址啊"+resulturl);
Map<String,Object> resultmap = null;
String resulturl= null;String size="";
try {
resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl);
resulturl = (String) resultmap.get("realUrl");
resulturl =resulturl.replace("http://172.18.1.113:8080","");
size= resultmap.get("size").toString()+"KB";
} catch (Exception e) {
e.printStackTrace();
}
if (resulturl!= null && resulturl.length()!= 0){// check whether the download succeeded
videomap.put(geturl,size);
videoPathlist.add(resulturl);}else{
videomap.put("size",size);
videomap.put("videoTime",videoTime);
videomap.put("url",resulturl);
videomap.put("resolution","");
videoPathlist.add(resulturl);
videoPathSizevalueList.add(videomap);
rerversemap.put(geturl,resulturl);// source URL mapped to the gofast URL it was stored under
}else{
videoPathlist.add(geturl);
videomap.put(geturl,size);
rerversemap.put(geturl,resulturl);// source URL mapped to the gofast URL it was stored under
}
}
//valueList.add(videomap);
if(videoPathlist.size()>0){
if(videoPathSizevalueList.size()>0){
resultMap.put("egc",1);
Map<String, Object> revideoUrl= null;
try {
revideoUrl = gofastswitch(rerversemap,resultMap);
} catch (Exception e) {
e.printStackTrace();
}
String videoUrl=(String) revideoUrl.get("srcvideoPath");
if(videoUrl !=null&&videoUrl.length()>0){
resultMap.put("srcvideoPath",videoUrl);
}
}
else {
resultMap.put("egc",0);
}
resultMap.put("videoPathSize",JsonUtils.toJSONString(videomap));
resultMap.put("videoPathSize",JsonUtils.toJSONString(videoPathSizevalueList));
resultMap.put("videoPath", videoPathlist);
}
timetMap.put("enddowloadtime",System.currentTimeMillis());
}else{
// Handling for news main posts (newscontent):
// if any of downloadPic, downloadFile, downloadVideo is present, isDownload has to be true
// videoPath == egc
// filePath == ugc
// imagePath == pgc
if(crawl_content_key.contains("downloadPic")&&imagePath.size()>0){
NewsDownload.downloadAndSaveimage(resultMap,imagePathSizevalue);
}
if(crawl_content_key.contains("downloadFile")&&filePath.size()>0){
NewsDownload.downloadAndSaveFile(resultMap,filePathSizevalueList);
}
if(crawl_content_key.contains("downloadVideo")&&videoPath.size()>0){
NewsDownload.downloadAndSavevideo(resultMap,videoPathSizevalueList);
}
}
if (filePathSizevalueList.size()==0&&imagePathSizevalue.size()==0&&videoPathSizevalueList.size()==0){
resultMap.put("isDownload","false");
}
if(ocrText.size()>0){
resultMap.put("hasOCR",1);
resultMap.put("ocrLength",ocrText.size());
}
if(!"".equals(asrText)){
resultMap.put("hasASR",1);
resultMap.put("asrLength",asrText.length());
}
timetMap.put("dbeginsentes",System.currentTimeMillis());
String task_id = subjectMap.get("task_id");
String external_id = subjectMap.get("external_id");
resultMap.put("taskId", task_id);
resultMap.put("externalId", external_id);
String indexName = preSubject + subject_id;
if(!"134ic".equals(appid)){
indexName=preSubject+appid+"_"+subject_id;
}
try {
if(subjectMap.get("del").equals("0")){ // check whether the subject has been deleted / is still in use
// Write the data to the corresponding subject index
if ("1".equals(primary)){
writerToSubjectES(indexName, resultMap);
}else if("0".equals(primary)&&!"socailFollow".equals(pageType)){
boolean ishave= disposeComment(resultMap,subject_id);
if(ishave){
writerToSubjectES(indexName, resultMap);
}
}else {
writerToSubjectES(indexName, resultMap);
}
String kafka_addr = subjectMap.get("kafka_addr");
if (!kafka_addr.isEmpty()) {
// Write the data to the specified kafka
kafkaTopic = kafkaTopic + "_" + subject_id;
// kafkaNum refers to the number of the kafka config file under etc
int num= checkPathExists(kafka_addr);
if (num>0){
System.out.print(num);
writerToKafka(num, "dataFromES_10000", resultMap);}
}
if(subjectMap.get("is_trans").equals("1")&&"0".equals(hasTrans)){ // check whether translation is needed
writerToKafka(5, "trans_topic", resultMap);
}if(subjectMap.get("is_ocr").equals("1")&&"".equals(asrText)&&ocrText.size()==0){
List<String> revideoPath= (List<String>) resultMap.get("videoPath");
List<String> revideoPathlist=new ArrayList<>();
if (revideoPath.size()>0){
Iterator<String> it = revideoPath.iterator();
while(it.hasNext()) {
String url= it.next();
if (url.contains("http")){
revideoPathlist.add(url);
}else {
url="http://172.18.1.113:8892"+url;
revideoPathlist.add(url);
}
}
resultMap.put("videoPath",revideoPathlist);
writerToKafka(5, "xhs1223", resultMap);
}
}
} catch (Exception e) {
e.printStackTrace();
}
try {
writerToKafka(5, kafkaTopic, resultMap);
} catch (Exception e) {
e.printStackTrace();
}
}else {
System.out.println(" 这条数据都没有标识位,就不往专题的索引存储了呗!!!!" + resultMap.get("dataId"));
log.info("数据不在时间范围内 craldataflag = " + key + " ; data = " + JsonUtils.toJSONString(resultMap));
}
}
}else if (resultMap.containsKey("crawlDataFlag")&&!"".equals(resultMap.get("crawlDataFlag").toString())){
// if(resultMap.containsKey("Secondarypush")){//第二次推送了
// //resultMap.get("Secondarypush").toString();
// //System.out.println("Secondarypush+++++++++++++++++++++++++++++++++");
// WriteMethod.writeMethod("mysqlnocrawldataflag.txt",JsonUtils.toJSONString(resultMap));
// }else {
resultMap.put("Secondarypush","1");
WriteMethod.writeMethod("mysqlnocrawldataflag.txt",JsonUtils.toJSONString(resultMap));
try {
writerToKafka(2, "newsSecondarypush_newfilter1", resultMap);
} catch (Exception e) {
e.printStackTrace();
}
// }
}
}else if(resultMap.containsKey("crawlDataFlag") && resultMap.containsKey("subjectId")){
String indexName=preSubject+resultMap.get("subjectId");
writerToSubjectES(indexName, resultMap);
}
else {
System.out.println(" 这条数据都没有标识位,就不往专题的索引存储了呗!!!!" + resultMap.get("dataId"));
}
timetMap.put("dendsentes",System.currentTimeMillis());
resultMap.put("processtime",timetMap);
try {
writerToKafka(5, "timelimit", resultMap);
//WriteMethod.writeMethod("20210421.txt",JsonUtils.toJSONString(resultMap));
} catch (Exception e) {
e.printStackTrace();
}
try {
resultMap.remove("processtime");
writerToIndexES(dateIndexName, resultMap);
} catch (Exception e) {
log.error("数据第二次写入日期es有问题,data="+JsonUtils.toJSONString(resultMap));
e.printStackTrace();
}
}catch(Exception e){
e.printStackTrace();
// System.out.println("+++++++++++++++++++" +data);
log.error(data);
}
}
/**
 * Stamps a document with the current creation time and sends it to a subject Elasticsearch index.
 *
 * <p>{@code createTime} and {@code createTimeStr} are always overwritten in {@code responseMap}.
 * The document is only sent when it carries a non-empty {@code docId}; a missing {@code docId}
 * is treated the same as an empty one instead of raising a {@link NullPointerException}.
 *
 * @param indexName   name of the subject index to write to
 * @param responseMap document to write; modified in place
 */
private static void writerToSubjectES(String indexName , Map<String, Object> responseMap) {
    // Read docId null-safely: calling toString() straight on the map value threw before the
    // emptiness check further down could ever see a null.
    Object rawDocId = responseMap.get("docId");
    String docId = rawDocId == null ? null : rawDocId.toString();

    long dateTime = System.currentTimeMillis() ;
    responseMap.put("createTime", dateTime);
    responseMap.put("createTimeStr", DataCheckUtil.getCurrentTime(dateTime));
    System.out.println("==========================写入到【专题】ES :==========" + indexName + " - "+responseMap.get("docId") );
    // NOTE(review): these WriteMethod calls look like debugging dumps to local files — confirm they are still wanted.
    WriteMethod.writeMethod("zhuti.txt",JsonUtils.toJSONString(responseMap));
    if (null != docId && !("").equals(docId)) {
        WriteMethod.writeMethod("20210621.txt",JsonUtils.toJSONString(responseMap));
        ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, subjectEsNum, indexName, indexType);
        elastiProducer.sendMessageToEs(JsonUtils.toJSONString(responseMap));
    }
}
/**
 * Stamps a document with the current creation time and sends it to the given Elasticsearch index
 * through the producer configured with {@code indexEsNum}.
 *
 * <p>{@code createTime} and {@code createTimeStr} are always overwritten in {@code responseMap}.
 * The document is only sent when it carries a non-empty {@code docId}; a missing {@code docId}
 * is treated the same as an empty one instead of raising a {@link NullPointerException}.
 *
 * @param indexName   name of the index to write to
 * @param responseMap document to write; modified in place
 */
private static void writerToIndexES(String indexName , Map<String, Object> responseMap) {
    long dateTime = System.currentTimeMillis() ;
    responseMap.put("createTime", dateTime);
    responseMap.put("createTimeStr", DataCheckUtil.getCurrentTime(dateTime));

    // Read docId null-safely: calling toString() straight on the map value threw before the
    // emptiness check below could ever see a null.
    Object rawDocId = responseMap.get("docId");
    String docId = rawDocId == null ? null : rawDocId.toString();

    System.out.println("==========================写入到【日期】ES : ==========" + indexName + " - "+responseMap.get("docId"));
    if (null != docId && !("").equals(docId)) {
        ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, indexEsNum, indexName, indexType);
        elastiProducer.sendMessageToEs(JsonUtils.toJSONString(responseMap));
    }
}
/**
 * Stores a marker for a document under the Redis key {@code enSource#docId#subjectId}.
 *
 * <p>The Redis database index is {@code hash(key, 9)} and the stored value is the subject id.
 * The key and index are always logged; nothing is stored when {@code docId} is null or empty.
 *
 * @param responseMap   document supplying {@code docId} and {@code enSource}
 * @param getsubject_id subject id used both in the key and as the stored value
 */
private static void writerToredis( Map<String, Object> responseMap,String getsubject_id) {
    final String documentId = (String) responseMap.get("docId");
    final String source = (String) responseMap.get("enSource");
    final String redisKey = source+"#"+documentId+"#"+getsubject_id;
    final int redisDb = hash(redisKey, 9);
    log.info("[ ForegroundExtendType ] 往 Redis 中灌入商品详情数据 dbIndex = " + redisDb + " ; keys = " + redisKey);
    if (documentId == null || ("").equals(documentId)) {
        return;
    }
    RedisUtil.set(redisKey, getsubject_id, redisDb);
}
/**
 * Stores the index name of a news document in Redis, keyed by the document's {@code url}.
 *
 * <p>The database index is {@code 15 - hash(url, 5)}. The key and index are always logged;
 * nothing is stored when {@code url} is null or empty.
 *
 * @param responseMap   document supplying {@code url}
 * @param dateIndexName index name stored as the Redis value
 */
private static void newscontnetwriterToredis( Map<String, Object> responseMap,String dateIndexName) {
String url = (String) responseMap.get("url");
String subject_id = dateIndexName;
String keys = url;
int dbindex = hash(keys, 5);
// Mirror the bucket so the database index counts down from 15.
dbindex=15-dbindex;
log.info("[ ForegroundExtendType ] 往 Redis 中新闻的url dbIndex = " + dbindex + " ; keys = " + keys);
if (null != url && !("").equals(url)) {
RedisUtil.set(keys, subject_id, dbindex);
}
// NOTE(review): the next line is a 4-argument writerToKafka header sitting inside this method's
// body; it looks like a leftover from an older revision — confirm against the real source.
private static void writerToKafka(int kafkaNum,String indexName, String key, Map<String, Object> responseMap) {
}
/**
 * Reports whether the Redis key {@code enSource#docId#subjectId} exists for the given record.
 *
 * <p>The key is looked up in database {@code hash(key, 9)}. Returns {@code false} when the
 * record has no usable {@code docId}, when the key is absent (which is logged as an error),
 * or when any exception occurs.
 *
 * NOTE(review): the statements between {@code try} and the {@code docId} check reference
 * {@code key}, {@code indexName} and {@code responseMap}, none of which this method declares,
 * and open braces that are never closed; they look like leftovers from another method —
 * confirm against the real source.
 *
 * @param newdataMap    record supplying {@code docId} and {@code enSource}
 * @param getsubject_id subject id used as the last key component
 * @return {@code true} only when the key exists in Redis
 */
private boolean disposeComment(Map<String, Object> newdataMap,String getsubject_id) {
try{
List<Map<String,String>> subjects = subject.get(key);
if(subjects.size() > 0) {
for (Map<String, String> sub : subjects) {
String subjectId = sub.get("subject_id");
String exportToKafka = sub.get("export_to_kafka");
String kafkaAddr = sub.get("kafka_addr");
//System.out.println("indexName : " + indexName + " ; subjectId : " + subjectId);
if (indexName.contains(subjectId) && exportToKafka.equals("1")) {
System.out.println("-----------------------------------------将数据写到对应的 kafka 中 : " + kafkaAddr);
//KfkProducer.getInstance().send("test0910", JsonUtils.toJSONString(responseMap));
if(newdataMap.containsKey("docId")) {
String docId = (String) newdataMap.get("docId");
if(null != docId && !("").equals(docId)) {
String enSource = (String) newdataMap.get("enSource");
String subject_id =getsubject_id;
String keys = enSource +"#"+ docId+"#"+subject_id;
int dbindex = hash(keys, 9);
if (RedisUtil.exists(keys, dbindex)) { // look the key up in Redis first; records whose key is missing are ignored
return true;
} else {
log.error("[ForegroundExtendType] exec >>> 电商灌数:该 key 在 Redis 中不存在!!! keys = " + keys + " ; dbindex = " + dbindex);
return false;
}
}
// return false;
}
return false;
}catch (Exception e){
e.printStackTrace();
return false;
}
}
/**
 * Reports whether Redis database 10 holds a non-empty value for the given crawl-data flag.
 *
 * <p>Returns {@code false} when the key is absent (which is logged as an error), when its
 * value is null or empty, or when any exception occurs.
 *
 * @param crawldataflag Redis key to look up
 * @return {@code true} only when the key exists with a non-empty value
 */
private boolean disposeCrawldataflag(String crawldataflag) {
try{
//if(newdataMap.containsKey("docId")) {
if (RedisUtil.exists(crawldataflag, 10)) { // look the key up in Redis first; a missing key means the record is ignored
String value = RedisUtil.get(crawldataflag,10);
if(null != value && !("").equals(value)) {
return true;
}
} else {
// NOTE(review): `key` is not declared in this method; this println looks like a leftover
// from another method — confirm against the real source.
System.out.println("空的????????" + key);
log.error("[datasave] exec >>> 灌数:该 crwaldataflag 在 Redis 中不存在!!! keys = " + crawldataflag + " ; dbindex = " + 10);
return false;
}
// return false;
// }
return false;
}catch (Exception e){
e.printStackTrace();
return false;
}
}
/**
 * Builds the JSON strings that pair original media URLs with the gofast URLs they were stored under.
 *
 * <p>For each media flag in {@code responseMap} that equals {@code 1}, one entry may be added to
 * the returned map:
 * <ul>
 *   <li>{@code pgc} (images): {@code srcimagePath}, built from {@code pictureList}, or from
 *       {@code imagePath} when {@code pageType} is {@code storyDetailPage};</li>
 *   <li>{@code ugc} (files): {@code srcfilePath}, built from {@code forwardUrl};</li>
 *   <li>{@code egc} (videos): {@code srcvideoPath}, built from {@code videoUrl}, or from
 *       {@code videoPath} when {@code pageType} is {@code storyDetailPage}.</li>
 * </ul>
 * A flag that is absent counts as not set. Parsing failures are printed and leave the affected
 * entry missing (images, files) or reduced to the items handled before the failure (videos).
 *
 * @param rerversemap maps the URL a file was fetched from to the URL it was stored under
 * @param responseMap record being processed; only read by this method
 * @return map holding zero or more of {@code srcimagePath}, {@code srcfilePath},
 *         {@code srcvideoPath}, each as a JSON string
 */
private static Map<String, Object> gofastswitch(Map<String, String> rerversemap , Map<String, Object> responseMap) {
    // Compare from the constant side so an absent flag is simply "not set"; calling equals()
    // on a null Integer here used to abort the whole record with a NullPointerException.
    final Integer set = Integer.valueOf(1);
    boolean hasImages = set.equals(responseMap.get("pgc"));
    boolean hasVideos = set.equals(responseMap.get("egc"));
    boolean hasFiles = set.equals(responseMap.get("ugc"));

    List<String> imagePath= (List<String>) responseMap.get("imagePath");
    List<String> videoPath= (List<String>) responseMap.get("videoPath");
    String pageType= (String) responseMap.get("pageType");
    boolean isStoryDetail = "storyDetailPage".equals(pageType);
    // Story detail pages and social comments never go through the JSON-parsing branches.
    boolean parsesUrlJson = !isStoryDetail && !"socialComment".equals(pageType);

    Map<String,Object> resultmap=new HashMap<>();

    if (hasImages){
        try {
            List<Map<String, Object>> picturepath=new ArrayList<>();
            // Content comparison: `!= ""` only tested object identity and let empty strings
            // that were not the interned literal through to the parser.
            if(parsesUrlJson && !"".equals(responseMap.get("pictureList"))){
                Map<String,Object> map=JsonUtils.parseObject((String) responseMap.get("pictureList"));
                if(!map.isEmpty()){
                    for (Map.Entry<String, Object> entry : map.entrySet()) {
                        Map<String,Object> gofastmap=new HashMap<>();
                        Map<String,Object> revmap= (Map<String, Object>) entry.getValue();
                        Object uploadImg = revmap.get("uploadImg");
                        if(uploadImg != null && !"".equals(uploadImg)){
                            gofastmap.put("gofastUrl",rerversemap.get(uploadImg));
                            gofastmap.put("originalUrl",revmap.get("img"));
                        }
                        // Entries without an uploaded image are still added, as an empty map.
                        picturepath.add(gofastmap);
                    }
                }
            }else if (isStoryDetail){
                for (String storedUrl : imagePath) {
                    Map<String,Object> revmap=new HashMap<>();
                    revmap.put("gofastUrl",storedUrl);
                    revmap.put("originalUrl","");
                    picturepath.add(revmap);
                }
            }
            resultmap.put("srcimagePath",JsonUtils.toJSONString(picturepath));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if(hasFiles && parsesUrlJson && !"".equals(responseMap.get("forwardUrl"))){
        try {
            List<Map<String, Object>> forwardUrl= (List<Map<String, Object>>) JsonUtils.parseArray((String) responseMap.get("forwardUrl"));
            List<Map<String, Object>> anewforwardUrl=new ArrayList<>();
            for( Map<String, Object> mapList : forwardUrl ) {
                if(mapList.containsKey("gofastUrl")){
                    mapList.put("gofastUrl",rerversemap.get(mapList.get("gofastUrl")));
                }
                anewforwardUrl.add(mapList);
            }
            resultmap.put("srcfilePath",JsonUtils.toJSONString(anewforwardUrl));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    if(hasVideos){
        List<Map<String, Object>> videoUrl=new ArrayList<>();
        if (parsesUrlJson && !"".equals(responseMap.get("videoUrl"))){
            try {
                List<Map<String, Object>> zhuquvideoUrl= JsonUtils.parseArray((String)responseMap.get("videoUrl")) ;
                for( Map<String, Object> mapList : zhuquvideoUrl ) {
                    if(mapList.containsKey("gofastUrl")){
                        mapList.put("gofastUrl",rerversemap.get(mapList.get("gofastUrl")));
                    }
                    videoUrl.add(mapList);
                }
            } catch (Exception e) {
                // Whatever was collected before the failure is still emitted below.
                e.printStackTrace();
            }
        }else if (isStoryDetail){
            String storyDetailPagevideoUrl= (String) responseMap.get("videoUrl");
            // This branch is outside any try block, so a missing videoPath must not be iterated.
            if (videoPath != null) {
                for (String storedUrl : videoPath) {
                    Map<String,Object> revmap=new HashMap<>();
                    revmap.put("gofastUrl",storedUrl);
                    revmap.put("originalUrl",storyDetailPagevideoUrl);
                    videoUrl.add(revmap);
                }
            }
        }
        resultmap.put("srcvideoPath",JsonUtils.toJSONString(videoUrl));
    }
    return resultmap;
}
// private static void writerToKafka(int kafkaNum,String indexName, String key, Map<String, Object> responseMap) {
// try{
// List<Map<String,String>> subjects = subject.get(key);
// if(subjects.size() > 0) {
// for (Map<String, String> sub : subjects) {
// String subjectId = sub.get("subject_id");
// String exportToKafka = sub.get("export_to_kafka");
// String kafkaAddr = sub.get("kafka_addr");
// //System.out.println("indexName : " + indexName + " ; subjectId : " + subjectId);
// if (indexName.contains(subjectId) && exportToKafka.equals("1")) {
// System.out.println("-----------------------------------------将数据写到对应的 kafka 中 : " + kafkaAddr);
// //KfkProducer.getInstance().send("test0910", JsonUtils.toJSONString(responseMap));
// }
// }
// }else{
// System.out.println("空的????????" + key);
// }
//
// }catch (Exception e){
// e.printStackTrace();
// }
// }
private static void writerToKafka(int kafkaNum,String kafkaTopic,Map<String, Object> responseMap) {
try{
System.out.println("要写的kafka : "+kafkaNum + " ; kafkaTopic: " + kafkaTopic);
//System.out.println("要写的kafka : "+kafkaNum + " ; kafkaTopic: " + kafkaTopic);
String docId=responseMap.get("docId").toString();
if (null != docId && !("").equals(docId)) {
KfkProducer.getInstance(kafkaNum, kafkaTopic).send(kafkaTopic, JsonUtils.toJSONString(responseMap));
}
}catch (Exception e){
e.printStackTrace();
}
@ -311,59 +764,13 @@ public class DataSaveManager implements Runnable{
String crawlDataFlag = (String) responseMap.get("crawlDataFlag");
key = enSource+"#####"+crawlDataFlag;
}else{
System.out.println("数据没有标识???为什么呀?" + JsonUtils.toJSONString(responseMap));
log.error("数据没有标识???为什么呀?!! " + JsonUtils.toJSONString(responseMap));
//System.out.println("数据没有标识???为什么呀?" + JsonUtils.toJSONString(responseMap));
}
}
return key.toLowerCase();
}
// private List<String> getIndexNameList(String key , Map<String,Object> responseMap) {
// List<String> indexNames = new ArrayList<>();
// try{
// System.out.println( key + " ; task_subject: "+JsonUtils.toJSONString(subject));
// if(subject.containsKey(key)){
// List<Map<String,String>> values = subject.get(key);
// for (Map<String,String> val: values) {
// String subjectId = val.get("subject_id");
// String taskId = val.get("task_id");
// String externalId = val.get("external_id");
// System.out.println(taskId + " -- " + externalId);
// System.out.println(externalId);
// indexNames.add(preSubject + subjectId);
// }
// }else{
// //System.out.println("3333 : " + JsonUtils.toJSONString(responseMap));
// System.out.println("这条数据不用写到 【专题】 索引中哦!!! " +
// "crawlDataFlag = " +responseMap.get("crawlDataFlag") + " ; " +
// "id = " + responseMap.get("dataId") + " ; " +
// "pubTime = " + responseMap.get("pubTimeStr"));
// }
//
//// }else{
//// System.out.println("33333 "+responseMap);
//// }
//
//// for (Map.Entry<String, String> entry : subject.entrySet()) {
//// System.out.println("subject : key= " + entry.getKey() + " and value= " + entry.getValue());
//// }
//// for (Map.Entry<String, String> entry : downloadAddr.entrySet()) {
//// System.out.println("download : key= " + entry.getKey() + " and value= " + entry.getValue());
//// }
//// for (Map.Entry<String, String> entry : kafkaAddr.entrySet()) {
//// System.out.println("kafka: key= " + entry.getKey() + " and value= " + entry.getValue());
//// }
// }catch (Exception e){
// e.printStackTrace();
// }
// return indexNames;
// }
// private static String getIndexName(Map<String,Object> responseMap) {
// String pubTimeStr = responseMap.get("pubTimeStr").toString().split(" ")[0];
// String indexName = preIndex+pubTimeStr;
// return indexName;
// }
private static String getIndexName(Map<String,Object> responseMap) {
String pubTimeStr= null;
try {
@ -450,11 +857,21 @@ public class DataSaveManager implements Runnable{
String author = dataValue.toString().replaceAll("[^\\u0000-\\uFFFF]", "") ;
jsonData.put(key, author);
}
// if(key.equals("videoPath") && dataValue != null){
// List<String> list=new ArrayList<>();
// list.add(dataValue.toString());
// jsonData.put(key,list);
// }
if(key.equals("isVip") && dataValue != null){
jsonData.put(key, 1);
}
if(key.equals("price") && dataValue != null){
String price = dataValue.toString().replaceAll("¥", "").replace("$","") ;
jsonData.put(key, price);
}
if(key.equals("nomorprice") && dataValue != null){
String nomorprice = dataValue.toString().replaceAll("¥", "").replace("$","") ;
jsonData.put(key, nomorprice);
}
if(key.equals("nomorprice") && dataValue != null){
String nomorprice = dataValue.toString().replaceAll("¥", "").replace("$","") ;
jsonData.put(key, nomorprice);
}
// if(key.equals("imagePath")&&dataValue != null){
// List<String> list=new ArrayList<>();
// list.add(dataValue.toString());
@ -467,7 +884,7 @@ public class DataSaveManager implements Runnable{
// }
if(tableInfo.containsKey(key)){
System.out.print("tableInfo"+tableInfo);
// System.out.print("tableInfo"+tableInfo);
String value = tableInfo.get(key);
if(value.equals("Integer")){
if(StringUtils.isNotBlank(dataValue.toString())){
@ -484,7 +901,10 @@ public class DataSaveManager implements Runnable{
}else if(dataValuenew.endsWith("万+")){
dataValuenew = dataValuenew.substring(0,dataValuenew.indexOf("万+")) ;
jsonData.put(key, new Double(Double.valueOf(dataValuenew) * 10000).intValue()) ;
}else{
}else if (dataValue.toString().contains("店铺")){
jsonData.put(key, 1) ;
}
else{
try{
if(dataValue.toString().contains("全部评论")){
dataValue = dataValue.toString().replace("全部评论 (","").replace(")","");
@ -501,20 +921,34 @@ public class DataSaveManager implements Runnable{
jsonData.put(key, 0) ;
}
}else if(value.equals("List")){
try {
if(StringUtils.isNotBlank(dataValue.toString())){
jsonData.put(key, JSONArray.parseArray(dataValue.toString())) ;
} else{
jsonData.put(key, new ArrayList<String>()) ;
}
} catch (Exception e) {
//e.printStackTrace();
String str=dataValue.toString().replace(", ",",");
str = str.substring(1,str.length()-1).trim();
String []strs =str.split(",");
// System.out.println(strs.length+"数组的长度啊");
System.out.println(str+"数组的长度啊");
List<String> list = Arrays.asList(strs);
jsonData.put(key, list) ;
}
}else if(value.equals("Long")){
if(StringUtils.isNotBlank(dataValue.toString())){
try{
Long theValue = 0L;
if(dataValue.toString().contains("万+")){
Double dou = Double.valueOf(dataValue.toString().replace("万+",""));
if(dataValue.toString().contains("万")){
Double dou = Double.valueOf(dataValue.toString().replace("万+","").replace("万",""));
theValue =new Double(dou * 10000).longValue(); ;
jsonData.put(key,theValue) ;
}else{
}else if (dataValue.toString().contains("+")){
jsonData.put(key,Long.valueOf(dataValue.toString().replace("+",""))) ;
}
else {
jsonData.put(key,Long.valueOf(dataValue.toString())) ;
}
@ -633,6 +1067,12 @@ public class DataSaveManager implements Runnable{
}
return lDate;
}
/**
 * Maps a string onto a bucket index derived from its MD5 digest.
 *
 * The first four characters of {@code calcMD5(str)} are parsed as a base-16
 * number and reduced modulo {@code size}.
 * NOTE(review): assumes calcMD5 returns a hexadecimal string of at least four
 * characters — confirm against its definition.
 *
 * @param str  value to hash
 * @param size number of buckets; zero causes an ArithmeticException
 * @return the remainder of the parsed prefix divided by {@code size}
 */
public static int hash(String str, int size) {
    final String digestPrefix = calcMD5(str).substring(0, 4);
    final int prefixValue = Integer.parseInt(digestPrefix, 16);
    return prefixValue % size;
}
public static int checkPathExists(String kafka_addr){//检查文件夹是否存在kafka的配置文件
int num = 0;
String filename="";

10
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/ListenKafkaManager.java

@ -24,7 +24,7 @@ public class ListenKafkaManager implements Runnable{
public ListenKafkaManager(FieldNormaliz fieldNormaliz){
int croePoolsize = 20 ;
int maximumPoolsize = 60;
int maximumPoolsize = 80;
long keepAliveTime = 0;
this.spiderPoolExec = new ThreadPoolExecutor(croePoolsize, maximumPoolsize, keepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>());
this.fieldNormaliz = fieldNormaliz ;
@ -32,7 +32,7 @@ public class ListenKafkaManager implements Runnable{
this.tableInfoMap = FiledTableInfo.tableInfoMap;
String kafkaname = fieldNormaliz.getKafkaName() ;
// KfkConsumer.startReadThread(queue,"Ejingdongdedup_filter1",10,"333",2);
ReadKafka readKafka = new ReadKafka(queue , kafkaname ,10 , fieldNormaliz.getGroupId(), fieldNormaliz.getKafkaSerName(),fieldNormaliz.getEsSerName());
ReadKafka readKafka = new ReadKafka(queue , kafkaname ,12 , fieldNormaliz.getGroupId(), fieldNormaliz.getKafkaSerName(),fieldNormaliz.getEsSerName());
readKafka.read();
}
@ -53,14 +53,16 @@ public class ListenKafkaManager implements Runnable{
/**
 * Hands one payload string to the worker pool as a new DataSaveManager task.
 *
 * Before submitting, the calling thread polls every 200 ms for as long as the
 * pool size or the active-thread count has reached the pool's maximum.
 *
 * NOTE(review): an InterruptedException during the wait is only printed; the
 * interrupt flag is not restored and the loop keeps waiting, so this method
 * cannot be cancelled by interrupting the caller.
 *
 * @param data payload passed to the DataSaveManager constructor
 */
private void addTask(String data){
// Wait while the pool reports no spare capacity.
while ( spiderPoolExec.getPoolSize() >= spiderPoolExec.getMaximumPoolSize() ||
spiderPoolExec.getActiveCount() >= spiderPoolExec.getMaximumPoolSize()) {
//System.out.println("thread pool is full"+spiderPoolExec.getPoolSize()+" max threads "+spiderPoolExec.getMaximumPoolSize()+" active threads "+spiderPoolExec.getActiveCount());
// System.out.println("thread pool is full");
try {
Thread.sleep(200);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
// Earlier submit variants, kept disabled:
//spiderPoolExec.submit(new DataSaveManager(data, fieldNormaliz));
//spiderPoolExec.submit(new DataSaveManager_kongtianyuan(data, fieldNormaliz,subject,tableInfoMap));
// Prints the active-thread count on every submission.
System.out.println("现有的线程数"+spiderPoolExec.getActiveCount());
spiderPoolExec.submit(new DataSaveManager(data, fieldNormaliz,subject,tableInfoMap));
}

44
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/testkongtianyuan.java

@ -0,0 +1,44 @@
//package com.bfd.mf.datasave.listen;
//
//import com.bfd.crawler.elasti.ElastiProducer;
//import com.bfd.crawler.utils.JsonUtils;
//import com.bfd.mf.datasave.tools.ReadFile1125;
//import com.bfd.mf.datasave.tools.ReadLine;
//import com.bfd.mf.datasave.tools.WriteMethod;
//
//import java.io.File;
//import java.util.List;
//
//public class testkongtianyuan {
// private static String preIndex = "cl_index_";
// private static String preSubject = "cl_subject_";
// private static int subjectEsNum = 1;
// private static int indexEsNum = 2;
// private static String indexType = "docs";
// private static int bussinessType = 1;
// public static void main(String[] args) {
// //List<String> properties = ReadLine.readLine(new File("C:/Users/zhicheng.zhang/Desktop/15S_1125.txt"));
// // ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, subjectEsNum, "cl_subject_20201125", indexType);
// ReadFile1125 readFile = new ReadFile1125("C:/Users/zhicheng.zhang/Desktop/15S_1125.txt");
//// ReadFile readFile = new ReadFile("D:/program/HiveToKafkaTool/data/juemi.txt");
//
// Thread readFileThread = new Thread(readFile, "readFileThread");
// readFileThread.start();
//
//
// joinjess a=new joinjess();
// for(int i = 0; i < 4; i++) {
// Thread joinJsonThread;
// joinJsonThread = new Thread(a, "joinJson" + i);
// joinJsonThread.start();
// }
//// // elastiProducer.sendMessageToEs(properties.get(1));
//// for(int i=0;i<properties.size();i++){
//// System.out.println(properties.size());
//// ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, indexEsNum, "cl_subject_20201125", indexType);
//// elastiProducer.sendMessageToEs(properties.get(i));
////
//// }
//
// }
//}

54
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/Constants.java

@ -0,0 +1,54 @@
package com.bfd.mf.datasave.tools;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Process-wide mutable state shared between reader and worker threads:
 * two bounded string queues, a line range and a boolean flag.
 *
 * Every field is {@code volatile} so that a value stored through a setter on
 * one thread is visible to getters on other threads. The queues themselves
 * are {@link ArrayBlockingQueue}s and are safe for concurrent use.
 */
public class Constants {
    /** Bounded queue of input lines (capacity 10000). */
    private static volatile ArrayBlockingQueue<String> lineQueue = new ArrayBlockingQueue<String>(10000);
    /** Bounded queue of output lines (capacity 100000). */
    private static volatile ArrayBlockingQueue<String> outputQueue = new ArrayBlockingQueue<String>(100000);
    /** Start-line setting; defaults to 1. */
    private static volatile long startLine = 1;
    /** End-line setting; defaults to -1. */
    private static volatile long endLine = -1;
    /** Shared boolean flag; defaults to true. */
    private static volatile boolean flag = true;

    /** @return the current value of the shared flag */
    public static boolean isFlag() {
        return flag;
    }

    /** @param flag new value of the shared flag */
    public static void setFlag(boolean flag) {
        Constants.flag = flag;
    }

    /** @return the queue of input lines */
    public static ArrayBlockingQueue<String> getLineQueue() {
        return lineQueue;
    }

    /** @param lineQueue replacement input-line queue */
    public static void setLineQueue(ArrayBlockingQueue<String> lineQueue) {
        Constants.lineQueue = lineQueue;
    }

    /** @return the queue of output lines */
    public static ArrayBlockingQueue<String> getOutputQueue() {
        return outputQueue;
    }

    /** @param outputQueue replacement output-line queue */
    public static void setOutputQueue(ArrayBlockingQueue<String> outputQueue) {
        Constants.outputQueue = outputQueue;
    }

    /** @return the start-line setting */
    public static long getStartLine() {
        return startLine;
    }

    /** @param startLine new start-line setting */
    public static void setStartLine(long startLine) {
        Constants.startLine = startLine;
    }

    /** @return the end-line setting */
    public static long getEndLine() {
        return endLine;
    }

    /** @param endLine new end-line setting */
    public static void setEndLine(long endLine) {
        Constants.endLine = endLine;
    }
}

2
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DBUtil.java

@ -46,7 +46,7 @@ public class DBUtil {
public List<Map<String,Object>> query(String querySql){
List<Map<String,Object>> list = new ArrayList<Map<String,Object>>();
List<String> columns = new ArrayList<String>();
//System.out.println(querySql+"");
DBConnectionManager dbm=getDBCONConnectionManager();
Connection conn=null;
while(conn == null){

299
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DataCheckUtil.java

@ -0,0 +1,299 @@
package com.bfd.mf.datasave.tools;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Helpers for normalising loosely formatted date/time strings to
 * {@code yyyy-MM-dd HH:mm:ss} and for converting between epoch values and
 * formatted strings.
 *
 * A fresh {@link SimpleDateFormat} is created per call because that class is
 * not thread-safe.
 */
public class DataCheckUtil {
    /** Matches a complete {@code yyyy-MM-dd HH:mm:ss} timestamp. */
    public static Pattern datePattrn = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}\\s\\d{2}\\:\\d{2}:\\d{2}$");
    /** Matches a date-only value with a 2-4 digit year and 1-2 digit month/day. */
    public static Pattern dayPattrn = Pattern.compile("^\\d{2,4}\\-\\d{1,2}\\-\\d{1,2}$");
    /** Matches a run of whitespace. */
    public static Pattern p = Pattern.compile("\\s+");

    private static final Pattern DIGITS = Pattern.compile("\\d+");
    private static final String DATE_TIME_FORMAT = "yyyy-MM-dd HH:mm:ss";
    private static final String ISO_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSSXXX";
    private static final String DAY_FORMAT = "yyyy-MM-dd";
    private static final String MIDNIGHT = "00:00:00";
    private static final Logger LOG = Logger.getLogger(DataCheckUtil.class);

    /**
     * Normalises {@code dataStr} like {@link #checkData(String)} after removing
     * every {@code Z} character, and falls back to {@code DateUtil.getDateTime()}
     * when the result still is not a full timestamp.
     *
     * @param dataStr raw date string; null is treated like a blank value
     * @return a string matching {@link #datePattrn}, or the DateUtil fallback
     */
    public static String chechData2(String dataStr){
        // Guard added: replace() on null used to throw although checkData accepts null.
        if (dataStr != null) {
            dataStr = dataStr.replace("Z","");
        }
        dataStr = checkData(dataStr);
        Matcher matcher = datePattrn.matcher(dataStr);
        if (!matcher.find()) {
            System.out.println("格式错误,使用当前时间 : " + dataStr);
            return DateUtil.getDateTime();
        }
        return matcher.group(0);
    }

    /**
     * Normalises a loosely formatted date string to {@code yyyy-MM-dd HH:mm:ss}.
     *
     * Values that already match {@link #datePattrn} are returned unchanged.
     * Otherwise the value is split on whitespace into a date part and an
     * optional time part; each part is zero-padded and missing time fields
     * default to {@code 00}. The current time is returned when the input is
     * blank or no three-part date can be extracted.
     *
     * @param dataStr raw date string, may be null or blank
     * @return normalised timestamp string
     */
    public static String checkData(String dataStr){
        if (StringUtils.isBlank(dataStr)) {
            return getCurrentTime();
        }
        dataStr = dataStr.replace("-:", ":").replace(":-", ":");
        if (datePattrn.matcher(dataStr).find()) {
            return dataStr;
        }
        dataStr = dataStr.trim();
        // A value without whitespace must at least look like a plain date.
        if (!p.matcher(dataStr).find() && !dayPattrn.matcher(dataStr).find()) {
            return getCurrentTime();
        }
        String[] dates = dataStr.split("\\s+");
        // Only an exact "date time" pair contributes a time part.
        String times = dates.length == 2 ? dates[1] : "";
        String yms = normalizeDay(dates[0]);
        if (StringUtils.isBlank(yms)) {
            return getCurrentTime();
        }
        return yms + " " + normalizeTime(times);
    }

    /** Turns {@code y-m-d} or {@code y/m/d} into a zero-padded {@code yyyy-MM-dd}; returns "" when there are not exactly three parts. */
    private static String normalizeDay(String day){
        String[] parts = day.replace("/", "-").split("-");
        if (parts.length != 3) {
            return "";
        }
        // Two-digit years are placed in the 2000s.
        String year = parts[0].length() == 2 ? "20" + parts[0] : parts[0];
        return year + "-" + pad2(parts[1]) + "-" + pad2(parts[2]);
    }

    /** Turns a time fragment into zero-padded {@code HH:mm:ss}, defaulting missing fields to 00. */
    private static String normalizeTime(String times){
        if (StringUtils.isBlank(times)) {
            return MIDNIGHT;
        }
        String normalized = times.replace("/", ":");
        if (!normalized.contains(":")) {
            // A bare one- or two-digit number is taken as the hour.
            if (isNum(normalized) && normalized.length() == 2) {
                return normalized + ":00:00";
            }
            if (isNum(normalized) && normalized.length() == 1) {
                return "0" + normalized + ":00:00";
            }
            return MIDNIGHT;
        }
        String[] parts = normalized.split(":");
        if (parts.length == 0) {
            // The fragment consisted of separators only (e.g. ":").
            return MIDNIGHT;
        }
        String hours = pad2(parts[0]);
        String mins = parts.length >= 2 ? pad2(parts[1]) : "00";
        String secs = parts.length >= 3 ? pad2(parts[2]) : "00";
        return hours + ":" + mins + ":" + secs;
    }

    /** Left-pads a single-character field with one zero. */
    private static String pad2(String field){
        return field.length() == 1 ? "0" + field : field;
    }

    /** Returns true only when the whole string consists of digits. */
    private static boolean isNum(String time){
        return DIGITS.matcher(time).matches();
    }

    /**
     * Formats an epoch value given as a string via {@code DateUtil.getDateTime(long)}.
     * A 13-character value is used as milliseconds; any other length is
     * multiplied by 1000 first.
     *
     * @param datetime epoch value as text; blank yields the current time
     * @return formatted date-time string
     * @throws NumberFormatException if {@code datetime} is not a valid long
     */
    public static String convertStringTotime(String datetime){
        if (StringUtils.isBlank(datetime)) {
            return DateUtil.getDateTime(System.currentTimeMillis());
        }
        return DateUtil.getDateTime(convertStringToLong(datetime));
    }

    /**
     * Parses an epoch value given as a string and returns it in milliseconds.
     * A 13-character value is returned as is; any other length is multiplied by 1000.
     *
     * @param datetime epoch value as text; blank yields the current time
     * @return epoch milliseconds
     * @throws NumberFormatException if {@code datetime} is not a valid long
     */
    public static long convertStringToLong(String datetime){
        if (StringUtils.isBlank(datetime)) {
            return System.currentTimeMillis();
        }
        long parsed = Long.valueOf(datetime);
        return datetime.length() == 13 ? parsed : parsed * 1000;
    }

    /**
     * Parses an epoch value given as a string and returns it in seconds.
     * A 13-character value is divided by 1000; any other length is returned as is.
     *
     * @param datetime epoch value as text; blank yields the current time
     * @return epoch seconds
     * @throws NumberFormatException if {@code datetime} is not a valid long
     */
    public static long convertTimeTotime(String datetime){
        if (StringUtils.isBlank(datetime)) {
            return System.currentTimeMillis() / 1000;
        }
        long parsed = Long.valueOf(datetime);
        return datetime.length() == 13 ? parsed / 1000 : parsed;
    }

    /**
     * Parses a {@code yyyy-MM-dd HH:mm:ss} string into epoch seconds.
     *
     * @param datetime formatted timestamp; blank yields the current time
     * @return epoch seconds, or 0 when the value cannot be parsed
     */
    public static long convertDateTotime(String datetime){
        if (StringUtils.isBlank(datetime)) {
            return System.currentTimeMillis() / 1000;
        }
        try {
            return new SimpleDateFormat(DATE_TIME_FORMAT).parse(datetime).getTime() / 1000;
        } catch (ParseException e) {
            LOG.error("DataCheckUtil convertDateTotime() err data:" + datetime, e);
            return 0;
        }
    }

    /** @return the current time formatted as {@code yyyy-MM-dd HH:mm:ss} */
    public static String getCurrentTime(){
        return getCurrentTime(System.currentTimeMillis());
    }

    /**
     * @param dateTime epoch milliseconds
     * @return {@code dateTime} formatted as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String getCurrentTime(long dateTime){
        return new SimpleDateFormat(DATE_TIME_FORMAT).format(new Date(dateTime));
    }

    /**
     * @param dateTime epoch milliseconds
     * @return {@code dateTime} formatted as {@code yyyy-MM-dd'T'HH:mm:ss.SSSXXX}
     */
    public static String getDate(long dateTime){
        return new SimpleDateFormat(ISO_FORMAT).format(new Date(dateTime));
    }

    /**
     * Re-formats a {@code yyyy-MM-dd HH:mm:ss} string as
     * {@code yyyy-MM-dd'T'HH:mm:ss.SSSXXX}.
     *
     * @param dateTime formatted timestamp
     * @return re-formatted timestamp, or the current time in that format when
     *         the input cannot be parsed
     */
    public static String getDate(String dateTime){
        SimpleDateFormat iso = new SimpleDateFormat(ISO_FORMAT);
        try {
            Date parsed = new SimpleDateFormat(DATE_TIME_FORMAT).parse(dateTime);
            return iso.format(parsed);
        } catch (ParseException e) {
            LOG.error("DataCheckUtil getDate() err data:"+dateTime, e);
        }
        return iso.format(new Date());
    }

    /**
     * Truncates an epoch-millisecond value to the start of its day by
     * formatting it as {@code yyyy-MM-dd} and parsing it back.
     *
     * @param dateTime epoch milliseconds
     * @return epoch milliseconds of the parsed day, or 0 on failure
     */
    public static long getDay(long dateTime){
        try {
            SimpleDateFormat sdf = new SimpleDateFormat(DAY_FORMAT);
            return sdf.parse(sdf.format(new Date(dateTime))).getTime();
        } catch (ParseException e) {
            LOG.error("DataCheckUtil getDay() err data:"+dateTime, e);
        }
        return 0;
    }

    /**
     * Parses the leading {@code yyyy-MM-dd} of a string into epoch milliseconds.
     *
     * @param dateTime date string
     * @return epoch milliseconds of the parsed day, or 0 on failure
     */
    public static long getDay(String dateTime){
        try {
            return new SimpleDateFormat(DAY_FORMAT).parse(dateTime).getTime();
        } catch (Exception e) {
            // Broad catch kept on purpose: a null argument also maps to 0.
            LOG.error("DataCheckUtil getDay2() err data:"+dateTime, e);
        }
        return 0;
    }
}

24
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DataProcess.java

@ -0,0 +1,24 @@
package com.bfd.mf.datasave.tools;
import crawler.open.util.RedisUtil;
/**
 * Worker that drains {@code Constants.getLineQueue()} and stores each entry
 * through {@code RedisUtil.set}.
 *
 * Each queue entry is expected to look like {@code key@#@value}. Entries
 * without both parts are reported and skipped instead of terminating the
 * worker with an ArrayIndexOutOfBoundsException. Interrupting the thread
 * ends the loop.
 */
public class DataProcess implements Runnable {
    /** Separator between key and value inside a queue entry. */
    private static final String SEPARATOR = "@#@";

    @Override
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            try {
                String entry = Constants.getLineQueue().take();
                String[] parts = entry.split(SEPARATOR);
                if (parts.length < 2) {
                    System.err.println("DataProcess skipped malformed queue entry: " + entry);
                    continue;
                }
                // NOTE(review): the meaning of the third argument (10) is defined by RedisUtil — confirm.
                RedisUtil.set(parts[0], parts[1], 10);
                System.out.println(Constants.getLineQueue().size()+"队列的大小");
            } catch (InterruptedException e) {
                // Restore the flag so the loop condition sees the interrupt and the worker stops.
                Thread.currentThread().interrupt();
            }
        }
    }
}

70
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DateUtil.java

@ -116,6 +116,48 @@ public class DateUtil {
}
}
/**
 * Returns the time one hour before now, formatted as
 * {@code yyyy-MM-dd HH:mm:ss}.
 *
 * @return the formatted timestamp, or an empty string if formatting fails
 */
public static String getbeforeHour(){
    try {
        Calendar calendar = Calendar.getInstance();
        // add() rolls over day boundaries explicitly instead of relying on a
        // lenient set() of the 12-hour HOUR field to a possibly negative value.
        calendar.add(Calendar.HOUR_OF_DAY, -1);
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(calendar.getTime());
    } catch (Exception e) {
        log.debug("DateUtil.getbeforeHour():" + e.toString());
        return "";
    }
}
/**
 * Returns the time one hour before now, formatted as
 * {@code yyyy-MM-dd'T'HH:mm:ss.SSSXXX}.
 *
 * @return the formatted timestamp, or an empty string if formatting fails
 */
public static String TgetbeforeHour(){
    try {
        Calendar calendar = Calendar.getInstance();
        // add() rolls over day boundaries explicitly instead of relying on a
        // lenient set() of the 12-hour HOUR field to a possibly negative value.
        calendar.add(Calendar.HOUR_OF_DAY, -1);
        return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").format(calendar.getTime());
    } catch (Exception e) {
        log.debug("DateUtil.TgetbeforeHour():" + e.toString());
        return "";
    }
}
public static int getMinute(){
int temp = 0;
@ -780,6 +822,34 @@ public class DateUtil {
timemillis = cal.getTimeInMillis() ;
return timemillis ;
}
/**
 * Returns the current time as epoch milliseconds.
 *
 * @return milliseconds since 1970-01-01T00:00:00Z
 */
public static long getcurr(){
    final Date now = new Date();
    return now.getTime();
}
/**
 * Returns the epoch-millisecond timestamp of one hour before now.
 *
 * @return current time in milliseconds minus 3,600,000, or 0 if an
 *         exception is raised while reading the clock
 */
public static long getbeforonecurr(){
    final int oneHourMillis = 60*60*1000;
    try {
        final long nowMillis = new Date().getTime();
        return nowMillis - oneHourMillis;
    } catch (Exception e) {
        return 0L;
    }
}
/**
 * Returns the epoch-millisecond timestamp of the start of the current day,
 * obtained by formatting now as {@code yyyy-MM-dd} and parsing it back.
 *
 * @return epoch milliseconds of today's date, or 0 if parsing fails
 */
public static long getday(){
    final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    final Date now = Calendar.getInstance().getTime();
    final String today = dayFormat.format(now);
    try {
        final Date startOfDay = dayFormat.parse(today);
        return startOfDay.getTime();
    } catch (ParseException e) {
        return 0L;
    }
}
public static long getsmallSec(String datetime1,String datetime2){

58
cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/ReadFile1125.java

@ -0,0 +1,58 @@
//package com.bfd.mf.datasave.tools;
//
//import java.io.BufferedInputStream;
//import java.io.BufferedReader;
//import java.io.File;
//import java.io.FileInputStream;
//import java.io.FileNotFoundException;
//import java.io.IOException;
//import java.io.InputStreamReader;
//
//public class ReadFile1125 implements Runnable{
//
// private String filename = null;
// public ReadFile1125(String filename) {
// this.filename = filename;
// }
// @Override
// public void run() {
// // TODO Auto-generated method stub
// try {
// FileInputStream inputStream = null;
// try {
// inputStream = new FileInputStream(filename);
// } catch (FileNotFoundException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
// String str = null;
// long count = 0;
// do{
// str = null;
// try {
// str = bufferedReader.readLine();
// System.out.println("lineQueue size: " + Constants.getLineQueue().size());
// count++;
// if (str != null && count > Constants.getStartLine()) {
// if (Constants.getEndLine() < 0 || (Constants.getEndLine() > 0 && count < Constants.getEndLine())) {
// Constants.getLineQueue().put(str);
// } else {
// System.out.println("Not process, count: " + count + " start config: " + Constants.getStartLine() + " end confid: " + Constants.getEndLine());
// }
// }
//// System.out.println("Read line:" + str);
// } catch (IOException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// } while(str != null);
// inputStream.close();
// bufferedReader.close();
// Constants.setFlag(false);
// } catch(Exception e) {
// e.printStackTrace();
// }
// }
//
//}

38
cl_stream_datasave/src/main/java/com/bfd/mf/entity/AllKeys.java

@ -1,5 +1,7 @@
package com.bfd.mf.entity;
import com.bfd.mf.datasave.tools.DateUtil;
import java.util.*;
public class AllKeys {
@ -63,6 +65,9 @@ public class AllKeys {
map.put("filePath",new ArrayList<>());
map.put("imagePath",new ArrayList<>());
map.put("videoPath",new ArrayList<>());
map.put("filePathSize",new ArrayList<>());
map.put("imagePathSize",new ArrayList<>());
map.put("videoPathSize",new ArrayList<>());
map.put("finalPhrase","");
map.put("firstListBrand","");
map.put("fiveListBrand","");
@ -111,10 +116,10 @@ public class AllKeys {
map.put("projectName","");
map.put("promotionInfo","");
map.put("province","");
map.put("pubDate",new Date());
map.put("pubDay",0L);
map.put("pubTime",0L);
map.put("pubTimeStr","");
map.put("pubDate",DateUtil.TgetbeforeHour());
map.put("pubDay",DateUtil.getday());
map.put("pubTime",DateUtil.getbeforonecurr());
map.put("pubTimeStr", DateUtil.getbeforeHour());
map.put("quoteCount",0);
map.put("readCount",0);
map.put("resolution","");
@ -142,6 +147,31 @@ public class AllKeys {
map.put("userUrl","");
map.put("videoTime","");
map.put("videoUrl","");
map.put("avatarPath","");
map.put("viewCnt",0);
map.put("channelNum","");
map.put("crawlDataFlagType","");
map.put("primaryPost","");
map.put("dns","");
map.put("asrText","");
map.put("ocrText",new ArrayList<>());
map.put("srcfilePath",new ArrayList<>());
map.put("srcimagePath",new ArrayList<>());
map.put("srcvideoPath",new ArrayList<>());
map.put("hasOCR",0);
map.put("hasASR",0);
map.put("asrLength",0);
map.put("ocrLength",0);
map.put("translateTitleLength","");
map.put("translateContentLength","");
map.put("hasTrans",0);
map.put("goodrate",0);
map.put("generalrate",0);
map.put("poorrate",0);
map.put("processtime",new HashMap<>());
map.put("tag","");
}
}

320
cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/SubjectTask.java

@ -2,69 +2,355 @@ package com.bfd.mf.entity.mysql;
//import com.bfd.crawler.utils.JsonUtils;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.datasave.listen.DataSaveManager;
import com.bfd.mf.datasave.tools.Constants;
import com.bfd.mf.datasave.tools.DBUtil;
import com.bfd.mf.datasave.tools.DateUtil;
import com.bfd.mf.datasave.tools.WriteMethod;
import crawler.open.util.RedisUtil;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
public class SubjectTask {
import static com.bfd.mf.entity.mysql.Tasklimit.subjectTasktimelimiit;
import static com.bfd.mf.entity.mysql.Userlimit.subjectuserlimiit;
public class SubjectTask implements Runnable {
private static Logger log = Logger.getLogger(SubjectTask.class);
public static Map<String, List<Map<String,String>>> subjectTaskMap = new HashMap<>();
public static void loadSubjectTask(){
// public static void loadSubjectTask() {
// subjectTaskMap.clear();
// //List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("select cs.del,ct.external_id, ct.subject_id, ct.id, ct.cid, ct.crawl_data_flag,cs.kafka_switch,cs.kafka_addr,cs.go_fast_addr,cs.kafka_topic,cs.go_fast_switch from cl_subject cs Join cl_task ct on(ct.subject_id=cs.id)where (ct.crawl_status=1 or ct.crawl_status=3) and ct.del=0 ;");ct.app_id=cs.app_id and
// String time=DateUtil.getDate();
// //System.out.println(time);
// //System.out.println("结束时间"+ DateUtil.getcurr());
// List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat_alltask").query("select ct.crawl_content_key,ct.create_user_id,ct.app_id,cs.del,ct.external_id, ct.subject_id, ct.id, ct.cid, ct.crawl_data_flag,cs.kafka_switch,cs.kafka_addr,cs.go_fast_addr,cs.kafka_topic,cs.go_fast_switch from cl_subject cs Join cl_task ct on(ct.subject_id=cs.id) where (ct.crawl_status=1 ) and ct.del=0 and ct.app_id=cs.app_id and ct.cid!=\"\" and ct.update_time>'"+time+"'order by ct.update_time desc;");
// System.out.println(subjectTaskList.size());
// if(subjectTaskList.size() > 0){
// String key = "";
// for(Map<String, Object> subjectTask : subjectTaskList){ //{subject_id=10222, name=我是张三, task_id=188, id=71, crawl_data_flag=aaa}
// String keytwo = "";
// if( subjectTask.get("cid").equals("Tmall")){
// key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
// keytwo = "Taobao"+ "#####" + subjectTask.get("crawl_data_flag");
// }
// else if (subjectTask.get("cid").equals("Taobao")){
// key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
// keytwo = "Tmall"+ "#####" + subjectTask.get("crawl_data_flag");
// }
// else {
// key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
// }
// Map<String,String> value = new HashMap<>();
// List<Map<String,String>> valueList = new ArrayList<>();
// String v_subject_id = "";
// String v_go_fast_addr = "";
// String v_kafka_switch = "";
// String v_kafka_addr = "";
// String v_task_id = "";
// String v_external_id ="";
// String v_go_fast_switch="";
// String v_kafka_topic="";
// String v_status="";
// String v_del="";
// String v_create_user_id="";
// String v_ocr="0";
// String v_trans="0";
// String v_crawl_content_key="";
// if(null != subjectTask.get("subject_id")) {
// v_subject_id = subjectTask.get("subject_id").toString();
// }
// if(null != subjectTask.get("crawl_content_key")) {
// v_crawl_content_key = subjectTask.get("crawl_content_key").toString();
// }
// if(null != subjectTask.get("go_fast_addr")) {
// v_go_fast_addr = subjectTask.get("go_fast_addr").toString();
// }
// if(null != subjectTask.get("kafka_addr")) {
// v_kafka_addr = subjectTask.get("kafka_addr").toString();
// }
// if(null != subjectTask.get("kafka_switch")){
// v_kafka_switch = subjectTask.get("kafka_switch").toString();
// }
// if(null !=subjectTask.get("id")){
// v_task_id = subjectTask.get("id").toString();
// }
// if(null !=subjectTask.get("external_id")){
// v_external_id = subjectTask.get("external_id").toString();
// }
// if(null !=subjectTask.get("go_fast_switch")){
// v_go_fast_switch = subjectTask.get("go_fast_switch").toString();
// }
// if(null !=subjectTask.get("kafka_topic")){
// v_kafka_topic = subjectTask.get("kafka_topic").toString();
// }
//// if(null !=subjectTask.get("status")){
//// v_status = subjectTask.get("status").toString();
//// }
// if(null !=subjectTask.get("del")){
// v_del = subjectTask.get("del").toString();
// }
// if(null !=subjectTask.get("create_user_id")){
// v_create_user_id = subjectTask.get("create_user_id").toString();
// }
// value.put("subject_id",v_subject_id);
// value.put("go_fast_addr",v_go_fast_addr);
// value.put("export_to_kafka",v_kafka_switch);
// value.put("kafka_addr",v_kafka_addr);
// // value.put("task_id",v_task_id);
// value.put("external_id",v_external_id);
// value.put("go_fast_switch",v_go_fast_switch);
// value.put("kafka_topic",v_kafka_topic);
// // value.put("status",v_status);//专题的状态
// value.put("del",v_del);//专题的状态
// value.put("appid",subjectTask.get("app_id").toString());
// value.put("crawl_content_key",v_crawl_content_key);
// //System.out.print(v_external_id+"external_id");
// String newkey = key.toLowerCase();
// String userkey=newkey+"#####"+subjectTask.get("app_id").toString().toLowerCase();
//
// //组装时间的参数
// if (subjectTasktimelimiit.containsKey(userkey)){
// List<Map<String,String>>timelist=subjectTasktimelimiit.get(userkey);
// if(timelist.size()==1){
// for(Map<String, String> subjectTasktime : timelist){
// value.put("maxtime",subjectTasktime.get("max_time").toString());
// value.put("mintime",subjectTasktime.get("min_time").toString());
// }
// } else{
// for(Map<String, String> subjectTasktime : timelist){
// String subject_id=subjectTasktime.get("subject_id").toString();
// if (v_subject_id.equals(subject_id)){
// value.put("maxtime",subjectTasktime.get("max_time").toString());
// value.put("mintime",subjectTasktime.get("min_time").toString());
// }
// }
// }
//
// }
//// //用户的权限
// if (subjectuserlimiit.containsKey(v_create_user_id)){
// Map<String,Object> permission= (Map<String, Object>) subjectuserlimiit.get(v_create_user_id);
// v_ocr= permission.get("is_ocr").toString();
// v_trans= permission.get("is_trans").toString();
// }
// value.put("is_ocr",v_ocr);
// value.put("is_trans",v_trans);
// //组装相同任务的任务id
// if(subjectTaskMap.containsKey(newkey)){
// valueList = subjectTaskMap.get(newkey);
// for (Map<String, String> valuetask : valueList){
// String task=valuetask.get("task_id")+","+v_task_id;
// valuetask.put("task_id",task);
// value.put("task_id",task);
// }
// valueList.add(value);
// }else{
// value.put("task_id",v_task_id);
// valueList.add(value);
// }
//
// if(keytwo.length()>0){
// String tmallnewkey = keytwo.toLowerCase();
// subjectTaskMap.put(tmallnewkey,valueList);
// }
// String redis=newkey+"$$"+JsonUtils.toJSONString(valueList);
//// try {
//// Constants.getLineQueue().put(redis);
//// } catch (InterruptedException e) {
//// e.printStackTrace();
//// }
//
// // RedisUtil.set(newkey, JsonUtils.toJSONString(valueList), 10);
// // System.out.println("结束时间"+ DateUtil.getcurr());
// subjectTaskMap.put(newkey,valueList);
// //System.out.println(newkey);
// }
// // System.out.println("结束时间"+ DateUtil.getcurr());
//
//
// //System.out.println(subjectTaskMap.size());
// log.info("当天任务的数量" + key + " ; data = " + subjectTaskMap.size());
// // SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式
// //System.out.println(subjectTaskList.size());// new Date()为获取当前系统时间
// //WriteMethod.writeMethod("0621test.txt",JsonUtils.toJSONString(subjectTaskMap));
// // System.out.println(JsonUtils.toJSONString(subjectTaskMap)+"当前时间"+ DateUtil.getcurr());
// }else {
// System.out.println("kong a ");
// }
// }
// public static long updatetime = new Date().getTime()/1000;
@Override
public void run() {
while (true){
subjectTaskMap.clear();
List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("SELECT cst.id,ct.external_id,cst.subject_id, cst.task_id, ct.cid, ct.crawl_data_flag,cs.export_to_kafka,cs.kafka_addr,cs.go_fast_addr FROM cl_subject_task cst JOIN cl_subject cs ON (cst.subject_id = cs.id) LEFT JOIN cl_task ct ON (cst.task_id = ct.id );");
//System.out.println("***&&&&&**"+subjectTaskList+"subjectTaskList");
Userlimit.loaduser();
Tasklimit.loadTask();
long updatetime = new Date().getTime()/1000-30000;
// List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("select cs.del,ct.external_id, ct.subject_id, ct.id, ct.cid, ct.crawl_data_flag,cs.kafka_switch,cs.kafka_addr,cs.go_fast_addr,cs.kafka_topic,cs.go_fast_switch from cl_subject cs Join cl_task ct on(ct.subject_id=cs.id)where (ct.crawl_status=1 or ct.crawl_status=3) and ct.del=0 ;");ct.app_id=cs.app_id and
//String time=DateUtil.getDate();
//System.out.println(time);
System.out.println("结束时间"+ updatetime);
List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat_alltask").query("select ct.crawl_content_key,ct.create_user_id,ct.app_id,cs.del,ct.external_id, ct.subject_id, ct.id, ct.cid, ct.crawl_data_flag,cs.kafka_switch,cs.kafka_addr,cs.go_fast_addr,cs.kafka_topic,cs.go_fast_switch from cl_subject cs Join cl_task ct on(ct.subject_id=cs.id) where (ct.crawl_status=1 ) and ct.del=0 and ct.app_id=cs.app_id and ct.cid!=\"\" and unix_timestamp(ct.update_time)>'"+updatetime+"' and ct.crawl_data_flag like '%气象侦察机%'order by ct.update_time desc;");
System.out.println(subjectTaskList.size());
if(subjectTaskList.size() > 0){
String key = "";
for(Map<String, Object> subjectTask : subjectTaskList){ //{subject_id=10222, name=我是张三, task_id=188, id=71, crawl_data_flag=aaa}
String keytwo = "";
if( subjectTask.get("cid").equals("Tmall")){
key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
keytwo = "Taobao"+ "#####" + subjectTask.get("crawl_data_flag");
}
else if (subjectTask.get("cid").equals("Taobao")){
key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
keytwo = "Tmall"+ "#####" + subjectTask.get("crawl_data_flag");
}
else {
key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
//System.out.print(key+"asdasd");
}
Map<String,String> value = new HashMap<>();
List<Map<String,String>> valueList = new ArrayList<>();
String v_subject_id = "";
String v_go_fast_addr = "";
String v_export_to_kafka = "";
String v_kafka_switch = "";
String v_kafka_addr = "";
String v_task_id = "";
String v_external_id ="";
String v_go_fast_switch="";
String v_kafka_topic="";
String v_status="";
String v_del="";
String v_create_user_id="";
String v_ocr="0";
String v_trans="0";
String v_crawl_content_key="";
if(null != subjectTask.get("subject_id")) {
v_subject_id = subjectTask.get("subject_id").toString();
}
if(null != subjectTask.get("crawl_content_key")) {
v_crawl_content_key = subjectTask.get("crawl_content_key").toString();
}
if(null != subjectTask.get("go_fast_addr")) {
v_go_fast_addr = subjectTask.get("go_fast_addr").toString();
}
if(null != subjectTask.get("kafka_addr")) {
v_kafka_addr = subjectTask.get("kafka_addr").toString();
}
if(null != subjectTask.get("export_to_kafka")){
v_export_to_kafka = subjectTask.get("export_to_kafka").toString();
if(null != subjectTask.get("kafka_switch")){
v_kafka_switch = subjectTask.get("kafka_switch").toString();
}
if(null !=subjectTask.get("task_id")){
v_task_id = subjectTask.get("task_id").toString();
if(null !=subjectTask.get("id")){
v_task_id = subjectTask.get("id").toString();
}
if(null !=subjectTask.get("external_id")){
v_external_id = subjectTask.get("external_id").toString();
}
if(null !=subjectTask.get("go_fast_switch")){
v_go_fast_switch = subjectTask.get("go_fast_switch").toString();
}
if(null !=subjectTask.get("kafka_topic")){
v_kafka_topic = subjectTask.get("kafka_topic").toString();
}
// if(null !=subjectTask.get("status")){
// v_status = subjectTask.get("status").toString();
// }
if(null !=subjectTask.get("del")){
v_del = subjectTask.get("del").toString();
}
if(null !=subjectTask.get("create_user_id")){
v_create_user_id = subjectTask.get("create_user_id").toString();
}
value.put("subject_id",v_subject_id);
value.put("go_fast_addr",v_go_fast_addr);
value.put("export_to_kafka",v_export_to_kafka);
value.put("export_to_kafka",v_kafka_switch);
value.put("kafka_addr",v_kafka_addr);
value.put("task_id",v_task_id);
// value.put("task_id",v_task_id);
value.put("external_id",v_external_id);
value.put("go_fast_switch",v_go_fast_switch);
value.put("kafka_topic",v_kafka_topic);
// value.put("status",v_status);//专题的状态
value.put("del",v_del);//专题的状态
value.put("appid",subjectTask.get("app_id").toString());
value.put("crawl_content_key",v_crawl_content_key);
//System.out.print(v_external_id+"external_id");
String newkey = key.toLowerCase();
String userkey=newkey+"#####"+subjectTask.get("app_id").toString().toLowerCase();
//组装时间的参数
if (subjectTasktimelimiit.containsKey(userkey)){
List<Map<String,String>>timelist=subjectTasktimelimiit.get(userkey);
if(timelist.size()==1){
for(Map<String, String> subjectTasktime : timelist){
value.put("maxtime",subjectTasktime.get("max_time").toString());
value.put("mintime",subjectTasktime.get("min_time").toString());
}
} else{
for(Map<String, String> subjectTasktime : timelist){
String subject_id=subjectTasktime.get("subject_id").toString();
if (v_subject_id.equals(subject_id)){
value.put("maxtime",subjectTasktime.get("max_time").toString());
value.put("mintime",subjectTasktime.get("min_time").toString());
}
}
}
}
// //用户的权限
if (subjectuserlimiit.containsKey(v_create_user_id)){
Map<String,Object> permission= (Map<String, Object>) subjectuserlimiit.get(v_create_user_id);
v_ocr= permission.get("is_ocr").toString();
v_trans= permission.get("is_trans").toString();
}
value.put("is_ocr",v_ocr);
value.put("is_trans",v_trans);
//组装相同任务的任务id
if(subjectTaskMap.containsKey(newkey)){
valueList = subjectTaskMap.get(newkey);
for (Map<String, String> valuetask : valueList){
String task=valuetask.get("task_id")+","+v_task_id;
valuetask.put("task_id",task);
value.put("task_id",task);
}
valueList.add(value);
}else{
value.put("task_id",v_task_id);
valueList.add(value);
}
if(keytwo.length()>0){
String tmallnewkey = keytwo.toLowerCase();
subjectTaskMap.put(tmallnewkey,valueList);
}
String redis=newkey+"@#@"+JsonUtils.toJSONString(valueList);
try {
Constants.getLineQueue().put(redis);
} catch (InterruptedException e) {
e.printStackTrace();
}
// RedisUtil.set(newkey, JsonUtils.toJSONString(valueList), 10);
// System.out.println("结束时间"+ DateUtil.getcurr());
subjectTaskMap.put(newkey,valueList);
//System.out.println(newkey);
}
//System.out.println(JsonUtils.toJSONString(subjectTaskMap));
System.out.println("结束时间"+ DateUtil.getcurr());
//System.out.println(subjectTaskMap.size());
log.info("当天任务的数量" + key + " ; data = " + subjectTaskMap.size());
//SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式
//System.out.println(subjectTaskList.size());// new Date()为获取当前系统时间
//WriteMethod.writeMethod("0621test.txt",JsonUtils.toJSONString(subjectTaskMap));
//System.out.println(JsonUtils.toJSONString(subjectTaskMap)+"当前时间"+ DateUtil.getcurr());
}
try {
Thread.sleep(3000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}

65
cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/Tasklimit.java

@ -0,0 +1,65 @@
package com.bfd.mf.entity.mysql;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.datasave.tools.DBUtil;
import javax.xml.bind.util.JAXBSource;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/*
 * Cache of crawl time windows per task key.
 *
 * For every (crawl_data_flag, cid, subject_id, app_id) group of running,
 * non-deleted tasks the query returns the earliest crawl start and the latest
 * crawl end, i.e. the widest crawl time range of identical tasks under the
 * same subject.
 */
public class Tasklimit {

    /*
     * Key:   "<cid>#####<crawl_data_flag>#####<app_id>", lower-cased.
     * Value: one map per subject with the entries "max_time", "min_time"
     *        and "subject_id".
     */
    public static Map<String, List<Map<String,String>>>subjectTasktimelimiit = new HashMap<>();

    /*
     * Empties the cache and rebuilds it from the cl_task table.
     *
     * NOTE(review): cid, subject_id, crawl_start_time and crawl_end_time are
     * dereferenced without a null check, so a NULL in any of them aborts the
     * reload with a NullPointerException after the cache was already cleared.
     */
    public static void loadTask(){
        subjectTasktimelimiit.clear();

        List<Map<String, Object>> rows = DBUtil.getInstance("db_stat_alltask").query("SELECT MIN(crawl_start_time) crawl_start_time ,MAX(crawl_end_time) crawl_end_time ,crawl_data_flag ,subject_id ,cid ,app_id from cl_task where del=0 and (crawl_status=1) and cid!=\"\" GROUP BY crawl_data_flag,cid,subject_id,app_id;");
        System.out.println(rows.size()+"Tasktimelimiit");

        // Iterating an empty result is a no-op, so no explicit size guard is needed.
        for (Map<String, Object> row : rows) {
            Object cid = row.get("cid");
            Object dataFlag = row.get("crawl_data_flag");

            // Tmall and Taobao tasks are additionally registered under the
            // sibling site's name.
            // NOTE(review): the mirror key carries no app_id suffix, unlike the
            // primary key - confirm that this is what the readers expect.
            String mirrorKey;
            if (cid.equals("Tmall")) {
                mirrorKey = "Taobao" + "#####" + dataFlag;
            } else if (cid.equals("Taobao")) {
                mirrorKey = "Tmall" + "#####" + dataFlag;
            } else {
                mirrorKey = "";
            }

            String primaryKey = (cid + "#####" + dataFlag + "#####" + row.get("app_id")).toLowerCase();

            String subjectId = row.get("subject_id").toString();
            Map<String,String> window = new HashMap<>();
            window.put("max_time", row.get("crawl_end_time").toString());
            window.put("min_time", row.get("crawl_start_time").toString());
            window.put("subject_id", subjectId);

            // Append to the list already stored under the primary key, or start a new one.
            List<Map<String,String>> windows = subjectTasktimelimiit.get(primaryKey);
            if (windows == null) {
                windows = new ArrayList<>();
            }
            windows.add(window);

            // The mirror key shares the very same list instance and replaces
            // whatever was stored under that key before.
            if (!mirrorKey.isEmpty()) {
                subjectTasktimelimiit.put(mirrorKey.toLowerCase(), windows);
            }
            subjectTasktimelimiit.put(primaryKey, windows);
        }
    }
}

42
cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/Userlimit.java

@ -0,0 +1,42 @@
package com.bfd.mf.entity.mysql;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.datasave.tools.DBUtil;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/*
 * User permission table: caches the per-user OCR / translation switches read
 * from cl_user_config.
 */
public class Userlimit {

    /*
     * Key:   user id as text.
     * Value: a Map<String,Object> holding the Integer entries "is_ocr" and
     *        "is_trans" (0 when the column is NULL or unreadable).
     */
    public static Map<String, Object>subjectuserlimiit = new HashMap<>();

    /*
     * Empties the cache and reloads every row of cl_user_config.
     *
     * The column values are converted instead of cast: a plain (int)/(String)
     * cast only works when the driver hands back exactly Integer/String and
     * throws ClassCastException for Long, Boolean, BigDecimal and so on, which
     * would abort the reload after the cache had already been cleared.
     */
    public static void loaduser() {
        subjectuserlimiit.clear();

        List<Map<String, Object>> rows = DBUtil.getInstance("db_stat").query("SELECT user_id,is_ocr,is_asr,is_trans FROM `cl_user_config`");
        for (Map<String, Object> row : rows) {
            // A NULL user_id is still stored under the null key, as before.
            Object rawUserId = row.get("user_id");
            String userId = rawUserId == null ? null : rawUserId.toString();

            Map<String,Object> value = new HashMap<>();
            value.put("is_ocr", toFlag(row.get("is_ocr")));
            value.put("is_trans", toFlag(row.get("is_trans")));
            subjectuserlimiit.put(userId, value);
        }
    }

    /*
     * Converts a raw column value into an int switch.
     * Numbers keep their integer value, booleans map to 1/0, any other value
     * is parsed from its text form; NULL or unparsable text yields 0 so that
     * a bad row leaves the permission switched off rather than failing the load.
     */
    private static int toFlag(Object raw) {
        if (raw == null) {
            return 0;
        }
        if (raw instanceof Number) {
            return ((Number) raw).intValue();
        }
        if (raw instanceof Boolean) {
            return ((Boolean) raw) ? 1 : 0;
        }
        try {
            return Integer.parseInt(raw.toString().trim());
        } catch (NumberFormatException e) {
            return 0;
        }
    }
}

88
cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/cl_task.java

@ -0,0 +1,88 @@
package com.bfd.mf.entity.mysql;
import com.bfd.mf.datasave.tools.DBUtil;
import crawler.open.util.RedisUtil;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Removes Redis entries of tasks that are no longer running.
 *
 * A task key is "<cid>#####<crawl_data_flag>". Keys that occur with
 * crawl_status=3 but not with crawl_status=1 are deleted from Redis
 * database 10 (using the lower-cased key).
 */
public class cl_task {
    private static Logger log = Logger.getLogger(cl_task.class);

    /** Keys of tasks found with crawl_status=3. */
    public static List<String> subtaskstatuslimit = new ArrayList<>();

    /** Keys of tasks found with crawl_status=1 (despite the "3" in the name). */
    public static List<String> subtaskstatuslimit3 = new ArrayList<>();

    /**
     * Reloads both key lists from cl_task and deletes the Redis entries of
     * every crawl_status=3 key that has no crawl_status=1 counterpart.
     *
     * NOTE(review): both queries are pinned to update_time '2021-07-14';
     * this looks like a one-off clean-up - confirm before reusing.
     */
    public static void loadTask(){
        // Start from empty lists so that repeated calls do not accumulate
        // keys of earlier runs (same contract as Tasklimit / Userlimit).
        subtaskstatuslimit.clear();
        subtaskstatuslimit3.clear();

        collectTaskKeys("SELECT crawl_data_flag,cid FROM `cl_task` WHERE crawl_status=3 and update_time like '%2021-07-14%' GROUP BY crawl_data_flag,cid;", subtaskstatuslimit);
        System.out.println(subtaskstatuslimit.size()+"Tasktimelimiit");

        collectTaskKeys("SELECT crawl_data_flag ,cid FROM `cl_task` WHERE crawl_status=1 and update_time like '%2021-07-14%' GROUP BY crawl_data_flag,cid;", subtaskstatuslimit3);

        // Hash-based membership test instead of List.contains inside the loop.
        java.util.Set<String> runningKeys = new java.util.HashSet<>(subtaskstatuslimit3);

        int i=1;
        for (String value : subtaskstatuslimit) {
            if (runningKeys.contains(value)) {
                // The same task still has a crawl_status=1 row: keep its Redis entry.
                continue;
            }
            String newkey = value.toLowerCase();
            if (RedisUtil.exists(newkey, 10)) { // check Redis first; a key that is absent is simply ignored
                log.info("需要删除的任务是" + newkey);
                System.out.println("需要删除的任务是" + newkey);
                RedisUtil.del(newkey,10);
            } else {
                // NOTE(review): this branch means "key not present in Redis";
                // the message text talks about status 1 instead - confirm the wording.
                log.info("这个任务的状态有为1的,不需要删除" + newkey);
            }
        }
        System.out.println(i);
        System.out.println(subtaskstatuslimit3.size()+"Tasktimelimiit");
    }

    /**
     * Runs the query and appends "<cid>#####<crawl_data_flag>" to target for every returned row.
     * Rows without a cid are skipped: they cannot form a usable key and
     * used to abort the whole run with a NullPointerException.
     */
    private static void collectTaskKeys(String sql, List<String> target) {
        List<Map<String, Object>> rows = DBUtil.getInstance("db_stat_alltask").query(sql);
        for (Map<String, Object> row : rows) {
            Object cid = row.get("cid");
            if (cid == null) {
                log.warn("skipping cl_task row without cid, crawl_data_flag=" + row.get("crawl_data_flag"));
                continue;
            }
            target.add(cid + "#####" + row.get("crawl_data_flag"));
        }
    }
}

74
cl_stream_datasave/src/main/java/com/bfd/mf/runstart/RunStartDataSave.java

@ -2,11 +2,13 @@ package com.bfd.mf.runstart;
import com.bfd.crawler.kafka7.KfkConsumer;
import com.bfd.crawler.kafka7.consts.KafkaConsts;
import com.bfd.mf.datasave.tools.DataProcess;
import com.bfd.mf.datasave.tools.DateUtil;
import com.bfd.mf.entity.DataSaveManager;
import com.bfd.mf.entity.impl.DataSaveManagerImpl;
import com.bfd.mf.datasave.tools.DBUtil;
import com.bfd.mf.entity.mysql.FiledTableInfo;
import com.bfd.mf.entity.mysql.SubjectTask;
import com.bfd.mf.entity.mysql.*;
import crawler.open.util.RedisUtil;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
@ -21,24 +23,64 @@ public class RunStartDataSave {
private static String log4jPath = "../etc/log4j.properties";
private static String dbPath = "../etc/db.properties";
private static String redisPath = "../etc/145redis.properties";
static {
PropertyConfigurator.configureAndWatch(log4jPath);
DBUtil.init(dbPath);
RedisUtil.init(redisPath);
}
public static void main(String[] args) {
//KfkConsumer.startReadThread(this.queue, readTopicName,this.threadNums,this.groupId,this.kafkaServerName);
FiledTableInfo.loadTableInfo();
// cl_subject_task
startRmiService();
while (true){
SubjectTask.loadSubjectTask();
try {
Thread.sleep(30000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
cl_task.loadTask();
// try {
// FiledTableInfo.loadTableInfo();
// startRmiService();
// } catch (Exception e) {
// e.printStackTrace();
// }
// while (true){
// try {
// //Userlimit.loaduser();
// //Tasklimit.loadTask();
// SubjectTask.loadSubjectTask();
// } catch (Exception e) {
// e.printStackTrace();
// }
//
// try {
// Thread.sleep(3000);
// } catch (Exception e) {
// e.printStackTrace();
// }
//
// }
// for (int i = 0; i < 1; i ++) {
// SubjectTask SubjectTask = new SubjectTask();
// Thread SubjectTaskThread = new Thread(SubjectTask, "dataDedupProcess" + i);
// SubjectTaskThread.start();
// }
//// try {
//// Thread.sleep(6000);
//// } catch (InterruptedException e) {
//// e.printStackTrace();
//// }
// //多线程写redis
// for (int i = 0; i < 100; i ++) {
// DataProcess dataProcess = new DataProcess();
// Thread dataProcessThread = new Thread(dataProcess, "dataDedupProcess" + i);
// dataProcessThread.start();
// }
// Timer timer2 = new Timer();
// timer.schedule(new UpdateTask(), new Date(), 4*1000);
/**后面增加把es缓存的数据关闭的时候处理 不让丢数据**/
//Runtime.getRuntime().addShutdownHook(new GeterExit());
}
@ -51,11 +93,11 @@ public class RunStartDataSave {
* 本地主机上的远程对象注册表Registry的实例 并指定端口为8888这一步必不可少Java默认端口是1099
* 必不可缺的一步缺少注册表创建则无法绑定对象到远程注册表上
***/
LocateRegistry.createRegistry(1099);
LocateRegistry.createRegistry(2099);//3888
/*** 把远程对象注册到RMI注册服务器上,并命名为taskManager ***/
/*** 绑定的URL标准格式为:rmi://host:port/name(其中协议名可以省略,下面两种写法都是正确的) ***/
Naming.bind("//127.0.0.1:1099/dataSaveManager", dataSaveManager);
Naming.bind("//127.0.0.1:2099/dataSaveManager", dataSaveManager);
System.out.println(">>>>>INFO:远程IHello对象绑定成功!");
} catch (RemoteException e) {
System.out.println("创建远程对象发生异常!");

13
cl_stream_datasave/src/main/main5.iml

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/java" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="target" level="project" />
<orderEntry type="library" name="jarlib" level="project" />
</component>
</module>

60
cl_stream_mybatis/cl_stream_mybatis.iml

@ -15,10 +15,10 @@
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="jdk" jdkName="1.8" jdkType="JavaSDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module-library">
<library>
<library name="Maven: BfdRedisTools-2.0:BfdRedisTools-2.0:1.0.0">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/BfdRedisTools-2.0.jar!/" />
</CLASSES>
@ -26,60 +26,6 @@
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/dataManager-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/hanlp-portable-1.6.8.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/kafka-0.10.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/taskManager-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/utils-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/utils-3.0.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-web:1.4.3.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter:1.4.3.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot:1.4.3.RELEASE" level="project" />
@ -201,5 +147,7 @@
<orderEntry type="library" scope="RUNTIME" name="Maven: mysql:mysql-connector-java:5.1.40" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.25" level="project" />
<orderEntry type="library" name="Maven: com.liferay.org.apache.commons.fileupload:com.liferay.org.apache.commons.fileupload:6.2.0.1" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.5" level="project" />
</component>
</module>

27
cl_stream_mybatis/pom.xml

@ -27,6 +27,14 @@
</properties>
<dependencies>
<dependency>
<groupId>BfdRedisTools-2.0</groupId>
<artifactId>BfdRedisTools-2.0</artifactId>
<version>1.0.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/../../jarlib/BfdRedisTools-2.0.jar</systemPath>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
@ -123,6 +131,16 @@
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>com.liferay.org.apache.commons.fileupload</groupId>
<artifactId>com.liferay.org.apache.commons.fileupload</artifactId>
<version>6.2.0.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
</dependencies>
<build>
@ -153,10 +171,11 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-war-plugin</artifactId>
<configuration>
<failOnMissingWebXml>false</failOnMissingWebXml>
</configuration>
<artifactId>maven-jar-plugin</artifactId>
<version>3.1.1</version>
<!--<configuration>-->
<!--<failOnMissingWebXml>false</failOnMissingWebXml>-->
<!--</configuration>-->
</plugin>
</plugins>
<resources>

2
cl_stream_mybatis/src/main/java/com/bfd/mf/controller/CompanyController.java

@ -114,7 +114,7 @@ public class CompanyController {
fieldNormaliz.setKafkaSerName(Integer.valueOf(kafkaSerName));
fieldNormaliz.setProjectName(projectName);
fieldNormaliz.setIsSemtimentApi(Integer.valueOf(isSemtimentApi));
fieldNormaliz.setKafkaSuffixName("filter1");
fieldNormaliz.setKafkaSuffixName("gxnewfilterloacal");
fieldNormalizService.add(fieldNormaliz) ;
return "importdb" ;
}

4
cl_stream_mybatis/src/main/java/com/bfd/mf/tools/ConnectionRmi.java

@ -19,7 +19,7 @@ public class ConnectionRmi {
private static ServiceManager initServiceManager() {
Registry registry;
try {
registry = LocateRegistry.getRegistry("127.0.0.1", 8899);
registry = LocateRegistry.getRegistry("127.0.0.1", 6888);
serviceManager = (ServiceManager) registry.lookup("serviceManager");
} catch (RemoteException e) {
e.printStackTrace();
@ -32,7 +32,7 @@ public class ConnectionRmi {
private static DataSaveManager initDataSaveManager() {
Registry registry;
try {
registry = LocateRegistry.getRegistry("127.0.0.1", 1099);
registry = LocateRegistry.getRegistry("127.0.0.1", 2099);
dataSaveManager = (DataSaveManager) registry.lookup("dataSaveManager");
} catch (RemoteException e) {
e.printStackTrace();

17
cl_stream_mybatis/src/main/resources/application.properties

@ -1,13 +1,18 @@
#mysql
#spring.datasource.url=jdbc:mysql://192.168.67.152/field_normaliz?useUnicode=true&characterEncoding=utf-8
spring.datasource.url=jdbc:mysql://172.18.1.152/field_normaliz?useUnicode=true&characterEncoding=utf-8
spring.datasource.username=root
spring.datasource.password=Bfd123!@#
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
#spring.datasource.url=jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useUnicode=true&characterEncoding=utf-8
#spring.datasource.username=root
#spring.datasource.password=Bfd123!@#
#spring.datasource.password=bfd123
#spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url=jdbc:mysql://172.26.11.113:3306/bfd_sq_data?useUnicode=true&characterEncoding=utf-8
spring.datasource.username=root
spring.datasource.password=bfd123
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
#spring.datasource.url=jdbc:mysql://192.168.94.24:6446/intelligent_schema?useUnicode=true&characterEncoding=utf-8
#spring.datasource.username=root
#spring.datasource.password=baifendian
#spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.mvc.view.suffix=.jsp
spring.mvc.view.prefix=/WEB-INF/

2
cl_stream_mybatis/src/main/resources/com/bfd/mf/spring/applicationContext.xml

@ -25,7 +25,7 @@ http://www.springframework.org/schema/aop http://www.springframework.org/schema/
<!-- service接口 -->
<property name="serviceInterface" value="com.bfd.mf.sendTask.SendTask" />
<!-- 注册端口 -->
<property name="registryPort" value="1200" />
<property name="registryPort" value="1600" />
</bean>
</beans>

53
cl_stream_service/cl_stream_service.iml

@ -13,54 +13,9 @@
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="jdk" jdkName="1.8" jdkType="JavaSDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/dataManager-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/kafka-0.10.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../../jarlib/slf4j-api-1.7.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/taskManager-0.0.1-SNAPSHOT.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library>
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/utils-3.0.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="Maven: utils-0.0.1-SNAPSHOT:utils-0.0.1-SNAPSHOT:1.0.0">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jarlib/utils-0.0.1-SNAPSHOT.jar!/" />
@ -130,6 +85,11 @@
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
<orderEntry type="library" name="Maven: net.sf.ezmorph:ezmorph:1.0.6" level="project" />
<orderEntry type="library" name="Maven: kafka-utils:kafka:0.10" level="project" />
<orderEntry type="library" name="Maven: org.apache.kafka:kafka_2.10:0.10.2.0" level="project" />
<orderEntry type="library" name="Maven: net.sf.jopt-simple:jopt-simple:5.0.3" level="project" />
<orderEntry type="library" name="Maven: com.yammer.metrics:metrics-core:2.2.0" level="project" />
<orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.10.6" level="project" />
<orderEntry type="library" name="Maven: com.101tec:zkclient:0.10" level="project" />
<orderEntry type="library" name="Maven: mysql:mysql-connector-java:5.1.29" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.1" level="project" />
@ -174,7 +134,6 @@
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-suggest:7.2.1" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:securesm:1.2" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-cli:6.2.3" level="project" />
<orderEntry type="library" name="Maven: net.sf.jopt-simple:jopt-simple:5.0.2" level="project" />
<orderEntry type="library" name="Maven: com.carrotsearch:hppc:0.7.1" level="project" />
<orderEntry type="library" name="Maven: joda-time:joda-time:2.9.9" level="project" />
<orderEntry type="library" name="Maven: org.yaml:snakeyaml:1.17" level="project" />

37
cl_stream_service/src/main/java/com/bfd/mf/entity/AllKeys.java

@ -1,5 +1,8 @@
package com.bfd.mf.entity;
import com.bfd.mf.service.tools.DataCheckUtil;
import com.bfd.mf.service.tools.DateUtil;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@ -65,6 +68,9 @@ public class AllKeys {
map.put("filePath",new ArrayList<>());
map.put("imagePath",new ArrayList<>());
map.put("videoPath",new ArrayList<>());
map.put("filePathSize",new ArrayList<>());
map.put("imagePathSize",new ArrayList<>());
map.put("videoPathSize",new ArrayList<>());
map.put("finalPhrase","");
map.put("firstListBrand","");
map.put("fiveListBrand","");
@ -113,10 +119,15 @@ public class AllKeys {
map.put("projectName","");
map.put("promotionInfo","");
map.put("province","");
map.put("pubDate",new Date());
map.put("pubDay",0);
map.put("pubTime",0);
map.put("pubTimeStr","");
map.put("pubDate",DataCheckUtil.getCurrentTime());
map.put("pubDay",DataCheckUtil.getCurrentTime());
map.put("pubTime", DataCheckUtil.getCurrentTime());
map.put("pubTimeStr",DataCheckUtil.getCurrentTime());
// map.put("pubDate",new Date());
// map.put("pubDay", DateUtil.getday());
// map.put("pubTime",DateUtil.getbeforonecurr());
// map.put("pubTimeStr", DateUtil.getbeforeHour());
map.put("quoteCount",0);
map.put("readCount",0);
map.put("resolution","");
@ -144,6 +155,24 @@ public class AllKeys {
map.put("userUrl","");
map.put("videoTime","");
map.put("videoUrl","");
map.put("avatarPath","");
map.put("viewCnt",0);
map.put("channelNum","");
map.put("crawlDataFlagType","");
map.put("dns","");
map.put("dns","");
map.put("asrText","");
map.put("ocrText",new ArrayList<>());
map.put("hasOCR",0);
map.put("hasASR",0);
map.put("asrLength",0);
map.put("ocrLength",0);
map.put("translateTitleLength","");
map.put("translateContentLength","");
map.put("hasTrans",0);
map.put("goodrate",0);
map.put("generalrate",0);
map.put("poorrate",0);
map.put("processtime",new HashMap<>());
}
}

3
cl_stream_service/src/main/java/com/bfd/mf/entity/TypeEntity.java

@ -8,6 +8,9 @@ public class TypeEntity {
// 海外站点页面
public static final String STORYDETAILPAGE = "storyDetailPage";
public static final String SOCIALCOMMENT = "socialComment";
//海外点赞/分享 粉丝页面
public static final String SOCAILFOLLOW = "socialFollow";
public static final String SOCAILFANS = "socialFans";
// 微博页面
public static final String KEYWORD = "keyword";
public static final String WEIBO = "weibo";

46
cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfChannelInfo.java

@ -10,31 +10,63 @@ import java.util.Map;
public class MfChannelInfo {
public static Map<String, String> channelInfoMap = new HashMap<String, String>();
public static Map<String, String> docTypeInfos = new HashMap<String, String>();
public static Map<String, String> channelnumInfoMap = new HashMap<String, String>();
public static Map<String, String> domain_nameInfoMap = new HashMap<String, String>();
public static void loadChannelInfo(){
//List<Map<String, Object>> channelInfoList = DBUtil.getInstance("db_stat").query("select * from mf_channel_info");
List<Map<String, Object>> channelInfoList = DBUtil.getInstance("db_stat").query("select cid,site_type from cl_site");
List<Map<String, Object>> channelInfoList = DBUtil.getInstance("db_stat").query("select cid,site_type,domain_name from cl_site");
if(channelInfoList.size() > 0){
for(Map<String, Object> channelInfo : channelInfoList){
String channel = channelInfo.get("site_type").toString();
String docType = channelInfo.get("site_type").toString();
String num = channelInfo.get("site_type").toString();
String domain_name= channelInfo.get("domain_name").toString();
if(channel.equals("0")){
channel = "社交媒体";
docType = "social";
num = "0";
}
if(channel.equals("1")){
channel = "网络视频";
channel = "新闻资讯";
docType = "news";
num = "1";
}
if(channel.equals("2")){
channel = "网络资讯";
channel = "博客智库";
docType = "blog";
num = "2";
}
if(channel.equals("3")){
channel = "网络资讯";
channel = "论坛贴吧";
docType = "bbs";
num = "3";
}
if(channel.equals("4")){
channel = "网络视频";
docType = "video";
num = "4";
}
if(channel.equals("5")){
channel = "电商网站";
docType = "item";
num = "5";
}
if(channel.equals("6")){
channel = "搜索引擎";
docType = "search";
num = "6";
}
if(channel.equals("7")){
channel = "生活方式";
docType = "life";
num = "7";
}
channelInfoMap.put(channelInfo.get("cid").toString(),channel);
docTypeInfos.put(channelInfo.get("cid").toString(), docType);
channelnumInfoMap.put(channelInfo.get("cid").toString(),num);
domain_nameInfoMap.put(channelInfo.get("cid").toString(),domain_name);
//System.out.println(JsonUtils.toJSONString(domain_nameInfoMap));
}
}
System.out.println(JsonUtils.toJSONString(channelInfoMap));
}
}

20
cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfDoctypeInfo.java

@ -3,6 +3,7 @@ package com.bfd.mf.entity.mysql;
import com.bfd.mf.service.tools.DBUtil;
import com.bfd.mf.service.tools.JsonUtils;
import com.bfd.mf.service.tools.WriteMethod;
import java.util.HashMap;
import java.util.List;
@ -22,21 +23,32 @@ public class MfDoctypeInfo {
docType = "social";
}
if(docType.equals("1")){
docType = "video";
docType = "news";
}
if(docType.equals("2")){
docType = "news";
docType = "blog";
}
if(docType.equals("3")){
docType = "news";
docType = "bbs";
}
if(docType.equals("4")){
docType = "video";
}
if(docType.equals("5")){
docType = "item";
}
if(docType.equals("6")){
docType = "search";
}
if(docType.equals("7")){
docType = "life";
}
docTypeInfos.put(souceInfo.get("cid").toString(), docType);
}
}
System.out.println(JsonUtils.toJSONString(docTypeInfos));
// System.out.println(JsonUtils.toJSONString(docTypeInfos));
// WriteMethod.writeMethod("site.txt",JsonUtils.toJSONString(docTypeInfos));
}

18
cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfFieldInfo.java

@ -14,8 +14,7 @@ public class MfFieldInfo {
public static void loadBackstageFieldInfo(){
List<Map<String, Object>> fieldList = DBUtil.getInstance("db_stat").query("select * from mf_field_info");
System.out.println(JsonUtils.toJSONString(fieldList)+"ssss");
List<Map<String, Object>> fieldList = DBUtil.getInstance("db_stat").query("select * from mf_field_info_copy");
Map<String,Map<String, String>> allfields = new HashMap<String,Map<String, String>>();
Map<String, String> weibocontentdata = new HashMap<String,String>();
Map<String, String> ecContentdata = new HashMap<String,String>();
@ -28,9 +27,11 @@ public class MfFieldInfo {
Map<String, String> abroadcontentdata = new HashMap<String,String>();
Map<String, String> abroadcommentdata = new HashMap<String,String>();
Map<String,String> userInfodata = new HashMap<>();
Map<String,String> abroadfollowdata = new HashMap<>();
Map<String,String> abroadfansdata = new HashMap<>();
if( fieldList.size() > 0 ){
for(Map<String, Object> fielMap : fieldList){
System.out.print(fielMap.get("abroadcommentfieldname")+"userInfofieldName");
if(fielMap.get("weibocontentfieldname")!= null && StringUtils.isNotBlank(fielMap.get("weibocontentfieldname").toString())){
weibocontentdata = excField(fielMap.get("weibocontentfieldname").toString(), fielMap.get("esfieldname").toString(), weibocontentdata);
}
@ -64,6 +65,13 @@ public class MfFieldInfo {
if(fielMap.get("userinfofieldname") != null && StringUtils.isNotBlank(fielMap.get("userinfofieldname").toString())){
userInfodata = excField(fielMap.get("userinfofieldname").toString(), fielMap.get("esfieldname").toString(), userInfodata);
}
if(fielMap.get("abroadfollowfieldname") != null && StringUtils.isNotBlank(fielMap.get("abroadfollowfieldname").toString())){
abroadfollowdata = excField(fielMap.get("abroadfollowfieldname").toString(), fielMap.get("esfieldname").toString(), abroadfollowdata);
}
if(fielMap.get("abroadfansfieldname") != null && StringUtils.isNotBlank(fielMap.get("abroadfansfieldname").toString())){
abroadfansdata = excField(fielMap.get("abroadfansfieldname").toString(), fielMap.get("esfieldname").toString(), abroadfansdata);
}
}
allfields.put("keyword", weibocontentdata);
@ -81,7 +89,11 @@ public class MfFieldInfo {
allfields.put("storyDetailPage", abroadcontentdata);
allfields.put("socialComment", abroadcommentdata);
allfields.put("userInfoPage",userInfodata);
allfields.put("socialFollow",abroadfollowdata);
allfields.put("socialFans",abroadfansdata);
fieldNormalizeInfoMap.put(1, allfields) ;
// System.out.println(JsonUtils.toJSONString(fieldNormalizeInfoMap));
}
}

147
cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/SubjectTask.java

@ -1,66 +1,81 @@
package com.bfd.mf.entity.mysql;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.service.tools.DBUtil;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * In-memory lookup of subject/task bindings read from the {@code db_stat} database.
 *
 * <p>Each call to {@link #loadSubjectTask()} empties {@link #subjectTaskMap} and refills it
 * from the join of {@code cl_subject_task}, {@code cl_subject} and {@code cl_task}.
 */
public class SubjectTask {

    /**
     * Bindings grouped by the lower-cased key {@code cid + "#####" + crawl_data_flag}.
     * Every list element is one result row flattened to strings with the keys
     * {@code subject_id}, {@code go_fast_addr}, {@code export_to_kafka}, {@code kafka_addr},
     * {@code task_id} and {@code external_id}; a column that is NULL or absent becomes "".
     */
    public static Map<String, List<Map<String,String>>> subjectTaskMap = new HashMap<>();

    /**
     * Clears {@link #subjectTaskMap}, reloads it from the database and, when at least one row
     * was read, prints the resulting map as JSON to standard output.
     *
     * <p>The map is cleared before the query runs, so it stays empty if the query yields no
     * rows.
     */
    public static void loadSubjectTask(){
        subjectTaskMap.clear();
        List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("SELECT cst.id, cst.subject_id, cst.task_id, ct.cid, ct.crawl_data_flag,cs.export_to_kafka,cs.kafka_addr,cs.go_fast_addr FROM cl_subject_task cst JOIN cl_subject cs ON (cst.subject_id = cs.id) LEFT JOIN cl_task ct ON (cst.task_id = ct.id );");
        // Nothing to index (a null result is treated the same as an empty one).
        if (subjectTaskList == null || subjectTaskList.isEmpty()) {
            return;
        }
        // Example row: {subject_id=10222, name=..., task_id=188, id=71, crawl_data_flag=aaa}
        for (Map<String, Object> subjectTask : subjectTaskList) {
            // cid / crawl_data_flag come from a LEFT JOIN and may be null; string concatenation
            // then yields the text "null", exactly as the key was built before.
            String key = (subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag")).toLowerCase();

            Map<String,String> value = new HashMap<>();
            value.put("subject_id", textOf(subjectTask, "subject_id"));
            value.put("go_fast_addr", textOf(subjectTask, "go_fast_addr"));
            value.put("export_to_kafka", textOf(subjectTask, "export_to_kafka"));
            value.put("kafka_addr", textOf(subjectTask, "kafka_addr"));
            value.put("task_id", textOf(subjectTask, "task_id"));
            // Fix: external_id used to be written into the task_id variable, clobbering the
            // task id and leaving this entry permanently empty.
            // NOTE(review): the query above does not select external_id, so this resolves to ""
            // until the SELECT list is extended - confirm whether that is intended.
            value.put("external_id", textOf(subjectTask, "external_id"));

            // Append to the existing group for this key, creating the group on first use.
            List<Map<String,String>> valueList = subjectTaskMap.get(key);
            if (valueList == null) {
                valueList = new ArrayList<>();
                subjectTaskMap.put(key, valueList);
            }
            valueList.add(value);
        }
        System.out.println(JsonUtils.toJSONString(subjectTaskMap));
    }

    /** Returns the column's {@code toString()} value, or "" when the column is null or missing. */
    private static String textOf(Map<String, Object> row, String column) {
        Object cell = row.get(column);
        return cell == null ? "" : cell.toString();
    }
}
//package com.bfd.mf.entity.mysql;
//
//
//import com.bfd.crawler.utils.JsonUtils;
//import com.bfd.mf.service.tools.DBUtil;
//
//import java.util.ArrayList;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
//
//public class SubjectTask {
//
// public static Map<String, List<Map<String,String>>> subjectTaskMap = new HashMap<>();
// public static void loadSubjectTask(){
// subjectTaskMap.clear();
// List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("select cs.status, ct.external_id, ct.subject_id, ct.id, ct.cid, ct.crawl_data_flag,cs.kafka_switch,cs.kafka_addr,cs.go_fast_addr,cs.kafka_topic,cs.go_fast_switch from cl_subject cs Join cl_task ct on(ct.subject_id=cs.id);");
// if(subjectTaskList.size() > 0){
// String key = "";
// for(Map<String, Object> subjectTask : subjectTaskList){ //{subject_id=10222, name=我是张三, task_id=188, id=71, crawl_data_flag=aaa}
// key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag");
// Map<String,String> value = new HashMap<>();
// List<Map<String,String>> valueList = new ArrayList<>();
// String v_subject_id = "";
// String v_go_fast_addr = "";
// String kafka_switch = "";
// String v_kafka_addr = "";
// String v_task_id = "";
// String v_external_id ="";
// String v_go_fast_switch="";
// String v_kafka_topic="";
// String v_status="";
// if(null != subjectTask.get("subject_id")) {
// v_subject_id = subjectTask.get("subject_id").toString();
// }
// if(null != subjectTask.get("go_fast_addr")) {
// v_go_fast_addr = subjectTask.get("go_fast_addr").toString();
// }
// if(null != subjectTask.get("kafka_addr")) {
// v_kafka_addr = subjectTask.get("kafka_addr").toString();
// }
// if(null != subjectTask.get("kafka_switch")){
// kafka_switch = subjectTask.get("kafka_switch").toString();
// }
// if(null !=subjectTask.get("id")){
// v_task_id = subjectTask.get("id").toString();
// }
// if(null !=subjectTask.get("external_id")){
// v_external_id = subjectTask.get("external_id").toString();
// }
// if(null !=subjectTask.get("go_fast_switch")){
// v_go_fast_switch = subjectTask.get("go_fast_switch").toString();
// }
// if(null !=subjectTask.get("kafka_topic")){
// v_kafka_topic = subjectTask.get("kafka_topic").toString();
// }
// if(null !=subjectTask.get("status")){
// v_status = subjectTask.get("status").toString();
// }
// value.put("subject_id",v_subject_id);
// value.put("go_fast_addr",v_go_fast_addr);
// value.put("export_to_kafka",kafka_switch);
// value.put("kafka_addr",v_kafka_addr);
// value.put("task_id",v_task_id);
// value.put("external_id",v_external_id);
// value.put("go_fast_switch",v_go_fast_switch);
// value.put("kafka_topic",v_kafka_topic);
// value.put("status",v_status);//专题的状态
// key = key.toLowerCase();
// if(subjectTaskMap.containsKey(key)){
// valueList = subjectTaskMap.get(key);
// valueList.add(value);
// }else{
// valueList.add(value);
// }
// subjectTaskMap.put(key,valueList);
// }
// // System.out.println(JsonUtils.toJSONString(subjectTaskMap));
// }
// }
//}

16
cl_stream_service/src/main/java/com/bfd/mf/runstart/RunStartService.java

@ -33,6 +33,7 @@ public class RunStartService {
public static void main(String[] args) {
try {
startRmiService();
MfFieldInfo.loadBackstageFieldInfo(); // field_info
MfFieldType.loadFieldType(); // field_type
@ -40,16 +41,13 @@ public class RunStartService {
MfFieldTableInfo.loadTableInfo(); // field_table_info
MfDoctypeInfo.loadDocTypeInfo(); // doctype_info
MfChannelInfo.loadChannelInfo(); // channel_info
//MfChannelnumInfo.loadChannelnumInfo(); // channel_info
HanLPUtils.initAnalyzer();
while (true){
SubjectTask.loadSubjectTask();
try {
Thread.sleep(60000);
} catch (InterruptedException e) {
} catch (Exception e) {
e.printStackTrace();
}
}
// new AreaCategoryMappingUtils();
}
@ -72,8 +70,8 @@ public class RunStartService {
* 本地主机上的远程对象注册表Registry的实例 并指定端口为8888这一步必不可少Java默认端口是1099
* 必不可缺的一步缺少注册表创建则无法绑定对象到远程注册表上
***/
LocateRegistry.createRegistry(8899);
Naming.bind("//127.0.0.1:8899/serviceManager", serviceManager);
LocateRegistry.createRegistry(6888);//6888
Naming.bind("//127.0.0.1:6888/serviceManager", serviceManager);
/*** 把远程对象注册到RMI注册服务器上,并命名为taskManager ***/
/*** 绑定的URL标准格式为:rmi://host:port/name(其中协议名可以省略,下面两种写法都是正确的) ***/
System.out.println(">>>>>INFO:远程IHello对象绑定成功!");

366
cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ForegroundExtendType.java

@ -6,9 +6,12 @@ import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.entity.MfFieldInfo;
import com.bfd.mf.entity.FieldNormaliz;
import com.bfd.mf.entity.TypeEntity;
import com.bfd.mf.service.tools.MfMD5Util;
import com.bfd.mf.service.tools.WriteMethod;
import crawler.open.util.RedisUtil;
import org.apache.log4j.Logger;
import org.omg.Messaging.SYNC_WITH_TRANSPORT;
import java.util.*;
import static com.bfd.crawler.utils.DataUtil.calcMD5;
@ -30,6 +33,15 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
public Map<String, String> exec() {
try {
Map<String, Object> dataMap = JsonUtils.parseObject(data);
if(dataMap==null){
return null;
}
Map<String, Object> dataMare =new HashMap<>();
if(dataMap.containsKey("processtime")){
dataMare = (Map<String, Object>) dataMap.get("processtime");
}
dataMare.put("sbeginreadtime",System.currentTimeMillis());
dataMap.put("processtime",dataMare);
String projectName = fieldNormaliz.getProjectName();
int kafkaServerName = fieldNormaliz.getKafkaSerName();
String kafkaName = fieldNormaliz.getKafkaName();
@ -44,12 +56,27 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
dataMap.put("brand","");
}
String cid = (String) dataMap.get("cid");
String source ="";
if (dataMap.containsKey("source")){
source=(String) dataMap.get("source");
}
String dns ="";
if (dataMap.containsKey("dns")){
dns=(String) dataMap.get("dns");
}
if(dataMap.containsKey(TypeEntity.TYPE)||dataMap.containsKey(TypeEntity.PAGETYPE)){
String type ="";
if (cid.equals("sina")){
type = (String) dataMap.get(TypeEntity.PAGETYPE);
}else{
type = (String) dataMap.get(TypeEntity.TYPE);}
type = (String) dataMap.get(TypeEntity.TYPE);
//System.out.println(type+"type是是上司是");
if (type.equals(TypeEntity.STORYDETAILPAGE)){
System.out.println(type+"type是是上司是");
}
}
if(type.contains("list")){
return null;
}
@ -59,6 +86,7 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
if(type.equals("home")||type.equals("bbsuserinfo")){
type = "userInfoPage";
}
// 如果是电商详情直接写入到 redis
if (type.equals(TypeEntity.ECCONTENT)) {
String product_id = (String) dataMap.get("product_id");
@ -66,13 +94,40 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
int dbindex = hash(keys, 9);
LOG.info("[ ForegroundExtendType ] 往 Redis 中灌入商品详情数据 dbIndex = " + dbindex + " ; keys = " + keys);
RedisUtil.set(keys, data, dbindex);
// Map<String, Object> newdataMap = new HashMap<String, Object>(dataMap);
//往专题下写数据
Map<String, Object> newdataMap = new HashMap<String, Object>(dataMap);
Map<String, MfFieldInfo> fieldInfoMap = fieldNormaliz.getFieldInfo();
ParralleData pd = getParralleData(fieldInfoMap, newdataMap, type, cid, projectName,source,dns);
List<Map<String, String>> datas = pd.getParralleData();
datas = new ArrayList<>(new HashSet<>(datas));
//System.out.println("######" + JsonUtils.toJSONString(datas));
// needSentimentApi = 1 是需要 0 不需要
this.installData(kafkaServerName, datas, kafkaName, true,
fieldNormaliz.getIsSemtimentApi(), type, cid, projectName, kafkaSuffixName, esSerName);
} else {
Map<String, MfFieldInfo> fieldInfoMap = fieldNormaliz.getFieldInfo();
Map<String, Object> newdataMap = new HashMap<String, Object>(dataMap);
WriteMethod.writeMethod("yuanshuju.txt", JsonUtils.toJSONString(newdataMap));
if(newdataMap.containsKey("comments") && newdataMap.get("comments").toString().equals("[]")){
LOG.info("This data have no comments " + data);
return null;
}
if(newdataMap.containsKey("videoPath")){
List<String> valueList = new ArrayList<String>();
if (newdataMap.get("videoPath") instanceof String){
if(!newdataMap.get("videoPath").toString().equals("")){
valueList.add(newdataMap.get("videoPath").toString());
newdataMap.put("videoPath",valueList);}
else{
newdataMap.put("videoPath",valueList);
}
}
}
// 如果是电商评论需要把电商详情从 redis 中拿出来组装一下再进行处理
if (type.equals(TypeEntity.ECCOMMENT)) { // 如果页面类型是 电商评论
if (newdataMap.containsKey("product_id")) {
@ -95,17 +150,162 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
}
}
newdataMap = disposeEcComment(cid, newdataMap);
if(null==newdataMap){
//System.out.println("asddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd");
return null ;
}
WriteMethod.writeMethod("yuanshuju.txt",JsonUtils.toJSONString(newdataMap));
ParralleData pd = getParralleData(fieldInfoMap, newdataMap, type, cid, projectName);
}
else if (type.equals(TypeEntity.NEWSCONTENT)){
if (newdataMap.containsKey("news_id")){
String news_id=(String) newdataMap.get("url");
String md5news_id=MfMD5Util.GetMD5Code(news_id);
newdataMap.put("news_id",md5news_id);
}
}
else if (type.equals(TypeEntity.NEWSCOMMENT)){
if (newdataMap.containsKey("news_id")){
String news_id="";
if(newdataMap.containsKey("purl")){
news_id=(String) newdataMap.get("purl");
}else{
news_id=(String) newdataMap.get("news_id");
}
String md5news_id=MfMD5Util.GetMD5Code(news_id);
List<Map<String, Object>> comments = (List<Map<String, Object>>) newdataMap.get("comments");
for (Map<String, Object> m : comments) {
if (m.containsKey("news_id")){
m.put("news_id",md5news_id);
}
}
Map<String, Object> comment = comments.get(0);
if (comment.containsKey("news_id")) {
newdataMap.put("news_id", md5news_id);
}
}
}
//社交类处理方式
else if (type.equals(TypeEntity.USERINFOPAGE)){
if(cid.equals("twitter")||cid.equals("facebook")||cid.equals("Facebook")){
String picFileServerHost= newdataMap.get("picFileServerHost").toString();
if (!newdataMap.get("pic").toString().equals("")){
String pic=newdataMap.get("pic").toString();
String gofastpic=picFileServerHost+pic;
newdataMap.put("avatarPath",gofastpic);
}
}
}
else if (type.equals(TypeEntity.STORYDETAILPAGE)){
if(cid.equals("twitter")||cid.equals("facebook")||cid.equals("Facebook")){
String picFileServerHost= newdataMap.get("picFileServerHost").toString();
String videoFileServerHost=newdataMap.get("videoFileServerHost").toString();//
String videoPath=newdataMap.get("videoPath").toString();
if (!newdataMap.get("profilePic").toString().equals("")){
String pic=newdataMap.get("profilePic").toString();
String gofastpic=picFileServerHost+pic;
newdataMap.put("avatarPath",gofastpic);
}
List<String> postPicsSrc= (List<String>) newdataMap.get("postPics");
if (postPicsSrc.size()>0){
List<String> valueList = new ArrayList<String>();
Iterator<String> it = postPicsSrc.iterator();
while(it.hasNext()){
String geturl= it.next();
String relpostPicsSrc=picFileServerHost+geturl;
valueList.add(relpostPicsSrc);
}
newdataMap.put("postPics",valueList);
}
if(!newdataMap.get("videoPath").toString().equals("[]")){
String videoPatha= newdataMap.get("videoPath").toString().replace("[","").replace("]","");
//System.out.println("valueList是个啥子嘛"+videoPatha);
String gofasvideoPath=videoFileServerHost+videoPatha;
gofasvideoPath=gofasvideoPath.replace("[","").replace("]","");
// System.out.println("==="+gofasvideoPath+"gofasvideoPath");
//System.out.println(newdataMap.get("videoPath").toString()+"======");
List<String> valueList = new ArrayList<String>();
valueList.add(gofasvideoPath);
newdataMap.put("videoPath",valueList);
//}
}else{
if(newdataMap.containsKey("videoPath")){
if (newdataMap.get("videoPath") instanceof String){
List<String> valueList = new ArrayList<String>();
//System.out.println("20201125");
//valueList.add(newdataMap.get("videoPath").toString());
newdataMap.put("videoPath",new ArrayList<String>());
}
}
}
if(newdataMap.get("profilePic").toString().equals("")&&postPicsSrc.size()==0&&newdataMap.get("videoPath").toString().equals("[]")){
newdataMap.put("isDownload",false);
}
}
}else if(type.equals(TypeEntity.SOCIALCOMMENT)){
List<Map<String, Object>> comments = (List<Map<String, Object>>) newdataMap.get("comment");
for (Map<String, Object> m : comments) {
String picFileServerHost= m.get("picFileServerHost").toString();
if (m.containsKey("commentPic")){
if (!m.get("commentPic").toString().equals("")){
List<String> valueList = new ArrayList<String>();
valueList.add(picFileServerHost+m.get("commentPic"));
newdataMap.put("imagePath",valueList);
}
}if(m.containsKey("reviewerProfilePic")&&!m.get("reviewerProfilePic").toString().equals("")){
newdataMap.put("avatarPath",picFileServerHost+m.get("reviewerProfilePic"));
}
}
}
else if(type.equals(TypeEntity.SOCAILFANS)){
List<Map<String, Object>> fans = (List<Map<String, Object>>) newdataMap.get("fans");
if(fans.size()>0){
for (Map<String, Object> m : fans) {
if (m.containsKey("pic")&&!m.get("pic").toString().equals("")){
newdataMap.put("avatarPath","https://si.pdeepmatrix.com"+m.get("pic"));
}
}
}
}
else if(type.equals(TypeEntity.SOCAILFOLLOW)){
List<Map<String, Object>> socialFollow = new ArrayList<>();
if(newdataMap.containsKey("likelist")){
socialFollow=(List<Map<String, Object>>) newdataMap.get("likelist");
}else if(newdataMap.containsKey("repost")) {
socialFollow=(List<Map<String, Object>>) newdataMap.get("repost");
}
for (Map<String, Object> m : socialFollow) {
if (m.containsKey("pic")&&!m.get("pic").toString().equals("")){
newdataMap.put("avatarPath","https://si.pdeepmatrix.com"+m.get("pic"));
}
}
}
// System.out.println("============="+"asdasdasdads"+"=========="+type);
if(type.equals(TypeEntity.BBSPOST)){//bbs处理逻辑
//System.out.println("asdasdasdads");
putBBSpostData( cid, type, projectName, fieldInfoMap, dataMap, kafkaServerName, esSerName, kafkaSuffixName,source,dns);
}
else{
ParralleData pd = getParralleData(fieldInfoMap, newdataMap, type, cid, projectName,source,dns);
List<Map<String, String>> datas = pd.getParralleData();
datas = new ArrayList<>(new HashSet<>(datas));
//System.out.println("######" + JsonUtils.toJSONString(datas));
try {
// needSentimentApi = 1 是需要 0 不需要
this.installData(kafkaServerName, datas, kafkaName, true,
fieldNormaliz.getIsSemtimentApi(), type, cid, projectName, kafkaSuffixName, esSerName);
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
@ -126,6 +326,7 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
Map<String, Object> attr = (Map<String, Object>) dataMap.get("attr");
if(attr.containsKey("crawlDataFlag")){
String crawlDataFlag = (String) attr.get("crawlDataFlag");
//System.out.println("====="+crawlDataFlag+"crawlDataFlag123456789");
dataMap.put("crawlDataFlag",crawlDataFlag);
}
if(attr.containsKey("listbrand")){
@ -147,6 +348,17 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
if (attr.containsKey("attachtag")) {
Map<String, Object> attachtag = (Map<String, Object>) attr.get("attachtag");
if (attachtag.containsKey("crawlDataFlag")) {
// String crawlDataFlag = (String) attachtag.get("crawlDataFlag");
// if(crawlDataFlag.contains(":")){
// String crawlDataFlagtype=crawlDataFlag.split(":")[0];
// if(crawlDataFlagtype.equals("url")){
// dataMap.put("crawlDataFlagType","2");
// }else if (crawlDataFlagtype.contains("account")){
// dataMap.put("crawlDataFlagType","1");
// }else if (crawlDataFlagtype.contains("keyword")){
// dataMap.put("crawlDataFlagType","0");
// }
// }
dataMap.put("crawlDataFlag", attachtag.get("crawlDataFlag"));
} else {
dataMap.put("crawlDataFlag", "没有数据采集标识位");
@ -174,27 +386,29 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
* 这个方法中字段映射取的是 前台ES 的字段映射哦
*/
private ParralleData getParralleData(Map<String, MfFieldInfo> fieldInfoMap, Map<String, Object> newdataMap,
String type, String cid, String projectName) {
String type, String cid, String projectName,String source,String dns) {
try{
Map<Integer, Map<String, Map<String, String>>> fieldData = fieldNormaliz.getFieldDataMap();
Map<String, Map<String, String>> fieldtypeDataMap = fieldData.get(1);
System.out.println( cid + " --- " + type + " *** "+JsonUtils.toJSONString(fieldtypeDataMap));
//System.out.println( cid + " --- " + type + " *** "+JsonUtils.toJSONString(fieldtypeDataMap));
Map<String, String> fieldDataMap = fieldtypeDataMap.get(type);
Map<String, Object> fixFieldMap = this.loadFixedField(type, 1, cid);// 获取一些必须的字段数据
Map<String, Object> fixFieldMap = this.loadFixedField(type, 1, cid,source,dns);// 获取一些必须的字段数据
Set<String> keyset = fixFieldMap.keySet();
//System.out.print(keyset+"sadasdasd");
for (String key : keyset) {
fieldDataMap.put(key, key);
}
System.out.print("cid"+"ssssssssss"+JsonUtils.toJSONString(fieldDataMap));
// System.out.println("cid"+"ssssssssss"+JsonUtils.toJSONString(fieldDataMap));
fieldDataMap.remove("cid");
newdataMap.putAll(fixFieldMap);
//System.out.println(newdataMap+"我是基础参数啊");
String datanew = JSONObject.toJSONString(newdataMap); // 组装了基础参数的数据
String datanew = JSONObject.toJSONString(newdataMap); // 组装了基础参数的数据
// System.out.println(datanew+"我是基础参数啊datanew");
MfFieldInfo fieldInfo = fieldInfoMap.get(type);
List<String> kafkaJsonString = fieldInfo.getKafkaJsonString(); // [attr]
List<String> kafkaJsonArray = fieldInfo.getKafkaJsonArray(); // [comments, replys]
// System.out.println("\"===============\"我的数据啊"+kafkaJsonArray+"===============");
// fieldDataMap 是映射好的字段Map
ParralleData pd = new ParralleData();
@ -239,62 +453,72 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
* bbs 数据灌入 ES
* 因为论坛是不区分页面类型的 bbspost 如果有 replys 中的只要有 replys 就说明有评论需要将 replys 中的内容灌入到 type_comment 的索引中
*/
// private void putBBSpostData(int bussinessType, String cid, String type,
// String projectName, Map<String, MfFieldInfo> fieldInfoMap,
// Map<String, Object> dataMap, int kafkaServerName,
// int esSerName,String kafkaSuffixName) {
// try{
// if(bussinessType != 1) { // 往后台灌
// Map<String, Object> newdataMap = new HashMap<String, Object>(dataMap);
// MfFieldInfo fieldInfo = fieldInfoMap.get(type);
// List<String> kafkaJsonString = fieldInfo.getKafkaJsonString();
// List<String> kafkaJsonArray = fieldInfo.getKafkaJsonArray();
// Map<String, Object> fixFieldMap = this.loadFixedField(type, bussinessType, 1, cid);
// newdataMap.putAll(fixFieldMap);
// String datanew = JSONObject.toJSONString(newdataMap);
// Map<Integer, Map<String, Map<String, String>>> fieldData = fieldNormaliz.getFieldDataMap();
// Map<String, Map<String, String>> fieldtypeDataMap = fieldData.get(2);
// Map<String, String> fieldDataMap = fieldtypeDataMap.get(type);
// Set<String> keyset = fixFieldMap.keySet();
// for (String key : keyset) {
// fieldDataMap.put(key, key);
// }
// ParralleData pd = new ParralleData();
// this.exeFileData(datanew, kafkaJsonString, kafkaJsonArray, fieldDataMap,
// pd.getChunkId("", 0, -1), pd, type, bussinessType, projectName, cid);
// List<Map<String, String>> datas = pd.getParralleData();
// datas = new ArrayList<Map<String, String>>(new HashSet<Map<String, String>>(datas));
//
// this.installData(kafkaServerName, datas, kafkaName, true, fieldNormaliz.getIsSemtimentApi(),
// type, cid, bussinessType, projectName, kafkaSuffixName, esSerName);
//
// Map<String, Object> newdataMap2 = new HashMap<String, Object>(dataMap);
// if(newdataMap2.containsKey("replys") || newdataMap2.get("replys").toString().length() > 2) {
// MfFieldInfo fieldInfo2 = fieldInfoMap.get(type + "_comment");
// List<String> kafkaJsonString2 = fieldInfo2.getKafkaJsonString();
// List<String> kafkaJsonArray2 = fieldInfo2.getKafkaJsonArray();
// Map<String, Object> fixFieldMap2 = this.loadFixedField(type, bussinessType, 0, cid);
// newdataMap2.putAll(fixFieldMap2);
// String datanew2 = JSONObject.toJSONString(newdataMap2);
// Map<String, String> fieldDataMap2 = fieldtypeDataMap.get(type + "_comment");
// Set<String> keyset2 = fixFieldMap2.keySet();
// for (String key : keyset2) {
// fieldDataMap2.put(key, key);
// }
// ParralleData pd2 = new ParralleData();
// this.exeFileData(datanew2, kafkaJsonString2, kafkaJsonArray2, fieldDataMap2,
// pd2.getChunkId("", 0, -1), pd2, type + "_comment", bussinessType, projectName, cid);
// List<Map<String, String>> datas2 = pd2.getParralleData();
// datas2 = new ArrayList<Map<String, String>>(new HashSet<Map<String, String>>(datas2));
//
// this.installData(kafkaServerName, datas2, kafkaName, false, fieldNormaliz.getIsSemtimentApi(),
// type + "_comment", cid, bussinessType, projectName, kafkaSuffixName, esSerName);
//
// }else{
// System.out.println("============没有回帖=============================================================================");
// System.out.println(JSONObject.toJSONString(newdataMap2));
private void putBBSpostData( String cid, String type,
String projectName, Map<String, MfFieldInfo> fieldInfoMap,
Map<String, Object> dataMap, int kafkaServerName,
int esSerName,String kafkaSuffixName,String source,String dns) {
int bussinessType=2;
try{
if(bussinessType != 1) { // 往后台灌
Map<String, Object> newdataMap = new HashMap<String, Object>(dataMap);
if(newdataMap.containsKey("contents")){
if (newdataMap.containsKey("replys")) {
newdataMap.remove("replys");
}
ParralleData pd = getParralleData(fieldInfoMap, newdataMap, type, cid, projectName,source,dns);
List<Map<String, String>> datas = pd.getParralleData();
datas = new ArrayList<>(new HashSet<>(datas));
this.installData(kafkaServerName, datas, kafkaName, true,
fieldNormaliz.getIsSemtimentApi(), type, cid, projectName, kafkaSuffixName, esSerName);
}
Map<String, Object> newdataMap2 = new HashMap<String, Object>(dataMap);
if(newdataMap2.containsKey("replys") && !newdataMap2.get("replys").toString().equals("[]")) {
//if(newdataMap.get("replys").toString().equals("[]")){
// System.out.println("+++++++====================---------------"+newdataMap2.get("replys"));
//}
// }else { // 往前台灌
type=type + "_comment";
Map<Integer, Map<String, Map<String, String>>> fieldDatareply = fieldNormaliz.getFieldDataMap();
Map<String, Map<String, String>> fieldtypeDataMapreply = fieldDatareply.get(1);
//System.out.println( cid + " --- " + type + " *** "+JsonUtils.toJSONString(fieldtypeDataMap));
Map<String, String> fieldDataMapreply = fieldtypeDataMapreply.get(type);
Map<String, Object> fixFieldMapreply = this.loadFixedField(type, 0, cid,source,dns);// 获取一些必须的字段数据
Set<String> keyset = fixFieldMapreply.keySet();
for (String key : keyset) {
fieldDataMapreply.put(key, key);
}
// System.out.println("cid"+"ssssssssss"+JsonUtils.toJSONString(fieldDataMap));
fieldDataMapreply.remove("cid");
newdataMap2.putAll(fixFieldMapreply);
String datanew = JSONObject.toJSONString(newdataMap2); // 组装了基础参数的数据
// System.out.println(datanew+"我是基础参数啊datanew");
MfFieldInfo fieldInfo = fieldInfoMap.get(type);
List<String> kafkaJsonString = fieldInfo.getKafkaJsonString(); // [attr]
List<String> kafkaJsonArray = fieldInfo.getKafkaJsonArray(); // [comments, replys]
//System.out.println("\"===============\"我的数据啊"+kafkaJsonArray+"===============");
// fieldDataMap 是映射好的字段Map
ParralleData pd2 = new ParralleData();
this.exeFileData(datanew, kafkaJsonString, kafkaJsonArray, fieldDataMapreply,
pd2.getChunkId("", 0, -1), pd2, type, projectName, cid);
List<Map<String, String>> datareply = pd2.getParralleData();
datareply = new ArrayList<>(new HashSet<>(datareply));
this.installData(kafkaServerName, datareply, kafkaName, true,
fieldNormaliz.getIsSemtimentApi(), type, cid, projectName, kafkaSuffixName, esSerName);
}else{
System.out.println("============没有回帖=============================================================================");
System.out.println(JSONObject.toJSONString(newdataMap));
}
}else { // 往前台灌
// if(dataMap.containsKey("contents")){ // 说明要灌bbs 的主贴有时有replys有时没有replys
// Map<String, Object> newdataMap = new HashMap<String, Object>(dataMap);
// if (newdataMap.containsKey("replys")) {
@ -362,11 +586,11 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
// System.out.println("-- "+JSONObject.toJSONString(dataMap));
// }
// }
// }
// }catch (Exception e){
// e.printStackTrace();
// }
// }
}
}catch (Exception e){
e.printStackTrace();
}
}
/**
@ -382,6 +606,7 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
if (RedisUtil.exists(keys, dbindex)) { // 先去 redis中查询是否存在不存直接忽略
LOG.info("[ForegroundExtendType] exec >>> 电商灌数:该商品在 Redis 中有!!! keys = " + keys + " ; dbindex = " + dbindex);
newdataMap = getECContentDetail(keys, newdataMap, dbindex);
if (newdataMap == null) {
LOG.error("[ForegroundExtendType] exec >>> 电商灌数:从Redis中获取电商详情信息失败!!! keys = " + keys + " ; dbindex = " + dbindex);
return null;
@ -407,6 +632,9 @@ public class ForegroundExtendType extends ParentExctendType implements Runnable{
String value = RedisUtil.get(keys,dbindex);
if(null != value && !("").equals(value)) {
Map<String, Object> eccontentMap = JsonUtils.parseObject(value);
eccontentMap.remove("type");
eccontentMap.remove("attr");
eccontentMap.remove("creation_time");
newdataMap.putAll(eccontentMap);
// System.out.println("======================== " + JsonUtils.toJSONString(eccontentMap));
// if (eccontentMap.containsKey("itemname")) {

239
cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ParentExctendType.java

@ -5,10 +5,7 @@ import com.alibaba.fastjson.JSONArray;
import com.bfd.crawler.kafka7.KfkProducer;
import com.bfd.mf.entity.AreaCategoryEntity;
import com.bfd.mf.entity.TypeEntity;
import com.bfd.mf.entity.mysql.MfChannelInfo;
import com.bfd.mf.entity.mysql.MfDoctypeInfo;
import com.bfd.mf.entity.mysql.MfSouceInfo;
import com.bfd.mf.entity.mysql.SubjectTask;
import com.bfd.mf.entity.mysql.*;
import com.bfd.mf.service.tools.*;
import com.bfd.mf.service.utils.AvailcontentApiUtils;
import com.bfd.mf.service.utils.HlkkwUtils;
@ -37,27 +34,46 @@ public class ParentExctendType {
/**
* 组装一些默认值 1
*/
public Map<String,Object> loadFixedField(String type ,int bbsifcontent,String cid){
public Map<String,Object> loadFixedField(String type ,int bbsifcontent,String cid,String source,String dns){
Map<String,Object> fixFieldMap = new HashMap<String,Object>();
long dateTime = System.currentTimeMillis() ;
fixFieldMap.put("enSource",cid.toLowerCase());
if("".equals(source)||StringUtils.isBlank(source)) {
fixFieldMap.put("source", MfSouceInfo.souceInfos.get(cid));
}
if(type.equals(TypeEntity.KEYWORD) || type.equals(TypeEntity.WEIBO) ||
type.equals(TypeEntity.NEWSCONTENT) || type.equals(TypeEntity.REPOST) ||
type.equals(TypeEntity.STORYDETAILPAGE)){ // 微博关键词微博大V新闻主贴论坛海外社交主贴
type.equals(TypeEntity.STORYDETAILPAGE) ||type.equals(TypeEntity.BBSPOST)|| type.equals("bbspost_comment")){ // 微博关键词微博大V新闻主贴论坛海外社交主贴
if(bbsifcontent == 1){
fixFieldMap.put("primary", 1);
fixFieldMap.put("primaryPost", "1");
}else{
fixFieldMap.put("primary", 0);
fixFieldMap.put("primaryPost", "0");
fixFieldMap.put("sign", 2); // 2=评论1=转发
//fixFieldMap.put("sign", 2); // 2=评论1=转发
}
}else if (type.contains("user")){
fixFieldMap.put("primary",2);
fixFieldMap.put("primaryPost", "2");
}else{
}else if (type.equals(TypeEntity.ECCOMMENT)){
fixFieldMap.put("primary",0);
fixFieldMap.put("primaryPost", "0");
}else if (type.equals(TypeEntity.ECCONTENT)){
fixFieldMap.put("primary",1);
fixFieldMap.put("primaryPost", "5");
}else if (type.equals(TypeEntity.SOCIALCOMMENT)){
fixFieldMap.put("primary",0);
fixFieldMap.put("sign", 2);
}
else if (type.equals(TypeEntity.SOCAILFOLLOW)){
fixFieldMap.put("primary",0);
fixFieldMap.put("sign", 1);
}else if (type.equals(TypeEntity.SOCAILFANS)){
fixFieldMap.put("primary",2);
fixFieldMap.put("sign", 2);//2 粉丝 1用户
}
else{
fixFieldMap.put("primary", 0);
fixFieldMap.put("primaryPost", "0");
if(type.equals(TypeEntity.REPOST)){ // 转发
@ -75,6 +91,12 @@ public class ParentExctendType {
// 数据类型 站点相关 相关
fixFieldMap.put("docType", MfDoctypeInfo.docTypeInfos.get(cid));
fixFieldMap.put("channel", MfChannelInfo.channelInfoMap.get(cid));
fixFieldMap.put("channelNum", MfChannelInfo.channelnumInfoMap.get(cid));
if("".equals(dns)||StringUtils.isBlank(dns)) {
fixFieldMap.put("dns", MfChannelInfo.domain_nameInfoMap.get(cid));
}
fixFieldMap.put("contentTag","nomal");
// 商情后台打标预留的字段
@ -98,9 +120,9 @@ public class ParentExctendType {
int chunkId, ParralleData pd, String type, String projectName, String cid){
try {
JSONObject dataMap = JSONObject.fromObject(fieldValue);
// System.out.println(dataMap);
Set<String> keySet = dataMap.keySet();
for(String colKey: keySet){
//WriteMethod.writeMethod("10b2.txt",colKey);
Object colValue = dataMap.get(colKey) ;
if(kafkaJsonString.contains(colKey)){
try{
@ -151,10 +173,18 @@ public class ParentExctendType {
//LOG.info("[ParentExctendType] installData : cid = " + cid + " ; data = " + JsonUtils.toJSONString(data));
if (data.size() > 0) {
Map<String, String> newmap = new HashMap<String, String>(data);
Map<String, Object> time = new HashMap<>();
String title = "";
String primary ="";
if(newmap.containsKey("primary")){
primary = newmap.get("primary");
}
//= newmap.get("primary");
if(newmap.containsKey("title")){
title = newmap.get("title");
}
time.put("sendreadtime",System.currentTimeMillis());
String content = "";
if(newmap.containsKey("content")) {
content = newmap.get("content");
@ -165,120 +195,173 @@ public class ParentExctendType {
newmap.put("titleSimHash", newmap.get("contentSimHash"));
// 调用之前替换掉 content 中乱七八糟的符号之类的
}
if (StringUtils.isNotBlank(content)) {
String pubTime=newmap.get("pubTime");
//System.out.print("发表时间是啥啊"+newmap.get("pubTime")); //System.out.print("发表时间是啥啊"+newmap.get("pubTime"));
Integer contentLength= Integer.valueOf(newmap.get("contentLength"));
time.put("beginsentiment",System.currentTimeMillis());
if (StringUtils.isNotBlank(content)&&"1".equals(primary)) {
newmap = callhlKeyword(iscallhlk, content, newmap); //hlKeywords & sysKeywords 提取
newmap = callsysAbstract(content, newmap); // sysAbstract 提取
newmap = callOpinions(content, newmap); // 词云-评价 提取
newmap = callPlace(title,content,newmap); // 词云-地点 提取
try {
SentimentApiUtils sentimentApiUtils = new SentimentApiUtils();
Double sentiment = sentimentApiUtils.getSentimentValue(content);
newmap.put("sysSentiment",sentiment.toString());
} catch (Exception e) {
e.printStackTrace();
}
}
content = StringFilter(content);
try {
//if (pubTime.compareTo("1601481600866")<=0&& pubTime.compareTo("1609430399866")>=0){
this.callPhrase(title, content, newmap, data.get("docType")); // 长文本处理
//}
// 如果网络不同不能调用文本相关的结果
} catch (Exception e) {
e.printStackTrace();
}
}
newmap = typeIsKeyword(type,newmap,title,content);
newmap = aboutAddress(newmap);
time.put("endsentiment",System.currentTimeMillis());
time.put("sbeginsentkafka",System.currentTimeMillis());
if(!newmap.containsKey("isDownload")){
newmap.put("isDownload","false");
}
if(!newmap.containsKey("_id_") && newmap.containsKey("dataId")){
newmap.put("_id_",MfMD5Util.GetMD5Code(newmap.get("dataId")));
}
if(newmap.containsKey("docType") && newmap.get("docType").toString().equals("item")){
newmap.put("primary","1");
newmap.remove("primaryPost");
}
// if(newmap.containsKey("docType") && newmap.get("docType").toString().equals("item")){
// newmap.put("primary","1");
// newmap.remove("primaryPost");
// }
if(type.contains("comment") || type.contains("socialComment")){
//System.out.println("评论数据哦,docId = " + newmap.get("docId") + " == "+newmap.get("content") + " ----- " + newmap.get("postId") + " -- "+newmap.get("commentId"));
String dataId = cid+"#"+newmap.get("docId")+"#"+newmap.get("pubTime")+"#"+newmap.get("author")+"#"+newmap.get("content");
newmap.put("dataId",MfMD5Util.GetMD5Code(dataId));
newmap.put("_id_",MfMD5Util.GetMD5Code(dataId));
}
JSONObject jsonObject = JSONObject.fromObject(newmap);
if(!newmap.containsKey("docId")){
WriteMethod.writeMethod("error.txt", jsonObject.toString());
}
WriteMethod.writeMethod("result.txt", jsonObject.toString());
// System.out.println("kafkaServerName :" + kafkaServerName + " | " + "kafkaTopic : " +KafkaTopic + " | suffixName: " + kafkaSuffixName );
KfkProducer.getInstance(kafkaServerName,KafkaTopic).send(KafkaTopic+"_"+kafkaSuffixName, jsonObject.toString());
}
else if(type.contains("socialFans")){
String dataId = cid+"#"+newmap.get("docId")+"#"+newmap.get("pubTime")+"#"+newmap.get("forwardUserId")+"#"+newmap.get("forwardUrl");
newmap.put("dataId",MfMD5Util.GetMD5Code(dataId));
newmap.put("_id_",MfMD5Util.GetMD5Code(dataId));
}
else if(type.contains("socialFollow")){
String dataId = cid+"#"+newmap.get("docId")+"#"+newmap.get("pubTime")+"#"+newmap.get("authorId")+"#"+newmap.get("forwardUrl");
newmap.put("dataId",MfMD5Util.GetMD5Code(dataId));
newmap.put("_id_",MfMD5Util.GetMD5Code(dataId));
}
public void installData(int kafkaServerName,String KafkaTopic,String kafkaSuffixName,List<Map<String, String>> datas,
String type, String cid) {
LOG.debug("ParentExctendType installData >>> start install data!!!!!");
for (Map<String, String> data : datas) {
//LOG.info("[ParentExctendType] installData : cid = " + cid + " ; data = " + JsonUtils.toJSONString(data));
if (data.size() > 0) {
Map<String, String> newmap = new HashMap<String, String>(data);
String title = "";
if(newmap.containsKey("title")){
title = newmap.get("title");
}
String content = "";
if(newmap.containsKey("content")) {
content = newmap.get("content");
if (!newmap.containsKey("title")) {
title = content;
newmap.put("title", content);
newmap.put("titleLength", newmap.get("contentLength"));
newmap.put("titleSimHash", newmap.get("contentSimHash"));
// 调用之前替换掉 content 中乱七八糟的符号之类的
}
// if (StringUtils.isNotBlank(content)) {
// newmap = callhlKeyword(iscallhlk, content, newmap); //hlKeywords & sysKeywords 提取
// newmap = callsysAbstract(content, newmap); // sysAbstract 提取
// newmap = callOpinions(content, newmap); // 词云-评价 提取
// }
// content = StringFilter(content);
// this.callPhrase(title, content, newmap, data.get("docType"));
// 如果网络不同不能调用文本相关的结果
time.put("sendsentkafka",System.currentTimeMillis());
if(newmap.containsKey("processtime")){
// String dataMare = newmap.get("processtime");
try {
Map<String, Object> datare = (Map<String, Object>) JsonUtils.parseObject((String)newmap.get("processtime"));
datare.putAll(time);
newmap.put("processtime",JsonUtils.toJSONString(datare));
} catch (Exception e) {
e.printStackTrace();
}
newmap = typeIsKeyword(type,newmap,title,content);
newmap = aboutAddress(newmap);
if(!newmap.containsKey("isDownload")){
newmap.put("isDownload","false");
}
if(!newmap.containsKey("_id_")){
newmap.put("_id_",MfMD5Util.GetMD5Code(newmap.get("dataId")));
}
JSONObject jsonObject = JSONObject.fromObject(newmap);
if(type.contains("comment")){
System.out.println("评论数据哦,docId = " + newmap.get("docId") + " == "+newmap.get("content"));
String dataId = cid+"#"+newmap.get("docId")+"#"+newmap.get("pubTime")+"#"+newmap.get("author")+"#"+newmap.get("content");
newmap.put("dataId",MfMD5Util.GetMD5Code(dataId));
newmap.put("_id_",MfMD5Util.GetMD5Code(dataId));
if(!newmap.containsKey("docId")){
WriteMethod.writeMethod("error.txt", jsonObject.toString());
}
Map<String, List<Map<String,String>>> subjectTaskMap = SubjectTask.subjectTaskMap;
String crawlDataFlag = newmap.get("crawlDataFlag");
if(subjectTaskMap.containsKey(crawlDataFlag)){
System.out.println("----- " + subjectTaskMap.get(crawlDataFlag));
}
if(newmap.containsKey("pubTimeStr")){
String pubTimeStr = newmap.get("pubTimeStr");
String indexName = "cl_index_" + pubTimeStr.split(" ")[0].trim();
newmap.put("indexName",indexName);
}
//System.out.println("+ kafkaServerName + "+jsonObject.toString().length()+"========");
JSONObject jsonObject = JSONObject.fromObject(newmap);
WriteMethod.writeMethod("result.txt", jsonObject.toString());
try {
if(jsonObject.toString().length()<104000000){
KfkProducer.getInstance(kafkaServerName,KafkaTopic).send(KafkaTopic+"_"+kafkaSuffixName, jsonObject.toString());
}else {
// System.out.println ("=========================================bomms沙卡拉卡==========================================================================");
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
// public void installData(int kafkaServerName,String KafkaTopic,String kafkaSuffixName,List<Map<String, String>> datas,
// String type, String cid) {
// LOG.debug("ParentExctendType installData >>> start install data!!!!!");
// for (Map<String, String> data : datas) {
// //LOG.info("[ParentExctendType] installData : cid = " + cid + " ; data = " + JsonUtils.toJSONString(data));
// if (data.size() > 0) {
// Map<String, String> newmap = new HashMap<String, String>(data);
// String title = "";
// if(newmap.containsKey("title")){
// title = newmap.get("title");
// }
//
// String content = "";
// if(newmap.containsKey("content")) {
// content = newmap.get("content");
// if (!newmap.containsKey("title")) {
// title = content;
// newmap.put("title", content);
// newmap.put("titleLength", newmap.get("contentLength"));
// newmap.put("titleSimHash", newmap.get("contentSimHash"));
// // 调用之前替换掉 content 中乱七八糟的符号之类的
// }
//// if (StringUtils.isNotBlank(content)) {
//// newmap = callhlKeyword(iscallhlk, content, newmap); //hlKeywords & sysKeywords 提取
//// newmap = callsysAbstract(content, newmap); // sysAbstract 提取
//// newmap = callOpinions(content, newmap); // 词云-评价 提取
//// }
//// content = StringFilter(content);
//// this.callPhrase(title, content, newmap, data.get("docType"));
// // 如果网络不同不能调用文本相关的结果
// }
// newmap = typeIsKeyword(type,newmap,title,content);
// newmap = aboutAddress(newmap);
//
//
// if(!newmap.containsKey("isDownload")){
// newmap.put("isDownload","false");
// }
// if(!newmap.containsKey("_id_")){
// newmap.put("_id_",MfMD5Util.GetMD5Code(newmap.get("dataId")));
// }
//
// if(type.contains("comment")){
// System.out.println("评论数据哦,docId = " + newmap.get("docId") + " == "+newmap.get("content"));
// String dataId = cid+"#"+newmap.get("docId")+"#"+newmap.get("pubTime")+"#"+newmap.get("author")+"#"+newmap.get("content");
// newmap.put("dataId",MfMD5Util.GetMD5Code(dataId));
// newmap.put("_id_",MfMD5Util.GetMD5Code(dataId));
// }
//
// Map<String, List<Map<String,String>>> subjectTaskMap = SubjectTask.subjectTaskMap;
// String crawlDataFlag = newmap.get("crawlDataFlag");
// if(subjectTaskMap.containsKey(crawlDataFlag)){
// System.out.println("----- " + subjectTaskMap.get(crawlDataFlag));
// }
//
// if(newmap.containsKey("pubTimeStr")){
// String pubTimeStr = newmap.get("pubTimeStr");
// String indexName = "cl_index_" + pubTimeStr.split(" ")[0].trim();
// newmap.put("indexName",indexName);
// }
//
// JSONObject jsonObject = JSONObject.fromObject(newmap);
// //WriteMethod.writeMethod("result.txt", jsonObject.toString());
// //KfkProducer.getInstance(kafkaServerName,KafkaTopic).send(KafkaTopic+"_"+kafkaSuffixName, jsonObject.toString());
// }
// }
// }
private Map<String,String> aboutAddress(Map<String, String> newmap) {
try {
if (AreaCategoryMappingUtils.set.size() > 0) {

224
cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ParralleData.java

@ -48,8 +48,21 @@ public class ParralleData {
}
public void addData(int chunkId, String dataName, String value, String type, String projectName, String cid) {
WriteMethod.writeMethod("10b.txt",chunkId + " === " + dataName + " === " + value + " === " + cid);
String doctype = MfDoctypeInfo.docTypeInfos.get(cid);
// WriteMethod.writeMethod("10b.txt",chunkId + " === " + dataName + " === " + value + " === " + cid);
try {
String doctype="";
if(cid.contains(":baidu")||cid.contains(":google")){
doctype ="search";
List<String> valueList = new ArrayList<String>();
valueList.add("搜索引擎");
this.addData(chunkId, "channel", valueList);
List<String> doctypevalueList = new ArrayList<String>();
doctypevalueList.add("search");
this.addData(chunkId, "docType", doctypevalueList);
cid=cid.split(":")[0];
}else{
doctype = MfDoctypeInfo.docTypeInfos.get(cid);}
if(dataName.equals("crawlTime")){
List<String> valueList = new ArrayList<String>();
valueList.add(DataCheckUtil.convertStringTotime(value));
@ -83,9 +96,20 @@ public class ParralleData {
}
valueList.add(value);
valueList2.add(DateUtil.getDateTime(Long.valueOf(value)));
}else if(StringUtils.isBlank(value)){
valueList.add("0");
valueList2.add("");
}
else if(value.equals("0")&&type.equals("socialFollow")){
if(value.length() == 10){
value = Long.valueOf(value) * 1000 +"";
}else{
value = Long.valueOf(value) +"";
}
valueList.add(value);
valueList2.add(DateUtil.getDateTime(Long.valueOf(value)));
}
else if(StringUtils.isBlank(value)){
value=DateUtil.getbeforeHour();//当前时间减去1小时
valueList.add(DateUtil.getbeforonecurr()+"");
valueList2.add(value);
}
this.addData(chunkId, "pubTime", valueList);
this.addData(chunkId, dataName, valueList2);
@ -242,70 +266,77 @@ public class ParralleData {
this.addData(chunkId, "postId", postIdList);
if (type.equals("userInfoPage")){
this.addData(chunkId, "authorId", postIdList);
}else if(type.equals("socialFans")){
this.addData(chunkId, "authorId", postIdList);
}
String docId = cid+"#"+value;
List<String> docIdList = new ArrayList<String>() ;
docIdList.add("bfd_"+doctype+"_"+MfMD5Util.GetMD5Code(docId));
this.addData(chunkId,"docId",docIdList);
List<String> dataiIdList = new ArrayList<>();
dataiIdList.add(MfMD5Util.GetMD5Code(docId));
this.addData(chunkId,"_id_",dataiIdList);
List<String> dataIdList = new ArrayList<>();
dataIdList.add(MfMD5Util.GetMD5Code("bfd_"+doctype+"_"+MfMD5Util.GetMD5Code(docId)));
this.addData(chunkId,"dataId",dataIdList);
this.addData(chunkId,"_id_",dataIdList);
} else if(dataName.equals("attitudesCount")){
List<String> attitudesCountList ;
if(type.equals(TypeEntity.STORYDETAILPAGE)){
if(StringUtils.isNotBlank(value)){
JSONObject dataMap = JSONObject.fromObject(value);
if(dataMap.containsKey("totalCount")){
attitudesCountList = new ArrayList<String>();
String totalCount = dataMap.get("totalCount").toString();
attitudesCountList.add(totalCount);
this.addData(chunkId, dataName,attitudesCountList);
}
if(dataMap.containsKey("likeCount")){
attitudesCountList = new ArrayList<String>();
String likeCount = dataMap.get("likeCount").toString();
attitudesCountList.add(likeCount);
this.addData(chunkId, "firstListBrand",attitudesCountList);
}
if(dataMap.containsKey("loveCount")){
attitudesCountList = new ArrayList<String>();
String loveCount = dataMap.get("loveCount").toString();
attitudesCountList.add(loveCount);
this.addData(chunkId, "secondListBrand",attitudesCountList);
}
if(dataMap.containsKey("hahaCount")){
attitudesCountList = new ArrayList<String>();
String hahaCount = dataMap.get("hahaCount").toString();
attitudesCountList.add(hahaCount);
this.addData(chunkId, "threeListBrand",attitudesCountList);
}
if(dataMap.containsKey("angryCount")){
attitudesCountList = new ArrayList<String>();
String angryCount = dataMap.get("angryCount").toString();
attitudesCountList.add(angryCount);
this.addData(chunkId, "fourListBrand",attitudesCountList);
}
if(dataMap.containsKey("wowCount")){
attitudesCountList = new ArrayList<String>();
String wowCount = dataMap.get("wowCount").toString();
attitudesCountList.add(wowCount);
this.addData(chunkId, "fiveListBrand",attitudesCountList);
}
if(dataMap.containsKey("sadCount")){
attitudesCountList = new ArrayList<String>();
String sadCount = dataMap.get("sadCount").toString();
attitudesCountList.add(sadCount);
this.addData(chunkId, "listBrand",attitudesCountList);
}
}
}else{
// if(type.equals(TypeEntity.STORYDETAILPAGE)){
// if(StringUtils.isNotBlank(value)){
// JSONObject dataMap = JSONObject.fromObject(value);
// if(dataMap.containsKey("totalCount")){
// attitudesCountList = new ArrayList<String>();
// String totalCount = dataMap.get("totalCount").toString();
// attitudesCountList.add(totalCount);
// this.addData(chunkId, dataName,attitudesCountList);
// }
// if(dataMap.containsKey("likeCount")){
// attitudesCountList = new ArrayList<String>();
// String likeCount = dataMap.get("likeCount").toString();
// attitudesCountList.add(likeCount);
// this.addData(chunkId, "firstListBrand",attitudesCountList);
// }
// if(dataMap.containsKey("loveCount")){
// attitudesCountList = new ArrayList<String>();
// String loveCount = dataMap.get("loveCount").toString();
// attitudesCountList.add(loveCount);
// this.addData(chunkId, "secondListBrand",attitudesCountList);
// }
// if(dataMap.containsKey("hahaCount")){
// attitudesCountList = new ArrayList<String>();
// String hahaCount = dataMap.get("hahaCount").toString();
// attitudesCountList.add(hahaCount);
// this.addData(chunkId, "threeListBrand",attitudesCountList);
// }
// if(dataMap.containsKey("angryCount")){
// attitudesCountList = new ArrayList<String>();
// String angryCount = dataMap.get("angryCount").toString();
// attitudesCountList.add(angryCount);
// this.addData(chunkId, "fourListBrand",attitudesCountList);
// }
// if(dataMap.containsKey("wowCount")){
// attitudesCountList = new ArrayList<String>();
// String wowCount = dataMap.get("wowCount").toString();
// attitudesCountList.add(wowCount);
// this.addData(chunkId, "fiveListBrand",attitudesCountList);
// }
// if(dataMap.containsKey("sadCount")){
// attitudesCountList = new ArrayList<String>();
// String sadCount = dataMap.get("sadCount").toString();
// attitudesCountList.add(sadCount);
// this.addData(chunkId, "listBrand",attitudesCountList);
// }
// }
// attitudesCountList = new ArrayList<String>();
// attitudesCountList.add(value) ;
// this.addData(chunkId, dataName,attitudesCountList);
// }
// else{
attitudesCountList = new ArrayList<String>();
attitudesCountList.add(value) ;
this.addData(chunkId, dataName,attitudesCountList);
}
} else if(dataName.equals("projectName")){
List<String> projectNameList = new ArrayList<String>() ;
if(type.startsWith("bbs")){
@ -326,51 +357,68 @@ public class ParralleData {
List<String> valueList = new ArrayList<String>();
valueList.add(value);
this.addData(chunkId, dataName, valueList);
} else if (dataName.equals("filePath")){
List<String> valueList = new ArrayList<String>();
List<String> valueList2 = new ArrayList<String>();
if (value!= null && value.length()!= 0){
if(value instanceof String){
valueList.add(value);
this.addData(chunkId, "filePath", valueList);
}
valueList2.add("1");
this.addData(chunkId, "ugc", valueList2);
else if(dataName.equals("crawlDataFlag")){
if(value.contains(":")){
String crawlDataFlagtype=value.split(":")[0];
if(crawlDataFlagtype.equals("url")){
List<String> crawlDataFlagTypeList = new ArrayList<>();
crawlDataFlagTypeList.add("2");
this.addData(chunkId, "crawlDataFlagType", crawlDataFlagTypeList);
}
this.addData(chunkId, dataName, valueList);
}else if (dataName.equals("imagePath")){
List<String> valueList = new ArrayList<String>();
List<String> valueList2 = new ArrayList<String>();
if (value!= null && value.length()!= 0){
if(value instanceof String){
valueList.add(value);
this.addData(chunkId, "imagePath", valueList);
else if (crawlDataFlagtype.contains("account")){
List<String> crawlDataFlagTypeList = new ArrayList<>();
crawlDataFlagTypeList.add("1");
this.addData(chunkId, "crawlDataFlagType", crawlDataFlagTypeList);
}else if (crawlDataFlagtype.contains("keyword")){
List<String> crawlDataFlagTypeList = new ArrayList<>();
crawlDataFlagTypeList.add("0");
this.addData(chunkId, "crawlDataFlagType", crawlDataFlagTypeList);
}
valueList2.add("1");
this.addData(chunkId, "pgc", valueList2);
}
this.addData(chunkId, dataName, valueList);
}else if (dataName.equals("videoPath")){
System.out.print(value+"videoPath是是");
List<String> valueList = new ArrayList<String>();
List<String> valueList2 = new ArrayList<String>();
if (value!= null && value.length()!= 0){
if(value instanceof String){
valueList.add(value);
System.out.println(valueList+"valuevalue是");
this.addData(chunkId, "videoPath", valueList);
this.addData(chunkId, "crawlDataFlag", valueList);
}
// else if(dataName.equals("channel")){
// System.out.println("channel是个是是是是是是"+dataName.equals("channel"));
// }
// else if (dataName.equals("filePath")){
// List<String> valueList = new ArrayList<String>();
// List<String> valueList2 = new ArrayList<String>();
// if (!value.toString().equals("[]")){
// valueList2.add("1");
// this.addData(chunkId, "ugc", valueList2);
// }
// // this.addData(chunkId, dataName, valueList);
// }else if (dataName.equals("imagePath")){
// List<String> valueList = new ArrayList<String>();
// List<String> valueList2 = new ArrayList<String>();
// if (!value.toString().equals("[]")){
// valueList2.add("1");
// this.addData(chunkId, "pgc", valueList2);
// }
// //this.addData(chunkId, dataName, valueList);
// }else if (dataName.equals("videoPath")){
// List<String> valueList = new ArrayList<String>();
// List<String> valueList2 = new ArrayList<String>();
// if (!value.toString().equals("[]")){
// valueList2.add("1");
// this.addData(chunkId, "egc", valueList2);
// }
// //this.addData(chunkId, dataName, valueList);
// }
}
valueList2.add("1");
this.addData(chunkId, "egc", valueList2);
}
this.addData(chunkId, dataName, valueList);
}
else{
List<String> valueList = new ArrayList<String>();
valueList.add(value);
this.addData(chunkId, dataName, valueList);
}
} catch (NumberFormatException e) {
e.printStackTrace();
LOG.error("value============"+value+"dataName"+dataName);
}
}
private String upperCase(String str) {

4
cl_stream_service/src/main/java/com/bfd/mf/service/kafka/IKafka.java

@ -4,8 +4,8 @@ import java.util.List;
public interface IKafka {
public void read();
public void read(String readTopicName);
public void read(String readTopicName, String groupId);
// public void read(String readTopicName);
// public void read(String readTopicName, String groupId);
public void write(int kafakSerName, List<String> data, String writeTopicName);
public void stop();
}

21
cl_stream_service/src/main/java/com/bfd/mf/service/kafka/ReadKafka.java

@ -28,17 +28,20 @@ public class ReadKafka implements IKafka{
public void read(){
KfkConsumer.startReadThread(this.queue, this.defaultReadTopicName,this.threadNums,this.groupId,this.kafkaServerName);
System.out.println("++++++++++++++++"+this.queue.size()+"==========================="+this.defaultReadTopicName);
}
@Override
public void read(String readTopicName) {
KfkConsumer.startReadThread(this.queue, readTopicName,this.threadNums,this.groupId,this.kafkaServerName);
}
@Override
public void read(String readTopicName, String groupId) {
KfkConsumer.startReadThread(this.queue, readTopicName,this.threadNums,groupId,this.kafkaServerName);
}
// @Override
// public void read(String readTopicName) {
// System.out.println("++++++++++++++++"+readTopicName+"===========================");
// KfkConsumer.startReadThread(this.queue, readTopicName,this.threadNums,this.groupId,this.kafkaServerName);
//
// }
//
// @Override
// public void read(String readTopicName, String groupId) {
// KfkConsumer.startReadThread(this.queue, readTopicName,this.threadNums,groupId,this.kafkaServerName);
// }
@Override
public void write(int kafakSerName, List<String> data,String writeTopicName) {

41
cl_stream_service/src/main/java/com/bfd/mf/service/listen/ListenKafkaManager.java

@ -7,15 +7,12 @@ import com.bfd.mf.service.extendType.ForegroundExtendType;
import com.bfd.mf.service.kafka.ReadKafka;
import com.bfd.mf.service.tools.DateUtil;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.*;
public class ListenKafkaManager implements Runnable{
private LinkedBlockingDeque<String> queue= new LinkedBlockingDeque<String>(5000);
private LinkedBlockingDeque<String> queue= new LinkedBlockingDeque<String>(10000);
private boolean isRun = true;
@ -29,13 +26,13 @@ public class ListenKafkaManager implements Runnable{
public ListenKafkaManager(FieldNormaliz fieldNormaliz){
String kafkaname = fieldNormaliz.getKafkaName() ;
int croePoolsize = 30 ;
int maximumPoolsize = 60;
int croePoolsize = 20 ;
int maximumPoolsize = 100;
long keepAliveTime = 0;
this.spiderPoolExec = new ThreadPoolExecutor(croePoolsize, maximumPoolsize, keepAliveTime, TimeUnit.SECONDS, new SynchronousQueue<Runnable>());
this.fieldNormaliz = fieldNormaliz ;
this.kfkProducer = KfkProducer.getInstance(fieldNormaliz.getKafkaSerName(),kafkaname+"_err");
ReadKafka readKafka = new ReadKafka(queue , kafkaname ,10, fieldNormaliz.getGroupId(), fieldNormaliz.getKafkaSerName(),fieldNormaliz.getEsSerName());
ReadKafka readKafka = new ReadKafka(queue , kafkaname ,12, fieldNormaliz.getGroupId(), fieldNormaliz.getKafkaSerName(),fieldNormaliz.getEsSerName());
readKafka.read();
}
@ -43,30 +40,50 @@ public class ListenKafkaManager implements Runnable{
@Override
public void run() {
while(isRun){
//System.out.println("+=+=+=+=+=+=+=+=+=+=++++++"+this.queue.size());
if(this.queue.size() < 1){
DateUtil.sleep(1);
continue;
}
String data = this.queue.poll();
if(data == null) continue ;
addTask(data);
if (data.equals("__Exit__")) break ;
this.addTask(data);
addNum++;
}
}
public int getReadKafkaNum(){
System.out.print(addNum+"addmummaaaa ");
return addNum ;
}
private void addTask(String data){
while (spiderPoolExec.getPoolSize() >= spiderPoolExec.getMaximumPoolSize() || spiderPoolExec.getActiveCount() >= spiderPoolExec.getMaximumPoolSize()) {
while(this.spiderPoolExec.getPoolSize() >= this.spiderPoolExec.getMaximumPoolSize() || this.spiderPoolExec.getActiveCount() >= this.spiderPoolExec.getMaximumPoolSize()) {
try {
Thread.sleep(200);
System.out.println("+=+=+=+=+=+=+=+=+=+=++++++"+this.queue.size());
System.out.println("线程满了啊"+spiderPoolExec.getPoolSize()+"最大线程数"+spiderPoolExec.getMaximumPoolSize()+"现有的线程数"+spiderPoolExec.getActiveCount());
System.out.println("线程满了啊");
Thread.sleep(2000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
spiderPoolExec.submit(new ForegroundExtendType(data, fieldNormaliz, kfkProducer));
try {
this.spiderPoolExec.submit(new ForegroundExtendType(data, fieldNormaliz, kfkProducer));
} catch (Exception e) {
e.printStackTrace();
}
// try {
// Future future=this.spiderPoolExec.submit(new ForegroundExtendType(data, fieldNormaliz, kfkProducer));
// future.get();
// } catch (Exception e) {
// System.out.println("线程异常了");
// e.printStackTrace();
// }
}
public void setSwitch(boolean flag){

10
cl_stream_service/src/main/java/com/bfd/mf/service/listen/ListenTaskManager.java

@ -28,9 +28,15 @@ public class ListenTaskManager {
listenkafkaTopicThreadObj.add(esSerName+"#"+kafkaServerName+"#"+kafkaTopicName);
fieldNormaliz.setFieldDataMap(MfFieldInfo.fieldNormalizeInfoMap);
fieldNormaliz.setFieldInfo(MfFieldType.fieldStringTypes);
System.out.println("@@@@@@@@@@ " + JsonUtils.toJSONString(fieldNormaliz));
ListenKafkaManager listenKafkaManager = new ListenKafkaManager(fieldNormaliz);
//System.out.println("@@@@@@@@@@ " + JsonUtils.toJSONString(fieldNormaliz));
ListenKafkaManager listenKafkaManager = null;
try {
listenKafkaManager = new ListenKafkaManager(fieldNormaliz);
new Thread(listenKafkaManager).start();
} catch (Exception e) {
System.out.println("线程异常了啊啊啊啊啊啊啊啊啊啊啊啊");
e.printStackTrace();
}
listenKafkaManagers.put(kafkaTopicName, listenKafkaManager);
}else{
LOG.debug("[ListenTaskManager] addKafkaTopicListen >>> kafkaTopicName :"+kafkaTopicName +" 任务已经存在");

3
cl_stream_service/src/main/java/com/bfd/mf/service/tools/DataCheckUtil.java

@ -217,8 +217,9 @@ public class DataCheckUtil {
}
public static String getCurrentTime(){
long dateTime = System.currentTimeMillis() ;
SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return ddf.format(new Date());
return ddf.format(new Date(dateTime));
}
public static String getCurrentTime(long dateTime){

38
cl_stream_service/src/main/java/com/bfd/mf/service/tools/DateUtil.java

@ -861,6 +861,44 @@ public class DateUtil {
}
}
/**
 * Returns the epoch-millisecond timestamp of the start of the current day.
 * The current time is formatted down to {@code yyyy-MM-dd} and parsed back,
 * which discards the time-of-day part.
 *
 * @return start of today in milliseconds, or {@code 0} if the date string
 *         cannot be parsed back
 */
public static long getday(){
    final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    final String today = dayFormat.format(Calendar.getInstance().getTime());
    long startOfDay;
    try {
        startOfDay = dayFormat.parse(today).getTime();
    } catch (ParseException e) {
        // Same fallback as before: 0 signals that the round trip failed.
        startOfDay = 0L;
    }
    return startOfDay;
}
/**
 * Returns the date-time exactly one hour before now, formatted as
 * {@code yyyy-MM-dd HH:mm:ss}.
 *
 * @return the formatted date-time, or an empty string if an exception occurs
 */
public static String getbeforeHour(){
    try{
        Calendar calendar = Calendar.getInstance();
        // add() on an hour field shifts by elapsed time (3,600,000 ms), so the
        // result stays in step with getbeforonecurr() even across a DST
        // change; setting the HOUR field directly re-resolves the wall clock.
        calendar.add(Calendar.HOUR_OF_DAY, -1);
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return formatter.format(calendar.getTime());
    } catch(Exception e){
        log.debug("DateUtil.getbeforeHour():" + e.toString());
        return "";
    }
}
// Returns the epoch-millisecond timestamp of the moment one hour before now.
public static long getbeforonecurr(){
    final long oneHourMillis = 60 * 60 * 1000;
    try {
        return new Date().getTime() - oneHourMillis;
    } catch (Exception e) {
        // Fallback value kept as-is: 0 is returned when anything goes wrong.
        return 0L;
    }
}
// public static void main(String[] args) {
// String flag = getDateTime(0);
// System.out.println(flag);

4
cl_stream_service/src/main/java/com/bfd/mf/service/tools/HttpClientUtil.java

@ -64,7 +64,7 @@ public class HttpClientUtil {
if(!"".equals(str)){
httpget.setURI(new URI(httpget.getURI().toString() + "?" + str));
}
System.out.println("executing request " + httpget.getURI());
//System.out.println("executing request " + httpget.getURI());
for(String key: headers.keySet()){
httpget.setHeader(key,headers.get(key).toString());
}
@ -75,7 +75,7 @@ public class HttpClientUtil {
// 获取响应实体
HttpEntity entity = response.getEntity();
// 响应状
System.out.println(response.getStatusLine());
//System.out.println(response.getStatusLine());
result.put("code", response.getStatusLine().getStatusCode());
if (entity != null) {
String content = EntityUtils.toString(entity);

2
cl_stream_service/src/main/java/com/bfd/mf/service/tools/RoundRobinJedisPool.java

@ -64,7 +64,7 @@ public class RoundRobinJedisPool implements JedisResourcePool {
private static final int CURATOR_RETRY_BASE_SLEEP_MS = 100;
private static final int CURATOR_RETRY_MAX_SLEEP_MS = 30 * 1000;
private static final int CURATOR_RETRY_MAX_SLEEP_MS = 30 * 10000;
private static final int JEDIS_POOL_TIMEOUT_UNSET = -1;

6
cl_stream_service/src/main/java/com/bfd/mf/service/utils/SentimentApiUtils.java

@ -30,12 +30,16 @@ public class SentimentApiUtils {
params.put("sentiment", "0");
lists.add(params);
try {
result = HttpClientUtil.httpPost(apiUrl, lists);
} catch (Exception e) {
e.printStackTrace();
}
//System.out.println(result);
try {
List<Map<String, Object>> results = (List<Map<String, Object>>) JsonUtils.parseArray(result);
double score = Double.valueOf(results.get(0).get("sentiment").toString());
// long b = System.currentTimeMillis();
long b = System.currentTimeMillis();
// System.out.println(b-a);
return score ;
} catch (Exception e) {

2
cl_stream_service/src/main/java/com/bfd/mf/service/utils/WordCloudApiUtils.java

@ -14,7 +14,7 @@ public class WordCloudApiUtils {
datanews.put("content",content);
String result = HttpClientUtil.httpPost(apiUrl, datanews);
Map<String,Object> resultMap = JSONObject.parseObject(result);
System.out.println(resultMap);
// System.out.println(resultMap);
return resultMap.toString();
}
}

1036
dataSaveManager/dataSaveManager.iml
File diff suppressed because it is too large
View File

1034
serviceManager/serviceManager.iml
File diff suppressed because it is too large
View File

Loading…
Cancel
Save