diff --git a/cl_stream_datasave/cl_stream_datasave-2.0-SNAPSHOT.jar b/cl_stream_datasave/cl_stream_datasave-2.0-SNAPSHOT.jar
deleted file mode 100644
index 6fbc644..0000000
Binary files a/cl_stream_datasave/cl_stream_datasave-2.0-SNAPSHOT.jar and /dev/null differ
diff --git a/cl_stream_datasave/cl_stream_datasave.iml b/cl_stream_datasave/cl_stream_datasave.iml
index fdd6fc8..c7198d5 100644
--- a/cl_stream_datasave/cl_stream_datasave.iml
+++ b/cl_stream_datasave/cl_stream_datasave.iml
@@ -9,93 +9,22 @@
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
@@ -107,7 +36,6 @@
-
diff --git a/cl_stream_datasave/foreground b/cl_stream_datasave/foreground
index b71826e..5db8751 100644
--- a/cl_stream_datasave/foreground
+++ b/cl_stream_datasave/foreground
@@ -543,5 +543,15 @@
},
"otherSourceJson":{
"type":"keyword"
+ },
+ "dns":{
+ "type":"keyword"
+ },
+ "asrText":{
+ "type":"keyword"
+ },
+ "ocrText":{
+ "type":"keyword"
}
+
}
\ No newline at end of file
diff --git a/cl_stream_datasave/pom.xml b/cl_stream_datasave/pom.xml
index 52dc113..118a6a7 100644
--- a/cl_stream_datasave/pom.xml
+++ b/cl_stream_datasave/pom.xml
@@ -26,12 +26,51 @@
+
+
+
+
+
+
+
+
+ BfdRedisTools-2.0
+ BfdRedisTools-2.0
+ 1.0.0
+
+
+
slf4j-api
org.slf4j
1.7.22
+ redis.clients
+ jedis
+ 2.6.0
+
+
+ com.wandoulabs.jodis
+ jodis
+ 0.1.2
+
+
+ slf4j-api
+ org.slf4j
+
+
+ jedis
+ redis.clients
+
+
+
+
+ commons-lang
+ commons-lang
+ 2.4
+
+
com.bfd
elastiUtils
diff --git a/cl_stream_datasave/src/main/foreground b/cl_stream_datasave/src/main/foreground
new file mode 100644
index 0000000..5db8751
--- /dev/null
+++ b/cl_stream_datasave/src/main/foreground
@@ -0,0 +1,557 @@
+{
+ "commentUrl":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "channel":{
+ "type":"keyword"
+ },
+ "readCount":{
+ "type":"long"
+ },
+ "quoteCount":{
+ "type":"long"
+ },
+ "brand":{
+ "term_vector":"yes",
+ "type":"text",
+ "analyzer":"ik_smart",
+ "search_analyzer":"ik_smart",
+ "fields":{
+ "shingles":{
+ "type":"text",
+ "analyzer":"shingle_analyzer"
+ }
+ }
+ },
+ "brandId":{
+ "type":"keyword"
+ },
+ "createTimeStr":{
+ "type":"keyword"
+ },
+ "authornickname":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "contentSimHash":{
+ "type":"keyword"
+ },
+ "crawlDay":{
+ "type":"long"
+ },
+ "titleSimHash":{
+ "type":"keyword"
+ },
+ "commentId":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "originalPhrase":{
+ "type":"keyword"
+ },
+ "forwardContent":{
+ "analyzer":"ik_smart",
+ "type":"text",
+ "fields":{
+ "shingles":{
+ "analyzer":"shingle_analyzer",
+ "type":"text"
+ }
+ }
+ },
+ "finalPhrase":{
+ "type":"keyword"
+ },
+ "availability":{
+ "type":"integer"
+ },
+ "forwardUserId":{
+ "type":"keyword"
+ },
+ "forwardUserType":{
+ "type":"integer"
+ },
+ "forwardUserUrl":{
+ "type":"keyword"
+ },
+ "forwardAvatar":{
+ "type":"keyword"
+ },
+ "forwardImgs":{
+ "type":"keyword"
+ },
+ "forwardPostSource":{
+ "type":"keyword"
+ },
+ "forwardAttitudesCount":{
+ "type":"long"
+ },
+ "forwardCommentsCount":{
+ "type":"long"
+ },
+ "forwardQuoteCount":{
+ "type":"long"
+ },
+ "forwardPubTime":{
+ "type":"long"
+ },
+ "titleLength":{
+ "type":"long"
+ },
+ "forwardAuthor":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "sysAbstract":{
+ "analyzer":"ik_smart",
+ "type":"text"
+ },
+ "forwardUrl":{
+ "type":"keyword"
+ },
+ "createDate":{
+ "type":"date"
+ },
+ "docType":{
+ "type":"keyword"
+ },
+ "getSource":{
+ "type":"keyword"
+ },
+ "dataCount":{
+ "type":"integer"
+ },
+ "primary":{
+ "type":"integer"
+ },
+ "cate":{
+ "type":"keyword"
+ },
+ "sex":{
+ "type":"keyword"
+ },
+ "collectCount":{
+ "type":"long"
+ },
+ "crawlDate":{
+ "type":"date"
+ },
+ "avatar":{
+ "type":"keyword"
+ },
+ "url":{
+ "type":"keyword"
+ },
+ "skuProperties":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "expression":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "hashTag":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "places":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "opinions":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "hlKeywords":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "createTime":{
+ "type":"long"
+ },
+ "contentLength":{
+ "type":"integer"
+ },
+ "pubTime":{
+ "type":"long"
+ },
+ "fansCount":{
+ "type":"keyword"
+ },
+ "language":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "source":{
+ "type":"keyword"
+ },
+ "enSource":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "pictureList":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "userUrl":{
+ "type":"keyword"
+ },
+ "videoUrl":{
+ "type":"keyword"
+ },
+ "contentTag":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "author":{
+ "type":"keyword"
+ },
+ "authorId":{
+ "type":"keyword"
+ },
+ "authorLevel":{
+ "type":"keyword"
+ },
+ "sysSentiment":{
+ "type":"double"
+ },
+ "price":{
+ "type":"double"
+ },
+ "nomorprice":{
+ "type":"double"
+ },
+ "attitudesCount":{
+ "type":"keyword"
+ },
+ "createDay":{
+ "type":"long"
+ },
+ "postId":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "pubDate":{
+ "type":"date"
+ },
+ "sysKeywords":{
+ "type":"keyword"
+ },
+ "crawlTime":{
+ "type":"long"
+ },
+ "userType":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "projectName":{
+ "type":"keyword"
+ },
+ "lastModifiedTime":{
+ "type":"long"
+ },
+ "productParameter":{
+ "term_vector":"yes",
+ "type":"text",
+ "analyzer":"ik_smart",
+ "search_analyzer":"ik_smart",
+ "fields":{
+ "shingles":{
+ "type":"text",
+ "analyzer":"shingle_analyzer"
+ }
+ }
+ },
+ "docId":{
+ "type":"keyword"
+ },
+ "commentScore":{
+ "type":"long"
+ },
+ "urlHash":{
+ "type":"keyword"
+ },
+ "_id_":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "title":{
+ "term_vector":"yes",
+ "type":"text",
+ "analyzer":"ik_smart",
+ "search_analyzer":"ik_smart",
+ "fields":{
+ "shingles":{
+ "type":"text",
+ "analyzer":"shingle_analyzer"
+ }
+ }
+ },
+ "pageTranspondCount":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "pageCommentCount":{
+ "type":"text",
+ "fields":{
+ "keyword":{
+ "ignore_above":256,
+ "type":"keyword"
+ }
+ }
+ },
+ "content":{
+ "term_vector":"yes",
+ "type":"text",
+ "analyzer":"ik_smart",
+ "search_analyzer":"ik_smart",
+ "fields":{
+ "shingles":{
+ "type":"text",
+ "analyzer":"shingle_analyzer"
+ }
+ }
+ },
+ "pubDay":{
+ "type":"long"
+ },
+ "pubTimeStr":{
+ "type":"keyword"
+ },
+ "postSource":{
+ "type":"keyword"
+ },
+ "crawlTimeStr":{
+ "type":"keyword"
+ },
+ "postCount":{
+ "type":"keyword"
+ },
+ "friendsCount":{
+ "type":"keyword"
+ },
+ "commentsCount":{
+ "type":"long"
+ },
+ "favorCnt":{
+ "type":"long"
+ },
+ "viewCnt":{
+ "type":"long"
+ },
+ "downCnt":{
+ "type":"long"
+ },
+ "sign":{
+ "type":"keyword"
+ },
+ "isVip":{
+ "type":"integer"
+ },
+ "forumScore":{
+ "type":"keyword"
+ },
+ "impression":{
+ "type":"keyword"
+ },
+ "promotionInfo":{
+ "type":"keyword"
+ },
+ "smallImgs":{
+ "type":"keyword"
+ },
+ "listBrand":{
+ "term_vector":"yes",
+ "type":"text",
+ "analyzer":"ik_smart",
+ "search_analyzer":"ik_smart",
+ "fields":{
+ "shingles":{
+ "type":"text",
+ "analyzer":"shingle_analyzer"
+ }
+ }
+ },
+ "firstListBrand":{
+ "type":"keyword"
+ },
+ "secondListBrand":{
+ "type":"keyword"
+ },
+ "threeListBrand":{
+ "type":"keyword"
+ },
+ "fourListBrand":{
+ "type":"keyword"
+ },
+ "fiveListBrand":{
+ "type":"keyword"
+ },
+ "area":{
+ "type":"keyword"
+ },
+ "location":{
+ "type":"keyword"
+ },
+ "country":{
+ "type":"keyword"
+ },
+ "province":{
+ "type":"keyword"
+ },
+ "city":{
+ "type":"keyword"
+ },
+ "age":{
+ "type":"keyword"
+ },
+ "egc":{
+ "type":"integer"
+ },
+ "pgc":{
+ "type":"integer"
+ },
+ "ugc":{
+ "type":"integer"
+ },
+ "translateTitle":{
+ "type":"keyword"
+ },
+ "translateContent":{
+ "type":"keyword"
+ },
+ "filePath":{
+ "type":"keyword"
+ },
+ "resolution":{
+ "type":"keyword"
+ },
+ "extension":{
+ "type":"keyword"
+ },
+ "thumbnails":{
+ "type":"keyword"
+ },
+ "videoTime":{
+ "type":"keyword"
+ },
+ "isDownload":{
+ "type":"keyword"
+ },
+ "crawlDataFlag":{
+ "type":"keyword"
+ },
+ "attr":{
+ "type":"keyword"
+ },
+ "pageType":{
+ "type":"keyword"
+ },
+ "siteId":{
+ "type":"keyword"
+ },
+ "otherSourceJson":{
+ "type":"keyword"
+ },
+ "dns":{
+ "type":"keyword"
+ },
+ "asrText":{
+ "type":"keyword"
+ },
+ "ocrText":{
+ "type":"keyword"
+ }
+
+}
\ No newline at end of file
diff --git a/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java b/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java
index ed2e2d6..5c30703 100644
--- a/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java
+++ b/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java
@@ -3,32 +3,38 @@ package com.bfd.mf.datasave.download;
import com.alibaba.fastjson.JSONObject;
import okhttp3.*;
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
import java.util.HashMap;
import java.util.Map;
public class DownLoadFile {
public static Map downloadAndSaveFile(String getUrl,String putUrl){
- String realUrl = "";Integer size;
+ String realUrl = "";double size;
Map realresult= new HashMap<>();
try{
String files [] = getUrl.split("/");
String fileName = getUrl.split("/")[files.length-1];
Map header = new HashMap<>();
header.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36");
- // header.put("Connection","close");
+ header.put("Connection","keep-alive");
try {
Map downloadresult = OkHttpUtils.doGetBytes(getUrl,header);
+ size= (double) downloadresult.get("size");
if (downloadresult.containsKey("content")){
byte[] content = (byte[]) downloadresult.get("content");
- size= (Integer) downloadresult.get("size");
+ size= (double) downloadresult.get("size");
Thread.sleep(4000);
String result = DownLoadFile.upload(putUrl,fileName,content);
-
- realUrl = JSONObject.parseObject(result).getString("url");
+ Thread.sleep(4000);
+ realUrl = JSONObject.parseObject(result).getString("src");
realresult.put("realUrl",realUrl);
realresult.put("size",size);
}
+
} catch (IOException e) {
e.printStackTrace();
}
@@ -66,4 +72,27 @@ public class DownLoadFile {
return result;
}
+    /**
+     * Reads the image at {@code getUrl} and returns its pixel dimensions.
+     *
+     * @param getUrl URL of the image to measure
+     * @return the size formatted as {@code width×height}; {@code "0×0"} when the
+     *         content cannot be decoded as an image; an empty string when the URL
+     *         cannot be opened or read
+     * @throws IOException declared for backward compatibility; failures inside the
+     *         method are caught, printed and reported through the return value
+     */
+    public static String imagesize(String getUrl ) throws IOException{
+        String realresult = "";
+        // ImageIO.read(InputStream) does not close the stream it is given, so
+        // try-with-resources releases the connection on every path.
+        try (InputStream murl = new URL(getUrl).openStream()) {
+            BufferedImage sourceImg = ImageIO.read(murl);
+            int srcWidth = 0;  // source image width
+            int srcHeight = 0; // source image height
+            // ImageIO.read returns null when no registered reader can decode the
+            // data; keep the zero dimensions instead of relying on a caught NPE.
+            if (sourceImg != null) {
+                srcWidth = sourceImg.getWidth();
+                srcHeight = sourceImg.getHeight();
+            }
+            realresult = srcWidth + "×" + srcHeight;
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return realresult;
+    }
+
+
}
diff --git a/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/NewsDownload.java b/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/NewsDownload.java
new file mode 100644
index 0000000..d31374a
--- /dev/null
+++ b/cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/NewsDownload.java
@@ -0,0 +1,278 @@
+package com.bfd.mf.datasave.download;
+
+import com.bfd.crawler.utils.JsonUtils;
+
+import java.io.IOException;
+import java.util.*;
+
+public class NewsDownload {
+ private static String myGoFastAddr = "http://172.18.1.113:8080/upload";
+ public static void downloadAndSaveimage(Map resultMap,List