53 changed files with 3928 additions and 3133 deletions
-
BINcl_stream_datasave/cl_stream_datasave-2.0-SNAPSHOT.jar
-
98cl_stream_datasave/cl_stream_datasave.iml
-
10cl_stream_datasave/foreground
-
39cl_stream_datasave/pom.xml
-
557cl_stream_datasave/src/main/foreground
-
39cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/DownLoadFile.java
-
278cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/NewsDownload.java
-
9cl_stream_datasave/src/main/java/com/bfd/mf/datasave/download/OkHttpUtils.java
-
804cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/DataSaveManager.java
-
10cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/ListenKafkaManager.java
-
44cl_stream_datasave/src/main/java/com/bfd/mf/datasave/listen/testkongtianyuan.java
-
54cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/Constants.java
-
2cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DBUtil.java
-
299cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DataCheckUtil.java
-
24cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DataProcess.java
-
70cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/DateUtil.java
-
58cl_stream_datasave/src/main/java/com/bfd/mf/datasave/tools/ReadFile1125.java
-
38cl_stream_datasave/src/main/java/com/bfd/mf/entity/AllKeys.java
-
322cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/SubjectTask.java
-
65cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/Tasklimit.java
-
42cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/Userlimit.java
-
88cl_stream_datasave/src/main/java/com/bfd/mf/entity/mysql/cl_task.java
-
74cl_stream_datasave/src/main/java/com/bfd/mf/runstart/RunStartDataSave.java
-
13cl_stream_datasave/src/main/main5.iml
-
60cl_stream_mybatis/cl_stream_mybatis.iml
-
27cl_stream_mybatis/pom.xml
-
2cl_stream_mybatis/src/main/java/com/bfd/mf/controller/CompanyController.java
-
4cl_stream_mybatis/src/main/java/com/bfd/mf/tools/ConnectionRmi.java
-
19cl_stream_mybatis/src/main/resources/application.properties
-
2cl_stream_mybatis/src/main/resources/com/bfd/mf/spring/applicationContext.xml
-
53cl_stream_service/cl_stream_service.iml
-
37cl_stream_service/src/main/java/com/bfd/mf/entity/AllKeys.java
-
3cl_stream_service/src/main/java/com/bfd/mf/entity/TypeEntity.java
-
48cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfChannelInfo.java
-
20cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfDoctypeInfo.java
-
18cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/MfFieldInfo.java
-
147cl_stream_service/src/main/java/com/bfd/mf/entity/mysql/SubjectTask.java
-
34cl_stream_service/src/main/java/com/bfd/mf/runstart/RunStartService.java
-
380cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ForegroundExtendType.java
-
257cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ParentExctendType.java
-
692cl_stream_service/src/main/java/com/bfd/mf/service/extendType/ParralleData.java
-
4cl_stream_service/src/main/java/com/bfd/mf/service/kafka/IKafka.java
-
21cl_stream_service/src/main/java/com/bfd/mf/service/kafka/ReadKafka.java
-
43cl_stream_service/src/main/java/com/bfd/mf/service/listen/ListenKafkaManager.java
-
14cl_stream_service/src/main/java/com/bfd/mf/service/listen/ListenTaskManager.java
-
3cl_stream_service/src/main/java/com/bfd/mf/service/tools/DataCheckUtil.java
-
38cl_stream_service/src/main/java/com/bfd/mf/service/tools/DateUtil.java
-
4cl_stream_service/src/main/java/com/bfd/mf/service/tools/HttpClientUtil.java
-
2cl_stream_service/src/main/java/com/bfd/mf/service/tools/RoundRobinJedisPool.java
-
14cl_stream_service/src/main/java/com/bfd/mf/service/utils/SentimentApiUtils.java
-
2cl_stream_service/src/main/java/com/bfd/mf/service/utils/WordCloudApiUtils.java
-
1036dataSaveManager/dataSaveManager.iml
-
1034serviceManager/serviceManager.iml
@ -0,0 +1,557 @@ |
|||
{ |
|||
"commentUrl":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"channel":{ |
|||
"type":"keyword" |
|||
}, |
|||
"readCount":{ |
|||
"type":"long" |
|||
}, |
|||
"quoteCount":{ |
|||
"type":"long" |
|||
}, |
|||
"brand":{ |
|||
"term_vector":"yes", |
|||
"type":"text", |
|||
"analyzer":"ik_smart", |
|||
"search_analyzer":"ik_smart", |
|||
"fields":{ |
|||
"shingles":{ |
|||
"type":"text", |
|||
"analyzer":"shingle_analyzer" |
|||
} |
|||
} |
|||
}, |
|||
"brandId":{ |
|||
"type":"keyword" |
|||
}, |
|||
"createTimeStr":{ |
|||
"type":"keyword" |
|||
}, |
|||
"authornickname":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"contentSimHash":{ |
|||
"type":"keyword" |
|||
}, |
|||
"crawlDay":{ |
|||
"type":"long" |
|||
}, |
|||
"titleSimHash":{ |
|||
"type":"keyword" |
|||
}, |
|||
"commentId":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"originalPhrase":{ |
|||
"type":"keyword" |
|||
}, |
|||
"forwardContent":{ |
|||
"analyzer":"ik_smart", |
|||
"type":"text", |
|||
"fields":{ |
|||
"shingles":{ |
|||
"analyzer":"shingle_analyzer", |
|||
"type":"text" |
|||
} |
|||
} |
|||
}, |
|||
"finalPhrase":{ |
|||
"type":"keyword" |
|||
}, |
|||
"availability":{ |
|||
"type":"integer" |
|||
}, |
|||
"forwardUserId":{ |
|||
"type":"keyword" |
|||
}, |
|||
"forwardUserType":{ |
|||
"type":"integer" |
|||
}, |
|||
"forwardUserUrl":{ |
|||
"type":"keyword" |
|||
}, |
|||
"forwardAvatar":{ |
|||
"type":"keyword" |
|||
}, |
|||
"forwardImgs":{ |
|||
"type":"keyword" |
|||
}, |
|||
"forwardPostSource":{ |
|||
"type":"keyword" |
|||
}, |
|||
"forwardAttitudesCount":{ |
|||
"type":"long" |
|||
}, |
|||
"forwardCommentsCount":{ |
|||
"type":"long" |
|||
}, |
|||
"forwardQuoteCount":{ |
|||
"type":"long" |
|||
}, |
|||
"forwardPubTime":{ |
|||
"type":"long" |
|||
}, |
|||
"titleLength":{ |
|||
"type":"long" |
|||
}, |
|||
"forwardAuthor":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"sysAbstract":{ |
|||
"analyzer":"ik_smart", |
|||
"type":"text" |
|||
}, |
|||
"forwardUrl":{ |
|||
"type":"keyword" |
|||
}, |
|||
"createDate":{ |
|||
"type":"date" |
|||
}, |
|||
"docType":{ |
|||
"type":"keyword" |
|||
}, |
|||
"getSource":{ |
|||
"type":"keyword" |
|||
}, |
|||
"dataCount":{ |
|||
"type":"integer" |
|||
}, |
|||
"primary":{ |
|||
"type":"integer" |
|||
}, |
|||
"cate":{ |
|||
"type":"keyword" |
|||
}, |
|||
"sex":{ |
|||
"type":"keyword" |
|||
}, |
|||
"collectCount":{ |
|||
"type":"long" |
|||
}, |
|||
"crawlDate":{ |
|||
"type":"date" |
|||
}, |
|||
"avatar":{ |
|||
"type":"keyword" |
|||
}, |
|||
"url":{ |
|||
"type":"keyword" |
|||
}, |
|||
"skuProperties":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"expression":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"hashTag":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"places":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"opinions":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"hlKeywords":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"createTime":{ |
|||
"type":"long" |
|||
}, |
|||
"contentLength":{ |
|||
"type":"integer" |
|||
}, |
|||
"pubTime":{ |
|||
"type":"long" |
|||
}, |
|||
"fansCount":{ |
|||
"type":"keyword" |
|||
}, |
|||
"language":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"source":{ |
|||
"type":"keyword" |
|||
}, |
|||
"enSource":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"pictureList":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"userUrl":{ |
|||
"type":"keyword" |
|||
}, |
|||
"videoUrl":{ |
|||
"type":"keyword" |
|||
}, |
|||
"contentTag":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"author":{ |
|||
"type":"keyword" |
|||
}, |
|||
"authorId":{ |
|||
"type":"keyword" |
|||
}, |
|||
"authorLevel":{ |
|||
"type":"keyword" |
|||
}, |
|||
"sysSentiment":{ |
|||
"type":"double" |
|||
}, |
|||
"price":{ |
|||
"type":"double" |
|||
}, |
|||
"nomorprice":{ |
|||
"type":"double" |
|||
}, |
|||
"attitudesCount":{ |
|||
"type":"keyword" |
|||
}, |
|||
"createDay":{ |
|||
"type":"long" |
|||
}, |
|||
"postId":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"pubDate":{ |
|||
"type":"date" |
|||
}, |
|||
"sysKeywords":{ |
|||
"type":"keyword" |
|||
}, |
|||
"crawlTime":{ |
|||
"type":"long" |
|||
}, |
|||
"userType":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"projectName":{ |
|||
"type":"keyword" |
|||
}, |
|||
"lastModifiedTime":{ |
|||
"type":"long" |
|||
}, |
|||
"productParameter":{ |
|||
"term_vector":"yes", |
|||
"type":"text", |
|||
"analyzer":"ik_smart", |
|||
"search_analyzer":"ik_smart", |
|||
"fields":{ |
|||
"shingles":{ |
|||
"type":"text", |
|||
"analyzer":"shingle_analyzer" |
|||
} |
|||
} |
|||
}, |
|||
"docId":{ |
|||
"type":"keyword" |
|||
}, |
|||
"commentScore":{ |
|||
"type":"long" |
|||
}, |
|||
"urlHash":{ |
|||
"type":"keyword" |
|||
}, |
|||
"_id_":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"title":{ |
|||
"term_vector":"yes", |
|||
"type":"text", |
|||
"analyzer":"ik_smart", |
|||
"search_analyzer":"ik_smart", |
|||
"fields":{ |
|||
"shingles":{ |
|||
"type":"text", |
|||
"analyzer":"shingle_analyzer" |
|||
} |
|||
} |
|||
}, |
|||
"pageTranspondCount":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"pageCommentCount":{ |
|||
"type":"text", |
|||
"fields":{ |
|||
"keyword":{ |
|||
"ignore_above":256, |
|||
"type":"keyword" |
|||
} |
|||
} |
|||
}, |
|||
"content":{ |
|||
"term_vector":"yes", |
|||
"type":"text", |
|||
"analyzer":"ik_smart", |
|||
"search_analyzer":"ik_smart", |
|||
"fields":{ |
|||
"shingles":{ |
|||
"type":"text", |
|||
"analyzer":"shingle_analyzer" |
|||
} |
|||
} |
|||
}, |
|||
"pubDay":{ |
|||
"type":"long" |
|||
}, |
|||
"pubTimeStr":{ |
|||
"type":"keyword" |
|||
}, |
|||
"postSource":{ |
|||
"type":"keyword" |
|||
}, |
|||
"crawlTimeStr":{ |
|||
"type":"keyword" |
|||
}, |
|||
"postCount":{ |
|||
"type":"keyword" |
|||
}, |
|||
"friendsCount":{ |
|||
"type":"keyword" |
|||
}, |
|||
"commentsCount":{ |
|||
"type":"long" |
|||
}, |
|||
"favorCnt":{ |
|||
"type":"long" |
|||
}, |
|||
"viewCnt":{ |
|||
"type":"long" |
|||
}, |
|||
"downCnt":{ |
|||
"type":"long" |
|||
}, |
|||
"sign":{ |
|||
"type":"keyword" |
|||
}, |
|||
"isVip":{ |
|||
"type":"integer" |
|||
}, |
|||
"forumScore":{ |
|||
"type":"keyword" |
|||
}, |
|||
"impression":{ |
|||
"type":"keyword" |
|||
}, |
|||
"promotionInfo":{ |
|||
"type":"keyword" |
|||
}, |
|||
"smallImgs":{ |
|||
"type":"keyword" |
|||
}, |
|||
"listBrand":{ |
|||
"term_vector":"yes", |
|||
"type":"text", |
|||
"analyzer":"ik_smart", |
|||
"search_analyzer":"ik_smart", |
|||
"fields":{ |
|||
"shingles":{ |
|||
"type":"text", |
|||
"analyzer":"shingle_analyzer" |
|||
} |
|||
} |
|||
}, |
|||
"firstListBrand":{ |
|||
"type":"keyword" |
|||
}, |
|||
"secondListBrand":{ |
|||
"type":"keyword" |
|||
}, |
|||
"threeListBrand":{ |
|||
"type":"keyword" |
|||
}, |
|||
"fourListBrand":{ |
|||
"type":"keyword" |
|||
}, |
|||
"fiveListBrand":{ |
|||
"type":"keyword" |
|||
}, |
|||
"area":{ |
|||
"type":"keyword" |
|||
}, |
|||
"location":{ |
|||
"type":"keyword" |
|||
}, |
|||
"country":{ |
|||
"type":"keyword" |
|||
}, |
|||
"province":{ |
|||
"type":"keyword" |
|||
}, |
|||
"city":{ |
|||
"type":"keyword" |
|||
}, |
|||
"age":{ |
|||
"type":"keyword" |
|||
}, |
|||
"egc":{ |
|||
"type":"integer" |
|||
}, |
|||
"pgc":{ |
|||
"type":"integer" |
|||
}, |
|||
"ugc":{ |
|||
"type":"integer" |
|||
}, |
|||
"translateTitle":{ |
|||
"type":"keyword" |
|||
}, |
|||
"translateContent":{ |
|||
"type":"keyword" |
|||
}, |
|||
"filePath":{ |
|||
"type":"keyword" |
|||
}, |
|||
"resolution":{ |
|||
"type":"keyword" |
|||
}, |
|||
"extension":{ |
|||
"type":"keyword" |
|||
}, |
|||
"thumbnails":{ |
|||
"type":"keyword" |
|||
}, |
|||
"videoTime":{ |
|||
"type":"keyword" |
|||
}, |
|||
"isDownload":{ |
|||
"type":"keyword" |
|||
}, |
|||
"crawlDataFlag":{ |
|||
"type":"keyword" |
|||
}, |
|||
"attr":{ |
|||
"type":"keyword" |
|||
}, |
|||
"pageType":{ |
|||
"type":"keyword" |
|||
}, |
|||
"siteId":{ |
|||
"type":"keyword" |
|||
}, |
|||
"otherSourceJson":{ |
|||
"type":"keyword" |
|||
}, |
|||
"dns":{ |
|||
"type":"keyword" |
|||
}, |
|||
"asrText":{ |
|||
"type":"keyword" |
|||
}, |
|||
"ocrText":{ |
|||
"type":"keyword" |
|||
} |
|||
|
|||
} |
@ -0,0 +1,278 @@ |
|||
package com.bfd.mf.datasave.download; |
|||
|
|||
import com.bfd.crawler.utils.JsonUtils; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.*; |
|||
|
|||
public class NewsDownload { |
|||
private static String myGoFastAddr = "http://172.18.1.113:8080/upload"; |
|||
public static void downloadAndSaveimage(Map<String, Object> resultMap,List<Map<String,String>> imagePathSizevalue){ |
|||
List<String> filePath= (List<String>) resultMap.get("filePath"); |
|||
List<String> imagePath= (List<String>) resultMap.get("imagePath"); |
|||
List<String> videoPath= (List<String>) resultMap.get("videoPath"); |
|||
String putUrl = myGoFastAddr; |
|||
List<String> imagePathlist=new ArrayList<>(); |
|||
Iterator<String> it = imagePath.iterator(); |
|||
Map<String,String> rerversemap =new HashMap<>(); |
|||
while(it.hasNext()){ |
|||
Map<String,String> imagemap =new HashMap<>(); |
|||
String geturl= it.next(); |
|||
Map<String,Object> resultmap = null; |
|||
String resolution= null;String resulturl= null;String size=""; |
|||
try { |
|||
try { |
|||
resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl); |
|||
resolution = DownLoadFile.imagesize(geturl); |
|||
resulturl= (String) resultmap.get("realUrl"); |
|||
resulturl =resulturl.replace("http://172.18.1.113:8080",""); |
|||
size= resultmap.get("size").toString()+"KB"; |
|||
} catch (IOException e) { |
|||
//System.out.print(resulturl); |
|||
e.printStackTrace(); |
|||
} |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
if (resulturl!= null && resulturl.length()!= 0){ |
|||
imagemap.put("size",size); |
|||
imagemap.put("videoTime",""); |
|||
imagemap.put("url",resulturl); |
|||
imagemap.put("resolution",resolution); |
|||
imagePathlist.add(resulturl);//url |
|||
imagePathSizevalue.add(imagemap); |
|||
rerversemap.put(geturl,resulturl);//原始的gofast 以及对应的gofast |
|||
}else{ |
|||
imagePathlist.add(geturl); |
|||
rerversemap.put(geturl,resulturl);//原始的gofast 以及对应的gofast |
|||
} |
|||
|
|||
} |
|||
resultMap.put("imagePathSize", JsonUtils.toJSONString(imagePathSizevalue)); |
|||
resultMap.put("imagePath", imagePathlist); |
|||
if(imagePathSizevalue.size()>0){ |
|||
resultMap.put("pgc", 1); |
|||
Map<String, Object> repicturl=gofastswitch(rerversemap,resultMap); |
|||
String picturl= (String) repicturl.get("srcimagePath"); |
|||
if(picturl !=null&&picturl.length()>0){ |
|||
resultMap.put("srcimagePath",picturl); |
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
public static void downloadAndSaveFile(Map<String, Object> resultMap,List<Map<String,String>> filePathSizevalueList){ |
|||
List<String> filePath= (List<String>) resultMap.get("filePath"); |
|||
List<String> imagePath= (List<String>) resultMap.get("imagePath"); |
|||
List<String> videoPath= (List<String>) resultMap.get("videoPath"); |
|||
String putUrl = myGoFastAddr; |
|||
//List<Map<String,String>> filePathSizevalueList = new ArrayList<>(); |
|||
List<String> filePathlist=new ArrayList<>(); |
|||
Iterator<String> it = filePath.iterator(); |
|||
Map<String,String> rerversemap =new HashMap<>(); |
|||
while(it.hasNext()){ |
|||
Map<String,String> filemap =new HashMap<>(); |
|||
String geturl= it.next(); |
|||
Map<String,Object> resultmap = null; |
|||
String resulturl= null;String size= null; |
|||
try { |
|||
resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl); |
|||
resulturl = (String) resultmap.get("realUrl"); |
|||
resulturl =resulturl.replace("http://172.18.1.113:8080",""); |
|||
size= resultmap.get("size").toString()+"KB"; |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
|
|||
if (resulturl!= null && resulturl.length()!= 0){ |
|||
filemap.put("size",size); |
|||
filemap.put("videoTime",""); |
|||
filemap.put("url",resulturl); |
|||
filemap.put("resolution",""); |
|||
filePathlist.add(resulturl); |
|||
filePathSizevalueList.add(filemap); |
|||
rerversemap.put(geturl,resulturl);//原始的gofast 以及对应的gofast |
|||
}else { |
|||
filePathlist.add(geturl); |
|||
rerversemap.put(geturl,resulturl);//原始的gofast 以及对应的gofast |
|||
} |
|||
|
|||
} |
|||
resultMap.put("filePathSize",JsonUtils.toJSONString(filePathSizevalueList)); |
|||
resultMap.put("filePath", filePathlist); |
|||
if(filePathSizevalueList.size()>0){ |
|||
resultMap.put("ugc",1); |
|||
Map<String, Object> forwardUrl=gofastswitch(rerversemap,resultMap); |
|||
String reforwardUrl= (String) forwardUrl.get("srcfilePath"); |
|||
if(reforwardUrl !=null&&reforwardUrl.length()>0){ |
|||
resultMap.put("srcfilePath",reforwardUrl); |
|||
} |
|||
} |
|||
else { |
|||
resultMap.put("ugc",0); |
|||
} |
|||
|
|||
|
|||
} |
|||
|
|||
public static void downloadAndSavevideo(Map<String, Object> resultMap,List<Map<String,String>> videoPathSizevalueList){ |
|||
List<String> videoPath= (List<String>) resultMap.get("videoPath"); |
|||
String putUrl = myGoFastAddr; |
|||
// List<Map<String,String>> videoPathSizevalueList = new ArrayList<>(); |
|||
String videoTime=resultMap.get("videoTime").toString(); |
|||
List<String> videoPathlist=new ArrayList<>(); |
|||
Map<String,String> rerversemap =new HashMap<>(); |
|||
Iterator<String> it = videoPath.iterator(); |
|||
while(it.hasNext()){ |
|||
Map<String,String> videomap =new HashMap<>(); |
|||
String geturl= it.next(); |
|||
Map<String,Object> resultmap = null; |
|||
String resulturl= null;String size=""; |
|||
try { |
|||
resultmap = DownLoadFile.downloadAndSaveFile(geturl, putUrl); |
|||
resulturl = (String) resultmap.get("realUrl"); |
|||
resulturl =resulturl.replace("http://172.18.1.113:8080",""); |
|||
size= resultmap.get("size").toString()+"KB"; |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
if (resulturl!= null && resulturl.length()!= 0){//判断 是否下载成功 |
|||
videomap.put("size",size); |
|||
videomap.put("videoTime",videoTime); |
|||
videomap.put("url",resulturl); |
|||
videomap.put("resolution",""); |
|||
videoPathlist.add(resulturl); |
|||
videoPathSizevalueList.add(videomap); |
|||
rerversemap.put(geturl,resulturl);//原始的gofast 以及对应的gofast |
|||
}else{ |
|||
videoPathlist.add(geturl); |
|||
rerversemap.put(geturl,resulturl);//原始的gofast 以及对应的gofast |
|||
} |
|||
} |
|||
if(videoPathSizevalueList.size()>0){ |
|||
resultMap.put("egc",1); |
|||
Map<String, Object> revideoUrl= null; |
|||
try { |
|||
revideoUrl = gofastswitch(rerversemap,resultMap); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
String videoUrl=(String) revideoUrl.get("srcvideoPath"); |
|||
if(videoUrl !=null&&videoUrl.length()>0){ |
|||
resultMap.put("srcvideoPath",videoUrl); |
|||
} |
|||
} |
|||
else { |
|||
resultMap.put("egc",0); |
|||
} |
|||
resultMap.put("videoPathSize",JsonUtils.toJSONString(videoPathSizevalueList)); |
|||
resultMap.put("videoPath", videoPathlist); |
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
private static Map<String, Object> gofastswitch(Map<String, String> rerversemap , Map<String, Object> responseMap) {//原始的gofast 以及下载后的gofast地址 |
|||
Integer pgc= (Integer) responseMap.get("pgc");//图片 |
|||
Integer egc= (Integer) responseMap.get("egc");//视频 |
|||
Integer ugc= (Integer) responseMap.get("ugc");//文件 |
|||
List<String> imagePath= (List<String>) responseMap.get("imagePath"); |
|||
List<String> videoPath= (List<String>) responseMap.get("videoPath"); |
|||
String storyDetailPage= (String) responseMap.get("pageType"); |
|||
// pageType |
|||
// storyDetailPage |
|||
Map<String,Object> resultmap=new HashMap<>(); |
|||
if (pgc.equals(1)){ |
|||
try { |
|||
List<Map<String, Object>> picturepath=new ArrayList<>(); |
|||
if(responseMap.get("pictureList")!=""&&!"storyDetailPage".equals(storyDetailPage)&&!"socialComment".equals(storyDetailPage)){ |
|||
Map<String,Object> map=JsonUtils.parseObject((String) responseMap.get("pictureList")); |
|||
if(!map.isEmpty()){ |
|||
|
|||
for (Map.Entry<String, Object> entry : map.entrySet()) { |
|||
Map<String,Object> gofastmap=new HashMap<>(); |
|||
Map<String,Object> revmap= (Map<String, Object>) entry.getValue(); |
|||
if(revmap.containsKey("uploadImg")&&revmap.get("uploadImg")!=null&&revmap.get("uploadImg")!=""){ |
|||
gofastmap.put("gofastUrl",rerversemap.get(revmap.get("uploadImg"))); |
|||
gofastmap.put("originalUrl",revmap.get("img")); |
|||
} |
|||
picturepath.add(gofastmap); |
|||
} |
|||
} |
|||
}else if ("storyDetailPage".equals(storyDetailPage)){ |
|||
Iterator<String> it = imagePath.iterator(); |
|||
while(it.hasNext()){ |
|||
Map<String,Object> revmap=new HashMap<>(); |
|||
revmap.put("gofastUrl",it.next()); |
|||
revmap.put("originalUrl",""); |
|||
picturepath.add(revmap); |
|||
} |
|||
} |
|||
String pictureList=JsonUtils.toJSONString(picturepath); |
|||
resultmap.put("srcimagePath",pictureList); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
//log.error(); |
|||
} |
|||
} if(ugc.equals(1)){ |
|||
if(responseMap.get("forwardUrl")!=""&&!"storyDetailPage".equals(storyDetailPage)&&!"socialComment".equals(storyDetailPage)){ |
|||
try { |
|||
List<Map<String, Object>> forwardUrl= (List<Map<String, Object>>) JsonUtils.parseArray((String) responseMap.get("forwardUrl")); |
|||
List<Map<String, Object>> anewforwardUrl=new ArrayList<>(); |
|||
for( Map<String, Object> mapList : forwardUrl ) { |
|||
if(mapList.containsKey("gofastUrl")){ |
|||
mapList.put("gofastUrl",rerversemap.get(mapList.get("gofastUrl"))); |
|||
anewforwardUrl.add(mapList); |
|||
}else{ |
|||
anewforwardUrl.add(mapList); |
|||
} |
|||
} |
|||
String reforwardUrl=JsonUtils.toJSONString(anewforwardUrl); |
|||
resultmap.put("srcfilePath",reforwardUrl); |
|||
|
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
|
|||
} |
|||
} |
|||
} if(egc.equals(1)){ |
|||
List<Map<String, Object>> videoUrl=new ArrayList<>(); |
|||
if (responseMap.get("videoUrl")!=""&&!"storyDetailPage".equals(storyDetailPage)&&!"socialComment".equals(storyDetailPage)){ |
|||
try { |
|||
List<Map<String, Object>> zhuquvideoUrl= JsonUtils.parseArray((String)responseMap.get("videoUrl")) ; |
|||
// System.out.println(responseMap.get("videoUrl")); |
|||
for( Map<String, Object> mapList : zhuquvideoUrl ) { |
|||
// System.out.println(mapList.get("gofastUrl")+"asd"); |
|||
if(mapList.containsKey("gofastUrl")){ |
|||
mapList.put("gofastUrl",rerversemap.get(mapList.get("gofastUrl"))); |
|||
videoUrl.add(mapList); |
|||
}else{ |
|||
videoUrl.add(mapList); |
|||
} |
|||
} |
|||
|
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
String revideoUrl=JsonUtils.toJSONString(responseMap.get("videoUrl")); |
|||
resultmap.put("srcvideoPath",revideoUrl); |
|||
} |
|||
}else if ("storyDetailPage".equals(storyDetailPage)){ |
|||
String storyDetailPagevideoUrl= (String) responseMap.get("videoUrl"); |
|||
Iterator<String> it = videoPath.iterator(); |
|||
while(it.hasNext()){ |
|||
Map<String,Object> revmap=new HashMap<>(); |
|||
revmap.put("gofastUrl",it.next()); |
|||
revmap.put("originalUrl",storyDetailPagevideoUrl); |
|||
videoUrl.add(revmap); |
|||
} |
|||
|
|||
} |
|||
String revideoUrl =JsonUtils.toJSONString(videoUrl); |
|||
resultmap.put("srcvideoPath",revideoUrl); |
|||
} |
|||
|
|||
|
|||
return resultmap; |
|||
} |
|||
|
|||
} |
@ -0,0 +1,44 @@ |
|||
//package com.bfd.mf.datasave.listen; |
|||
// |
|||
//import com.bfd.crawler.elasti.ElastiProducer; |
|||
//import com.bfd.crawler.utils.JsonUtils; |
|||
//import com.bfd.mf.datasave.tools.ReadFile1125; |
|||
//import com.bfd.mf.datasave.tools.ReadLine; |
|||
//import com.bfd.mf.datasave.tools.WriteMethod; |
|||
// |
|||
//import java.io.File; |
|||
//import java.util.List; |
|||
// |
|||
//public class testkongtianyuan { |
|||
// private static String preIndex = "cl_index_"; |
|||
// private static String preSubject = "cl_subject_"; |
|||
// private static int subjectEsNum = 1; |
|||
// private static int indexEsNum = 2; |
|||
// private static String indexType = "docs"; |
|||
// private static int bussinessType = 1; |
|||
// public static void main(String[] args) { |
|||
// //List<String> properties = ReadLine.readLine(new File("C:/Users/zhicheng.zhang/Desktop/15S_1125.txt")); |
|||
// // ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, subjectEsNum, "cl_subject_20201125", indexType); |
|||
// ReadFile1125 readFile = new ReadFile1125("C:/Users/zhicheng.zhang/Desktop/15S_1125.txt"); |
|||
//// ReadFile readFile = new ReadFile("D:/program/HiveToKafkaTool/data/juemi.txt"); |
|||
// |
|||
// Thread readFileThread = new Thread(readFile, "readFileThread"); |
|||
// readFileThread.start(); |
|||
// |
|||
// |
|||
// joinjess a=new joinjess(); |
|||
// for(int i = 0; i < 4; i++) { |
|||
// Thread joinJsonThread; |
|||
// joinJsonThread = new Thread(a, "joinJson" + i); |
|||
// joinJsonThread.start(); |
|||
// } |
|||
//// // elastiProducer.sendMessageToEs(properties.get(1)); |
|||
//// for(int i=0;i<properties.size();i++){ |
|||
//// System.out.println(properties.size()); |
|||
//// ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, indexEsNum, "cl_subject_20201125", indexType); |
|||
//// elastiProducer.sendMessageToEs(properties.get(i)); |
|||
//// |
|||
//// } |
|||
// |
|||
// } |
|||
//} |
@ -0,0 +1,54 @@ |
|||
package com.bfd.mf.datasave.tools; |
|||
|
|||
import java.util.Map; |
|||
import java.util.concurrent.ArrayBlockingQueue; |
|||
import java.util.concurrent.ConcurrentHashMap; |
|||
|
|||
/**
 * Process-wide mutable settings and hand-off queues, exposed through static accessors.
 *
 * <p>Holds two bounded string queues, a start/end line pair and a boolean flag. Every value can
 * be replaced at runtime through its setter.
 */
public class Constants {

    /** Capacity of the initial line queue. */
    private static final int LINE_QUEUE_CAPACITY = 10000;

    /** Capacity of the initial output queue. */
    private static final int OUTPUT_QUEUE_CAPACITY = 100000;

    private static ArrayBlockingQueue<String> lineQueue = new ArrayBlockingQueue<>(LINE_QUEUE_CAPACITY);
    private static ArrayBlockingQueue<String> outputQueue = new ArrayBlockingQueue<>(OUTPUT_QUEUE_CAPACITY);

    /** Initially 1. */
    private static long startLine = 1;

    /** Initially -1. */
    private static long endLine = -1;

    /** Initially true. */
    private static boolean flag = true;

    /** @return the current flag value */
    public static boolean isFlag() {
        return flag;
    }

    /** @param flag new flag value */
    public static void setFlag(boolean flag) {
        Constants.flag = flag;
    }

    /** @return the queue currently registered as the line queue */
    public static ArrayBlockingQueue<String> getLineQueue() {
        return lineQueue;
    }

    /** @param lineQueue queue that replaces the current line queue */
    public static void setLineQueue(ArrayBlockingQueue<String> lineQueue) {
        Constants.lineQueue = lineQueue;
    }

    /** @return the queue currently registered as the output queue */
    public static ArrayBlockingQueue<String> getOutputQueue() {
        return outputQueue;
    }

    /** @param outputQueue queue that replaces the current output queue */
    public static void setOutputQueue(ArrayBlockingQueue<String> outputQueue) {
        Constants.outputQueue = outputQueue;
    }

    /** @return the configured start line */
    public static long getStartLine() {
        return startLine;
    }

    /** @param startLine new start line */
    public static void setStartLine(long startLine) {
        Constants.startLine = startLine;
    }

    /** @return the configured end line */
    public static long getEndLine() {
        return endLine;
    }

    /** @param endLine new end line */
    public static void setEndLine(long endLine) {
        Constants.endLine = endLine;
    }
}
@ -0,0 +1,299 @@ |
|||
package com.bfd.mf.datasave.tools; |
|||
|
|||
import org.apache.commons.lang3.StringUtils; |
|||
import org.apache.log4j.Logger; |
|||
|
|||
import java.text.ParseException; |
|||
import java.text.SimpleDateFormat; |
|||
import java.util.Date; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
|
|||
public class DataCheckUtil { |
|||
|
|||
public static Pattern datePattrn = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}\\s\\d{2}\\:\\d{2}:\\d{2}$"); |
|||
|
|||
public static Pattern dayPattrn = Pattern.compile("^\\d{2,4}\\-\\d{1,2}\\-\\d{1,2}$"); |
|||
|
|||
private static SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|||
|
|||
public static Pattern p = Pattern.compile("\\s+"); |
|||
|
|||
private static final Logger LOG = Logger.getLogger(DataCheckUtil.class); |
|||
|
|||
public static String chechData2(String dataStr){ |
|||
dataStr = dataStr.replace("Z",""); |
|||
dataStr = checkData(dataStr); |
|||
Matcher matcher = datePattrn.matcher(dataStr); |
|||
if(!matcher.find()){ |
|||
System.out.println("格式错误,使用当前时间 : " + dataStr); |
|||
dataStr = DateUtil.getDateTime(); |
|||
}else{ |
|||
dataStr = matcher.group(0); |
|||
} |
|||
return dataStr; |
|||
} |
|||
|
|||
public static String checkData(String dataStr){ |
|||
SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|||
if(StringUtils.isBlank(dataStr)){ |
|||
return ddf.format(new Date()); |
|||
} |
|||
if(dataStr.contains("-:")){ |
|||
dataStr = dataStr.replace("-:",":"); |
|||
} |
|||
if(dataStr.contains(":-")){ |
|||
dataStr = dataStr.replace(":-",":"); |
|||
} |
|||
|
|||
Matcher matcher = datePattrn.matcher(dataStr); |
|||
|
|||
if(!matcher.find()){ |
|||
dataStr = dataStr.trim(); |
|||
if(!p.matcher(dataStr).find()){ |
|||
if(!dayPattrn.matcher(dataStr).find()){ |
|||
return ddf.format(new Date()); |
|||
} |
|||
} |
|||
|
|||
String[] dates = dataStr.split("\\s+"); |
|||
String years = ""; |
|||
String times = ""; |
|||
if(dates.length == 2){ |
|||
years = dates[0]; |
|||
times = dates[1]; |
|||
}else{ |
|||
years = dates[0]; |
|||
} |
|||
|
|||
if(years.contains("/")){ |
|||
years = years.replace("/", "-"); |
|||
} |
|||
String[] yearStr = years.split("-"); |
|||
String yms = "" ; |
|||
if(yearStr.length == 3){ |
|||
String year = yearStr[0]; |
|||
String month = yearStr[1]; |
|||
String day = yearStr[2]; |
|||
if(year.length() == 2){ |
|||
year = "20"+year; |
|||
} |
|||
if(month.length() == 1){ |
|||
month = "0"+month; |
|||
} |
|||
if(day.length() == 1){ |
|||
day = "0"+day; |
|||
} |
|||
yms = year+"-"+month+"-"+day; |
|||
} |
|||
|
|||
String hms = ""; |
|||
if(StringUtils.isBlank(times)){ |
|||
hms = "00:00:00"; |
|||
}else{ |
|||
times = times.replace("/", ":"); |
|||
if(times.contains(":")){ |
|||
String[] timeStr = times.split(":"); |
|||
if( timeStr.length >= 3 ){ |
|||
String hours = timeStr[0]; |
|||
String mins = timeStr[1]; |
|||
String s = timeStr[2]; |
|||
|
|||
if(hours.length() == 1){ |
|||
hours = "0"+hours; |
|||
} |
|||
if(mins.length() == 1){ |
|||
mins = "0"+mins; |
|||
} |
|||
if(s.length() == 1){ |
|||
s = "0"+s; |
|||
} |
|||
hms = hours+":"+mins+":"+s; |
|||
}else if(timeStr.length == 2){ |
|||
String hours = timeStr[0]; |
|||
String mins = timeStr[1]; |
|||
String s = "00"; |
|||
if(hours.length() == 1){ |
|||
hours = "0"+hours; |
|||
} |
|||
if(mins.length() == 1){ |
|||
mins = "0"+mins; |
|||
} |
|||
hms = hours+":"+mins+":"+s; |
|||
} else { |
|||
String hours = timeStr[0]; |
|||
String mins = "00" ; |
|||
String s = "00"; |
|||
if(hours.length() == 1){ |
|||
hours = "0"+hours; |
|||
} |
|||
hms = hours+":"+mins+":"+s; |
|||
} |
|||
}else{ |
|||
if(isNum(times) && times.length()==2){ |
|||
hms = times+":00:00"; |
|||
}else if(isNum(times) && times.length()==1){ |
|||
hms = "0"+times+":00:00"; |
|||
}else{ |
|||
hms = "00:00:00" ; |
|||
} |
|||
} |
|||
} |
|||
if(StringUtils.isBlank(yms)){ |
|||
return ddf.format(new Date()); |
|||
} |
|||
if(yms != "" || hms != ""){ |
|||
return yms+" "+hms; |
|||
} |
|||
} |
|||
return dataStr ; |
|||
} |
|||
|
|||
private static boolean isNum(String time){ |
|||
Pattern p = Pattern.compile("\\d+"); |
|||
if(p.matcher(time).find()){ |
|||
return true ; |
|||
} |
|||
return false ; |
|||
} |
|||
|
|||
public static String convertStringTotime(String datetime){ |
|||
if(StringUtils.isBlank(datetime)){ |
|||
return DateUtil.getDateTime(System.currentTimeMillis()); |
|||
} |
|||
String creationTime = ""; |
|||
if(datetime.length() == 13){ |
|||
creationTime = DateUtil.getDateTime(Long.valueOf(datetime)); |
|||
}else{ |
|||
creationTime = DateUtil.getDateTime(Long.valueOf(datetime) *1000); |
|||
} |
|||
return creationTime ; |
|||
|
|||
} |
|||
|
|||
public static long convertStringToLong(String datetime){ |
|||
if(StringUtils.isBlank(datetime)){ |
|||
return System.currentTimeMillis(); |
|||
} |
|||
long creationTime ; |
|||
if(datetime.length() == 13){ |
|||
creationTime = Long.valueOf(datetime); |
|||
}else{ |
|||
creationTime = Long.valueOf(datetime) *1000; |
|||
} |
|||
return creationTime ; |
|||
} |
|||
|
|||
public static long convertTimeTotime(String datetime){ |
|||
if(StringUtils.isBlank(datetime)){ |
|||
return System.currentTimeMillis() / 1000; |
|||
} |
|||
long creationTime ; |
|||
if(datetime.length() == 13){ |
|||
creationTime = Long.valueOf(datetime) / 1000; |
|||
}else{ |
|||
creationTime = Long.valueOf(datetime) ; |
|||
} |
|||
return creationTime ; |
|||
|
|||
} |
|||
|
|||
|
|||
public static long convertDateTotime(String datetime){ |
|||
if(StringUtils.isBlank(datetime)){ |
|||
return System.currentTimeMillis() / 1000; |
|||
} |
|||
long creationTime = 0; |
|||
try { |
|||
SimpleDateFormat ddf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|||
creationTime = Long.valueOf(ddf1.parse(datetime).getTime()) / 1000; |
|||
} catch (Exception e) { |
|||
// TODO Auto-generated catch block |
|||
e.printStackTrace(); |
|||
} |
|||
return creationTime ; |
|||
|
|||
} |
|||
|
|||
public static String getCurrentTime(){ |
|||
long dateTime = System.currentTimeMillis() ; |
|||
SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|||
return ddf.format(new Date(dateTime)); |
|||
} |
|||
|
|||
public static String getCurrentTime(long dateTime){ |
|||
SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|||
return ddf.format(new Date(dateTime)); |
|||
} |
|||
|
|||
public static String getDate(long dateTime){ |
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); |
|||
return sdf.format(new Date(dateTime)); |
|||
} |
|||
|
|||
public static String getDate(String dateTime){ |
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); |
|||
SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); |
|||
try { |
|||
Date date = ddf.parse(dateTime) ; |
|||
return sdf.format(date); |
|||
} catch (ParseException e) { |
|||
// TODO Auto-generated catch block |
|||
e.printStackTrace(); |
|||
LOG.error("DataCheckUtil getDate() err data:"+dateTime); |
|||
|
|||
} |
|||
return sdf.format(new Date()); |
|||
} |
|||
|
|||
public static long getDay(long dateTime){ |
|||
try{ |
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); |
|||
String dayStr = sdf.format(new Date(dateTime)); |
|||
Date date = sdf.parse(dayStr); |
|||
return date.getTime(); |
|||
}catch(Exception e){ |
|||
e.printStackTrace(); |
|||
LOG.error("DataCheckUtil getDay() err data:"+dateTime); |
|||
} |
|||
return 0; |
|||
} |
|||
|
|||
public static long getDay(String dateTime){ |
|||
try{ |
|||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); |
|||
Date date = sdf.parse(dateTime); |
|||
return date.getTime(); |
|||
}catch(Exception e){ |
|||
e.printStackTrace(); |
|||
LOG.error("DataCheckUtil getDay2() err data:"+dateTime); |
|||
} |
|||
return 0; |
|||
} |
|||
|
|||
|
|||
// public static void main(String[] args) { |
|||
// //System.out.println(checkData("")); |
|||
// /*System.out.println(System.currentTimeMillis()); |
|||
// System.out.println(Calendar.getInstance().getTimeInMillis() / 1000); |
|||
// System.out.println(new Date().getTime() / 1000); |
|||
// System.out.println(DateUtil.getDateTime((System.currentTimeMillis() / 1000) * 1000)); |
|||
// System.out.println(convertStringTotime("1558077405")); |
|||
// System.out.println(convertTimeTotime(null));*/ |
|||
// //System.out.println(DateUtil.getTimeMillis("2019-03-01 01:01:01")); |
|||
// |
|||
// /*String aa = DataCheckUtil.convertStringTotime("1563245342"); |
|||
// System.out.println(aa);*/ |
|||
// /*SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); |
|||
// try { |
|||
// Date date = sdf.parse("2019-03-01"); |
|||
// System.out.println(date.getTime()); |
|||
// } catch (ParseException e) { |
|||
// // TODO Auto-generated catch block |
|||
// e.printStackTrace(); |
|||
// }*/ |
|||
// System.out.println(getDate("2019-03-01 01:01:01")); |
|||
// } |
|||
|
|||
} |
@ -0,0 +1,24 @@ |
|||
package com.bfd.mf.datasave.tools; |
|||
|
|||
import crawler.open.util.RedisUtil; |
|||
|
|||
public class DataProcess implements Runnable { |
|||
@Override |
|||
public void run() { |
|||
while (true) { |
|||
try { |
|||
|
|||
String a = Constants.getLineQueue().take(); |
|||
String key=a.split("@#@")[0]; |
|||
String value=a.split("@#@")[1]; |
|||
RedisUtil.set(key, value, 10); |
|||
// if(Constants.getLineQueue().size() == 1000){ |
|||
// Constants.getLineQueue().clear(); |
|||
// } |
|||
System.out.println(Constants.getLineQueue().size()+"队列的大小"); |
|||
} catch (InterruptedException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,58 @@ |
|||
//package com.bfd.mf.datasave.tools; |
|||
// |
|||
//import java.io.BufferedInputStream; |
|||
//import java.io.BufferedReader; |
|||
//import java.io.File; |
|||
//import java.io.FileInputStream; |
|||
//import java.io.FileNotFoundException; |
|||
//import java.io.IOException; |
|||
//import java.io.InputStreamReader; |
|||
// |
|||
//public class ReadFile1125 implements Runnable{ |
|||
// |
|||
// private String filename = null; |
|||
// public ReadFile1125(String filename) { |
|||
// this.filename = filename; |
|||
// } |
|||
// @Override |
|||
// public void run() { |
|||
// // TODO Auto-generated method stub |
|||
// try { |
|||
// FileInputStream inputStream = null; |
|||
// try { |
|||
// inputStream = new FileInputStream(filename); |
|||
// } catch (FileNotFoundException e) { |
|||
// // TODO Auto-generated catch block |
|||
// e.printStackTrace(); |
|||
// } |
|||
// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); |
|||
// String str = null; |
|||
// long count = 0; |
|||
// do{ |
|||
// str = null; |
|||
// try { |
|||
// str = bufferedReader.readLine(); |
|||
// System.out.println("lineQueue size: " + Constants.getLineQueue().size()); |
|||
// count++; |
|||
// if (str != null && count > Constants.getStartLine()) { |
|||
// if (Constants.getEndLine() < 0 || (Constants.getEndLine() > 0 && count < Constants.getEndLine())) { |
|||
// Constants.getLineQueue().put(str); |
|||
// } else { |
|||
// System.out.println("Not process, count: " + count + " start config: " + Constants.getStartLine() + " end confid: " + Constants.getEndLine()); |
|||
// } |
|||
// } |
|||
//// System.out.println("Read line:" + str); |
|||
// } catch (IOException e) { |
|||
// // TODO Auto-generated catch block |
|||
// e.printStackTrace(); |
|||
// } |
|||
// } while(str != null); |
|||
// inputStream.close(); |
|||
// bufferedReader.close(); |
|||
// Constants.setFlag(false); |
|||
// } catch(Exception e) { |
|||
// e.printStackTrace(); |
|||
// } |
|||
// } |
|||
// |
|||
//} |
@ -0,0 +1,65 @@ |
|||
package com.bfd.mf.entity.mysql; |
|||
|
|||
import com.bfd.crawler.utils.JsonUtils; |
|||
import com.bfd.mf.datasave.tools.DBUtil; |
|||
|
|||
import javax.xml.bind.util.JAXBSource; |
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
/* |
|||
* 同一个专题下相同任务的采集时间范围最大的 |
|||
* */ |
|||
public class Tasklimit { |
|||
public static Map<String, List<Map<String,String>>>subjectTasktimelimiit = new HashMap<>(); |
|||
public static void loadTask(){ |
|||
subjectTasktimelimiit.clear(); |
|||
List<Map<String, Object>> Tasktimelimiit = DBUtil.getInstance("db_stat_alltask").query("SELECT MIN(crawl_start_time) crawl_start_time ,MAX(crawl_end_time) crawl_end_time ,crawl_data_flag ,subject_id ,cid ,app_id from cl_task where del=0 and (crawl_status=1) and cid!=\"\" GROUP BY crawl_data_flag,cid,subject_id,app_id;"); |
|||
System.out.println(Tasktimelimiit.size()+"Tasktimelimiit"); |
|||
if (Tasktimelimiit.size()>0){ |
|||
String newkey = ""; |
|||
for(Map<String, Object> subjectTask : Tasktimelimiit) { //{subject_id=10222, name=我是张三, task_id=188, id=71, crawl_data_flag=aaa} |
|||
String keytwo = ""; |
|||
Map<String,String> value = new HashMap<>(); |
|||
List<Map<String,String>> valueList = new ArrayList<>(); |
|||
|
|||
if (subjectTask.get("cid").equals("Tmall")) { |
|||
newkey = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
keytwo = "Taobao" + "#####" + subjectTask.get("crawl_data_flag"); |
|||
} else if (subjectTask.get("cid").equals("Taobao")) { |
|||
newkey = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
keytwo = "Tmall" + "#####" + subjectTask.get("crawl_data_flag"); |
|||
} else { |
|||
newkey = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
} |
|||
String max_time = ""; |
|||
String min_time = ""; |
|||
String subject_id=""; |
|||
newkey=newkey+"#####" +subjectTask.get("app_id"); |
|||
newkey= newkey.toLowerCase(); |
|||
subject_id=subjectTask.get("subject_id").toString(); |
|||
|
|||
max_time=subjectTask.get("crawl_end_time").toString(); |
|||
value.put("max_time",max_time); |
|||
min_time=subjectTask.get("crawl_start_time").toString(); |
|||
value.put("min_time",min_time); |
|||
value.put("subject_id",subject_id); |
|||
|
|||
if(subjectTasktimelimiit.containsKey(newkey)){ |
|||
valueList = subjectTasktimelimiit.get(newkey); |
|||
valueList.add(value); |
|||
}else{ |
|||
valueList.add(value); |
|||
} |
|||
if(keytwo.length()>0){ |
|||
String tmallnewkey = keytwo.toLowerCase(); |
|||
subjectTasktimelimiit.put(tmallnewkey,valueList); |
|||
} |
|||
subjectTasktimelimiit.put(newkey,valueList); |
|||
} |
|||
// System.out.println(JsonUtils.toJSONString(subjectTasktimelimiit)); |
|||
|
|||
} |
|||
} |
|||
} |
@ -0,0 +1,42 @@ |
|||
package com.bfd.mf.entity.mysql; |
|||
|
|||
import com.bfd.crawler.utils.JsonUtils; |
|||
import com.bfd.mf.datasave.tools.DBUtil; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
/* |
|||
* 用户权限表 |
|||
* */ |
|||
public class Userlimit { |
|||
public static Map<String, Object>subjectuserlimiit = new HashMap<>(); |
|||
public static void loaduser() { |
|||
subjectuserlimiit.clear(); |
|||
List<Map<String, Object>> userlimiit = DBUtil.getInstance("db_stat").query("SELECT user_id,is_ocr,is_asr,is_trans FROM `cl_user_config`"); |
|||
if (userlimiit.size() > 0) { |
|||
for (Map<String, Object> subjectuser : userlimiit) { |
|||
int is_ocr=0; int is_trans=0; |
|||
String userid=(String) subjectuser.get("user_id"); |
|||
if (subjectuser.containsKey("is_ocr")&&null!=subjectuser.get("is_ocr")) { |
|||
is_ocr=(int) subjectuser.get("is_ocr"); |
|||
} |
|||
if (subjectuser.containsKey("is_trans")&&null!=subjectuser.get("is_trans")) { |
|||
is_trans =(int) subjectuser.get("is_trans"); |
|||
} |
|||
Map<String,Object> value = new HashMap<>(); |
|||
value.put("is_ocr",is_ocr); |
|||
value.put("is_trans",is_trans); |
|||
subjectuserlimiit.put(userid,value); |
|||
} |
|||
} |
|||
|
|||
} |
|||
|
|||
} |
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,88 @@ |
|||
package com.bfd.mf.entity.mysql; |
|||
|
|||
import com.bfd.mf.datasave.tools.DBUtil; |
|||
import crawler.open.util.RedisUtil; |
|||
import org.apache.log4j.Logger; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
public class cl_task { |
|||
private static Logger log = Logger.getLogger(cl_task.class); |
|||
public static List<String> subtaskstatuslimit = new ArrayList<>(); |
|||
public static List<String> subtaskstatuslimit3 = new ArrayList<>(); |
|||
public static void loadTask(){ |
|||
List<Map<String, Object>> Tasktimelimiit = DBUtil.getInstance("db_stat_alltask").query("SELECT crawl_data_flag,cid FROM `cl_task` WHERE crawl_status=3 and update_time like '%2021-07-14%' GROUP BY crawl_data_flag,cid;"); |
|||
|
|||
if (Tasktimelimiit.size()>0){ |
|||
String newkey = ""; |
|||
for(Map<String, Object> subjectTask : Tasktimelimiit) { |
|||
String keytwo = ""; |
|||
Map<String,String> value = new HashMap<>(); |
|||
List<Map<String,String>> valueList = new ArrayList<>(); |
|||
|
|||
if (subjectTask.get("cid").equals("Tmall")) { |
|||
newkey = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
keytwo = "Taobao" + "#####" + subjectTask.get("crawl_data_flag"); |
|||
} else if (subjectTask.get("cid").equals("Taobao")) { |
|||
newkey = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
keytwo = "Tmall" + "#####" + subjectTask.get("crawl_data_flag"); |
|||
} else { |
|||
newkey = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
} |
|||
subtaskstatuslimit.add(newkey); |
|||
} |
|||
} |
|||
System.out.println(subtaskstatuslimit.size()+"Tasktimelimiit"); |
|||
|
|||
|
|||
List<Map<String, Object>> Tasktimelimiit1 = DBUtil.getInstance("db_stat_alltask").query("SELECT crawl_data_flag ,cid FROM `cl_task` WHERE crawl_status=1 and update_time like '%2021-07-14%' GROUP BY crawl_data_flag,cid;"); |
|||
if (Tasktimelimiit1.size()>0){ |
|||
String newkey = ""; |
|||
for(Map<String, Object> subjectTask1 : Tasktimelimiit1) { |
|||
String keytwo = ""; |
|||
Map<String,String> value = new HashMap<>(); |
|||
List<Map<String,String>> valueList = new ArrayList<>(); |
|||
|
|||
if (subjectTask1.get("cid").equals("Tmall")) { |
|||
newkey = subjectTask1.get("cid") + "#####" + subjectTask1.get("crawl_data_flag"); |
|||
keytwo = "Taobao" + "#####" + subjectTask1.get("crawl_data_flag"); |
|||
} else if (subjectTask1.get("cid").equals("Taobao")) { |
|||
newkey = subjectTask1.get("cid") + "#####" + subjectTask1.get("crawl_data_flag"); |
|||
keytwo = "Tmall" + "#####" + subjectTask1.get("crawl_data_flag"); |
|||
} else { |
|||
newkey = subjectTask1.get("cid") + "#####" + subjectTask1.get("crawl_data_flag"); |
|||
} |
|||
subtaskstatuslimit3.add(newkey); |
|||
|
|||
|
|||
} |
|||
} |
|||
int i=1; |
|||
for(String value:subtaskstatuslimit){ |
|||
if (!subtaskstatuslimit3.contains(value)){ |
|||
|
|||
String newkey = value.toLowerCase(); |
|||
if (RedisUtil.exists(newkey, 10)) { // 先去 redis中查询是否存在,不存直接忽略 |
|||
log.info("需要删除的任务是" + newkey); |
|||
System.out.println("需要删除的任务是" + newkey); |
|||
RedisUtil.del(newkey,10); |
|||
} else { |
|||
log.info("这个任务的状态有为1的,不需要删除" + newkey); |
|||
} |
|||
|
|||
|
|||
|
|||
}else { |
|||
// System |
|||
} |
|||
} |
|||
|
|||
System.out.println(i); |
|||
System.out.println(subtaskstatuslimit3.size()+"Tasktimelimiit"); |
|||
|
|||
|
|||
} |
|||
} |
@ -0,0 +1,13 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<module type="JAVA_MODULE" version="4"> |
|||
<component name="NewModuleRootManager" inherit-compiler-output="true"> |
|||
<exclude-output /> |
|||
<content url="file://$MODULE_DIR$"> |
|||
<sourceFolder url="file://$MODULE_DIR$/java" isTestSource="false" /> |
|||
</content> |
|||
<orderEntry type="inheritedJdk" /> |
|||
<orderEntry type="sourceFolder" forTests="false" /> |
|||
<orderEntry type="library" name="target" level="project" /> |
|||
<orderEntry type="library" name="jarlib" level="project" /> |
|||
</component> |
|||
</module> |
@ -1,66 +1,81 @@ |
|||
package com.bfd.mf.entity.mysql; |
|||
|
|||
|
|||
import com.bfd.crawler.utils.JsonUtils; |
|||
import com.bfd.mf.service.tools.DBUtil; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
public class SubjectTask { |
|||
|
|||
public static Map<String, List<Map<String,String>>> subjectTaskMap = new HashMap<>(); |
|||
public static void loadSubjectTask(){ |
|||
subjectTaskMap.clear(); |
|||
List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("SELECT cst.id, cst.subject_id, cst.task_id, ct.cid, ct.crawl_data_flag,cs.export_to_kafka,cs.kafka_addr,cs.go_fast_addr FROM cl_subject_task cst JOIN cl_subject cs ON (cst.subject_id = cs.id) LEFT JOIN cl_task ct ON (cst.task_id = ct.id );"); |
|||
if(subjectTaskList.size() > 0){ |
|||
String key = ""; |
|||
for(Map<String, Object> subjectTask : subjectTaskList){ //{subject_id=10222, name=我是张三, task_id=188, id=71, crawl_data_flag=aaa} |
|||
key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
Map<String,String> value = new HashMap<>(); |
|||
List<Map<String,String>> valueList = new ArrayList<>(); |
|||
String v_subject_id = ""; |
|||
String v_go_fast_addr = ""; |
|||
String v_export_to_kafka = ""; |
|||
String v_kafka_addr = ""; |
|||
String v_task_id = ""; |
|||
String v_external_id =""; |
|||
if(null != subjectTask.get("subject_id")) { |
|||
v_subject_id = subjectTask.get("subject_id").toString(); |
|||
} |
|||
if(null != subjectTask.get("go_fast_addr")) { |
|||
v_go_fast_addr = subjectTask.get("go_fast_addr").toString(); |
|||
} |
|||
if(null != subjectTask.get("kafka_addr")) { |
|||
v_kafka_addr = subjectTask.get("kafka_addr").toString(); |
|||
} |
|||
if(null != subjectTask.get("export_to_kafka")){ |
|||
v_export_to_kafka = subjectTask.get("export_to_kafka").toString(); |
|||
} |
|||
if(null !=subjectTask.get("task_id")){ |
|||
v_task_id = subjectTask.get("task_id").toString(); |
|||
} |
|||
if(null !=subjectTask.get("external_id")){ |
|||
v_task_id = subjectTask.get("external_id").toString(); |
|||
} |
|||
value.put("subject_id",v_subject_id); |
|||
value.put("go_fast_addr",v_go_fast_addr); |
|||
value.put("export_to_kafka",v_export_to_kafka); |
|||
value.put("kafka_addr",v_kafka_addr); |
|||
value.put("task_id",v_task_id); |
|||
value.put("external_id",v_external_id); |
|||
key = key.toLowerCase(); |
|||
if(subjectTaskMap.containsKey(key)){ |
|||
valueList = subjectTaskMap.get(key); |
|||
valueList.add(value); |
|||
}else{ |
|||
valueList.add(value); |
|||
} |
|||
subjectTaskMap.put(key,valueList); |
|||
} |
|||
System.out.println(JsonUtils.toJSONString(subjectTaskMap)); |
|||
} |
|||
} |
|||
} |
|||
//package com.bfd.mf.entity.mysql; |
|||
// |
|||
// |
|||
//import com.bfd.crawler.utils.JsonUtils; |
|||
//import com.bfd.mf.service.tools.DBUtil; |
|||
// |
|||
//import java.util.ArrayList; |
|||
//import java.util.HashMap; |
|||
//import java.util.List; |
|||
//import java.util.Map; |
|||
// |
|||
//public class SubjectTask { |
|||
// |
|||
// public static Map<String, List<Map<String,String>>> subjectTaskMap = new HashMap<>(); |
|||
// public static void loadSubjectTask(){ |
|||
// subjectTaskMap.clear(); |
|||
// List<Map<String, Object>> subjectTaskList = DBUtil.getInstance("db_stat").query("select cs.status, ct.external_id, ct.subject_id, ct.id, ct.cid, ct.crawl_data_flag,cs.kafka_switch,cs.kafka_addr,cs.go_fast_addr,cs.kafka_topic,cs.go_fast_switch from cl_subject cs Join cl_task ct on(ct.subject_id=cs.id);"); |
|||
// if(subjectTaskList.size() > 0){ |
|||
// String key = ""; |
|||
// for(Map<String, Object> subjectTask : subjectTaskList){ //{subject_id=10222, name=我是张三, task_id=188, id=71, crawl_data_flag=aaa} |
|||
// key = subjectTask.get("cid") + "#####" + subjectTask.get("crawl_data_flag"); |
|||
// Map<String,String> value = new HashMap<>(); |
|||
// List<Map<String,String>> valueList = new ArrayList<>(); |
|||
// String v_subject_id = ""; |
|||
// String v_go_fast_addr = ""; |
|||
// String kafka_switch = ""; |
|||
// String v_kafka_addr = ""; |
|||
// String v_task_id = ""; |
|||
// String v_external_id =""; |
|||
// String v_go_fast_switch=""; |
|||
// String v_kafka_topic=""; |
|||
// String v_status=""; |
|||
// if(null != subjectTask.get("subject_id")) { |
|||
// v_subject_id = subjectTask.get("subject_id").toString(); |
|||
// } |
|||
// if(null != subjectTask.get("go_fast_addr")) { |
|||
// v_go_fast_addr = subjectTask.get("go_fast_addr").toString(); |
|||
// } |
|||
// if(null != subjectTask.get("kafka_addr")) { |
|||
// v_kafka_addr = subjectTask.get("kafka_addr").toString(); |
|||
// } |
|||
// if(null != subjectTask.get("kafka_switch")){ |
|||
// kafka_switch = subjectTask.get("kafka_switch").toString(); |
|||
// } |
|||
// if(null !=subjectTask.get("id")){ |
|||
// v_task_id = subjectTask.get("id").toString(); |
|||
// } |
|||
// if(null !=subjectTask.get("external_id")){ |
|||
// v_external_id = subjectTask.get("external_id").toString(); |
|||
// } |
|||
// if(null !=subjectTask.get("go_fast_switch")){ |
|||
// v_go_fast_switch = subjectTask.get("go_fast_switch").toString(); |
|||
// } |
|||
// if(null !=subjectTask.get("kafka_topic")){ |
|||
// v_kafka_topic = subjectTask.get("kafka_topic").toString(); |
|||
// } |
|||
// if(null !=subjectTask.get("status")){ |
|||
// v_status = subjectTask.get("status").toString(); |
|||
// } |
|||
// value.put("subject_id",v_subject_id); |
|||
// value.put("go_fast_addr",v_go_fast_addr); |
|||
// value.put("export_to_kafka",kafka_switch); |
|||
// value.put("kafka_addr",v_kafka_addr); |
|||
// value.put("task_id",v_task_id); |
|||
// value.put("external_id",v_external_id); |
|||
// value.put("go_fast_switch",v_go_fast_switch); |
|||
// value.put("kafka_topic",v_kafka_topic); |
|||
// value.put("status",v_status);//专题的状态 |
|||
// key = key.toLowerCase(); |
|||
// if(subjectTaskMap.containsKey(key)){ |
|||
// valueList = subjectTaskMap.get(key); |
|||
// valueList.add(value); |
|||
// }else{ |
|||
// valueList.add(value); |
|||
// } |
|||
// subjectTaskMap.put(key,valueList); |
|||
// } |
|||
// // System.out.println(JsonUtils.toJSONString(subjectTaskMap)); |
|||
// } |
|||
// } |
|||
//} |
1036
dataSaveManager/dataSaveManager.iml
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
1034
serviceManager/serviceManager.iml
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
Write
Preview
Loading…
Cancel
Save
Reference in new issue