@ -3,24 +3,24 @@ package com.bfd.mf.datasave.listen;
import com.alibaba.fastjson.JSONArray ;
import com.alibaba.fastjson.JSONArray ;
import com.bfd.crawler.elasti.ElastiProducer ;
import com.bfd.crawler.elasti.ElastiProducer ;
import com.bfd.crawler.kafka7.KfkProducer ;
import com.bfd.crawler.kafka7.KfkProducer ;
import com.bfd.crawler.kafka7.utils.PropertiesParser ;
import com.bfd.crawler.utils.JsonUtils ;
import com.bfd.crawler.utils.JsonUtils ;
import com.bfd.mf.datasave.download.DownLoadFile ;
import com.bfd.mf.datasave.download.DownLoadFile ;
import com.bfd.mf.datasave.download.NewsDownload ;
import com.bfd.mf.datasave.tools.DataCheckUtil ;
import com.bfd.mf.datasave.tools.DateUtil ;
import com.bfd.mf.datasave.tools.DateUtil ;
import com.bfd.mf.datasave.tools.ReadLine ;
import com.bfd.mf.datasave.tools.ReadLine ;
import com.bfd.mf.datasave.tools.WriteMethod ;
import com.bfd.mf.datasave.tools.WriteMethod ;
import com.bfd.mf.entity.AllKeys ;
import com.bfd.mf.entity.AllKeys ;
import com.bfd.mf.entity.FieldNormaliz ;
import com.bfd.mf.entity.FieldNormaliz ;
import com.bfd.mf.entity.mysql.SubjectTask ;
import crawler.open.util.RedisUtil ;
import org.apache.commons.lang3.StringUtils ;
import org.apache.commons.lang3.StringUtils ;
import org.apache.log4j.Logger ;
import org.apache.log4j.Logger ;
import scala.collection.generic.BitOperations ;
import org.omg.Messaging.SYNC_WITH_TRANSPORT ;
/ / import org.apache.logging.log4j.core.parser.ParseException ;
/ / import org.apache.logging.log4j.core.parser.ParseException ;
import java.io.BufferedReader ;
import java.io.File ;
import java.io.File ;
import java.io.FileReader ;
import java.io.IOException ;
import java.text.DateFormat ;
import java.text.DateFormat ;
import java.text.ParseException ;
import java.text.ParseException ;
import java.text.SimpleDateFormat ;
import java.text.SimpleDateFormat ;
@ -28,6 +28,8 @@ import java.util.*;
import java.util.regex.Matcher ;
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
import java.util.regex.Pattern ;
import static com.bfd.crawler.utils.DataUtil.calcMD5 ;
public class DataSaveManager implements Runnable {
public class DataSaveManager implements Runnable {
private static Logger log = Logger . getLogger ( DataSaveManager . class ) ;
private static Logger log = Logger . getLogger ( DataSaveManager . class ) ;
@ -36,12 +38,14 @@ public class DataSaveManager implements Runnable{
// NOTE(review): every declaration below appears twice, and preSubject / kafkaTopic appear with two
// different values; this looks like the old and new sides of a diff interleaved. Keep one of each.
// Subject definitions keyed by string; the constructor shown below receives a map of this type.
private static Map < String , List < Map < String , String > > > subject ;
private static Map < String , List < Map < String , String > > > subject ;
// Table info maps keyed by integer; excData reads the entry stored under bussinessType.
private static Map < Integer , Map < String , String > > tableInfoMap ;
private static Map < Integer , Map < String , String > > tableInfoMap ;
// Prefix used by excData when it builds date/channel index names.
private static String preIndex = "cl_index_" ;
private static String preIndex = "cl_index_" ;
// Prefix used when building per-subject index names.
private static String preSubject = "cl_subject_" ;
/ / private static String preSubject = "cl_major_" ;
private static String preSubject = "cl_major_" ;
// Numeric arguments handed to ElastiProducer.getInstance by writerToSubjectES and writerToIndexES
// respectively (presumably cluster/config numbers - confirm against ElastiProducer).
private static int subjectEsNum = 1 ;
private static int subjectEsNum = 1 ;
private static int indexEsNum = 2 ;
private static int indexEsNum = 2 ;
// Index type argument handed to ElastiProducer.getInstance.
private static String indexType = "docs" ;
private static String indexType = "docs" ;
// Handed to ElastiProducer.getInstance and also used as the lookup key into tableInfoMap.
private static int bussinessType = 1 ;
private static int bussinessType = 1 ;
// Kafka topic name used by excData when it publishes a processed record.
private static String kafkaTopic = "dataFromES_" ;
/ / private static String kafkaTopic = "dataFromES_" ;
private static String kafkaTopic = "databasestokafka" ;
// Default upload URL passed to DownLoadFile.downloadAndSaveFile.
private static String myGoFastAddr = "http://172.18.1.113:8080/upload" ;
private static String myGoFastAddr = "http://172.18.1.113:8080/upload" ;
// Map obtained from AllKeys.getMap(); excData declares a local variable of the same name.
private static Map < String , Object > resultMap = AllKeys . getMap ( ) ;
private static Map < String , Object > resultMap = AllKeys . getMap ( ) ;
// Relative directory path; its use is not visible in this part of the file.
private static String filePath = "../datasaveputkafka_file/" ; / /
private static String filePath = "../datasaveputkafka_file/" ; / /
@ -56,218 +60,667 @@ public class DataSaveManager implements Runnable{
Map < Integer , Map < String , String > > tableInfoMap ) {
Map < Integer , Map < String , String > > tableInfoMap ) {
this . data = data ;
this . data = data ;
this . fieldNormaliz = fieldNormaliz ;
this . fieldNormaliz = fieldNormaliz ;
this . subject = subject ;
/ / this . subject = subject ;
this . tableInfoMap = tableInfoMap ;
this . tableInfoMap = tableInfoMap ;
}
}
/**
 * Processes the single raw record held in {@code data}.
 *
 * <p>The record is parsed as JSON, converted through {@code convertData} and {@code getResponse},
 * and an index name is chosen for it. Depending on the flags found in the converted record, its
 * attachments (avatar, files, images, videos) are downloaded and re-pointed, and the record is
 * written to Elasticsearch, Redis and Kafka through the helper methods of this class.
 * Any exception escaping the body is printed and the raw {@code data} is logged at error level.
 *
 * <p>NOTE(review): this span appears to contain both the pre-change and the post-change lines of
 * a diff interleaved - many statements occur twice, several variables are declared more than once
 * and the braces do not pair up. It has to be resolved to a single version before it can compile.
 */
public void excData ( ) {
public void excData ( ) {
try {
try {
// Timing stamps: start from the record's own "processtime" map when it carries one.
Map < String , Object > timetMap = new HashMap < > ( ) ;
int kafkaNum = fieldNormaliz . getKafkaSerName ( ) ;
int kafkaNum = fieldNormaliz . getKafkaSerName ( ) ;
Map < String , Object > jsonData = JsonUtils . parseObject ( data ) ;
Map < String , Object > jsonData = JsonUtils . parseObject ( data ) ;
if ( jsonData . containsKey ( "processtime" ) ) {
timetMap = JsonUtils . parseObject ( ( String ) jsonData . get ( "processtime" ) ) ;
}
timetMap . put ( "dsbeginreadtime" , System . currentTimeMillis ( ) ) ;
Map < String , String > tableInfo = tableInfoMap . get ( bussinessType ) ;
Map < String , String > tableInfo = tableInfoMap . get ( bussinessType ) ;
String res = convertData ( jsonData , tableInfo ) ;
String res = convertData ( jsonData , tableInfo ) ;
Map < String , Object > resultMap = getResponse ( res ) ; / / resultMap 就是将要写入到 ES 和 kafka 的一条数据
Map < String , Object > resultMap = getResponse ( res ) ; / / resultMap 就是将要写入到 ES 和 kafka 的一条数据
System . out . println ( "The Message : " + JsonUtils . toJSONString ( resultMap ) ) ;
/ / System . out . println ( "The Message subject: " + JsonUtils . toJSONString ( subject ) ) ;
resultMap . remove ( "processtime" ) ;
/ / Map < String , Object > resultindexMap = new HashMap < String , Object > ( resultMap ) ;
/ / System . out . println ( "The Message : " + JsonUtils . toJSONString ( resultMap ) ) ;
/ / 1 、 先判断是主贴还是评论 主贴写日期索引 , 回帖评论写 渠道索引
/ / 1 、 先判断是主贴还是评论 主贴写日期索引 , 回帖评论写 渠道索引
// Derive the index name, then parse the number that follows "cl_index_" up to the first "-".
// The branches below override the index name based on "primary", "primaryPost", "docType" and
// that number.
String dateIndexName = getIndexName ( resultMap ) ;
String dateIndexName = getIndexName ( resultMap ) ;
System . out . println ( dateIndexName ) ;
int index = Integer . parseInt ( dateIndexName . split ( "cl_index_" ) [ 1 ] . split ( "-" ) [ 0 ] ) ;
int index = Integer . parseInt ( dateIndexName . split ( "cl_index_" ) [ 1 ] . split ( "-" ) [ 0 ] ) ;
if ( resultMap . containsKey ( "primary" ) & & resultMap . get ( "primary" ) . toString ( ) . equals ( "0" ) ) {
String pubTime = resultMap . get ( "pubTime" ) . toString ( ) ;
if ( resultMap . containsKey ( "primary" ) & & resultMap . get ( "primary" ) . toString ( ) . equals ( "0" ) & & ! resultMap . get ( "docType" ) . toString ( ) . equals ( "item" ) ) {
dateIndexName = preIndex + resultMap . get ( "docType" ) . toString ( ) ;
dateIndexName = preIndex + resultMap . get ( "docType" ) . toString ( ) ;
} else if ( resultMap . containsKey ( "primaryPost" ) & & resultMap . get ( "primaryPost" ) . toString ( ) . equals ( "5" ) ) {
dateIndexName = "cl_index_item" ;
}
else if ( resultMap . containsKey ( "primary" ) & & resultMap . get ( "primary" ) . toString ( ) . equals ( "2" ) ) {
dateIndexName = "cl_index_user" ;
}
}
System . out . println ( "切割后的索引名字" + index ) ;
/ / writerToKafka ( 2 , "dataFromES_10000tw" , resultMap ) ;
/ / writerToSubjectES ( "cl_subject_10429" , resultMap ) ;
if ( index > 2015 ) {
else if ( index > = 2000 & & index < 2020 ) {
dateIndexName = preIndex + index ;
}
else if ( index < 2000 ) {
dateIndexName = "cl_index_1990" ;
}
// Write the record to the chosen index; a failure is logged together with the record's JSON.
try {
resultMap . remove ( "primaryPost" ) ;
writerToIndexES ( dateIndexName , resultMap ) ;
writerToIndexES ( dateIndexName , resultMap ) ;
} catch ( Exception e ) {
log . error ( "数据写入日期es有问题,data=" + JsonUtils . toJSONString ( resultMap ) ) ;
e . printStackTrace ( ) ;
}
try {
/ / 新闻的主贴数据存es 供列表页扩散出的详情页来用
if ( resultMap . containsKey ( "pageType" ) & & "newscontent" . equals ( resultMap . get ( "pageType" ) . toString ( ) ) ) {
newscontnetwriterToredis ( resultMap , dateIndexName ) ;
}
}
/ / System . out . println ( "-----------------------继续后面的步骤哇--------------------: " + JsonUtils . toJSONString ( resultMap ) ) ;
/ / 2 、 判断数据中是否要下载标识 , 如果有需要先下载对应的文件 , 然后替换存储路径后再保存数据
if ( resultMap . containsKey ( "crawlDataFlag" ) & & resultMap . containsKey ( "isDownload" ) ) { / / resultMap . containsKey ( "isDownload" )
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
/ / try {
/ / writerToKafka ( 5 , "taskSign" , resultMap ) ;
/ / } catch ( Exception e ) {
/ / e . printStackTrace ( ) ;
/ / }
timetMap . put ( "dsendreadtime" , System . currentTimeMillis ( ) ) ;
timetMap . put ( "dbeginsentes" , System . currentTimeMillis ( ) ) ;
/ / 处理新闻的主贴
/ / 对于非新闻的以及非上传的数据
if ( resultMap . containsKey ( "crawlDataFlag" ) & & ! resultMap . containsKey ( "subjectId" ) ) {
/ / resultMap . containsKey ( "isDownload" )
String key = getAllMapKey ( resultMap ) ;
String key = getAllMapKey ( resultMap ) ;
/ / String getUrl = ( String ) resultMap . get ( "filePath" ) ;
List < String > filePathlist = new ArrayList < > ( ) ;
List < String > imagePathlist = new ArrayList < > ( ) ;
List < String > videoPathlist = new ArrayList < > ( ) ;
List < String > filePath = ( List < String > ) resultMap . get ( "filePath" ) ;
List < String > filePath = ( List < String > ) resultMap . get ( "filePath" ) ;
List < String > imagePath = ( List < String > ) resultMap . get ( "imagePath" ) ;
List < String > imagePath = ( List < String > ) resultMap . get ( "imagePath" ) ;
List < String > videoPath = ( List < String > ) resultMap . get ( "videoPath" ) ;
List < String > videoPath = ( List < String > ) resultMap . get ( "videoPath" ) ;
String avatarPath = resultMap . get ( "avatarPath" ) . toString ( ) ;
/ / 从 subject 中可以获取到这个key 对应的 专题信息
/ / 从 subject 中可以获取到这个key 对应的 专题信息
System . out . println ( "key == " + key + " **** " + JsonUtils . toJSONString ( subject ) ) ;
if ( subject . containsKey ( key ) ) {
List < Map < String , String > > subjectList = subject . get ( key ) ;
if ( disposeCrawldataflag ( key ) ) {
String getsubjectList = RedisUtil . get ( key , 10 ) ;
List < Map < String , String > > subjectList = ( List < Map < String , String > > ) JsonUtils . parseArray ( getsubjectList ) ;
for ( Map < String , String > subjectMap : subjectList ) {
for ( Map < String , String > subjectMap : subjectList ) {
String go_fast_addr = subjectMap . get ( "go_fast_addr" ) ;
List < Map < String , String > > imagePathSizevalue = new ArrayList < > ( ) ;
List < Map < String , String > > videoPathSizevalueList = new ArrayList < > ( ) ;
List < Map < String , String > > filePathSizevalueList = new ArrayList < > ( ) ;
List < String > ocrText = ( List < String > ) resultMap . get ( "ocrText" ) ;
System . out . println ( key + "=====" ) ;
long maxtime = Long . parseLong ( subjectMap . get ( "maxtime" ) ) ;
long mintme = Long . parseLong ( subjectMap . get ( "mintime" ) ) ;
long pubTimecomape = Long . parseLong ( pubTime ) ;
String subject_id = subjectMap . get ( "subject_id" ) ;
String subject_id = subjectMap . get ( "subject_id" ) ;
String isDownload = ( String ) resultMap . get ( "isDownload" ) ;
String isDownload = ( String ) resultMap . get ( "isDownload" ) ;
if ( isDownload . equals ( "true" ) ) {
String putUrl = myGoFastAddr ;
if ( ! go_fast_addr . isEmpty ( ) ) {
putUrl = go_fast_addr ;
String appid = subjectMap . get ( "appid" ) ;
String crawl_content_key = subjectMap . get ( "crawl_content_key" ) ;
String primary = resultMap . get ( "primary" ) . toString ( ) ;
String docType = ( String ) resultMap . get ( "docType" ) ;
String pageType = ( String ) resultMap . get ( "pageType" ) ;
String asrText = ( String ) resultMap . get ( "asrText" ) ;
String hasTrans = resultMap . get ( "hasTrans" ) . toString ( ) ;
/ / String ocrText = ( String ) resultMap . get ( "ocrText" ) ;
if ( ( pubTimecomape - maxtime < = 0 & & pubTimecomape - mintme > = 0 ) | | "eccontent" . equals ( pageType ) | | "2" . equals ( primary ) | | "socailFollow" . equals ( pageType ) ) {
if ( "eccontent" . equals ( pageType ) ) {
long pubtime = maxtime - 1000 * 60 * 30 ;
/ / System . out . println ( pubtime + "=======" ) ;
resultMap . put ( "pubDate" , DataCheckUtil . getDate ( pubtime ) ) ;
resultMap . put ( "pubDay" , DataCheckUtil . getDay ( pubtime ) ) ;
resultMap . put ( "pubTime" , pubtime ) ;
resultMap . put ( "pubTimeStr" , DataCheckUtil . getCurrentTime ( pubtime ) ) ;
}
resultMap . remove ( "primaryPost" ) ;
if ( "1" . equals ( primary ) ) {
writerToredis ( resultMap , subject_id ) ;
}
if ( isDownload . equals ( "true" ) & & "" . equals ( asrText ) & & ocrText . size ( ) = = 0 & & "0" . equals ( hasTrans ) & & ! "newscontent" . equals ( pageType ) ) {
timetMap . put ( "begindowloadtime" , System . currentTimeMillis ( ) ) ;
String putUrl = myGoFastAddr ; / / 全部默认到113上的gofast
if ( ! avatarPath . equals ( "" ) ) {
String resulturl = null ;
try {
Map < String , Object > resultmap = DownLoadFile . downloadAndSaveFile ( avatarPath , putUrl ) ;
resulturl = ( String ) resultmap . get ( "realUrl" ) ;
/ / if ( resulturl . contains ( "172.18.1.113" ) ) {
/ / resulturl = resulturl . replace ( "172.18.1.113:8080" , "crawl-files.pontoaplus.com" ) ;
/ / }
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) {
resultMap . put ( "avatarPath" , resulturl ) ;
}
else {
resultMap . put ( "avatarPath" , avatarPath ) ;
}
}
}
if ( filePath . size ( ) > 0 ) {
if ( filePath . size ( ) > 0 ) {
/ / List < Map < String , String > > filePathSizevalueList = new ArrayList < > ( ) ;
List < String > filePathlist = new ArrayList < > ( ) ;
Iterator < String > it = filePath . iterator ( ) ;
Iterator < String > it = filePath . iterator ( ) ;
List < Map < String , String > > valueList = new ArrayList < > ( ) ;
Map < String , String > filemap = new HashMap < > ( ) ;
Map < String , String > rerversemap = new HashMap < > ( ) ;
while ( it . hasNext ( ) ) {
while ( it . hasNext ( ) ) {
Map < String , String > filemap = new HashMap < > ( ) ;
String geturl = it . next ( ) ;
String geturl = it . next ( ) ;
Map < String , Object > resultmap = DownLoadFile . downloadAndSaveFile ( geturl , putUrl ) ;
String resulturl = ( String ) resultmap . get ( "realUrl" ) ;
String size = resultmap . get ( "size" ) . toString ( ) + "KB" ;
Map < String , Object > resultmap = null ;
String resulturl = null ; String size = null ;
try {
resultmap = DownLoadFile . downloadAndSaveFile ( geturl , putUrl ) ;
resulturl = ( String ) resultmap . get ( "realUrl" ) ;
resulturl = resulturl . replace ( "http://172.18.1.113:8080" , "" ) ;
size = resultmap . get ( "size" ) . toString ( ) + "KB" ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) {
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) {
filemap . put ( resulturl , size ) ;
filemap . put ( "size" , size ) ;
filemap . put ( "videoTime" , "" ) ;
filemap . put ( "url" , resulturl ) ;
filemap . put ( "resolution" , "" ) ;
filePathlist . add ( resulturl ) ;
filePathlist . add ( resulturl ) ;
filePathSizevalueList . add ( filemap ) ;
rerversemap . put ( geturl , resulturl ) ; / / 原始的gofast 以及对应的gofast
} else {
} else {
System . out . print ( "很遗憾,怎么有下载失败了" ) ;
filePath . add ( geturl ) ;
filemap . put ( geturl , size ) ;
filePathlist . add ( geturl ) ;
rerversemap . put ( geturl , resulturl ) ; / / 原始的gofast 以及对应的gofast
}
}
}
}
/ / valueList . add ( filemap ) ;
/ / if ( videoPathlist . size ( ) > 0 ) {
/ / resultMap . put ( "ugc" , 1 ) ;
/ / }
/ / else {
/ / resultMap . put ( "ugc" , 0 ) ;
/ / }
resultMap . put ( "filePathSize" , JsonUtils . toJSONString ( filemap ) ) ;
resultMap . put ( "filePathSize" , JsonUtils . toJSONString ( filePathSizevalueList ) ) ;
resultMap . put ( "filePath" , filePathlist ) ;
resultMap . put ( "filePath" , filePathlist ) ;
if ( filePathSizevalueList . size ( ) > 0 ) {
resultMap . put ( "ugc" , 1 ) ;
Map < String , Object > forwardUrl = gofastswitch ( rerversemap , resultMap ) ;
String reforwardUrl = ( String ) forwardUrl . get ( "srcfilePath" ) ;
if ( reforwardUrl ! = null & & reforwardUrl . length ( ) > 0 ) {
resultMap . put ( "srcfilePath" , reforwardUrl ) ;
}
}
else {
resultMap . put ( "ugc" , 0 ) ;
}
}
}
if ( imagePath . size ( ) > 0 ) {
if ( imagePath . size ( ) > 0 ) {
List < Map < String , String > > valueList = new ArrayList < > ( ) ;
Map < String , String > imagemap = new HashMap < > ( ) ;
/ / List < Map < String , String > > imagePathSize value = new ArrayList < > ( ) ; / / 初始化图片
List < String > imagePathlist = new ArrayList < > ( ) ;
Iterator < String > it = imagePath . iterator ( ) ;
Iterator < String > it = imagePath . iterator ( ) ;
Map < String , String > rerversemap = new HashMap < > ( ) ;
while ( it . hasNext ( ) ) {
while ( it . hasNext ( ) ) {
Map < String , String > imagemap = new HashMap < > ( ) ;
String geturl = it . next ( ) ;
String geturl = it . next ( ) ;
Map < String , Object > resultmap = DownLoadFile . downloadAndSaveFile ( geturl , putUrl ) ;
String resulturl = ( String ) resultmap . get ( "realUrl" ) ;
String size = resultmap . get ( "size" ) . toString ( ) + "KB" ;
Map < String , Object > resultmap = null ;
String resolution = null ; String resulturl = null ; String size = "" ;
try {
try {
resultmap = DownLoadFile . downloadAndSaveFile ( geturl , putUrl ) ;
resolution = DownLoadFile . imagesize ( geturl ) ;
resulturl = ( String ) resultmap . get ( "realUrl" ) ;
resulturl = resulturl . replace ( "http://172.18.1.113:8080" , "" ) ;
size = resultmap . get ( "size" ) . toString ( ) + "KB" ;
} catch ( Exception e ) {
/ / System . out . print ( resulturl ) ;
e . printStackTrace ( ) ;
}
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) {
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) {
imagemap . put ( resulturl , size ) ; / / url + size
imagemap . put ( "size" , size ) ;
imagemap . put ( "videoTime" , "" ) ;
imagemap . put ( "url" , resulturl ) ;
imagemap . put ( "resolution" , resolution ) ;
imagePathlist . add ( resulturl ) ; / / url
imagePathlist . add ( resulturl ) ; / / url
imagePathSizevalue . add ( imagemap ) ;
rerversemap . put ( geturl , resulturl ) ; / / 原始的gofast 以及对应的gofast
} else {
} else {
System . out . print ( "很遗憾,怎么有下载失败了" ) ;
imagePath . add ( geturl ) ;
imagemap . put ( geturl , size ) ;
imagePathlist . add ( geturl ) ;
System . out . print ( resulturl + "=======" ) ;
rervers emap. put ( geturl , resulturl ) ; / / 原始的gofast 以及对应的gofast
}
}
}
}
/ / valueList . add ( imagemap ) ;
/ / if ( imagePath . size ( ) > 0 ) {
/ / resultMap . put ( "pgc" , 1 ) ;
/ / }
/ / else {
/ / resultMap . put ( "pgc" , 0 ) ;
/ / }
resultMap . put ( "imagePathSize" , JsonUtils . toJSONString ( imagemap ) ) ;
resultMap . put ( "imagePathSize" , JsonUtils . toJSONString ( imagePathSizevalue ) ) ;
resultMap . put ( "imagePath" , imagePathlist ) ;
resultMap . put ( "imagePath" , imagePathlist ) ;
if ( imagePathSizevalue . size ( ) > 0 ) {
resultMap . put ( "pgc" , 1 ) ;
Map < String , Object > repicturl = gofastswitch ( rerversemap , resultMap ) ;
String picturl = ( String ) repicturl . get ( "srcimagePath" ) ;
if ( picturl ! = null & & picturl . length ( ) > 0 ) {
resultMap . put ( "srcimagePath" , picturl ) ;
}
}
else {
resultMap . put ( "pgc" , 0 ) ;
}
}
}
if ( videoPath . size ( ) > 0 ) {
if ( videoPath . size ( ) > 0 ) {
List < Map < String , String > > valueList = new ArrayList < > ( ) ;
Map < String , String > videomap = new HashMap < > ( ) ;
/ / List < Map < String , String > > videoPathSizevalueList = new ArrayList < > ( ) ;
String videoTime = resultMap . get ( "videoTime" ) . toString ( ) ;
List < String > videoPathlist = new ArrayList < > ( ) ;
Map < String , String > rerversemap = new HashMap < > ( ) ;
Iterator < String > it = videoPath . iterator ( ) ;
Iterator < String > it = videoPath . iterator ( ) ;
while ( it . hasNext ( ) ) {
while ( it . hasNext ( ) ) {
Map < String , String > videomap = new HashMap < > ( ) ;
String geturl = it . next ( ) ;
String geturl = it . next ( ) ;
System . out . println ( putUrl + "putUrl是哪个啊" ) ;
Map < String , Object > resultmap = DownLoadFile . downloadAndSaveFile ( geturl , putUrl ) ;
String resulturl = ( String ) resultmap . get ( "realUrl" ) ;
String size = resultmap . get ( "size" ) . toString ( ) + "KB" ;
System . out . println ( "视频地址啊" + resulturl ) ;
Map < String , Object > resultmap = null ;
String resulturl = null ; String size = "" ;
try {
resultmap = DownLoadFile . downloadAndSaveFile ( geturl , putUrl ) ;
resulturl = ( String ) resultmap . get ( "realUrl" ) ;
resulturl = resulturl . replace ( "http://172.18.1.113:8080" , "" ) ;
size = resultmap . get ( "size" ) . toString ( ) + "KB" ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) { / / 判断 是否下载成功
if ( resulturl ! = null & & resulturl . length ( ) ! = 0 ) { / / 判断 是否下载成功
videomap . put ( geturl , size ) ;
videoPathlist . add ( resulturl ) ; } else {
videomap . put ( "size" , size ) ;
videomap . put ( "videoTime" , videoTime ) ;
videomap . put ( "url" , resulturl ) ;
videomap . put ( "resolution" , "" ) ;
videoPathlist . add ( resulturl ) ;
videoPathSizevalueList . add ( videomap ) ;
rerversemap . put ( geturl , resulturl ) ; / / 原始的gofast 以及对应的gofast
} else {
videoPathlist . add ( geturl ) ;
videoPathlist . add ( geturl ) ;
videomap . put ( geturl , size ) ;
rerverse map. put ( geturl , resulturl ) ; / / 原始的gofast 以及对应的gofast
}
}
}
}
/ / valueList . add ( videomap ) ;
if ( videoPathlist . size ( ) > 0 ) {
if ( videoPathSizevalueList . size ( ) > 0 ) {
resultMap . put ( "egc" , 1 ) ;
resultMap . put ( "egc" , 1 ) ;
Map < String , Object > revideoUrl = null ;
try {
revideoUrl = gofastswitch ( rerversemap , resultMap ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
String videoUrl = ( String ) revideoUrl . get ( "srcvideoPath" ) ;
if ( videoUrl ! = null & & videoUrl . length ( ) > 0 ) {
resultMap . put ( "srcvideoPath" , videoUrl ) ;
}
}
}
else {
else {
resultMap . put ( "egc" , 0 ) ;
resultMap . put ( "egc" , 0 ) ;
}
}
resultMap . put ( "videoPathSize" , JsonUtils . toJSONString ( videomap ) ) ;
resultMap . put ( "videoPathSize" , JsonUtils . toJSONString ( videoPathSizevalueList ) ) ;
resultMap . put ( "videoPath" , videoPathlist ) ;
resultMap . put ( "videoPath" , videoPathlist ) ;
}
}
timetMap . put ( "enddowloadtime" , System . currentTimeMillis ( ) ) ;
} else {
/ / 新闻主贴的处理逻辑newscontent ,
/ / downloadPic , downloadFile , downloadVideo , 若有一个则需要进行isdown为true
/ / videoPath = = egc
/ / filePath = = ugc
/ / imagePath = = pgc
if ( crawl_content_key . contains ( "downloadPic" ) & & imagePath . size ( ) > 0 ) {
NewsDownload . downloadAndSaveimage ( resultMap , imagePathSizevalue ) ;
}
if ( crawl_content_key . contains ( "downloadFile" ) & & filePath . size ( ) > 0 ) {
NewsDownload . downloadAndSaveFile ( resultMap , filePathSizevalueList ) ;
}
if ( crawl_content_key . contains ( "downloadVideo" ) & & videoPath . size ( ) > 0 ) {
NewsDownload . downloadAndSavevideo ( resultMap , videoPathSizevalueList ) ;
}
}
}
if ( filePathSizevalueList . size ( ) = = 0 & & imagePathSizevalue . size ( ) = = 0 & & videoPathSizevalueList . size ( ) = = 0 ) {
resultMap . put ( "isDownload" , "false" ) ;
}
if ( ocrText . size ( ) > 0 ) {
resultMap . put ( "hasOCR" , 1 ) ;
resultMap . put ( "ocrLength" , ocrText . size ( ) ) ;
}
if ( ! "" . equals ( asrText ) ) {
resultMap . put ( "hasASR" , 1 ) ;
resultMap . put ( "asrLength" , asrText . length ( ) ) ;
}
timetMap . put ( "dbeginsentes" , System . currentTimeMillis ( ) ) ;
String task_id = subjectMap . get ( "task_id" ) ;
String task_id = subjectMap . get ( "task_id" ) ;
String external_id = subjectMap . get ( "external_id" ) ;
String external_id = subjectMap . get ( "external_id" ) ;
resultMap . put ( "taskId" , task_id ) ;
resultMap . put ( "taskId" , task_id ) ;
resultMap . put ( "externalId" , external_id ) ;
resultMap . put ( "externalId" , external_id ) ;
String indexName = preSubject + subject_id ;
String indexName = preSubject + subject_id ;
if ( ! "134ic" . equals ( appid ) ) {
indexName = preSubject + appid + "_" + subject_id ;
}
try {
if ( subjectMap . get ( "del" ) . equals ( "0" ) ) { / / 判断专题是否删除和专题是否在使用中
/ / 数据写入到对应的专题索引中
/ / 数据写入到对应的专题索引中
if ( "1" . equals ( primary ) ) {
writerToSubjectES ( indexName , resultMap ) ;
} else if ( "0" . equals ( primary ) & & ! "socailFollow" . equals ( pageType ) ) {
boolean ishave = disposeComment ( resultMap , subject_id ) ;
if ( ishave ) {
writerToSubjectES ( indexName , resultMap ) ;
writerToSubjectES ( indexName , resultMap ) ;
}
} else {
writerToSubjectES ( indexName , resultMap ) ;
}
}
if ( subjectMap . get ( "is_trans" ) . equals ( "1" ) & & "0" . equals ( hasTrans ) ) { / / 判断是否需要翻译
writerToKafka ( 5 , "trans_topic" , resultMap ) ;
} if ( subjectMap . get ( "is_ocr" ) . equals ( "1" ) & & "" . equals ( asrText ) & & ocrText . size ( ) = = 0 ) {
List < String > revideoPath = ( List < String > ) resultMap . get ( "videoPath" ) ;
List < String > revideoPathlist = new ArrayList < > ( ) ;
if ( revideoPath . size ( ) > 0 ) {
Iterator < String > it = revideoPath . iterator ( ) ;
while ( it . hasNext ( ) ) {
String url = it . next ( ) ;
if ( url . contains ( "http" ) ) {
revideoPathlist . add ( url ) ;
} else {
url = "http://172.18.1.113:8892" + url ;
revideoPathlist . add ( url ) ;
}
}
resultMap . put ( "videoPath" , revideoPathlist ) ;
writerToKafka ( 5 , "xhs1223" , resultMap ) ;
}
String kafka_addr = subjectMap . get ( "kafka_addr" ) ;
if ( ! kafka_addr . isEmpty ( ) ) {
/ / 数据写入到指定的kafka 中
kafkaTopic = kafkaTopic + "_" + subject_id ;
/ / kafkaNum 指的是etc 中 kafka 配置文件的编号
int num = checkPathExists ( kafka_addr ) ;
if ( num > 0 ) {
System . out . print ( num ) ;
writerToKafka ( num , "dataFromES_10000" , resultMap ) ; }
}
}
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
}
try {
writerToKafka ( 5 , kafkaTopic , resultMap ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
}
} else {
System . out . println ( " 这条数据都没有标识位,就不往专题的索引存储了呗!!!!" + resultMap . get ( "dataId" ) ) ;
} else {
log . info ( "数据不在时间范围内 craldataflag = " + key + " ; data = " + JsonUtils . toJSONString ( resultMap ) ) ;
}
}
} else if ( resultMap . containsKey ( "crawlDataFlag" ) & & ! "" . equals ( resultMap . get ( "crawlDataFlag" ) . toString ( ) ) ) {
/ / if ( resultMap . containsKey ( "Secondarypush" ) ) { / / 第二次推送了
/ / / / resultMap . get ( "Secondarypush" ) . toString ( ) ;
/ / / / System . out . println ( "Secondarypush+++++++++++++++++++++++++++++++++" ) ;
/ / WriteMethod . writeMethod ( "mysqlnocrawldataflag.txt" , JsonUtils . toJSONString ( resultMap ) ) ;
/ / } else {
resultMap . put ( "Secondarypush" , "1" ) ;
WriteMethod . writeMethod ( "mysqlnocrawldataflag.txt" , JsonUtils . toJSONString ( resultMap ) ) ;
try {
writerToKafka ( 2 , "newsSecondarypush_newfilter1" , resultMap ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
/ / }
}
} else if ( resultMap . containsKey ( "crawlDataFlag" ) & & resultMap . containsKey ( "subjectId" ) ) {
String indexName = preSubject + resultMap . get ( "subjectId" ) ;
writerToSubjectES ( indexName , resultMap ) ;
}
}
else {
System . out . println ( " 这条数据都没有标识位,就不往专题的索引存储了呗!!!!" + resultMap . get ( "dataId" ) ) ;
}
// Attach the collected timing stamps and publish the record to the "timelimit" topic, then strip
// them again before the second write to the date/channel index.
timetMap . put ( "dendsentes" , System . currentTimeMillis ( ) ) ;
resultMap . put ( "processtime" , timetMap ) ;
try {
writerToKafka ( 5 , "timelimit" , resultMap ) ;
/ / WriteMethod . writeMethod ( "20210421.txt" , JsonUtils . toJSONString ( resultMap ) ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
try {
resultMap . remove ( "processtime" ) ;
writerToIndexES ( dateIndexName , resultMap ) ;
} catch ( Exception e ) {
log . error ( "数据第二次写入日期es有问题,data=" + JsonUtils . toJSONString ( resultMap ) ) ;
e . printStackTrace ( ) ;
}
} catch ( Exception e ) {
} catch ( Exception e ) {
e . printStackTrace ( ) ;
e . printStackTrace ( ) ;
/ / System . out . println ( "+++++++++++++++++++" + data ) ;
log . error ( data ) ;
log . error ( data ) ;
}
}
}
}
/**
 * Sends one record to a subject Elasticsearch index.
 *
 * <p>The record is stamped with {@code createTime} / {@code createTimeStr}, dumped to a local
 * file through {@code WriteMethod.writeMethod}, and - when its {@code docId} is non-empty -
 * serialized to JSON and handed to the {@code ElastiProducer} obtained for {@code indexName}.
 *
 * <p>NOTE(review): the header and several statements appear twice, and there are two nearly
 * identical println lines and two different dump files; this looks like the old and new sides of
 * a diff interleaved and needs resolving to one version.
 * NOTE(review): {@code responseMap.get("docId").toString()} throws when docId is absent, so the
 * later null check cannot be reached with a null value.
 *
 * @param indexName   name of the target index
 * @param responseMap record to write; modified in place by the createTime stamps
 */
private static void writerToSubjectES ( String indexName , Map < String , Object > responseMap ) {
private static void writerToSubjectES ( String indexName , Map < String , Object > responseMap ) {
System . out . println ( "==========================写入到【专题】ES : ==========" + indexName + " - " + responseMap . get ( "docId" ) ) ;
WriteMethod . writeMethod ( "zhuti.txt" , JsonUtils . toJSONString ( responseMap ) ) ;
/ / System . out . println ( "==========================写入到【专题】ES : ==========" + indexName + " - " + responseMap . get ( "videoPath" ) ) ;
String docId = responseMap . get ( "docId" ) . toString ( ) ;
long dateTime = System . currentTimeMillis ( ) ;
responseMap . put ( "createTime" , dateTime ) ;
responseMap . put ( "createTimeStr" , DataCheckUtil . getCurrentTime ( dateTime ) ) ;
System . out . println ( "==========================写入到【专题】ES :==========" + indexName + " - " + responseMap . get ( "docId" ) ) ;
if ( null ! = docId & & ! ( "" ) . equals ( docId ) ) {
WriteMethod . writeMethod ( "20210621.txt" , JsonUtils . toJSONString ( responseMap ) ) ;
ElastiProducer elastiProducer = ElastiProducer . getInstance ( bussinessType , subjectEsNum , indexName , indexType ) ;
ElastiProducer elastiProducer = ElastiProducer . getInstance ( bussinessType , subjectEsNum , indexName , indexType ) ;
elastiProducer . sendMessageToEs ( JsonUtils . toJSONString ( responseMap ) ) ;
elastiProducer . sendMessageToEs ( JsonUtils . toJSONString ( responseMap ) ) ;
/ / System . out . println ( "==========================写入到【专题】ES : ==========" + indexName + " - " + JsonUtils . toJSONString ( responseMap ) ) ;
}
}
}
private static void writerToIndexES ( String indexName , Map < String , Object > responseMap ) {
private static void writerToIndexES ( String indexName , Map < String , Object > responseMap ) {
long dateTime = System . currentTimeMillis ( ) ;
responseMap . put ( "createTime" , dateTime ) ;
responseMap . put ( "createTimeStr" , DataCheckUtil . getCurrentTime ( dateTime ) ) ;
String docId = responseMap . get ( "docId" ) . toString ( ) ;
System . out . println ( "==========================写入到【日期】ES : ==========" + indexName + " - " + responseMap . get ( "docId" ) ) ;
System . out . println ( "==========================写入到【日期】ES : ==========" + indexName + " - " + responseMap . get ( "docId" ) ) ;
if ( null ! = docId & & ! ( "" ) . equals ( docId ) ) {
/ / WriteMethod . writeMethod ( "2021525like.txt" , JsonUtils . toJSONString ( responseMap ) ) ;
ElastiProducer elastiProducer = ElastiProducer . getInstance ( bussinessType , indexEsNum , indexName , indexType ) ;
ElastiProducer elastiProducer = ElastiProducer . getInstance ( bussinessType , indexEsNum , indexName , indexType ) ;
elastiProducer . sendMessageToEs ( JsonUtils . toJSONString ( responseMap ) ) ;
elastiProducer . sendMessageToEs ( JsonUtils . toJSONString ( responseMap ) ) ;
}
}
private static void writerToredis ( Map < String , Object > responseMap , String getsubject_id ) {
String docId = ( String ) responseMap . get ( "docId" ) ;
String enSource = ( String ) responseMap . get ( "enSource" ) ;
String subject_id = getsubject_id ;
String keys = enSource + "#" + docId + "#" + subject_id ;
int dbindex = hash ( keys , 9 ) ;
log . info ( "[ ForegroundExtendType ] 往 Redis 中灌入商品详情数据 dbIndex = " + dbindex + " ; keys = " + keys ) ;
if ( null ! = docId & & ! ( "" ) . equals ( docId ) ) {
RedisUtil . set ( keys , subject_id , dbindex ) ;
}
}
}
private static void writerToKafka ( int kafkaNum , String indexName , String key , Map < String , Object > responseMap ) {
private static void newscontnetwriterToredis ( Map < String , Object > responseMap , String dateIndexName ) {
String url = ( String ) responseMap . get ( "url" ) ;
String subject_id = dateIndexName ;
String keys = url ;
int dbindex = hash ( keys , 5 ) ;
dbindex = 15 - dbindex ;
log . info ( "[ ForegroundExtendType ] 往 Redis 中新闻的url dbIndex = " + dbindex + " ; keys = " + keys ) ;
if ( null ! = url & & ! ( "" ) . equals ( url ) ) {
RedisUtil . set ( keys , subject_id , dbindex ) ;
}
}
private boolean disposeComment ( Map < String , Object > newdataMap , String getsubject_id ) {
try {
try {
List < Map < String , String > > subjects = subject . get ( key ) ;
if ( subjects . size ( ) > 0 ) {
for ( Map < String , String > sub : subjects ) {
String subjectId = sub . get ( "subject_id" ) ;
String exportToKafka = sub . get ( "export_to_kafka" ) ;
String kafkaAddr = sub . get ( "kafka_addr" ) ;
/ / System . out . println ( "indexName : " + indexName + " ; subjectId : " + subjectId ) ;
if ( indexName . contains ( subjectId ) & & exportToKafka . equals ( "1" ) ) {
System . out . println ( "-----------------------------------------将数据写到对应的 kafka 中 : " + kafkaAddr ) ;
/ / KfkProducer . getInstance ( ) . send ( "test0910" , JsonUtils . toJSONString ( responseMap ) ) ;
if ( newdataMap . containsKey ( "docId" ) ) {
String docId = ( String ) newdataMap . get ( "docId" ) ;
if ( null ! = docId & & ! ( "" ) . equals ( docId ) ) {
String enSource = ( String ) newdataMap . get ( "enSource" ) ;
String subject_id = getsubject_id ;
String keys = enSource + "#" + docId + "#" + subject_id ;
int dbindex = hash ( keys , 9 ) ;
if ( RedisUtil . exists ( keys , dbindex ) ) { / / 先去 redis中查询是否存在 , 不存直接忽略
return true ;
} else {
log . error ( "[ForegroundExtendType] exec >>> 电商灌数:该 key 在 Redis 中不存在!!! keys = " + keys + " ; dbindex = " + dbindex ) ;
return false ;
}
}
}
}
} else {
System . out . println ( "空的????????" + key ) ;
/ / return false ;
}
return false ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
return false ;
}
}
private boolean disposeCrawldataflag ( String crawldataflag ) {
try {
/ / if ( newdataMap . containsKey ( "docId" ) ) {
if ( RedisUtil . exists ( crawldataflag , 10 ) ) { / / 先去 redis中查询是否存在 , 不存直接忽略
String value = RedisUtil . get ( crawldataflag , 10 ) ;
if ( null ! = value & & ! ( "" ) . equals ( value ) ) {
return true ;
}
} else {
log . error ( "[datasave] exec >>> 灌数:该 crwaldataflag 在 Redis 中不存在!!! keys = " + crawldataflag + " ; dbindex = " + 10 ) ;
return false ;
}
}
/ / return false ;
/ / }
return false ;
} catch ( Exception e ) {
} catch ( Exception e ) {
e . printStackTrace ( ) ;
e . printStackTrace ( ) ;
return false ;
}
}
private static Map < String , Object > gofastswitch ( Map < String , String > rerversemap , Map < String , Object > responseMap ) { / / 原始的gofast 以及下载后的gofast地址
Integer pgc = ( Integer ) responseMap . get ( "pgc" ) ; / / 图片
Integer egc = ( Integer ) responseMap . get ( "egc" ) ; / / 视频
Integer ugc = ( Integer ) responseMap . get ( "ugc" ) ; / / 文件
List < String > imagePath = ( List < String > ) responseMap . get ( "imagePath" ) ;
List < String > videoPath = ( List < String > ) responseMap . get ( "videoPath" ) ;
String storyDetailPage = ( String ) responseMap . get ( "pageType" ) ;
/ / pageType
/ / storyDetailPage
Map < String , Object > resultmap = new HashMap < > ( ) ;
if ( pgc . equals ( 1 ) ) {
try {
List < Map < String , Object > > picturepath = new ArrayList < > ( ) ;
if ( responseMap . get ( "pictureList" ) ! = "" & & ! "storyDetailPage" . equals ( storyDetailPage ) & & ! "socialComment" . equals ( storyDetailPage ) ) {
Map < String , Object > map = JsonUtils . parseObject ( ( String ) responseMap . get ( "pictureList" ) ) ;
if ( ! map . isEmpty ( ) ) {
for ( Map . Entry < String , Object > entry : map . entrySet ( ) ) {
Map < String , Object > gofastmap = new HashMap < > ( ) ;
Map < String , Object > revmap = ( Map < String , Object > ) entry . getValue ( ) ;
if ( revmap . containsKey ( "uploadImg" ) & & revmap . get ( "uploadImg" ) ! = null & & revmap . get ( "uploadImg" ) ! = "" ) {
gofastmap . put ( "gofastUrl" , rerversemap . get ( revmap . get ( "uploadImg" ) ) ) ;
gofastmap . put ( "originalUrl" , revmap . get ( "img" ) ) ;
}
picturepath . add ( gofastmap ) ;
}
}
} else if ( "storyDetailPage" . equals ( storyDetailPage ) ) {
Iterator < String > it = imagePath . iterator ( ) ;
while ( it . hasNext ( ) ) {
Map < String , Object > revmap = new HashMap < > ( ) ;
revmap . put ( "gofastUrl" , it . next ( ) ) ;
revmap . put ( "originalUrl" , "" ) ;
picturepath . add ( revmap ) ;
}
}
String pictureList = JsonUtils . toJSONString ( picturepath ) ;
resultmap . put ( "srcimagePath" , pictureList ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
/ / log . error ( ) ;
}
} if ( ugc . equals ( 1 ) ) {
if ( responseMap . get ( "forwardUrl" ) ! = "" & & ! "storyDetailPage" . equals ( storyDetailPage ) & & ! "socialComment" . equals ( storyDetailPage ) ) {
try {
List < Map < String , Object > > forwardUrl = ( List < Map < String , Object > > ) JsonUtils . parseArray ( ( String ) responseMap . get ( "forwardUrl" ) ) ;
List < Map < String , Object > > anewforwardUrl = new ArrayList < > ( ) ;
for ( Map < String , Object > mapList : forwardUrl ) {
if ( mapList . containsKey ( "gofastUrl" ) ) {
mapList . put ( "gofastUrl" , rerversemap . get ( mapList . get ( "gofastUrl" ) ) ) ;
anewforwardUrl . add ( mapList ) ;
} else {
anewforwardUrl . add ( mapList ) ;
}
}
String reforwardUrl = JsonUtils . toJSONString ( anewforwardUrl ) ;
resultmap . put ( "srcfilePath" , reforwardUrl ) ;
} catch ( Exception e ) {
e . printStackTrace ( ) ;
}
}
} if ( egc . equals ( 1 ) ) {
List < Map < String , Object > > videoUrl = new ArrayList < > ( ) ;
if ( responseMap . get ( "videoUrl" ) ! = "" & & ! "storyDetailPage" . equals ( storyDetailPage ) & & ! "socialComment" . equals ( storyDetailPage ) ) {
try {
List < Map < String , Object > > zhuquvideoUrl = JsonUtils . parseArray ( ( String ) responseMap . get ( "videoUrl" ) ) ;
/ / System . out . println ( responseMap . get ( "videoUrl" ) ) ;
for ( Map < String , Object > mapList : zhuquvideoUrl ) {
/ / System . out . println ( mapList . get ( "gofastUrl" ) + "asd" ) ;
if ( mapList . containsKey ( "gofastUrl" ) ) {
mapList . put ( "gofastUrl" , rerversemap . get ( mapList . get ( "gofastUrl" ) ) ) ;
videoUrl . add ( mapList ) ;
} else {
videoUrl . add ( mapList ) ;
}
}
} catch ( Exception e ) {
e . printStackTrace ( ) ;
String revideoUrl = JsonUtils . toJSONString ( responseMap . get ( "videoUrl" ) ) ;
resultmap . put ( "srcvideoPath" , revideoUrl ) ;
}
}
} else if ( "storyDetailPage" . equals ( storyDetailPage ) ) {
String storyDetailPagevideoUrl = ( String ) responseMap . get ( "videoUrl" ) ;
Iterator < String > it = videoPath . iterator ( ) ;
while ( it . hasNext ( ) ) {
Map < String , Object > revmap = new HashMap < > ( ) ;
revmap . put ( "gofastUrl" , it . next ( ) ) ;
revmap . put ( "originalUrl" , storyDetailPagevideoUrl ) ;
videoUrl . add ( revmap ) ;
}
}
}
String revideoUrl = JsonUtils . toJSONString ( videoUrl ) ;
resultmap . put ( "srcvideoPath" , revideoUrl ) ;
}
return resultmap ;
}
/ / private static void writerToKafka ( int kafkaNum , String indexName , String key , Map < String , Object > responseMap ) {
/ / try {
/ / List < Map < String , String > > subjects = subject . get ( key ) ;
/ / if ( subjects . size ( ) > 0 ) {
/ / for ( Map < String , String > sub : subjects ) {
/ / String subjectId = sub . get ( "subject_id" ) ;
/ / String exportToKafka = sub . get ( "export_to_kafka" ) ;
/ / String kafkaAddr = sub . get ( "kafka_addr" ) ;
/ / / / System . out . println ( "indexName : " + indexName + " ; subjectId : " + subjectId ) ;
/ / if ( indexName . contains ( subjectId ) & & exportToKafka . equals ( "1" ) ) {
/ / System . out . println ( "-----------------------------------------将数据写到对应的 kafka 中 : " + kafkaAddr ) ;
/ / / / KfkProducer . getInstance ( ) . send ( "test0910" , JsonUtils . toJSONString ( responseMap ) ) ;
/ / }
/ / }
/ / } else {
/ / System . out . println ( "空的????????" + key ) ;
/ / }
/ /
/ / } catch ( Exception e ) {
/ / e . printStackTrace ( ) ;
/ / }
/ / }
private static void writerToKafka ( int kafkaNum , String kafkaTopic , Map < String , Object > responseMap ) {
private static void writerToKafka ( int kafkaNum , String kafkaTopic , Map < String , Object > responseMap ) {
try {
try {
System . out . println ( "要写的kafka : " + kafkaNum + " ; kafkaTopic: " + kafkaTopic ) ;
/ / System . out . println ( "要写的kafka : " + kafkaNum + " ; kafkaTopic: " + kafkaTopic ) ;
String docId = responseMap . get ( "docId" ) . toString ( ) ;
if ( null ! = docId & & ! ( "" ) . equals ( docId ) ) {
KfkProducer . getInstance ( kafkaNum , kafkaTopic ) . send ( kafkaTopic , JsonUtils . toJSONString ( responseMap ) ) ;
KfkProducer . getInstance ( kafkaNum , kafkaTopic ) . send ( kafkaTopic , JsonUtils . toJSONString ( responseMap ) ) ;
}
} catch ( Exception e ) {
} catch ( Exception e ) {
e . printStackTrace ( ) ;
e . printStackTrace ( ) ;
}
}
@ -311,59 +764,13 @@ public class DataSaveManager implements Runnable{
String crawlDataFlag = ( String ) responseMap . get ( "crawlDataFlag" ) ;
String crawlDataFlag = ( String ) responseMap . get ( "crawlDataFlag" ) ;
key = enSource + "#####" + crawlDataFlag ;
key = enSource + "#####" + crawlDataFlag ;
} else {
} else {
System . out . println ( "数据没有标识???为什么呀?" + JsonUtils . toJSONString ( responseMap ) ) ;
log . error ( "数据没有标识???为什么呀?!! " + JsonUtils . toJSONString ( responseMap ) ) ;
/ / System . out . println ( "数据没有标识???为什么呀?" + JsonUtils . toJSONString ( responseMap ) ) ;
}
}
}
}
return key . toLowerCase ( ) ;
return key . toLowerCase ( ) ;
}
}
/ / private List < String > getIndexNameList ( String key , Map < String , Object > responseMap ) {
/ / List < String > indexNames = new ArrayList < > ( ) ;
/ / try {
/ / System . out . println ( key + " ; task_subject: " + JsonUtils . toJSONString ( subject ) ) ;
/ / if ( subject . containsKey ( key ) ) {
/ / List < Map < String , String > > values = subject . get ( key ) ;
/ / for ( Map < String , String > val : values ) {
/ / String subjectId = val . get ( "subject_id" ) ;
/ / String taskId = val . get ( "task_id" ) ;
/ / String externalId = val . get ( "external_id" ) ;
/ / System . out . println ( taskId + " -- " + externalId ) ;
/ / System . out . println ( externalId ) ;
/ / indexNames . add ( preSubject + subjectId ) ;
/ / }
/ / } else {
/ / / / System . out . println ( "3333 : " + JsonUtils . toJSONString ( responseMap ) ) ;
/ / System . out . println ( "这条数据不用写到 【专题】 索引中哦!!! " +
/ / "crawlDataFlag = " + responseMap . get ( "crawlDataFlag" ) + " ; " +
/ / "id = " + responseMap . get ( "dataId" ) + " ; " +
/ / "pubTime = " + responseMap . get ( "pubTimeStr" ) ) ;
/ / }
/ /
/ / / / } else {
/ / / / System . out . println ( "33333 " + responseMap ) ;
/ / / / }
/ /
/ / / / for ( Map . Entry < String , String > entry : subject . entrySet ( ) ) {
/ / / / System . out . println ( "subject : key= " + entry . getKey ( ) + " and value= " + entry . getValue ( ) ) ;
/ / / / }
/ / / / for ( Map . Entry < String , String > entry : downloadAddr . entrySet ( ) ) {
/ / / / System . out . println ( "download : key= " + entry . getKey ( ) + " and value= " + entry . getValue ( ) ) ;
/ / / / }
/ / / / for ( Map . Entry < String , String > entry : kafkaAddr . entrySet ( ) ) {
/ / / / System . out . println ( "kafka: key= " + entry . getKey ( ) + " and value= " + entry . getValue ( ) ) ;
/ / / / }
/ / } catch ( Exception e ) {
/ / e . printStackTrace ( ) ;
/ / }
/ / return indexNames ;
/ / }
/ / private static String getIndexName ( Map < String , Object > responseMap ) {
/ / String pubTimeStr = responseMap . get ( "pubTimeStr" ) . toString ( ) . split ( " " ) [ 0 ] ;
/ / String indexName = preIndex + pubTimeStr ;
/ / return indexName ;
/ / }
private static String getIndexName ( Map < String , Object > responseMap ) {
private static String getIndexName ( Map < String , Object > responseMap ) {
String pubTimeStr = null ;
String pubTimeStr = null ;
try {
try {
@ -450,11 +857,21 @@ public class DataSaveManager implements Runnable{
String author = dataValue . toString ( ) . replaceAll ( "[^\\u0000-\\uFFFF]" , "" ) ;
String author = dataValue . toString ( ) . replaceAll ( "[^\\u0000-\\uFFFF]" , "" ) ;
jsonData . put ( key , author ) ;
jsonData . put ( key , author ) ;
}
}
/ / if ( key . equals ( "videoPath" ) & & dataValue ! = null ) {
/ / List < String > list = new ArrayList < > ( ) ;
/ / list . add ( dataValue . toString ( ) ) ;
/ / jsonData . put ( key , list ) ;
/ / }
if ( key . equals ( "isVip" ) & & dataValue ! = null ) {
jsonData . put ( key , 1 ) ;
}
if ( key . equals ( "price" ) & & dataValue ! = null ) {
String price = dataValue . toString ( ) . replaceAll ( "¥" , "" ) . replace ( "$" , "" ) ;
jsonData . put ( key , price ) ;
}
if ( key . equals ( "nomorprice" ) & & dataValue ! = null ) {
String nomorprice = dataValue . toString ( ) . replaceAll ( "¥" , "" ) . replace ( "$" , "" ) ;
jsonData . put ( key , nomorprice ) ;
}
if ( key . equals ( "nomorprice" ) & & dataValue ! = null ) {
String nomorprice = dataValue . toString ( ) . replaceAll ( "¥" , "" ) . replace ( "$" , "" ) ;
jsonData . put ( key , nomorprice ) ;
}
/ / if ( key . equals ( "imagePath" ) & & dataValue ! = null ) {
/ / if ( key . equals ( "imagePath" ) & & dataValue ! = null ) {
/ / List < String > list = new ArrayList < > ( ) ;
/ / List < String > list = new ArrayList < > ( ) ;
/ / list . add ( dataValue . toString ( ) ) ;
/ / list . add ( dataValue . toString ( ) ) ;
@ -467,7 +884,7 @@ public class DataSaveManager implements Runnable{
/ / }
/ / }
if ( tableInfo . containsKey ( key ) ) {
if ( tableInfo . containsKey ( key ) ) {
System . out . print ( "tableInfo" + tableInfo ) ;
/ / System . out . print ( "tableInfo" + tableInfo ) ;
String value = tableInfo . get ( key ) ;
String value = tableInfo . get ( key ) ;
if ( value . equals ( "Integer" ) ) {
if ( value . equals ( "Integer" ) ) {
if ( StringUtils . isNotBlank ( dataValue . toString ( ) ) ) {
if ( StringUtils . isNotBlank ( dataValue . toString ( ) ) ) {
@ -484,7 +901,10 @@ public class DataSaveManager implements Runnable{
} else if ( dataValuenew . endsWith ( "万+" ) ) {
} else if ( dataValuenew . endsWith ( "万+" ) ) {
dataValuenew = dataValuenew . substring ( 0 , dataValuenew . indexOf ( "万+" ) ) ;
dataValuenew = dataValuenew . substring ( 0 , dataValuenew . indexOf ( "万+" ) ) ;
jsonData . put ( key , new Double ( Double . valueOf ( dataValuenew ) * 10000 ) . intValue ( ) ) ;
jsonData . put ( key , new Double ( Double . valueOf ( dataValuenew ) * 10000 ) . intValue ( ) ) ;
} else {
} else if ( dataValue . toString ( ) . contains ( "店铺" ) ) {
jsonData . put ( key , 1 ) ;
}
else {
try {
try {
if ( dataValue . toString ( ) . contains ( "全部评论" ) ) {
if ( dataValue . toString ( ) . contains ( "全部评论" ) ) {
dataValue = dataValue . toString ( ) . replace ( "全部评论 (" , "" ) . replace ( ")" , "" ) ;
dataValue = dataValue . toString ( ) . replace ( "全部评论 (" , "" ) . replace ( ")" , "" ) ;
@ -501,20 +921,34 @@ public class DataSaveManager implements Runnable{
jsonData . put ( key , 0 ) ;
jsonData . put ( key , 0 ) ;
}
}
} else if ( value . equals ( "List" ) ) {
} else if ( value . equals ( "List" ) ) {
try {
if ( StringUtils . isNotBlank ( dataValue . toString ( ) ) ) {
if ( StringUtils . isNotBlank ( dataValue . toString ( ) ) ) {
jsonData . put ( key , JSONArray . parseArray ( dataValue . toString ( ) ) ) ;
jsonData . put ( key , JSONArray . parseArray ( dataValue . toString ( ) ) ) ;
} else {
} else {
jsonData . put ( key , new ArrayList < String > ( ) ) ;
jsonData . put ( key , new ArrayList < String > ( ) ) ;
}
}
} catch ( Exception e ) {
/ / e . printStackTrace ( ) ;
String str = dataValue . toString ( ) . replace ( ", " , "," ) ;
str = str . substring ( 1 , str . length ( ) - 1 ) . trim ( ) ;
String [ ] strs = str . split ( "," ) ;
/ / System . out . println ( strs . length + "数组的长度啊" ) ;
System . out . println ( str + "数组的长度啊" ) ;
List < String > list = Arrays . asList ( strs ) ;
jsonData . put ( key , list ) ;
}
} else if ( value . equals ( "Long" ) ) {
} else if ( value . equals ( "Long" ) ) {
if ( StringUtils . isNotBlank ( dataValue . toString ( ) ) ) {
if ( StringUtils . isNotBlank ( dataValue . toString ( ) ) ) {
try {
try {
Long theValue = 0L ;
Long theValue = 0L ;
if ( dataValue . toString ( ) . contains ( "万+" ) ) {
Double dou = Double . valueOf ( dataValue . toString ( ) . replace ( "万+" , "" ) ) ;
if ( dataValue . toString ( ) . contains ( "万" ) ) {
Double dou = Double . valueOf ( dataValue . toString ( ) . replace ( "万+" , "" ) . replace ( "万" , "" ) ) ;
theValue = new Double ( dou * 10000 ) . longValue ( ) ; ;
theValue = new Double ( dou * 10000 ) . longValue ( ) ; ;
jsonData . put ( key , theValue ) ;
jsonData . put ( key , theValue ) ;
} else {
} else if ( dataValue . toString ( ) . contains ( "+" ) ) {
jsonData . put ( key , Long . valueOf ( dataValue . toString ( ) . replace ( "+" , "" ) ) ) ;
}
else {
jsonData . put ( key , Long . valueOf ( dataValue . toString ( ) ) ) ;
jsonData . put ( key , Long . valueOf ( dataValue . toString ( ) ) ) ;
}
}
@ -633,6 +1067,12 @@ public class DataSaveManager implements Runnable{
}
}
return lDate ;
return lDate ;
}
}
/**
 * Maps a string to a bucket number.
 *
 * <p>The first four characters of {@code calcMD5(str)} are parsed as a hexadecimal number
 * and reduced modulo {@code size}.
 *
 * @param str  text to bucket
 * @param size number of buckets (the modulus)
 * @return the parsed prefix modulo {@code size}
 */
public static int hash(String str, int size) {
    final int prefixValue = Integer.parseInt(calcMD5(str).substring(0, 4), 16);
    return prefixValue % size;
}
public static int checkPathExists ( String kafka_addr ) { / / 检查文件夹是否存在kafka的配置文件
public static int checkPathExists ( String kafka_addr ) { / / 检查文件夹是否存在kafka的配置文件
int num = 0 ;
int num = 0 ;
String filename = "" ;
String filename = "" ;