
release-3.1.5 (2021-10-27): fixed a bug in file download during offline data pulls; a failed download no longer changes the pgc/ugc/egc values.

release-1.0
杜静 4 years ago
commit 950171d6db
10 changed files:
  1. .idea/libraries/Maven__com_github_housepower_clickhouse_native_jdbc_1_7_stable.xml (13 changed lines)
  2. .idea/libraries/Maven__javax_xml_bind_jaxb_api_2_3_0.xml (13 changed lines)
  3. .idea/libraries/Maven__org_apache_httpcomponents_httpmime_4_5_2.xml (13 changed lines)
  4. .idea/libraries/Maven__ru_yandex_clickhouse_clickhouse_jdbc_0_2.xml (13 changed lines)
  5. cl_query_data_job/cl_query_data_job.iml (6 changed lines)
  6. cl_query_data_job/pom.xml (22 changed lines)
  7. cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java (34 changed lines)
  8. cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java (7 changed lines)
  9. cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java (110 changed lines)
  10. cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java (4 changed lines)

.idea/libraries/Maven__com_github_housepower_clickhouse_native_jdbc_1_7_stable.xml (13 changed lines)

@@ -0,0 +1,13 @@
+<component name="libraryTable">
+  <library name="Maven: com.github.housepower:clickhouse-native-jdbc:1.7-stable">
+    <CLASSES>
+      <root url="jar://$MAVEN_REPOSITORY$/com/github/housepower/clickhouse-native-jdbc/1.7-stable/clickhouse-native-jdbc-1.7-stable.jar!/" />
+    </CLASSES>
+    <JAVADOC>
+      <root url="jar://$MAVEN_REPOSITORY$/com/github/housepower/clickhouse-native-jdbc/1.7-stable/clickhouse-native-jdbc-1.7-stable-javadoc.jar!/" />
+    </JAVADOC>
+    <SOURCES>
+      <root url="jar://$MAVEN_REPOSITORY$/com/github/housepower/clickhouse-native-jdbc/1.7-stable/clickhouse-native-jdbc-1.7-stable-sources.jar!/" />
+    </SOURCES>
+  </library>
+</component>

.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_3_0.xml (13 changed lines)

@@ -0,0 +1,13 @@
+<component name="libraryTable">
+  <library name="Maven: javax.xml.bind:jaxb-api:2.3.0">
+    <CLASSES>
+      <root url="jar://$MAVEN_REPOSITORY$/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0.jar!/" />
+    </CLASSES>
+    <JAVADOC>
+      <root url="jar://$MAVEN_REPOSITORY$/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0-javadoc.jar!/" />
+    </JAVADOC>
+    <SOURCES>
+      <root url="jar://$MAVEN_REPOSITORY$/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0-sources.jar!/" />
+    </SOURCES>
+  </library>
+</component>

.idea/libraries/Maven__org_apache_httpcomponents_httpmime_4_5_2.xml (13 changed lines)

@@ -0,0 +1,13 @@
+<component name="libraryTable">
+  <library name="Maven: org.apache.httpcomponents:httpmime:4.5.2">
+    <CLASSES>
+      <root url="jar://$MAVEN_REPOSITORY$/org/apache/httpcomponents/httpmime/4.5.2/httpmime-4.5.2.jar!/" />
+    </CLASSES>
+    <JAVADOC>
+      <root url="jar://$MAVEN_REPOSITORY$/org/apache/httpcomponents/httpmime/4.5.2/httpmime-4.5.2-javadoc.jar!/" />
+    </JAVADOC>
+    <SOURCES>
+      <root url="jar://$MAVEN_REPOSITORY$/org/apache/httpcomponents/httpmime/4.5.2/httpmime-4.5.2-sources.jar!/" />
+    </SOURCES>
+  </library>
+</component>

.idea/libraries/Maven__ru_yandex_clickhouse_clickhouse_jdbc_0_2.xml (13 changed lines)

@@ -0,0 +1,13 @@
+<component name="libraryTable">
+  <library name="Maven: ru.yandex.clickhouse:clickhouse-jdbc:0.2">
+    <CLASSES>
+      <root url="jar://$MAVEN_REPOSITORY$/ru/yandex/clickhouse/clickhouse-jdbc/0.2/clickhouse-jdbc-0.2.jar!/" />
+    </CLASSES>
+    <JAVADOC>
+      <root url="jar://$MAVEN_REPOSITORY$/ru/yandex/clickhouse/clickhouse-jdbc/0.2/clickhouse-jdbc-0.2-javadoc.jar!/" />
+    </JAVADOC>
+    <SOURCES>
+      <root url="jar://$MAVEN_REPOSITORY$/ru/yandex/clickhouse/clickhouse-jdbc/0.2/clickhouse-jdbc-0.2-sources.jar!/" />
+    </SOURCES>
+  </library>
+</component>

cl_query_data_job/cl_query_data_job.iml (6 changed lines)

@@ -124,7 +124,6 @@
 <orderEntry type="library" name="Maven: io.netty:netty-transport:4.1.13.Final" level="project" />
 <orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-high-level-client:6.0.0" level="project" />
 <orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-client:6.0.0" level="project" />
-<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.2" level="project" />
 <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.5" level="project" />
 <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.2" level="project" />
 <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.5" level="project" />
@@ -200,5 +199,10 @@
 <orderEntry type="library" name="Maven: javax.mail:javax.mail-api:1.6.2" level="project" />
 <orderEntry type="library" name="Maven: com.sun.mail:javax.mail:1.6.2" level="project" />
 <orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
+<orderEntry type="library" name="Maven: com.github.housepower:clickhouse-native-jdbc:1.7-stable" level="project" />
+<orderEntry type="library" name="Maven: ru.yandex.clickhouse:clickhouse-jdbc:0.2" level="project" />
+<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.2" level="project" />
+<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpmime:4.5.2" level="project" />
+<orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.3.0" level="project" />
 </component>
 </module>

cl_query_data_job/pom.xml (22 changed lines)

@@ -194,8 +194,28 @@
             <artifactId>javax.mail</artifactId>
             <version>1.6.2</version>
         </dependency>
+        <!-- clickhouse -->
+        <!--<dependency>-->
+            <!--<groupId>ru.yandex.clickhouse</groupId>-->
+            <!--<artifactId>clickhouse-jdbc</artifactId>-->
+            <!--<version>0.2.6</version>-->
+        <!--</dependency>-->
+        <!--<dependency>-->
+            <!--<groupId>com.github.housepower</groupId>-->
+            <!--<artifactId>clickhouse-native-jdbc</artifactId>-->
+            <!--<version>1.6-stable</version>-->
+        <!--</dependency>-->
+        <dependency>
+            <groupId>com.github.housepower</groupId>
+            <artifactId>clickhouse-native-jdbc</artifactId>
+            <version>1.7-stable</version>
+        </dependency>
+        <dependency>
+            <groupId>ru.yandex.clickhouse</groupId>
+            <artifactId>clickhouse-jdbc</artifactId>
+            <version>0.2</version>
+        </dependency>
     </dependencies>
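
Note: the commit wires in two ClickHouse JDBC drivers (the housepower native-protocol driver, which normally talks to port 9000, and the ru.yandex HTTP driver, which normally talks to port 8123). Both are used through the standard JDBC API; the following is a minimal sketch of a query over the ru.yandex.clickhouse driver added above, assuming a ClickHouse server at 127.0.0.1:8123 and the default database (placeholders, not values taken from this repository).

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ClickHouseQuerySketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical connection settings; the real host and database would come
        // from the job's configuration, not from this commit.
        // Class.forName("ru.yandex.clickhouse.ClickHouseDriver"); // only needed on very old setups
        String url = "jdbc:clickhouse://127.0.0.1:8123/default";
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT 1")) {
            while (rs.next()) {
                System.out.println(rs.getInt(1)); // prints 1 if the server is reachable
            }
        }
    }
}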

cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java (34 changed lines)

@@ -6,7 +6,6 @@ import okhttp3.*;
 import javax.imageio.ImageIO;
 import java.awt.image.BufferedImage;
-import java.awt.image.DataBufferByte;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -28,15 +27,17 @@ public class DownLoadFile {
         header.put("Connection","keep-alive");
         Map<String,Object> downloadresult = OkHttpUtils.doGetBytes(getUrl,header);
-        double size= (double) downloadresult.get(ESConstants.SIZE);
+        double size = 0;
+        if(downloadresult.containsKey(ESConstants.SIZE)) {
+            size = (double) downloadresult.get(ESConstants.SIZE);
+        }
         if (downloadresult.containsKey(ESConstants.CONTENT) && size > 0){
             byte[] content = (byte[]) downloadresult.get(ESConstants.CONTENT);
-            //size= (double) downloadresult.get("size");
             size = Double.valueOf(String.format("%.2f", size));
-            Thread.sleep(3000);
+            Thread.sleep(4000);
             String result = DownLoadFile.upload(putUrl,fileName,content);
-            Thread.sleep(3000);
+            Thread.sleep(4000);
             String path = JSONObject.parseObject(result).getString(ESConstants.PATH);
             realresult.put(ESConstants.URL , path);
             realresult.put(ESConstants.SIZE , size+"KB");
@@ -47,14 +48,6 @@
         return realresult;
     }
-    // public static void main(String[] args) {
-    //     String getUrl = "https://wx4.sinaimg.cn/mw690/001NtKpRly1guw9jh90poj60u01hcaqj02.jpg";
-    //     String putUrl = "http://172.18.1.113:8080/upload";
-    //     Map<String,String> realresult = downloadAndSaveFile(getUrl,putUrl);
-    //     System.out.println(JSONObject.toJSONString(realresult));
-    // }
     public static String upload(String uploadUrl,String fileName,byte[] content) {
         String result = "";
         try {
@@ -85,18 +78,29 @@
     public static String getImageResolution(String getUrl) throws IOException{
         String resolution = "" ;
         try{
+            if(getUrl.endsWith(".png")) {
            InputStream murl = new URL(getUrl).openStream();
            BufferedImage sourceImg = ImageIO.read(murl);
            int srcWidth = sourceImg.getWidth();   // source image width
            int srcHeight = sourceImg.getHeight(); // source image height
            resolution = Integer.toString(srcWidth)+"×"+ Integer.toString(srcHeight);
+            }
         }catch (Exception e){
             System.out.println("ERROR URL : " + getUrl);
-            // e.printStackTrace();
+            e.printStackTrace();
         }
         return resolution;
     }
+    // public static void main(String[] args) {
+    //     String url = "http://172.18.1.113:8080/group6/default/20211018/10/49/3/81ed5dfe30fa6adbb3bba672febd8eff.jpg";
+    //     try {
+    //         getImageResolution(url);
+    //     } catch (IOException e) {
+    //         e.printStackTrace();
+    //     }
+    // }
     public static String getFileSize(String getUrl){
         String realSize = "";
         // get the size
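
Note: the heart of the fix in DownLoadFile.downloadAndSaveFile is that the size returned by OkHttpUtils is no longer cast unconditionally. When a download fails the result map has no size entry, and the old unguarded cast threw a NullPointerException before the caller could fall back. A self-contained sketch of the same guard pattern (the map key and values here are illustrative, not the real ESConstants fields):

import java.util.HashMap;
import java.util.Map;

public class SafeSizeLookup {
    // Returns the download size if present, otherwise 0, so a failed download
    // never aborts the surrounding loop with an unboxing NullPointerException.
    static double sizeOf(Map<String, Object> downloadResult) {
        double size = 0;
        if (downloadResult.containsKey("size")) {        // guard before the cast
            size = (double) downloadResult.get("size");
        }
        return size;
    }

    public static void main(String[] args) {
        Map<String, Object> ok = new HashMap<>();
        ok.put("size", 12.34);
        System.out.println(sizeOf(ok));                  // 12.34
        System.out.println(sizeOf(new HashMap<>()));     // 0.0 instead of an NPE
    }
}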

cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java (7 changed lines)

@@ -181,13 +181,14 @@ public class OkHttpUtils {
                 }
             }
         }
-        return result;
-        }
-        finally {
+        }catch (Exception e){
+            e.printStackTrace();
+        } finally {
             if (response != null)
                 response.close();
             System.gc();
         }
+        return result;
     }
 }
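
Note: this change reshapes the method tail into try/catch/finally, so the HTTP response is always closed and the method still returns whatever result was collected when parsing fails. A small stand-alone sketch of that control flow, using a file reader instead of OkHttp so it runs without the project's dependencies:

import java.io.BufferedReader;
import java.io.FileReader;

public class SafeRead {
    // Read the first line of a file; on any error log it, fall through to the
    // shared cleanup, then return whatever result we have (possibly empty).
    static String firstLine(String path) {
        String result = "";
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(path));
            result = reader.readLine();
        } catch (Exception e) {
            e.printStackTrace();             // mirror the commit: swallow but log
        } finally {
            if (reader != null) {
                try { reader.close(); } catch (Exception ignored) { }
            }
        }
        return result;                       // returned after finally, as in the diff
    }

    public static void main(String[] args) {
        System.out.println(firstLine("/etc/hostname"));
    }
}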

cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java (110 changed lines)

@@ -162,14 +162,14 @@ public class QueryService {
         // System.out.println("id ==== "+id);
         // System.out.println(subjectId);
         // String appId = task.getAppId();
-        System.out.println("**** " + appId);
+        // System.out.println("**** " + appId);
         String indexName = "cl_major_";
         if (appId.contains("ic")) {
             indexName = indexName + subjectId;
         } else {
             indexName = indexName + appId.toLowerCase() + "_" + subjectId; //cl_major_61qb_12094
         }
-        System.out.println("indexName = " + indexName);
+        // System.out.println("indexName = " + indexName);
         Integer cacheNum = task.getCacheNum(); // how many times data has been pulled for this task
         // When the pull count is greater than 1, the start time of the next pull no longer needs to be the task's configured start time; a crawl-time range can also be added to make sure the pulled data was collected after the task was created.
         QueryBuilder queryBuilder; // assemble the query from the conditions
@@ -182,19 +182,20 @@
             fromMills = task.getCrawlStartTime().longValue();
             queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag, cacheNum, siteType);
         }
-        LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.",
-                taskId,
-                indexName,
-                new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT),
-                new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT),
-                JSONObject.toJSONString(sourceIndices),
-                queryBuilder.toString());
+        // LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.",
+        //         taskId,
+        //         indexName,
+        //         new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT),
+        //         new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT),
+        //         JSONObject.toJSONString(sourceIndices),
+        //         queryBuilder.toString());
         // Parameters: cluster name, index name, index type, query builder, scroll page size, scrollId TTL
         String finalTaskId = taskId + "";
         long pubTime = fromMills;
         long finalFromMills = fromMills;
         long finalToMills = toMills;
         String finalIndexName = indexName;
+        String finalIndexName1 = indexName;
         EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE,
                 queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES,
                 dataList -> {
@@ -220,8 +221,8 @@
                         data = downloadAndChangePath(data);
                     }
                     if (!data.get("_id_").equals("")) {
-                        // saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data);
-                        // kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data));
+                        saveService.saveToEsWithFilter(config.esMiniClusterName(), finalIndexName1, data);
+                        kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data));
                         LOGGER.debug("Send message, indexName :{} , taskId:{} , ID :{}.", finalIndexName, task.getId(), data.getString("_id_"));
                         // Add the IDs whose comments need to be pulled to the list; e-commerce data does not need comment pulling
                         if (!siteType.equals(ESConstants.DOCTYPEITEM)) {
@ -340,26 +341,61 @@ public class QueryService {
*/ */
private JSONObject downloadAndChangePath(JSONObject data) { private JSONObject downloadAndChangePath(JSONObject data) {
try { try {
String docId = (String) data.get(ESConstants.DOC_ID);
// 文件下载 之所以提取对应的src 字段是因为如果附件被下载过了这个字段的值需要一一对应就直接填写了不用再做下载回填了 // 文件下载 之所以提取对应的src 字段是因为如果附件被下载过了这个字段的值需要一一对应就直接填写了不用再做下载回填了
List<String> filePath = (List<String>) data.get(ESConstants.FILEPATH); List<String> filePath = (List<String>) data.get(ESConstants.FILEPATH);
List<Map<String,String>> srcFileList = JsonUtils.parseArray( data.get(ESConstants.SRCFILEPATH).toString());
List<Map<String,String>> filePathSize = new ArrayList<>();
List<Map<String, String>> srcFileList = new ArrayList<>();
if(data.containsKey(ESConstants.FILEPATHSIZE)) {
filePathSize = JsonUtils.parseArray(data.get(ESConstants.FILEPATHSIZE).toString());
}
if(data.containsKey(ESConstants.SRCFILEPATH)) {
srcFileList = JsonUtils.parseArray(data.get(ESConstants.SRCFILEPATH).toString());
}
if(filePath.size() > 0){ if(filePath.size() > 0){
data = getFilePath(data,filePath,srcFileList);
System.out.println(docId + "----- filePath : " + filePath);
System.out.println("===== srcFileList :" + srcFileList);
System.out.println("-=-=- filePathSize : "+filePathSize);
data = getFilePath(data,filePath,srcFileList,filePathSize);
} }
// 视频下载 // 视频下载
List<String> videoPath = (List<String>) data.get(ESConstants.VIDEOPATH); List<String> videoPath = (List<String>) data.get(ESConstants.VIDEOPATH);
List<Map<String,String>> srcVideoList = new ArrayList<>();
List<Map<String,String>> videoPathSize = new ArrayList<>();
if(videoPath.size() > 0){ if(videoPath.size() > 0){
List<Map<String,String>> srcVideoList = JsonUtils.parseArray( data.get(ESConstants.SRCVIDEOPATH).toString());
data = getVideoPath(data,videoPath,srcVideoList);
if(data.containsKey(ESConstants.VIDEOPATHSIZE)){
videoPathSize = JsonUtils.parseArray(data.get(ESConstants.VIDEOPATHSIZE).toString());
} }
if(data.containsKey(ESConstants.SRCVIDEOPATH)){
srcVideoList = JsonUtils.parseArray( data.get(ESConstants.SRCVIDEOPATH).toString());
}
System.out.println(docId + "----- videoPath : " + videoPath);
System.out.println("===== srcVideoList :" + srcVideoList);
System.out.println("-=-=- videoPathSize : "+videoPathSize);
data = getVideoPath(data,videoPath,srcVideoList,videoPathSize);
}
// 图片下载 // 图片下载
List<String> imagePath = (List<String>) data.get(ESConstants.IMAGEPATH); List<String> imagePath = (List<String>) data.get(ESConstants.IMAGEPATH);
List<Map<String,String>> srcImageList = JsonUtils.parseArray( data.get(ESConstants.SRCIMAGEPATH).toString());
List<Map<String,String>> srcImageList = new ArrayList<>();
List<Map<String,String>> imagePathSize = new ArrayList<>();
if(data.containsKey(ESConstants.IMAGEPATHSIZE)){
imagePathSize = JsonUtils.parseArray(data.get(ESConstants.IMAGEPATHSIZE).toString());
}
if(data.containsKey(ESConstants.SRCIMAGEPATH)){
srcImageList = JsonUtils.parseArray( data.get(ESConstants.SRCIMAGEPATH).toString());
}
if(imagePath.size() > 0){ if(imagePath.size() > 0){
data = getImagePath(data,imagePath,srcImageList);
System.out.println(docId + "----- imagePath : " + imagePath);
System.out.println("===== srcImageList :" + srcImageList);
System.out.println("-=-=- imagePathSize : "+imagePathSize);
data = getImagePath(data,imagePath,srcImageList,imagePathSize);
} }
// System.out.println("***** "+data);
// isDownload 填写 // isDownload 填写
if(filePath.size() == 0 && videoPath.size() == 0 && imagePath.size() == 0){
if(filePathSize.size() == 0 && videoPathSize.size() == 0 && imagePathSize.size() == 0){
data.put(ESConstants.ISDOWNLOAD,"false"); data.put(ESConstants.ISDOWNLOAD,"false");
} }
} catch (Exception e) { } catch (Exception e) {
@@ -375,9 +411,9 @@
         // When all three pathSize lists are empty, all three download results are empty; to keep the page consistent with the actual result, this is changed to false.
     }
-    private JSONObject getImagePath(JSONObject data, List<String> imagePath, List<Map<String,String>> srcImageList) {
-        Map<String,Object> pathMap = getPathSize(imagePath,1,data,srcImageList);
-        LOGGER.info("下载图片后的 pathMap : {}.",JsonUtils.toJSONString(pathMap));
+    private JSONObject getImagePath(JSONObject data, List<String> imagePath, List<Map<String,String>> srcImageList,List<Map<String,String>> pathSize) {
+        Map<String,Object> pathMap = getPathSize(imagePath,1,data,srcImageList,pathSize);
+        LOGGER.info("Image : 下载图片后的 pathMap : {}.",JsonUtils.toJSONString(pathMap));
         if(pathMap.size() > 0) {
             imagePath = (List<String>) pathMap.get(ESConstants.PATH);
             data.put(ESConstants.IMAGEPATH, imagePath);
@ -433,9 +469,9 @@ public class QueryService {
return data; return data;
} }
private JSONObject getVideoPath(JSONObject data, List<String> videoPath, List<Map<String,String>> srcVideoList ) {
Map<String,Object> pathMap = getPathSize(videoPath,2,data,srcVideoList);
LOGGER.info("下载视频后的 pathMap : {}.",JsonUtils.toJSONString(pathMap));
private JSONObject getVideoPath(JSONObject data, List<String> videoPath, List<Map<String,String>> srcVideoList ,List<Map<String,String>> pathSize) {
Map<String,Object> pathMap = getPathSize(videoPath,2,data,srcVideoList,pathSize);
LOGGER.info("Video : 下载视频后的 pathMap : {}.",JsonUtils.toJSONString(pathMap));
// 先做判断如果 pathMap == 0 的话对应的 videoPathvideoPathSizesrcVideoPath 都保持不变即可 // 先做判断如果 pathMap == 0 的话对应的 videoPathvideoPathSizesrcVideoPath 都保持不变即可
if(pathMap.size() > 0) { if(pathMap.size() > 0) {
// videoPath 字段填充 // videoPath 字段填充
@@ -491,10 +527,10 @@
         return data;
     }
-    private JSONObject getFilePath(JSONObject data, List<String> filePath, List<Map<String,String>> srcFileList) {
+    private JSONObject getFilePath(JSONObject data, List<String> filePath, List<Map<String,String>> srcFileList,List<Map<String,String>> pathSize) {
         // Call the download interface, then upload the attachment to our own go-fast store
-        Map<String,Object> pathMap = getPathSize(filePath,0,data,srcFileList);
-        LOGGER.info("下载文件后的 pathMap : {}.",JsonUtils.toJSONString(pathMap));
+        Map<String,Object> pathMap = getPathSize(filePath,0,data,srcFileList,pathSize);
+        LOGGER.info("File : 下载文件后的 pathMap : {}.",JsonUtils.toJSONString(pathMap));
         if(pathMap.size() > 0) {
             // path list after download and replacement
             filePath = (List<String>) pathMap.get(ESConstants.PATH);
@ -530,7 +566,9 @@ public class QueryService {
/** /**
* downloadType =0 文件 =1 图片 = 2 视频 * downloadType =0 文件 =1 图片 = 2 视频
*/ */
private Map<String,Object> getPathSize(List<String> pathList, Integer downloadType, JSONObject data,List<Map<String,String>> srcxxxList) {
private Map<String,Object> getPathSize(List<String> pathList, Integer downloadType, JSONObject data,
List<Map<String,String>> srcxxxList ,
List<Map<String,String>> xxxpathSize) {
String domain = config.getGoFastDomain(); String domain = config.getGoFastDomain();
Map<String,Object> pathMap = new HashMap<>(); Map<String,Object> pathMap = new HashMap<>();
List<Map<String,String>> pathSizeList = new ArrayList<>(); List<Map<String,String>> pathSizeList = new ArrayList<>();
@@ -546,10 +584,14 @@
                 // The following three cases all need a download; there is also the case where the link in path does not need downloading, and then the pathSize and srcPath fields still have to be completed.
                 // It may well be that the path field itself does not need downloading while the other two fields can only be completed after a download.
                 if (downloadUrl.contains("http") || downloadUrl.contains("group1") || downloadUrl.contains("group2")) {
-                    if(downloadUrl.contains("group1") || downloadUrl.contains("group2")){
+                    if(!downloadUrl.contains("http")){
                         downloadUrl = domain +downloadUrl;
                     }
-                    Map<String, String> pathSizeMap = DownLoadFile.downloadAndSaveFile(downloadUrl, config.getGoFastPostUrl());
+                    System.out.println("downloadUrl = "+downloadUrl);
+                    Map<String, String> pathSizeMap = new HashMap<>();
+                    if(!downloadUrl.contains("null.py")){
+                        pathSizeMap = DownLoadFile.downloadAndSaveFile(downloadUrl, config.getGoFastPostUrl());
+                    }
                     // LOGGER.info("[QueryService] getPathSize goFaskAddr {}. resultMap {}.", config.getGoFastPostUrl(), pathSizeMap);
                     // Map<String, String> pathSizeMap = DownLoadFile.downloadAndSaveFile(downloadUrl, "http://172.18.1.113:8080/upload");
                     if (pathSizeMap.size() > 0) {
@ -604,13 +646,18 @@ public class QueryService {
}else { }else {
pathMap.put(ESConstants.SRCLIST, srcList); pathMap.put(ESConstants.SRCLIST, srcList);
} }
if(xxxpathSize.size() > 0){
pathMap.put(ESConstants.PATHSIZELIST,xxxpathSize);
}else{
pathMap.put(ESConstants.PATHSIZELIST,pathSizeList);
}
pathMap.put(ESConstants.PATH, path); pathMap.put(ESConstants.PATH, path);
} }
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
System.out.println("pathMap === "+JsonUtils.toJSONString(pathMap));
return pathMap; return pathMap;
} }
@@ -628,7 +675,6 @@
                     ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, endTime);
             boolQueryBuilder.must(pubTimeRange);
         }
-        // Site filter: Taobao and Tmall data overlap, so to pull one of these sites both must be pulled.
         if(cid.equals(ESConstants.TAOBAO) || cid.equals(ESConstants.TMALL)){
             boolQueryBuilder.must(QueryBuilders.termsQuery(ESConstants.EN_SOURCE, ESConstants.TAOBAO,ESConstants.TMALL));
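
Note: across QueryService the pattern behind the commit message is the same throughout: read filePathSize / videoPathSize / imagePathSize and the src* fields only when they exist, and when a document already carries a non-empty pathSize list, keep it rather than overwrite it with the output of a failed download round. A minimal sketch of that choice, using plain collections and illustrative names (the real code keys off ESConstants.PATHSIZELIST inside getPathSize):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class PathSizeMerge {
    // Mirrors the new branch in getPathSize(): keep the pathSize list that was
    // already on the document when it is non-empty, otherwise use the list built
    // from this round of downloads (which is empty when every download failed).
    static List<Map<String, String>> choosePathSize(List<Map<String, String>> existing,
                                                    List<Map<String, String>> freshlyBuilt) {
        return existing.size() > 0 ? existing : freshlyBuilt;
    }

    public static void main(String[] args) {
        List<Map<String, String>> existing = new ArrayList<>();
        existing.add(Collections.singletonMap("a.jpg", "10KB"));
        List<Map<String, String>> fresh = new ArrayList<>();          // failed downloads
        System.out.println(choosePathSize(existing, fresh));          // keeps the old values
        System.out.println(choosePathSize(new ArrayList<>(), fresh)); // falls back to []
    }
}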

cl_search_api/src/main/java/com/bfd/mf/common/service/es/SubjectQueryDataService.java (4 changed lines)

@@ -71,6 +71,9 @@ public class SubjectQueryDataService {
         BoolQueryBuilder searchTextBuilder = topicQueryService.buildSearchTextBuilder(searchType );
         boolQueryBuilder.filter(searchTextBuilder);
         if (TStringUtils.isNotEmpty(keyword)) {
+            if(keyword.contains("&&")){
+            }
             if(searchType == 0){ // main posts: match title and content
                 MatchPhraseQueryBuilder titleQuery = QueryBuilders.matchPhraseQuery(ESConstant.TITLE, keyword).slop(0);
                 MatchPhraseQueryBuilder contentQuery = QueryBuilders.matchPhraseQuery(ESConstant.CONTENT, keyword).slop(0);
@@ -81,6 +84,7 @@
             }else if (searchType == 2){ // users: query only the author name
                 boolQueryBuilder.must(QueryBuilders.queryStringQuery("*"+keyword+"*").field(ESConstant.AUTHOR));
             }
+            // If the keyword contains special symbols, an and/or/not query is needed
         }
         if (sortFlag.equals("")) {
             sortFlag = "pubTime";
