You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
7.1 KiB
160 lines
7.1 KiB
package com.zyzs.otherdatasave.service;
|
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
import com.bfd.crawler.elasti.ElastiProducerHigh;
|
|
import com.bfd.crawler.utils.JsonUtils;
|
|
import com.zyzs.otherdatasave.bean.Clini;
|
|
import com.zyzs.otherdatasave.bean.Paper;
|
|
import com.zyzs.otherdatasave.cache.Constants;
|
|
import com.zyzs.otherdatasave.util.DataCheckUtil;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
import javax.annotation.PostConstruct;
|
|
import java.time.LocalDateTime;
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.util.*;
|
|
|
|
import static com.zyzs.otherdatasave.util.MfMD5Util.GetMD5Code;
|
|
@Service
|
|
public class QueryPaper {
|
|
|
|
private static final Logger LOGGER = LoggerFactory.getLogger(QueryPaper.class);
|
|
@PostConstruct
|
|
public void init() {
|
|
|
|
}
|
|
|
|
public void query() {
|
|
String inputMessage = Constants.getPaper().poll();// poll -->若队列为空,返回null
|
|
if (Objects.isNull(inputMessage)) {
|
|
return;
|
|
}
|
|
Map<String, Object> messageMap = new HashMap<>();
|
|
if (inputMessage.length()>10) {
|
|
try {
|
|
try {
|
|
messageMap = JsonUtils.parseObject(inputMessage);
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
}
|
|
Paper paper=new Paper();
|
|
paper.setTitle((String) messageMap.get("title"));
|
|
paper.setCrawlUrl((String) messageMap.get("crawlUrl"));
|
|
paper.setContent((String) messageMap.get("content"));
|
|
|
|
//
|
|
// List<String> authorList = new ArrayList<>();
|
|
// String author=(String) messageMap.get("author");
|
|
// if(author.contains(";")){
|
|
// // 如果包含分号,则按分号分割
|
|
// authorList = Arrays.asList(author.split(";"));
|
|
// }else if (author.contains(",")){
|
|
// // 如果包含分号,则按分号分割
|
|
// authorList = Arrays.asList(author.split(","));
|
|
// }
|
|
// if(authorList.size()>0){
|
|
// List authors=new ArrayList<>();
|
|
// for (String name:authorList){
|
|
// Map ma=new HashMap();
|
|
// ma.put("id","");
|
|
// ma.put("name",name);
|
|
// // 组织信息
|
|
// Map or =new HashMap();
|
|
// or.put("id","");
|
|
// or.put("name","");
|
|
// List organs=new ArrayList<>();
|
|
// organs.add(or);
|
|
// ma.put("organs",organs);
|
|
// authors.add(ma);
|
|
// }
|
|
// paper.setAuthors(authors);
|
|
// }
|
|
|
|
if (messageMap.containsKey("filePath")){
|
|
List<String> file = (List) messageMap.get("filePath");
|
|
List<String> cleanedList = new ArrayList<>();
|
|
for (String url : file) {
|
|
// 使用replaceAll方法移除匹配的<url>标签
|
|
String cleanedUrl = url.replaceAll("http://192.168.0.41:8081", "");
|
|
cleanedList.add(cleanedUrl);
|
|
}
|
|
paper.setFilePath(cleanedList);
|
|
paper.setFilePathSize((List) messageMap.get("filePathSize"));
|
|
}
|
|
|
|
List<Map> authors= (List<Map>) messageMap.get("authors");
|
|
if(authors.size()>0){
|
|
List authorsall=new ArrayList<>();
|
|
for (Map<String,Object>keyValueMap : authors){
|
|
for (Map.Entry<String, Object> entry : keyValueMap.entrySet()) {
|
|
String key = entry.getKey(); // 获取键
|
|
Object value = entry.getValue(); // 获取值
|
|
Map ma=new HashMap();
|
|
ma.put("id","");
|
|
ma.put("name",key);
|
|
// 组织信息
|
|
Map or =new HashMap();
|
|
or.put("id","");
|
|
or.put("name",value);
|
|
List organs=new ArrayList<>();
|
|
organs.add(or);
|
|
ma.put("organs",organs);
|
|
authorsall.add(ma);
|
|
paper.setAuthors(authorsall);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
//来源信息
|
|
Map source=new HashMap();
|
|
source.put("id", "");
|
|
source.put("name", messageMap.get("crawlUrl"));
|
|
source.put("type", "");
|
|
List sourcelist=new ArrayList<>();
|
|
sourcelist.add(source);
|
|
paper.setSource(sourcelist);
|
|
|
|
String inputDate = (String) messageMap.get("pubDate");
|
|
String outputFormat = "yyyy-MM-dd";
|
|
// 定义输入格式
|
|
DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
|
|
// 解析输入字符串为 LocalDateTime 对象
|
|
LocalDateTime dateTime = LocalDateTime.parse(inputDate, inputFormatter);
|
|
// 格式化为所需的输出格式
|
|
String formattedDate = dateTime.format(DateTimeFormatter.ofPattern(outputFormat));
|
|
paper.setPubDate(formattedDate);
|
|
paper.setDoi((String) messageMap.get("doi"));
|
|
paper.setClassify((String) messageMap.get("classify"));
|
|
paper.setKeywords((String) messageMap.get("keywords"));
|
|
paper.setSummary((String) messageMap.get("summary"));
|
|
paper.setTopics((String) messageMap.get("topics"));
|
|
paper.setFieldsSubject((String) messageMap.get("fieldsSubject"));
|
|
paper.setReferences((String) messageMap.get("references"));
|
|
String docid = GetMD5Code((String) messageMap.get("doi"));
|
|
paper.setDocId(docid);
|
|
paper.setDataId(docid);
|
|
paper.set_id_(docid);
|
|
paper.setCountry((String) messageMap.get("country"));
|
|
|
|
paper.setTranslatetitle("");
|
|
paper.setTranslatekeywords ("");
|
|
paper.setTranslatesummary("");
|
|
paper.setIsshow("20250520");
|
|
long dateTimenow = System.currentTimeMillis();
|
|
paper.setCreateTime(dateTimenow);
|
|
String createTimeStr=DataCheckUtil.getCurrentTime(dateTimenow);
|
|
paper.setCreateTimeStr(createTimeStr);
|
|
LOGGER.info("Parse Paper={}", JSONObject.toJSON(paper));
|
|
ElastiProducerHigh elastiProducer = ElastiProducerHigh.getInstance(1, 3, "cl_special_1.0_paper_csci","_doc" );
|
|
elastiProducer.sendMessageToEs(JsonUtils.toJSONString(paper));
|
|
}
|
|
catch (Exception e) {
|
|
LOGGER.info("Parse PaperError={}", JSONObject.toJSON(inputMessage));
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
}
|
|
}
|