|
|
package com.zyzs.otherdatasave.service;
import com.alibaba.fastjson.JSONObject;import com.bfd.crawler.elasti.ElastiProducerHigh;import com.bfd.crawler.utils.JsonUtils;import com.zyzs.otherdatasave.bean.Clini;import com.zyzs.otherdatasave.bean.Paper;import com.zyzs.otherdatasave.cache.Constants;import com.zyzs.otherdatasave.util.DataCheckUtil;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;import java.time.LocalDateTime;import java.time.format.DateTimeFormatter;import java.util.*;
import static com.zyzs.otherdatasave.util.MfMD5Util.GetMD5Code;@Servicepublic class QueryPaper {
private static final Logger LOGGER = LoggerFactory.getLogger(QueryPaper.class); @PostConstruct public void init() {
}
public void query() { String inputMessage = Constants.getPaper().poll();// poll -->若队列为空,返回null
if (Objects.isNull(inputMessage)) { return; } Map<String, Object> messageMap = new HashMap<>(); if (inputMessage.length()>10) { try { try { messageMap = JsonUtils.parseObject(inputMessage); } catch (Exception e) { e.printStackTrace(); } Paper paper=new Paper(); paper.setTitle((String) messageMap.get("title")); paper.setCrawlUrl((String) messageMap.get("crawlUrl")); paper.setContent((String) messageMap.get("content"));
//
// List<String> authorList = new ArrayList<>();
// String author=(String) messageMap.get("author");
// if(author.contains(";")){
// // 如果包含分号,则按分号分割
// authorList = Arrays.asList(author.split(";"));
// }else if (author.contains(",")){
// // 如果包含分号,则按分号分割
// authorList = Arrays.asList(author.split(","));
// }
// if(authorList.size()>0){
// List authors=new ArrayList<>();
// for (String name:authorList){
// Map ma=new HashMap();
// ma.put("id","");
// ma.put("name",name);
// // 组织信息
// Map or =new HashMap();
// or.put("id","");
// or.put("name","");
// List organs=new ArrayList<>();
// organs.add(or);
// ma.put("organs",organs);
// authors.add(ma);
// }
// paper.setAuthors(authors);
// }
if (messageMap.containsKey("filePath")){ List<String> file = (List) messageMap.get("filePath"); List<String> cleanedList = new ArrayList<>(); for (String url : file) { // 使用replaceAll方法移除匹配的<url>标签
String cleanedUrl = url.replaceAll("http://192.168.0.41:8081", ""); cleanedList.add(cleanedUrl); } paper.setFilePath(cleanedList); paper.setFilePathSize((List) messageMap.get("filePathSize")); }
List<Map> authors= (List<Map>) messageMap.get("authors"); if(authors.size()>0){ List authorsall=new ArrayList<>(); for (Map<String,Object>keyValueMap : authors){ for (Map.Entry<String, Object> entry : keyValueMap.entrySet()) { String key = entry.getKey(); // 获取键
Object value = entry.getValue(); // 获取值
Map ma=new HashMap(); ma.put("id",""); ma.put("name",key); // 组织信息
Map or =new HashMap(); or.put("id",""); or.put("name",value); List organs=new ArrayList<>(); organs.add(or); ma.put("organs",organs); authorsall.add(ma); paper.setAuthors(authorsall); } } }
//来源信息
Map source=new HashMap(); source.put("id", ""); source.put("name", messageMap.get("crawlUrl")); source.put("type", ""); List sourcelist=new ArrayList<>(); sourcelist.add(source); paper.setSource(sourcelist);
String inputDate = (String) messageMap.get("pubDate"); String outputFormat = "yyyy-MM-dd"; // 定义输入格式
DateTimeFormatter inputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); // 解析输入字符串为 LocalDateTime 对象
LocalDateTime dateTime = LocalDateTime.parse(inputDate, inputFormatter); // 格式化为所需的输出格式
String formattedDate = dateTime.format(DateTimeFormatter.ofPattern(outputFormat)); paper.setPubDate(formattedDate); paper.setDoi((String) messageMap.get("doi")); paper.setClassify((String) messageMap.get("classify")); paper.setKeywords((String) messageMap.get("keywords")); paper.setSummary((String) messageMap.get("summary")); paper.setTopics((String) messageMap.get("topics")); paper.setFieldsSubject((String) messageMap.get("fieldsSubject")); paper.setReferences((String) messageMap.get("references")); String docid = GetMD5Code((String) messageMap.get("doi")); paper.setDocId(docid); paper.setDataId(docid); paper.set_id_(docid); paper.setCountry((String) messageMap.get("country"));
paper.setTranslatetitle(""); paper.setTranslatekeywords (""); paper.setTranslatesummary(""); paper.setIsshow("20250520"); long dateTimenow = System.currentTimeMillis(); paper.setCreateTime(dateTimenow); String createTimeStr=DataCheckUtil.getCurrentTime(dateTimenow); paper.setCreateTimeStr(createTimeStr); LOGGER.info("Parse Paper={}", JSONObject.toJSON(paper)); ElastiProducerHigh elastiProducer = ElastiProducerHigh.getInstance(1, 3, "cl_special_1.0_paper_csci","_doc" ); elastiProducer.sendMessageToEs(JsonUtils.toJSONString(paper)); } catch (Exception e) { LOGGER.info("Parse PaperError={}", JSONObject.toJSON(inputMessage)); e.printStackTrace(); } } }}
|