package com.zyzs.otherdatasave.service;

import com.alibaba.fastjson.JSONObject;
import com.bfd.crawler.elasti.ElastiProducerHigh;
import com.bfd.crawler.utils.JsonUtils;
import com.zyzs.otherdatasave.bean.Clini;
import com.zyzs.otherdatasave.bean.Paper;
import com.zyzs.otherdatasave.cache.Constants;
import com.zyzs.otherdatasave.util.DataCheckUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;

import static com.zyzs.otherdatasave.util.MfMD5Util.GetMD5Code;

/**
 * Takes one raw paper message from the queue returned by {@code Constants.getPaper()},
 * maps it onto a {@link Paper} and sends the result to Elasticsearch.
 */
@Service
public class QueryPaper {

    private static final Logger LOGGER = LoggerFactory.getLogger(QueryPaper.class);

    /**
     * Messages whose length does not exceed this value are ignored.
     * NOTE(review): presumably such messages are too short to hold a real payload - confirm.
     */
    private static final int MIN_MESSAGE_LENGTH = 10;

    /** Host prefix removed (as a literal, not a regex) from every "filePath" entry. */
    private static final String FILE_HOST_PREFIX = "http://192.168.0.41:8081";

    /** Format in which "pubDate" is expected to arrive. */
    private static final DateTimeFormatter PUB_DATE_INPUT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Format in which the publication date is stored on the {@link Paper}. */
    private static final DateTimeFormatter PUB_DATE_OUTPUT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd");

    /** Target index and document type passed to {@code ElastiProducerHigh.getInstance}. */
    private static final String ES_INDEX = "cl_special_1.0_paper_csci";
    private static final String ES_TYPE = "_doc";

    /** Post-construction hook; intentionally empty. */
    @PostConstruct
    public void init() {
    }

    /**
     * Processes at most one queued message.
     *
     * <p>Returns immediately when the queue yields {@code null} (poll returns null when the
     * queue is empty) or when the message is not longer than {@link #MIN_MESSAGE_LENGTH}.
     * Any exception raised while parsing, mapping or sending is logged together with the
     * offending message and is not propagated; the message is dropped in that case.
     */
    public void query() {
        String inputMessage = Constants.getPaper().poll();
        if (inputMessage == null || inputMessage.length() <= MIN_MESSAGE_LENGTH) {
            return;
        }

        try {
            // A parse failure goes straight to the handler below, so the log shows the
            // real cause instead of a follow-up NullPointerException on an empty map.
            Map<String, Object> messageMap = JsonUtils.parseObject(inputMessage);
            Paper paper = buildPaper(messageMap);

            LOGGER.info("Parse Paper={}", JSONObject.toJSON(paper));
            ElastiProducerHigh elastiProducer =
                    ElastiProducerHigh.getInstance(1, 3, ES_INDEX, ES_TYPE);
            elastiProducer.sendMessageToEs(JsonUtils.toJSONString(paper));
        } catch (Exception e) {
            LOGGER.error("Parse PaperError={}", JSONObject.toJSON(inputMessage), e);
        }
    }

    /**
     * Maps a parsed message onto a new {@link Paper}.
     *
     * <p>Raw {@code List} locals are used when handing values to {@code Paper} setters
     * because the setter signatures are not visible from this file.
     *
     * @param messageMap parsed message; keys read are title, crawlUrl, content, filePath,
     *                   filePathSize, authors, pubDate, doi, classify, keywords, summary,
     *                   topics, fieldsSubject, references and country
     * @return the populated paper
     * @throws Exception declared broadly because the project helpers called here are not
     *                   visible from this file; a missing or malformed "pubDate" surfaces
     *                   as a runtime exception from {@link #toDateOnly(String)}
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static Paper buildPaper(Map<String, Object> messageMap) throws Exception {
        Paper paper = new Paper();
        paper.setTitle((String) messageMap.get("title"));
        paper.setCrawlUrl((String) messageMap.get("crawlUrl"));
        paper.setContent((String) messageMap.get("content"));

        if (messageMap.containsKey("filePath")) {
            List cleanedPaths = stripFileHost((List<String>) messageMap.get("filePath"));
            paper.setFilePath(cleanedPaths);
            paper.setFilePathSize((List) messageMap.get("filePathSize"));
        }

        // Authors are only set when at least one entry was produced.
        List authorEntries =
                toAuthorEntries((List<Map<String, Object>>) messageMap.get("authors"));
        if (!authorEntries.isEmpty()) {
            paper.setAuthors(authorEntries);
        }

        List sourceEntries = toSourceEntries(messageMap.get("crawlUrl"));
        paper.setSource(sourceEntries);

        paper.setPubDate(toDateOnly((String) messageMap.get("pubDate")));

        paper.setDoi((String) messageMap.get("doi"));
        paper.setClassify((String) messageMap.get("classify"));
        paper.setKeywords((String) messageMap.get("keywords"));
        paper.setSummary((String) messageMap.get("summary"));
        paper.setTopics((String) messageMap.get("topics"));
        paper.setFieldsSubject((String) messageMap.get("fieldsSubject"));
        paper.setReferences((String) messageMap.get("references"));

        // The same MD5 of the DOI is used for all three identifier fields.
        String docId = GetMD5Code((String) messageMap.get("doi"));
        paper.setDocId(docId);
        paper.setDataId(docId);
        paper.set_id_(docId);

        paper.setCountry((String) messageMap.get("country"));
        paper.setTranslatetitle("");
        paper.setTranslatekeywords("");
        paper.setTranslatesummary("");
        paper.setIsshow("20250520");

        long createdAtMillis = System.currentTimeMillis();
        paper.setCreateTime(createdAtMillis);
        paper.setCreateTimeStr(DataCheckUtil.getCurrentTime(createdAtMillis));
        return paper;
    }

    /**
     * Returns a copy of {@code urls} with every literal occurrence of
     * {@link #FILE_HOST_PREFIX} removed from each element.
     */
    private static List<String> stripFileHost(List<String> urls) {
        List<String> cleaned = new ArrayList<>();
        for (String url : urls) {
            // String.replace is literal; replaceAll would treat '.' in the host as a wildcard.
            cleaned.add(url.replace(FILE_HOST_PREFIX, ""));
        }
        return cleaned;
    }

    /**
     * Converts the incoming author maps into author entries.
     *
     * <p>Every key/value pair of every incoming map yields one entry of the form
     * {@code {id: "", name: <key>, organs: [{id: "", name: <value>}]}}; the key is used as
     * the author name and the value as the name of the author's organization.
     *
     * @param authors incoming author maps; {@code null} is treated like an empty list
     * @return the author entries, empty when there is nothing to convert
     */
    private static List<Map<String, Object>> toAuthorEntries(List<Map<String, Object>> authors) {
        List<Map<String, Object>> entries = new ArrayList<>();
        if (authors == null) {
            return entries;
        }
        for (Map<String, Object> nameToOrgan : authors) {
            for (Map.Entry<String, Object> pair : nameToOrgan.entrySet()) {
                // Organization information.
                Map<String, Object> organ = new HashMap<>();
                organ.put("id", "");
                organ.put("name", pair.getValue());
                List<Map<String, Object>> organs = new ArrayList<>();
                organs.add(organ);

                Map<String, Object> author = new HashMap<>();
                author.put("id", "");
                author.put("name", pair.getKey());
                author.put("organs", organs);
                entries.add(author);
            }
        }
        return entries;
    }

    /**
     * Builds the single-element source list: one map with an empty id, the crawl URL as
     * name and an empty type.
     */
    private static List<Map<String, Object>> toSourceEntries(Object crawlUrl) {
        Map<String, Object> source = new HashMap<>();
        source.put("id", "");
        source.put("name", crawlUrl);
        source.put("type", "");
        List<Map<String, Object>> sources = new ArrayList<>();
        sources.add(source);
        return sources;
    }

    /**
     * Reformats a {@code yyyy-MM-dd HH:mm:ss} timestamp as {@code yyyy-MM-dd}.
     *
     * @throws RuntimeException if {@code timestamp} is {@code null} or does not match the
     *                          expected input pattern
     */
    private static String toDateOnly(String timestamp) {
        return LocalDateTime.parse(timestamp, PUB_DATE_INPUT).format(PUB_DATE_OUTPUT);
    }
}