commit
ffc71d82fa
18 changed files with 1293 additions and 0 deletions
-
3.gitignore
-
163pom.xml
-
28src/main/java/com/bfd/youzhiapi/YouzhiApiApplication.java
-
55src/main/java/com/bfd/youzhiapi/config/DataTypeEnum.java
-
25src/main/java/com/bfd/youzhiapi/config/GlobalConfig.java
-
19src/main/java/com/bfd/youzhiapi/entity/CacheEntity.java
-
33src/main/java/com/bfd/youzhiapi/entity/KfkEntity.java
-
33src/main/java/com/bfd/youzhiapi/entity/TaskEntity.java
-
31src/main/java/com/bfd/youzhiapi/mapper/ScheduleMapper.java
-
336src/main/java/com/bfd/youzhiapi/service/ScheduleService.java
-
88src/main/java/com/bfd/youzhiapi/util/HttpUtil.java
-
86src/main/java/com/bfd/youzhiapi/util/KfkUtil.java
-
270src/main/java/com/bfd/youzhiapi/util/Md5SignUtil.java
-
29src/main/java/com/bfd/youzhiapi/util/Utils.java
-
26src/main/resources/application.yml
-
38src/main/resources/logback-spring.xml
-
17src/main/resources/mapper/ScheduleMapper.xml
-
13src/test/java/com/bfd/youzhiapi/YouzhiApiApplicationTests.java
@ -0,0 +1,3 @@ |
|||
/target/ |
|||
/logs/ |
|||
/.idea/ |
@ -0,0 +1,163 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
<groupId>com.bfd</groupId> |
|||
<artifactId>youzhiApi</artifactId> |
|||
<version>0.0.1-SNAPSHOT</version> |
|||
<name>youzhiApi</name> |
|||
<description>youzhiApi</description> |
|||
<properties> |
|||
<java.version>1.8</java.version> |
|||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> |
|||
<spring-boot.version>2.6.13</spring-boot.version> |
|||
</properties> |
|||
<dependencies> |
|||
<dependency> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-starter-web</artifactId> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.mybatis.spring.boot</groupId> |
|||
<artifactId>mybatis-spring-boot-starter</artifactId> |
|||
<version>2.2.2</version> |
|||
</dependency> |
|||
|
|||
<dependency> |
|||
<groupId>com.mysql</groupId> |
|||
<artifactId>mysql-connector-j</artifactId> |
|||
<scope>runtime</scope> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.projectlombok</groupId> |
|||
<artifactId>lombok</artifactId> |
|||
<optional>true</optional> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-starter-test</artifactId> |
|||
<scope>test</scope> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>com.squareup.okhttp3</groupId> |
|||
<artifactId>okhttp</artifactId> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>com.alibaba.fastjson2</groupId> |
|||
<artifactId>fastjson2</artifactId> |
|||
<version>2.0.17</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>cn.hutool</groupId> |
|||
<artifactId>hutool-all</artifactId> |
|||
<version>5.8.27</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.apache.kafka</groupId> |
|||
<artifactId>kafka-clients</artifactId> |
|||
<version>2.7.1</version> |
|||
</dependency> |
|||
<dependency> |
|||
<groupId>org.jsoup</groupId> |
|||
<artifactId>jsoup</artifactId> |
|||
<version>1.7.3</version> |
|||
</dependency> |
|||
</dependencies> |
|||
<dependencyManagement> |
|||
<dependencies> |
|||
<dependency> |
|||
<groupId>org.springframework.boot</groupId> |
|||
<artifactId>spring-boot-dependencies</artifactId> |
|||
<version>${spring-boot.version}</version> |
|||
<type>pom</type> |
|||
<scope>import</scope> |
|||
</dependency> |
|||
</dependencies> |
|||
</dependencyManagement> |
|||
|
|||
<build> |
|||
<plugins> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-jar-plugin</artifactId> |
|||
<configuration> |
|||
<!--不打入jar包的文件类型或者路径--> |
|||
<excludes> |
|||
<exclude>*.properties</exclude> |
|||
<exclude>*.yml</exclude> |
|||
<exclude>*.yaml</exclude> |
|||
</excludes> |
|||
<archive> |
|||
<manifest> |
|||
<!-- 执行的主程序路径 --> |
|||
<mainClass>com.bfd.youzhiapi.YouzhiApiApplication</mainClass> |
|||
<!--是否要把第三方jar放到manifest的classpath中--> |
|||
<addClasspath>true</addClasspath> |
|||
<!--生成的manifest中classpath的前缀,因为要把第三方jar放到lib目录下,所以classpath的前缀是lib/--> |
|||
<classpathPrefix>lib/</classpathPrefix> |
|||
<!-- 打包时 MANIFEST.MF 文件不记录的时间戳版本 --> |
|||
<useUniqueVersions>false</useUniqueVersions> |
|||
</manifest> |
|||
<manifestEntries> |
|||
<!-- 在 Class-Path 下添加配置文件的路径 --> |
|||
<Class-Path>config/</Class-Path> |
|||
</manifestEntries> |
|||
</archive> |
|||
</configuration> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-dependency-plugin</artifactId> |
|||
<executions> |
|||
<execution> |
|||
<id>copy</id> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>copy-dependencies</goal> |
|||
</goals> |
|||
<configuration> |
|||
<outputDirectory>${project.build.directory}/lib/</outputDirectory> |
|||
</configuration> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
|
|||
<!-- Copies the external configuration files next to the jar (target/config), matching the
     Class-Path entry "config/" written into the manifest by maven-jar-plugin. -->
<plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-resources-plugin</artifactId>
    <executions>
        <execution>
            <id>copy-resources</id>
            <phase>package</phase>
            <goals>
                <goal>copy-resources</goal>
            </goals>
            <configuration>
                <resources>
                    <!-- Copy the configuration files to the output directory below -->
                    <resource>
                        <directory>src/main/resources/</directory>
                        <includes>
                            <include>*.properties</include>
                            <include>*.yml</include>
                            <!-- was <exclude> inside <includes>; the jar plugin excludes *.yaml
                                 from the jar, so it has to be copied here as well -->
                            <include>*.yaml</include>
                        </includes>
                    </resource>
                </resources>
                <outputDirectory>${project.build.directory}/config</outputDirectory>
            </configuration>
        </execution>
    </executions>
</plugin>
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-compiler-plugin</artifactId> |
|||
<configuration> |
|||
<source>8</source> |
|||
<target>8</target> |
|||
</configuration> |
|||
</plugin> |
|||
</plugins> |
|||
</build> |
|||
|
|||
</project> |
@ -0,0 +1,28 @@ |
|||
package com.bfd.youzhiapi; |
|||
|
|||
import com.bfd.youzhiapi.service.ScheduleService; |
|||
import org.springframework.boot.SpringApplication; |
|||
import org.springframework.boot.autoconfigure.SpringBootApplication; |
|||
import org.springframework.context.ConfigurableApplicationContext; |
|||
import org.springframework.scheduling.annotation.EnableScheduling; |
|||
|
|||
import javax.annotation.Resource; |
|||
|
|||
@SpringBootApplication |
|||
@EnableScheduling |
|||
public class YouzhiApiApplication { |
|||
|
|||
@Resource |
|||
ScheduleService scheduleService; |
|||
public static void main(String[] args) { |
|||
ConfigurableApplicationContext run = SpringApplication.run(YouzhiApiApplication.class, args); |
|||
YouzhiApiApplication bean = run.getBean(YouzhiApiApplication.class); |
|||
// bean.start(); |
|||
} |
|||
|
|||
// public void start(){ |
|||
// Thread thread = new Thread(scheduleService); |
|||
// thread.start(); |
|||
// } |
|||
|
|||
} |
@ -0,0 +1,55 @@ |
|||
package com.bfd.youzhiapi.config; |
|||
|
|||
/**
 * Maps the document-type value stored on a collection task ({@code field}) to the type code
 * used by the search API ({@code code}) and to a display label ({@code type}).
 *
 * @author guowei
 */
public enum DataTypeEnum {

    /** Journal article. */
    PERIODICAL(10, 1, "学术期刊"),
    /** Dissertation. */
    DISSERTATION(20, 2, "学位论文"),
    /** Conference paper. */
    CONFERENCE(30, 3, "会议");

    private final Integer code;

    private final Integer field;

    private final String type;

    DataTypeEnum(Integer code, Integer field, String type) {
        this.code = code;
        this.field = field;
        this.type = type;
    }

    /** @return the type code used by the search API */
    public Integer getCode() {
        return code;
    }

    /** @return the document-type value stored on the task */
    public Integer getField() {
        return field;
    }

    /** @return the display label of this document type */
    public String getType() {
        return type;
    }

    /**
     * Looks up the search API code for a task document-type value.
     *
     * @param field document-type value stored on the task; may be {@code null}
     * @return the matching API code, or {@code null} when no constant matches
     */
    public static Integer getCodeByField(Integer field) {
        for (DataTypeEnum candidate : DataTypeEnum.values()) {
            // equals(), not ==: both sides are boxed Integers, and reference comparison
            // only succeeds by accident when both happen to come from the Integer cache.
            if (candidate.getField().equals(field)) {
                return candidate.getCode();
            }
        }
        return null;
    }

    /**
     * Looks up the display label for a search API code.
     *
     * @param code type code used by the search API; may be {@code null}
     * @return the matching label, or {@code null} when no constant matches
     */
    public static String getTypeByCode(Integer code) {
        for (DataTypeEnum candidate : DataTypeEnum.values()) {
            if (candidate.getCode().equals(code)) {
                return candidate.getType();
            }
        }
        return null;
    }
}
@ -0,0 +1,25 @@ |
|||
package com.bfd.youzhiapi.config; |
|||
|
|||
import org.springframework.stereotype.Component; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
public class GlobalConfig { |
|||
|
|||
/** |
|||
* 外部接口所用 appId |
|||
*/ |
|||
public static final String APPID = "c4d532304c6b4497b1ad"; |
|||
|
|||
/** |
|||
* 外部接口所用 appSecret |
|||
*/ |
|||
public static final String APPSECRET = "dc41973ee03e471887c77c4a532dbfc3"; |
|||
|
|||
/** |
|||
* 外部接口所用 机构Id |
|||
*/ |
|||
public static final Integer ORGANID = 392; |
|||
} |
@ -0,0 +1,19 @@ |
|||
package com.bfd.youzhiapi.entity; |
|||
|
|||
import lombok.Data; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Data |
|||
public class CacheEntity { |
|||
private int id; |
|||
|
|||
private String doi; |
|||
|
|||
private String downloadId; |
|||
|
|||
private String uploadTime; |
|||
|
|||
private String downloadUrl; |
|||
} |
@ -0,0 +1,33 @@ |
|||
package com.bfd.youzhiapi.entity; |
|||
|
|||
import lombok.Data; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Data |
|||
public class KfkEntity { |
|||
private Object attr; |
|||
private String author; |
|||
private String brief; |
|||
private String cid = "Nkyzd"; |
|||
private String content; |
|||
private String dedupKey; |
|||
|
|||
private String field; |
|||
private String forwardcontent; |
|||
private String iid; |
|||
private Boolean isDownload = false; |
|||
private String news_id; |
|||
private String post_time; |
|||
private String searchKeyword; |
|||
private String source; |
|||
private List tasks = new ArrayList<>(); |
|||
private String title; |
|||
private String type = "newscontent"; |
|||
private String url; |
|||
private String version = "1"; |
|||
} |
@ -0,0 +1,33 @@ |
|||
package com.bfd.youzhiapi.entity; |
|||
|
|||
import lombok.Data; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Data |
|||
public class TaskEntity { |
|||
private Integer rid; |
|||
private Integer siteId; |
|||
private String cid; |
|||
private String channelName; |
|||
private String keyword; |
|||
private Integer pageTypeID; |
|||
private Integer weight; |
|||
private String url; |
|||
private Integer pageIdx; |
|||
private Integer nextPageTime; |
|||
private Integer status; |
|||
private Integer intv; |
|||
private String attachTag; |
|||
private String lastcrawltime; |
|||
private String nextcrawltime; |
|||
private String createTime; |
|||
private String modiTime; |
|||
private Integer crawl_mode; |
|||
private Integer crawl_account; |
|||
private String page_switchs; |
|||
private Integer task_hash_code; |
|||
private Long crawlStartTime; |
|||
private Long crawlEndTime; |
|||
} |
@ -0,0 +1,31 @@ |
|||
package com.bfd.youzhiapi.mapper; |
|||
|
|||
import com.bfd.youzhiapi.entity.CacheEntity; |
|||
import com.bfd.youzhiapi.entity.TaskEntity; |
|||
import org.apache.ibatis.annotations.Mapper; |
|||
import org.springframework.stereotype.Repository; |
|||
|
|||
import java.util.List; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Mapper |
|||
public interface ScheduleMapper { |
|||
|
|||
/** |
|||
* 查询任务 |
|||
* @param status |
|||
* @return |
|||
*/ |
|||
List<TaskEntity> queryTaskByStatus(int status); |
|||
|
|||
/** |
|||
* 更改任务状态 |
|||
* @param taskEntity |
|||
* @return |
|||
*/ |
|||
int updateTaskStatus(TaskEntity taskEntity); |
|||
|
|||
CacheEntity queryCacheByDoi(String doi); |
|||
} |
@ -0,0 +1,336 @@ |
|||
package com.bfd.youzhiapi.service; |
|||
|
|||
import cn.hutool.core.date.DateUtil; |
|||
import cn.hutool.core.util.IdUtil; |
|||
import cn.hutool.core.util.StrUtil; |
|||
import cn.hutool.crypto.SecureUtil; |
|||
import com.alibaba.fastjson2.JSON; |
|||
import com.alibaba.fastjson2.JSONArray; |
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import com.bfd.youzhiapi.config.DataTypeEnum; |
|||
import com.bfd.youzhiapi.config.GlobalConfig; |
|||
import com.bfd.youzhiapi.entity.CacheEntity; |
|||
import com.bfd.youzhiapi.entity.KfkEntity; |
|||
import com.bfd.youzhiapi.entity.TaskEntity; |
|||
import com.bfd.youzhiapi.mapper.ScheduleMapper; |
|||
import com.bfd.youzhiapi.util.HttpUtil; |
|||
import com.bfd.youzhiapi.util.KfkUtil; |
|||
import com.bfd.youzhiapi.util.Md5SignUtil; |
|||
import com.bfd.youzhiapi.util.Utils; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.jsoup.Jsoup; |
|||
import org.springframework.scheduling.annotation.Scheduled; |
|||
import org.springframework.stereotype.Service; |
|||
|
|||
import javax.annotation.Resource; |
|||
import java.util.*; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Service |
|||
@Slf4j |
|||
public class ScheduleService{ |
|||
@Resource |
|||
ScheduleMapper scheduleMapper; |
|||
|
|||
@Scheduled(cron = "0 0/2 * * * ?") |
|||
public void run() { |
|||
List<TaskEntity> taskEntities = scheduleMapper.queryTaskByStatus(1); |
|||
log.info("查询到{}条未采集任务", taskEntities.size()); |
|||
for (TaskEntity task : taskEntities) { |
|||
|
|||
log.info("开始采集任务:{}", task.getKeyword()); |
|||
//最大翻页限制20页 |
|||
Integer maxPageNum = task.getNextPageTime(); |
|||
if (maxPageNum == -1){ maxPageNum = 20;} |
|||
|
|||
String attachTag = task.getAttachTag(); |
|||
JSONObject attrJSON = JSONObject.parseObject(attachTag); |
|||
if (!attrJSON.containsKey("field")) { |
|||
log.error("没找到field,keyword:{},跳过采集", task.getKeyword()); |
|||
continue; |
|||
} |
|||
Integer field = attrJSON.getInteger("field"); |
|||
String documentType = attrJSON.getString("documentType"); |
|||
String[] documentSplit = documentType.split(","); |
|||
List<String> apiType = new ArrayList<>(); |
|||
//根据field 映射 检索接口的参数 |
|||
for (String type : documentSplit) { |
|||
Integer codeByField = DataTypeEnum.getCodeByField(Integer.valueOf(type)); |
|||
apiType.add(String.valueOf(codeByField)); |
|||
} |
|||
if (apiType.size() == 0) { |
|||
log.error("没找到documentType,keyword:{},跳过采集", task.getKeyword()); |
|||
continue; |
|||
} else { |
|||
log.info("采集类型:{}", String.join(",", apiType)); |
|||
} |
|||
Long crawlStartTime = task.getCrawlStartTime(); |
|||
Long crawlEndTime = task.getCrawlEndTime(); |
|||
//获取采集范围 年份 |
|||
int yearStart = Utils.getYearFromTimestamp(crawlStartTime); |
|||
int yearend = Utils.getYearFromTimestamp(crawlEndTime); |
|||
log.info("采集年份范围:{} ~ {}",yearStart,yearend); |
|||
//每个采集类型都采集一遍 |
|||
for(String type:apiType) { |
|||
int currentPageNum = 1; |
|||
log.info("开始采集 {} 类型数据",type); |
|||
do { |
|||
try { |
|||
SortedMap<Object, Object> parameters = new TreeMap<Object, Object>(); |
|||
parameters.put("randomStr", DateUtil.format(new Date(), "yyyyMMddHHmmss")); |
|||
parameters.put("appId", GlobalConfig.APPID); |
|||
parameters.put("title", task.getKeyword()); |
|||
parameters.put("page", currentPageNum); |
|||
parameters.put("pageSize", 10); |
|||
parameters.put("type", type); |
|||
String sign = Md5SignUtil.sign(parameters); |
|||
parameters.put("sign", sign); |
|||
String data = HttpUtil.getData(JSON.toJSONString(parameters)); |
|||
JSONObject jsonObject = JSONObject.parseObject(data); |
|||
JSONObject data1 = jsonObject.getJSONObject("data"); |
|||
JSONArray records = data1.getJSONArray("records"); |
|||
for (Object record : records) { |
|||
JSONObject item = (JSONObject) record; |
|||
KfkEntity kfk = new KfkEntity(); |
|||
Map attr = new HashMap<>(); |
|||
attr.put("attachTag", JSON.parse(task.getAttachTag())); |
|||
kfk.setAttr(attr); |
|||
kfk.setAuthor(item.getString("author")); |
|||
kfk.setContent(item.getString("abstractE")); |
|||
kfk.setForwardcontent(item.getString("abstractE")); |
|||
String organ = item.getString("organ"); |
|||
Map brief = new HashMap<>(); |
|||
|
|||
kfk.setField(String.valueOf(field)); |
|||
kfk.setTitle(Jsoup.parse(item.getString("title")).text()); |
|||
kfk.setSearchKeyword(task.getKeyword()); |
|||
String source = ""; |
|||
switch (item.getInteger("type")) { |
|||
case 10: |
|||
source = item.getString("name"); |
|||
break; |
|||
case 20: |
|||
source = item.getString("school"); |
|||
break; |
|||
case 30: |
|||
source = item.getString("conferenceName"); |
|||
break; |
|||
} |
|||
kfk.setSource(source); |
|||
// if (item.getString("abstractURL") != null) { |
|||
// kfk.setUrl(item.getString("abstractURL")); |
|||
// } else { |
|||
// kfk.setUrl(item.getString("pdfURL")); |
|||
// } |
|||
String uuid = IdUtil.simpleUUID(); |
|||
kfk.setIid(uuid); |
|||
kfk.setNews_id(uuid); |
|||
kfk.setUrl(uuid); |
|||
String year = item.getString("year"); |
|||
//判断年份 是不是4位数字 (有错误数据的情况) |
|||
boolean fourDigitNumber = Utils.isFourDigitNumber(year); |
|||
if (!fourDigitNumber) { |
|||
log.error("year不是4位数字,跳过,year:{}", year); |
|||
continue; |
|||
} |
|||
if (!Utils.isYearInRange(Integer.parseInt(year), yearStart, yearend)) { |
|||
log.error("year不在采集年份范围,跳过,year:{}", year); |
|||
continue; |
|||
} |
|||
kfk.setPost_time(item.getString("year") + "-01-01 00:00:00"); |
|||
kfk.setField(String.valueOf(field)); |
|||
|
|||
//没有机构字段 并且是学位论文,学校作为机构 |
|||
if (organ == null || organ == "") { |
|||
organ = item.getString("school"); |
|||
List agencys = new ArrayList<>(); |
|||
if (organ == null || organ == "") { |
|||
organ = ""; |
|||
} else { |
|||
Map<String, Object> agency = new HashMap<>(); |
|||
agency.put("name", organ); |
|||
agency.put("url", IdUtil.simpleUUID()); // 添加第二个字段 |
|||
agencys.add(agency); |
|||
} |
|||
brief.put("agency", organ); |
|||
brief.put("agencys", agencys); |
|||
brief.put("author", item.getString("author")); |
|||
Map<String, Object> agencyAuthor = new HashMap<>(); |
|||
agencyAuthor.put("agency", organ); |
|||
agencyAuthor.put("author", item.getString("author")); // 添加第二个字段 |
|||
List authorAndAgency = new ArrayList<>(); |
|||
authorAndAgency.add(agencyAuthor); |
|||
brief.put("authorAndAgency", authorAndAgency); |
|||
} else { |
|||
List agencys = new ArrayList<>(); |
|||
List agencyString = new ArrayList<>(); |
|||
Pattern pattern = Pattern.compile("\\[([a-z\\d])\\]([^;]+)"); |
|||
Matcher matcher = pattern.matcher(organ); |
|||
Map agencyMap = new HashMap<>(); |
|||
while (matcher.find()) { |
|||
String key = matcher.group(1); // 获取编号 |
|||
String value = matcher.group(2).trim(); // 获取机构名称并去除前后空格 |
|||
agencyMap.put(key, value); |
|||
agencyString.add(value); |
|||
Map agency = new HashMap<>(); |
|||
agency.put("name", value); |
|||
agency.put("url", IdUtil.simpleUUID()); |
|||
agencys.add(agency); |
|||
} |
|||
brief.put("agency", String.join(",", agencyString)); |
|||
brief.put("agencys", agencys); |
|||
|
|||
// 正则表达式匹配模式,匹配 "姓名[编号][编号]..." |
|||
// Pattern patternAuthor = Pattern.compile("([\\p{L} .]+)(\\[\\d+])+(?=;|$)"); |
|||
Pattern patternAuthor = Pattern.compile("([\\p{L} .-]+)((\\[\\d+\\])|(\\[[a-zA-Z,]+\\]))+(?=;|$)"); |
|||
Matcher matcherAuthor = patternAuthor.matcher(item.getString("author")); |
|||
Map<String, List<String>> authorAffiliations = new HashMap<>(); |
|||
while (matcherAuthor.find()) { |
|||
String name = matcherAuthor.group(1).trim(); // 获取姓名并去除前后空格 |
|||
String affiliationPart = matcherAuthor.group(0); // 获取整个匹配串 |
|||
|
|||
// 提取所有编号 |
|||
List<String> affiliations = new ArrayList<>(); |
|||
Matcher numberMatcher = Pattern.compile("\\[(\\d+)]").matcher(affiliationPart); |
|||
while (numberMatcher.find()) { |
|||
affiliations.add(numberMatcher.group(1)); |
|||
} |
|||
Matcher letterMatcher = Pattern.compile("\\[(.*?)\\]").matcher(affiliationPart); |
|||
if (letterMatcher.find()) { |
|||
String values = letterMatcher.group(1); |
|||
String[] items = values.split(","); |
|||
for (String key : items) { |
|||
affiliations.add(key); |
|||
} |
|||
} |
|||
authorAffiliations.put(name, affiliations); |
|||
} |
|||
Set<String> strings = authorAffiliations.keySet(); |
|||
List authorAndAgency = new ArrayList<>(); |
|||
if (strings.size() > 0) { |
|||
brief.put("author", String.join(",", strings)); |
|||
kfk.setAuthor(String.join(",", strings)); |
|||
for (String name : strings) { |
|||
List<String> organNum = authorAffiliations.get(name); |
|||
for (String authorOrgan : organNum) { |
|||
Map<String, Object> agencyAuthor = new HashMap<>(); |
|||
agencyAuthor.put("agency", agencyMap.get(authorOrgan)); |
|||
agencyAuthor.put("author", name); |
|||
authorAndAgency.add(agencyAuthor); |
|||
} |
|||
} |
|||
brief.put("authorAndAgency", authorAndAgency); |
|||
} else { |
|||
brief.put("author", item.getString("author")); |
|||
brief.put("authorAndAgency", authorAndAgency); |
|||
} |
|||
|
|||
} |
|||
|
|||
brief.put("author_agency_urls", new ArrayList<>()); |
|||
brief.put("author_urls", new ArrayList<>()); |
|||
brief.put("data", DataTypeEnum.getTypeByCode(item.getInteger("type"))); |
|||
brief.put("date", DateUtil.formatDate(DateUtil.parse(kfk.getPost_time()))); |
|||
brief.put("detailUrl", kfk.getUrl()); |
|||
brief.put("download", ""); |
|||
brief.put("eisci", ""); |
|||
brief.put("fileUrl", ""); |
|||
brief.put("funding", ""); |
|||
brief.put("id", item.getString("id")); |
|||
brief.put("initial_mark", ""); |
|||
brief.put("keyword", task.getKeyword()); |
|||
brief.put("keywords", item.getString("keyword")); |
|||
String language = item.getString("language"); |
|||
if (StrUtil.isEmpty(language)){ |
|||
language = "英语"; |
|||
} |
|||
brief.put("language", language); |
|||
brief.put("paramter", new HashMap<>()); |
|||
brief.put("publish_agency_urls", new ArrayList<>()); |
|||
brief.put("quote", ""); |
|||
brief.put("source", source); |
|||
brief.put("summary", kfk.getContent().replace("\"", "")); |
|||
brief.put("title", kfk.getTitle()); |
|||
brief.put("wxtype", language); |
|||
kfk.setBrief(JSON.toJSONString(brief)); |
|||
|
|||
|
|||
/**2024.12.27 新增下载文件需求,下载文件需要3个接口 |
|||
* 1.新增单条需求(根据doi) |
|||
* 2.获取清单,已上传时间查询 |
|||
* 3.获取下载链接 |
|||
* 根据doi 查询缓存表里是否已经下载过附件,有的话 直接下载推送 |
|||
* 没有的话 放入队列等待处理完成 |
|||
*/ |
|||
// String doi = item.getString("doi"); |
|||
// CacheEntity cacheEntity = scheduleMapper.queryCacheByDoi(doi); |
|||
// if (cacheEntity == null){ |
|||
// log.info("缓存库没有此数据,新增数据"); |
|||
// //组装参数 |
|||
// SortedMap<Object, Object> params = new TreeMap<Object, Object>(); |
|||
// params.put("randomStr", DateUtil.format(new Date(), "yyyyMMddHHmmss")); |
|||
// params.put("appId", GlobalConfig.APPID); |
|||
// params.put("organId",GlobalConfig.ORGANID); |
|||
// params.put("doi",doi); |
|||
// String uploadSign = Md5SignUtil.sign(params); |
|||
// params.put("sign", uploadSign); |
|||
// String uploadData = HttpUtil.uploadDoi(JSON.toJSONString(params)); |
|||
// JSONObject uploadJon = JSONObject.parseObject(uploadData); |
|||
// |
|||
// cacheEntity = new CacheEntity(); |
|||
// cacheEntity.setDoi(doi); |
|||
// |
|||
// } |
|||
System.out.println(JSON.toJSONString(kfk)); |
|||
KfkUtil.sendKafka(JSON.toJSONString(kfk)); |
|||
|
|||
} |
|||
|
|||
|
|||
Integer totalPage = data1.getInteger("pages"); |
|||
if (maxPageNum > totalPage) { |
|||
maxPageNum = totalPage; |
|||
} |
|||
log.info("第{}页采集,最大限制页数{},共{}页", currentPageNum, maxPageNum, totalPage); |
|||
currentPageNum++; |
|||
Thread.sleep(1000 * 5); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
log.error("采集失败", e); |
|||
} |
|||
} while (currentPageNum <= maxPageNum); |
|||
log.info("采集完成 {} 类型数据",type); |
|||
} |
|||
log.info("关键词:{} 采集完成",task.getKeyword()); |
|||
task.setStatus(3); |
|||
scheduleMapper.updateTaskStatus(task); |
|||
|
|||
} |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
Map<String, List<String>> authorAffiliations = new HashMap<>(); |
|||
|
|||
String author = "Utku Kumbul[1]; Faruk Uysal[1]; Cicero S. Vaucher[1][2]; Alexander Yarovoy[1]"; |
|||
Pattern pattern = Pattern.compile("([\\p{L} .]+)(\\[\\d+])+(?=;|$)"); |
|||
Matcher matcher = pattern.matcher(author); |
|||
while (matcher.find()) { |
|||
String name = matcher.group(1).trim(); // 获取姓名并去除前后空格 |
|||
String affiliationPart = matcher.group(0); // 获取整个匹配串 |
|||
|
|||
// 提取所有编号 |
|||
List<String> affiliations = new ArrayList<>(); |
|||
Matcher numberMatcher = Pattern.compile("\\[(\\d+)]").matcher(affiliationPart); |
|||
while (numberMatcher.find()) { |
|||
affiliations.add(numberMatcher.group(1)); |
|||
} |
|||
authorAffiliations.put(name, affiliations); |
|||
} |
|||
// 打印结果 |
|||
authorAffiliations.forEach((k, v) -> System.out.println(k + " -> " + v)); |
|||
} |
|||
} |
@ -0,0 +1,88 @@ |
|||
package com.bfd.youzhiapi.util; |
|||
|
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import okhttp3.*; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class HttpUtil { |
|||
|
|||
/** |
|||
* 关键词检索 请求接口 |
|||
* @param parameters |
|||
* @return |
|||
*/ |
|||
public static String getData(String parameters){ |
|||
String result = ""; |
|||
try { |
|||
// 目标 URL |
|||
String url = "http://api.keyanzhidian.com/api/literature/search"; |
|||
// 创建 JSON 请求体 |
|||
MediaType JSON = MediaType.parse("application/json; charset=utf-8"); |
|||
// 使用 fastjson 构建 JSON |
|||
// JSONObject jsonObject = new JSONObject(); |
|||
// jsonObject.put("content", content); |
|||
// String json = jsonObject.toJSONString(); |
|||
RequestBody body = RequestBody.create(JSON, parameters); |
|||
// 构建 POST 请求 |
|||
Request request = new Request.Builder() |
|||
.url(url) |
|||
.post(body) |
|||
.build(); |
|||
OkHttpClient client = new OkHttpClient.Builder() |
|||
.connectTimeout(60, TimeUnit.SECONDS) // 连接超时 |
|||
.readTimeout(30, TimeUnit.SECONDS) // 读取超时 |
|||
.writeTimeout(15, TimeUnit.SECONDS) // 写入超时 |
|||
.build(); |
|||
Response response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
result = response.body().string(); |
|||
log.warn("Response: " + result); |
|||
} |
|||
}catch (Exception e){ |
|||
e.printStackTrace(); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
/** |
|||
* 新增单条需求 |
|||
* @param parameters |
|||
* @return |
|||
*/ |
|||
public static String uploadDoi(String parameters){ |
|||
String result = ""; |
|||
try { |
|||
// 目标 URL |
|||
String url = "http://api.keyanzhidian.com/api/demand/create"; |
|||
// 创建 JSON 请求体 |
|||
MediaType JSON = MediaType.parse("application/json; charset=utf-8"); |
|||
RequestBody body = RequestBody.create(JSON, parameters); |
|||
// 构建 POST 请求 |
|||
Request request = new Request.Builder() |
|||
.url(url) |
|||
.post(body) |
|||
.build(); |
|||
OkHttpClient client = new OkHttpClient.Builder() |
|||
.connectTimeout(60, TimeUnit.SECONDS) // 连接超时 |
|||
.readTimeout(30, TimeUnit.SECONDS) // 读取超时 |
|||
.writeTimeout(15, TimeUnit.SECONDS) // 写入超时 |
|||
.build(); |
|||
Response response = client.newCall(request).execute(); |
|||
if (response.isSuccessful()) { |
|||
result = response.body().string(); |
|||
log.warn("Response: " + result); |
|||
} |
|||
}catch (Exception e){ |
|||
e.printStackTrace(); |
|||
} |
|||
return result; |
|||
} |
|||
} |
@ -0,0 +1,86 @@ |
|||
package com.bfd.youzhiapi.util; |
|||
|
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.apache.kafka.clients.producer.KafkaProducer; |
|||
import org.apache.kafka.clients.producer.ProducerConfig; |
|||
import org.apache.kafka.clients.producer.ProducerRecord; |
|||
import org.apache.kafka.common.serialization.StringSerializer; |
|||
import org.springframework.beans.factory.annotation.Value; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.util.Properties; |
|||
|
|||
/** |
|||
* @author guowei |
|||
* kfk工具类 |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class KfkUtil { |
|||
private static String topic; |
|||
|
|||
private static String brokerList; |
|||
|
|||
@Value("${crawl.kafka.topic}") |
|||
public void setTopic(String topic) { |
|||
KfkUtil.topic = topic; |
|||
} |
|||
|
|||
@Value("${crawl.kafka.brokers}") |
|||
public void setBrokerList(String brokerList) { |
|||
KfkUtil.brokerList = brokerList; |
|||
} |
|||
private static KafkaProducer<String, String> kafkaProducer; |
|||
|
|||
public static int num = 0; |
|||
|
|||
/** |
|||
* 获取KafkaProducer实例 |
|||
*/ |
|||
public static KafkaProducer<String, String> getProducer() { |
|||
// synchronized (kafkaProducer) { |
|||
if (kafkaProducer == null) { |
|||
Properties props = new Properties(); |
|||
//xxx服务器ip |
|||
props.put("bootstrap.servers", brokerList); |
|||
// props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,brokerList); |
|||
//所有follower都响应了才认为消息提交成功,即"committed" |
|||
props.put("acks", "all"); |
|||
//retries = MAX 无限重试,直到你意识到出现了问题:) |
|||
props.put("retries", 3); |
|||
//producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 |
|||
props.put("batch.size", 16384); |
|||
//batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms |
|||
//延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 |
|||
props.put("linger.ms", 1); |
|||
//producer可以用来缓存数据的内存大小。 |
|||
props.put("buffer.memory", 33554432); |
|||
props.put("key.serializer", |
|||
StringSerializer.class.getName()); |
|||
props.put("value.serializer", |
|||
StringSerializer.class.getName()); |
|||
kafkaProducer = new KafkaProducer<String, String>(props); |
|||
} |
|||
// } |
|||
return kafkaProducer; |
|||
} |
|||
|
|||
/** |
|||
* 关闭KafkaProducer实例 |
|||
*/ |
|||
public static void closeProducer() { |
|||
if (kafkaProducer != null) { |
|||
log.info("----------close producer----------"); |
|||
kafkaProducer.close(); |
|||
kafkaProducer = null; |
|||
} |
|||
} |
|||
|
|||
public static void sendKafka(String resultData) { |
|||
KafkaProducer<String, String> producer = getProducer(); |
|||
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData); |
|||
producer.send(se); |
|||
log.info("发送kafka成功"); |
|||
// num++; |
|||
} |
|||
} |
@ -0,0 +1,270 @@ |
|||
package com.bfd.youzhiapi.util; |
|||
|
|||
|
|||
import cn.hutool.core.date.DateUtil; |
|||
import com.alibaba.fastjson2.JSON; |
|||
import com.alibaba.fastjson2.JSONObject; |
|||
import com.bfd.youzhiapi.config.GlobalConfig; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.nio.charset.Charset; |
|||
import java.nio.charset.StandardCharsets; |
|||
import java.security.MessageDigest; |
|||
import java.util.*; |
|||
|
|||
/** |
|||
* Title: 接口签名工具类 |
|||
* |
|||
* <p> |
|||
* Description: |
|||
* </p> |
|||
* |
|||
* @author |
|||
*/ |
|||
@Component |
|||
@Slf4j |
|||
public class Md5SignUtil { |
|||
public static String Encoding_utf8 = "UTF-8"; |
|||
private static final String hexDigits[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}; |
|||
|
|||
/** |
|||
* 签名sign |
|||
* @param parameters |
|||
* 数据拼接 |
|||
* 将所有发送或者接收到的数据为集合M,将集合M内非空参数值的参数按照参数名ASCII码从小到大排序(字典序),使用URL键值对的格式(即key1=value1&key2=value2…)拼接成字符串stringA。 |
|||
* 特别注意以下重要规则: ◆ 参数名ASCII码从小到大排序(字典序); ◆ 如果参数的值为空不参与签名; ◆ 参数名区分大小写; ◆ |
|||
* 验证调用返回或主动通知时,传送的sign参数不参与签名,将生成的签名与该sign值作校验。 数据签名 |
|||
* 在stringA最后拼接上key得到stringSignTemp字符串,并对stringSignTemp进行MD5运算,再将得到的字符串所有字符转换为大写,得到sign值signValue。 |
|||
* @return |
|||
*/ |
|||
public static String sign(SortedMap<Object, Object> parameters) { |
|||
StringBuffer sb = new StringBuffer(); |
|||
Set es = parameters.entrySet();// 所有参与传参的参数按照accsii排序(升序) |
|||
Iterator it = es.iterator(); |
|||
while (it.hasNext()) { |
|||
Map.Entry entry = (Map.Entry) it.next(); |
|||
String k = (String) entry.getKey(); |
|||
Object v = entry.getValue(); |
|||
if (null != v && !"".equals(v) && !"sign".equals(k) && !"key".equals(k)) { |
|||
sb.append(k + "=" + v + "&"); |
|||
} |
|||
} |
|||
sb.append("appSecret=" + GlobalConfig.APPSECRET); |
|||
log.debug("Md5SignUtil.sign加密串为:{}", sb.toString()); |
|||
String sign = MD5Encode(sb.toString(), Encoding_utf8).toUpperCase(); |
|||
log.debug("Md5SignUtil.sign加密串后的签名为:{}", sign); |
|||
return sign; |
|||
} |
|||
|
|||
/** |
|||
* 返回结果签名sign |
|||
* |
|||
* @param responseDRO |
|||
* @param key |
|||
* @return |
|||
*/ |
|||
// public static String sign(ResponseDRO responseDRO, String key) { |
|||
// JSONObject infoJson = (JSONObject) JSONObject.toJSON(responseDRO); |
|||
// StringBuilder sb = new StringBuilder(); |
|||
// Object[] sortArra = infoJson.keySet().toArray(); |
|||
// // 按照ASCII排序 |
|||
// Arrays.sort(sortArra); |
|||
// for (Object k : sortArra) { |
|||
// Object v = infoJson.get(k); |
|||
// if (null != v && !"".equals(v)) { |
|||
// if (v instanceof JSONArray) { |
|||
// JSONArray tempJson = (JSONArray) v; |
|||
// List<SortedMap<String, Object>> allList = new ArrayList<>(); |
|||
// for (int i = 0; i < tempJson.size(); i++) { |
|||
// JSONObject obj = tempJson.getJSONObject(i); |
|||
// Map<String, Object> objMap = new HashMap<>(); |
|||
// for (Map.Entry<String, Object> entry : obj.entrySet()) { |
|||
// objMap.put(entry.getKey(), entry.getValue()); |
|||
// } |
|||
// |
|||
// SortedMap<String, Object> temps = new TreeMap<>();// 升序 |
|||
// temps.putAll(objMap); |
|||
// allList.add(temps); |
|||
// } |
|||
// v = JSON.toJSONString(allList); |
|||
// } |
|||
// sb.append(k).append("=").append(v).append("&"); |
|||
// } |
|||
// } |
|||
// sb.append("appSecret=" + key); |
|||
// logger.debug("Md5SignUtil.sign加密串为:{}", sb.toString()); |
|||
// String sign = MD5Encode(sb.toString(), Encoding_utf8).toUpperCase(); |
|||
// logger.debug("Md5SignUtil.sign加密串后的签名为:{}", sign); |
|||
// return sign; |
|||
// } |
|||
|
|||
/** |
|||
* 请求参数sign签名 |
|||
* |
|||
* @param baseQuery |
|||
* @param key |
|||
* @return |
|||
*/ |
|||
// public static String sign(BaseQuery baseQuery, String key) { |
|||
// JSONObject infoJson = (JSONObject) JSONObject.toJSON(baseQuery); |
|||
// StringBuilder sb = new StringBuilder(); |
|||
// Object[] sortArra = infoJson.keySet().toArray(); |
|||
// // 按照ASCII排序 |
|||
// Arrays.sort(sortArra); |
|||
// for (Object k : sortArra) { |
|||
// Object v = infoJson.get(k); |
|||
// if (null != v && !"".equals(v) && !"sign".equals(k)) { |
|||
// if (v instanceof JSONArray) { |
|||
// JSONArray tempJson = (JSONArray) v; |
|||
// List<SortedMap<String, Object>> allList = new ArrayList<>(); |
|||
// for (int i = 0; i < tempJson.size(); i++) { |
|||
// JSONObject obj = tempJson.getJSONObject(i); |
|||
// Map<String, Object> objMap = new HashMap<>(); |
|||
// for (Map.Entry<String, Object> entry : obj.entrySet()) { |
|||
// objMap.put(entry.getKey(), entry.getValue()); |
|||
// } |
|||
// |
|||
// SortedMap<String, Object> temps = new TreeMap<>();// 升序 |
|||
// temps.putAll(objMap); |
|||
// allList.add(temps); |
|||
// } |
|||
// v = JSON.toJSONString(allList); |
|||
// } |
|||
// sb.append(k).append("=").append(v).append("&"); |
|||
// } |
|||
// } |
|||
// sb.append("appSecret=" + key); |
|||
// logger.info("Md5SignUtil.sign加密串为:{}", sb.toString()); |
|||
// String sign = MD5Encode(sb.toString(), Encoding_utf8).toUpperCase(); |
|||
// logger.info("Md5SignUtil.sign加密串后的签名为:{}", sign); |
|||
// return sign; |
|||
// } |
|||
|
|||
/** |
|||
* 字符串 md5加密 |
|||
* |
|||
* @param str |
|||
* @param charsetName |
|||
* @return |
|||
*/ |
|||
public static String MD5Encode(String str, String charsetName) { |
|||
String resultString = null; |
|||
try { |
|||
resultString = new String(str); |
|||
MessageDigest md = MessageDigest.getInstance("MD5"); |
|||
if (charsetName == null || "".equals(charsetName)) { |
|||
resultString = byteArrayToHexString(md.digest(resultString.getBytes())); |
|||
} else { |
|||
resultString = byteArrayToHexString(md.digest(resultString.getBytes(charsetName))); |
|||
} |
|||
} catch (Exception e) { |
|||
} |
|||
return resultString; |
|||
} |
|||
|
|||
private static String byteArrayToHexString(byte b[]) { |
|||
StringBuffer resultSb = new StringBuffer(); |
|||
for (int i = 0; i < b.length; i++) |
|||
resultSb.append(byteToHexString(b[i])); |
|||
|
|||
return resultSb.toString().toUpperCase(); |
|||
} |
|||
|
|||
private static String byteToHexString(byte b) { |
|||
int n = b; |
|||
if (n < 0) { |
|||
n += 256; |
|||
} |
|||
int d1 = n / 16; |
|||
int d2 = n % 16; |
|||
return hexDigits[d1] + hexDigits[d2]; |
|||
} |
|||
|
|||
/** |
|||
* md5加密 默认加密串小写 |
|||
* |
|||
* @param |
|||
* @return |
|||
*/ |
|||
// public static String md5(String plaintext) { |
|||
// return md5(plaintext, StandardCharsets.UTF_8); |
|||
// } |
|||
|
|||
|
|||
|
|||
public static void main(String[] args) { |
|||
|
|||
String key = "dc41973ee03e471887c77c4a532dbfc3"; |
|||
|
|||
String appId = "c4d532304c6b4497b1ad"; |
|||
String name = "radar"; |
|||
String randomStr = DateUtil.format(new Date(), "yyyyMMddHHmmss"); |
|||
// String randomStr = "20241225114130"; |
|||
List typeSet = new ArrayList<>(); |
|||
typeSet.add(10); |
|||
|
|||
|
|||
SortedMap<Object, Object> parameters = new TreeMap<Object, Object>(); |
|||
|
|||
parameters.put("randomStr", randomStr); |
|||
parameters.put("appId", appId); |
|||
parameters.put("title", name); |
|||
parameters.put("page",1); |
|||
parameters.put("pageSize",10); |
|||
// parameters.put("year","2024"); |
|||
|
|||
parameters.put("type", "30"); |
|||
// parameters.put("id","1010052234917"); |
|||
|
|||
// String apiSign = "D3CC78105AA4C5F594AE733E78DB6E02"; |
|||
// log.info("接口传入的签名串是:" + apiSign); |
|||
String mySign = sign(parameters); |
|||
log.info("生成的签名串是:" + mySign); |
|||
parameters.put("sign",mySign); |
|||
System.out.println(new JSONObject(parameters)); |
|||
// String data = HttpUtil.getData(JSON.toJSONString(parameters)); |
|||
// System.out.println(data); |
|||
|
|||
SortedMap<Object, Object> parameterss = new TreeMap<Object, Object>(); |
|||
parameterss.put("appId", appId); |
|||
parameterss.put("randomStr", randomStr); |
|||
parameterss.put("doi", "10.1109/ICCC57789.2023.10164991"); |
|||
parameterss.put("organId",392); |
|||
String mySigns = sign(parameterss); |
|||
log.info("生成的签名串是:" + mySigns); |
|||
parameterss.put("sign",mySigns); |
|||
System.out.println(new JSONObject(parameterss)); |
|||
|
|||
SortedMap<Object, Object> parametersss = new TreeMap<Object, Object>(); |
|||
parametersss.put("appId", appId); |
|||
parametersss.put("randomStr", randomStr); |
|||
// parametersss.put("status", 2); |
|||
parametersss.put("page",1); |
|||
parametersss.put("pageSize",10); |
|||
parametersss.put("organId",392); |
|||
String mySignss = sign(parametersss); |
|||
log.info("生成的签名串是:" + mySignss); |
|||
parametersss.put("sign",mySignss); |
|||
System.out.println(new JSONObject(parametersss)); |
|||
|
|||
SortedMap<Object, Object> parameterssss = new TreeMap<Object, Object>(); |
|||
parameterssss.put("appId", appId); |
|||
parameterssss.put("randomStr", randomStr); |
|||
// parametersss.put("status", 2); |
|||
parameterssss.put("id",8417); |
|||
String mySignsss = sign(parameterssss); |
|||
log.info("生成的签名串是:" + mySignsss); |
|||
parameterssss.put("sign",mySignsss); |
|||
System.out.println(new JSONObject(parameterssss)); |
|||
|
|||
// PreOrderResponseDto yardResultDto = new PreOrderResponseDto(); |
|||
// yardResultDto.setRetCode(retCode); |
|||
// yardResultDto.setRetMsg(retMsg); |
|||
// yardResultDto.setRandomStr(randomStr); |
|||
// yardResultDto.setSign(Md5SignUtil.sign(yardResultDto , key)); |
|||
// System.out.println(yardResultDto); |
|||
|
|||
} |
|||
|
|||
} |
@ -0,0 +1,29 @@ |
|||
package com.bfd.youzhiapi.util; |
|||
|
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.time.Instant; |
|||
import java.time.ZoneId; |
|||
import java.time.ZonedDateTime; |
|||
|
|||
/** |
|||
* @author guowei |
|||
*/ |
|||
@Component |
|||
public class Utils { |
|||
|
|||
public static boolean isFourDigitNumber(String input) { |
|||
return input.matches("\\d{4}"); |
|||
} |
|||
|
|||
public static int getYearFromTimestamp(long timestamp) { |
|||
// 使用 UTC 时区转换为年份 |
|||
ZonedDateTime dateTime = Instant.ofEpochMilli(timestamp).atZone(ZoneId.of("Asia/Shanghai")); |
|||
return dateTime.getYear(); |
|||
} |
|||
|
|||
public static boolean isYearInRange(int year, int startYear, int endYear) { |
|||
// 判断年份是否在区间内 |
|||
return year >= startYear && year <= endYear; |
|||
} |
|||
} |
@ -0,0 +1,26 @@ |
|||
# NOTE(review): indentation was lost in the source this was recovered from; the nesting
# below is reconstructed from the key names. Confirm that `crawl` is meant to be a
# top-level key before relying on it.
spring:
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://172.18.1.134:3306/cnki_crawl
    username: crawl666
    password: lx2a4jN1xFT96kj20LU=

crawl:
  kafka:
    topic: zhiWangTest2
    brokers: 172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092

mybatis:
  mapper-locations: classpath:mapper/*.xml
  # Lets mapper XML refer to entity classes by short name in resultType.
  type-aliases-package: com.bfd.youzhiapi.entity
  configuration:
    log-impl: org.apache.ibatis.logging.stdout.StdOutImpl

server:
  port: 7071

# Log level
logging:
  level:
    com:
      bfd: INFO
  # Log directory
  log:
    path: ./logs
@ -0,0 +1,38 @@ |
|||
<configuration>
    <!-- Read the log directory and the log threshold from the Spring environment. -->
    <springProperty scope="context" name="logging.path" source="logging.log.path"/>
    <springProperty scope="context" name="logging.level" source="logging.level.com.bfd"/>

    <!-- Console appender, disabled: production runs in the background, so console output is of little use. -->
    <!--
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <Pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</Pattern>
        </encoder>
    </appender>
    -->

    <!-- Daily-rolling file appender; keeps seven days of history. -->
    <appender name="GLMAPPER-LOGGERONE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <append>true</append>
        <filter class="ch.qos.logback.classic.filter.ThresholdFilter">
            <level>${logging.level}</level>
        </filter>
        <file>${logging.path}/crawlSchedule.log</file>
        <!-- Alternative file name: ${logging.path}/sendKafka.log -->
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <FileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</FileNamePattern>
            <!-- Alternative pattern: ${logging.path}/sendKafka.log.%d{yyyy-MM-dd} -->
            <MaxHistory>7</MaxHistory>
        </rollingPolicy>
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <root level="info">
        <appender-ref ref="GLMAPPER-LOGGERONE"/>
        <!-- <appender-ref ref="STDOUT"/> -->
    </root>
</configuration>
@ -0,0 +1,17 @@ |
|||
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
        PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.bfd.youzhiapi.mapper.ScheduleMapper">

    <!-- Sets the status of the task row identified by rid. -->
    <update id="updateTaskStatus">
        UPDATE `newslist_111`
        SET status = #{status}
        WHERE rid = #{rid}
    </update>

    <!-- Returns every task row that currently has the given status. -->
    <select id="queryTaskByStatus" parameterType="int" resultType="com.bfd.youzhiapi.entity.TaskEntity">
        SELECT *
        FROM `newslist_111`
        WHERE status = #{status}
    </select>

    <!-- Returns the cached download id and URL stored for a DOI. -->
    <select id="queryCacheByDoi" resultType="com.bfd.youzhiapi.entity.CacheEntity">
        SELECT downloadId, downloadUrl
        FROM kyzd_cache
        WHERE doi = #{doi}
    </select>

</mapper>
@ -0,0 +1,13 @@ |
|||
package com.bfd.youzhiapi; |
|||
|
|||
import org.junit.jupiter.api.Test; |
|||
import org.springframework.boot.test.context.SpringBootTest; |
|||
|
|||
@SpringBootTest |
|||
class YouzhiApiApplicationTests { |
|||
|
|||
@Test |
|||
void contextLoads() { |
|||
} |
|||
|
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue