commit
ffc71d82fa
18 changed files with 1293 additions and 0 deletions
-
3.gitignore
-
163pom.xml
-
28src/main/java/com/bfd/youzhiapi/YouzhiApiApplication.java
-
55src/main/java/com/bfd/youzhiapi/config/DataTypeEnum.java
-
25src/main/java/com/bfd/youzhiapi/config/GlobalConfig.java
-
19src/main/java/com/bfd/youzhiapi/entity/CacheEntity.java
-
33src/main/java/com/bfd/youzhiapi/entity/KfkEntity.java
-
33src/main/java/com/bfd/youzhiapi/entity/TaskEntity.java
-
31src/main/java/com/bfd/youzhiapi/mapper/ScheduleMapper.java
-
336src/main/java/com/bfd/youzhiapi/service/ScheduleService.java
-
88src/main/java/com/bfd/youzhiapi/util/HttpUtil.java
-
86src/main/java/com/bfd/youzhiapi/util/KfkUtil.java
-
270src/main/java/com/bfd/youzhiapi/util/Md5SignUtil.java
-
29src/main/java/com/bfd/youzhiapi/util/Utils.java
-
26src/main/resources/application.yml
-
38src/main/resources/logback-spring.xml
-
17src/main/resources/mapper/ScheduleMapper.xml
-
13src/test/java/com/bfd/youzhiapi/YouzhiApiApplicationTests.java
@ -0,0 +1,3 @@ |
|||||
|
/target/ |
||||
|
/logs/ |
||||
|
/.idea/ |
@ -0,0 +1,163 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.bfd</groupId> |
||||
|
<artifactId>youzhiApi</artifactId> |
||||
|
<version>0.0.1-SNAPSHOT</version> |
||||
|
<name>youzhiApi</name> |
||||
|
<description>youzhiApi</description> |
||||
|
<properties> |
||||
|
<java.version>1.8</java.version> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> |
||||
|
<spring-boot.version>2.6.13</spring-boot.version> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-web</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.mybatis.spring.boot</groupId> |
||||
|
<artifactId>mybatis-spring-boot-starter</artifactId> |
||||
|
<version>2.2.2</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>com.mysql</groupId> |
||||
|
<artifactId>mysql-connector-j</artifactId> |
||||
|
<scope>runtime</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.projectlombok</groupId> |
||||
|
<artifactId>lombok</artifactId> |
||||
|
<optional>true</optional> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-starter-test</artifactId> |
||||
|
<scope>test</scope> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.squareup.okhttp3</groupId> |
||||
|
<artifactId>okhttp</artifactId> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.alibaba.fastjson2</groupId> |
||||
|
<artifactId>fastjson2</artifactId> |
||||
|
<version>2.0.17</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>cn.hutool</groupId> |
||||
|
<artifactId>hutool-all</artifactId> |
||||
|
<version>5.8.27</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.apache.kafka</groupId> |
||||
|
<artifactId>kafka-clients</artifactId> |
||||
|
<version>2.7.1</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.7.3</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<dependencyManagement> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.springframework.boot</groupId> |
||||
|
<artifactId>spring-boot-dependencies</artifactId> |
||||
|
<version>${spring-boot.version}</version> |
||||
|
<type>pom</type> |
||||
|
<scope>import</scope> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
</dependencyManagement> |
||||
|
|
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-jar-plugin</artifactId> |
||||
|
<configuration> |
||||
|
<!--不打入jar包的文件类型或者路径--> |
||||
|
<excludes> |
||||
|
<exclude>*.properties</exclude> |
||||
|
<exclude>*.yml</exclude> |
||||
|
<exclude>*.yaml</exclude> |
||||
|
</excludes> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<!-- 执行的主程序路径 --> |
||||
|
<mainClass>com.bfd.youzhiapi.YouzhiApiApplication</mainClass> |
||||
|
<!--是否要把第三方jar放到manifest的classpath中--> |
||||
|
<addClasspath>true</addClasspath> |
||||
|
<!--生成的manifest中classpath的前缀,因为要把第三方jar放到lib目录下,所以classpath的前缀是lib/--> |
||||
|
<classpathPrefix>lib/</classpathPrefix> |
||||
|
<!-- 打包时 MANIFEST.MF 文件不记录的时间戳版本 --> |
||||
|
<useUniqueVersions>false</useUniqueVersions> |
||||
|
</manifest> |
||||
|
<manifestEntries> |
||||
|
<!-- 在 Class-Path 下添加配置文件的路径 --> |
||||
|
<Class-Path>config/</Class-Path> |
||||
|
</manifestEntries> |
||||
|
</archive> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-dependency-plugin</artifactId> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>copy</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>copy-dependencies</goal> |
||||
|
</goals> |
||||
|
<configuration> |
||||
|
<outputDirectory>${project.build.directory}/lib/</outputDirectory> |
||||
|
</configuration> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
|
||||
|
<plugin>
    <!--
        Copies the external configuration files (*.properties, *.yml, *.yaml) from
        src/main/resources into ${project.build.directory}/config during the package phase.
        These are the same three patterns that the maven-jar-plugin configuration in this
        POM excludes from the jar, and "config/" is the entry it adds to the manifest Class-Path.
    -->
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-resources-plugin</artifactId>
    <executions>
        <execution>
            <id>copy-resources</id>
            <phase>package</phase>
            <goals>
                <goal>copy-resources</goal>
            </goals>
            <configuration>
                <resources>
                    <!-- Configuration files to copy to the output directory below. -->
                    <resource>
                        <directory>src/main/resources/</directory>
                        <includes>
                            <include>*.properties</include>
                            <include>*.yml</include>
                            <!-- Was declared as <exclude> inside <includes>; an include list takes <include> items. -->
                            <include>*.yaml</include>
                        </includes>
                    </resource>
                </resources>
                <outputDirectory>${project.build.directory}/config</outputDirectory>
            </configuration>
        </execution>
    </executions>
</plugin>
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<configuration> |
||||
|
<source>8</source> |
||||
|
<target>8</target> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
|
||||
|
</project> |
@ -0,0 +1,28 @@ |
|||||
|
package com.bfd.youzhiapi; |
||||
|
|
||||
|
import com.bfd.youzhiapi.service.ScheduleService; |
||||
|
import org.springframework.boot.SpringApplication; |
||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication; |
||||
|
import org.springframework.context.ConfigurableApplicationContext; |
||||
|
import org.springframework.scheduling.annotation.EnableScheduling; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
|
||||
|
@SpringBootApplication |
||||
|
@EnableScheduling |
||||
|
public class YouzhiApiApplication { |
||||
|
|
||||
|
@Resource |
||||
|
ScheduleService scheduleService; |
||||
|
public static void main(String[] args) { |
||||
|
ConfigurableApplicationContext run = SpringApplication.run(YouzhiApiApplication.class, args); |
||||
|
YouzhiApiApplication bean = run.getBean(YouzhiApiApplication.class); |
||||
|
// bean.start(); |
||||
|
} |
||||
|
|
||||
|
// public void start(){ |
||||
|
// Thread thread = new Thread(scheduleService); |
||||
|
// thread.start(); |
||||
|
// } |
||||
|
|
||||
|
} |
@ -0,0 +1,55 @@ |
|||||
|
package com.bfd.youzhiapi.config; |
||||
|
|
||||
|
/**
 * Mapping between the document-type number stored in the collection database ({@code field})
 * and the type code used by the search interface ({@code code}), together with a display label.
 *
 * @author guowei
 */
public enum DataTypeEnum {

    /** Journal papers. */
    PERIODICAL(10, 1, "学术期刊"),
    /** Dissertations. */
    DISSERTATION(20, 2, "学位论文"),
    /** Conference papers. */
    CONFERENCE(30, 3, "会议");

    private final Integer code;

    private final Integer field;

    private final String type;

    DataTypeEnum(Integer code, Integer field, String type) {
        this.code = code;
        this.field = field;
        this.type = type;
    }

    /**
     * @return the type code of this constant (10, 20 or 30)
     */
    public Integer getCode() {
        return code;
    }

    /**
     * @return the collection-database field number of this constant (1, 2 or 3)
     */
    public Integer getField() {
        return field;
    }

    /**
     * @return the display label of this constant
     */
    public String getType() {
        return type;
    }

    /**
     * Looks up the type code that belongs to a field number.
     *
     * @param field field number to look up; may be {@code null}
     * @return the matching code, or {@code null} when no constant has that field number
     */
    public static Integer getCodeByField(Integer field) {
        for (DataTypeEnum candidate : values()) {
            // Compare by value: '==' on two Integer objects compares references and only
            // happens to work for instances that come from the Integer cache.
            if (candidate.field.equals(field)) {
                return candidate.code;
            }
        }
        return null;
    }

    /**
     * Looks up the display label that belongs to a type code.
     *
     * @param code type code to look up; may be {@code null}
     * @return the matching label, or {@code null} when no constant has that code
     */
    public static String getTypeByCode(Integer code) {
        for (DataTypeEnum candidate : values()) {
            // Value comparison, see getCodeByField.
            if (candidate.code.equals(code)) {
                return candidate.type;
            }
        }
        return null;
    }
}
@ -0,0 +1,25 @@ |
|||||
|
package com.bfd.youzhiapi.config; |
||||
|
|
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Component |
||||
|
public class GlobalConfig { |
||||
|
|
||||
|
/** |
||||
|
* 外部接口所用 appId |
||||
|
*/ |
||||
|
public static final String APPID = "c4d532304c6b4497b1ad"; |
||||
|
|
||||
|
/** |
||||
|
* 外部接口所用 appSecret |
||||
|
*/ |
||||
|
public static final String APPSECRET = "dc41973ee03e471887c77c4a532dbfc3"; |
||||
|
|
||||
|
/** |
||||
|
* 外部接口所用 机构Id |
||||
|
*/ |
||||
|
public static final Integer ORGANID = 392; |
||||
|
} |
@ -0,0 +1,19 @@ |
|||||
|
package com.bfd.youzhiapi.entity; |
||||
|
|
||||
|
import lombok.Data; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Data |
||||
|
public class CacheEntity { |
||||
|
private int id; |
||||
|
|
||||
|
private String doi; |
||||
|
|
||||
|
private String downloadId; |
||||
|
|
||||
|
private String uploadTime; |
||||
|
|
||||
|
private String downloadUrl; |
||||
|
} |
@ -0,0 +1,33 @@ |
|||||
|
package com.bfd.youzhiapi.entity; |
||||
|
|
||||
|
import lombok.Data; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Data |
||||
|
public class KfkEntity { |
||||
|
private Object attr; |
||||
|
private String author; |
||||
|
private String brief; |
||||
|
private String cid = "Nkyzd"; |
||||
|
private String content; |
||||
|
private String dedupKey; |
||||
|
|
||||
|
private String field; |
||||
|
private String forwardcontent; |
||||
|
private String iid; |
||||
|
private Boolean isDownload = false; |
||||
|
private String news_id; |
||||
|
private String post_time; |
||||
|
private String searchKeyword; |
||||
|
private String source; |
||||
|
private List tasks = new ArrayList<>(); |
||||
|
private String title; |
||||
|
private String type = "newscontent"; |
||||
|
private String url; |
||||
|
private String version = "1"; |
||||
|
} |
@ -0,0 +1,33 @@ |
|||||
|
package com.bfd.youzhiapi.entity; |
||||
|
|
||||
|
import lombok.Data; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Data |
||||
|
public class TaskEntity { |
||||
|
private Integer rid; |
||||
|
private Integer siteId; |
||||
|
private String cid; |
||||
|
private String channelName; |
||||
|
private String keyword; |
||||
|
private Integer pageTypeID; |
||||
|
private Integer weight; |
||||
|
private String url; |
||||
|
private Integer pageIdx; |
||||
|
private Integer nextPageTime; |
||||
|
private Integer status; |
||||
|
private Integer intv; |
||||
|
private String attachTag; |
||||
|
private String lastcrawltime; |
||||
|
private String nextcrawltime; |
||||
|
private String createTime; |
||||
|
private String modiTime; |
||||
|
private Integer crawl_mode; |
||||
|
private Integer crawl_account; |
||||
|
private String page_switchs; |
||||
|
private Integer task_hash_code; |
||||
|
private Long crawlStartTime; |
||||
|
private Long crawlEndTime; |
||||
|
} |
@ -0,0 +1,31 @@ |
|||||
|
package com.bfd.youzhiapi.mapper; |
||||
|
|
||||
|
import com.bfd.youzhiapi.entity.CacheEntity; |
||||
|
import com.bfd.youzhiapi.entity.TaskEntity; |
||||
|
import org.apache.ibatis.annotations.Mapper; |
||||
|
import org.springframework.stereotype.Repository; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Mapper |
||||
|
public interface ScheduleMapper { |
||||
|
|
||||
|
/** |
||||
|
* 查询任务 |
||||
|
* @param status |
||||
|
* @return |
||||
|
*/ |
||||
|
List<TaskEntity> queryTaskByStatus(int status); |
||||
|
|
||||
|
/** |
||||
|
* 更改任务状态 |
||||
|
* @param taskEntity |
||||
|
* @return |
||||
|
*/ |
||||
|
int updateTaskStatus(TaskEntity taskEntity); |
||||
|
|
||||
|
CacheEntity queryCacheByDoi(String doi); |
||||
|
} |
@ -0,0 +1,336 @@ |
|||||
|
package com.bfd.youzhiapi.service; |
||||
|
|
||||
|
import cn.hutool.core.date.DateUtil; |
||||
|
import cn.hutool.core.util.IdUtil; |
||||
|
import cn.hutool.core.util.StrUtil; |
||||
|
import cn.hutool.crypto.SecureUtil; |
||||
|
import com.alibaba.fastjson2.JSON; |
||||
|
import com.alibaba.fastjson2.JSONArray; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.bfd.youzhiapi.config.DataTypeEnum; |
||||
|
import com.bfd.youzhiapi.config.GlobalConfig; |
||||
|
import com.bfd.youzhiapi.entity.CacheEntity; |
||||
|
import com.bfd.youzhiapi.entity.KfkEntity; |
||||
|
import com.bfd.youzhiapi.entity.TaskEntity; |
||||
|
import com.bfd.youzhiapi.mapper.ScheduleMapper; |
||||
|
import com.bfd.youzhiapi.util.HttpUtil; |
||||
|
import com.bfd.youzhiapi.util.KfkUtil; |
||||
|
import com.bfd.youzhiapi.util.Md5SignUtil; |
||||
|
import com.bfd.youzhiapi.util.Utils; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.springframework.scheduling.annotation.Scheduled; |
||||
|
import org.springframework.stereotype.Service; |
||||
|
|
||||
|
import javax.annotation.Resource; |
||||
|
import java.util.*; |
||||
|
import java.util.regex.Matcher; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Service |
||||
|
@Slf4j |
||||
|
public class ScheduleService{ |
||||
|
@Resource |
||||
|
ScheduleMapper scheduleMapper; |
||||
|
|
||||
|
@Scheduled(cron = "0 0/2 * * * ?") |
||||
|
public void run() { |
||||
|
List<TaskEntity> taskEntities = scheduleMapper.queryTaskByStatus(1); |
||||
|
log.info("查询到{}条未采集任务", taskEntities.size()); |
||||
|
for (TaskEntity task : taskEntities) { |
||||
|
|
||||
|
log.info("开始采集任务:{}", task.getKeyword()); |
||||
|
//最大翻页限制20页 |
||||
|
Integer maxPageNum = task.getNextPageTime(); |
||||
|
if (maxPageNum == -1){ maxPageNum = 20;} |
||||
|
|
||||
|
String attachTag = task.getAttachTag(); |
||||
|
JSONObject attrJSON = JSONObject.parseObject(attachTag); |
||||
|
if (!attrJSON.containsKey("field")) { |
||||
|
log.error("没找到field,keyword:{},跳过采集", task.getKeyword()); |
||||
|
continue; |
||||
|
} |
||||
|
Integer field = attrJSON.getInteger("field"); |
||||
|
String documentType = attrJSON.getString("documentType"); |
||||
|
String[] documentSplit = documentType.split(","); |
||||
|
List<String> apiType = new ArrayList<>(); |
||||
|
//根据field 映射 检索接口的参数 |
||||
|
for (String type : documentSplit) { |
||||
|
Integer codeByField = DataTypeEnum.getCodeByField(Integer.valueOf(type)); |
||||
|
apiType.add(String.valueOf(codeByField)); |
||||
|
} |
||||
|
if (apiType.size() == 0) { |
||||
|
log.error("没找到documentType,keyword:{},跳过采集", task.getKeyword()); |
||||
|
continue; |
||||
|
} else { |
||||
|
log.info("采集类型:{}", String.join(",", apiType)); |
||||
|
} |
||||
|
Long crawlStartTime = task.getCrawlStartTime(); |
||||
|
Long crawlEndTime = task.getCrawlEndTime(); |
||||
|
//获取采集范围 年份 |
||||
|
int yearStart = Utils.getYearFromTimestamp(crawlStartTime); |
||||
|
int yearend = Utils.getYearFromTimestamp(crawlEndTime); |
||||
|
log.info("采集年份范围:{} ~ {}",yearStart,yearend); |
||||
|
//每个采集类型都采集一遍 |
||||
|
for(String type:apiType) { |
||||
|
int currentPageNum = 1; |
||||
|
log.info("开始采集 {} 类型数据",type); |
||||
|
do { |
||||
|
try { |
||||
|
SortedMap<Object, Object> parameters = new TreeMap<Object, Object>(); |
||||
|
parameters.put("randomStr", DateUtil.format(new Date(), "yyyyMMddHHmmss")); |
||||
|
parameters.put("appId", GlobalConfig.APPID); |
||||
|
parameters.put("title", task.getKeyword()); |
||||
|
parameters.put("page", currentPageNum); |
||||
|
parameters.put("pageSize", 10); |
||||
|
parameters.put("type", type); |
||||
|
String sign = Md5SignUtil.sign(parameters); |
||||
|
parameters.put("sign", sign); |
||||
|
String data = HttpUtil.getData(JSON.toJSONString(parameters)); |
||||
|
JSONObject jsonObject = JSONObject.parseObject(data); |
||||
|
JSONObject data1 = jsonObject.getJSONObject("data"); |
||||
|
JSONArray records = data1.getJSONArray("records"); |
||||
|
for (Object record : records) { |
||||
|
JSONObject item = (JSONObject) record; |
||||
|
KfkEntity kfk = new KfkEntity(); |
||||
|
Map attr = new HashMap<>(); |
||||
|
attr.put("attachTag", JSON.parse(task.getAttachTag())); |
||||
|
kfk.setAttr(attr); |
||||
|
kfk.setAuthor(item.getString("author")); |
||||
|
kfk.setContent(item.getString("abstractE")); |
||||
|
kfk.setForwardcontent(item.getString("abstractE")); |
||||
|
String organ = item.getString("organ"); |
||||
|
Map brief = new HashMap<>(); |
||||
|
|
||||
|
kfk.setField(String.valueOf(field)); |
||||
|
kfk.setTitle(Jsoup.parse(item.getString("title")).text()); |
||||
|
kfk.setSearchKeyword(task.getKeyword()); |
||||
|
String source = ""; |
||||
|
switch (item.getInteger("type")) { |
||||
|
case 10: |
||||
|
source = item.getString("name"); |
||||
|
break; |
||||
|
case 20: |
||||
|
source = item.getString("school"); |
||||
|
break; |
||||
|
case 30: |
||||
|
source = item.getString("conferenceName"); |
||||
|
break; |
||||
|
} |
||||
|
kfk.setSource(source); |
||||
|
// if (item.getString("abstractURL") != null) { |
||||
|
// kfk.setUrl(item.getString("abstractURL")); |
||||
|
// } else { |
||||
|
// kfk.setUrl(item.getString("pdfURL")); |
||||
|
// } |
||||
|
String uuid = IdUtil.simpleUUID(); |
||||
|
kfk.setIid(uuid); |
||||
|
kfk.setNews_id(uuid); |
||||
|
kfk.setUrl(uuid); |
||||
|
String year = item.getString("year"); |
||||
|
//判断年份 是不是4位数字 (有错误数据的情况) |
||||
|
boolean fourDigitNumber = Utils.isFourDigitNumber(year); |
||||
|
if (!fourDigitNumber) { |
||||
|
log.error("year不是4位数字,跳过,year:{}", year); |
||||
|
continue; |
||||
|
} |
||||
|
if (!Utils.isYearInRange(Integer.parseInt(year), yearStart, yearend)) { |
||||
|
log.error("year不在采集年份范围,跳过,year:{}", year); |
||||
|
continue; |
||||
|
} |
||||
|
kfk.setPost_time(item.getString("year") + "-01-01 00:00:00"); |
||||
|
kfk.setField(String.valueOf(field)); |
||||
|
|
||||
|
//没有机构字段 并且是学位论文,学校作为机构 |
||||
|
if (organ == null || organ == "") { |
||||
|
organ = item.getString("school"); |
||||
|
List agencys = new ArrayList<>(); |
||||
|
if (organ == null || organ == "") { |
||||
|
organ = ""; |
||||
|
} else { |
||||
|
Map<String, Object> agency = new HashMap<>(); |
||||
|
agency.put("name", organ); |
||||
|
agency.put("url", IdUtil.simpleUUID()); // 添加第二个字段 |
||||
|
agencys.add(agency); |
||||
|
} |
||||
|
brief.put("agency", organ); |
||||
|
brief.put("agencys", agencys); |
||||
|
brief.put("author", item.getString("author")); |
||||
|
Map<String, Object> agencyAuthor = new HashMap<>(); |
||||
|
agencyAuthor.put("agency", organ); |
||||
|
agencyAuthor.put("author", item.getString("author")); // 添加第二个字段 |
||||
|
List authorAndAgency = new ArrayList<>(); |
||||
|
authorAndAgency.add(agencyAuthor); |
||||
|
brief.put("authorAndAgency", authorAndAgency); |
||||
|
} else { |
||||
|
List agencys = new ArrayList<>(); |
||||
|
List agencyString = new ArrayList<>(); |
||||
|
Pattern pattern = Pattern.compile("\\[([a-z\\d])\\]([^;]+)"); |
||||
|
Matcher matcher = pattern.matcher(organ); |
||||
|
Map agencyMap = new HashMap<>(); |
||||
|
while (matcher.find()) { |
||||
|
String key = matcher.group(1); // 获取编号 |
||||
|
String value = matcher.group(2).trim(); // 获取机构名称并去除前后空格 |
||||
|
agencyMap.put(key, value); |
||||
|
agencyString.add(value); |
||||
|
Map agency = new HashMap<>(); |
||||
|
agency.put("name", value); |
||||
|
agency.put("url", IdUtil.simpleUUID()); |
||||
|
agencys.add(agency); |
||||
|
} |
||||
|
brief.put("agency", String.join(",", agencyString)); |
||||
|
brief.put("agencys", agencys); |
||||
|
|
||||
|
// 正则表达式匹配模式,匹配 "姓名[编号][编号]..." |
||||
|
// Pattern patternAuthor = Pattern.compile("([\\p{L} .]+)(\\[\\d+])+(?=;|$)"); |
||||
|
Pattern patternAuthor = Pattern.compile("([\\p{L} .-]+)((\\[\\d+\\])|(\\[[a-zA-Z,]+\\]))+(?=;|$)"); |
||||
|
Matcher matcherAuthor = patternAuthor.matcher(item.getString("author")); |
||||
|
Map<String, List<String>> authorAffiliations = new HashMap<>(); |
||||
|
while (matcherAuthor.find()) { |
||||
|
String name = matcherAuthor.group(1).trim(); // 获取姓名并去除前后空格 |
||||
|
String affiliationPart = matcherAuthor.group(0); // 获取整个匹配串 |
||||
|
|
||||
|
// 提取所有编号 |
||||
|
List<String> affiliations = new ArrayList<>(); |
||||
|
Matcher numberMatcher = Pattern.compile("\\[(\\d+)]").matcher(affiliationPart); |
||||
|
while (numberMatcher.find()) { |
||||
|
affiliations.add(numberMatcher.group(1)); |
||||
|
} |
||||
|
Matcher letterMatcher = Pattern.compile("\\[(.*?)\\]").matcher(affiliationPart); |
||||
|
if (letterMatcher.find()) { |
||||
|
String values = letterMatcher.group(1); |
||||
|
String[] items = values.split(","); |
||||
|
for (String key : items) { |
||||
|
affiliations.add(key); |
||||
|
} |
||||
|
} |
||||
|
authorAffiliations.put(name, affiliations); |
||||
|
} |
||||
|
Set<String> strings = authorAffiliations.keySet(); |
||||
|
List authorAndAgency = new ArrayList<>(); |
||||
|
if (strings.size() > 0) { |
||||
|
brief.put("author", String.join(",", strings)); |
||||
|
kfk.setAuthor(String.join(",", strings)); |
||||
|
for (String name : strings) { |
||||
|
List<String> organNum = authorAffiliations.get(name); |
||||
|
for (String authorOrgan : organNum) { |
||||
|
Map<String, Object> agencyAuthor = new HashMap<>(); |
||||
|
agencyAuthor.put("agency", agencyMap.get(authorOrgan)); |
||||
|
agencyAuthor.put("author", name); |
||||
|
authorAndAgency.add(agencyAuthor); |
||||
|
} |
||||
|
} |
||||
|
brief.put("authorAndAgency", authorAndAgency); |
||||
|
} else { |
||||
|
brief.put("author", item.getString("author")); |
||||
|
brief.put("authorAndAgency", authorAndAgency); |
||||
|
} |
||||
|
|
||||
|
} |
||||
|
|
||||
|
brief.put("author_agency_urls", new ArrayList<>()); |
||||
|
brief.put("author_urls", new ArrayList<>()); |
||||
|
brief.put("data", DataTypeEnum.getTypeByCode(item.getInteger("type"))); |
||||
|
brief.put("date", DateUtil.formatDate(DateUtil.parse(kfk.getPost_time()))); |
||||
|
brief.put("detailUrl", kfk.getUrl()); |
||||
|
brief.put("download", ""); |
||||
|
brief.put("eisci", ""); |
||||
|
brief.put("fileUrl", ""); |
||||
|
brief.put("funding", ""); |
||||
|
brief.put("id", item.getString("id")); |
||||
|
brief.put("initial_mark", ""); |
||||
|
brief.put("keyword", task.getKeyword()); |
||||
|
brief.put("keywords", item.getString("keyword")); |
||||
|
String language = item.getString("language"); |
||||
|
if (StrUtil.isEmpty(language)){ |
||||
|
language = "英语"; |
||||
|
} |
||||
|
brief.put("language", language); |
||||
|
brief.put("paramter", new HashMap<>()); |
||||
|
brief.put("publish_agency_urls", new ArrayList<>()); |
||||
|
brief.put("quote", ""); |
||||
|
brief.put("source", source); |
||||
|
brief.put("summary", kfk.getContent().replace("\"", "")); |
||||
|
brief.put("title", kfk.getTitle()); |
||||
|
brief.put("wxtype", language); |
||||
|
kfk.setBrief(JSON.toJSONString(brief)); |
||||
|
|
||||
|
|
||||
|
/**2024.12.27 新增下载文件需求,下载文件需要3个接口 |
||||
|
* 1.新增单条需求(根据doi) |
||||
|
* 2.获取清单,已上传时间查询 |
||||
|
* 3.获取下载链接 |
||||
|
* 根据doi 查询缓存表里是否已经下载过附件,有的话 直接下载推送 |
||||
|
* 没有的话 放入队列等待处理完成 |
||||
|
*/ |
||||
|
// String doi = item.getString("doi"); |
||||
|
// CacheEntity cacheEntity = scheduleMapper.queryCacheByDoi(doi); |
||||
|
// if (cacheEntity == null){ |
||||
|
// log.info("缓存库没有此数据,新增数据"); |
||||
|
// //组装参数 |
||||
|
// SortedMap<Object, Object> params = new TreeMap<Object, Object>(); |
||||
|
// params.put("randomStr", DateUtil.format(new Date(), "yyyyMMddHHmmss")); |
||||
|
// params.put("appId", GlobalConfig.APPID); |
||||
|
// params.put("organId",GlobalConfig.ORGANID); |
||||
|
// params.put("doi",doi); |
||||
|
// String uploadSign = Md5SignUtil.sign(params); |
||||
|
// params.put("sign", uploadSign); |
||||
|
// String uploadData = HttpUtil.uploadDoi(JSON.toJSONString(params)); |
||||
|
// JSONObject uploadJon = JSONObject.parseObject(uploadData); |
||||
|
// |
||||
|
// cacheEntity = new CacheEntity(); |
||||
|
// cacheEntity.setDoi(doi); |
||||
|
// |
||||
|
// } |
||||
|
System.out.println(JSON.toJSONString(kfk)); |
||||
|
KfkUtil.sendKafka(JSON.toJSONString(kfk)); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
|
||||
|
Integer totalPage = data1.getInteger("pages"); |
||||
|
if (maxPageNum > totalPage) { |
||||
|
maxPageNum = totalPage; |
||||
|
} |
||||
|
log.info("第{}页采集,最大限制页数{},共{}页", currentPageNum, maxPageNum, totalPage); |
||||
|
currentPageNum++; |
||||
|
Thread.sleep(1000 * 5); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
log.error("采集失败", e); |
||||
|
} |
||||
|
} while (currentPageNum <= maxPageNum); |
||||
|
log.info("采集完成 {} 类型数据",type); |
||||
|
} |
||||
|
log.info("关键词:{} 采集完成",task.getKeyword()); |
||||
|
task.setStatus(3); |
||||
|
scheduleMapper.updateTaskStatus(task); |
||||
|
|
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
Map<String, List<String>> authorAffiliations = new HashMap<>(); |
||||
|
|
||||
|
String author = "Utku Kumbul[1]; Faruk Uysal[1]; Cicero S. Vaucher[1][2]; Alexander Yarovoy[1]"; |
||||
|
Pattern pattern = Pattern.compile("([\\p{L} .]+)(\\[\\d+])+(?=;|$)"); |
||||
|
Matcher matcher = pattern.matcher(author); |
||||
|
while (matcher.find()) { |
||||
|
String name = matcher.group(1).trim(); // 获取姓名并去除前后空格 |
||||
|
String affiliationPart = matcher.group(0); // 获取整个匹配串 |
||||
|
|
||||
|
// 提取所有编号 |
||||
|
List<String> affiliations = new ArrayList<>(); |
||||
|
Matcher numberMatcher = Pattern.compile("\\[(\\d+)]").matcher(affiliationPart); |
||||
|
while (numberMatcher.find()) { |
||||
|
affiliations.add(numberMatcher.group(1)); |
||||
|
} |
||||
|
authorAffiliations.put(name, affiliations); |
||||
|
} |
||||
|
// 打印结果 |
||||
|
authorAffiliations.forEach((k, v) -> System.out.println(k + " -> " + v)); |
||||
|
} |
||||
|
} |
@ -0,0 +1,88 @@ |
|||||
|
package com.bfd.youzhiapi.util; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import okhttp3.*; |
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
import java.util.concurrent.TimeUnit; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Component |
||||
|
@Slf4j |
||||
|
public class HttpUtil { |
||||
|
|
||||
|
/** |
||||
|
* 关键词检索 请求接口 |
||||
|
* @param parameters |
||||
|
* @return |
||||
|
*/ |
||||
|
public static String getData(String parameters){ |
||||
|
String result = ""; |
||||
|
try { |
||||
|
// 目标 URL |
||||
|
String url = "http://api.keyanzhidian.com/api/literature/search"; |
||||
|
// 创建 JSON 请求体 |
||||
|
MediaType JSON = MediaType.parse("application/json; charset=utf-8"); |
||||
|
// 使用 fastjson 构建 JSON |
||||
|
// JSONObject jsonObject = new JSONObject(); |
||||
|
// jsonObject.put("content", content); |
||||
|
// String json = jsonObject.toJSONString(); |
||||
|
RequestBody body = RequestBody.create(JSON, parameters); |
||||
|
// 构建 POST 请求 |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.post(body) |
||||
|
.build(); |
||||
|
OkHttpClient client = new OkHttpClient.Builder() |
||||
|
.connectTimeout(60, TimeUnit.SECONDS) // 连接超时 |
||||
|
.readTimeout(30, TimeUnit.SECONDS) // 读取超时 |
||||
|
.writeTimeout(15, TimeUnit.SECONDS) // 写入超时 |
||||
|
.build(); |
||||
|
Response response = client.newCall(request).execute(); |
||||
|
if (response.isSuccessful()) { |
||||
|
result = response.body().string(); |
||||
|
log.warn("Response: " + result); |
||||
|
} |
||||
|
}catch (Exception e){ |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 新增单条需求 |
||||
|
* @param parameters |
||||
|
* @return |
||||
|
*/ |
||||
|
public static String uploadDoi(String parameters){ |
||||
|
String result = ""; |
||||
|
try { |
||||
|
// 目标 URL |
||||
|
String url = "http://api.keyanzhidian.com/api/demand/create"; |
||||
|
// 创建 JSON 请求体 |
||||
|
MediaType JSON = MediaType.parse("application/json; charset=utf-8"); |
||||
|
RequestBody body = RequestBody.create(JSON, parameters); |
||||
|
// 构建 POST 请求 |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.post(body) |
||||
|
.build(); |
||||
|
OkHttpClient client = new OkHttpClient.Builder() |
||||
|
.connectTimeout(60, TimeUnit.SECONDS) // 连接超时 |
||||
|
.readTimeout(30, TimeUnit.SECONDS) // 读取超时 |
||||
|
.writeTimeout(15, TimeUnit.SECONDS) // 写入超时 |
||||
|
.build(); |
||||
|
Response response = client.newCall(request).execute(); |
||||
|
if (response.isSuccessful()) { |
||||
|
result = response.body().string(); |
||||
|
log.warn("Response: " + result); |
||||
|
} |
||||
|
}catch (Exception e){ |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
return result; |
||||
|
} |
||||
|
} |
@ -0,0 +1,86 @@ |
|||||
|
package com.bfd.youzhiapi.util; |
||||
|
|
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.apache.kafka.clients.producer.KafkaProducer; |
||||
|
import org.apache.kafka.clients.producer.ProducerConfig; |
||||
|
import org.apache.kafka.clients.producer.ProducerRecord; |
||||
|
import org.apache.kafka.common.serialization.StringSerializer; |
||||
|
import org.springframework.beans.factory.annotation.Value; |
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
import java.util.Properties; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
* kfk工具类 |
||||
|
*/ |
||||
|
@Component |
||||
|
@Slf4j |
||||
|
public class KfkUtil { |
||||
|
private static String topic; |
||||
|
|
||||
|
private static String brokerList; |
||||
|
|
||||
|
@Value("${crawl.kafka.topic}") |
||||
|
public void setTopic(String topic) { |
||||
|
KfkUtil.topic = topic; |
||||
|
} |
||||
|
|
||||
|
@Value("${crawl.kafka.brokers}") |
||||
|
public void setBrokerList(String brokerList) { |
||||
|
KfkUtil.brokerList = brokerList; |
||||
|
} |
||||
|
private static KafkaProducer<String, String> kafkaProducer; |
||||
|
|
||||
|
public static int num = 0; |
||||
|
|
||||
|
/** |
||||
|
* 获取KafkaProducer实例 |
||||
|
*/ |
||||
|
public static KafkaProducer<String, String> getProducer() { |
||||
|
// synchronized (kafkaProducer) { |
||||
|
if (kafkaProducer == null) { |
||||
|
Properties props = new Properties(); |
||||
|
//xxx服务器ip |
||||
|
props.put("bootstrap.servers", brokerList); |
||||
|
// props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,brokerList); |
||||
|
//所有follower都响应了才认为消息提交成功,即"committed" |
||||
|
props.put("acks", "all"); |
||||
|
//retries = MAX 无限重试,直到你意识到出现了问题:) |
||||
|
props.put("retries", 3); |
||||
|
//producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 |
||||
|
props.put("batch.size", 16384); |
||||
|
//batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms |
||||
|
//延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 |
||||
|
props.put("linger.ms", 1); |
||||
|
//producer可以用来缓存数据的内存大小。 |
||||
|
props.put("buffer.memory", 33554432); |
||||
|
props.put("key.serializer", |
||||
|
StringSerializer.class.getName()); |
||||
|
props.put("value.serializer", |
||||
|
StringSerializer.class.getName()); |
||||
|
kafkaProducer = new KafkaProducer<String, String>(props); |
||||
|
} |
||||
|
// } |
||||
|
return kafkaProducer; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 关闭KafkaProducer实例 |
||||
|
*/ |
||||
|
public static void closeProducer() { |
||||
|
if (kafkaProducer != null) { |
||||
|
log.info("----------close producer----------"); |
||||
|
kafkaProducer.close(); |
||||
|
kafkaProducer = null; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static void sendKafka(String resultData) { |
||||
|
KafkaProducer<String, String> producer = getProducer(); |
||||
|
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData); |
||||
|
producer.send(se); |
||||
|
log.info("发送kafka成功"); |
||||
|
// num++; |
||||
|
} |
||||
|
} |
@ -0,0 +1,270 @@ |
|||||
|
package com.bfd.youzhiapi.util; |
||||
|
|
||||
|
|
||||
|
import cn.hutool.core.date.DateUtil; |
||||
|
import com.alibaba.fastjson2.JSON; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.bfd.youzhiapi.config.GlobalConfig; |
||||
|
import lombok.extern.slf4j.Slf4j; |
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
import java.nio.charset.Charset; |
||||
|
import java.nio.charset.StandardCharsets; |
||||
|
import java.security.MessageDigest; |
||||
|
import java.util.*; |
||||
|
|
||||
|
/** |
||||
|
* Title: 接口签名工具类 |
||||
|
* |
||||
|
* <p> |
||||
|
* Description: |
||||
|
* </p> |
||||
|
* |
||||
|
* @author |
||||
|
*/ |
||||
|
@Component |
||||
|
@Slf4j |
||||
|
public class Md5SignUtil { |
||||
|
public static String Encoding_utf8 = "UTF-8"; |
||||
|
private static final String hexDigits[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}; |
||||
|
|
||||
|
/** |
||||
|
* 签名sign |
||||
|
* @param parameters |
||||
|
* 数据拼接 |
||||
|
* 将所有发送或者接收到的数据为集合M,将集合M内非空参数值的参数按照参数名ASCII码从小到大排序(字典序),使用URL键值对的格式(即key1=value1&key2=value2…)拼接成字符串stringA。 |
||||
|
* 特别注意以下重要规则: ◆ 参数名ASCII码从小到大排序(字典序); ◆ 如果参数的值为空不参与签名; ◆ 参数名区分大小写; ◆ |
||||
|
* 验证调用返回或主动通知时,传送的sign参数不参与签名,将生成的签名与该sign值作校验。 数据签名 |
||||
|
* 在stringA最后拼接上key得到stringSignTemp字符串,并对stringSignTemp进行MD5运算,再将得到的字符串所有字符转换为大写,得到sign值signValue。 |
||||
|
* @return |
||||
|
*/ |
||||
|
public static String sign(SortedMap<Object, Object> parameters) { |
||||
|
StringBuffer sb = new StringBuffer(); |
||||
|
Set es = parameters.entrySet();// 所有参与传参的参数按照accsii排序(升序) |
||||
|
Iterator it = es.iterator(); |
||||
|
while (it.hasNext()) { |
||||
|
Map.Entry entry = (Map.Entry) it.next(); |
||||
|
String k = (String) entry.getKey(); |
||||
|
Object v = entry.getValue(); |
||||
|
if (null != v && !"".equals(v) && !"sign".equals(k) && !"key".equals(k)) { |
||||
|
sb.append(k + "=" + v + "&"); |
||||
|
} |
||||
|
} |
||||
|
sb.append("appSecret=" + GlobalConfig.APPSECRET); |
||||
|
log.debug("Md5SignUtil.sign加密串为:{}", sb.toString()); |
||||
|
String sign = MD5Encode(sb.toString(), Encoding_utf8).toUpperCase(); |
||||
|
log.debug("Md5SignUtil.sign加密串后的签名为:{}", sign); |
||||
|
return sign; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 返回结果签名sign |
||||
|
* |
||||
|
* @param responseDRO |
||||
|
* @param key |
||||
|
* @return |
||||
|
*/ |
||||
|
// public static String sign(ResponseDRO responseDRO, String key) { |
||||
|
// JSONObject infoJson = (JSONObject) JSONObject.toJSON(responseDRO); |
||||
|
// StringBuilder sb = new StringBuilder(); |
||||
|
// Object[] sortArra = infoJson.keySet().toArray(); |
||||
|
// // 按照ASCII排序 |
||||
|
// Arrays.sort(sortArra); |
||||
|
// for (Object k : sortArra) { |
||||
|
// Object v = infoJson.get(k); |
||||
|
// if (null != v && !"".equals(v)) { |
||||
|
// if (v instanceof JSONArray) { |
||||
|
// JSONArray tempJson = (JSONArray) v; |
||||
|
// List<SortedMap<String, Object>> allList = new ArrayList<>(); |
||||
|
// for (int i = 0; i < tempJson.size(); i++) { |
||||
|
// JSONObject obj = tempJson.getJSONObject(i); |
||||
|
// Map<String, Object> objMap = new HashMap<>(); |
||||
|
// for (Map.Entry<String, Object> entry : obj.entrySet()) { |
||||
|
// objMap.put(entry.getKey(), entry.getValue()); |
||||
|
// } |
||||
|
// |
||||
|
// SortedMap<String, Object> temps = new TreeMap<>();// 升序 |
||||
|
// temps.putAll(objMap); |
||||
|
// allList.add(temps); |
||||
|
// } |
||||
|
// v = JSON.toJSONString(allList); |
||||
|
// } |
||||
|
// sb.append(k).append("=").append(v).append("&"); |
||||
|
// } |
||||
|
// } |
||||
|
// sb.append("appSecret=" + key); |
||||
|
// logger.debug("Md5SignUtil.sign加密串为:{}", sb.toString()); |
||||
|
// String sign = MD5Encode(sb.toString(), Encoding_utf8).toUpperCase(); |
||||
|
// logger.debug("Md5SignUtil.sign加密串后的签名为:{}", sign); |
||||
|
// return sign; |
||||
|
// } |
||||
|
|
||||
|
/** |
||||
|
* 请求参数sign签名 |
||||
|
* |
||||
|
* @param baseQuery |
||||
|
* @param key |
||||
|
* @return |
||||
|
*/ |
||||
|
// public static String sign(BaseQuery baseQuery, String key) { |
||||
|
// JSONObject infoJson = (JSONObject) JSONObject.toJSON(baseQuery); |
||||
|
// StringBuilder sb = new StringBuilder(); |
||||
|
// Object[] sortArra = infoJson.keySet().toArray(); |
||||
|
// // 按照ASCII排序 |
||||
|
// Arrays.sort(sortArra); |
||||
|
// for (Object k : sortArra) { |
||||
|
// Object v = infoJson.get(k); |
||||
|
// if (null != v && !"".equals(v) && !"sign".equals(k)) { |
||||
|
// if (v instanceof JSONArray) { |
||||
|
// JSONArray tempJson = (JSONArray) v; |
||||
|
// List<SortedMap<String, Object>> allList = new ArrayList<>(); |
||||
|
// for (int i = 0; i < tempJson.size(); i++) { |
||||
|
// JSONObject obj = tempJson.getJSONObject(i); |
||||
|
// Map<String, Object> objMap = new HashMap<>(); |
||||
|
// for (Map.Entry<String, Object> entry : obj.entrySet()) { |
||||
|
// objMap.put(entry.getKey(), entry.getValue()); |
||||
|
// } |
||||
|
// |
||||
|
// SortedMap<String, Object> temps = new TreeMap<>();// 升序 |
||||
|
// temps.putAll(objMap); |
||||
|
// allList.add(temps); |
||||
|
// } |
||||
|
// v = JSON.toJSONString(allList); |
||||
|
// } |
||||
|
// sb.append(k).append("=").append(v).append("&"); |
||||
|
// } |
||||
|
// } |
||||
|
// sb.append("appSecret=" + key); |
||||
|
// logger.info("Md5SignUtil.sign加密串为:{}", sb.toString()); |
||||
|
// String sign = MD5Encode(sb.toString(), Encoding_utf8).toUpperCase(); |
||||
|
// logger.info("Md5SignUtil.sign加密串后的签名为:{}", sign); |
||||
|
// return sign; |
||||
|
// } |
||||
|
|
||||
|
/** |
||||
|
* 字符串 md5加密 |
||||
|
* |
||||
|
* @param str |
||||
|
* @param charsetName |
||||
|
* @return |
||||
|
*/ |
||||
|
public static String MD5Encode(String str, String charsetName) { |
||||
|
String resultString = null; |
||||
|
try { |
||||
|
resultString = new String(str); |
||||
|
MessageDigest md = MessageDigest.getInstance("MD5"); |
||||
|
if (charsetName == null || "".equals(charsetName)) { |
||||
|
resultString = byteArrayToHexString(md.digest(resultString.getBytes())); |
||||
|
} else { |
||||
|
resultString = byteArrayToHexString(md.digest(resultString.getBytes(charsetName))); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
} |
||||
|
return resultString; |
||||
|
} |
||||
|
|
||||
|
private static String byteArrayToHexString(byte b[]) { |
||||
|
StringBuffer resultSb = new StringBuffer(); |
||||
|
for (int i = 0; i < b.length; i++) |
||||
|
resultSb.append(byteToHexString(b[i])); |
||||
|
|
||||
|
return resultSb.toString().toUpperCase(); |
||||
|
} |
||||
|
|
||||
|
private static String byteToHexString(byte b) { |
||||
|
int n = b; |
||||
|
if (n < 0) { |
||||
|
n += 256; |
||||
|
} |
||||
|
int d1 = n / 16; |
||||
|
int d2 = n % 16; |
||||
|
return hexDigits[d1] + hexDigits[d2]; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* md5加密 默认加密串小写 |
||||
|
* |
||||
|
* @param |
||||
|
* @return |
||||
|
*/ |
||||
|
// public static String md5(String plaintext) { |
||||
|
// return md5(plaintext, StandardCharsets.UTF_8); |
||||
|
// } |
||||
|
|
||||
|
|
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
|
||||
|
String key = "dc41973ee03e471887c77c4a532dbfc3"; |
||||
|
|
||||
|
String appId = "c4d532304c6b4497b1ad"; |
||||
|
String name = "radar"; |
||||
|
String randomStr = DateUtil.format(new Date(), "yyyyMMddHHmmss"); |
||||
|
// String randomStr = "20241225114130"; |
||||
|
List typeSet = new ArrayList<>(); |
||||
|
typeSet.add(10); |
||||
|
|
||||
|
|
||||
|
SortedMap<Object, Object> parameters = new TreeMap<Object, Object>(); |
||||
|
|
||||
|
parameters.put("randomStr", randomStr); |
||||
|
parameters.put("appId", appId); |
||||
|
parameters.put("title", name); |
||||
|
parameters.put("page",1); |
||||
|
parameters.put("pageSize",10); |
||||
|
// parameters.put("year","2024"); |
||||
|
|
||||
|
parameters.put("type", "30"); |
||||
|
// parameters.put("id","1010052234917"); |
||||
|
|
||||
|
// String apiSign = "D3CC78105AA4C5F594AE733E78DB6E02"; |
||||
|
// log.info("接口传入的签名串是:" + apiSign); |
||||
|
String mySign = sign(parameters); |
||||
|
log.info("生成的签名串是:" + mySign); |
||||
|
parameters.put("sign",mySign); |
||||
|
System.out.println(new JSONObject(parameters)); |
||||
|
// String data = HttpUtil.getData(JSON.toJSONString(parameters)); |
||||
|
// System.out.println(data); |
||||
|
|
||||
|
SortedMap<Object, Object> parameterss = new TreeMap<Object, Object>(); |
||||
|
parameterss.put("appId", appId); |
||||
|
parameterss.put("randomStr", randomStr); |
||||
|
parameterss.put("doi", "10.1109/ICCC57789.2023.10164991"); |
||||
|
parameterss.put("organId",392); |
||||
|
String mySigns = sign(parameterss); |
||||
|
log.info("生成的签名串是:" + mySigns); |
||||
|
parameterss.put("sign",mySigns); |
||||
|
System.out.println(new JSONObject(parameterss)); |
||||
|
|
||||
|
SortedMap<Object, Object> parametersss = new TreeMap<Object, Object>(); |
||||
|
parametersss.put("appId", appId); |
||||
|
parametersss.put("randomStr", randomStr); |
||||
|
// parametersss.put("status", 2); |
||||
|
parametersss.put("page",1); |
||||
|
parametersss.put("pageSize",10); |
||||
|
parametersss.put("organId",392); |
||||
|
String mySignss = sign(parametersss); |
||||
|
log.info("生成的签名串是:" + mySignss); |
||||
|
parametersss.put("sign",mySignss); |
||||
|
System.out.println(new JSONObject(parametersss)); |
||||
|
|
||||
|
SortedMap<Object, Object> parameterssss = new TreeMap<Object, Object>(); |
||||
|
parameterssss.put("appId", appId); |
||||
|
parameterssss.put("randomStr", randomStr); |
||||
|
// parametersss.put("status", 2); |
||||
|
parameterssss.put("id",8417); |
||||
|
String mySignsss = sign(parameterssss); |
||||
|
log.info("生成的签名串是:" + mySignsss); |
||||
|
parameterssss.put("sign",mySignsss); |
||||
|
System.out.println(new JSONObject(parameterssss)); |
||||
|
|
||||
|
// PreOrderResponseDto yardResultDto = new PreOrderResponseDto(); |
||||
|
// yardResultDto.setRetCode(retCode); |
||||
|
// yardResultDto.setRetMsg(retMsg); |
||||
|
// yardResultDto.setRandomStr(randomStr); |
||||
|
// yardResultDto.setSign(Md5SignUtil.sign(yardResultDto , key)); |
||||
|
// System.out.println(yardResultDto); |
||||
|
|
||||
|
} |
||||
|
|
||||
|
} |
@ -0,0 +1,29 @@ |
|||||
|
package com.bfd.youzhiapi.util; |
||||
|
|
||||
|
import org.springframework.stereotype.Component; |
||||
|
|
||||
|
import java.time.Instant; |
||||
|
import java.time.ZoneId; |
||||
|
import java.time.ZonedDateTime; |
||||
|
|
||||
|
/** |
||||
|
* @author guowei |
||||
|
*/ |
||||
|
@Component |
||||
|
public class Utils {

    /** Time zone used when converting timestamps to calendar years. */
    private static final ZoneId SHANGHAI_ZONE = ZoneId.of("Asia/Shanghai");

    /**
     * Checks whether the input consists of exactly four ASCII digits.
     *
     * @param input text to check; may be {@code null}
     * @return {@code true} only for strings such as {@code "2024"}; {@code false}
     *         for {@code null}, other lengths, or any non-digit character
     */
    public static boolean isFourDigitNumber(String input) {
        if (input == null || input.length() != 4) {
            return false;
        }
        for (int i = 0; i < 4; i++) {
            char c = input.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the calendar year of an epoch-millisecond timestamp.
     *
     * @param timestamp milliseconds since 1970-01-01T00:00:00Z
     * @return the year as observed in the Asia/Shanghai time zone (not UTC)
     */
    public static int getYearFromTimestamp(long timestamp) {
        ZonedDateTime dateTime = Instant.ofEpochMilli(timestamp).atZone(SHANGHAI_ZONE);
        return dateTime.getYear();
    }

    /**
     * Tests whether a year lies inside a closed interval.
     *
     * @param year      year to test
     * @param startYear lower bound, inclusive
     * @param endYear   upper bound, inclusive
     * @return {@code true} if {@code startYear <= year <= endYear}
     */
    public static boolean isYearInRange(int year, int startYear, int endYear) {
        return year >= startYear && year <= endYear;
    }
}
@ -0,0 +1,26 @@ |
|||||
|
spring: |
||||
|
datasource: |
||||
|
driver-class-name: com.mysql.cj.jdbc.Driver |
||||
|
url: jdbc:mysql://172.18.1.134:3306/cnki_crawl |
||||
|
username: crawl666 |
||||
|
password: lx2a4jN1xFT96kj20LU= |
||||
|
crawl: |
||||
|
kafka: |
||||
|
topic: zhiWangTest2 |
||||
|
brokers: 172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092 |
||||
|
mybatis: |
||||
|
mapper-locations: classpath:mapper/*.xml |
||||
|
#目的是为了省略resultType里的代码量 |
||||
|
type-aliases-package: com.bfd.youzhiapi.entity |
||||
|
configuration: |
||||
|
log-impl: org.apache.ibatis.logging.stdout.StdOutImpl |
||||
|
server: |
||||
|
port: 7071 |
||||
|
#日志级别 |
||||
|
logging: |
||||
|
level: |
||||
|
com: |
||||
|
bfd: INFO |
||||
|
#日志路径 |
||||
|
log: |
||||
|
path: ./logs |
@ -0,0 +1,38 @@ |
|||||
|
<configuration> |
||||
|
<!-- 属性文件:在properties文件中找到对应的配置项 --> |
||||
|
<springProperty scope="context" name="logging.path" source="logging.log.path"/> |
||||
|
<springProperty scope="context" name="logging.level" source="logging.level.com.bfd"/> |
||||
|
<!-- 默认的控制台日志输出,一般生产环境都是后台启动,这个没太大作用 --> |
||||
|
<!-- <appender name="STDOUT" |
||||
|
class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<Pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</Pattern> |
||||
|
</encoder> |
||||
|
</appender> --> |
||||
|
|
||||
|
<appender name="GLMAPPER-LOGGERONE" |
||||
|
class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<append>true</append> |
||||
|
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> |
||||
|
<level>${logging.level}</level> |
||||
|
</filter> |
||||
|
<file> |
||||
|
${logging.path}/crawlSchedule.log |
||||
|
<!-- ${logging.path}/sendKafka.log --> |
||||
|
</file> |
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<FileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</FileNamePattern> |
||||
|
<!-- <FileNamePattern>${logging.path}/sendKafka.log.%d{yyyy-MM-dd}</FileNamePattern> --> |
||||
|
<MaxHistory>7</MaxHistory> |
||||
|
</rollingPolicy> |
||||
|
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder"> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<root level="info"> |
||||
|
<appender-ref ref="GLMAPPER-LOGGERONE"/> |
||||
|
<!-- <appender-ref ref="STDOUT"/> --> |
||||
|
</root> |
||||
|
</configuration> |
@ -0,0 +1,17 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8" ?> |
||||
|
<!DOCTYPE mapper |
||||
|
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" |
||||
|
"http://mybatis.org/dtd/mybatis-3-mapper.dtd"> |
||||
|
<mapper namespace="com.bfd.youzhiapi.mapper.ScheduleMapper"> |
||||
|
<update id="updateTaskStatus"> |
||||
|
update `newslist_111` set status=#{status} where rid = #{rid} |
||||
|
</update> |
||||
|
|
||||
|
<select id="queryTaskByStatus" parameterType="int" resultType="com.bfd.youzhiapi.entity.TaskEntity"> |
||||
|
SELECT * FROM `newslist_111` WHERE status = #{status} |
||||
|
</select> |
||||
|
|
||||
|
<select id="queryCacheByDoi" resultType="com.bfd.youzhiapi.entity.CacheEntity"> |
||||
|
SELECT downloadId,downloadUrl FROM kyzd_cache WHERE doi = #{doi} |
||||
|
</select> |
||||
|
</mapper> |
@ -0,0 +1,13 @@ |
|||||
|
package com.bfd.youzhiapi; |
||||
|
|
||||
|
import org.junit.jupiter.api.Test; |
||||
|
import org.springframework.boot.test.context.SpringBootTest; |
||||
|
|
||||
|
@SpringBootTest |
||||
|
class YouzhiApiApplicationTests { |
||||
|
|
||||
|
@Test |
||||
|
void contextLoads() { |
||||
|
} |
||||
|
|
||||
|
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue