Browse Source

未知类型文件不予解析

master
maojian 2 weeks ago
parent
commit
0da2f9b6a3
  1. 1
      src/main/java/com/bfd/parse/entity/Constants.java
  2. 27
      src/main/java/com/bfd/parse/process/FileTaskProcess.java

1
src/main/java/com/bfd/parse/entity/Constants.java

@ -161,6 +161,7 @@ public class Constants {
// File-type suffix constants (lower-case, no leading dot).
// NOTE(review): presumably matched against file names via String.endsWith — confirm at call sites.
public static final String XLS = "xls";
public static final String XLSX = "xlsx";
public static final String PDF = "pdf";
public static final String TXT = "txt";
public static final String DOC = "doc";
public static final String DOCX = "docx";
public static final String PPT = "ppt";

27
src/main/java/com/bfd/parse/process/FileTaskProcess.java

@ -30,6 +30,8 @@ import com.bfd.parse.utils.DataUtil;
import com.bfd.parse.utils.DownLoadUtil;
import com.bfd.parse.utils.SpringBootKafka;
import kotlin.jvm.Throws;
/**
* @author jian.mao
* @date 2024年2月4日
@ -99,7 +101,7 @@ public class FileTaskProcess implements Runnable{
* @return
* @throws IOException
*/
private String readFileToSend(String path, String saveFilePath) throws IOException {
private String readFileToSend(String path, String saveFilePath) throws Exception {
StringBuffer sb = new StringBuffer();
DownLoadUtil.downloadFile(path, saveFilePath);
if (saveFilePath.endsWith(Constants.DOC) || saveFilePath.endsWith(Constants.DOCX)) {
@ -107,10 +109,13 @@ public class FileTaskProcess implements Runnable{
}else if(saveFilePath.endsWith(Constants.PDF)){
//pdf 纯文字版本
sb.append(readPdfFile(saveFilePath));
} else {
for (String line : FileUtils.readLines(new File(saveFilePath))) {
} else if(saveFilePath.endsWith(Constants.TXT)){
for (String line : FileUtils.readLines(new File(saveFilePath))) {
sb.append(line);
}
} else {
throw new Exception("未知文件类型。");
}
return sb.toString();
}
@ -154,6 +159,9 @@ public class FileTaskProcess implements Runnable{
// 提取文本内容
PDFTextStripper pdfStripper = new PDFTextStripper();
String text = pdfStripper.getText(document);
if(text.replace("\r\n", Constants.EMPTY).replace("\n", Constants.EMPTY).replace("\r", Constants.EMPTY).equals(Constants.EMPTY)) {
text = "文档内容为空";
}
// 关闭文档
document.close();
return text;
@ -182,4 +190,17 @@ public class FileTaskProcess implements Runnable{
log.error("删除文件异常:{}",filePath);
}
}
/**
 * Ad-hoc manual check: loads a PDF, extracts its text, strips all CRLF
 * sequences, and prints whether the remaining text is empty.
 *
 * @param args optional; when at least one argument is given, {@code args[0]} is
 *             used as the PDF path, otherwise the hard-coded local sample path is used
 * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
 */
public static void main(String[] args) throws IOException {
    // Load the PDF file (path overridable from the command line)
    String pdfPath = (args != null && args.length > 0)
            ? args[0]
            : "C:\\Users\\55007\\Downloads\\2025001_04.pdf";
    File file = new File(pdfPath);
    // try-with-resources closes the document even when text extraction throws
    try (PDDocument document = PDDocument.load(file)) {
        // Extract the text content
        PDFTextStripper pdfStripper = new PDFTextStripper();
        String text = pdfStripper.getText(document).replace("\r\n", "");
        System.out.println(text.isEmpty());
    }
}
}
Loading…
Cancel
Save