Browse Source

word转换pdf修复

master
maojian 2 months ago
parent
commit
b454573984
  1. 3
      document-convert-service/src/main/java/com/bw/convert/entity/Constants.java
  2. 233
      document-convert-service/src/main/java/com/bw/convert/service/impl/ConvertTaskServiceImpl.java
  3. 2
      opai-api/src/main/java/com/bw/opai/config/WebMvcConfig.java

3
document-convert-service/src/main/java/com/bw/convert/entity/Constants.java

@ -40,7 +40,8 @@ public class Constants {
public static final String JOBID = "jobId"; public static final String JOBID = "jobId";
public static final String PROGRESS = "Progress"; public static final String PROGRESS = "Progress";
public static final String JOBDOCUMENTS = "JobDocuments"; public static final String JOBDOCUMENTS = "JobDocuments";
public static final String OutputDocuments = "OutputDocuments";
public static final String OUTPUTDOCUMENTS = "OutputDocuments";
public static final String INPUTFILES = "InputFiles";
public static final String FILES = "Files"; public static final String FILES = "Files";
public static final String IDRENAME = "#id"; public static final String IDRENAME = "#id";

233
document-convert-service/src/main/java/com/bw/convert/service/impl/ConvertTaskServiceImpl.java

@ -157,6 +157,119 @@ public class ConvertTaskServiceImpl implements ConvertTaskService {
//识别中 -- 放回队列 //识别中 -- 放回队列
ConfigCache.resultQueue.put(task); ConfigCache.resultQueue.put(task);
}else if (progress == 100) { }else if (progress == 100) {
parseOcrRes(task,jobId);
}else {
//识别异常
log.error("文档转换异常:{}",resStr);
//发送失败结果
AppResultDoc entity = new AppResultDoc();
entity.setInput(task);
entity.setTaskId((String)task.get(Constants.TASKID));
entity.setAppId((Integer)task.get(Constants.ID));
long now = System.currentTimeMillis();
entity.setCreateTime(now);
Map<String, Object> result = new HashMap<String, Object>(16);
result.put(Constants.ERROR, "转换失败");
entity.setResult(result);
entity.setStatus(2);
entity.setDel(0);
//回传给api服务保存
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity));
}
} catch (Throwable e) {
// TODO: handle exception
log.error("创建文档解析任务异常。e:",e);
//发送失败结果
AppResultDoc entity = new AppResultDoc();
entity.setInput(task);
entity.setTaskId((String)task.get(Constants.TASKID));
entity.setAppId((Integer)task.get(Constants.ID));
long now = System.currentTimeMillis();
entity.setCreateTime(now);
Map<String, Object> result = new HashMap<String, Object>(16);
result.put(Constants.ERROR, "源文件解析异常");
entity.setResult(result);
entity.setStatus(2);
entity.setDel(0);
//回传给api服务保存
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity));
}
}
/**
 * Decodes a Base64-encoded Word document and extracts its plain text.
 *
 * <p>Only the OOXML ({@code .docx}) format is attempted. If the decoded bytes
 * cannot be opened or read as a docx document, the failure is logged and
 * {@code null} is returned instead of propagating the parsing error.
 *
 * @param base64Word Base64 string holding the document bytes
 * @return the extracted text, or {@code null} when the bytes cannot be parsed as docx
 * @throws Exception if {@code base64Word} is {@code null} or not valid Base64
 *                   (the decode happens outside the guarded parsing section)
 */
public String readWordFromBase64(String base64Word) throws Exception {
    byte[] bytes = Base64.getDecoder().decode(base64Word);
    try (InputStream is = new ByteArrayInputStream(bytes)) {
        // Try to read the bytes as docx. The document is opened in
        // try-with-resources so it is closed on both the success and the
        // failure path; the extractor only wraps the document.
        try (XWPFDocument docx = new XWPFDocument(is)) {
            XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
            return extractor.getText();
        } catch (Exception e) {
            // Report through the class logger rather than stderr so the
            // failure shows up alongside the other conversion errors.
            log.error("Word文档解析失败", e);
            return null;
        }
    }
}
/**
 * Decodes a Base64 string and writes the bytes to {@code fileName} inside {@code filePath}.
 *
 * <p>If the input contains a comma, everything up to and including the first
 * comma is treated as a data-URI prefix (e.g. {@code data:image/png;base64,})
 * and stripped before decoding. The target directory is created when it does
 * not exist yet. No exception escapes this method: failures are reported
 * through the return value.
 *
 * <p>NOTE(review): {@code fileName} is joined to the directory as-is; callers
 * are presumably expected to pass a plain file name without path-traversal
 * segments — confirm at the call sites.
 *
 * @param base64byte Base64 content, optionally prefixed like {@code data:image/png;base64,xxx}
 * @param filePath   directory to write into, e.g. {@code /data/upload/}
 * @param fileName   name of the file to create inside {@code filePath}
 * @return {@code true} when the file was written; {@code false} for empty input,
 *         a missing path/name, a directory that cannot be created, invalid Base64,
 *         or an I/O error
 */
public boolean base64ToFile(String base64byte, String filePath, String fileName) {
    if (base64byte == null || base64byte.isEmpty()) {
        return false;
    }
    // Reject missing targets explicitly instead of relying on a caught NPE.
    if (filePath == null || fileName == null) {
        log.error("Base64 写文件失败, filePath={}, fileName={}", filePath, fileName);
        return false;
    }
    try {
        // Strip an optional data-URI prefix: keep only what follows the first comma.
        int comma = base64byte.indexOf(',');
        String payload = comma >= 0 ? base64byte.substring(comma + 1) : base64byte;
        byte[] bytes = Base64.getDecoder().decode(payload);

        // Create the target directory on demand.
        File dir = new File(filePath);
        if (!dir.exists() && !dir.mkdirs()) {
            log.error("Base64 写文件失败, 无法创建目录:{}", filePath);
            return false;
        }

        File file = new File(dir, fileName);
        try (FileOutputStream fos = new FileOutputStream(file)) {
            // FileOutputStream is unbuffered, so no explicit flush is needed.
            fos.write(bytes);
        }
        return true;
    } catch (Exception e) {
        // Covers invalid Base64 (IllegalArgumentException) and I/O failures.
        log.error("Base64 写文件失败", e);
        return false;
    }
}
/**
* 解析查询结果
* @param task
* @param jobId
*/
private void parseOcrRes(Map<String, Object> task,String jobId) {
//文件类型 //文件类型
String fileType = (String) task.get(Constants.DOC_TYPE); String fileType = (String) task.get(Constants.DOC_TYPE);
//识别成功 -- 获取请求结果 //识别成功 -- 获取请求结果
@ -166,8 +279,17 @@ public class ConvertTaskServiceImpl implements ConvertTaskService {
String queryResStr = DownLoadUtil.doGet(lastQueryUrl); String queryResStr = DownLoadUtil.doGet(lastQueryUrl);
Map<String, Object> queryRes = JSONObject.parseObject(queryResStr); Map<String, Object> queryRes = JSONObject.parseObject(queryResStr);
List<Map<String, Object>> jobDocuments = (List<Map<String, Object>>) queryRes.get(Constants.JOBDOCUMENTS); List<Map<String, Object>> jobDocuments = (List<Map<String, Object>>) queryRes.get(Constants.JOBDOCUMENTS);
List<Map<String, Object>> outputDocuments = (List<Map<String, Object>>) jobDocuments.get(0).get(Constants.OutputDocuments);
List<Map<String, Object>> files = (List<Map<String, Object>>) outputDocuments.get(0).get(Constants.FILES);
List<Map<String, Object>> outputDocuments = null;
List<Map<String, Object>> files = null;
if(jobDocuments != null && jobDocuments.size() > 0) {
outputDocuments = (List<Map<String, Object>>) jobDocuments.get(0).get(Constants.OUTPUTDOCUMENTS);
files = (List<Map<String, Object>>) outputDocuments.get(0).get(Constants.FILES);
}else {
List<Map<String, Object>> InputFiles = (List<Map<String, Object>>) queryRes.get(Constants.INPUTFILES);
outputDocuments = (List<Map<String, Object>>)InputFiles.get(0).get(Constants.OUTPUTDOCUMENTS);
files = (List<Map<String, Object>>) outputDocuments.get(outputDocuments.size() - 1).get(Constants.FILES);
}
for (int i =0;i<files.size();i++) { for (int i =0;i<files.size();i++) {
Map<String, Object> map = files.get(i); Map<String, Object> map = files.get(i);
String fileContents = (String) map.get(Constants.FILECONTENTS); String fileContents = (String) map.get(Constants.FILECONTENTS);
@ -240,7 +362,7 @@ public class ConvertTaskServiceImpl implements ConvertTaskService {
} }
}else { }else {
//识别异常 //识别异常
log.error("html文档转换异常:{}",resStr);
log.error("html文档转换异常:{}",queryResStr);
//发送失败结果 //发送失败结果
AppResultDoc entity = new AppResultDoc(); AppResultDoc entity = new AppResultDoc();
entity.setInput(task); entity.setInput(task);
@ -291,7 +413,7 @@ public class ConvertTaskServiceImpl implements ConvertTaskService {
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity));
}else { }else {
//识别异常 //识别异常
log.error("文档转换异常:{}",resStr);
log.error("文档转换异常:{}",queryResStr);
//发送失败结果 //发送失败结果
AppResultDoc entity = new AppResultDoc(); AppResultDoc entity = new AppResultDoc();
entity.setInput(task); entity.setInput(task);
@ -309,112 +431,9 @@ public class ConvertTaskServiceImpl implements ConvertTaskService {
} }
} }
} }
}else {
//识别异常
log.error("文档转换异常:{}",resStr);
//发送失败结果
AppResultDoc entity = new AppResultDoc();
entity.setInput(task);
entity.setTaskId((String)task.get(Constants.TASKID));
entity.setAppId((Integer)task.get(Constants.ID));
long now = System.currentTimeMillis();
entity.setCreateTime(now);
Map<String, Object> result = new HashMap<String, Object>(16);
result.put(Constants.ERROR, "转换失败");
entity.setResult(result);
entity.setStatus(2);
entity.setDel(0);
//回传给api服务保存
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity));
}
} catch (Throwable e) {
// TODO: handle exception
log.error("创建文档解析任务异常。e:",e);
//发送失败结果
AppResultDoc entity = new AppResultDoc();
entity.setInput(task);
entity.setTaskId((String)task.get(Constants.TASKID));
entity.setAppId((Integer)task.get(Constants.ID));
long now = System.currentTimeMillis();
entity.setCreateTime(now);
Map<String, Object> result = new HashMap<String, Object>(16);
result.put(Constants.ERROR, "源文件解析异常");
entity.setResult(result);
entity.setStatus(2);
entity.setDel(0);
//回传给api服务保存
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity));
}
} }
/**
 * Decodes a Base64-encoded Word document and extracts its plain text.
 *
 * <p>Only the OOXML ({@code .docx}) format is attempted. If the decoded bytes
 * cannot be opened or read as a docx document, the failure is logged and
 * {@code null} is returned instead of propagating the parsing error.
 *
 * @param base64Word Base64 string holding the document bytes
 * @return the extracted text, or {@code null} when the bytes cannot be parsed as docx
 * @throws Exception if {@code base64Word} is {@code null} or not valid Base64
 *                   (the decode happens outside the guarded parsing section)
 */
public String readWordFromBase64(String base64Word) throws Exception {
    byte[] bytes = Base64.getDecoder().decode(base64Word);
    try (InputStream is = new ByteArrayInputStream(bytes)) {
        // Try to read the bytes as docx. The document is opened in
        // try-with-resources so it is closed on both the success and the
        // failure path; the extractor only wraps the document.
        try (XWPFDocument docx = new XWPFDocument(is)) {
            XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
            return extractor.getText();
        } catch (Exception e) {
            // Report through the class logger rather than stderr so the
            // failure shows up alongside the other conversion errors.
            log.error("Word文档解析失败", e);
            return null;
        }
    }
}
/**
 * Decodes a Base64 string and writes the bytes to {@code fileName} inside {@code filePath}.
 *
 * <p>If the input contains a comma, everything up to and including the first
 * comma is treated as a data-URI prefix (e.g. {@code data:image/png;base64,})
 * and stripped before decoding. The target directory is created when it does
 * not exist yet. No exception escapes this method: failures are reported
 * through the return value.
 *
 * <p>NOTE(review): {@code fileName} is joined to the directory as-is; callers
 * are presumably expected to pass a plain file name without path-traversal
 * segments — confirm at the call sites.
 *
 * @param base64byte Base64 content, optionally prefixed like {@code data:image/png;base64,xxx}
 * @param filePath   directory to write into, e.g. {@code /data/upload/}
 * @param fileName   name of the file to create inside {@code filePath}
 * @return {@code true} when the file was written; {@code false} for empty input,
 *         a missing path/name, a directory that cannot be created, invalid Base64,
 *         or an I/O error
 */
public boolean base64ToFile(String base64byte, String filePath, String fileName) {
    if (base64byte == null || base64byte.isEmpty()) {
        return false;
    }
    // Reject missing targets explicitly instead of relying on a caught NPE.
    if (filePath == null || fileName == null) {
        log.error("Base64 写文件失败, filePath={}, fileName={}", filePath, fileName);
        return false;
    }
    try {
        // Strip an optional data-URI prefix: keep only what follows the first comma.
        int comma = base64byte.indexOf(',');
        String payload = comma >= 0 ? base64byte.substring(comma + 1) : base64byte;
        byte[] bytes = Base64.getDecoder().decode(payload);

        // Create the target directory on demand.
        File dir = new File(filePath);
        if (!dir.exists() && !dir.mkdirs()) {
            log.error("Base64 写文件失败, 无法创建目录:{}", filePath);
            return false;
        }

        File file = new File(dir, fileName);
        try (FileOutputStream fos = new FileOutputStream(file)) {
            // FileOutputStream is unbuffered, so no explicit flush is needed.
            fos.write(bytes);
        }
        return true;
    } catch (Exception e) {
        // Covers invalid Base64 (IllegalArgumentException) and I/O failures.
        log.error("Base64 写文件失败", e);
        return false;
    }
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
// OcrTaskServiceImpl ocrTaskServiceImpl = new OcrTaskServiceImpl(); // OcrTaskServiceImpl ocrTaskServiceImpl = new OcrTaskServiceImpl();
// String base64Word = "77u/VGFibGUgMS4gRGlzdHJpYnV0aW9uIG9mIGNvbmZpcm1lZCBhbnRocmF4IGNhc2VzIGJ5IGNvdW50cnkgYW5kIHllYXIsIEVVL0VFQSwgMjAxNS0yMDE5DQpDb3VudHJ5CTIwMTUJMjAxNgkyMDE3CTIwMTgJMjAxOQ0KCU51bWJlcglOdW1iZXIJTnVtYmVyCU51bWJlcglOdW1iZXINCkF1c3RyaWEJMAkwCTAJMAkwDQpCZWxnaXVtCTAJMAkwCTAJMA0KQnVsZ2FyaWEJMgkwCTEJMAkwDQpDcm9hdGlhCTAJMAkwCTAJMA0KQ3lwcnVzCTAJMAkwCTAJMA0KQ3plY2hpYQkwCTAJMAkwCTANCkRlbm1hcmsJMAkwCTAJMAkwDQpFc3RvbmlhCTAJMAkwCTAJMA0KRmlubGFuZAkwCTAJMAkwCTANCkZyYW5jZQkwCTAJMAkwCTANCkdlcm1hbnkJMAkwCTAJMAkwDQpHcmVlY2UJMAkwCTAJMAkwDQpIdW5nYXJ5CTAJMAkwCTAJMQ0KSWNlbGFuZAkwCTAJMAkwCTANCklyZWxhbmQJMAkwCTAJMAkwDQpJdGFseQkwCTAJMAkwCTANCkxhdHZpYQkwCTAJMAkwCTANClVlY2h0ZW5zdGVpbgnigKIJ4oCiCeKAognigKIJ4oCiDQpVdGh1YW5pYQkwCTAJMAkwCTANCkx1eGVtYm91cmcJMAkwCTAJMAkwDQpNYWx0YQkwCTAJMAkwCTANCk5ldGhlcmxhbmRzCTAJMAkwCTEJMA=="; // String base64Word = "77u/VGFibGUgMS4gRGlzdHJpYnV0aW9uIG9mIGNvbmZpcm1lZCBhbnRocmF4IGNhc2VzIGJ5IGNvdW50cnkgYW5kIHllYXIsIEVVL0VFQSwgMjAxNS0yMDE5DQpDb3VudHJ5CTIwMTUJMjAxNgkyMDE3CTIwMTgJMjAxOQ0KCU51bWJlcglOdW1iZXIJTnVtYmVyCU51bWJlcglOdW1iZXINCkF1c3RyaWEJMAkwCTAJMAkwDQpCZWxnaXVtCTAJMAkwCTAJMA0KQnVsZ2FyaWEJMgkwCTEJMAkwDQpDcm9hdGlhCTAJMAkwCTAJMA0KQ3lwcnVzCTAJMAkwCTAJMA0KQ3plY2hpYQkwCTAJMAkwCTANCkRlbm1hcmsJMAkwCTAJMAkwDQpFc3RvbmlhCTAJMAkwCTAJMA0KRmlubGFuZAkwCTAJMAkwCTANCkZyYW5jZQkwCTAJMAkwCTANCkdlcm1hbnkJMAkwCTAJMAkwDQpHcmVlY2UJMAkwCTAJMAkwDQpIdW5nYXJ5CTAJMAkwCTAJMQ0KSWNlbGFuZAkwCTAJMAkwCTANCklyZWxhbmQJMAkwCTAJMAkwDQpJdGFseQkwCTAJMAkwCTANCkxhdHZpYQkwCTAJMAkwCTANClVlY2h0ZW5zdGVpbgnigKIJ4oCiCeKAognigKIJ4oCiDQpVdGh1YW5pYQkwCTAJMAkwCTANCkx1eGVtYm91cmcJMAkwCTAJMAkwDQpNYWx0YQkwCTAJMAkwCTANCk5ldGhlcmxhbmRzCTAJMAkwCTEJMA==";

2
opai-api/src/main/java/com/bw/opai/config/WebMvcConfig.java

@ -19,6 +19,8 @@ public class WebMvcConfig implements WebMvcConfigurer {
registry.addInterceptor(authInterceptor) registry.addInterceptor(authInterceptor)
// 拦截所有业务接口 // 拦截所有业务接口
.addPathPatterns("/apps/**") .addPathPatterns("/apps/**")
// 放行 datasave
.excludePathPatterns("/apps/datasave")
// 放行登录注册接口否则就死循环了 // 放行登录注册接口否则就死循环了
.excludePathPatterns("/auth/**") .excludePathPatterns("/auth/**")
.excludePathPatterns("/static/**"); .excludePathPatterns("/static/**");

Loading…
Cancel
Save