|
|
|
@ -157,6 +157,119 @@ public class ConvertTaskServiceImpl implements ConvertTaskService { |
|
|
|
//识别中 -- 放回队列 |
|
|
|
ConfigCache.resultQueue.put(task); |
|
|
|
}else if (progress == 100) { |
|
|
|
parseOcrRes(task,jobId); |
|
|
|
}else { |
|
|
|
//识别异常 |
|
|
|
log.error("文档转换异常:{}",resStr); |
|
|
|
//发送失败结果 |
|
|
|
AppResultDoc entity = new AppResultDoc(); |
|
|
|
entity.setInput(task); |
|
|
|
entity.setTaskId((String)task.get(Constants.TASKID)); |
|
|
|
entity.setAppId((Integer)task.get(Constants.ID)); |
|
|
|
long now = System.currentTimeMillis(); |
|
|
|
entity.setCreateTime(now); |
|
|
|
Map<String, Object> result = new HashMap<String, Object>(16); |
|
|
|
result.put(Constants.ERROR, "转换失败"); |
|
|
|
entity.setResult(result); |
|
|
|
entity.setStatus(2); |
|
|
|
entity.setDel(0); |
|
|
|
//回传给api服务保存 |
|
|
|
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} catch (Throwable e) { |
|
|
|
// TODO: handle exception |
|
|
|
log.error("创建文档解析任务异常。e:",e); |
|
|
|
//发送失败结果 |
|
|
|
AppResultDoc entity = new AppResultDoc(); |
|
|
|
entity.setInput(task); |
|
|
|
entity.setTaskId((String)task.get(Constants.TASKID)); |
|
|
|
entity.setAppId((Integer)task.get(Constants.ID)); |
|
|
|
long now = System.currentTimeMillis(); |
|
|
|
entity.setCreateTime(now); |
|
|
|
Map<String, Object> result = new HashMap<String, Object>(16); |
|
|
|
result.put(Constants.ERROR, "源文件解析异常"); |
|
|
|
entity.setResult(result); |
|
|
|
entity.setStatus(2); |
|
|
|
entity.setDel(0); |
|
|
|
//回传给api服务保存 |
|
|
|
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* 将Base64编码的Word文档读取为文本 |
|
|
|
* @param base64Word Base64字符串 |
|
|
|
* @return Word文本内容 |
|
|
|
* @throws Exception |
|
|
|
*/ |
|
|
|
public String readWordFromBase64(String base64Word) throws Exception { |
|
|
|
byte[] bytes = Base64.getDecoder().decode(base64Word); |
|
|
|
try (InputStream is = new ByteArrayInputStream(bytes)) { |
|
|
|
// 尝试读取为docx格式 |
|
|
|
try { |
|
|
|
XWPFDocument docx = new XWPFDocument(is); |
|
|
|
XWPFWordExtractor extractor = new XWPFWordExtractor(docx); |
|
|
|
return extractor.getText(); |
|
|
|
} catch (Exception e) { |
|
|
|
e.printStackTrace(); |
|
|
|
return null; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Base64 字符串写入指定路径文件 |
|
|
|
* |
|
|
|
* @param base64byte Base64 内容(支持 data:image/png;base64,xxx) |
|
|
|
* @param fileType 文件类型,如 png/jpg/pdf |
|
|
|
* @param filePath 保存目录路径,如 /data/upload/ |
|
|
|
* @param fileName 文件名称 |
|
|
|
* @return true 成功,false 失败 |
|
|
|
*/ |
|
|
|
public boolean base64ToFile(String base64byte, String filePath,String fileName) { |
|
|
|
try { |
|
|
|
if (base64byte == null || base64byte.isEmpty()) { |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
// 处理 base64 头 |
|
|
|
if (base64byte.contains(",")) { |
|
|
|
base64byte = base64byte.substring(base64byte.indexOf(",") + 1); |
|
|
|
} |
|
|
|
|
|
|
|
byte[] bytes = Base64.getDecoder().decode(base64byte); |
|
|
|
|
|
|
|
// 创建目录 |
|
|
|
File dir = new File(filePath); |
|
|
|
if (!dir.exists() && !dir.mkdirs()) { |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
// 生成文件名 |
|
|
|
File file = new File(dir, fileName); |
|
|
|
|
|
|
|
try (FileOutputStream fos = new FileOutputStream(file)) { |
|
|
|
fos.write(bytes); |
|
|
|
fos.flush(); |
|
|
|
} |
|
|
|
|
|
|
|
return true; |
|
|
|
} catch (Exception e) { |
|
|
|
// 生产中建议打日志 |
|
|
|
log.error("Base64 写文件失败", e); |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
* 解析查询结果 |
|
|
|
* @param task |
|
|
|
* @param jobId |
|
|
|
*/ |
|
|
|
private void parseOcrRes(Map<String, Object> task,String jobId) { |
|
|
|
//文件类型 |
|
|
|
String fileType = (String) task.get(Constants.DOC_TYPE); |
|
|
|
//识别成功 -- 获取请求结果 |
|
|
|
@ -166,8 +279,17 @@ public class ConvertTaskServiceImpl implements ConvertTaskService { |
|
|
|
String queryResStr = DownLoadUtil.doGet(lastQueryUrl); |
|
|
|
Map<String, Object> queryRes = JSONObject.parseObject(queryResStr); |
|
|
|
List<Map<String, Object>> jobDocuments = (List<Map<String, Object>>) queryRes.get(Constants.JOBDOCUMENTS); |
|
|
|
List<Map<String, Object>> outputDocuments = (List<Map<String, Object>>) jobDocuments.get(0).get(Constants.OutputDocuments); |
|
|
|
List<Map<String, Object>> files = (List<Map<String, Object>>) outputDocuments.get(0).get(Constants.FILES); |
|
|
|
List<Map<String, Object>> outputDocuments = null; |
|
|
|
List<Map<String, Object>> files = null; |
|
|
|
if(jobDocuments != null && jobDocuments.size() > 0) { |
|
|
|
outputDocuments = (List<Map<String, Object>>) jobDocuments.get(0).get(Constants.OUTPUTDOCUMENTS); |
|
|
|
files = (List<Map<String, Object>>) outputDocuments.get(0).get(Constants.FILES); |
|
|
|
}else { |
|
|
|
List<Map<String, Object>> InputFiles = (List<Map<String, Object>>) queryRes.get(Constants.INPUTFILES); |
|
|
|
outputDocuments = (List<Map<String, Object>>)InputFiles.get(0).get(Constants.OUTPUTDOCUMENTS); |
|
|
|
files = (List<Map<String, Object>>) outputDocuments.get(outputDocuments.size() - 1).get(Constants.FILES); |
|
|
|
} |
|
|
|
|
|
|
|
for (int i =0;i<files.size();i++) { |
|
|
|
Map<String, Object> map = files.get(i); |
|
|
|
String fileContents = (String) map.get(Constants.FILECONTENTS); |
|
|
|
@ -240,7 +362,7 @@ public class ConvertTaskServiceImpl implements ConvertTaskService { |
|
|
|
} |
|
|
|
}else { |
|
|
|
//识别异常 |
|
|
|
log.error("html文档转换异常:{}",resStr); |
|
|
|
log.error("html文档转换异常:{}",queryResStr); |
|
|
|
//发送失败结果 |
|
|
|
AppResultDoc entity = new AppResultDoc(); |
|
|
|
entity.setInput(task); |
|
|
|
@ -291,7 +413,7 @@ public class ConvertTaskServiceImpl implements ConvertTaskService { |
|
|
|
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); |
|
|
|
}else { |
|
|
|
//识别异常 |
|
|
|
log.error("文档转换异常:{}",resStr); |
|
|
|
log.error("文档转换异常:{}",queryResStr); |
|
|
|
//发送失败结果 |
|
|
|
AppResultDoc entity = new AppResultDoc(); |
|
|
|
entity.setInput(task); |
|
|
|
@ -309,112 +431,9 @@ public class ConvertTaskServiceImpl implements ConvertTaskService { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
}else { |
|
|
|
//识别异常 |
|
|
|
log.error("文档转换异常:{}",resStr); |
|
|
|
//发送失败结果 |
|
|
|
AppResultDoc entity = new AppResultDoc(); |
|
|
|
entity.setInput(task); |
|
|
|
entity.setTaskId((String)task.get(Constants.TASKID)); |
|
|
|
entity.setAppId((Integer)task.get(Constants.ID)); |
|
|
|
long now = System.currentTimeMillis(); |
|
|
|
entity.setCreateTime(now); |
|
|
|
Map<String, Object> result = new HashMap<String, Object>(16); |
|
|
|
result.put(Constants.ERROR, "转换失败"); |
|
|
|
entity.setResult(result); |
|
|
|
entity.setStatus(2); |
|
|
|
entity.setDel(0); |
|
|
|
//回传给api服务保存 |
|
|
|
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} catch (Throwable e) { |
|
|
|
// TODO: handle exception |
|
|
|
log.error("创建文档解析任务异常。e:",e); |
|
|
|
//发送失败结果 |
|
|
|
AppResultDoc entity = new AppResultDoc(); |
|
|
|
entity.setInput(task); |
|
|
|
entity.setTaskId((String)task.get(Constants.TASKID)); |
|
|
|
entity.setAppId((Integer)task.get(Constants.ID)); |
|
|
|
long now = System.currentTimeMillis(); |
|
|
|
entity.setCreateTime(now); |
|
|
|
Map<String, Object> result = new HashMap<String, Object>(16); |
|
|
|
result.put(Constants.ERROR, "源文件解析异常"); |
|
|
|
entity.setResult(result); |
|
|
|
entity.setStatus(2); |
|
|
|
entity.setDel(0); |
|
|
|
//回传给api服务保存 |
|
|
|
DownLoadUtil.doPost(saveUrl, JSONObject.toJSONString(entity)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* 将Base64编码的Word文档读取为文本 |
|
|
|
* @param base64Word Base64字符串 |
|
|
|
* @return Word文本内容 |
|
|
|
* @throws Exception |
|
|
|
*/ |
|
|
|
public String readWordFromBase64(String base64Word) throws Exception { |
|
|
|
byte[] bytes = Base64.getDecoder().decode(base64Word); |
|
|
|
try (InputStream is = new ByteArrayInputStream(bytes)) { |
|
|
|
// 尝试读取为docx格式 |
|
|
|
try { |
|
|
|
XWPFDocument docx = new XWPFDocument(is); |
|
|
|
XWPFWordExtractor extractor = new XWPFWordExtractor(docx); |
|
|
|
return extractor.getText(); |
|
|
|
} catch (Exception e) { |
|
|
|
e.printStackTrace(); |
|
|
|
return null; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Base64 字符串写入指定路径文件 |
|
|
|
* |
|
|
|
* @param base64byte Base64 内容(支持 data:image/png;base64,xxx) |
|
|
|
* @param fileType 文件类型,如 png/jpg/pdf |
|
|
|
* @param filePath 保存目录路径,如 /data/upload/ |
|
|
|
* @param fileName 文件名称 |
|
|
|
* @return true 成功,false 失败 |
|
|
|
*/ |
|
|
|
public boolean base64ToFile(String base64byte, String filePath,String fileName) { |
|
|
|
try { |
|
|
|
if (base64byte == null || base64byte.isEmpty()) { |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
// 处理 base64 头 |
|
|
|
if (base64byte.contains(",")) { |
|
|
|
base64byte = base64byte.substring(base64byte.indexOf(",") + 1); |
|
|
|
} |
|
|
|
|
|
|
|
byte[] bytes = Base64.getDecoder().decode(base64byte); |
|
|
|
|
|
|
|
// 创建目录 |
|
|
|
File dir = new File(filePath); |
|
|
|
if (!dir.exists() && !dir.mkdirs()) { |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
// 生成文件名 |
|
|
|
File file = new File(dir, fileName); |
|
|
|
|
|
|
|
try (FileOutputStream fos = new FileOutputStream(file)) { |
|
|
|
fos.write(bytes); |
|
|
|
fos.flush(); |
|
|
|
} |
|
|
|
|
|
|
|
return true; |
|
|
|
} catch (Exception e) { |
|
|
|
// 生产中建议打日志 |
|
|
|
log.error("Base64 写文件失败", e); |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) throws Exception { |
|
|
|
// OcrTaskServiceImpl ocrTaskServiceImpl = new OcrTaskServiceImpl(); |
|
|
|
// String base64Word = "77u/VGFibGUgMS4gRGlzdHJpYnV0aW9uIG9mIGNvbmZpcm1lZCBhbnRocmF4IGNhc2VzIGJ5IGNvdW50cnkgYW5kIHllYXIsIEVVL0VFQSwgMjAxNS0yMDE5DQpDb3VudHJ5CTIwMTUJMjAxNgkyMDE3CTIwMTgJMjAxOQ0KCU51bWJlcglOdW1iZXIJTnVtYmVyCU51bWJlcglOdW1iZXINCkF1c3RyaWEJMAkwCTAJMAkwDQpCZWxnaXVtCTAJMAkwCTAJMA0KQnVsZ2FyaWEJMgkwCTEJMAkwDQpDcm9hdGlhCTAJMAkwCTAJMA0KQ3lwcnVzCTAJMAkwCTAJMA0KQ3plY2hpYQkwCTAJMAkwCTANCkRlbm1hcmsJMAkwCTAJMAkwDQpFc3RvbmlhCTAJMAkwCTAJMA0KRmlubGFuZAkwCTAJMAkwCTANCkZyYW5jZQkwCTAJMAkwCTANCkdlcm1hbnkJMAkwCTAJMAkwDQpHcmVlY2UJMAkwCTAJMAkwDQpIdW5nYXJ5CTAJMAkwCTAJMQ0KSWNlbGFuZAkwCTAJMAkwCTANCklyZWxhbmQJMAkwCTAJMAkwDQpJdGFseQkwCTAJMAkwCTANCkxhdHZpYQkwCTAJMAkwCTANClVlY2h0ZW5zdGVpbgnigKIJ4oCiCeKAognigKIJ4oCiDQpVdGh1YW5pYQkwCTAJMAkwCTANCkx1eGVtYm91cmcJMAkwCTAJMAkwDQpNYWx0YQkwCTAJMAkwCTANCk5ldGhlcmxhbmRzCTAJMAkwCTEJMA=="; |
|
|
|
|