图片解析应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

135 lines
4.3 KiB

  1. #coding:utf8
  2. import queue_manager
  3. import logging
  4. from cnocr import CnOcr
  5. import onnxruntime as ort
  6. from dataUtil import get_value
  7. import uuid
  8. import json
  9. import requests
  10. import os
  11. from global_dict import global_scenes_manager
  12. import global_dict
  13. import time
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# OCR engine instance, created once when the module is imported and reused
# for every recognition call in this module.
ocr = CnOcr()
  18. def ocr_process():
  19. """独立线程处理队列中的 OCR 任务"""
  20. logger.info("ocr线程启动----")
  21. while global_dict.is_start:
  22. result = {}
  23. results = {}
  24. save_path = ''
  25. # 获取任务
  26. size = queue_manager.get_size()
  27. if size> 0 :
  28. task = queue_manager.get_task()
  29. else:
  30. logger.info('队列暂无任务-----')
  31. time.sleep(3)
  32. continue
  33. try:
  34. logger.info('task size:{},task:{}'.format(size,task))
  35. # 根据版本号判断
  36. scenes_id = str(task['scenes_id'])
  37. task_version = str(task['version'])
  38. cache_version = global_scenes_manager[scenes_id]
  39. if not task_version == cache_version:
  40. logger.info('任务已暂停:{}'.format(task))
  41. continue
  42. filePathFormula = task['input']['filePath']
  43. data = task['data']
  44. img_path_url = get_value(data,filePathFormula)
  45. file_name = str(uuid.uuid4())
  46. extension = get_file_extension(img_path_url)
  47. save_path = './files/{}.{}'.format(file_name,extension)
  48. download_file(img_path_url,save_path)
  49. # 执行 OCR 识别
  50. logger.info(f"识别开始-----")
  51. identification_result = ocr.ocr(save_path)
  52. text = ''
  53. for item in identification_result:
  54. text += item['text']
  55. results['isLast'] = True
  56. results['content'] = text
  57. results['id'] = file_name
  58. result['results'] = json.dumps(results)
  59. result['status'] = 1
  60. result['message'] = '成功'
  61. task['result'] = result
  62. except Exception as e:
  63. logger.error(f"Error processing OCR task: {e}")
  64. results['isLast'] = True
  65. id = str(uuid.uuid4())
  66. results['id'] = id
  67. result['results'] = json.dumps(results)
  68. result['status'] = 2
  69. result['message'] = '识别失败'
  70. # 标记任务完成并发送到 Kafka
  71. delete_file(save_path)
  72. queue_manager.task_done(task)
  73. else:
  74. logger.info("执行线程安全退出-----")
  75. def download_file(url, save_path):
  76. """
  77. :param url:
  78. :param save_path:
  79. """
  80. try:
  81. # 发送 HTTP GET 请求下载文件
  82. response = requests.get(url, stream=True)
  83. response.raise_for_status() # 检查请求是否成功
  84. # 将文件写入指定的保存路径
  85. with open(save_path, 'wb') as file:
  86. for chunk in response.iter_content(chunk_size=8192):
  87. if chunk:
  88. file.write(chunk)
  89. logger.info(f"文件已成功下载并保存到: {save_path}")
  90. except requests.exceptions.RequestException as e:
  91. logger.error(f"文件下载失败: {e}")
  92. def get_file_extension(url):
  93. # 找到最后一个 '.' 的位置
  94. dot_index = url.rfind('.')
  95. # 找到 '?' 或 '#' 的位置(如果有的话),这些符号通常用于查询参数或锚点
  96. query_index = url.find('?', dot_index)
  97. hash_index = url.find('#', dot_index)
  98. # 确定扩展名的结束位置
  99. end_index = min(query_index if query_index != -1 else len(url),
  100. hash_index if hash_index != -1 else len(url))
  101. # 提取扩展名
  102. extension = url[dot_index + 1:end_index]
  103. return extension
  104. def delete_file(file_path):
  105. """
  106. :param file_path:
  107. :return: None
  108. """
  109. try:
  110. # 检查文件是否存在
  111. if os.path.exists(file_path):
  112. os.remove(file_path) # 删除文件
  113. logger.info(f"文件 '{file_path}' 已成功删除。")
  114. else:
  115. logger.warning(f"文件 '{file_path}' 不存在。")
  116. except Exception as e:
  117. logger.error(f"删除文件 '{file_path}' 时发生错误: {e}")