#coding:utf8 import queue_manager import logging from cnocr import CnOcr import onnxruntime as ort from dataUtil import get_value import uuid import json import requests import os from global_dict import global_scenes_manager import global_dict import time # 初始化日志 logger = logging.getLogger(__name__) # 初始化 OCR 实例 ocr = CnOcr() def ocr_process(): """独立线程处理队列中的 OCR 任务""" logger.info("ocr线程启动----") while global_dict.is_start: result = {} results = {} save_path = '' # 获取任务 size = queue_manager.get_size() if size> 0 : task = queue_manager.get_task() else: logger.info('队列暂无任务-----') time.sleep(3) continue try: logger.info('task size:{},task:{}'.format(size,task)) # 根据版本号判断 scenes_id = str(task['scenes_id']) task_version = str(task['version']) cache_version = global_scenes_manager[scenes_id] if not task_version == cache_version: logger.info('任务已暂停:{}'.format(task)) continue filePathFormula = task['input']['filePath'] data = task['data'] img_path_url = get_value(data,filePathFormula) file_name = str(uuid.uuid4()) extension = get_file_extension(img_path_url) save_path = './files/{}.{}'.format(file_name,extension) download_file(img_path_url,save_path) # 执行 OCR 识别 logger.info(f"识别开始-----") identification_result = ocr.ocr(save_path) text = '' for item in identification_result: text += item['text'] results['isLast'] = True results['content'] = text results['id'] = file_name result['results'] = json.dumps(results) result['status'] = 1 result['message'] = '成功' task['result'] = result except Exception as e: logger.error(f"Error processing OCR task: {e}") results['isLast'] = True id = str(uuid.uuid4()) results['id'] = id result['results'] = json.dumps(results) result['status'] = 2 result['message'] = '识别失败' # 标记任务完成并发送到 Kafka delete_file(save_path) queue_manager.task_done(task) else: logger.info("执行线程安全退出-----") def download_file(url, save_path): """ 下载文件并保存到指定路径。 :param url: 文件的下载链接 :param save_path: 保存文件的完整路径(包括文件名) """ try: # 发送 HTTP GET 请求下载文件 response = requests.get(url, stream=True) response.raise_for_status() # 检查请求是否成功 # 将文件写入指定的保存路径 with open(save_path, 'wb') as file: for chunk in response.iter_content(chunk_size=8192): if chunk: file.write(chunk) logger.info(f"文件已成功下载并保存到: {save_path}") except requests.exceptions.RequestException as e: logger.error(f"文件下载失败: {e}") def get_file_extension(url): # 找到最后一个 '.' 的位置 dot_index = url.rfind('.') # 找到 '?' 或 '#' 的位置(如果有的话),这些符号通常用于查询参数或锚点 query_index = url.find('?', dot_index) hash_index = url.find('#', dot_index) # 确定扩展名的结束位置 end_index = min(query_index if query_index != -1 else len(url), hash_index if hash_index != -1 else len(url)) # 提取扩展名 extension = url[dot_index + 1:end_index] return extension def delete_file(file_path): """ 删除指定路径的文件。 :param file_path: 要删除的文件路径 :return: None """ try: # 检查文件是否存在 if os.path.exists(file_path): os.remove(file_path) # 删除文件 logger.info(f"文件 '{file_path}' 已成功删除。") else: logger.warning(f"文件 '{file_path}' 不存在。") except Exception as e: logger.error(f"删除文件 '{file_path}' 时发生错误: {e}")