import logging
import os
import sys
from pdfminer.layout import LTLine
sys.path.append(os.path.dirname(__file__) + "/../")
import traceback
import cv2
from format_convert import get_memory_info
from format_convert.utils import judge_error_code, add_div, LineTable
from format_convert.table_correct import get_rotated_image
from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface


def _scale_point(point, ratio):
    """Scale an (x, y) point; ``ratio`` is (height_ratio, width_ratio)."""
    return (int(point[0] * ratio[1]), int(point[1] * ratio[0]))


def _scale_segment(segment, ratio):
    """Scale a two-point structure ``[(x0, y0), (x1, y1)]``."""
    return [_scale_point(segment[0], ratio), _scale_point(segment[1], ratio)]


def _rescale_otr_result(points, split_lines, bboxes, outline_points, lines, ratio):
    """Map OTR output from the resized image back to the original image, in place.

    Each container is updated element by element so the caller's objects
    keep their identity (``outline_points`` is later returned to the caller).
    """
    for i, bbox in enumerate(bboxes):
        bboxes[i] = _scale_segment(bbox, ratio)
    for i, line in enumerate(split_lines):
        split_lines[i] = _scale_segment(line, ratio)
    for i, point in enumerate(points):
        points[i] = _scale_point(point, ratio)
    for i, outline in enumerate(outline_points):
        outline_points[i] = _scale_segment(outline, ratio)
    for i, line in enumerate(lines):
        # Flat [x0, y0, x1, y1]: x values use the width ratio, y the height ratio.
        lines[i] = [int(line[0] * ratio[1]), int(line[1] * ratio[0]),
                    int(line[2] * ratio[1]), int(line[3] * ratio[0])]


def _build_table(text_list, bbox_list, lines):
    """Build the table result from OCR text boxes and OTR lines.

    Returns ``[tables, obj_in_table]`` on success, or ``[-8]`` if anything
    raises while assembling or recognizing the table.
    """
    try:
        # Imported locally, as in the original code.
        from format_convert.convert_tree import TextBox

        list_lines = [LTLine(1, (line[0], line[1]), (line[2], line[3]))
                      for line in lines]

        list_text_boxes = []
        for i, bbox in enumerate(bbox_list):
            b_text = text_list[i]
            logging.debug("text: %s bbox: %s", b_text, bbox)
            # bbox[0] and bbox[2] are used as the two corners of the text box.
            list_text_boxes.append(
                TextBox([bbox[0][0], bbox[0][1], bbox[2][0], bbox[2][1]], b_text))

        lt = LineTable()
        tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
        return [tables, obj_in_table]
    except Exception:
        traceback.print_exc()
        return [-8]


def image_preprocess(image_np, image_path, use_ocr=True):
    """Detect tables in an image and extract its text.

    The image is deskewed via ``get_rotated_image``, re-read from
    ``image_path``, resized to the size chosen by ``get_best_predict_size``
    and sent to the OTR interface. If at least two bboxes come back, the
    image is treated as containing a table and OCR results are combined with
    the detected lines through ``LineTable.recognize_table``.

    Args:
        image_np: Image array passed to ``get_rotated_image``.
        image_path: Path of the image file. A sibling file with a
            ``_resize`` suffix before the extension is written next to it.
        use_ocr: When no table is found, run plain OCR if True; otherwise
            return ``None`` as text.

    Returns:
        A 4-tuple ``(text, column_list, outline_points, is_table)``.
        With a table: ``([tables, obj_in_table], [], outline_points, 1)``.
        Without a table: ``(ocr_result_or_None, [], [], 0)``.
        On failure: ``(error_code_list, [], [], 0)``, where the error list is
        ``[-1]`` for an unexpected exception or rotation failure, ``[-8]``
        for a table-building failure, or whatever the OTR/OCR interface
        returned when ``judge_error_code`` flags it.
    """
    logging.info("into image_preprocess")
    try:
        # Deskew the image; the result is written back to image_path.
        g_r_i = get_rotated_image(image_np, image_path)
        if g_r_i == [-1]:
            return [-1], [], [], 0

        # OTR needs a resized image, which is written to a separate path.
        image_np = cv2.imread(image_path)
        best_h, best_w = get_best_predict_size(image_np)
        image_resize = cv2.resize(image_np, (best_w, best_h),
                                  interpolation=cv2.INTER_AREA)
        # splitext only splits on the final extension, so dots in directory
        # names or in a "./relative" prefix no longer corrupt the path.
        path_root, path_ext = os.path.splitext(image_path)
        image_resize_path = path_root + "_resize" + path_ext
        cv2.imwrite(image_resize_path, image_resize)

        # Call the OTR model interface.
        with open(image_resize_path, "rb") as f:
            image_bytes = f.read()
        points, split_lines, bboxes, outline_points, lines = from_otr_interface(image_bytes)
        if judge_error_code(points):
            return points, [], [], 0

        # Restore coordinates detected on the resized image to original scale.
        ratio = (image_np.shape[0] / best_h, image_np.shape[1] / best_w)
        _rescale_otr_result(points, split_lines, bboxes, outline_points, lines, ratio)

        # OCR runs on the original-size (deskewed) image.
        with open(image_path, "rb") as f:
            image_bytes = f.read()

        # Table present.
        if len(bboxes) >= 2:
            text_list, bbox_list = from_ocr_interface(image_bytes, True)
            if judge_error_code(text_list):
                return text_list, [], [], 0

            text = _build_table(text_list, bbox_list, lines)
            if judge_error_code(text):
                return text, [], [], 0
            return text, [], outline_points, 1

        # No table.
        if not use_ocr:
            return None, [], [], 0
        text = from_ocr_interface(image_bytes)
        if judge_error_code(text):
            return text, [], [], 0
        return text, [], [], 0
    except Exception:
        logging.info("image_preprocess error")
        traceback.print_exc()
        return [-1], [], [], 0


@get_memory_info.memory_decorator
def picture2text(path, html=False):
    """Convert the image at ``path`` to text.

    Args:
        path: Path of the image file to read with ``cv2.imread``.
        html: If True, the text is passed through ``add_div`` before being
            returned.

    Returns:
        ``[text]`` on success; ``[-3]`` if the image cannot be read; the
        error list from ``image_preprocess`` if ``judge_error_code`` flags
        it; ``[-1]`` on an unexpected exception.
    """
    logging.info("into picture2text")
    try:
        img = cv2.imread(path)
        if img is None:
            return [-3]

        # Only the text part of the preprocess result is used here.
        text, column_list, outline_points, is_table = image_preprocess(img, path)
        if judge_error_code(text):
            return text

        if html:
            text = add_div(text)
        return [text]
    except Exception:
        logging.info("picture2text error!")
        traceback.print_exc()
        return [-1]


def get_best_predict_size(image_np, times=64):
    """Pick the height and width closest to the image's, from multiples of ``times``.

    Candidates are ``i * times`` for ``i`` in 1..99, limited to values not
    exceeding 3000. On a tie the larger candidate wins.

    Args:
        image_np: Array whose ``shape[0]`` is height and ``shape[1]`` is width.
        times: Step between candidate sizes.

    Returns:
        ``(best_height, best_width)``.
    """
    # Descending order plus min() returning the first minimum keeps the
    # "larger candidate wins on a tie" behavior.
    sizes = sorted((i * times for i in range(1, 100) if i * times <= 3000),
                   reverse=True)

    def _closest(dimension):
        return min(sizes, key=lambda size: abs(dimension - size))

    return _closest(image_np.shape[0]), _closest(image_np.shape[1])