123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191 |
- import logging
- import os
- import sys
- from pdfminer.layout import LTLine
- sys.path.append(os.path.dirname(__file__) + "/../")
- import traceback
- import cv2
- from format_convert import get_memory_info
- from format_convert.utils import judge_error_code, add_div, LineTable
- from format_convert.table_correct import get_rotated_image
- from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface
def image_preprocess(image_np, image_path, use_ocr=True):
    """Deskew an image, look for table structure (OTR) and extract text (OCR).

    Args:
        image_np: Image array handed to ``get_rotated_image``. It is replaced
            by a fresh ``cv2.imread(image_path)`` right after that call.
        image_path: Path of the image on disk. A resized copy for the OTR
            model is written next to it as ``<root>_resize<ext>``.
        use_ocr: Only consulted when no table is detected; if false, OCR is
            skipped and ``None`` is returned as the text.

    Returns:
        A 4-tuple ``(text, column_list, outline_points, is_table)``:

        * table detected (at least two OTR bboxes):
          ``([tables, obj_in_table], [], outline_points, 1)``;
        * no table, ``use_ocr`` true: ``(ocr_result, [], [], 0)``;
        * no table, ``use_ocr`` false: ``(None, [], [], 0)``;
        * failure: ``(error_code_list, [], [], 0)`` where the error list is
          ``[-1]`` (rotation failure or unexpected exception), ``[-8]``
          (table construction failed) or whatever the OTR/OCR interface
          returned when ``judge_error_code`` flags it.
    """
    logging.info("into image_preprocess")
    try:
        # Deskew the image. The re-read below relies on the corrected image
        # having been written back to image_path (per the original note).
        g_r_i = get_rotated_image(image_np, image_path)
        if g_r_i == [-1]:
            return [-1], [], [], 0

        # The OTR model needs a resized image; it is written to a second path.
        image_np = cv2.imread(image_path)
        best_h, best_w = get_best_predict_size(image_np)
        image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
        # splitext only splits off the final extension, so dots in directory
        # names or a leading "./" no longer corrupt the output path
        # (str.split(".") turned "./img.png" into "_resize.png").
        path_root, path_ext = os.path.splitext(image_path)
        image_resize_path = path_root + "_resize" + path_ext
        cv2.imwrite(image_resize_path, image_resize)

        # Call the OTR model interface.
        with open(image_resize_path, "rb") as f:
            image_bytes = f.read()
        points, split_lines, bboxes, outline_points, lines = from_otr_interface(image_bytes)
        if judge_error_code(points):
            return points, [], [], 0

        # Map coordinates predicted on the resized image back to the
        # original image size.
        ratio_h = image_np.shape[0] / best_h
        ratio_w = image_np.shape[1] / best_w

        def scale_xy(pt):
            """Scale an (x, y) pair from resized to original coordinates."""
            return int(pt[0] * ratio_w), int(pt[1] * ratio_h)

        # The containers are updated in place, element by element.
        for i, bbox in enumerate(bboxes):
            bboxes[i] = [scale_xy(bbox[0]), scale_xy(bbox[1])]
        for i, split_line in enumerate(split_lines):
            split_lines[i] = [scale_xy(split_line[0]), scale_xy(split_line[1])]
        for i, point in enumerate(points):
            points[i] = scale_xy(point)
        for i, outline in enumerate(outline_points):
            outline_points[i] = [scale_xy(outline[0]), scale_xy(outline[1])]
        for i, seg in enumerate(lines):
            # Each entry is indexed as a flat [x0, y0, x1, y1] sequence.
            lines[i] = [int(seg[0] * ratio_w), int(seg[1] * ratio_h),
                        int(seg[2] * ratio_w), int(seg[3] * ratio_h)]

        # Debug overlay of the detected boxes. It is drawn on the local copy
        # only, which is not read again in this function.
        for box in bboxes:
            cv2.rectangle(image_np, box[0], box[1], (0, 255, 0), 2)

        # Read the full-size image for the OCR model interface.
        with open(image_path, "rb") as f:
            image_bytes = f.read()

        # Table present.
        if len(bboxes) >= 2:
            text_list, bbox_list = from_ocr_interface(image_bytes, True)
            if judge_error_code(text_list):
                return text_list, [], [], 0

            # Build the table with the existing LineTable machinery.
            try:
                # Imported here, as in the original, rather than at module
                # level.
                from format_convert.convert_tree import TextBox

                list_lines = [LTLine(1, (seg[0], seg[1]), (seg[2], seg[3]))
                              for seg in lines]
                list_text_boxes = []
                print("=============1")
                for i, bbox in enumerate(bbox_list):
                    b_text = text_list[i]
                    print("text:", b_text, "bbox:", bbox)
                    # Corners 0 and 2 of the OCR box are used as the
                    # rectangle's two defining points.
                    list_text_boxes.append(TextBox([bbox[0][0], bbox[0][1],
                                                    bbox[2][0], bbox[2][1]], b_text))
                lt = LineTable()
                tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
                text = [tables, obj_in_table]
                column_list = []
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                traceback.print_exc()
                text = [-8]
                column_list = []

            if judge_error_code(text):
                return text, [], [], 0
            is_table = 1
            return text, column_list, outline_points, is_table

        # No table.
        is_table = 0
        if not use_ocr:
            return None, [], [], is_table
        text = from_ocr_interface(image_bytes)
        if judge_error_code(text):
            # Error codes are passed through unchanged.
            return text, [], [], 0
        return text, [], [], is_table
    except Exception:
        logging.info("image_preprocess error")
        # format_exc() returns the traceback text; the original passed
        # print_exc() (which returns None) to print, yielding
        # "image_preprocess None".
        print("image_preprocess", traceback.format_exc())
        return [-1], [], [], 0
@get_memory_info.memory_decorator
def picture2text(path, html=False):
    """Convert the image at ``path`` into text and/or table structures.

    Args:
        path: Image file path, loaded with ``cv2.imread``.
        html: When true, the extracted result is passed through ``add_div``
            before being returned.

    Returns:
        ``[text]`` (a one-element list) on success; otherwise an error-code
        list: ``[-3]`` when the image cannot be read, ``[-1]`` on an
        unexpected exception, or the error list reported by
        ``image_preprocess``.
    """
    logging.info("into picture2text")
    try:
        # Detect tables in the picture and extract its text.
        img = cv2.imread(path)
        if img is None:
            return [-3]

        # Only the text part of the result is used here.
        text, _column_list, _outline_points, _is_table = image_preprocess(img, path)
        if judge_error_code(text):
            return text

        if html:
            text = add_div(text)
        return [text]
    except Exception:
        logging.info("picture2text error!")
        # format_exc() returns the traceback text; the original passed
        # print_exc() (which returns None) to print, yielding
        # "picture2text None".
        print("picture2text", traceback.format_exc())
        return [-1]
def get_best_predict_size(image_np, times=64):
    """Pick the height and width closest to the image's, as multiples of ``times``.

    Candidates are the first 99 positive multiples of ``times`` that do not
    exceed 3000. For each of ``image_np.shape[0]`` and ``image_np.shape[1]``
    the candidate with the smallest absolute difference is chosen; on a tie
    the larger candidate wins. If no candidate is within 10000 of the
    dimension, the largest candidate is used.

    Args:
        image_np: Object exposing ``shape`` with height at index 0 and width
            at index 1.
        times: Step between candidate sizes.

    Returns:
        ``(best_height, best_width)``.
    """
    candidates = [k * times for k in range(1, 100) if k * times <= 3000]
    candidates.sort(reverse=True)

    def _closest(target):
        # Largest candidate is the fallback; strict "<" keeps the earlier
        # (larger) candidate when two are equally close.
        chosen, smallest_gap = candidates[0], 10000
        for cand in candidates:
            gap = abs(target - cand)
            if gap < smallest_gap:
                chosen, smallest_gap = cand, gap
        return chosen

    return _closest(image_np.shape[0]), _closest(image_np.shape[1])
|