|
@@ -15,7 +15,7 @@ import traceback
|
|
|
import cv2
|
|
|
from isr.pre_process import count_red_pixel
|
|
|
from format_convert.utils import judge_error_code, add_div, LineTable, get_table_html, get_logger, log, \
|
|
|
- memory_decorator, pil_resize
|
|
|
+ memory_decorator, pil_resize, np2bytes, ocr_cant_read
|
|
|
from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface, from_gpu_interface_redis, \
|
|
|
from_idc_interface, from_isr_interface
|
|
|
from format_convert.table_correct import get_rotated_image
|
|
@@ -84,130 +84,116 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
|
|
|
textbox_list.remove(_obj)
|
|
|
return textbox_list
|
|
|
|
|
|
- log("into image_preprocess")
|
|
|
- try:
|
|
|
- if image_np is None:
|
|
|
- return []
|
|
|
-
|
|
|
- # 整体分辨率限制
|
|
|
- threshold = 2000
|
|
|
- if image_np.shape[0] > threshold or image_np.shape[1] > threshold:
|
|
|
- h, w = get_best_predict_size2(image_np, threshold=threshold)
|
|
|
- log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
|
|
|
- image_np = pil_resize(image_np, h, w)
|
|
|
-
|
|
|
+ def idc_process(_image_np):
|
|
|
# 图片倾斜校正,写入原来的图片路径
|
|
|
# print("image_process", image_path)
|
|
|
- g_r_i = get_rotated_image(image_np, image_path)
|
|
|
- if judge_error_code(g_r_i):
|
|
|
- if is_from_docx:
|
|
|
- return []
|
|
|
- else:
|
|
|
- return g_r_i
|
|
|
- image_np = cv2.imread(image_path)
|
|
|
- image_np_copy = copy.deepcopy(image_np)
|
|
|
- if image_np is None:
|
|
|
- return []
|
|
|
+ # g_r_i = get_rotated_image(_image_np, image_path)
|
|
|
+ # if judge_error_code(g_r_i):
|
|
|
+ # if is_from_docx:
|
|
|
+ # return []
|
|
|
+ # else:
|
|
|
+ # return g_r_i
|
|
|
+ # _image_np = cv2.imread(image_path)
|
|
|
+ # if _image_np is None:
|
|
|
+ # return []
|
|
|
+ # return _image_np
|
|
|
|
|
|
- # if image_np is None:
|
|
|
+ # if _image_np is None:
|
|
|
# return []
|
|
|
- #
|
|
|
- # # idc模型实现图片倾斜校正
|
|
|
- # image_resize = pil_resize(image_np, 640, 640)
|
|
|
+
|
|
|
+ # idc模型实现图片倾斜校正
|
|
|
+ h, w = get_best_predict_size2(_image_np, 1080)
|
|
|
+ image_resize = pil_resize(_image_np, h, w)
|
|
|
# image_resize_path = image_path.split(".")[0] + "_resize_idc." + image_path.split(".")[-1]
|
|
|
# cv2.imwrite(image_resize_path, image_resize)
|
|
|
- #
|
|
|
+
|
|
|
# with open(image_resize_path, "rb") as f:
|
|
|
# image_bytes = f.read()
|
|
|
- # angle = from_idc_interface(image_bytes)
|
|
|
- # if judge_error_code(angle):
|
|
|
- # if is_from_docx:
|
|
|
- # return []
|
|
|
- # else:
|
|
|
- # return angle
|
|
|
- # # 根据角度旋转
|
|
|
- # image_pil = Image.fromarray(image_np)
|
|
|
- # image_np = np.array(image_pil.rotate(angle, expand=1))
|
|
|
- # # 写入
|
|
|
+ image_bytes = np2bytes(image_resize)
|
|
|
+ angle = from_idc_interface(image_bytes)
|
|
|
+ if judge_error_code(angle):
|
|
|
+ if is_from_docx:
|
|
|
+ return []
|
|
|
+ else:
|
|
|
+ return angle
|
|
|
+ # 根据角度旋转
|
|
|
+ image_pil = Image.fromarray(_image_np)
|
|
|
+ _image_np = np.array(image_pil.rotate(angle, expand=1))
|
|
|
+ # 写入
|
|
|
# idc_path = image_path.split(".")[0] + "_idc." + image_path.split(".")[-1]
|
|
|
# cv2.imwrite(idc_path, image_np)
|
|
|
+ return _image_np
|
|
|
|
|
|
+ def isr_process(_image_np):
|
|
|
+ image_np_copy = copy.deepcopy(_image_np)
|
|
|
# isr模型去除印章
|
|
|
_isr_time = time.time()
|
|
|
- if count_red_pixel(image_np):
|
|
|
+ if count_red_pixel(_image_np):
|
|
|
# 红色像素达到一定值才过模型
|
|
|
with open(image_path, "rb") as f:
|
|
|
image_bytes = f.read()
|
|
|
- image_np = from_isr_interface(image_bytes)
|
|
|
- if judge_error_code(image_np):
|
|
|
+ _image_np = from_isr_interface(image_bytes)
|
|
|
+ if judge_error_code(_image_np):
|
|
|
if is_from_docx:
|
|
|
return []
|
|
|
else:
|
|
|
- return image_np
|
|
|
+ return _image_np
|
|
|
# [1]代表检测不到印章,直接返回
|
|
|
- if isinstance(image_np, list) and image_np == [1]:
|
|
|
+ if isinstance(_image_np, list) and _image_np == [1]:
|
|
|
log("no seals detected!")
|
|
|
- image_np = image_np_copy
|
|
|
+ _image_np = image_np_copy
|
|
|
else:
|
|
|
isr_path = image_path.split(".")[0] + "_isr." + image_path.split(".")[-1]
|
|
|
- cv2.imwrite(isr_path, image_np)
|
|
|
+ cv2.imwrite(isr_path, _image_np)
|
|
|
log("isr total time "+str(time.time()-_isr_time))
|
|
|
+ return _image_np
|
|
|
|
|
|
+ def ocr_process(_image_np):
|
|
|
+ # ocr图片过大内存溢出,需resize
|
|
|
+ start_time = time.time()
|
|
|
+ # 调用ocr模型接口
|
|
|
+ image_bytes = np2bytes(_image_np)
|
|
|
+ text_list, bbox_list = from_ocr_interface(image_bytes, is_table=True)
|
|
|
+ if judge_error_code(text_list):
|
|
|
+ return text_list, text_list
|
|
|
+
|
|
|
+ for i in range(len(bbox_list)):
|
|
|
+ point = bbox_list[i]
|
|
|
+ bbox_list[i] = [[int(point[0][0]), int(point[0][1])],
|
|
|
+ [int(point[1][0]), int(point[1][1])],
|
|
|
+ [int(point[2][0]), int(point[2][1])],
|
|
|
+ [int(point[3][0]), int(point[3][1])]]
|
|
|
+ return text_list, bbox_list
|
|
|
+
|
|
|
+ def otr_process(_image_np):
|
|
|
# otr模型识别表格,需要图片resize成模型所需大小, 写入另一个路径
|
|
|
- best_h, best_w = get_best_predict_size(image_np)
|
|
|
- # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
|
|
|
- image_resize = pil_resize(image_np, best_h, best_w)
|
|
|
- image_resize_path = image_path.split(".")[0] + "_resize_otr." + image_path.split(".")[-1]
|
|
|
- cv2.imwrite(image_resize_path, image_resize)
|
|
|
+ best_h, best_w = get_best_predict_size(_image_np)
|
|
|
+ image_resize = pil_resize(_image_np, best_h, best_w)
|
|
|
+ # image_resize_path = image_path.split(".")[0] + "_resize_otr." + image_path.split(".")[-1]
|
|
|
+ # cv2.imwrite(image_resize_path, image_resize)
|
|
|
|
|
|
# 调用otr模型接口
|
|
|
- with open(image_resize_path, "rb") as f:
|
|
|
- image_bytes = f.read()
|
|
|
+ # with open(image_resize_path, "rb") as f:
|
|
|
+ # image_bytes = f.read()
|
|
|
+ image_bytes = np2bytes(image_resize)
|
|
|
list_line = from_otr_interface(image_bytes, is_from_pdf)
|
|
|
if judge_error_code(list_line):
|
|
|
- return list_line
|
|
|
+ if is_from_docx:
|
|
|
+ return []
|
|
|
+ else:
|
|
|
+ return list_line
|
|
|
|
|
|
# otr resize后得到的bbox根据比例还原
|
|
|
start_time = time.time()
|
|
|
- ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
|
|
|
+ ratio = (_image_np.shape[0]/best_h, _image_np.shape[1]/best_w)
|
|
|
for i in range(len(list_line)):
|
|
|
point = list_line[i]
|
|
|
list_line[i] = [int(point[0]*ratio[1]), int(point[1]*ratio[0]),
|
|
|
int(point[2]*ratio[1]), int(point[3]*ratio[0])]
|
|
|
log("otr resize bbox recover " + str(time.time()-start_time))
|
|
|
+ return list_line
|
|
|
|
|
|
- # ocr图片过大内存溢出,需resize
|
|
|
- start_time = time.time()
|
|
|
- threshold = 3000
|
|
|
- ocr_resize_flag = 0
|
|
|
- if image_np.shape[0] >= threshold or image_np.shape[1] >= threshold:
|
|
|
- ocr_resize_flag = 1
|
|
|
- best_h, best_w = get_best_predict_size2(image_np, threshold)
|
|
|
- # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
|
|
|
- image_resize = pil_resize(image_np, best_h, best_w)
|
|
|
- image_resize_path = image_path.split(".")[0] + "_resize_ocr." + image_path.split(".")[-1]
|
|
|
- cv2.imwrite(image_resize_path, image_resize)
|
|
|
- log("ocr resize before " + str(time.time()-start_time))
|
|
|
-
|
|
|
- # 调用ocr模型接口
|
|
|
- with open(image_resize_path, "rb") as f:
|
|
|
- image_bytes = f.read()
|
|
|
- text_list, bbox_list = from_ocr_interface(image_bytes, is_table=True)
|
|
|
- if judge_error_code(text_list):
|
|
|
- return text_list
|
|
|
-
|
|
|
- # ocr resize后的bbox还原
|
|
|
- if ocr_resize_flag:
|
|
|
- ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
|
|
|
- else:
|
|
|
- ratio = (1, 1)
|
|
|
- for i in range(len(bbox_list)):
|
|
|
- point = bbox_list[i]
|
|
|
- bbox_list[i] = [[int(point[0][0]*ratio[1]), int(point[0][1]*ratio[0])],
|
|
|
- [int(point[1][0]*ratio[1]), int(point[1][1]*ratio[0])],
|
|
|
- [int(point[2][0]*ratio[1]), int(point[2][1]*ratio[0])],
|
|
|
- [int(point[3][0]*ratio[1]), int(point[3][1]*ratio[0])]]
|
|
|
-
|
|
|
+ def table_process(list_line, text_list, bbox_list):
|
|
|
# 调用现成方法形成表格
|
|
|
try:
|
|
|
from format_convert.convert_tree import TableLine
|
|
@@ -229,17 +215,71 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
|
|
|
|
|
|
# 合并同一行textbox
|
|
|
list_text_boxes = merge_textbox(list_text_boxes, obj_in_table)
|
|
|
-
|
|
|
- obj_list = []
|
|
|
- for table in tables:
|
|
|
- obj_list.append(_Table(table["table"], table["bbox"]))
|
|
|
- for text_box in list_text_boxes:
|
|
|
- if text_box not in obj_in_table:
|
|
|
- obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
|
|
|
- return obj_list
|
|
|
+ return list_text_boxes, tables, obj_in_table
|
|
|
except:
|
|
|
traceback.print_exc()
|
|
|
- return [-8]
|
|
|
+ return [-8], [-8], [-8]
|
|
|
+
|
|
|
+ log("into image_preprocess")
|
|
|
+ try:
|
|
|
+ if image_np is None:
|
|
|
+ return []
|
|
|
+
|
|
|
+ # 整体分辨率限制
|
|
|
+ threshold = 2000
|
|
|
+ if image_np.shape[0] > threshold or image_np.shape[1] > threshold:
|
|
|
+ h, w = get_best_predict_size2(image_np, threshold=threshold)
|
|
|
+ log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
|
|
|
+ image_np = pil_resize(image_np, h, w)
|
|
|
+
|
|
|
+ # 印章去除
|
|
|
+ image_np = isr_process(image_np)
|
|
|
+ if isinstance(image_np, list):
|
|
|
+ return image_np
|
|
|
+
|
|
|
+ # 文字识别
|
|
|
+ text_list, box_list = ocr_process(image_np)
|
|
|
+ if judge_error_code(text_list):
|
|
|
+ return text_list
|
|
|
+
|
|
|
+ # 判断ocr识别是否正确
|
|
|
+ if ocr_cant_read(text_list, box_list):
|
|
|
+ # 方向分类
|
|
|
+ image_np = idc_process(image_np)
|
|
|
+ # cv2.imshow("idc_process", image_np)
|
|
|
+ # cv2.waitKey(0)
|
|
|
+ if isinstance(image_np, list):
|
|
|
+ return image_np
|
|
|
+
|
|
|
+ # 文字识别
|
|
|
+ text_list1, box_list_1 = ocr_process(image_np)
|
|
|
+ if judge_error_code(text_list1):
|
|
|
+ return text_list1
|
|
|
+
|
|
|
+ # 比较字数
|
|
|
+ # print("ocr process", len("".join(text_list)), len("".join(text_list1)))
|
|
|
+ if len("".join(text_list)) < len("".join(text_list1)):
|
|
|
+ text_list = text_list1
|
|
|
+ box_list = box_list_1
|
|
|
+
|
|
|
+ # 表格识别
|
|
|
+ line_list = otr_process(image_np)
|
|
|
+ if judge_error_code(line_list):
|
|
|
+ return line_list
|
|
|
+
|
|
|
+ # 表格生成
|
|
|
+ text_box_list, table_list, obj_in_table_list = table_process(line_list, text_list, box_list)
|
|
|
+ if judge_error_code(table_list):
|
|
|
+ return table_list
|
|
|
+
|
|
|
+ # 对象生成
|
|
|
+ obj_list = []
|
|
|
+ for table in table_list:
|
|
|
+ obj_list.append(_Table(table["table"], table["bbox"]))
|
|
|
+ for text_box in text_box_list:
|
|
|
+ if text_box not in obj_in_table_list:
|
|
|
+ obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
|
|
|
+ return obj_list
|
|
|
|
|
|
except Exception as e:
|
|
|
log("image_preprocess error")
|