Explorar o código

ocr尺寸修复

fangjiasheng hai 2 anos
pai
achega
f8494f3c68
Modificouse 1 ficheiro con 12 adicións e 6 borrados
  1. 12 6
      format_convert/convert_image.py

+ 12 - 6
format_convert/convert_image.py

@@ -125,6 +125,7 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
         return _image_np
 
     def isr_process(_image_np):
+        log("isr_process image shape " + str(_image_np.shape))
         image_np_copy = copy.deepcopy(_image_np)
         # isr模型去除印章
         _isr_time = time.time()
@@ -145,13 +146,17 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
         return _image_np
 
     def ocr_process(_image_np, _threshold=1024):
-        # ocr图片过大内存溢出,需resize
         log("ocr_process image shape " + str(_image_np.shape))
+
+        # ocr图片过大内存溢出,需resize
         # 大图按比例缩小,小图维持不变;若统一拉伸成固定大小如1024会爆显存
+        ratio = (1, 1)
         if _image_np.shape[0] >= _threshold or _image_np.shape[1] >= _threshold:
             best_h, best_w = get_best_predict_size2(_image_np, 1024)
             _image_np = pil_resize(_image_np, best_h, best_w)
             log("ocr_process image resize " + str(_image_np.shape))
+            ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
+
         # 调用ocr模型接口
         image_bytes = np2bytes(_image_np)
         text_list, bbox_list = from_ocr_interface(image_bytes, is_table=True)
@@ -160,13 +165,14 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
 
         for i in range(len(bbox_list)):
             point = bbox_list[i]
-            bbox_list[i] = [[int(point[0][0]), int(point[0][1])],
-                            [int(point[1][0]), int(point[1][1])],
-                            [int(point[2][0]), int(point[2][1])],
-                            [int(point[3][0]), int(point[3][1])]]
+            bbox_list[i] = [[int(point[0][0]*ratio[0]), int(point[0][1]*ratio[1])],
+                            [int(point[1][0]*ratio[0]), int(point[1][1]*ratio[1])],
+                            [int(point[2][0]*ratio[0]), int(point[2][1]*ratio[1])],
+                            [int(point[3][0]*ratio[0]), int(point[3][1]*ratio[1])]]
         return text_list, bbox_list
 
     def otr_process(_image_np):
+        log("otr_process image shape " + str(_image_np.shape))
         # otr模型识别表格,需要图片resize成模型所需大小, 写入另一个路径
         best_h, best_w = get_best_predict_size(_image_np)
         image_resize = pil_resize(_image_np, best_h, best_w)
@@ -867,4 +873,4 @@ def image_process_old(image_np, image_path, is_from_pdf=False, is_from_docx=Fals
 
 
 if __name__ == "__main__":
-    image_slice_new(cv2.imread("C:/Users/Administrator/Desktop/test_image/error23.png"))
+    image_slice_new(cv2.imread("C:/Users/Administrator/Desktop/test_image/1653566873838.png"))