Эх сурвалжийг харах

降低图片输入分辨率

fangjiasheng 2 жил өмнө
parent
commit
20031a3c5e

+ 12 - 5
format_convert/convert_image.py

@@ -86,6 +86,12 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
 
 
     log("into image_preprocess")
     log("into image_preprocess")
     try:
     try:
+        # 整体分辨率限制
+        if image_np.shape[0] > 2000 or image_np.shape[1] > 2000:
+            h, w = get_best_predict_size2(image_np, threshold=2000)
+            log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
+            image_np = pil_resize(image_np, h, w)
+
         # 图片倾斜校正,写入原来的图片路径
         # 图片倾斜校正,写入原来的图片路径
         # print("image_process", image_path)
         # print("image_process", image_path)
         g_r_i = get_rotated_image(image_np, image_path)
         g_r_i = get_rotated_image(image_np, image_path)
@@ -217,7 +223,9 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
         # ocr图片过大内存溢出,需resize
         # ocr图片过大内存溢出,需resize
         start_time = time.time()
         start_time = time.time()
         threshold = 3000
         threshold = 3000
+        ocr_resize_flag = 0
         if image_np.shape[0] >= threshold or image_np.shape[1] >= threshold:
         if image_np.shape[0] >= threshold or image_np.shape[1] >= threshold:
+            ocr_resize_flag = 1
             best_h, best_w = get_best_predict_size2(image_np, threshold)
             best_h, best_w = get_best_predict_size2(image_np, threshold)
             # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
             # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
             image_resize = pil_resize(image_np, best_h, best_w)
             image_resize = pil_resize(image_np, best_h, best_w)
@@ -246,7 +254,10 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
         #     return []
         #     return []
 
 
         # ocr resize后的bbox还原
         # ocr resize后的bbox还原
-        ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
+        if ocr_resize_flag:
+            ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
+        else:
+            ratio = (1, 1)
         for i in range(len(bbox_list)):
         for i in range(len(bbox_list)):
             point = bbox_list[i]
             point = bbox_list[i]
             bbox_list[i] = [[int(point[0][0]*ratio[1]), int(point[0][1]*ratio[0])],
             bbox_list[i] = [[int(point[0][0]*ratio[1]), int(point[0][1]*ratio[0])],
@@ -254,10 +265,6 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u
                             [int(point[2][0]*ratio[1]), int(point[2][1]*ratio[0])],
                             [int(point[2][0]*ratio[1]), int(point[2][1]*ratio[0])],
                             [int(point[3][0]*ratio[1]), int(point[3][1]*ratio[0])]]
                             [int(point[3][0]*ratio[1]), int(point[3][1]*ratio[0])]]
 
 
-        # for _a,_b in zip(text_list,bbox_list):
-        #     print("bbox1",_a,_b)
-
-
         # 调用现成方法形成表格
         # 调用现成方法形成表格
         try:
         try:
             from format_convert.convert_tree import TableLine
             from format_convert.convert_tree import TableLine