Эх сурвалжийг харах

Merge remote-tracking branch 'origin/master'

fangjiasheng 2 жил өмнө
parent
commit
939a261fd7

+ 1 - 3
format_convert/convert_pdf.py

@@ -831,8 +831,6 @@ class PDFConvert:
         #     image_count = 1
         # else:
         layout = self.get_layout(page, page_no)
-        self.width = layout.width
-        self.height = layout.height
         if self._doc.error_code is not None:
             return
         if judge_error_code(layout):
@@ -912,7 +910,7 @@ class PDFConvert:
                     # 查看提取的图片高宽,太大则用pdf输出图进行ocr识别
                     img_test = Image.open(io.BytesIO(image_stream))
                     # img_test.show()
-                    if image.height >= self.height-100 and image.width >= self.width-100:
+                    if image.height >= 1000 and image.width >= 1000:
                         print("pdf2text LTImage stream output size", img_test.size)
                         page_image = self.get_page_image(page_no)
                         if judge_error_code(page_image):

+ 0 - 2
format_convert/utils.py

@@ -1084,8 +1084,6 @@ class LineTable:
             #check y
             if len(_line)>0:
                 _bbox = _line[0].get("bbox")
-                print(1,_bbox)
-                print(2,bbox)
                 if abs(min(_bbox[1],_bbox[3])-min(bbox[1],bbox[3]))>margin or abs(max(_bbox[1],_bbox[3])-max(bbox[1],bbox[3]))>margin:
                     print("check position y false")
                     return False