|
@@ -831,8 +831,6 @@ class PDFConvert:
|
|
|
# image_count = 1
|
|
|
# else:
|
|
|
layout = self.get_layout(page, page_no)
|
|
|
- self.width = layout.width
|
|
|
- self.height = layout.height
|
|
|
if self._doc.error_code is not None:
|
|
|
return
|
|
|
if judge_error_code(layout):
|
|
@@ -912,7 +910,7 @@ class PDFConvert:
|
|
|
# 查看提取的图片高宽,太大则用pdf输出图进行ocr识别
|
|
|
img_test = Image.open(io.BytesIO(image_stream))
|
|
|
# img_test.show()
|
|
|
- if image.height >= self.height-100 and image.width >= self.width-100:
|
|
|
+ if image.height >= 1000 and image.width >= 1000:
|
|
|
print("pdf2text LTImage stream output size", img_test.size)
|
|
|
page_image = self.get_page_image(page_no)
|
|
|
if judge_error_code(page_image):
|