# Source repository: fangjiasheng/FORMAT_CONVERSION_MAXCOMPUTE

							import base64
import json
import os
import re
from glob import glob
import PyPDF2
import cv2
import fitz
import six
from PIL import Image
from PyPDF2 import PdfFileMerger

from format_convert.convert_pdf import PDFConvert
from format_convert.utils import pil2np
from isr.isr_interface import isr, IsrModels, remove_seal
from isr.post_process import get_seal_part, replace_seal_part

# Input PDF; handed to PDFConvert by get_pdf_image().
file_path = r'D:\BIDI_DOC\比地_文档\方案.pdf'
# Working directory used as a glob prefix for page images and annotation JSON
# files. The trailing slash matters: paths are built by string concatenation.
output_path = '../format_convert/temp/a1/'
# Destination file written by image_to_pdf().
output_pdf_path = r'D:\BIDI_DOC\比地_文档\方案_去印章.pdf'


def get_pdf_image():
    """Run ``PDFConvert`` on ``file_path`` with ``output_path`` as its target.

    ``limit_page_cnt=1000`` is forwarded to ``convert`` (presumably an upper
    bound on the number of pages handled -- confirm against PDFConvert).
    """
    PDFConvert(file_path, output_path).convert(limit_page_cnt=1000)


def use_isr():
    """Feed every file under ``output_path`` through ``isr`` and save the result.

    Each file is read as bytes, base64-encoded and passed to ``isr`` together
    with both models. The ``'image'`` entry of the result is written to
    ``output_path`` under the original file name prefixed with ``new_``.
    """
    yolo_model, seal_model = IsrModels().get_model()

    for src_path in glob(output_path + '*'):
        print('p', src_path)
        with open(src_path, 'rb') as src_file:
            raw = src_file.read()

        processed = isr(base64.b64encode(raw), yolo_model, seal_model).get('image')
        # NOTE(review): a list result presumably signals that no processed
        # image was produced; the untouched original is saved in that case.
        if isinstance(processed, list):
            processed = cv2.imread(src_path)

        dst_path = output_path + 'new_' + src_path.rsplit(os.sep, 1)[-1]
        print('new_p', dst_path)
        cv2.imwrite(dst_path, processed)


def base64_to_pil(string):
    """Decode a base64 text string into an RGB ``PIL.Image``.

    :param string: base64-encoded image data as ``str``.
    :return: the decoded image converted to RGB, or ``None`` when anything
        goes wrong (the exception is printed, not raised).
    """
    try:
        decoded = base64.b64decode(bytes(string, 'utf-8'))
        # Wrap the raw bytes in an in-memory stream so PIL can read them.
        stream = six.BytesIO(decoded)
        return Image.open(stream).convert('RGB')
    except Exception as err:
        # Best-effort by design: report the problem and let the caller decide.
        print(err)
        return None


def manual_detect_re_isr():
    """Re-run seal removal on manually annotated rectangles.

    For every ``*.json`` file in ``output_path`` the function:

    1. decodes the embedded ``imageData`` (base64) into an image,
    2. turns each entry of ``shapes`` into an integer box
       ``[[x0, y0], [x1, y1]]`` (each shape must carry exactly two points),
    3. cuts the boxed parts out with ``get_seal_part``, runs ``remove_seal``
       on each part and pastes them back with ``replace_seal_part``,
    4. writes the result to ``output_path + imagePath``.

    Files whose ``imageData`` cannot be decoded are reported and skipped
    instead of crashing inside ``pil2np``.

    NOTE(review): the key names look like the labelme annotation format --
    confirm against the tool that produces these files.
    """
    # Only the removal model is needed here; the detector is not used because
    # the boxes come from the annotation files.
    _, isr_model = IsrModels().get_model()

    for json_path in glob(output_path + '*.json'):
        print(json_path)
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(json_path, encoding='utf-8') as f:
            json_data = json.load(f)

        image_pil = base64_to_pil(json_data.get('imageData'))
        if image_pil is None:
            print('skip, cannot decode imageData:', json_path)
            continue
        image_np = pil2np(image_pil)
        image_path = output_path + json_data.get('imagePath')

        boxes = []
        for shape in json_data.get('shapes'):
            [x0, y0], [x1, y1] = shape['points']
            boxes.append([[int(x0), int(y0)], [int(x1), int(y1)]])

        part_list = get_seal_part(image_np, boxes, [])
        new_part_list = [remove_seal(part, isr_model) for part in part_list]
        img_replace = replace_seal_part(image_np, new_part_list, boxes)
        cv2.imwrite(image_path, img_replace)


def image_to_pdf(logo_height=145):
    """Assemble the ``new_*.png`` page images in ``output_path`` into one PDF.

    Every image is first overwritten in place with its top ``logo_height``
    pixel rows set to white (this removes the logo at the top of the page).
    The images are then ordered by the first run of digits in their file name
    and appended, one page each, to a new PDF saved at ``output_pdf_path``.

    :param logo_height: number of pixel rows at the top of each image to
        blank out; defaults to 145.
    :raises AttributeError: if a matched file name contains no digits.
    """
    image_index_list = []
    for p in glob(output_path + 'new_*.png'):
        print(p)
        name = os.path.basename(p)
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        index = int(re.search(r'\d+', name).group())
        image_index_list.append([p, index])

        # Blank out the logo at the top of the page.
        image_np = cv2.imread(p)
        image_np[:logo_height, :, :] = 255
        cv2.imwrite(p, image_np)
    image_index_list.sort(key=lambda x: x[1])

    pdf_doc = fitz.open()
    try:
        for p, index in image_index_list:
            print('index', index)
            # Open the image with fitz and convert it to single-image PDF bytes.
            img = fitz.open(p)
            try:
                img_pdf_bytes = img.convert_to_pdf()
            finally:
                img.close()

            # Re-open those bytes as a PDF document and append its pages to
            # the output document; close it afterwards so it does not leak.
            img_pdf_page = fitz.open("pdf", img_pdf_bytes)
            try:
                pdf_doc.insert_pdf(img_pdf_page, from_page=0,
                                   to_page=len(img_pdf_page) - 1)
            finally:
                img_pdf_page.close()

        # Write the assembled document to the output PDF file.
        pdf_doc.save(output_pdf_path)
    finally:
        pdf_doc.close()


# Script entry point: only image_to_pdf() is called here; the other functions
# in this file are not invoked from this guard.
if __name__ == '__main__':
    image_to_pdf()