#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Source repository: fangjiasheng / FORMAT_CONVERSION_MAXCOMPUTE
"""
Created on Tue Jun 21 10:53:51 2022
pre_process.py
@author: fangjiasheng
"""
import json
import base64
import random
import traceback
from glob import glob

import numpy as np
import six
import cv2
from PIL import Image
import fitz
Image.MAX_IMAGE_PIXELS = 2300000000


def get_img_label(img_np, size, cls_num=4):
    """Resize an image and build one rotated copy per direction class.

    Args:
        img_np: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB``), e.g. the result of ``cv2.imread``.
        size: ``(height, width)`` the image is resized to before rotating.
        cls_num: Number of direction classes. The rotation step is
            ``360 // cls_num`` degrees.

    Returns:
        A list of exactly ``cls_num`` ``[image, label]`` pairs. ``image`` is an
        RGB ``numpy`` array and ``label`` is the class index: label ``k`` is
        the resized image rotated by ``k * step`` degrees with
        ``PIL.Image.rotate(..., expand=1)``. Label 0 is the unrotated image.

    Raises:
        ValueError: If ``cls_num`` is not in ``1..360``.

    Note:
        Rotation uses ``expand=1``, so a rotated image only keeps the
        requested ``(height, width)`` shape when the rotation maps the canvas
        onto itself (e.g. a square image rotated by multiples of 90 degrees).
    """
    if not 1 <= cls_num <= 360:
        raise ValueError("cls_num must be between 1 and 360, got %r" % (cls_num,))

    height, width = size
    img_pil = Image.fromarray(cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB))

    # Resize first so every rotation starts from the same base image.
    img_pil = img_pil.resize((int(width), int(height)), Image.BICUBIC)

    # Label 0 is the unrotated image.
    img_label_list = [[np.array(img_pil), 0]]

    # Iterate over labels rather than over angles: stepping through
    # range(step, 360, step) produces extra entries (and out-of-range labels)
    # whenever cls_num does not divide 360 evenly.
    step = 360 // cls_num
    for label in range(1, cls_num):
        rotated = img_pil.rotate(label * step, expand=1)
        img_label_list.append([np.array(rotated), label])

    return img_label_list


def _read_image_with_fallback(paths, path, max_values=89478485, max_tries=10):
    """Read ``path`` with OpenCV, retrying random entries of ``paths`` if needed.

    An image is rejected when ``cv2.imread`` returns ``None`` or when its total
    number of array values (height * width * channels) reaches ``max_values``.
    If only oversized images were found within ``max_tries`` attempts, the last
    oversized one is returned so the caller can still proceed.

    Raises:
        ValueError: If no attempt produced a readable image.
    """
    oversized = None
    for _ in range(max_tries):
        img_np = cv2.imread(path)
        if img_np is not None:
            if img_np.size < max_values:
                return img_np
            # Keep a single oversized candidate as a last resort.
            oversized = img_np
        path = paths[random.randint(0, len(paths) - 1)]
    if oversized is not None:
        return oversized
    raise ValueError("could not read a usable image after %d tries" % max_tries)


def gen(paths, batch_size=2, shape=(640, 640), cls_num=4, is_test=False):
    """Endlessly yield batches of rotated images with one-hot direction labels.

    Args:
        paths: Sequence of image file paths readable by ``cv2.imread``.
        batch_size: Number of source images consumed per batch.
        shape: ``(height, width)`` every image is resized to.
        cls_num: Number of direction classes (length of each one-hot label).
        is_test: When False, every rotation of each source image is emitted
            (``batch_size * cls_num`` rows). When True, one randomly chosen
            rotation per source image is emitted (``batch_size`` rows).

    Yields:
        ``(X, Y)`` in training mode, or ``(X, Y, img_np_list)`` in test mode.
        ``X`` has shape ``(rows, height, width, 3)`` and ``Y`` has shape
        ``(rows, cls_num)``; both are float arrays created with ``np.zeros``.
        ``img_np_list`` holds the images of the batch before any blurring.

    Raises:
        ValueError: If ``paths`` is empty or no usable image can be read.
    """
    # Work on a private copy so the reshuffle at each pass over the data does
    # not reorder the caller's list.
    paths = list(paths)
    num = len(paths)
    if num == 0:
        raise ValueError("paths must contain at least one image path")

    height, width = shape
    # Number of output rows contributed by each source image.
    per_image = 1 if is_test else cls_num

    i = 0
    while True:
        X = np.zeros((batch_size * per_image, height, width, 3))
        Y = np.zeros((batch_size * per_image, cls_num))
        img_np_list = []

        for j in range(batch_size):
            if i >= num:
                # Completed a pass over the data: restart in a new order.
                i = 0
                np.random.shuffle(paths)
            p = paths[i]
            i += 1

            # Unreadable or oversized images are replaced by a random other
            # image, which is actually re-read before use.
            img_np = _read_image_with_fallback(paths, p)

            img_label_list = get_img_label(img_np, size=(height, width), cls_num=cls_num)
            random.shuffle(img_label_list)
            if is_test:
                img_label_list = random.sample(img_label_list, 1)

            for c, (img, label_index) in enumerate(img_label_list[:per_image]):
                img_np_list.append(img)

                # Randomly apply a Gaussian blur as augmentation.
                if random.choice([0, 1]):
                    sigmaX = random.randint(1, 2)
                    sigmaY = random.randint(1, 2)
                    img = cv2.GaussianBlur(img, (5, 5), sigmaX, sigmaY)

                # Each source image owns a contiguous group of `per_image`
                # rows; indexing with j + c would let neighbouring images
                # overwrite each other's rows.
                row = j * per_image + c
                X[row] = img
                Y[row, label_index] = 1

        if is_test:
            yield X, Y, img_np_list
        else:
            yield X, Y


def get_image_from_pdf(pdf_pattern="C:/Users/Administrator/Desktop/test_pdf/*",
                       save_dir="D:/Project/image_direction_classification/data/1/"):
    """Render every page of every matching PDF to a PNG file.

    Args:
        pdf_pattern: Glob pattern selecting the input documents.
        save_dir: Output directory prefix. It is concatenated directly with
            the file name, so it must end with a path separator.

    Output files are named ``pdf_<n>.png`` where ``n`` is a counter shared
    across all documents. Failures on a document or on a single page are
    reported with a traceback and skipped; the remaining pages and documents
    are still processed.
    """
    # Zoom factor applied on each axis when rendering a page.
    # Original author's notes: without a matrix the default image is 792x612
    # at dpi=96; (1.33333333 --> 1056x816), (2 --> 1584x1224),
    # (1.183, 2.28 --> 1920x1080).
    zoom_x = 1.3
    zoom_y = 1.3
    rotate = 0

    i = 0
    for path in glob(pdf_pattern):
        try:
            doc = fitz.open(path)
        except Exception:
            print("pdf2Image", traceback.format_exc())
            continue

        try:
            # NOTE(review): loadPage/preRotate/getPixmap/writePNG are the
            # camelCase PyMuPDF names; newer releases appear to favour
            # snake_case equivalents - confirm against the installed version.
            for page_no in range(doc.page_count):
                try:
                    page = doc.loadPage(page_no)
                    output = save_dir + "pdf_" + str(i) + ".png"
                    i += 1
                    mat = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)
                    pix = page.getPixmap(matrix=mat, alpha=False)
                    pix.writePNG(output)
                except Exception:
                    # Report the failing page instead of skipping it silently.
                    print("pdf2Image", path, page_no, traceback.format_exc())
                    continue
        except Exception:
            print("pdf2Image", traceback.format_exc())
        finally:
            # Always release the document handle, even after a failure.
            doc.close()


if __name__ == '__main__':
    # get_img_label converts its input with cv2.cvtColor, so it needs a
    # decoded image array rather than a file path: read the sample first.
    _sample_path = "data/0/7248_fe52d616989e19e6967e0461ef19b149.jpg"
    _sample_img = cv2.imread(_sample_path)
    if _sample_img is None:
        raise FileNotFoundError("cannot read image: " + _sample_path)
    get_img_label(_sample_img, (640, 640))
    # get_image_from_pdf()