#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 21 10:53:51 2022
pre_process.py
@author: fangjiasheng

Data preparation helpers for an image-direction classifier: build rotated
training samples from images and render PDF pages to PNG files.
"""
import json
import base64
import random
import traceback
from glob import glob

import numpy as np
import six
import cv2
from PIL import Image
import fitz

Image.MAX_IMAGE_PIXELS = 2300000000

# Images whose decoded array holds at least this many values
# (height * width * channels) are skipped by ``gen``.
_MAX_IMAGE_ELEMENTS = 89478485


def get_img_label(img_np, size, cls_num=4):
    """Resize an image and build one rotated copy per direction class.

    Args:
        img_np: Image array in OpenCV BGR channel order (it is converted
            with ``cv2.COLOR_BGR2RGB`` before any other processing).
        size: ``(height, width)`` the image is resized to before rotating.
        cls_num: Number of direction classes. The rotation step is
            ``int(360 / cls_num)`` degrees.

    Returns:
        A list of ``[image_array, label]`` pairs. The first pair is the
        resized, unrotated image with label 0; pair ``k`` is the resized
        image rotated by ``k * step`` degrees with label ``k``. Arrays are
        in RGB order. Rotation uses ``expand=1``, so a rotated array may be
        larger than ``size`` unless the image is square and the step is a
        multiple of 90 degrees.
    """
    height, width = size
    img_pil = Image.fromarray(cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB))

    # Scale to the requested size first so every rotation starts from the
    # same base image.
    img_pil = img_pil.resize((int(width), int(height)), Image.BICUBIC)

    img_label_list = [[np.array(img_pil), 0]]

    # Emit exactly cls_num - 1 rotations so that every label stays inside
    # [0, cls_num), even when cls_num does not divide 360 evenly.
    angle_step = int(360 / cls_num)
    for label in range(1, cls_num):
        rotated = img_pil.rotate(label * angle_step, expand=1)
        img_label_list.append([np.array(rotated), label])
    return img_label_list


def _load_image(path):
    """Read ``path`` with OpenCV; return None if unreadable or too large."""
    img_np = cv2.imread(path)
    if img_np is None:
        return None
    if img_np.size >= _MAX_IMAGE_ELEMENTS:
        return None
    return img_np


def gen(paths, batch_size=2, shape=(640, 640), cls_num=4, is_test=False):
    """Endlessly yield batches of rotated images with one-hot labels.

    Args:
        paths: Sequence of image file paths. A private copy is reshuffled
            after each full pass; the caller's sequence is left untouched.
        batch_size: Number of source images consumed per batch.
        shape: ``(height, width)`` of every sample in the batch.
        cls_num: Number of direction classes.
        is_test: When true, one randomly chosen rotation is kept per source
            image and the unblurred samples are yielded as a third item.

    Yields:
        ``(X, Y)`` in training mode, where ``X`` has shape
        ``(batch_size * cls_num, height, width, 3)`` and ``Y`` has shape
        ``(batch_size * cls_num, cls_num)``. In test mode yields
        ``(X, Y, img_np_list)`` with ``batch_size`` rows and the list of
        samples as they were before the optional blur.

    Raises:
        ValueError: If ``paths`` is empty.
        RuntimeError: If a full pass over ``paths`` finds no usable image.
    """
    paths = list(paths)
    num = len(paths)
    if num == 0:
        raise ValueError("paths must contain at least one image path")

    height, width = shape
    # Number of samples each source image contributes to the batch.
    per_image = 1 if is_test else cls_num

    i = 0
    while True:
        X = np.zeros((batch_size * per_image, height, width, 3))
        Y = np.zeros((batch_size * per_image, cls_num))
        img_np_list = []

        for j in range(batch_size):
            # Skip unreadable or oversized files. The search is bounded to
            # one pass so a dataset with no usable image fails loudly
            # instead of spinning forever.
            img_np = None
            for _ in range(num):
                if i >= num:
                    i = 0
                    np.random.shuffle(paths)
                p = paths[i]
                i += 1
                img_np = _load_image(p)
                if img_np is not None:
                    break
            if img_np is None:
                raise RuntimeError("no readable image found in paths")

            img_label_list = get_img_label(
                img_np, size=(height, width), cls_num=cls_num
            )
            random.shuffle(img_label_list)
            if is_test:
                img_label_list = random.sample(img_label_list, 1)

            for c, (img, label_idx) in enumerate(img_label_list[:per_image]):
                # Rotation with expand=1 can change the array size; bring
                # the sample back to the batch geometry before storing it.
                if img.shape[:2] != (height, width):
                    img = cv2.resize(img, (width, height))
                img_np_list.append(img)

                # Randomly apply a Gaussian blur as augmentation.
                if random.choice([0, 1]):
                    sigma_x = random.randint(1, 2)
                    sigma_y = random.randint(1, 2)
                    # sigmaY must be passed by keyword: the fourth
                    # positional parameter of GaussianBlur is ``dst``.
                    img = cv2.GaussianBlur(
                        img, (5, 5), sigmaX=sigma_x, sigmaY=sigma_y
                    )

                # Each source image owns a contiguous run of per_image rows.
                idx = j * per_image + c
                X[idx] = img
                Y[idx, label_idx] = 1

        if is_test:
            yield X, Y, img_np_list
        else:
            yield X, Y


def get_image_from_pdf():
    """Render every page of the PDFs in a fixed folder to PNG files.

    Input files are taken from ``C:/Users/Administrator/Desktop/test_pdf/``
    and pages are written to
    ``D:/Project/image_direction_classification/data/1/`` as
    ``pdf_<n>.png`` with a running counter. Pages or documents that fail
    are reported on the console and skipped.
    """
    paths = glob("C:/Users/Administrator/Desktop/test_pdf/*")
    save_dir = "D:/Project/image_direction_classification/data/1/"
    i = 0
    for path in paths:
        doc = None
        try:
            doc = fitz.open(path)
            for page_no in range(doc.page_count):
                try:
                    page = doc.load_page(page_no)
                    output = save_dir + "pdf_" + str(i) + ".png"
                    i += 1
                    rotate = 0

                    # Zoom factor of 1.3 along each axis.
                    # Reference sizes noted by the original author:
                    # no zoom --> 792x612, dpi=96
                    # (1.33333333 --> 1056x816)   (2 --> 1584x1224)
                    # (1.183, 2.28 --> 1920x1080)
                    zoom_x = 1.3
                    zoom_y = 1.3
                    mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
                    pix = page.get_pixmap(matrix=mat, alpha=False)
                    pix.save(output)
                except Exception:
                    # Best effort: report the page and carry on.
                    print("pdf2Image page", path, page_no)
                    traceback.print_exc()
                    continue
        except Exception:
            print("pdf2Image", path)
            traceback.print_exc()
            continue
        finally:
            if doc is not None:
                doc.close()


if __name__ == '__main__':
    sample_path = "data/0/7248_fe52d616989e19e6967e0461ef19b149.jpg"
    # get_img_label expects a decoded BGR array, not a file path.
    sample_img = cv2.imread(sample_path)
    if sample_img is None:
        raise SystemExit("cannot read image: " + sample_path)
    get_img_label(sample_img, (640, 640))
    # Alternative entry point: get_image_from_pdf()