123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Tue Jun 21 10:53:51 2022
- pre_process.py
- @author: fangjiasheng
- """
- import json
- import base64
- import random
- import traceback
- from glob import glob
- import numpy as np
- import six
- import cv2
- from PIL import Image
- import fitz
- Image.MAX_IMAGE_PIXELS = 2300000000
def get_img_label(img_np, size, cls_num=4):
    """Build the rotated variants of an image together with their class labels.

    The image is converted from BGR to RGB, resized to ``size`` and then rotated
    in ``cls_num`` equal steps around the full circle. Label ``k`` corresponds
    to a rotation of ``k * 360 / cls_num`` degrees; label 0 is the unrotated
    image.

    Args:
        img_np: Image array in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2RGB``).
        size: ``(height, width)`` the image is resized to before rotating.
        cls_num: Number of rotation classes to generate.

    Returns:
        A list of ``cls_num`` entries ``[image_array, label]``, ordered by
        label. Rotations are done with ``expand=1``, so a rotated array is not
        guaranteed to keep the ``(height, width)`` of ``size``.

    Raises:
        ValueError: If ``cls_num`` is smaller than 1.
    """
    if cls_num < 1:
        raise ValueError("cls_num must be >= 1, got %r" % (cls_num,))

    height, width = size
    img_pil = Image.fromarray(cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB))

    # Resize to the requested size (PIL expects (width, height)).
    img_pil = img_pil.resize((int(width), int(height)), Image.BICUBIC)

    # Label 0 is always the unrotated image.
    img_label_list = [[np.array(img_pil), 0]]

    # Derive each angle from its label instead of stepping by a truncated
    # int(360 / cls_num): the truncated step yields more than cls_num entries
    # (and out-of-range labels) whenever 360 is not divisible by cls_num.
    for label in range(1, cls_num):
        angle = label * 360 / cls_num
        img_label_list.append([np.array(img_pil.rotate(angle, expand=1)), label])

    return img_label_list
def gen(paths, batch_size=2, shape=(640, 640), cls_num=4, is_test=False):
    """Yield endless batches of rotated images with one-hot rotation labels.

    Each batch consumes ``batch_size`` source images. In training mode every
    rotation of a source image goes into the batch; in test mode a single
    randomly chosen rotation is kept per source image. Each sample is blurred
    with probability 1/2 before being stored in ``X``.

    Args:
        paths: Sequence of image file paths to read with ``cv2.imread``. The
            sequence is copied, so the caller's object is never reshuffled.
        batch_size: Number of source images consumed per batch.
        shape: ``(height, width)`` every source image is resized to.
        cls_num: Number of rotation classes (width of the one-hot label).
        is_test: Select test mode (see above).

    Yields:
        ``(X, Y)`` in training mode and ``(X, Y, img_np_list)`` in test mode.
        ``X`` has shape ``(batch_size * k, height, width, 3)`` and ``Y`` has
        shape ``(batch_size * k, cls_num)`` where ``k`` is 1 in test mode and
        ``cls_num`` otherwise. ``img_np_list`` holds the unblurred samples.

    Raises:
        ValueError: If ``paths`` is empty.
        RuntimeError: If a whole pass over ``paths`` yields no usable image.

    NOTE(review): ``get_img_label`` rotates with ``expand=1``, so a non-square
    ``shape`` produces rotated samples that do not fit a row of ``X`` --
    confirm that only square shapes are used.
    """
    # Work on a private copy so reshuffling does not mutate the caller's list.
    paths = list(paths)
    num = len(paths)
    if num == 0:
        raise ValueError("gen() needs at least one image path")

    height, width = shape
    # Images with height * width * channels at or above this value are skipped.
    max_values = 89478485
    # Number of samples each source image contributes to a batch.
    per_image = 1 if is_test else cls_num

    i = 0
    while True:
        X = np.zeros((batch_size * per_image, height, width, 3))
        Y = np.zeros((batch_size * per_image, cls_num))
        img_np_list = []

        for j in range(batch_size):
            # Advance through the paths until a readable, not-too-large image
            # is found; give up after one full pass to avoid spinning forever.
            img_np = None
            for _ in range(num):
                if i >= num:
                    i = 0
                    np.random.shuffle(paths)
                p = paths[i]
                i += 1
                candidate = cv2.imread(p)
                # cv2.imread signals an unreadable file by returning None.
                if candidate is None or candidate.size >= max_values:
                    continue
                img_np = candidate
                break
            if img_np is None:
                raise RuntimeError("no readable image of acceptable size in paths")

            img_label_list = get_img_label(img_np, size=(height, width), cls_num=cls_num)
            random.shuffle(img_label_list)
            if is_test:
                img_label_list = random.sample(img_label_list, 1)

            for c, (img, label) in enumerate(img_label_list[:cls_num]):
                # Keep the unblurred sample for inspection in test mode.
                img_np_list.append(img)

                # Random Gaussian blur augmentation.
                if random.choice([0, 1]):
                    sigmaX = random.randint(1, 2)
                    sigmaY = random.randint(1, 2)
                    # sigmaY must be passed by keyword: the fourth positional
                    # parameter of cv2.GaussianBlur is dst, not sigmaY.
                    img = cv2.GaussianBlur(img, (5, 5), sigmaX, sigmaY=sigmaY)

                # Each source image owns a contiguous run of per_image rows;
                # indexing with j + c would make neighbouring images overwrite
                # each other and leave the tail of the batch empty.
                row = j * per_image + c
                X[row] = img
                Y[row, label] = 1

        if is_test:
            yield X, Y, img_np_list
        else:
            yield X, Y
def get_image_from_pdf(
    pdf_glob="C:/Users/Administrator/Desktop/test_pdf/*",
    save_dir="D:/Project/image_direction_classification/data/1/",
    zoom=1.3,
):
    """Render every page of every matching PDF to a PNG file.

    Output files are named ``pdf_<n>.png`` inside ``save_dir``, where ``<n>``
    is a counter shared across all documents. A page or document that fails is
    reported and skipped; processing continues with the next one.

    Args:
        pdf_glob: Glob pattern selecting the PDF files to convert.
        save_dir: Output directory prefix; it is concatenated directly with
            the file name, so it must end with a path separator.
        zoom: Scale factor applied to both axes when rendering a page.

    NOTE(review): the camelCase PyMuPDF calls used here (``loadPage``,
    ``getPixmap``, ``writePNG``, ``preRotate``) are believed to be deprecated
    aliases in newer releases -- confirm against the installed version.
    """
    rotate = 0
    i = 0
    for path in glob(pdf_glob):
        doc = None
        try:
            doc = fitz.open(path)
            for page_no in range(doc.page_count):
                try:
                    page = doc.loadPage(page_no)
                    output = save_dir + "pdf_" + str(i) + ".png"
                    i += 1
                    mat = fitz.Matrix(zoom, zoom).preRotate(rotate)
                    pix = page.getPixmap(matrix=mat, alpha=False)
                    pix.writePNG(output)
                except Exception:
                    # Best effort per page: report the failure and move on
                    # instead of swallowing it silently.
                    print("pdf2Image", path, page_no, traceback.format_exc())
                    continue
        except Exception:
            # format_exc returns the traceback text; print_exc returns None,
            # which would make this line print "pdf2Image None".
            print("pdf2Image", traceback.format_exc())
        finally:
            # Release the document handle even when rendering failed.
            if doc is not None:
                doc.close()
if __name__ == '__main__':
    # get_img_label expects a decoded image array, not a file path, so the
    # sample has to be loaded first.
    _sample_path = "data/0/7248_fe52d616989e19e6967e0461ef19b149.jpg"
    _sample_img = cv2.imread(_sample_path)
    if _sample_img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError("cannot read image: " + _sample_path)
    get_img_label(_sample_img, (640, 640))
    # Alternative entry point:
    # get_image_from_pdf()
|