#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep  9 23:11:51 2020
image
@author: chineseocr

Helpers for reading labelme-style JSON annotations (line segments labelled
'0' or '1'), augmenting / resizing the images together with their line
coordinates, and generating (image, label-mask) training batches.
"""
import base64
import json
import random

import cv2
import numpy as np
import six
from matplotlib.colors import hsv_to_rgb, rgb_to_hsv
from numpy import cos, pi, sin
from PIL import Image

# Candidate batch sizes (multiples of 64) sampled by ``gen``.
_GEN_HEIGHTS = list(range(3008, 511, -64))
_GEN_WIDTHS = list(range(2048, 511, -64))
# Candidate batch sizes sampled by ``gen2``.
_GEN2_HEIGHTS = [1024, 896, 768, 640]
_GEN2_WIDTHS = [768, 640, 512, 384]


def _show_blocking(window, image):
    """Display ``image`` in an OpenCV window and wait for a key press."""
    cv2.imshow(window, image)
    cv2.waitKey(0)


def _cycle_paths(paths):
    """Yield items of ``paths`` forever, shuffling in place after each pass.

    The first pass follows the order given by the caller; ``paths`` is then
    shuffled in place with ``np.random.shuffle`` before every later pass.

    Raises:
        ValueError: if ``paths`` is empty.
    """
    if len(paths) == 0:
        raise ValueError("paths must contain at least one annotation file")
    while True:
        for index in range(len(paths)):
            yield paths[index]
        np.random.shuffle(paths)


def _draw_label_masks(lines, labels, height, width, linetype):
    """Rasterise the lines labelled '0' and '1' into two uint8 masks.

    Lines carrying any other label are ignored.

    Returns:
        ``(mask0, mask1)``, each of shape ``(height, width)``.
    """
    masks = []
    for wanted in ('0', '1'):
        selected = [line for line, label in zip(lines, labels)
                    if label == wanted]
        blank = np.zeros((height, width), dtype='uint8')
        masks.append(fill_lines(blank, selected, linetype=linetype))
    return masks[0], masks[1]


def plot_lines(img, lines, linetype=2):
    """Draw every line segment in black on a copy of ``img``.

    Args:
        img: Array-like image; it is copied and left unmodified.
        lines: Iterable of ``(p1, p2)`` pairs, each point indexable as
            ``(x, y)``. Coordinates are truncated to integers.
        linetype: Stroke thickness handed to ``cv2.line``.

    Returns:
        A ``PIL.Image`` built from the annotated copy.
    """
    canvas = np.copy(img)
    for p1, p2 in lines:
        cv2.line(canvas, (int(p1[0]), int(p1[1])), (int(p2[0]), int(p2[1])),
                 (0, 0, 0), linetype, lineType=cv2.LINE_AA)
    return Image.fromarray(canvas)


def base64_to_PIL(string):
    """Decode a base64 string into an RGB ``PIL.Image``.

    This is deliberately best-effort: any failure is printed and ``None`` is
    returned instead of raising.
    """
    try:
        raw = base64.b64decode(bytes(string, 'utf-8'))
        buf = six.BytesIO()
        buf.write(raw)
        buf.seek(0)
        return Image.open(buf).convert('RGB')
    except Exception as e:
        print(e)
        return None


def read_json(p, relabel_span=500):
    """Load one annotation file.

    The file is JSON with a ``shapes`` list (each entry has two ``points``
    and a ``label``) and a base64 ``imageData`` string.

    A line labelled '0' whose vertical extent exceeds ``relabel_span`` is
    relabelled '1'; a line labelled '1' whose horizontal extent exceeds
    ``relabel_span`` is relabelled '0'.

    Args:
        p: Path of the JSON file.
        relabel_span: Pixel extent above which a label is flipped.

    Returns:
        ``(img, lines, labels)``. ``img`` is ``None`` when the embedded image
        cannot be decoded (see ``base64_to_PIL``).
    """
    with open(p, encoding='utf-8') as f:
        json_data = json.load(f)

    # A missing or null "shapes" entry is treated as "no lines".
    shapes = json_data.get('shapes') or []
    lines = []
    labels = []
    for shape in shapes:
        points = shape['points']
        (x0, y0), (x1, y1) = points
        label = shape['label']
        if label == '0':
            if abs(y1 - y0) > relabel_span:
                label = '1'
        elif label == '1':
            if abs(x1 - x0) > relabel_span:
                label = '0'
        lines.append(points)
        labels.append(label)

    img = base64_to_PIL(json_data.get('imageData'))
    return img, lines, labels


def rotate(x, y, angle, cx, cy):
    """Rotate point ``(x, y)`` about ``(cx, cy)`` by ``angle`` degrees."""
    theta = angle * pi / 180
    dx, dy = x - cx, y - cy
    x_new = dx * cos(theta) - dy * sin(theta) + cx
    y_new = dx * sin(theta) + dy * cos(theta) + cy
    return x_new, y_new


def box_rotate(box, angle=0, imgH=0, imgW=0):
    """Map a 4-point box through a rotation of 0, 90, 180 or 270 degrees.

    The original author describes the rotation as counter-clockwise.

    Args:
        box: Sequence whose first eight values are ``x1, y1, ..., x4, y4``.
        angle: 90, 180 or 270; any other value returns the box unchanged.
        imgH, imgW: Image height and width used by the mapping.

    Returns:
        Tuple ``(x1, y1, x2, y2, x3, y3, x4, y4)`` of mapped coordinates.
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
    if angle == 90:
        return (y2, imgW - x2, y3, imgW - x3, y4, imgW - x4, y1, imgW - x1)
    if angle == 180:
        return (imgW - x3, imgH - y3, imgW - x4, imgH - y4,
                imgW - x1, imgH - y1, imgW - x2, imgH - y2)
    if angle == 270:
        return (imgH - y4, x4, imgH - y1, x1, imgH - y2, x2, imgH - y3, x3)
    return (x1, y1, x2, y2, x3, y3, x4, y4)


def angle_transpose(p, angle, w, h):
    """Map point ``p`` through a 90/180/270 degree turn of a ``w`` x ``h`` image.

    Any other ``angle`` returns the point unchanged.
    """
    x, y = p
    if angle == 90:
        return y, w - x
    if angle == 180:
        return w - x, h - y
    if angle == 270:
        return h - y, x
    return x, y


def img_argument(img, lines, labels, size=(512, 512)):
    """Randomly rotate an image together with its lines and labels.

    Two steps are applied: an occasional small rotation in [-5, 5) degrees
    about the image centre, then a rotation by a random multiple of 90
    degrees. For 90 and 270 degrees the '0' and '1' labels are swapped.

    Args:
        img: ``PIL.Image``.
        lines: Iterable of ``(p1, p2)`` point pairs.
        labels: Sequence of labels parallel to ``lines``.
        size: Unused; kept for interface compatibility.

    Returns:
        ``(img, newlines, newlabels)``.
    """
    w, h = img.size
    cx, cy = w / 2, h / 2

    # randint(0, 100) draws from 0..99, so the small rotation is applied for
    # 81..99 only, i.e. about 19% of calls (not 80% as once commented).
    if np.random.randint(0, 100) > 80:
        degree = np.random.uniform(-5, 5)
    else:
        degree = 0

    # Rotate the lines, then the image by the matching amount.
    newlines = [
        [rotate(p1[0], p1[1], degree, cx, cy),
         rotate(p2[0], p2[1], degree, cx, cy)]
        for p1, p2 in lines
    ]
    img = img.rotate(-degree, center=(cx, cy), resample=Image.BILINEAR)

    # Rotate by a random multiple of 90 degrees.
    angle = np.random.choice([0, 90, 180, 270], 1)[0]
    newlines = [
        [angle_transpose(p1, angle, w, h), angle_transpose(p2, angle, w, h)]
        for p1, p2 in newlines
    ]
    if angle in (90, 270):
        # A quarter turn swaps the two line classes.
        newlabels = ['1' if labels[i] == '0' else '0'
                     for i in range(len(newlines))]
    else:
        newlabels = [labels[i] for i in range(len(newlines))]

    transpose_ops = {
        90: Image.ROTATE_90,
        180: Image.ROTATE_180,
        270: Image.ROTATE_270,
    }
    op = transpose_ops.get(int(angle))
    if op is not None:
        img = img.transpose(op)
    return img, newlines, newlabels


def fill_lines(img, lines, linetype=2):
    """Draw every line segment with value 255 on a copy of ``img``.

    Args:
        img: Array-like image; it is copied and left unmodified.
        lines: Iterable of ``(p1, p2)`` point pairs.
        linetype: Stroke thickness handed to ``cv2.line``.

    Returns:
        The annotated copy as a numpy array.
    """
    canvas = np.copy(img)
    for p1, p2 in lines:
        cv2.line(canvas, (int(p1[0]), int(p1[1])), (int(p2[0]), int(p2[1])),
                 255, linetype, lineType=cv2.LINE_AA)
    return canvas


def get_img_label_origin(p, size, linetype=1, debug=False):
    """Load one sample using the resize + augmentation pipeline.

    The image is resized with ``img_resize_origin``, rotated with
    ``img_argument`` and jittered with ``get_random_data``.

    Args:
        p: Path of the JSON annotation file.
        size: ``(width, height)`` of the output.
        linetype: Stroke thickness of the rasterised label lines.
        debug: When true, show both label masks in a blocking OpenCV window.

    Returns:
        ``(image, lines, labelY)``. ``image`` is the array produced by
        ``get_random_data`` and ``labelY`` is a boolean array of shape
        ``(height, width, 2)``.
    """
    img, lines, labels = read_json(p)
    img, lines = img_resize_origin(img, lines, target_size=512, max_size=1024)
    img, lines, labels = img_argument(img, lines, labels, size)
    img, lines, labels = get_random_data(img, lines, labels, size=size)

    width, height = size
    mask0, mask1 = _draw_label_masks(lines, labels, height, width, linetype)
    if debug:
        _show_blocking("get_img_label", mask1)
        _show_blocking("get_img_label", mask0)

    labelY = np.stack([mask0, mask1], axis=-1) > 0
    return np.array(img), np.array(lines), labelY


def gen_origin(paths, batchsize=2, linetype=2, debug=False):
    """Yield ``(X, Y)`` batches of 640x640 samples forever.

    Samples come from ``get_img_label_origin``.

    Args:
        paths: Sequence of annotation paths; it is shuffled in place after
            each full pass.
        batchsize: Samples per batch.
        linetype: Stroke thickness of the label lines.
        debug: When true, show every sample in a blocking OpenCV window and
            print its shape.

    Raises:
        ValueError: if ``paths`` is empty.
    """
    path_iter = _cycle_paths(paths)
    size = 640
    while True:
        X = np.zeros((batchsize, size, size, 3))
        Y = np.zeros((batchsize, size, size, 2))
        for j in range(batchsize):
            p = next(path_iter)
            img, lines, labelImg = get_img_label_origin(
                p, size=(size, size), linetype=linetype, debug=debug)
            if debug:
                _show_blocking("gen", img)
                print("gen image size", img.shape)
            X[j] = img
            Y[j] = labelImg
        yield X, Y


def get_img_label(p, size, linetype=1):
    """Load one sample, padding or cropping it to a fixed size.

    Args:
        p: Path of the JSON annotation file.
        size: ``(height, width)`` of the output.
        linetype: Stroke thickness of the rasterised label lines.

    Returns:
        ``(image, lines, labelY, (height, width))`` where ``labelY`` is a
        boolean array of shape ``(height, width, 2)``: channel 0 holds the
        '0' lines and channel 1 the '1' lines.
    """
    img, lines, labels = read_json(p)
    height, width = size

    # Bring the image to the target size by centred padding / cropping.
    img, lines = img_resize_by_padding_crop(img, lines, (height, width))

    # Rasterise both line classes and classify every pixel as line / no line.
    mask0, mask1 = _draw_label_masks(lines, labels, height, width, linetype)
    labelY = np.stack([mask0, mask1], axis=-1) > 0
    return np.array(img), np.array(lines), labelY, (height, width)


def get_img_label1(p, linetype=1, debug=False):
    """Load one sample at the image's native size.

    Args:
        p: Path of the JSON annotation file.
        linetype: Stroke thickness of the rasterised label lines.
        debug: When true, show both label masks in a blocking OpenCV window.

    Returns:
        ``(image, lines, labelY, (height, width))`` with the same layout as
        ``get_img_label``.
    """
    img, lines, labels = read_json(p)
    width, height = img.size

    mask0, mask1 = _draw_label_masks(lines, labels, height, width, linetype)
    if debug:
        _show_blocking("get_img_label", mask1)
        _show_blocking("get_img_label", mask0)

    labelY = np.stack([mask0, mask1], axis=-1) > 0
    return np.array(img), np.array(lines), labelY, (height, width)


def rand(a=0., b=1.):
    """Return ``np.random.rand()`` rescaled to the interval from a to b."""
    return np.random.rand() * (b - a) + a


def get_random_data(image, lines, labels, size=(1024, 1024), jitter=.3,
                    hue=.1, sat=1.5, val=1.5):
    """Random preprocessing for real-time data augmentation.

    The image is rescaled with a jittered aspect ratio, pasted at a random
    offset on a grey ``size`` canvas, then distorted in HSV space. Line
    coordinates follow the rescale and the offset.

    Args:
        image: ``PIL.Image``.
        lines: Sequence of ``(p1, p2)`` point pairs; it is not modified.
        labels: Returned unchanged.
        size: ``(width, height)`` of the output canvas.
        jitter: Aspect-ratio jitter amplitude.
        hue, sat, val: Amplitudes of the HSV distortion.

    Returns:
        ``(image_data, lines, labels)``. ``image_data`` is the RGB array from
        ``hsv_to_rgb`` with channels clipped to [0, 1].
    """
    iw, ih = image.size
    w, h = size

    # Random scale and aspect ratio.
    new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
    scale = rand(0.2, 3)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Paste the rescaled image on a grey canvas at a random offset.
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    canvas = Image.new('RGB', (w, h), (128, 128, 128))
    canvas.paste(image, (dx, dy))

    # Colour distortion in HSV space.
    hue = rand(-hue, hue)
    sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
    val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
    x = rgb_to_hsv(np.array(canvas) / 255.)
    x[..., 0] += hue
    x[..., 0][x[..., 0] > 1] -= 1  # wrap hue back into range
    x[..., 0][x[..., 0] < 0] += 1
    x[..., 1] *= sat
    x[..., 2] *= val
    np.clip(x, 0, 1, out=x)
    image_data = hsv_to_rgb(x)

    # Apply the same scale and offset to the line end points.
    sx, sy = nw / iw, nh / ih
    new_lines = [
        [(p1[0] * sx + dx, p1[1] * sy + dy),
         (p2[0] * sx + dx, p2[1] * sy + dy)]
        for p1, p2 in lines
    ]
    return image_data, new_lines, labels


def gen2(paths, batchsize=2, linetype=2, debug=False, heights=None,
         widths=None):
    """Yield ``(X, Y)`` batches forever with a random size per batch.

    Samples come from ``get_img_label``; height and width are drawn
    independently for every batch.

    Args:
        paths: Sequence of annotation paths; it is shuffled in place after
            each full pass.
        batchsize: Samples per batch.
        linetype: Stroke thickness of the label lines.
        debug: When true, print sizes and show every sample in a blocking
            OpenCV window.
        heights, widths: Optional candidate sizes; default to the built-in
            lists.

    Raises:
        ValueError: if ``paths`` is empty.
    """
    heights = _GEN2_HEIGHTS if heights is None else heights
    widths = _GEN2_WIDTHS if widths is None else widths
    path_iter = _cycle_paths(paths)
    while True:
        # Multi-scale training: pick a new size for every batch.
        height = np.random.choice(heights, 1)[0]
        width = np.random.choice(widths, 1)[0]
        X = np.zeros((batchsize, height, width, 3))
        Y = np.zeros((batchsize, height, width, 2))
        for j in range(batchsize):
            p = next(path_iter)
            if debug:
                print("gen input size", (width, height))
            # get_img_label expects (height, width) and returns 4 values.
            img, lines, labelImg, _ = get_img_label(
                p, size=(height, width), linetype=linetype)
            if debug:
                _show_blocking("gen", img)
                print("gen image size", img.shape)
            X[j] = img
            Y[j] = labelImg
        yield X, Y


def gen(paths, batchsize=2, linetype=2, heights=None, widths=None):
    """Yield ``(X, Y)`` training batches forever with a random size per batch.

    Height and width are drawn independently for every batch from multiples
    of 64 (512..3008 and 512..2048 by default). Every sample is padded or
    cropped to that size by ``get_img_label``.

    Args:
        paths: Sequence of annotation paths; it is shuffled in place after
            each full pass.
        batchsize: Samples per batch.
        linetype: Stroke thickness of the label lines.
        heights, widths: Optional candidate sizes; default to the built-in
            lists.

    Yields:
        ``X`` of shape ``(batchsize, height, width, 3)`` and ``Y`` of shape
        ``(batchsize, height, width, 2)``.

    Raises:
        ValueError: if ``paths`` is empty.
    """
    heights = _GEN_HEIGHTS if heights is None else heights
    widths = _GEN_WIDTHS if widths is None else widths
    path_iter = _cycle_paths(paths)
    while True:
        # Multi-scale training: pick a new size for every batch.
        height = np.random.choice(heights, 1)[0]
        width = np.random.choice(widths, 1)[0]
        X = np.zeros((batchsize, height, width, 3))
        Y = np.zeros((batchsize, height, width, 2))
        for j in range(batchsize):
            p = next(path_iter)
            img, lines, labelImg, _ = get_img_label(
                p, size=(height, width), linetype=linetype)
            X[j] = img
            Y[j] = labelImg
        yield X, Y


def gen1(paths, batchsize=2, linetype=2, debug=False):
    """Yield single-sample ``(X, Y)`` batches at native image size forever.

    ``batchsize`` is accepted for interface compatibility but forced to 1,
    since images of different native sizes cannot share one array. Each
    image is Gaussian-blurred with probability one half.

    Args:
        paths: Sequence of annotation paths; it is shuffled in place after
            each full pass.
        batchsize: Ignored (always 1).
        linetype: Stroke thickness of the label lines.
        debug: When true, print the image shape and show every sample in a
            blocking OpenCV window.

    Raises:
        ValueError: if ``paths`` is empty.
    """
    path_iter = _cycle_paths(paths)
    batchsize = 1
    while True:
        for j in range(batchsize):
            p = next(path_iter)
            # The native-size loader is used: get_img_label needs a target
            # size, which this generator does not have.
            img, lines, labelImg, size = get_img_label1(
                p, linetype=linetype, debug=debug)
            height, width = size
            X = np.zeros((batchsize, height, width, 3))
            Y = np.zeros((batchsize, height, width, 2))

            if_blur = np.random.choice([0, 1], 1)[0]
            if if_blur:
                # Gaussian blur with a random strength.
                sigmaX = random.randint(1, 3)
                sigmaY = random.randint(1, 3)
                img = cv2.GaussianBlur(img, (5, 5), sigmaX, sigmaY)

            if debug:
                print("gen image size", img.shape)
                _show_blocking("gen", img)

            X[j] = img
            Y[j] = labelImg
        yield X, Y


def img_resize(im, lines, target_size, max_size=None):
    """Resize ``im`` to exactly ``target_size`` and rescale ``lines``.

    The aspect ratio is not preserved.

    Args:
        im: ``PIL.Image``.
        lines: Sequence of ``(p1, p2)`` point pairs; it is not modified.
        target_size: ``(width, height)`` of the result.
        max_size: Unused; kept for interface compatibility.

    Returns:
        ``(im, lines)`` with the resized image and the rescaled lines.
    """
    w, h = im.size
    w_t, h_t = target_size
    print("img_resize", im.size, target_size)
    im_scale_w = float(w_t) / float(w)
    im_scale_h = float(h_t) / float(h)
    # Resize straight to the target: int(w * (w_t / w)) can truncate to
    # w_t - 1 because of floating-point rounding.
    im = im.resize((int(w_t), int(h_t)), Image.BICUBIC)
    new_lines = [
        [(p1[0] * im_scale_w, p1[1] * im_scale_h),
         (p2[0] * im_scale_w, p2[1] * im_scale_h)]
        for p1, p2 in lines
    ]
    return im, new_lines


def img_resize_origin(im, lines, target_size=600, max_size=1500):
    """Resize ``im`` keeping its aspect ratio and rescale ``lines``.

    The shorter side is scaled to ``target_size`` unless that would push the
    longer side past ``max_size``; in that case the longer side is scaled to
    ``max_size`` instead.

    Args:
        im: ``PIL.Image``.
        lines: Sequence of ``(p1, p2)`` point pairs; it is not modified.
        target_size: Desired length of the shorter side.
        max_size: Upper bound for the longer side, or ``None`` for no bound.

    Returns:
        ``(im, lines)`` with the resized image and the rescaled lines.
    """
    w, h = im.size
    short_side = min(w, h)
    long_side = max(w, h)
    im_scale = float(target_size) / float(short_side)
    if max_size is not None and np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)
    im = im.resize((int(w * im_scale), int(h * im_scale)), Image.BICUBIC)
    new_lines = [
        [(p1[0] * im_scale, p1[1] * im_scale),
         (p2[0] * im_scale, p2[1] * im_scale)]
        for p1, p2 in lines
    ]
    return im, new_lines


def _centre_window(src, dst):
    """Return ``(src_start, dst_start, length)`` centring ``src`` in ``dst``.

    When the target is larger the source is padded (``dst_start`` > 0); when
    it is smaller the source is cropped (``src_start`` > 0). Any odd pixel
    goes to the far (bottom / right) side.
    """
    if dst >= src:
        return 0, (dst - src) // 2, src
    return (src - dst) // 2, 0, dst


def img_resize_by_padding_crop(im, lines, target_size):
    """Bring ``im`` to ``target_size`` by centred white padding or cropping.

    Each axis is handled independently: padded with white when the target is
    larger, centre-cropped when it is smaller. Line coordinates are shifted
    by exactly the offset applied to the pixels.

    Args:
        im: ``PIL.Image``.
        lines: Sequence of ``(p1, p2)`` point pairs; it is not modified.
        target_size: ``(height, width)`` of the result.

    Returns:
        ``(im, lines)`` with the new image and the shifted lines.
    """
    w, h = im.size
    h_t, w_t = int(target_size[0]), int(target_size[1])

    pixels = np.asarray(im)
    src_y, dst_y, span_y = _centre_window(h, h_t)
    src_x, dst_x, span_x = _centre_window(w, w_t)

    # White canvas of the target size with the overlapping window copied in.
    canvas = np.full((h_t, w_t) + pixels.shape[2:], 255, dtype=pixels.dtype)
    canvas[dst_y:dst_y + span_y, dst_x:dst_x + span_x] = \
        pixels[src_y:src_y + span_y, src_x:src_x + span_x]
    im = Image.fromarray(canvas)

    # Positive shift means padding was added, negative means cropping.
    shift_x = dst_x - src_x
    shift_y = dst_y - src_y
    new_lines = [
        [(p1[0] + shift_x, p1[1] + shift_y),
         (p2[0] + shift_x, p2[1] + shift_y)]
        for p1, p2 in lines
    ]
    return im, new_lines


if __name__ == '__main__':
    # Print the multiples of 64 from 6400 down to 128.
    _list = [i * 64 for i in range(100, 1, -1)]
    print(_list)