random_crop_data.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. import random
  2. import cv2
  3. import numpy as np
  4. __all__ = ['EastRandomCropData', 'PSERandomCrop']
  5. # random crop algorithm similar to https://github.com/argman/EAST
  6. class EastRandomCropData():
  7. def __init__(self, size=(640, 640), max_tries=50, min_crop_side_ratio=0.1, require_original_image=False, keep_ratio=True):
  8. self.size = size
  9. self.max_tries = max_tries
  10. self.min_crop_side_ratio = min_crop_side_ratio
  11. self.require_original_image = require_original_image
  12. self.keep_ratio = keep_ratio
  13. def __call__(self, data: dict) -> dict:
  14. """
  15. 从scales中随机选择一个尺度,对图片和文本框进行缩放
  16. :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
  17. :return:
  18. """
  19. im = data['img']
  20. text_polys = data['text_polys']
  21. ignore_tags = data['ignore_tags']
  22. texts = data['texts']
  23. all_care_polys = [text_polys[i] for i, tag in enumerate(ignore_tags) if not tag]
  24. # 计算crop区域
  25. crop_x, crop_y, crop_w, crop_h = self.crop_area(im, all_care_polys)
  26. # crop 图片 保持比例填充
  27. scale_w = self.size[0] / crop_w
  28. scale_h = self.size[1] / crop_h
  29. scale = min(scale_w, scale_h)
  30. h = int(crop_h * scale)
  31. w = int(crop_w * scale)
  32. if self.keep_ratio:
  33. if len(im.shape) == 3:
  34. padimg = np.zeros((self.size[1], self.size[0], im.shape[2]), im.dtype)
  35. else:
  36. padimg = np.zeros((self.size[1], self.size[0]), im.dtype)
  37. padimg[:h, :w] = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
  38. img = padimg
  39. else:
  40. img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], tuple(self.size))
  41. # crop 文本框
  42. text_polys_crop = []
  43. ignore_tags_crop = []
  44. texts_crop = []
  45. try:
  46. for poly, text, tag in zip(text_polys, texts, ignore_tags):
  47. poly = ((np.array(poly) - (crop_x, crop_y)) * scale).astype('float32')
  48. if not self.is_poly_outside_rect(poly, 0, 0, w, h):
  49. text_polys_crop.append(poly)
  50. ignore_tags_crop.append(tag)
  51. texts_crop.append(text)
  52. data['img'] = img
  53. data['text_polys'] = text_polys_crop
  54. data['ignore_tags'] = ignore_tags_crop
  55. data['texts'] = texts_crop
  56. except:
  57. a = 1
  58. return data
  59. def is_poly_in_rect(self, poly, x, y, w, h):
  60. poly = np.array(poly)
  61. if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
  62. return False
  63. if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
  64. return False
  65. return True
  66. def is_poly_outside_rect(self, poly, x, y, w, h):
  67. poly = np.array(poly)
  68. if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
  69. return True
  70. if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
  71. return True
  72. return False
  73. def split_regions(self, axis):
  74. regions = []
  75. min_axis = 0
  76. for i in range(1, axis.shape[0]):
  77. if axis[i] != axis[i - 1] + 1:
  78. region = axis[min_axis:i]
  79. min_axis = i
  80. regions.append(region)
  81. return regions
  82. def random_select(self, axis, max_size):
  83. xx = np.random.choice(axis, size=2)
  84. xmin = np.min(xx)
  85. xmax = np.max(xx)
  86. xmin = np.clip(xmin, 0, max_size - 1)
  87. xmax = np.clip(xmax, 0, max_size - 1)
  88. return xmin, xmax
  89. def region_wise_random_select(self, regions, max_size):
  90. selected_index = list(np.random.choice(len(regions), 2))
  91. selected_values = []
  92. for index in selected_index:
  93. axis = regions[index]
  94. xx = int(np.random.choice(axis, size=1))
  95. selected_values.append(xx)
  96. xmin = min(selected_values)
  97. xmax = max(selected_values)
  98. return xmin, xmax
  99. def crop_area(self, im, text_polys):
  100. h, w = im.shape[:2]
  101. h_array = np.zeros(h, dtype=np.int32)
  102. w_array = np.zeros(w, dtype=np.int32)
  103. for points in text_polys:
  104. points = np.round(points, decimals=0).astype(np.int32)
  105. minx = np.min(points[:, 0])
  106. maxx = np.max(points[:, 0])
  107. w_array[minx:maxx] = 1
  108. miny = np.min(points[:, 1])
  109. maxy = np.max(points[:, 1])
  110. h_array[miny:maxy] = 1
  111. # ensure the cropped area not across a text
  112. h_axis = np.where(h_array == 0)[0]
  113. w_axis = np.where(w_array == 0)[0]
  114. if len(h_axis) == 0 or len(w_axis) == 0:
  115. return 0, 0, w, h
  116. h_regions = self.split_regions(h_axis)
  117. w_regions = self.split_regions(w_axis)
  118. for i in range(self.max_tries):
  119. if len(w_regions) > 1:
  120. xmin, xmax = self.region_wise_random_select(w_regions, w)
  121. else:
  122. xmin, xmax = self.random_select(w_axis, w)
  123. if len(h_regions) > 1:
  124. ymin, ymax = self.region_wise_random_select(h_regions, h)
  125. else:
  126. ymin, ymax = self.random_select(h_axis, h)
  127. if xmax - xmin < self.min_crop_side_ratio * w or ymax - ymin < self.min_crop_side_ratio * h:
  128. # area too small
  129. continue
  130. num_poly_in_rect = 0
  131. for poly in text_polys:
  132. if not self.is_poly_outside_rect(poly, xmin, ymin, xmax - xmin, ymax - ymin):
  133. num_poly_in_rect += 1
  134. break
  135. if num_poly_in_rect > 0:
  136. return xmin, ymin, xmax - xmin, ymax - ymin
  137. return 0, 0, w, h
  138. class PSERandomCrop():
  139. def __init__(self, size):
  140. self.size = size
  141. def __call__(self, data):
  142. imgs = data['imgs']
  143. h, w = imgs[0].shape[0:2]
  144. th, tw = self.size
  145. if w == tw and h == th:
  146. return imgs
  147. # label中存在文本实例,并且按照概率进行裁剪,使用threshold_label_map控制
  148. if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
  149. # 文本实例的左上角点
  150. tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
  151. tl[tl < 0] = 0
  152. # 文本实例的右下角点
  153. br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
  154. br[br < 0] = 0
  155. # 保证选到右下角点时,有足够的距离进行crop
  156. br[0] = min(br[0], h - th)
  157. br[1] = min(br[1], w - tw)
  158. for _ in range(50000):
  159. i = random.randint(tl[0], br[0])
  160. j = random.randint(tl[1], br[1])
  161. # 保证shrink_label_map有文本
  162. if imgs[1][i:i + th, j:j + tw].sum() <= 0:
  163. continue
  164. else:
  165. break
  166. else:
  167. i = random.randint(0, h - th)
  168. j = random.randint(0, w - tw)
  169. # return i, j, th, tw
  170. for idx in range(len(imgs)):
  171. if len(imgs[idx].shape) == 3:
  172. imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
  173. else:
  174. imgs[idx] = imgs[idx][i:i + th, j:j + tw]
  175. data['imgs'] = imgs
  176. return data