FCE_aug.py 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732
  1. # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is adapted from:
  16. https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/transforms.py
  17. """
  18. import numpy as np
  19. import torchvision.transforms
  20. from PIL import Image, ImageDraw
  21. import cv2
  22. from shapely.geometry import Polygon
  23. import math
  24. from torchocr.utils.poly_nms import poly_intersection
  25. from torchvision.transforms import ColorJitter as Jitter
  26. class Pad(object):
  27. def __init__(self, size=None, size_div=32, **kwargs):
  28. if size is not None and not isinstance(size, (int, list, tuple)):
  29. raise TypeError("Type of target_size is invalid. Now is {}".format(
  30. type(size)))
  31. if isinstance(size, int):
  32. size = [size, size]
  33. self.size = size
  34. self.size_div = size_div
  35. def __call__(self, data):
  36. img = data['img']
  37. img_h, img_w = img.shape[0], img.shape[1]
  38. if self.size:
  39. resize_h2, resize_w2 = self.size
  40. assert (
  41. img_h < resize_h2 and img_w < resize_w2
  42. ), '(h, w) of target size should be greater than (img_h, img_w)'
  43. else:
  44. resize_h2 = max(
  45. int(math.ceil(img.shape[0] / self.size_div) * self.size_div),
  46. self.size_div)
  47. resize_w2 = max(
  48. int(math.ceil(img.shape[1] / self.size_div) * self.size_div),
  49. self.size_div)
  50. img = cv2.copyMakeBorder(
  51. img,
  52. 0,
  53. resize_h2 - img_h,
  54. 0,
  55. resize_w2 - img_w,
  56. cv2.BORDER_CONSTANT,
  57. value=0)
  58. data['img'] = img
  59. return data
  60. class ColorJitter(object):
  61. def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, **kwargs):
  62. self.aug = Jitter(brightness, contrast, saturation, hue)
  63. def __call__(self, data):
  64. image = data['img']
  65. pil_img=Image.fromarray(image).convert('RGB')
  66. image = np.asarray(self.aug(pil_img))
  67. data['img'] = image
  68. return data
  69. class RandomScaling:
  70. def __init__(self, size=800, scale=(3. / 4, 5. / 2), **kwargs):
  71. """Random scale the image while keeping aspect.
  72. Args:
  73. size (int) : Base size before scaling.
  74. scale (tuple(float)) : The range of scaling.
  75. """
  76. assert isinstance(size, int)
  77. assert isinstance(scale, float) or isinstance(scale, tuple)
  78. self.size = size
  79. self.scale = scale if isinstance(scale, tuple) \
  80. else (1 - scale, 1 + scale)
  81. def __call__(self, data):
  82. image = data['img']
  83. text_polys = data['text_polys']
  84. h, w, _ = image.shape
  85. aspect_ratio = np.random.uniform(min(self.scale), max(self.scale))
  86. scales = self.size * 1.0 / max(h, w) * aspect_ratio
  87. scales = np.array([scales, scales])
  88. out_size = (int(h * scales[1]), int(w * scales[0]))
  89. image = cv2.resize(image, out_size[::-1])
  90. try:
  91. data['img'] = image
  92. text_polys[:, :, 0::2] = text_polys[:, :, 0::2] * scales[1]
  93. text_polys[:, :, 1::2] = text_polys[:, :, 1::2] * scales[0]
  94. data['text_polys'] = text_polys
  95. except:
  96. print('1')
  97. return data
  98. class RandomCropFlip:
  99. def __init__(self,
  100. pad_ratio=0.1,
  101. crop_ratio=0.5,
  102. iter_num=1,
  103. min_area_ratio=0.2,
  104. **kwargs):
  105. """Random crop and flip a patch of the image.
  106. Args:
  107. crop_ratio (float): The ratio of cropping.
  108. iter_num (int): Number of operations.
  109. min_area_ratio (float): Minimal area ratio between cropped patch
  110. and original image.
  111. """
  112. assert isinstance(crop_ratio, float)
  113. assert isinstance(iter_num, int)
  114. assert isinstance(min_area_ratio, float)
  115. self.pad_ratio = pad_ratio
  116. self.epsilon = 1e-2
  117. self.crop_ratio = crop_ratio
  118. self.iter_num = iter_num
  119. self.min_area_ratio = min_area_ratio
  120. def __call__(self, results):
  121. for i in range(self.iter_num):
  122. results = self.random_crop_flip(results)
  123. return results
  124. def random_crop_flip(self, results):
  125. image = results['img']
  126. polygons = results['text_polys']
  127. ignore_tags = results['ignore_tags']
  128. if len(polygons) == 0:
  129. return results
  130. if np.random.random() >= self.crop_ratio:
  131. return results
  132. h, w, _ = image.shape
  133. area = h * w
  134. pad_h = int(h * self.pad_ratio)
  135. pad_w = int(w * self.pad_ratio)
  136. h_axis, w_axis = self.generate_crop_target(image, polygons, pad_h,
  137. pad_w)
  138. if len(h_axis) == 0 or len(w_axis) == 0:
  139. return results
  140. attempt = 0
  141. while attempt < 50:
  142. attempt += 1
  143. polys_keep = []
  144. polys_new = []
  145. ignore_tags_keep = []
  146. ignore_tags_new = []
  147. xx = np.random.choice(w_axis, size=2)
  148. xmin = np.min(xx) - pad_w
  149. xmax = np.max(xx) - pad_w
  150. xmin = np.clip(xmin, 0, w - 1)
  151. xmax = np.clip(xmax, 0, w - 1)
  152. yy = np.random.choice(h_axis, size=2)
  153. ymin = np.min(yy) - pad_h
  154. ymax = np.max(yy) - pad_h
  155. ymin = np.clip(ymin, 0, h - 1)
  156. ymax = np.clip(ymax, 0, h - 1)
  157. if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio:
  158. # area too small
  159. continue
  160. pts = np.stack([[xmin, xmax, xmax, xmin],
  161. [ymin, ymin, ymax, ymax]]).T.astype(np.int32)
  162. pp = Polygon(pts)
  163. fail_flag = False
  164. for polygon, ignore_tag in zip(polygons, ignore_tags):
  165. ppi = Polygon(polygon.reshape(-1, 2))
  166. ppiou, _ = poly_intersection(ppi, pp, buffer=0)
  167. if np.abs(ppiou - float(ppi.area)) > self.epsilon and \
  168. np.abs(ppiou) > self.epsilon:
  169. fail_flag = True
  170. break
  171. elif np.abs(ppiou - float(ppi.area)) < self.epsilon:
  172. polys_new.append(polygon)
  173. ignore_tags_new.append(ignore_tag)
  174. else:
  175. polys_keep.append(polygon)
  176. ignore_tags_keep.append(ignore_tag)
  177. if fail_flag:
  178. continue
  179. else:
  180. break
  181. cropped = image[ymin:ymax, xmin:xmax, :]
  182. select_type = np.random.randint(3)
  183. if select_type == 0:
  184. img = np.ascontiguousarray(cropped[:, ::-1])
  185. elif select_type == 1:
  186. img = np.ascontiguousarray(cropped[::-1, :])
  187. else:
  188. img = np.ascontiguousarray(cropped[::-1, ::-1])
  189. image[ymin:ymax, xmin:xmax, :] = img
  190. results['img'] = image
  191. if len(polys_new) != 0:
  192. height, width, _ = cropped.shape
  193. if select_type == 0:
  194. for idx, polygon in enumerate(polys_new):
  195. poly = polygon.reshape(-1, 2)
  196. poly[:, 0] = width - poly[:, 0] + 2 * xmin
  197. polys_new[idx] = poly
  198. elif select_type == 1:
  199. for idx, polygon in enumerate(polys_new):
  200. poly = polygon.reshape(-1, 2)
  201. poly[:, 1] = height - poly[:, 1] + 2 * ymin
  202. polys_new[idx] = poly
  203. else:
  204. for idx, polygon in enumerate(polys_new):
  205. poly = polygon.reshape(-1, 2)
  206. poly[:, 0] = width - poly[:, 0] + 2 * xmin
  207. poly[:, 1] = height - poly[:, 1] + 2 * ymin
  208. polys_new[idx] = poly
  209. polygons = polys_keep + polys_new
  210. ignore_tags = ignore_tags_keep + ignore_tags_new
  211. results['text_polys'] = np.array(polygons)
  212. results['ignore_tags'] = ignore_tags
  213. return results
  214. def generate_crop_target(self, image, all_polys, pad_h, pad_w):
  215. """Generate crop target and make sure not to crop the polygon
  216. instances.
  217. Args:
  218. image (ndarray): The image waited to be crop.
  219. all_polys (list[list[ndarray]]): All polygons including ground
  220. truth polygons and ground truth ignored polygons.
  221. pad_h (int): Padding length of height.
  222. pad_w (int): Padding length of width.
  223. Returns:
  224. h_axis (ndarray): Vertical cropping range.
  225. w_axis (ndarray): Horizontal cropping range.
  226. """
  227. h, w, _ = image.shape
  228. h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
  229. w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
  230. text_polys = []
  231. for polygon in all_polys:
  232. rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2))
  233. box = cv2.boxPoints(rect)
  234. box = np.int0(box)
  235. text_polys.append([box[0], box[1], box[2], box[3]])
  236. polys = np.array(text_polys, dtype=np.int32)
  237. for poly in polys:
  238. poly = np.round(poly, decimals=0).astype(np.int32)
  239. minx = np.min(poly[:, 0])
  240. maxx = np.max(poly[:, 0])
  241. w_array[minx + pad_w:maxx + pad_w] = 1
  242. miny = np.min(poly[:, 1])
  243. maxy = np.max(poly[:, 1])
  244. h_array[miny + pad_h:maxy + pad_h] = 1
  245. h_axis = np.where(h_array == 0)[0]
  246. w_axis = np.where(w_array == 0)[0]
  247. return h_axis, w_axis
  248. class RandomCropPolyInstances:
  249. """Randomly crop images and make sure to contain at least one intact
  250. instance."""
  251. def __init__(self, crop_ratio=5.0 / 8.0, min_side_ratio=0.4, **kwargs):
  252. super().__init__()
  253. self.crop_ratio = crop_ratio
  254. self.min_side_ratio = min_side_ratio
  255. def sample_valid_start_end(self, valid_array, min_len, max_start, min_end):
  256. assert isinstance(min_len, int)
  257. assert len(valid_array) > min_len
  258. start_array = valid_array.copy()
  259. max_start = min(len(start_array) - min_len, max_start)
  260. start_array[max_start:] = 0
  261. start_array[0] = 1
  262. diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0])
  263. region_starts = np.where(diff_array < 0)[0]
  264. region_ends = np.where(diff_array > 0)[0]
  265. region_ind = np.random.randint(0, len(region_starts))
  266. start = np.random.randint(region_starts[region_ind],
  267. region_ends[region_ind])
  268. end_array = valid_array.copy()
  269. min_end = max(start + min_len, min_end)
  270. end_array[:min_end] = 0
  271. end_array[-1] = 1
  272. diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0])
  273. region_starts = np.where(diff_array < 0)[0]
  274. region_ends = np.where(diff_array > 0)[0]
  275. region_ind = np.random.randint(0, len(region_starts))
  276. end = np.random.randint(region_starts[region_ind],
  277. region_ends[region_ind])
  278. return start, end
  279. def sample_crop_box(self, img_size, results):
  280. """Generate crop box and make sure not to crop the polygon instances.
  281. Args:
  282. img_size (tuple(int)): The image size (h, w).
  283. results (dict): The results dict.
  284. """
  285. assert isinstance(img_size, tuple)
  286. h, w = img_size[:2]
  287. key_masks = results['text_polys']
  288. x_valid_array = np.ones(w, dtype=np.int32)
  289. y_valid_array = np.ones(h, dtype=np.int32)
  290. selected_mask = key_masks[np.random.randint(0, len(key_masks))]
  291. selected_mask = selected_mask.reshape((-1, 2)).astype(np.int32)
  292. max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0)
  293. min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1)
  294. max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0)
  295. min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1)
  296. for mask in key_masks:
  297. mask = mask.reshape((-1, 2)).astype(np.int32)
  298. clip_x = np.clip(mask[:, 0], 0, w - 1)
  299. clip_y = np.clip(mask[:, 1], 0, h - 1)
  300. min_x, max_x = np.min(clip_x), np.max(clip_x)
  301. min_y, max_y = np.min(clip_y), np.max(clip_y)
  302. x_valid_array[min_x - 2:max_x + 3] = 0
  303. y_valid_array[min_y - 2:max_y + 3] = 0
  304. min_w = int(w * self.min_side_ratio)
  305. min_h = int(h * self.min_side_ratio)
  306. x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start,
  307. min_x_end)
  308. y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start,
  309. min_y_end)
  310. return np.array([x1, y1, x2, y2])
  311. def crop_img(self, img, bbox):
  312. assert img.ndim == 3
  313. h, w, _ = img.shape
  314. assert 0 <= bbox[1] < bbox[3] <= h
  315. assert 0 <= bbox[0] < bbox[2] <= w
  316. return img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
  317. def __call__(self, results):
  318. image = results['img']
  319. polygons = results['text_polys']
  320. ignore_tags = results['ignore_tags']
  321. if len(polygons) < 1:
  322. return results
  323. if np.random.random_sample() < self.crop_ratio:
  324. crop_box = self.sample_crop_box(image.shape, results)
  325. img = self.crop_img(image, crop_box)
  326. results['img'] = img
  327. # crop and filter masks
  328. x1, y1, x2, y2 = crop_box
  329. w = max(x2 - x1, 1)
  330. h = max(y2 - y1, 1)
  331. polygons[:, :, 0::2] = polygons[:, :, 0::2] - x1
  332. polygons[:, :, 1::2] = polygons[:, :, 1::2] - y1
  333. valid_masks_list = []
  334. valid_tags_list = []
  335. for ind, polygon in enumerate(polygons):
  336. if (polygon[:, ::2] > -4).all() and (
  337. polygon[:, ::2] < w + 4).all() and (
  338. polygon[:, 1::2] > -4).all() and (
  339. polygon[:, 1::2] < h + 4).all():
  340. polygon[:, ::2] = np.clip(polygon[:, ::2], 0, w)
  341. polygon[:, 1::2] = np.clip(polygon[:, 1::2], 0, h)
  342. valid_masks_list.append(polygon)
  343. valid_tags_list.append(ignore_tags[ind])
  344. results['text_polys'] = np.array(valid_masks_list)
  345. results['ignore_tags'] = valid_tags_list
  346. return results
  347. def __repr__(self):
  348. repr_str = self.__class__.__name__
  349. return repr_str
  350. class RandomRotatePolyInstances:
  351. def __init__(self,
  352. rotate_ratio=0.5,
  353. max_angle=10,
  354. pad_with_fixed_color=False,
  355. pad_value=(0, 0, 0),
  356. **kwargs):
  357. """Randomly rotate images and polygon masks.
  358. Args:
  359. rotate_ratio (float): The ratio of samples to operate rotation.
  360. max_angle (int): The maximum rotation angle.
  361. pad_with_fixed_color (bool): The flag for whether to pad rotated
  362. image with fixed value. If set to False, the rotated image will
  363. be padded onto cropped image.
  364. pad_value (tuple(int)): The color value for padding rotated image.
  365. """
  366. self.rotate_ratio = rotate_ratio
  367. self.max_angle = max_angle
  368. self.pad_with_fixed_color = pad_with_fixed_color
  369. self.pad_value = pad_value
  370. def rotate(self, center, points, theta, center_shift=(0, 0)):
  371. # rotate points.
  372. (center_x, center_y) = center
  373. center_y = -center_y
  374. x, y = points[:, ::2], points[:, 1::2]
  375. y = -y
  376. theta = theta / 180 * math.pi
  377. cos = math.cos(theta)
  378. sin = math.sin(theta)
  379. x = (x - center_x)
  380. y = (y - center_y)
  381. _x = center_x + x * cos - y * sin + center_shift[0]
  382. _y = -(center_y + x * sin + y * cos) + center_shift[1]
  383. points[:, ::2], points[:, 1::2] = _x, _y
  384. return points
  385. def cal_canvas_size(self, ori_size, degree):
  386. assert isinstance(ori_size, tuple)
  387. angle = degree * math.pi / 180.0
  388. h, w = ori_size[:2]
  389. cos = math.cos(angle)
  390. sin = math.sin(angle)
  391. canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos))
  392. canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin))
  393. canvas_size = (canvas_h, canvas_w)
  394. return canvas_size
  395. def sample_angle(self, max_angle):
  396. angle = np.random.random_sample() * 2 * max_angle - max_angle
  397. return angle
  398. def rotate_img(self, img, angle, canvas_size):
  399. h, w = img.shape[:2]
  400. rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
  401. rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2)
  402. rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2)
  403. if self.pad_with_fixed_color:
  404. target_img = cv2.warpAffine(
  405. img,
  406. rotation_matrix, (canvas_size[1], canvas_size[0]),
  407. flags=cv2.INTER_NEAREST,
  408. borderValue=self.pad_value)
  409. else:
  410. mask = np.zeros_like(img)
  411. (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
  412. np.random.randint(0, w * 7 // 8))
  413. img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
  414. img_cut = cv2.resize(img_cut, (canvas_size[1], canvas_size[0]))
  415. mask = cv2.warpAffine(
  416. mask,
  417. rotation_matrix, (canvas_size[1], canvas_size[0]),
  418. borderValue=[1, 1, 1])
  419. target_img = cv2.warpAffine(
  420. img,
  421. rotation_matrix, (canvas_size[1], canvas_size[0]),
  422. borderValue=[0, 0, 0])
  423. target_img = target_img + img_cut * mask
  424. return target_img
  425. def __call__(self, results):
  426. if np.random.random_sample() < self.rotate_ratio:
  427. image = results['img']
  428. polygons = results['text_polys']
  429. h, w = image.shape[:2]
  430. angle = self.sample_angle(self.max_angle)
  431. canvas_size = self.cal_canvas_size((h, w), angle)
  432. center_shift = (int((canvas_size[1] - w) / 2), int(
  433. (canvas_size[0] - h) / 2))
  434. image = self.rotate_img(image, angle, canvas_size)
  435. results['img'] = image
  436. # rotate polygons
  437. rotated_masks = []
  438. for mask in polygons:
  439. rotated_mask = self.rotate((w / 2, h / 2), mask, angle,
  440. center_shift)
  441. rotated_masks.append(rotated_mask)
  442. results['text_polys'] = np.array(rotated_masks)
  443. return results
  444. def __repr__(self):
  445. repr_str = self.__class__.__name__
  446. return repr_str
  447. class SquareResizePad:
  448. def __init__(self,
  449. target_size,
  450. pad_ratio=0.6,
  451. pad_with_fixed_color=False,
  452. pad_value=(0, 0, 0),
  453. **kwargs):
  454. """Resize or pad images to be square shape.
  455. Args:
  456. target_size (int): The target size of square shaped image.
  457. pad_with_fixed_color (bool): The flag for whether to pad rotated
  458. image with fixed value. If set to False, the rescales image will
  459. be padded onto cropped image.
  460. pad_value (tuple(int)): The color value for padding rotated image.
  461. """
  462. assert isinstance(target_size, int)
  463. assert isinstance(pad_ratio, float)
  464. assert isinstance(pad_with_fixed_color, bool)
  465. assert isinstance(pad_value, tuple)
  466. self.target_size = target_size
  467. self.pad_ratio = pad_ratio
  468. self.pad_with_fixed_color = pad_with_fixed_color
  469. self.pad_value = pad_value
  470. def resize_img(self, img, keep_ratio=True):
  471. h, w, _ = img.shape
  472. if keep_ratio:
  473. t_h = self.target_size if h >= w else int(h * self.target_size / w)
  474. t_w = self.target_size if h <= w else int(w * self.target_size / h)
  475. else:
  476. t_h = t_w = self.target_size
  477. img = cv2.resize(img, (t_w, t_h))
  478. return img, (t_h, t_w)
  479. def square_pad(self, img):
  480. h, w = img.shape[:2]
  481. if h == w:
  482. return img, (0, 0)
  483. pad_size = max(h, w)
  484. if self.pad_with_fixed_color:
  485. expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8)
  486. expand_img[:] = self.pad_value
  487. else:
  488. (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
  489. np.random.randint(0, w * 7 // 8))
  490. img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
  491. expand_img = cv2.resize(img_cut, (pad_size, pad_size))
  492. if h > w:
  493. y0, x0 = 0, (h - w) // 2
  494. else:
  495. y0, x0 = (w - h) // 2, 0
  496. expand_img[y0:y0 + h, x0:x0 + w] = img
  497. offset = (x0, y0)
  498. return expand_img, offset
  499. def square_pad_mask(self, points, offset):
  500. x0, y0 = offset
  501. pad_points = points.copy()
  502. pad_points[::2] = pad_points[::2] + x0
  503. pad_points[1::2] = pad_points[1::2] + y0
  504. return pad_points
  505. def __call__(self, results):
  506. image = results['img']
  507. polygons = results['text_polys']
  508. h, w = image.shape[:2]
  509. if np.random.random_sample() < self.pad_ratio:
  510. image, out_size = self.resize_img(image, keep_ratio=True)
  511. image, offset = self.square_pad(image)
  512. else:
  513. image, out_size = self.resize_img(image, keep_ratio=False)
  514. offset = (0, 0)
  515. results['img'] = image
  516. try:
  517. polygons[:, :, 0::2] = polygons[:, :, 0::2] * out_size[
  518. 1] / w + offset[0]
  519. polygons[:, :, 1::2] = polygons[:, :, 1::2] * out_size[
  520. 0] / h + offset[1]
  521. except:
  522. pass
  523. results['text_polys'] = polygons
  524. return results
  525. def __repr__(self):
  526. repr_str = self.__class__.__name__
  527. return repr_str
  528. class DetResizeForTest(object):
  529. def __init__(self, **kwargs):
  530. super(DetResizeForTest, self).__init__()
  531. self.resize_type = 0
  532. if 'image_shape' in kwargs:
  533. self.image_shape = kwargs['image_shape']
  534. self.resize_type = 1
  535. elif 'limit_side_len' in kwargs:
  536. self.limit_side_len = kwargs['limit_side_len']
  537. self.limit_type = kwargs.get('limit_type', 'min')
  538. elif 'resize_long' in kwargs:
  539. self.resize_type = 2
  540. self.resize_long = kwargs.get('resize_long', 960)
  541. else:
  542. self.limit_side_len = 736
  543. self.limit_type = 'min'
  544. def __call__(self, data):
  545. img = data['img']
  546. src_h, src_w, _ = img.shape
  547. if self.resize_type == 0:
  548. # img, shape = self.resize_image_type0(img)
  549. img, [ratio_h, ratio_w] = self.resize_image_type0(img)
  550. elif self.resize_type == 2:
  551. img, [ratio_h, ratio_w] = self.resize_image_type2(img)
  552. else:
  553. # img, shape = self.resize_image_type1(img)
  554. img, [ratio_h, ratio_w] = self.resize_image_type1(img)
  555. data['img'] = img
  556. data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
  557. return data
  558. def resize_image_type1(self, img):
  559. resize_h, resize_w = self.image_shape
  560. ori_h, ori_w = img.shape[:2] # (h, w, c)
  561. ratio_h = float(resize_h) / ori_h
  562. ratio_w = float(resize_w) / ori_w
  563. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  564. # return img, np.array([ori_h, ori_w])
  565. return img, [ratio_h, ratio_w]
  566. def resize_image_type0(self, img):
  567. """
  568. resize image to a size multiple of 32 which is required by the network
  569. args:
  570. img(array): array with shape [h, w, c]
  571. return(tuple):
  572. img, (ratio_h, ratio_w)
  573. """
  574. limit_side_len = self.limit_side_len
  575. h, w, c = img.shape
  576. # limit the max side
  577. if self.limit_type == 'max':
  578. if max(h, w) > limit_side_len:
  579. if h > w:
  580. ratio = float(limit_side_len) / h
  581. else:
  582. ratio = float(limit_side_len) / w
  583. else:
  584. ratio = 1.
  585. elif self.limit_type == 'min':
  586. if min(h, w) < limit_side_len:
  587. if h < w:
  588. ratio = float(limit_side_len) / h
  589. else:
  590. ratio = float(limit_side_len) / w
  591. else:
  592. ratio = 1.
  593. elif self.limit_type == 'resize_long':
  594. ratio = float(limit_side_len) / max(h, w)
  595. else:
  596. raise Exception('not support limit type, image ')
  597. resize_h = int(h * ratio)
  598. resize_w = int(w * ratio)
  599. resize_h = max(int(round(resize_h / 32) * 32), 32)
  600. resize_w = max(int(round(resize_w / 32) * 32), 32)
  601. try:
  602. if int(resize_w) <= 0 or int(resize_h) <= 0:
  603. return None, (None, None)
  604. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  605. except:
  606. print(img.shape, resize_w, resize_h)
  607. sys.exit(0)
  608. ratio_h = resize_h / float(h)
  609. ratio_w = resize_w / float(w)
  610. return img, [ratio_h, ratio_w]
  611. def resize_image_type2(self, img):
  612. h, w, _ = img.shape
  613. resize_w = w
  614. resize_h = h
  615. if resize_h > resize_w:
  616. ratio = float(self.resize_long) / resize_h
  617. else:
  618. ratio = float(self.resize_long) / resize_w
  619. resize_h = int(resize_h * ratio)
  620. resize_w = int(resize_w * ratio)
  621. max_stride = 128
  622. resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
  623. resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
  624. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  625. ratio_h = resize_h / float(h)
  626. ratio_w = resize_w / float(w)
  627. return img, [ratio_h, ratio_w]