ICDAR15CropSave.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. '''
  2. @Author: Jeffery Sheng (Zhenfei Sheng)
  3. @Time: 2020/5/21 18:34
  4. @File: ICDAR15CropSave.py
  5. '''
  6. import os
  7. import cv2
  8. from glob import glob
  9. from tqdm import tqdm
  class icdar2015CropSave:
      '''
      Crop the annotated text boxes of an ICDAR 2015-style dataset into
      separate image files and write tab-separated label lists for them.

      Every ``*.jpg`` in ``img_dir`` is paired with ``gt_<image name>.txt`` in
      ``gt_dir``. Each ground-truth line is expected to have the form
      ``x1,y1,x2,y2,x3,y3,x4,y4,transcript``. Crops are written to
      ``<save_data_dir>/images/textbox_<id>.jpg`` and listed, one
      ``<file name><TAB><transcript>`` line per crop, in
      ``<save_data_dir>/train.txt`` (plus ``val.txt`` when a split ratio is set).
      '''

      # Transcripts for which no crop is written (presumably the dataset's
      # markers for unreadable / "don't care" regions -- confirm against the
      # dataset description).
      _IGNORED_TRANSCRIPTS = frozenset({'', '*', '###'})

      # The annotation is quoted so it is never evaluated at import time; the
      # previous ``float or None`` silently evaluated to plain ``float``.
      def __init__(self, img_dir: str, gt_dir: str, save_data_dir: str,
                   train_val_split_ratio: 'float | None' = 0.1):
          '''
          :param img_dir: directory holding the source ``*.jpg`` images.
          :param gt_dir: directory holding the ``gt_*.txt`` annotation files.
          :param save_data_dir: output directory; created together with any
              missing parent directories if it does not exist yet.
          :param train_val_split_ratio: fraction of the crops that is listed
              in ``val.txt``; ``None`` or ``0`` lists everything in
              ``train.txt`` only.
          '''
          # first id used in the ``textbox_<id>.jpg`` file names
          self.save_id = 1
          self.img_dir = os.path.abspath(img_dir)
          self.gt_dir = os.path.abspath(gt_dir)
          # makedirs (not mkdir) so nested output paths work and an already
          # existing directory is not an error
          os.makedirs(save_data_dir, exist_ok=True)
          self.save_data_dir = save_data_dir
          self.train_val_split_ratio = train_val_split_ratio

      @classmethod
      def _parse_gt_line(cls, line: str):
          '''
          Parse one annotation line into ``(points, transcript)``.

          :param line: raw line ``x1,y1,x2,y2,x3,y3,x4,y4,transcript``.
          :return: ``((x1, y1, ..., x4, y4), transcript)``, or ``None`` for a
              blank line or a transcript listed in ``_IGNORED_TRANSCRIPTS``.
          :raises ValueError: if the line has fewer than nine fields or a
              coordinate is not an integer.
          '''
          line = line.strip()
          if not line:
              return None
          # maxsplit=8 keeps commas that belong to the transcript itself
          fields = line.split(',', 8)
          if len(fields) < 9:
              raise ValueError(f'Malformed ground-truth line: {line!r}')
          trans = fields[8]
          if trans in cls._IGNORED_TRANSCRIPTS:
              return None
          return tuple(map(int, fields[:8])), trans

      @staticmethod
      def _crop_bounds(points, height: int, width: int):
          '''
          Compute the axis-aligned bounding box of a quadrilateral, clipped to
          the image.

          :param points: ``(x1, y1, x2, y2, x3, y3, x4, y4)``.
          :param height: image height in pixels.
          :param width: image width in pixels.
          :return: ``(x_min, x_max, y_min, y_max)``, or ``None`` when the
              clipped box has no area.
          '''
          xs, ys = points[0::2], points[1::2]
          # Clip to the image: a negative start index would make the array
          # slice count from the opposite edge and yield a wrong crop.
          x_min, x_max = max(min(xs), 0), min(max(xs), width)
          y_min, y_max = max(min(ys), 0), min(max(ys), height)
          if x_min >= x_max or y_min >= y_max:
              return None
          return x_min, x_max, y_min, y_max

      @staticmethod
      def _split_index(total: int, val_ratio: float) -> int:
          '''Return how many of ``total`` samples go to the training list.'''
          index = int(round((1 - val_ratio) * total))
          # keep the index usable as a slice bound even for odd ratios
          return min(max(index, 0), total)

      def _write_lines(self, file_name: str, lines) -> None:
          '''Write ``lines`` to ``<save_data_dir>/<file_name>`` as UTF-8.'''
          # explicit encoding: the transcripts were read as UTF-8, so they
          # must not be written with a locale-dependent codec
          target = os.path.join(self.save_data_dir, file_name)
          with open(target, 'w', encoding='utf-8') as save_file:
              save_file.writelines(lines)

      def crop_save(self) -> None:
          '''
          Crop every labelled text box and write the label list file(s).

          :raises AssertionError: if the number of images and annotation files
              differ (only while assertions are enabled).
          :raises OSError: if an image cannot be read or a crop cannot be
              written.
          :raises ValueError: if an annotation line is malformed.
          '''
          # sorted so that file numbering and the train/val split are
          # reproducible between runs
          all_img_paths = sorted(glob(os.path.join(self.img_dir, '*.jpg')))
          all_gt_paths = glob(os.path.join(self.gt_dir, '*.txt'))
          # check length
          assert len(all_img_paths) == len(all_gt_paths), (
              f'{len(all_img_paths)} images but {len(all_gt_paths)} '
              'ground-truth files')
          # the image directory is loop-invariant, so create it once up front
          save_img_dir = os.path.join(self.save_data_dir, 'images')
          os.makedirs(save_img_dir, exist_ok=True)
          # label lines, one per saved crop
          text_lines = []
          skipped = 0
          # start to crop and save
          for img_path in tqdm(all_img_paths):
              img = cv2.imread(img_path)
              if img is None:
                  raise OSError(f'Failed to read image: {img_path}')
              height, width = img.shape[:2]
              # splitext only touches the extension, unlike str.replace
              stem = os.path.splitext(os.path.basename(img_path))[0]
              gt_path = os.path.join(self.gt_dir, f'gt_{stem}.txt')
              with open(gt_path, 'r', encoding='utf-8-sig') as file:
                  for raw_line in file:
                      parsed = self._parse_gt_line(raw_line)
                      if parsed is None:
                          continue
                      points, trans = parsed
                      bounds = self._crop_bounds(points, height, width)
                      if bounds is None:
                          # nothing to crop: box is degenerate or off-image
                          skipped += 1
                          continue
                      x_min, x_max, y_min, y_max = bounds
                      img_name = f'textbox_{self.save_id}.jpg'
                      save_img_path = os.path.join(save_img_dir, img_name)
                      # The bounding box is used for every box; for an
                      # axis-aligned rectangle it is the rectangle itself.
                      if not cv2.imwrite(save_img_path,
                                         img[y_min: y_max, x_min: x_max]):
                          raise OSError(f'Failed to write image: {save_img_path}')
                      # save to text-line
                      text_lines.append(f'{img_name}\t{trans}\n')
                      # save_id self increase
                      self.save_id += 1
          if skipped:
              print(f'{skipped} empty text boxes are skipped.')
          if self.train_val_split_ratio:
              # split on the number of saved crops, not on the next free id
              split = self._split_index(len(text_lines),
                                        self.train_val_split_ratio)
              self._write_lines('train.txt', text_lines[:split])
              self._write_lines('val.txt', text_lines[split:])
              print(f'{len(text_lines)} text-box images and 2 text-line file are saved.')
          else:
              self._write_lines('train.txt', text_lines)
              print(f'{len(text_lines)} text-box images and 1 text-line file are saved.')
  if __name__ == '__main__':
      # Command-line entry point. Every option defaults to the location that
      # used to be hard-coded here, so running the script without arguments
      # behaves exactly as before.
      import argparse

      parser = argparse.ArgumentParser(
          description='Crop ICDAR 2015 text boxes into images and write label files.')
      parser.add_argument(
          '--img-dir',
          default='/data/disk7/private/szf/Datasets/ICDAR2015/train',
          help='directory containing the source *.jpg images')
      parser.add_argument(
          '--gt-dir',
          default='/data/disk7/private/szf/Datasets/ICDAR2015/train_local_trans',
          help='directory containing the gt_*.txt annotation files')
      parser.add_argument(
          '--save-data-dir',
          default='/data/disk7/private/szf/Datasets/ICDAR2015/data',
          help='output directory for the crops and the label files')
      parser.add_argument(
          '--val-ratio', type=float, default=0.1,
          help='fraction of the crops listed in val.txt; 0 disables the split')
      args = parser.parse_args()
      icdar2015CropSave(args.img_dir, args.gt_dir, args.save_data_dir,
                        args.val_ratio).crop_save()