# convert_icdar2015_rec.py

import os
import sys

import cv2
import numpy as np
  4. if __name__ == '__main__':
  5. icdar2015_directory = '/data/OCR/ICDAR2015'
  6. target_directory = '/data/OCR/ICDAR2015/converted_data'
  7. target_image_directory = os.path.join(target_directory, 'image')
  8. train_image_directory = os.path.join(icdar2015_directory, 'ch4_training_images')
  9. train_gt_directory = os.path.join(icdar2015_directory, 'ch4_training_localization_transcription_gt')
  10. test_image_directory = os.path.join(icdar2015_directory, 'ch4_test_images')
  11. test_gt_directory = os.path.join(icdar2015_directory, 'Challenge4_Test_Task4_GT')
  12. os.makedirs(target_directory, exist_ok=True)
  13. os.makedirs(target_image_directory, exist_ok=True)
  14. for m_name, m_image_directory, m_gt_directory in zip(['train', 'eval'],
  15. [train_image_directory, test_image_directory],
  16. [train_gt_directory, test_gt_directory]):
  17. m_index = 0
  18. with open(os.path.join(target_directory, m_name + '.txt'), mode='w', encoding='utf-8') as to_write:
  19. for m_image_file in os.listdir(m_image_directory):
  20. m_gt_file = os.path.join(m_gt_directory, 'gt_' + os.path.splitext(m_image_file)[0] + '.txt')
  21. m_img = cv2.imread(os.path.join(m_image_directory, m_image_file))
  22. with open(m_gt_file, mode='r', encoding='utf-8') as to_read:
  23. # 识别阶段只考虑每行中非###的字段
  24. for m_line in to_read:
  25. m_line = m_line.strip('\ufeff\n')
  26. if not m_line.endswith('###'):
  27. # 前八个为从左上角开始的四个点的坐标,这里是四个点的多边形,可能是矩形罢了,用逗号进行了间隔
  28. coordinates_and_transcript = m_line.split(',')
  29. # 保留字符串中唯一的一个空格,去除多个空格
  30. transcript = ' '.join(''.join(coordinates_and_transcript[8:]).split())
  31. if len(transcript) == 0:
  32. continue
  33. np_coordinates = np.array([int(_) for _ in coordinates_and_transcript[:8]]).reshape((-1, 2))
  34. min_x, min_y = np.min(np_coordinates, axis=0)
  35. max_x, max_y = np.max(np_coordinates, axis=0)
  36. m_width = max_x - min_x + 1
  37. m_height = max_y - min_y + 1
  38. m_target_roi = np.zeros((m_height, m_width, m_img.shape[2]), dtype=np.uint8)
  39. m_region = np.array([np_coordinates - [min_x, min_y]], dtype=np.int32)
  40. m_target_roi = cv2.fillPoly(m_target_roi,
  41. m_region,
  42. (255,) * m_img.shape[2])
  43. m_target_roi = cv2.bitwise_and(m_img[min_y:max_y + 1, min_x:max_x + 1, ...], m_target_roi)
  44. target_image_name = f'{m_name}_{m_index}.jpg'
  45. cv2.imwrite(os.path.join(target_image_directory, target_image_name), m_target_roi)
  46. m_index += 1
  47. to_write.write(f'{target_image_name}\t{transcript}\n')
  48. to_write.flush()