12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
import os
import sys

import cv2
import numpy as np
def parse_gt_line(line):
    """Parse one ICDAR2015 ground-truth line.

    Each line holds eight comma-separated integers (the four corner points of
    a quadrilateral, starting from the top-left; it may or may not be an
    axis-aligned rectangle) followed by the transcription.

    Args:
        line: Raw line read from a ``gt_*.txt`` file.

    Returns:
        ``(quad, transcript)`` where ``quad`` is a ``(4, 2)`` int32 array of
        ``(x, y)`` points and ``transcript`` has runs of whitespace collapsed
        to a single space, or ``None`` when the line must be skipped (it ends
        with ``###`` or has an empty transcription).

    Raises:
        ValueError: If one of the eight coordinate fields is not an integer.
    """
    line = line.strip('\ufeff\r\n')
    # For the recognition stage only entries that are not marked '###' are used.
    if line.endswith('###'):
        return None
    # Split at most eight times so that commas inside the transcription
    # (e.g. "$1,000") are preserved instead of being dropped.
    fields = line.split(',', 8)
    if len(fields) < 9:
        return None
    # Keep a single space between words; collapse repeated whitespace.
    transcript = ' '.join(fields[8].split())
    if not transcript:
        return None
    quad = np.array([int(value) for value in fields[:8]], dtype=np.int32).reshape((-1, 2))
    return quad, transcript


def clip_box(quad, image_width, image_height):
    """Return the bounding box of ``quad`` clipped to the image.

    Args:
        quad: ``(N, 2)`` integer array of ``(x, y)`` points.
        image_width: Width of the image in pixels.
        image_height: Height of the image in pixels.

    Returns:
        ``(min_x, min_y, max_x, max_y)`` with inclusive bounds as plain ints,
        or ``None`` when the box lies completely outside the image.
    """
    min_x, min_y = quad.min(axis=0)
    max_x, max_y = quad.max(axis=0)
    # Annotations may extend past the image border. A negative start would
    # make the slice wrap around, and an end past the border would make the
    # slice smaller than the mask, so clamp both ends.
    min_x = max(int(min_x), 0)
    min_y = max(int(min_y), 0)
    max_x = min(int(max_x), image_width - 1)
    max_y = min(int(max_y), image_height - 1)
    if max_x < min_x or max_y < min_y:
        return None
    return min_x, min_y, max_x, max_y


def crop_quad(image, quad):
    """Cut the quadrilateral ``quad`` out of ``image``.

    Pixels inside the bounding box but outside the polygon are set to zero.

    Args:
        image: Image array as returned by ``cv2.imread`` (height, width, channels).
        quad: ``(4, 2)`` integer array of ``(x, y)`` points in image coordinates.

    Returns:
        The masked crop, or ``None`` when the polygon does not overlap the image.
    """
    box = clip_box(quad, image.shape[1], image.shape[0])
    if box is None:
        return None
    min_x, min_y, max_x, max_y = box
    patch = image[min_y:max_y + 1, min_x:max_x + 1, ...]
    mask = np.zeros(patch.shape, dtype=np.uint8)
    # Shift the polygon into crop coordinates before rasterising it.
    region = np.array([quad - [min_x, min_y]], dtype=np.int32)
    mask = cv2.fillPoly(mask, region, (255,) * image.shape[2])
    return cv2.bitwise_and(patch, mask)


def convert_split(split_name, image_directory, gt_directory, target_image_directory, to_write):
    """Convert one dataset split into word crops plus label lines.

    For every image in ``image_directory`` the matching ``gt_<stem>.txt`` file
    in ``gt_directory`` is read; each usable entry is cropped, saved as
    ``<split_name>_<index>.jpg`` in ``target_image_directory`` and recorded in
    ``to_write`` as ``<file name>\\t<transcript>``.

    Args:
        split_name: Prefix for the generated crop file names.
        image_directory: Directory holding the source images.
        gt_directory: Directory holding the ground-truth text files.
        target_image_directory: Directory the crops are written to.
        to_write: Open text file receiving the label lines.

    Raises:
        FileNotFoundError: If an image has no ground-truth file.
    """
    crop_index = 0
    # Sort so that crop numbering is reproducible between runs.
    for image_file in sorted(os.listdir(image_directory)):
        image = cv2.imread(os.path.join(image_directory, image_file))
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            print(f'skipping unreadable image: {image_file}', file=sys.stderr)
            continue
        gt_file = os.path.join(gt_directory, 'gt_' + os.path.splitext(image_file)[0] + '.txt')
        with open(gt_file, mode='r', encoding='utf-8') as to_read:
            for gt_line in to_read:
                parsed = parse_gt_line(gt_line)
                if parsed is None:
                    continue
                quad, transcript = parsed
                crop = crop_quad(image, quad)
                if crop is None:
                    print(f'skipping region outside image in {gt_file}', file=sys.stderr)
                    continue
                target_image_name = f'{split_name}_{crop_index}.jpg'
                # Only record a label when the crop really exists on disk.
                if not cv2.imwrite(os.path.join(target_image_directory, target_image_name), crop):
                    print(f'failed to write {target_image_name}', file=sys.stderr)
                    continue
                crop_index += 1
                to_write.write(f'{target_image_name}\t{transcript}\n')
        to_write.flush()


def main():
    """Convert the ICDAR2015 train and test sets into recognition data.

    Writes the crops to ``<target>/image`` and the label files
    ``train.txt`` / ``eval.txt`` to ``<target>``.
    """
    icdar2015_directory = '/data/OCR/ICDAR2015'
    target_directory = '/data/OCR/ICDAR2015/converted_data'
    target_image_directory = os.path.join(target_directory, 'image')
    train_image_directory = os.path.join(icdar2015_directory, 'ch4_training_images')
    train_gt_directory = os.path.join(icdar2015_directory, 'ch4_training_localization_transcription_gt')
    test_image_directory = os.path.join(icdar2015_directory, 'ch4_test_images')
    test_gt_directory = os.path.join(icdar2015_directory, 'Challenge4_Test_Task4_GT')
    os.makedirs(target_directory, exist_ok=True)
    os.makedirs(target_image_directory, exist_ok=True)
    splits = (
        ('train', train_image_directory, train_gt_directory),
        ('eval', test_image_directory, test_gt_directory),
    )
    for split_name, image_directory, gt_directory in splits:
        label_path = os.path.join(target_directory, split_name + '.txt')
        with open(label_path, mode='w', encoding='utf-8') as to_write:
            convert_split(split_name, image_directory, gt_directory, target_image_directory, to_write)


if __name__ == '__main__':
    main()
|