gen_label.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. #Licensed under the Apache License, Version 2.0 (the "License");
  4. #you may not use this file except in compliance with the License.
  5. #You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. #Unless required by applicable law or agreed to in writing, software
  10. #distributed under the License is distributed on an "AS IS" BASIS,
  11. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. #See the License for the specific language governing permissions and
  13. #limitations under the License.
  14. import os
  15. import argparse
  16. import json
  17. def gen_rec_label(input_path, out_label):
  18. with open(out_label, 'w') as out_file:
  19. with open(input_path, 'r') as f:
  20. for line in f.readlines():
  21. tmp = line.strip('\n').replace(" ", "").split(',')
  22. img_path, label = tmp[0], tmp[1]
  23. label = label.replace("\"", "")
  24. out_file.write(img_path + '\t' + label + '\n')
  25. def gen_det_label(root_path, input_dir, out_label):
  26. with open(out_label, 'w') as out_file:
  27. for label_file in os.listdir(input_dir):
  28. img_path = root_path + label_file[3:-4] + ".jpg"
  29. label = []
  30. with open(os.path.join(input_dir, label_file), 'r') as f:
  31. for line in f.readlines():
  32. tmp = line.strip("\n\r").replace("\xef\xbb\xbf",
  33. "").split(',')
  34. points = tmp[:8]
  35. s = []
  36. for i in range(0, len(points), 2):
  37. b = points[i:i + 2]
  38. b = [int(t) for t in b]
  39. s.append(b)
  40. result = {"transcription": tmp[8], "points": s}
  41. label.append(result)
  42. out_file.write(img_path + '\t' + json.dumps(
  43. label, ensure_ascii=False) + '\n')
  44. if __name__ == "__main__":
  45. parser = argparse.ArgumentParser()
  46. parser.add_argument(
  47. '--mode',
  48. type=str,
  49. default="rec",
  50. help='Generate rec_label or det_label, can be set rec or det')
  51. parser.add_argument(
  52. '--root_path',
  53. type=str,
  54. default=".",
  55. help='The root directory of images.Only takes effect when mode=det ')
  56. parser.add_argument(
  57. '--input_path',
  58. type=str,
  59. default=".",
  60. help='Input_label or input path to be converted')
  61. parser.add_argument(
  62. '--output_label',
  63. type=str,
  64. default="out_label.txt",
  65. help='Output file name')
  66. args = parser.parse_args()
  67. if args.mode == "rec":
  68. print("Generate rec label")
  69. gen_rec_label(args.input_path, args.output_label)
  70. elif args.mode == "det":
  71. gen_det_label(args.root_path, args.input_path, args.output_label)