convert_image.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. import logging
  2. import os
  3. import sys
  4. sys.path.append(os.path.dirname(__file__) + "/../")
  5. import traceback
  6. import cv2
  7. from format_convert import get_memory_info
  8. from format_convert.utils import judge_error_code, add_div, LineTable
  9. from format_convert.table_correct import get_rotated_image
  10. from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface
  11. def image_preprocess(image_np, image_path, use_ocr=True):
  12. logging.info("into image_preprocess")
  13. try:
  14. # 长 宽
  15. # resize_size = (1024, 768)
  16. # 限制图片大小
  17. # resize_image(image_path, resize_size)
  18. # 图片倾斜校正,写入原来的图片路径
  19. g_r_i = get_rotated_image(image_np, image_path)
  20. if g_r_i == [-1]:
  21. return [-1], [], [], 0
  22. # otr需要图片resize, 写入另一个路径
  23. image_np = cv2.imread(image_path)
  24. best_h, best_w = get_best_predict_size(image_np)
  25. image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
  26. # image_resize_path = image_path[:-4] + "_resize" + image_path[-4:]
  27. image_resize_path = image_path.split(".")[0] + "_resize." + image_path.split(".")[-1]
  28. cv2.imwrite(image_resize_path, image_resize)
  29. # 调用otr模型接口
  30. with open(image_resize_path, "rb") as f:
  31. image_bytes = f.read()
  32. points, split_lines, bboxes, outline_points = from_otr_interface(image_bytes)
  33. if judge_error_code(points):
  34. return points, [], [], 0
  35. # 将resize后得到的bbox根据比例还原
  36. ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
  37. for i in range(len(bboxes)):
  38. bbox = bboxes[i]
  39. bboxes[i] = [(int(bbox[0][0]*ratio[1]), int(bbox[0][1]*ratio[0])),
  40. (int(bbox[1][0]*ratio[1]), int(bbox[1][1]*ratio[0]))]
  41. for i in range(len(split_lines)):
  42. line = split_lines[i]
  43. split_lines[i] = [(int(line[0][0]*ratio[1]), int(line[0][1]*ratio[0])),
  44. (int(line[1][0]*ratio[1]), int(line[1][1]*ratio[0]))]
  45. for i in range(len(points)):
  46. point = points[i]
  47. points[i] = (int(point[0]*ratio[1]), int(point[1]*ratio[0]))
  48. for i in range(len(outline_points)):
  49. point = outline_points[i]
  50. outline_points[i] = [(int(point[0][0]*ratio[1]), int(point[0][1]*ratio[0])),
  51. (int(point[1][0]*ratio[1]), int(point[1][1]*ratio[0]))]
  52. # 查看是否能输出正确框
  53. for box in bboxes:
  54. cv2.rectangle(image_np, box[0], box[1], (0, 255, 0), 2)
  55. # cv2.namedWindow('bbox', 0)
  56. # cv2.imshow("bbox", image_np)
  57. # cv2.waitKey(0)
  58. # 调用ocr模型接口
  59. with open(image_path, "rb") as f:
  60. image_bytes = f.read()
  61. # 有表格
  62. if len(bboxes) >= 2:
  63. text_list, bbox_list = from_ocr_interface(image_bytes, True)
  64. if judge_error_code(text_list):
  65. return text_list, [], [], 0
  66. # for i in range(len(text_list)):
  67. # print(text_list[i], bbox_list[i])
  68. # 查看是否能输出正确框
  69. # for box in bbox_list:
  70. # cv2.rectangle(image_np, (int(box[0][0]), int(box[0][1])),
  71. # (int(box[2][0]), int(box[2][1])), (255, 0, 0), 1)
  72. # cv2.namedWindow('bbox', 0)
  73. # cv2.imshow("bbox", image_np)
  74. # cv2.waitKey(0)
  75. # text, column_list = get_formatted_table(text_list, bbox_list, bboxes, split_lines)
  76. # 调用现成方法形成表格
  77. try:
  78. from format_convert.convert_tree import TableLine
  79. list_lines = []
  80. for bbox in bboxes:
  81. list_lines.append(TableLine(bbox))
  82. from format_convert.convert_tree import TextBox
  83. list_text_boxes = []
  84. for i in range(len(bbox_list)):
  85. bbox = bbox_list[i]
  86. b_text = text_list[i]
  87. list_text_boxes.append(TextBox([bbox[3], bbox[1]], b_text))
  88. lt = LineTable()
  89. tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines)
  90. text = [tables, obj_in_table]
  91. column_list = []
  92. except:
  93. traceback.print_exc()
  94. text = [-8]
  95. column_list = []
  96. if judge_error_code(text):
  97. return text, [], [], 0
  98. is_table = 1
  99. return text, column_list, outline_points, is_table
  100. # 无表格
  101. else:
  102. if use_ocr:
  103. text = from_ocr_interface(image_bytes)
  104. if judge_error_code(text):
  105. return text, [], [], 0
  106. is_table = 0
  107. return text, [], [], is_table
  108. else:
  109. is_table = 0
  110. return None, [], [], is_table
  111. except Exception as e:
  112. logging.info("image_preprocess error")
  113. print("image_preprocess", traceback.print_exc())
  114. return [-1], [], [], 0
  115. @get_memory_info.memory_decorator
  116. def picture2text(path, html=False):
  117. logging.info("into picture2text")
  118. try:
  119. # 判断图片中表格
  120. img = cv2.imread(path)
  121. if img is None:
  122. return [-3]
  123. text, column_list, outline_points, is_table = image_preprocess(img, path)
  124. if judge_error_code(text):
  125. return text
  126. if html:
  127. text = add_div(text)
  128. return [text]
  129. except Exception as e:
  130. logging.info("picture2text error!")
  131. print("picture2text", traceback.print_exc())
  132. return [-1]
  133. def get_best_predict_size(image_np, times=64):
  134. sizes = []
  135. for i in range(1, 100):
  136. if i*times <= 3000:
  137. sizes.append(i*times)
  138. sizes.sort(key=lambda x: x, reverse=True)
  139. min_len = 10000
  140. best_height = sizes[0]
  141. for height in sizes:
  142. if abs(image_np.shape[0] - height) < min_len:
  143. min_len = abs(image_np.shape[0] - height)
  144. best_height = height
  145. min_len = 10000
  146. best_width = sizes[0]
  147. for width in sizes:
  148. if abs(image_np.shape[1] - width) < min_len:
  149. min_len = abs(image_np.shape[1] - width)
  150. best_width = width
  151. return best_height, best_width