convert_image.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. import logging
  2. import os
  3. import sys
  4. sys.path.append(os.path.dirname(__file__) + "/../")
  5. from pdfminer.layout import LTLine
  6. import traceback
  7. import cv2
  8. from format_convert import get_memory_info
  9. from format_convert.utils import judge_error_code, add_div, LineTable, get_table_html
  10. from format_convert.table_correct import get_rotated_image
  11. from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface
  12. def image_process(image_np, image_path, use_ocr=True):
  13. from format_convert.convert_tree import _Table, _Sentence
  14. logging.info("into image_preprocess")
  15. try:
  16. # 图片倾斜校正,写入原来的图片路径
  17. g_r_i = get_rotated_image(image_np, image_path)
  18. if g_r_i == [-1]:
  19. return [-1]
  20. # otr需要图片resize, 写入另一个路径
  21. image_np = cv2.imread(image_path)
  22. if image_np is None:
  23. return []
  24. best_h, best_w = get_best_predict_size(image_np)
  25. image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
  26. # image_resize_path = image_path[:-4] + "_resize" + image_path[-4:]
  27. image_resize_path = image_path.split(".")[0] + "_resize." + image_path.split(".")[-1]
  28. cv2.imwrite(image_resize_path, image_resize)
  29. # 调用otr模型接口
  30. with open(image_resize_path, "rb") as f:
  31. image_bytes = f.read()
  32. list_line = from_otr_interface(image_bytes)
  33. if judge_error_code(list_line):
  34. return list_line
  35. # 将resize后得到的bbox根据比例还原
  36. ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
  37. for i in range(len(list_line)):
  38. point = list_line[i]
  39. list_line[i] = [int(point[0]*ratio[1]), int(point[1]*ratio[0]),
  40. int(point[2]*ratio[1]), int(point[3]*ratio[0])]
  41. # 调用ocr模型接口
  42. with open(image_path, "rb") as f:
  43. image_bytes = f.read()
  44. text_list, bbox_list = from_ocr_interface(image_bytes, True)
  45. if judge_error_code(text_list):
  46. return text_list
  47. # 调用现成方法形成表格
  48. try:
  49. from format_convert.convert_tree import TableLine
  50. list_lines = []
  51. for line in list_line:
  52. list_lines.append(LTLine(1, (line[0], line[1]), (line[2], line[3])))
  53. from format_convert.convert_tree import TextBox
  54. list_text_boxes = []
  55. for i in range(len(bbox_list)):
  56. bbox = bbox_list[i]
  57. b_text = text_list[i]
  58. list_text_boxes.append(TextBox([bbox[0][0], bbox[0][1],
  59. bbox[2][0], bbox[2][1]], b_text))
  60. lt = LineTable()
  61. tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
  62. text = [tables, obj_in_table]
  63. column_list = []
  64. obj_list = []
  65. for table in tables:
  66. obj_list.append(_Table(table["table"], table["bbox"]))
  67. for text_box in list_text_boxes:
  68. if text_box not in obj_in_table:
  69. obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
  70. return obj_list
  71. except:
  72. traceback.print_exc()
  73. return [-8]
  74. except Exception as e:
  75. logging.info("image_preprocess error")
  76. print("image_preprocess", traceback.print_exc())
  77. return [-1]
  78. @get_memory_info.memory_decorator
  79. def picture2text(path, html=False):
  80. logging.info("into picture2text")
  81. try:
  82. # 判断图片中表格
  83. img = cv2.imread(path)
  84. if img is None:
  85. return [-3]
  86. text = image_process(img, path)
  87. if judge_error_code(text):
  88. return text
  89. if html:
  90. text = add_div(text)
  91. return [text]
  92. except Exception as e:
  93. logging.info("picture2text error!")
  94. print("picture2text", traceback.print_exc())
  95. return [-1]
  96. def get_best_predict_size(image_np, times=64):
  97. sizes = []
  98. for i in range(1, 100):
  99. if i*times <= 3000:
  100. sizes.append(i*times)
  101. sizes.sort(key=lambda x: x, reverse=True)
  102. min_len = 10000
  103. best_height = sizes[0]
  104. for height in sizes:
  105. if abs(image_np.shape[0] - height) < min_len:
  106. min_len = abs(image_np.shape[0] - height)
  107. best_height = height
  108. min_len = 10000
  109. best_width = sizes[0]
  110. for width in sizes:
  111. if abs(image_np.shape[1] - width) < min_len:
  112. min_len = abs(image_np.shape[1] - width)
  113. best_width = width
  114. return best_height, best_width
  115. class ImageConvert:
  116. def __init__(self, path, unique_type_dir):
  117. from format_convert.convert_tree import _Document
  118. self._doc = _Document(path)
  119. self.path = path
  120. self.unique_type_dir = unique_type_dir
  121. def init_package(self):
  122. # 各个包初始化
  123. try:
  124. with open(self.path, "rb") as f:
  125. self.image = f.read()
  126. except:
  127. logging.info("cannot open image!")
  128. traceback.print_exc()
  129. self._doc.error_code = [-3]
  130. def convert(self):
  131. from format_convert.convert_tree import _Page, _Image
  132. self.init_package()
  133. if self._doc.error_code is not None:
  134. return
  135. _page = _Page(None, 0)
  136. _image = _Image(self.image, self.path)
  137. _page.add_child(_image)
  138. self._doc.add_child(_page)
  139. def get_html(self):
  140. try:
  141. self.convert()
  142. except:
  143. traceback.print_exc()
  144. self._doc.error_code = [-1]
  145. if self._doc.error_code is not None:
  146. return self._doc.error_code
  147. return self._doc.get_html()