# convert_image.py - image to table/sentence conversion helpers
# encoding=utf8
import copy
import inspect
import io
import logging
import os
import sys
import time
import requests
import numpy as np
from PIL import Image
# The parent directory must be importable before the project packages below are loaded.
sys.path.append(os.path.dirname(__file__) + "/../")
from pdfminer.layout import LTLine
import traceback
import cv2
from isr.pre_process import count_red_pixel
from format_convert.utils import judge_error_code, add_div, LineTable, get_table_html, get_logger, log, \
    memory_decorator, pil_resize, np2bytes, ocr_cant_read
from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface, from_gpu_interface_redis, \
    from_idc_interface, from_isr_interface
from format_convert.table_correct import get_rotated_image
  22. def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, use_ocr=True):
  23. from format_convert.convert_tree import _Table, _Sentence
  24. def get_cluster(t_list, b_list, axis):
  25. zip_list = list(zip(t_list, b_list))
  26. if len(zip_list) == 0:
  27. return t_list, b_list
  28. if len(zip_list[0]) > 0:
  29. zip_list.sort(key=lambda x: x[1][axis][1])
  30. cluster_list = []
  31. margin = 5
  32. for text, bbox in zip_list:
  33. _find = 0
  34. for cluster in cluster_list:
  35. if abs(cluster[1] - bbox[axis][1]) <= margin:
  36. cluster[0].append([text, bbox])
  37. cluster[1] = bbox[axis][1]
  38. _find = 1
  39. break
  40. if not _find:
  41. cluster_list.append([[[text, bbox]], bbox[axis][1]])
  42. new_text_list = []
  43. new_bbox_list = []
  44. for cluster in cluster_list:
  45. # print("=============convert_image")
  46. # print("cluster_list", cluster)
  47. center_y = 0
  48. for text, bbox in cluster[0]:
  49. center_y += bbox[axis][1]
  50. center_y = int(center_y / len(cluster[0]))
  51. for text, bbox in cluster[0]:
  52. bbox[axis][1] = center_y
  53. new_text_list.append(text)
  54. new_bbox_list.append(bbox)
  55. # print("cluster_list", cluster)
  56. return new_text_list, new_bbox_list
  57. def merge_textbox(textbox_list, in_objs):
  58. delete_obj = []
  59. threshold = 5
  60. textbox_list.sort(key=lambda x:x.bbox[0])
  61. for k in range(len(textbox_list)):
  62. tb1 = textbox_list[k]
  63. if tb1 not in in_objs and tb1 not in delete_obj:
  64. for m in range(k+1, len(textbox_list)):
  65. tb2 = textbox_list[m]
  66. if tb2 in in_objs:
  67. continue
  68. if abs(tb1.bbox[1]-tb2.bbox[1]) <= threshold \
  69. and abs(tb1.bbox[3]-tb2.bbox[3]) <= threshold:
  70. if tb1.bbox[0] <= tb2.bbox[0]:
  71. tb1.text = tb1.text + tb2.text
  72. else:
  73. tb1.text = tb2.text + tb1.text
  74. tb1.bbox[0] = min(tb1.bbox[0], tb2.bbox[0])
  75. tb1.bbox[2] = max(tb1.bbox[2], tb2.bbox[2])
  76. delete_obj.append(tb2)
  77. for _obj in delete_obj:
  78. if _obj in textbox_list:
  79. textbox_list.remove(_obj)
  80. return textbox_list
  81. def idc_process(_image_np):
  82. # 图片倾斜校正,写入原来的图片路径
  83. # print("image_process", image_path)
  84. # g_r_i = get_rotated_image(_image_np, image_path)
  85. # if judge_error_code(g_r_i):
  86. # if is_from_docx:
  87. # return []
  88. # else:
  89. # return g_r_i
  90. # _image_np = cv2.imread(image_path)
  91. # if _image_np is None:
  92. # return []
  93. # return _image_np
  94. # if _image_np is None:
  95. # return []
  96. # idc模型实现图片倾斜校正
  97. h, w = get_best_predict_size2(_image_np, 1080)
  98. image_resize = pil_resize(_image_np, h, w)
  99. # image_resize_path = image_path.split(".")[0] + "_resize_idc." + image_path.split(".")[-1]
  100. # cv2.imwrite(image_resize_path, image_resize)
  101. # with open(image_resize_path, "rb") as f:
  102. # image_bytes = f.read()
  103. image_bytes = np2bytes(image_resize)
  104. angle = from_idc_interface(image_bytes)
  105. if judge_error_code(angle):
  106. if is_from_docx:
  107. return []
  108. else:
  109. return angle
  110. # 根据角度旋转
  111. image_pil = Image.fromarray(_image_np)
  112. _image_np = np.array(image_pil.rotate(angle, expand=1))
  113. # 写入
  114. # idc_path = image_path.split(".")[0] + "_idc." + image_path.split(".")[-1]
  115. # cv2.imwrite(idc_path, image_np)
  116. return _image_np
  117. def isr_process(_image_np):
  118. image_np_copy = copy.deepcopy(_image_np)
  119. # isr模型去除印章
  120. _isr_time = time.time()
  121. if count_red_pixel(_image_np):
  122. # 红色像素达到一定值才过模型
  123. image_bytes = np2bytes(_image_np)
  124. _image_np = from_isr_interface(image_bytes)
  125. if judge_error_code(_image_np):
  126. if is_from_docx:
  127. return []
  128. else:
  129. return _image_np
  130. # [1]代表检测不到印章,直接返回
  131. if isinstance(_image_np, list) and _image_np == [1]:
  132. log("no seals detected!")
  133. _image_np = image_np_copy
  134. log("isr total time "+str(time.time()-_isr_time))
  135. return _image_np
  136. def ocr_process(_image_np, _threshold=1024):
  137. # ocr图片过大内存溢出,需resize
  138. log("ocr_process image shape " + str(_image_np.shape))
  139. # 大图按比例缩小,小图维持不变;若统一拉伸成固定大小如1024会爆显存
  140. if _image_np.shape[0] >= _threshold or _image_np.shape[1] >= _threshold:
  141. best_h, best_w = get_best_predict_size2(_image_np, 1024)
  142. _image_np = pil_resize(_image_np, best_h, best_w)
  143. log("ocr_process image resize " + str(_image_np.shape))
  144. # 调用ocr模型接口
  145. image_bytes = np2bytes(_image_np)
  146. text_list, bbox_list = from_ocr_interface(image_bytes, is_table=True)
  147. if judge_error_code(text_list):
  148. return text_list, text_list
  149. for i in range(len(bbox_list)):
  150. point = bbox_list[i]
  151. bbox_list[i] = [[int(point[0][0]), int(point[0][1])],
  152. [int(point[1][0]), int(point[1][1])],
  153. [int(point[2][0]), int(point[2][1])],
  154. [int(point[3][0]), int(point[3][1])]]
  155. return text_list, bbox_list
  156. def otr_process(_image_np):
  157. # otr模型识别表格,需要图片resize成模型所需大小, 写入另一个路径
  158. best_h, best_w = get_best_predict_size(_image_np)
  159. image_resize = pil_resize(_image_np, best_h, best_w)
  160. # image_resize_path = image_path.split(".")[0] + "_resize_otr." + image_path.split(".")[-1]
  161. # cv2.imwrite(image_resize_path, image_resize)
  162. # 调用otr模型接口
  163. # with open(image_resize_path, "rb") as f:
  164. # image_bytes = f.read()
  165. image_bytes = np2bytes(image_resize)
  166. list_line = from_otr_interface(image_bytes, is_from_pdf)
  167. if judge_error_code(list_line):
  168. if is_from_docx:
  169. return []
  170. else:
  171. return list_line
  172. # otr resize后得到的bbox根据比例还原
  173. start_time = time.time()
  174. ratio = (_image_np.shape[0]/best_h, _image_np.shape[1]/best_w)
  175. for i in range(len(list_line)):
  176. point = list_line[i]
  177. list_line[i] = [int(point[0]*ratio[1]), int(point[1]*ratio[0]),
  178. int(point[2]*ratio[1]), int(point[3]*ratio[0])]
  179. log("otr resize bbox recover " + str(time.time()-start_time))
  180. return list_line
  181. def table_process(list_line, text_list, bbox_list):
  182. # 调用现成方法形成表格
  183. try:
  184. from format_convert.convert_tree import TableLine
  185. list_lines = []
  186. for line in list_line:
  187. list_lines.append(LTLine(1, (line[0], line[1]), (line[2], line[3])))
  188. from format_convert.convert_tree import TextBox
  189. list_text_boxes = []
  190. for i in range(len(bbox_list)):
  191. bbox = bbox_list[i]
  192. b_text = text_list[i]
  193. list_text_boxes.append(TextBox([bbox[0][0], bbox[0][1],
  194. bbox[2][0], bbox[2][1]], b_text))
  195. # for _textbox in list_text_boxes:
  196. # print("==",_textbox.get_text())
  197. lt = LineTable()
  198. tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
  199. # 合并同一行textbox
  200. list_text_boxes = merge_textbox(list_text_boxes, obj_in_table)
  201. return list_text_boxes, tables, obj_in_table
  202. except:
  203. traceback.print_exc()
  204. return [-8], [-8], [-8]
  205. log("into image_preprocess")
  206. try:
  207. if image_np is None:
  208. return []
  209. if image_np.shape[0] <= 20 or image_np.shape[1] <= 20:
  210. return []
  211. # 判断是否需要长图分割
  212. slice_flag = need_image_slice(image_np)
  213. log("need_image_slice " + str(slice_flag) + " " + str(image_np.shape))
  214. idc_flag = False
  215. image_np_list = [image_np]
  216. if slice_flag:
  217. # 方向分类
  218. image_np = idc_process(image_np)
  219. idc_flag = True
  220. if isinstance(image_np, list):
  221. return image_np
  222. # 再判断
  223. if need_image_slice(image_np):
  224. # 长图分割
  225. image_np_list = image_slice_new(image_np)
  226. if len(image_np_list) < 1:
  227. return [-12]
  228. all_obj_list = []
  229. _add_y = 0
  230. for image_np in image_np_list:
  231. print("sub image shape", image_np.shape)
  232. # 整体分辨率限制
  233. threshold = 2000
  234. if image_np.shape[0] > threshold or image_np.shape[1] > threshold:
  235. h, w = get_best_predict_size2(image_np, threshold=threshold)
  236. log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
  237. image_np = pil_resize(image_np, h, w)
  238. # 印章去除
  239. image_np = isr_process(image_np)
  240. if isinstance(image_np, list):
  241. return image_np
  242. # 文字识别
  243. text_list, box_list = ocr_process(image_np)
  244. if judge_error_code(text_list):
  245. return text_list
  246. # 判断ocr识别是否正确
  247. if ocr_cant_read(text_list, box_list) and not idc_flag:
  248. # 方向分类
  249. image_np = idc_process(image_np)
  250. # cv2.imshow("idc_process", image_np)
  251. # cv2.waitKey(0)
  252. if isinstance(image_np, list):
  253. return image_np
  254. # 文字识别
  255. text_list1, box_list_1 = ocr_process(image_np)
  256. if judge_error_code(text_list1):
  257. return text_list1
  258. # 比较字数
  259. # print("ocr process", len("".join(text_list)), len("".join(text_list1)))
  260. if len("".join(text_list)) < len("".join(text_list1)):
  261. text_list = text_list1
  262. box_list = box_list_1
  263. # 表格识别
  264. line_list = otr_process(image_np)
  265. if judge_error_code(line_list):
  266. return line_list
  267. # 表格生成
  268. text_box_list, table_list, obj_in_table_list = table_process(line_list, text_list, box_list)
  269. if judge_error_code(table_list):
  270. return table_list
  271. # 对象生成
  272. obj_list = []
  273. for table in table_list:
  274. obj_list.append(_Table(table["table"], table["bbox"]))
  275. for text_box in text_box_list:
  276. if text_box not in obj_in_table_list:
  277. obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
  278. # 修正y
  279. if len(image_np_list) > 1:
  280. list_y = []
  281. for obj in obj_list:
  282. obj.y += _add_y
  283. list_y.append(obj.y)
  284. if len(list_y) > 0:
  285. _add_y = max(list_y)
  286. # 合并
  287. all_obj_list += obj_list
  288. return all_obj_list
  289. except Exception as e:
  290. log("image_preprocess error")
  291. traceback.print_exc()
  292. return [-1]
  293. @memory_decorator
  294. def picture2text(path, html=False):
  295. log("into picture2text")
  296. try:
  297. # 判断图片中表格
  298. img = cv2.imread(path)
  299. if img is None:
  300. return [-3]
  301. text = image_process(img, path)
  302. if judge_error_code(text):
  303. return text
  304. if html:
  305. text = add_div(text)
  306. return [text]
  307. except Exception as e:
  308. log("picture2text error!")
  309. print("picture2text", traceback.print_exc())
  310. return [-1]
  311. def get_best_predict_size(image_np, times=64):
  312. sizes = []
  313. for i in range(1, 100):
  314. if i*times <= 1300:
  315. sizes.append(i*times)
  316. sizes.sort(key=lambda x: x, reverse=True)
  317. min_len = 10000
  318. best_height = sizes[0]
  319. for height in sizes:
  320. if abs(image_np.shape[0] - height) < min_len:
  321. min_len = abs(image_np.shape[0] - height)
  322. best_height = height
  323. min_len = 10000
  324. best_width = sizes[0]
  325. for width in sizes:
  326. if abs(image_np.shape[1] - width) < min_len:
  327. min_len = abs(image_np.shape[1] - width)
  328. best_width = width
  329. return best_height, best_width
  330. def get_best_predict_size2(image_np, threshold=3000):
  331. h, w = image_np.shape[:2]
  332. scale = threshold / max(h, w)
  333. h = int(h * scale)
  334. w = int(w * scale)
  335. return h, w
  336. def image_slice(image_np):
  337. """
  338. slice the image if the height is to large
  339. :return:
  340. """
  341. _sum = np.average(image_np, axis=1)
  342. list_white_line = []
  343. list_ave = list(_sum)
  344. for _i in range(len(list_ave)):
  345. if (list_ave[_i] > 250).all():
  346. list_white_line.append(_i)
  347. set_white_line = set(list_white_line)
  348. width = image_np.shape[1]
  349. height = image_np.shape[0]
  350. list_images = []
  351. _begin = 0
  352. _end = 0
  353. while 1:
  354. if _end > height:
  355. break
  356. _end += width
  357. while 1:
  358. if _begin in set_white_line:
  359. break
  360. if _begin > height:
  361. break
  362. _begin += 1
  363. _image = image_np[_begin:_end, ...]
  364. list_images.append(_image)
  365. _begin = _end
  366. log("image_slice into %d parts" % (len(list_images)))
  367. return list_images
  368. def image_slice_new(image_np):
  369. """
  370. 长图分割
  371. :return:
  372. """
  373. height, width = image_np.shape[:2]
  374. image_origin = copy.deepcopy(image_np)
  375. # 去除黑边
  376. image_np = remove_black_border(image_np)
  377. # 1. 转化成灰度图
  378. image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2GRAY)
  379. # 2. 二值化
  380. ret, binary = cv2.threshold(image_np, 125, 255, cv2.THRESH_BINARY_INV)
  381. # 3. 膨胀和腐蚀操作的核函数
  382. kernal = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
  383. # 4. 膨胀一次,让轮廓突出
  384. dilation = cv2.dilate(binary, kernal, iterations=1)
  385. # dilation = np.add(np.int0(np.full(dilation.shape, 255)), -1 * np.int0(dilation))
  386. # dilation = np.uint8(dilation)
  387. # cv2.namedWindow("dilation", 0)
  388. # cv2.resizeWindow("dilation", 1000, 800)
  389. # cv2.imshow("dilation", dilation)
  390. # cv2.waitKey(0)
  391. # cv2.imwrite("error.jpg", dilation)
  392. # 按行求平均
  393. width_avg = np.average(np.float32(dilation), axis=1)
  394. zero_index = np.where(width_avg == 0.)[0]
  395. # print(height, width)
  396. # print(width_avg)
  397. # print(width_avg.shape)
  398. # print(zero_index)
  399. # print(zero_index.shape)
  400. # zero_index.sort(key=lambda x: x)
  401. # 截取范围内寻找分割点
  402. max_distance = int(width / 2)
  403. image_list = []
  404. last_h = 0
  405. for i in range(height // width + 1):
  406. h = last_h + width
  407. # 前后的分割点
  408. zero_h_after = zero_index[np.where(zero_index >= h)]
  409. zero_h_before = zero_index[np.where(zero_index <= h)]
  410. # print("last_h, h", last_h, h)
  411. # print("last_h, h", last_h, h)
  412. # print(zero_index.shape)
  413. # print("zero_h_after.shape", zero_h_after.shape)
  414. if zero_h_after.shape[0] == 0:
  415. # 最后一截
  416. last_image = image_origin[last_h:, :, :]
  417. if last_image.shape[0] <= max_distance:
  418. image_list[-1] = np.concatenate([image_list[-1], last_image], axis=0)
  419. else:
  420. image_list.append(last_image)
  421. break
  422. # 分割点距离不能太远
  423. cut_h = zero_h_after.tolist()[0]
  424. if abs(h - cut_h) <= max_distance:
  425. image_list.append(image_origin[last_h:cut_h, :, :])
  426. last_h = cut_h
  427. # 后面找不到往前找
  428. else:
  429. cut_h = zero_h_before.tolist()[-1]
  430. if abs(cut_h - h) <= max_distance:
  431. image_list.append(image_origin[last_h:cut_h, :, :])
  432. last_h = cut_h
  433. # i = 0
  434. # for im in image_list:
  435. # print(im.shape)
  436. # cv2.imwrite("error" + str(i) + ".jpg", im)
  437. # i += 1
  438. # cv2.namedWindow("im", 0)
  439. # cv2.resizeWindow("im", 1000, 800)
  440. # cv2.imshow("im", im)
  441. # cv2.waitKey(0)
  442. log("image_slice into %d parts" % (len(image_list)))
  443. return image_list
  444. def need_image_slice(image_np):
  445. h, w = image_np.shape[:2]
  446. # if h > 3000 and w < 2000:
  447. # return True
  448. if 2. <= h / w and w >= 100:
  449. return True
  450. return False
  451. def remove_black_border(img_np):
  452. try:
  453. # 阈值
  454. threshold = 100
  455. # 转换为灰度图像
  456. gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
  457. # 获取图片尺寸
  458. h, w = gray.shape[:2]
  459. # 无法区分黑色区域超过一半的情况
  460. rowc = gray[:, int(1/2*w)]
  461. colc = gray[int(1/2*h), :]
  462. rowflag = np.argwhere(rowc > threshold)
  463. colflag = np.argwhere(colc > threshold)
  464. left, bottom, right, top = rowflag[0, 0], colflag[-1, 0], rowflag[-1, 0], colflag[0, 0]
  465. # cv2.imshow('remove_black_border', img_np[left:right, top:bottom, :])
  466. # cv2.waitKey()
  467. return img_np[left:right, top:bottom, :]
  468. except:
  469. return img_np
  470. class ImageConvert:
  471. def __init__(self, path, unique_type_dir):
  472. from format_convert.convert_tree import _Document
  473. self._doc = _Document(path)
  474. self.path = path
  475. self.unique_type_dir = unique_type_dir
  476. def init_package(self):
  477. # 各个包初始化
  478. try:
  479. with open(self.path, "rb") as f:
  480. self.image = f.read()
  481. except:
  482. log("cannot open image!")
  483. traceback.print_exc()
  484. self._doc.error_code = [-3]
  485. def convert(self):
  486. from format_convert.convert_tree import _Page, _Image
  487. self.init_package()
  488. if self._doc.error_code is not None:
  489. return
  490. _page = _Page(None, 0)
  491. _image = _Image(self.image, self.path)
  492. _page.add_child(_image)
  493. self._doc.add_child(_page)
  494. def get_html(self):
  495. try:
  496. self.convert()
  497. except:
  498. traceback.print_exc()
  499. self._doc.error_code = [-1]
  500. if self._doc.error_code is not None:
  501. return self._doc.error_code
  502. return self._doc.get_html()
  503. def image_process_old(image_np, image_path, is_from_pdf=False, is_from_docx=False, use_ocr=True):
  504. from format_convert.convert_tree import _Table, _Sentence
  505. def get_cluster(t_list, b_list, axis):
  506. zip_list = list(zip(t_list, b_list))
  507. if len(zip_list) == 0:
  508. return t_list, b_list
  509. if len(zip_list[0]) > 0:
  510. zip_list.sort(key=lambda x: x[1][axis][1])
  511. cluster_list = []
  512. margin = 5
  513. for text, bbox in zip_list:
  514. _find = 0
  515. for cluster in cluster_list:
  516. if abs(cluster[1] - bbox[axis][1]) <= margin:
  517. cluster[0].append([text, bbox])
  518. cluster[1] = bbox[axis][1]
  519. _find = 1
  520. break
  521. if not _find:
  522. cluster_list.append([[[text, bbox]], bbox[axis][1]])
  523. new_text_list = []
  524. new_bbox_list = []
  525. for cluster in cluster_list:
  526. # print("=============convert_image")
  527. # print("cluster_list", cluster)
  528. center_y = 0
  529. for text, bbox in cluster[0]:
  530. center_y += bbox[axis][1]
  531. center_y = int(center_y / len(cluster[0]))
  532. for text, bbox in cluster[0]:
  533. bbox[axis][1] = center_y
  534. new_text_list.append(text)
  535. new_bbox_list.append(bbox)
  536. # print("cluster_list", cluster)
  537. return new_text_list, new_bbox_list
  538. def merge_textbox(textbox_list, in_objs):
  539. delete_obj = []
  540. threshold = 5
  541. textbox_list.sort(key=lambda x:x.bbox[0])
  542. for k in range(len(textbox_list)):
  543. tb1 = textbox_list[k]
  544. if tb1 not in in_objs and tb1 not in delete_obj:
  545. for m in range(k+1, len(textbox_list)):
  546. tb2 = textbox_list[m]
  547. if tb2 in in_objs:
  548. continue
  549. if abs(tb1.bbox[1]-tb2.bbox[1]) <= threshold \
  550. and abs(tb1.bbox[3]-tb2.bbox[3]) <= threshold:
  551. if tb1.bbox[0] <= tb2.bbox[0]:
  552. tb1.text = tb1.text + tb2.text
  553. else:
  554. tb1.text = tb2.text + tb1.text
  555. tb1.bbox[0] = min(tb1.bbox[0], tb2.bbox[0])
  556. tb1.bbox[2] = max(tb1.bbox[2], tb2.bbox[2])
  557. delete_obj.append(tb2)
  558. for _obj in delete_obj:
  559. if _obj in textbox_list:
  560. textbox_list.remove(_obj)
  561. return textbox_list
  562. log("into image_preprocess")
  563. try:
  564. if image_np is None:
  565. return []
  566. # 整体分辨率限制
  567. if image_np.shape[0] > 2000 or image_np.shape[1] > 2000:
  568. h, w = get_best_predict_size2(image_np, threshold=2000)
  569. log("global image resize " + str(image_np.shape[:2]) + " -> " + str(h) + "," + str(w))
  570. image_np = pil_resize(image_np, h, w)
  571. # 图片倾斜校正,写入原来的图片路径
  572. # print("image_process", image_path)
  573. g_r_i = get_rotated_image(image_np, image_path)
  574. if judge_error_code(g_r_i):
  575. if is_from_docx:
  576. return []
  577. else:
  578. return g_r_i
  579. image_np = cv2.imread(image_path)
  580. image_np_copy = copy.deepcopy(image_np)
  581. if image_np is None:
  582. return []
  583. # if image_np is None:
  584. # return []
  585. #
  586. # # idc模型实现图片倾斜校正
  587. # image_resize = pil_resize(image_np, 640, 640)
  588. # image_resize_path = image_path.split(".")[0] + "_resize_idc." + image_path.split(".")[-1]
  589. # cv2.imwrite(image_resize_path, image_resize)
  590. #
  591. # with open(image_resize_path, "rb") as f:
  592. # image_bytes = f.read()
  593. # angle = from_idc_interface(image_bytes)
  594. # if judge_error_code(angle):
  595. # if is_from_docx:
  596. # return []
  597. # else:
  598. # return angle
  599. # # 根据角度旋转
  600. # image_pil = Image.fromarray(image_np)
  601. # image_np = np.array(image_pil.rotate(angle, expand=1))
  602. # # 写入
  603. # idc_path = image_path.split(".")[0] + "_idc." + image_path.split(".")[-1]
  604. # cv2.imwrite(idc_path, image_np)
  605. # isr模型去除印章
  606. _isr_time = time.time()
  607. if count_red_pixel(image_np):
  608. # 红色像素达到一定值才过模型
  609. with open(image_path, "rb") as f:
  610. image_bytes = f.read()
  611. image_np = from_isr_interface(image_bytes)
  612. if judge_error_code(image_np):
  613. if is_from_docx:
  614. return []
  615. else:
  616. return image_np
  617. # [1]代表检测不到印章,直接返回
  618. if isinstance(image_np, list) and image_np == [1]:
  619. log("no seals detected!")
  620. image_np = image_np_copy
  621. else:
  622. isr_path = image_path.split(".")[0] + "_isr." + image_path.split(".")[-1]
  623. cv2.imwrite(isr_path, image_np)
  624. log("isr total time "+str(time.time()-_isr_time))
  625. # otr模型识别表格,需要图片resize成模型所需大小, 写入另一个路径
  626. best_h, best_w = get_best_predict_size(image_np)
  627. # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
  628. image_resize = pil_resize(image_np, best_h, best_w)
  629. image_resize_path = image_path.split(".")[0] + "_resize_otr." + image_path.split(".")[-1]
  630. cv2.imwrite(image_resize_path, image_resize)
  631. # 调用otr模型接口
  632. with open(image_resize_path, "rb") as f:
  633. image_bytes = f.read()
  634. list_line = from_otr_interface(image_bytes, is_from_pdf)
  635. if judge_error_code(list_line):
  636. return list_line
  637. # # 预处理
  638. # if is_from_pdf:
  639. # prob = 0.2
  640. # else:
  641. # prob = 0.5
  642. # with open(image_resize_path, "rb") as f:
  643. # image_bytes = f.read()
  644. # img_new, inputs = table_preprocess(image_bytes, prob)
  645. # if type(img_new) is list and judge_error_code(img_new):
  646. # return img_new
  647. # log("img_new.shape " + str(img_new.shape))
  648. #
  649. # # 调用模型运行接口
  650. # _dict = {"inputs": inputs, "md5": _global.get("md5")}
  651. # result = from_gpu_interface(_dict, model_type="otr", predictor_type="")
  652. # if judge_error_code(result):
  653. # logging.error("from_gpu_interface failed! " + str(result))
  654. # raise requests.exceptions.RequestException
  655. #
  656. # pred = result.get("preds")
  657. # gpu_time = result.get("gpu_time")
  658. # log("otr model predict time " + str(gpu_time))
  659. #
  660. # # # 解压numpy
  661. # # decompressed_array = io.BytesIO()
  662. # # decompressed_array.write(pred)
  663. # # decompressed_array.seek(0)
  664. # # pred = np.load(decompressed_array, allow_pickle=True)['arr_0']
  665. # # log("inputs.shape" + str(pred.shape))
  666. #
  667. # 调用gpu共享内存处理
  668. # _dict = {"inputs": inputs, "md5": _global.get("md5")}
  669. # result = from_gpu_share_memory(_dict, model_type="otr", predictor_type="")
  670. # if judge_error_code(result):
  671. # logging.error("from_gpu_interface failed! " + str(result))
  672. # raise requests.exceptions.RequestException
  673. #
  674. # pred = result.get("preds")
  675. # gpu_time = result.get("gpu_time")
  676. # log("otr model predict time " + str(gpu_time))
  677. #
  678. # # 后处理
  679. # list_line = table_postprocess(img_new, pred, prob)
  680. # log("len(list_line) " + str(len(list_line)))
  681. # if judge_error_code(list_line):
  682. # return list_line
  683. # otr resize后得到的bbox根据比例还原
  684. start_time = time.time()
  685. ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
  686. for i in range(len(list_line)):
  687. point = list_line[i]
  688. list_line[i] = [int(point[0]*ratio[1]), int(point[1]*ratio[0]),
  689. int(point[2]*ratio[1]), int(point[3]*ratio[0])]
  690. log("otr resize bbox recover " + str(time.time()-start_time))
  691. # ocr图片过大内存溢出,需resize
  692. start_time = time.time()
  693. threshold = 3000
  694. ocr_resize_flag = 0
  695. if image_np.shape[0] >= threshold or image_np.shape[1] >= threshold:
  696. ocr_resize_flag = 1
  697. best_h, best_w = get_best_predict_size2(image_np, threshold)
  698. # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
  699. image_resize = pil_resize(image_np, best_h, best_w)
  700. log("ocr_process image resize " + str(image_resize.shape))
  701. image_resize_path = image_path.split(".")[0] + "_resize_ocr." + image_path.split(".")[-1]
  702. cv2.imwrite(image_resize_path, image_resize)
  703. log("ocr resize before " + str(time.time()-start_time))
  704. # 调用ocr模型接口
  705. with open(image_resize_path, "rb") as f:
  706. image_bytes = f.read()
  707. text_list, bbox_list = from_ocr_interface(image_bytes, is_table=True)
  708. if judge_error_code(text_list):
  709. return text_list
  710. # # PaddleOCR内部包括预处理,调用模型运行接口,后处理
  711. # paddle_ocr = PaddleOCR(use_angle_cls=True, lang="ch")
  712. # results = paddle_ocr.ocr(image_resize, det=True, rec=True, cls=True)
  713. # # 循环每张图片识别结果
  714. # text_list = []
  715. # bbox_list = []
  716. # for line in results:
  717. # # print("ocr_interface line", line)
  718. # text_list.append(line[-1][0])
  719. # bbox_list.append(line[0])
  720. # if len(text_list) == 0:
  721. # return []
  722. # ocr resize后的bbox还原
  723. if ocr_resize_flag:
  724. ratio = (image_np.shape[0]/best_h, image_np.shape[1]/best_w)
  725. else:
  726. ratio = (1, 1)
  727. for i in range(len(bbox_list)):
  728. point = bbox_list[i]
  729. bbox_list[i] = [[int(point[0][0]*ratio[1]), int(point[0][1]*ratio[0])],
  730. [int(point[1][0]*ratio[1]), int(point[1][1]*ratio[0])],
  731. [int(point[2][0]*ratio[1]), int(point[2][1]*ratio[0])],
  732. [int(point[3][0]*ratio[1]), int(point[3][1]*ratio[0])]]
  733. # 调用现成方法形成表格
  734. try:
  735. from format_convert.convert_tree import TableLine
  736. list_lines = []
  737. for line in list_line:
  738. list_lines.append(LTLine(1, (line[0], line[1]), (line[2], line[3])))
  739. from format_convert.convert_tree import TextBox
  740. list_text_boxes = []
  741. for i in range(len(bbox_list)):
  742. bbox = bbox_list[i]
  743. b_text = text_list[i]
  744. list_text_boxes.append(TextBox([bbox[0][0], bbox[0][1],
  745. bbox[2][0], bbox[2][1]], b_text))
  746. # for _textbox in list_text_boxes:
  747. # print("==",_textbox.get_text())
  748. lt = LineTable()
  749. tables, obj_in_table, _ = lt.recognize_table(list_text_boxes, list_lines, False)
  750. # 合并同一行textbox
  751. list_text_boxes = merge_textbox(list_text_boxes, obj_in_table)
  752. obj_list = []
  753. for table in tables:
  754. obj_list.append(_Table(table["table"], table["bbox"]))
  755. for text_box in list_text_boxes:
  756. if text_box not in obj_in_table:
  757. obj_list.append(_Sentence(text_box.get_text(), text_box.bbox))
  758. return obj_list
  759. except:
  760. traceback.print_exc()
  761. return [-8]
  762. except Exception as e:
  763. log("image_preprocess error")
  764. traceback.print_exc()
  765. return [-1]
  766. if __name__ == "__main__":
  767. image_slice_new(cv2.imread("C:/Users/Administrator/Desktop/test_image/error23.png"))