# table_line_new.py
  1. import copy
  2. import time
  3. import traceback
  4. import numpy as np
  5. import cv2
  6. import matplotlib.pyplot as plt
  7. from format_convert.utils import log, pil_resize
  8. def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
  9. log("into table_line, prob is " + str(prob))
  10. # resize
  11. w, h = size
  12. img_new = pil_resize(img, h, w)
  13. img_show = copy.deepcopy(img_new)
  14. # predict
  15. start_time = time.time()
  16. pred = model.predict(np.array([img_new]))
  17. pred = pred[0]
  18. log("otr model predict time " + str(time.time() - start_time))
  19. # show
  20. show(pred, title='pred', prob=prob, mode=1, is_test=is_test)
  21. # 根据点获取线
  22. start_time = time.time()
  23. line_list = points2lines(pred, False, prob=prob)
  24. log("points2lines " + str(time.time() - start_time))
  25. if not line_list:
  26. return []
  27. show(line_list, title="points2lines", mode=2, is_test=is_test)
  28. # 清除短线
  29. start_time = time.time()
  30. line_list = delete_short_lines(line_list, img_new.shape)
  31. show(line_list, title="delete_short_lines", mode=2, is_test=is_test)
  32. log("delete_short_lines " + str(time.time() - start_time))
  33. # 分成横竖线
  34. start_time = time.time()
  35. row_line_list = []
  36. col_line_list = []
  37. for line in line_list:
  38. if line[0] == line[2]:
  39. col_line_list.append(line)
  40. elif line[1] == line[3]:
  41. row_line_list.append(line)
  42. log("divide rows and cols " + str(time.time() - start_time))
  43. # 两种线都需要存在,否则跳过
  44. if not row_line_list or not col_line_list:
  45. return []
  46. # 合并错开线
  47. start_time = time.time()
  48. row_line_list = merge_line(row_line_list, axis=0)
  49. col_line_list = merge_line(col_line_list, axis=1)
  50. show(row_line_list + col_line_list, title="merge_line", mode=2, is_test=is_test)
  51. log("merge_line " + str(time.time() - start_time))
  52. # 计算交点
  53. cross_points = get_points(row_line_list, col_line_list, (img_new.shape[0], img_new.shape[1]))
  54. if not cross_points:
  55. return []
  56. # 删除无交点线 需重复两次才删的干净
  57. row_line_list, col_line_list = delete_single_lines(row_line_list, col_line_list, cross_points)
  58. cross_points = get_points(row_line_list, col_line_list, (img_new.shape[0], img_new.shape[1]))
  59. row_line_list, col_line_list = delete_single_lines(row_line_list, col_line_list, cross_points)
  60. if not row_line_list or not col_line_list:
  61. return []
  62. # 多个表格分割线,获取多个表格区域
  63. start_time = time.time()
  64. split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
  65. area_row_line_list, area_col_line_list, area_point_list = get_split_area(split_y, row_line_list, col_line_list, cross_points)
  66. log("get_split_area " + str(time.time() - start_time))
  67. # 根据区域循环
  68. need_split_flag = False
  69. for i in range(len(area_point_list)):
  70. sub_row_line_list = area_row_line_list[i]
  71. sub_col_line_list = area_col_line_list[i]
  72. sub_point_list = area_point_list[i]
  73. # 修复边框
  74. start_time = time.time()
  75. new_rows, new_cols, long_rows, long_cols = fix_outline(img_new,
  76. sub_row_line_list,
  77. sub_col_line_list,
  78. sub_point_list)
  79. # 如有补线
  80. if new_rows or new_cols:
  81. # 连接至补线的延长线
  82. if long_rows:
  83. sub_row_line_list = long_rows
  84. if long_cols:
  85. sub_col_line_list = long_cols
  86. # 新的补线
  87. if new_rows:
  88. sub_row_line_list += new_rows
  89. if new_cols:
  90. sub_col_line_list += new_cols
  91. need_split_flag = True
  92. area_row_line_list[i] = sub_row_line_list
  93. area_col_line_list[i] = sub_col_line_list
  94. row_line_list = [y for x in area_row_line_list for y in x]
  95. col_line_list = [y for x in area_col_line_list for y in x]
  96. if need_split_flag:
  97. # 修复边框后重新计算交点
  98. cross_points = get_points(row_line_list, col_line_list, (img_new.shape[0], img_new.shape[1]))
  99. split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
  100. area_row_line_list, area_col_line_list, area_point_list = get_split_area(split_y, row_line_list, col_line_list, cross_points)
  101. show(cross_points, title="get_points", img=img_show, mode=4, is_test=is_test)
  102. show(split_lines, title="split_lines", img=img_show, mode=3, is_test=is_test)
  103. show(row_line_list + col_line_list, title="fix_outline", mode=2, is_test=is_test)
  104. log("fix_outline " + str(time.time() - start_time))
  105. # 根据区域循环
  106. for i in range(len(area_point_list)):
  107. sub_row_line_list = area_row_line_list[i]
  108. sub_col_line_list = area_col_line_list[i]
  109. sub_point_list = area_point_list[i]
  110. # 验证轮廓的4个交点
  111. sub_row_line_list, sub_col_line_list = fix_4_points(sub_point_list, sub_row_line_list, sub_col_line_list)
  112. # 把四个边线在加一次
  113. sub_point_list = get_points(sub_row_line_list, sub_col_line_list, (img_new.shape[0], img_new.shape[1]))
  114. sub_row_line_list, sub_col_line_list = add_outline(sub_point_list, sub_row_line_list, sub_col_line_list)
  115. # 修复内部缺线
  116. start_time = time.time()
  117. sub_row_line_list, sub_col_line_list = fix_inner(sub_row_line_list, sub_col_line_list, sub_point_list)
  118. log("fix_inner " + str(time.time() - start_time))
  119. show(sub_row_line_list + sub_col_line_list, title="fix_inner1", mode=2, is_test=is_test)
  120. # 合并错开
  121. start_time = time.time()
  122. sub_row_line_list = merge_line(sub_row_line_list, axis=0)
  123. sub_col_line_list = merge_line(sub_col_line_list, axis=1)
  124. log("merge_line " + str(time.time() - start_time))
  125. show(sub_row_line_list + sub_col_line_list, title="merge_line", mode=2, is_test=is_test)
  126. # 修复内部线后重新计算交点
  127. start_time = time.time()
  128. cross_points = get_points(sub_row_line_list, sub_col_line_list, (img_new.shape[0], img_new.shape[1]))
  129. show(cross_points, title="get_points3", img=img_show, mode=4, is_test=is_test)
  130. # 消除线突出,获取标准的线
  131. area_row_line_list[i], area_col_line_list[i] = get_standard_lines(sub_row_line_list, sub_col_line_list)
  132. show(area_row_line_list[i] + area_col_line_list[i], title="get_standard_lines", mode=2, is_test=is_test)
  133. row_line_list = [y for x in area_row_line_list for y in x]
  134. col_line_list = [y for x in area_col_line_list for y in x]
  135. line_list = row_line_list + col_line_list
  136. # 打印处理后线
  137. show(line_list, title="all", img=img_show, mode=5, is_test=is_test)
  138. log("otr postprocess table_line " + str(time.time() - start_time))
  139. return line_list
  140. def table_line_pdf(line_list, page_w, page_h, is_test=0):
  141. for i, line in enumerate(line_list):
  142. line_list[i] = [int(x) for x in line]
  143. img_new = np.full([int(page_h+1), int(page_w+1), 3], 255, dtype=np.uint8)
  144. img_show = copy.deepcopy(img_new)
  145. # 分成横竖线
  146. start_time = time.time()
  147. row_line_list = []
  148. col_line_list = []
  149. for line in line_list:
  150. if line[0] == line[2]:
  151. col_line_list.append(line)
  152. elif line[1] == line[3]:
  153. row_line_list.append(line)
  154. log("pdf divide rows and cols " + str(time.time() - start_time))
  155. show(row_line_list + col_line_list, title="divide", mode=2, is_test=is_test)
  156. # 两种线都需要存在,否则跳过
  157. if not row_line_list or not col_line_list:
  158. return []
  159. # 计算交点
  160. cross_points = get_points(row_line_list, col_line_list, (img_new.shape[0], img_new.shape[1]))
  161. if not cross_points:
  162. return []
  163. show(cross_points, title="get_points", img=img_show, mode=4, is_test=is_test)
  164. # 多个表格分割线,获取多个表格区域
  165. start_time = time.time()
  166. split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
  167. area_row_line_list, area_col_line_list, area_point_list = get_split_area(split_y, row_line_list, col_line_list, cross_points)
  168. log("pdf get_split_area " + str(time.time() - start_time))
  169. show(split_lines, title="split_lines", img=img_show, mode=3, is_test=is_test)
  170. # 根据区域循环
  171. need_split_flag = False
  172. for i in range(len(area_point_list)):
  173. sub_row_line_list = area_row_line_list[i]
  174. sub_col_line_list = area_col_line_list[i]
  175. sub_point_list = area_point_list[i]
  176. # 修复边框
  177. start_time = time.time()
  178. new_rows, new_cols, long_rows, long_cols = fix_outline(img_new,
  179. sub_row_line_list,
  180. sub_col_line_list,
  181. sub_point_list)
  182. # 如有补线
  183. if new_rows or new_cols:
  184. # 连接至补线的延长线
  185. if long_rows:
  186. sub_row_line_list = long_rows
  187. if long_cols:
  188. sub_col_line_list = long_cols
  189. # 新的补线
  190. if new_rows:
  191. sub_row_line_list += new_rows
  192. if new_cols:
  193. sub_col_line_list += new_cols
  194. need_split_flag = True
  195. area_row_line_list[i] = sub_row_line_list
  196. area_col_line_list[i] = sub_col_line_list
  197. row_line_list = [y for x in area_row_line_list for y in x]
  198. col_line_list = [y for x in area_col_line_list for y in x]
  199. if need_split_flag:
  200. # 修复边框后重新计算交点
  201. cross_points = get_points(row_line_list, col_line_list, (img_new.shape[0], img_new.shape[1]))
  202. split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
  203. area_row_line_list, area_col_line_list, area_point_list = get_split_area(split_y, row_line_list, col_line_list, cross_points)
  204. log("pdf fix_outline " + str(time.time() - start_time))
  205. # 根据区域循环
  206. for i in range(len(area_point_list)):
  207. sub_row_line_list = area_row_line_list[i]
  208. sub_col_line_list = area_col_line_list[i]
  209. sub_point_list = area_point_list[i]
  210. # 修复内部缺线
  211. start_time = time.time()
  212. sub_row_line_list, sub_col_line_list = fix_inner(sub_row_line_list, sub_col_line_list, sub_point_list)
  213. log("pdf fix_inner " + str(time.time() - start_time))
  214. show(sub_row_line_list + sub_col_line_list, title="fix_inner1", mode=2, is_test=is_test)
  215. # 修复内部线后重新计算交点
  216. start_time = time.time()
  217. cross_points = get_points(sub_row_line_list, sub_col_line_list, (img_new.shape[0], img_new.shape[1]))
  218. show(cross_points, title="get_points3", img=img_show, mode=4, is_test=is_test)
  219. row_line_list = [y for x in area_row_line_list for y in x]
  220. col_line_list = [y for x in area_col_line_list for y in x]
  221. line_list = row_line_list + col_line_list
  222. # 打印处理后线
  223. show(line_list, title="all", img=img_show, mode=5, is_test=is_test)
  224. log("pdf otr postprocess table_line " + str(time.time() - start_time))
  225. return line_list
  226. def show(pred_or_lines, title='', prob=0.2, img=None, mode=1, is_test=0):
  227. if not is_test:
  228. return
  229. if mode == 1:
  230. plt.figure()
  231. plt.title(title)
  232. _array = []
  233. for _h in range(len(pred_or_lines)):
  234. _line = []
  235. for _w in range(len(pred_or_lines[_h])):
  236. _prob = pred_or_lines[_h][_w]
  237. if _prob[0] > prob:
  238. _line.append((0, 0, 255))
  239. elif _prob[1] > prob:
  240. _line.append((255, 0, 0))
  241. else:
  242. _line.append((255, 255, 255))
  243. _array.append(_line)
  244. # plt.axis('off')
  245. plt.imshow(np.array(_array))
  246. plt.show()
  247. elif mode == 2:
  248. plt.figure()
  249. plt.title(title)
  250. for _line in pred_or_lines:
  251. x0, y0, x1, y1 = _line
  252. plt.plot([x0, x1], [y0, y1])
  253. plt.show()
  254. elif mode == 3:
  255. for _line in pred_or_lines:
  256. x0, y0 = _line[0]
  257. x1, y1 = _line[1]
  258. cv2.line(img, [int(x0), int(y0)], [int(x1), int(y1)], (0, 0, 255), 2)
  259. cv2.namedWindow(title, cv2.WINDOW_NORMAL)
  260. cv2.imshow(title, img)
  261. cv2.waitKey(0)
  262. elif mode == 4:
  263. for point in pred_or_lines:
  264. point = [int(x) for x in point]
  265. cv2.circle(img, (point[0], point[1]), 1, (0, 255, 0), 2)
  266. cv2.namedWindow(title, cv2.WINDOW_NORMAL)
  267. cv2.imshow(title, img)
  268. cv2.waitKey(0)
  269. elif mode == 5:
  270. for _line in pred_or_lines:
  271. x0, y0, x1, y1 = _line
  272. cv2.line(img, [int(x0), int(y0)], [int(x1), int(y1)], (0, 255, 0), 2)
  273. cv2.namedWindow(title, cv2.WINDOW_NORMAL)
  274. cv2.imshow(title, img)
  275. cv2.waitKey(0)
def points2lines(pred, sourceP_LB=True, prob=0.2, line_width=8, padding=3, min_len=10,
                 cell_width=13):
    """Convert a per-pixel line prediction map into horizontal/vertical lines.

    Channel 0 of ``pred`` is scanned row by row for horizontal runs, channel 1
    column by column for vertical runs. Runs are padded, clustered with
    ``lines_cluster`` and collapsed to their centre line.

    :param pred: prediction map indexed as ``pred[row][col] -> (h_prob, v_prob)``;
        must support ``pred[..., k]`` (numpy-style indexing).
    :param sourceP_LB: when true, row indices are mirrored
        (``height - 1 - index``). NOTE(review): the horizontal pass gates on
        ``_sum[h_index]`` but reads ``pred[h_i]``, and the vertical pass reads
        ``pred[h_index]`` but records ``h_i``; the two passes only agree when
        this flag is false - confirm the intended behaviour.
    :param prob: probability threshold for a pixel to count as part of a line.
    :param line_width: scan step, also passed to ``lines_cluster``.
    :param padding: unused in this function.
    :param min_len: rows/columns with fewer pixels above ``prob`` are skipped.
    :param cell_width: unused in this function.
    :return: list of ``[x0, y0, x1, y1]`` lines, horizontal ones first.
    """
    _time = time.time()
    log("starting points2lines")
    height = len(pred)
    width = len(pred[0])
    # Number of pixels above the threshold in every row (horizontal channel).
    _sum = list(np.sum(np.array((pred[..., 0] > prob)).astype(int), axis=1))
    h_index = -1
    h_lines = []
    v_lines = []
    _step = line_width
    # ---- horizontal pass: one scan per row ----
    while 1:
        h_index += 1
        if h_index >= height:
            break
        # NOTE(review): the scan starts at index -1, so the first read below
        # is pred[h_i][-1], i.e. the last column (negative indexing).
        w_index = -1
        if sourceP_LB:
            h_i = height - 1 - h_index
        else:
            h_i = h_index
        _start = None
        # Skip rows with too few candidate pixels.
        if _sum[h_index] < min_len:
            continue
        last_back = 0
        while 1:
            if w_index >= width:
                # End of the row: close the run that is still open.
                if _start is not None:
                    _end = w_index - 1
                    _bbox = [_start, h_i, _end, h_i]
                    _dict = {"bbox": _bbox}
                    h_lines.append(_dict)
                    _start = None
                break
            _h, _v = pred[h_i][w_index]
            if _h > prob:
                # Hit: open a run if needed and jump a full step forward.
                if _start is None:
                    _start = w_index
                w_index += _step
            else:
                # Miss: close the open run ...
                if _start is not None:
                    _end = w_index - 1
                    _bbox = [_start, h_i, _end, h_i]
                    _dict = {"bbox": _bbox}
                    h_lines.append(_dict)
                    _start = None
                # ... then step back half a step to look at the skipped
                # pixels, but never to or before the previous back-off
                # position (guarantees forward progress).
                w_index -= _step // 2
                if w_index <= last_back:
                    w_index = last_back + _step // 2
                last_back = w_index
    log("starting points2lines 1")
    # ---- vertical pass: one scan per column ----
    w_index = -1
    # Number of pixels above the threshold in every column (vertical channel).
    _sum = list(np.sum(np.array((pred[..., 1] > prob)).astype(int), axis=0))
    _step = line_width
    while 1:
        w_index += 1
        if w_index >= width:
            break
        # Skip columns with too few candidate pixels.
        if _sum[w_index] < min_len:
            continue
        # NOTE(review): as above, the first read is pred[-1][w_index] (last row).
        h_index = -1
        _start = None
        last_back = 0
        # list_test / list_lineprob are filled but never read.
        list_test = []
        list_lineprob = []
        while 1:
            if h_index >= height:
                # End of the column: close the run that is still open.
                if _start is not None:
                    _end = last_h
                    _bbox = [w_index, _start, w_index, _end]
                    _dict = {"bbox": _bbox}
                    v_lines.append(_dict)
                    _start = None
                    list_test.append(_dict)
                break
            if sourceP_LB:
                h_i = height - 1 - h_index
            else:
                h_i = h_index
            _h, _v = pred[h_index][w_index]
            list_lineprob.append((h_index, _v))
            if _v > prob:
                # Hit: open a run if needed and jump a full step forward.
                if _start is None:
                    _start = h_i
                h_index += _step
            else:
                # Miss: close the open run at the previously visited row ...
                if _start is not None:
                    _end = last_h
                    _bbox = [w_index, _start, w_index, _end]
                    _dict = {"bbox": _bbox}
                    v_lines.append(_dict)
                    _start = None
                    list_test.append(_dict)
                # ... then back off half a step, never to or before the
                # previous back-off position.
                h_index -= _step // 2
                if h_index <= last_back:
                    h_index = last_back + _step // 2
                last_back = h_index
            # Row visited in this iteration; used as the end of a run.
            last_h = h_i
    log("starting points2lines 2")
    # Pad every run by 2 px along its direction (start clamped at 0) and
    # collapse it to its centre coordinate.
    for _line in h_lines:
        _bbox = _line["bbox"]
        _bbox = [max(_bbox[0] - 2, 0), (_bbox[1] + _bbox[3]) / 2, _bbox[2] + 2, (_bbox[1] + _bbox[3]) / 2]
        _line["bbox"] = _bbox
    for _line in v_lines:
        _bbox = _line["bbox"]
        _bbox = [(_bbox[0] + _bbox[2]) / 2, max(_bbox[1] - 2, 0), (_bbox[0] + _bbox[2]) / 2, _bbox[3] + 2]
        _line["bbox"] = _bbox
    # Cluster nearby runs into single boxes.
    h_lines = lines_cluster(h_lines, line_width=line_width)
    v_lines = lines_cluster(v_lines, line_width=line_width)
    # Collapse every cluster box to its centre line, padded by 1 px.
    list_line = []
    for _line in h_lines:
        _bbox = _line["bbox"]
        _bbox = [max(_bbox[0] - 1, 0), (_bbox[1] + _bbox[3]) / 2, _bbox[2] + 1, (_bbox[1] + _bbox[3]) / 2]
        list_line.append(_bbox)
    for _line in v_lines:
        _bbox = _line["bbox"]
        _bbox = [(_bbox[0] + _bbox[2]) / 2, max(_bbox[1] - 1, 0), (_bbox[0] + _bbox[2]) / 2, _bbox[3] + 1]
        list_line.append(_bbox)
    log("points2lines cost %.2fs" % (time.time() - _time))
    return list_line
  406. def lines_cluster(list_lines, line_width):
  407. after_len = 0
  408. prelength = len(list_lines)
  409. append_width = line_width // 2
  410. while 1:
  411. c_lines = []
  412. first_len = after_len
  413. for _line in list_lines:
  414. bbox = _line["bbox"]
  415. _find = False
  416. for c_l_i in range(len(c_lines)):
  417. c_l = c_lines[len(c_lines) - c_l_i - 1]
  418. bbox1 = c_l["bbox"]
  419. bboxa = [max(0, bbox[0] - append_width), max(0, bbox[1] - append_width), bbox[2] + append_width,
  420. bbox[3] + append_width]
  421. bboxb = [max(0, bbox1[0] - append_width), max(0, bbox1[1] - append_width), bbox1[2] + append_width,
  422. bbox1[3] + append_width]
  423. _iou = getIOU(bboxa, bboxb)
  424. if _iou > 0:
  425. new_bbox = [min(bbox[0], bbox[2], bbox1[0], bbox1[2]), min(bbox[1], bbox[3], bbox1[1], bbox1[3]),
  426. max(bbox[0], bbox[2], bbox1[0], bbox1[2]), max(bbox[1], bbox[3], bbox1[1], bbox1[3])]
  427. _find = True
  428. c_l["bbox"] = new_bbox
  429. break
  430. if not _find:
  431. c_lines.append(_line)
  432. after_len = len(c_lines)
  433. if first_len == after_len:
  434. break
  435. list_lines = c_lines
  436. log("cluster lines from %d to %d" % (prelength, len(list_lines)))
  437. return c_lines
  438. def getIOU(bbox0, bbox1):
  439. width = abs(max(bbox0[2], bbox1[2]) - min(bbox0[0], bbox1[0])) - (
  440. abs(bbox0[2] - bbox0[0]) + abs(bbox1[2] - bbox1[0]))
  441. height = abs(max(bbox0[3], bbox1[3]) - min(bbox0[1], bbox1[1])) - (
  442. abs(bbox0[3] - bbox0[1]) + abs(bbox1[3] - bbox1[1]))
  443. if width <= 0 and height <= 0:
  444. iou = abs(width * height / min(abs((bbox0[2] - bbox0[0]) * (bbox0[3] - bbox0[1])),
  445. abs((bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]))))
  446. # print("getIOU", iou)
  447. return iou + 0.1
  448. return 0
  449. def delete_short_lines(list_lines, image_shape, scale=100):
  450. # 排除太短的线
  451. x_min_len = max(5, int(image_shape[0] / scale))
  452. y_min_len = max(5, int(image_shape[1] / scale))
  453. new_list_lines = []
  454. for line in list_lines:
  455. if line[0] == line[2]:
  456. if abs(line[3] - line[1]) >= y_min_len:
  457. # print("y_min_len", abs(line[3] - line[1]), y_min_len)
  458. new_list_lines.append(line)
  459. else:
  460. if abs(line[2] - line[0]) >= x_min_len:
  461. # print("x_min_len", abs(line[2] - line[0]), x_min_len)
  462. new_list_lines.append(line)
  463. return new_list_lines
  464. def delete_single_lines(row_line_list, col_line_list, point_list):
  465. new_col_line_list = []
  466. min_point_cnt = 2
  467. for line in col_line_list:
  468. p_cnt = 0
  469. for p in point_list:
  470. # if line[0] == p[0] and line[1] <= p[1] <= line[3]:
  471. if line[0] == p[0]:
  472. p_cnt += 1
  473. if p_cnt >= min_point_cnt:
  474. new_col_line_list.append(line)
  475. break
  476. new_row_line_list = []
  477. for line in row_line_list:
  478. p_cnt = 0
  479. for p in point_list:
  480. # if line[1] == p[1] and line[0] <= p[0] <= line[2]:
  481. if line[1] == p[1]:
  482. p_cnt += 1
  483. if p_cnt >= min_point_cnt:
  484. new_row_line_list.append(line)
  485. break
  486. return new_row_line_list, new_col_line_list
  487. def merge_line(lines, axis, threshold=5):
  488. """
  489. 解决模型预测一条直线错开成多条直线,合并成一条直线
  490. :param lines: 线条列表
  491. :param axis: 0:横线 1:竖线
  492. :param threshold: 两条线间像素差阈值
  493. :return: 合并后的线条列表
  494. """
  495. # 任意一条line获取该合并的line,横线往下找,竖线往右找
  496. lines.sort(key=lambda x: (x[axis], x[1 - axis]))
  497. merged_lines = []
  498. used_lines = []
  499. for line1 in lines:
  500. if line1 in used_lines:
  501. continue
  502. merged_line = [line1]
  503. used_lines.append(line1)
  504. for line2 in lines:
  505. if line2 in used_lines:
  506. continue
  507. if line1[1 - axis] - threshold <= line2[1 - axis] <= line1[1 - axis] + threshold:
  508. # 计算基准长度
  509. min_axis = 10000
  510. max_axis = 0
  511. for line3 in merged_line:
  512. if line3[axis] < min_axis:
  513. min_axis = line3[axis]
  514. if line3[axis + 2] > max_axis:
  515. max_axis = line3[axis + 2]
  516. # 判断两条线有无交集
  517. if min_axis <= line2[axis] <= max_axis \
  518. or min_axis <= line2[axis + 2] <= max_axis:
  519. merged_line.append(line2)
  520. used_lines.append(line2)
  521. if merged_line:
  522. merged_lines.append(merged_line)
  523. # 合并line
  524. result_lines = []
  525. for merged_line in merged_lines:
  526. # 获取line宽的平均值
  527. axis_average = 0
  528. for line in merged_line:
  529. axis_average += line[1 - axis]
  530. axis_average = int(axis_average / len(merged_line))
  531. # 获取最长line两端
  532. merged_line.sort(key=lambda x: (x[axis]))
  533. axis_start = merged_line[0][axis]
  534. merged_line.sort(key=lambda x: (x[axis + 2]))
  535. axis_end = merged_line[-1][axis + 2]
  536. if axis:
  537. result_lines.append([axis_average, axis_start, axis_average, axis_end])
  538. else:
  539. result_lines.append([axis_start, axis_average, axis_end, axis_average])
  540. return result_lines
  541. def get_points(row_lines, col_lines, image_size):
  542. # 创建空图
  543. row_img = np.zeros(image_size, np.uint8)
  544. col_img = np.zeros(image_size, np.uint8)
  545. # 画线
  546. threshold = 5
  547. for row in row_lines:
  548. cv2.line(row_img, (int(row[0] - threshold), int(row[1])), (int(row[2] + threshold), int(row[3])), (255, 255, 255), 1)
  549. for col in col_lines:
  550. cv2.line(col_img, (int(col[0]), int(col[1] - threshold)), (int(col[2]), int(col[3] + threshold)), (255, 255, 255), 1)
  551. # 求出交点
  552. point_img = np.bitwise_and(row_img, col_img)
  553. # cv2.imwrite("get_points.jpg", row_img+col_img)
  554. # cv2.imshow("get_points", row_img+col_img)
  555. # cv2.waitKey(0)
  556. # 识别黑白图中的白色交叉点,将横纵坐标取出
  557. ys, xs = np.where(point_img > 0)
  558. points = []
  559. for i in range(len(xs)):
  560. points.append((xs[i], ys[i]))
  561. points.sort(key=lambda x: (x[0], x[1]))
  562. return points
  563. def fix_outline(image, row_line_list, col_line_list, point_list, scale=25):
  564. log("into fix_outline")
  565. x_min_len = max(10, int(image.shape[0] / scale))
  566. y_min_len = max(10, int(image.shape[1] / scale))
  567. if len(row_line_list) <= 1 or len(col_line_list) <= 1:
  568. return [], [], row_line_list, col_line_list
  569. # 预测线取上下左右4个边(会有超出表格部分) [(), ()]
  570. row_line_list.sort(key=lambda x: (x[1], x[0]))
  571. up_line = row_line_list[0]
  572. bottom_line = row_line_list[-1]
  573. col_line_list.sort(key=lambda x: x[0])
  574. left_line = col_line_list[0]
  575. right_line = col_line_list[-1]
  576. # 计算单格高度宽度
  577. if len(row_line_list) > 1:
  578. height_dict = {}
  579. for j in range(len(row_line_list)):
  580. if j + 1 > len(row_line_list) - 1:
  581. break
  582. height = abs(int(row_line_list[j][3] - row_line_list[j + 1][3]))
  583. if height >= 10:
  584. if height in height_dict.keys():
  585. height_dict[height] = height_dict[height] + 1
  586. else:
  587. height_dict[height] = 1
  588. height_list = [[x, height_dict[x]] for x in height_dict.keys()]
  589. if height_list:
  590. height_list.sort(key=lambda x: (x[1], -x[0]), reverse=True)
  591. # print("box_height", height_list)
  592. box_height = height_list[0][0]
  593. else:
  594. box_height = y_min_len
  595. else:
  596. box_height = y_min_len
  597. if len(col_line_list) > 1:
  598. box_width = abs(col_line_list[1][2] - col_line_list[0][2])
  599. else:
  600. box_width = x_min_len
  601. # 设置轮廓线需超出阈值
  602. if box_height >= 2 * y_min_len:
  603. fix_h_len = y_min_len
  604. else:
  605. fix_h_len = box_height * 2 / 3
  606. if box_width >= 2 * x_min_len:
  607. fix_w_len = x_min_len
  608. else:
  609. fix_w_len = box_width * 2 / 3
  610. # 判断超出部分的长度,超出一定长度就补线
  611. new_row_lines = []
  612. new_col_lines = []
  613. all_longer_row_lines = []
  614. all_longer_col_lines = []
  615. # print('box_height, box_width, fix_h_len, fix_w_len', box_height, box_width, fix_h_len, fix_w_len)
  616. # print('bottom_line, left_line, right_line', bottom_line, left_line, right_line)
  617. # 补左右两条竖线超出来的线的row
  618. if up_line[1] - left_line[1] >= fix_h_len and up_line[1] - right_line[1] >= fix_h_len:
  619. if up_line[1] - left_line[1] >= up_line[1] - right_line[1]:
  620. new_row_lines.append([left_line[0], left_line[1], right_line[0], left_line[1]])
  621. new_col_y = left_line[1]
  622. # 补了row,要将其他短的col连到row上
  623. for j in range(len(col_line_list)):
  624. col = col_line_list[j]
  625. if abs(new_col_y - col[1]) <= box_height:
  626. col_line_list[j][1] = min([new_col_y, col[1]])
  627. else:
  628. new_row_lines.append([left_line[0], right_line[1], right_line[0], right_line[1]])
  629. new_col_y = right_line[1]
  630. # 补了row,要将其他短的col连到row上
  631. for j in range(len(col_line_list)):
  632. col = col_line_list[j]
  633. # 且距离不能相差太大
  634. if abs(new_col_y - col[1]) <= box_height:
  635. col_line_list[j][1] = min([new_col_y, col[1]])
  636. if left_line[3] - bottom_line[3] >= fix_h_len and right_line[3] - bottom_line[3] >= fix_h_len:
  637. if left_line[3] - bottom_line[3] >= right_line[3] - bottom_line[3]:
  638. new_row_lines.append([left_line[2], left_line[3], right_line[2], left_line[3]])
  639. new_col_y = left_line[3]
  640. # 补了row,要将其他短的col连到row上
  641. for j in range(len(col_line_list)):
  642. col = col_line_list[j]
  643. # 且距离不能相差太大
  644. if abs(new_col_y - col[3]) <= box_height:
  645. col_line_list[j][3] = max([new_col_y, col[3]])
  646. else:
  647. new_row_lines.append([left_line[2], right_line[3], right_line[2], right_line[3]])
  648. new_col_y = right_line[3]
  649. # 补了row,要将其他短的col连到row上
  650. for j in range(len(col_line_list)):
  651. col = col_line_list[j]
  652. # 且距离不能相差太大
  653. if abs(new_col_y - col[3]) <= box_height:
  654. col_line_list[j][3] = max([new_col_y, col[3]])
  655. # 补上下两条横线超出来的线的col
  656. if left_line[0] - up_line[0] >= fix_w_len and left_line[0] - bottom_line[0] >= fix_w_len:
  657. if left_line[0] - up_line[0] >= left_line[0] - bottom_line[0]:
  658. new_col_lines.append([up_line[0], up_line[1], up_line[0], bottom_line[1]])
  659. new_row_x = up_line[0]
  660. # 补了col,要将其他短的row连到col上
  661. for j in range(len(row_line_list)):
  662. row = row_line_list[j]
  663. # 且距离不能相差太大
  664. if abs(new_row_x - row[0]) <= box_width:
  665. row_line_list[j][0] = min([new_row_x, row[0]])
  666. else:
  667. new_col_lines.append([bottom_line[0], up_line[1], bottom_line[0], bottom_line[1]])
  668. new_row_x = bottom_line[0]
  669. # 补了col,要将其他短的row连到col上
  670. for j in range(len(row_line_list)):
  671. row = row_line_list[j]
  672. # 且距离不能相差太大
  673. if abs(new_row_x - row[0]) <= box_width:
  674. row_line_list[j][0] = min([new_row_x, row[0]])
  675. if up_line[2] - right_line[2] >= fix_w_len and bottom_line[2] - right_line[2] >= fix_w_len:
  676. if up_line[2] - right_line[2] >= bottom_line[2] - right_line[2]:
  677. new_col_lines.append([up_line[2], up_line[3], up_line[2], bottom_line[3]])
  678. new_row_x = up_line[2]
  679. # 补了col,要将其他短的row连到col上
  680. for j in range(len(row_line_list)):
  681. row = row_line_list[j]
  682. # 且距离不能相差太大
  683. if abs(new_row_x - row[2]) <= box_width:
  684. row_line_list[j][2] = max([new_row_x, row[2]])
  685. else:
  686. new_col_lines.append([bottom_line[2], up_line[3], bottom_line[2], bottom_line[3]])
  687. new_row_x = bottom_line[2]
  688. # 补了col,要将其他短的row连到col上
  689. for j in range(len(row_line_list)):
  690. row = row_line_list[j]
  691. # 且距离不能相差太大
  692. if abs(new_row_x - row[2]) <= box_width:
  693. row_line_list[j][2] = max([new_row_x, row[2]])
  694. all_longer_row_lines += row_line_list
  695. all_longer_col_lines += col_line_list
  696. # print('new_row_lines, new_col_lines', new_row_lines, new_col_lines)
  697. # print('all_longer_row_lines, all_longer_col_lines', all_longer_row_lines, all_longer_col_lines)
  698. return new_row_lines, new_col_lines, all_longer_row_lines, all_longer_col_lines
  699. def fix_inner(row_line_list, col_line_list, point_list):
  700. def fix(fix_lines, assist_lines, split_points, axis):
  701. new_line_point_list = []
  702. delete_line_point_list = []
  703. for line1 in fix_lines:
  704. min_assist_line = [[], []]
  705. min_distance = [1000, 1000]
  706. if_find = [0, 0]
  707. # 获取fix_line中的所有col point,里面可能不包括两个顶点,col point是交点,顶点可能不是交点
  708. fix_line_points = []
  709. for point in split_points:
  710. if abs(point[1 - axis] - line1[1 - axis]) <= 2:
  711. if line1[axis] <= point[axis] <= line1[axis + 2]:
  712. fix_line_points.append(point)
  713. # 找出离两个顶点最近的assist_line, 并且assist_line与fix_line不相交
  714. line1_point = [line1[:2], line1[2:]]
  715. for i in range(2):
  716. point = line1_point[i]
  717. for line2 in assist_lines:
  718. if not if_find[i] and abs(point[axis] - line2[axis]) <= 2:
  719. if line1[1 - axis] <= point[1 - axis] <= line2[1 - axis + 2]:
  720. # print("line1, match line2", line1, line2)
  721. if_find[i] = 1
  722. break
  723. else:
  724. if abs(point[axis] - line2[axis]) < min_distance[i] and line2[1 - axis] <= point[1 - axis] <= \
  725. line2[1 - axis + 2]:
  726. if line1[axis] <= line2[axis] <= line1[axis + 2]:
  727. continue
  728. min_distance[i] = abs(line1[axis] - line2[axis])
  729. min_assist_line[i] = line2
  730. if len(min_assist_line[0]) == 0 and len(min_assist_line[1]) == 0:
  731. continue
  732. # 找出离assist_line最近的交点
  733. min_distance = [1000, 1000]
  734. min_col_point = [[], []]
  735. for i in range(2):
  736. # print("顶点", i, line1_point[i])
  737. if min_assist_line[i]:
  738. for point in fix_line_points:
  739. if abs(point[axis] - min_assist_line[i][axis]) < min_distance[i]:
  740. min_distance[i] = abs(point[axis] - min_assist_line[i][axis])
  741. min_col_point[i] = point
  742. # print("min_col_point", min_col_point)
  743. # print("min_assist_line", min_assist_line)
  744. if len(min_col_point[0]) == 0 and len(min_col_point[1]) == 0:
  745. continue
  746. # 顶点到交点的距离(多出来的线)需大于assist_line到交点的距离(bbox的边)的1/3
  747. # print("line1_point", line1_point)
  748. if min_assist_line[0] and min_assist_line[0] == min_assist_line[1]:
  749. if min_assist_line[0][axis] < line1_point[0][axis]:
  750. bbox_len = abs(min_col_point[0][axis] - min_assist_line[0][axis])
  751. line_distance = abs(min_col_point[0][axis] - line1_point[0][axis])
  752. if bbox_len / 3 <= line_distance <= bbox_len:
  753. if axis == 1:
  754. add_point = (line1_point[0][1 - axis], min_assist_line[0][axis])
  755. else:
  756. add_point = (min_assist_line[0][axis], line1_point[0][1 - axis])
  757. new_line_point_list.append([line1, add_point])
  758. elif min_assist_line[1][axis] > line1_point[1][axis]:
  759. bbox_len = abs(min_col_point[1][axis] - min_assist_line[1][axis])
  760. line_distance = abs(min_col_point[1][axis] - line1_point[1][axis])
  761. if bbox_len / 3 <= line_distance <= bbox_len:
  762. if axis == 1:
  763. add_point = (line1_point[1][1 - axis], min_assist_line[1][axis])
  764. else:
  765. add_point = (min_assist_line[1][axis], line1_point[1][1 - axis])
  766. new_line_point_list.append([line1, add_point])
  767. else:
  768. for i in range(2):
  769. if min_col_point[i]:
  770. bbox_len = abs(min_col_point[i][axis] - min_assist_line[i][axis])
  771. line_distance = abs(min_col_point[i][axis] - line1_point[i][axis])
  772. # print("bbox_len, line_distance", bbox_len, line_distance)
  773. if bbox_len / 3 <= line_distance <= bbox_len:
  774. if axis == 1:
  775. add_point = (line1_point[i][1 - axis], min_assist_line[i][axis])
  776. else:
  777. add_point = (min_assist_line[i][axis], line1_point[i][1 - axis])
  778. new_line_point_list.append([line1, add_point])
  779. return new_line_point_list
  780. row_line_list_copy = copy.deepcopy(row_line_list)
  781. col_line_list_copy = copy.deepcopy(col_line_list)
  782. try:
  783. new_point_list = fix(col_line_list, row_line_list, point_list, axis=1)
  784. for line, new_point in new_point_list:
  785. if line in col_line_list:
  786. index = col_line_list.index(line)
  787. point1 = line[:2]
  788. point2 = line[2:]
  789. if new_point[1] >= point2[1]:
  790. col_line_list[index] = [point1[0], point1[1], new_point[0], new_point[1]]
  791. elif new_point[1] <= point1[1]:
  792. col_line_list[index] = [new_point[0], new_point[1], point2[0], point2[1]]
  793. new_point_list = fix(row_line_list, col_line_list, point_list, axis=0)
  794. for line, new_point in new_point_list:
  795. if line in row_line_list:
  796. index = row_line_list.index(line)
  797. point1 = line[:2]
  798. point2 = line[2:]
  799. if new_point[0] >= point2[0]:
  800. row_line_list[index] = [point1[0], point1[1], new_point[0], new_point[1]]
  801. elif new_point[0] <= point1[0]:
  802. row_line_list[index] = [new_point[0], new_point[1], point2[0], point2[1]]
  803. return row_line_list, col_line_list
  804. except:
  805. traceback.print_exc()
  806. return row_line_list_copy, col_line_list_copy
  807. def fix_4_points(cross_points, row_line_list, col_line_list):
  808. if not (len(row_line_list) >= 2 and len(col_line_list) >= 2):
  809. return row_line_list, col_line_list
  810. cross_points.sort(key=lambda x: (x[0], x[1]))
  811. left_up_p = cross_points[0]
  812. right_down_p = cross_points[-1]
  813. cross_points.sort(key=lambda x: (-x[0], x[1]))
  814. right_up_p = cross_points[0]
  815. left_down_p = cross_points[-1]
  816. # print('left_up_p', left_up_p, 'left_down_p', left_down_p)
  817. # print('right_up_p', right_up_p, 'right_down_p', right_down_p)
  818. min_x = min(left_up_p[0], left_down_p[0], right_down_p[0], right_up_p[0])
  819. max_x = max(left_up_p[0], left_down_p[0], right_down_p[0], right_up_p[0])
  820. min_y = min(left_up_p[1], left_down_p[1], right_down_p[1], right_up_p[1])
  821. max_y = max(left_up_p[1], left_down_p[1], right_down_p[1], right_up_p[1])
  822. if left_up_p[0] != min_x or left_up_p[1] != min_y:
  823. log('轮廓左上角交点有问题')
  824. row_line_list.append([min_x, min_y, max_x, min_y])
  825. col_line_list.append([min_x, min_y, min_x, max_y])
  826. if left_down_p[0] != min_x or left_down_p[1] != max_y:
  827. log('轮廓左下角交点有问题')
  828. row_line_list.append([min_x, max_y, max_x, max_y])
  829. col_line_list.append([min_x, min_y, min_x, max_y])
  830. if right_up_p[0] != max_x or right_up_p[1] != min_y:
  831. log('轮廓右上角交点有问题')
  832. row_line_list.append([min_x, max_y, max_x, max_y])
  833. col_line_list.append([max_x, min_y, max_x, max_y])
  834. if right_down_p[0] != max_x or right_down_p[1] != max_y:
  835. log('轮廓右下角交点有问题')
  836. row_line_list.append([min_x, max_y, max_x, max_y])
  837. col_line_list.append([max_x, min_y, max_x, max_y])
  838. return row_line_list, col_line_list
  839. def get_split_line(points, col_lines, image_np, threshold=5):
  840. # 线贴着边缘无法得到split_y,导致无法分区
  841. for _col in col_lines:
  842. if _col[3] >= image_np.shape[0] - 5:
  843. _col[3] = image_np.shape[0] - 6
  844. if _col[1] <= 0 + 5:
  845. _col[1] = 6
  846. # print("get_split_line", image_np.shape)
  847. points.sort(key=lambda x: (x[1], x[0]))
  848. # 遍历y坐标,并判断y坐标与上一个y坐标是否存在连接线
  849. i = 0
  850. split_line_y = []
  851. for point in points:
  852. # 从已分开的线下面开始判断
  853. if split_line_y:
  854. if point[1] <= split_line_y[-1] + threshold:
  855. last_y = point[1]
  856. continue
  857. if last_y <= split_line_y[-1] + threshold:
  858. last_y = point[1]
  859. continue
  860. if i == 0:
  861. last_y = point[1]
  862. i += 1
  863. continue
  864. current_line = (last_y, point[1])
  865. split_flag = 1
  866. for col in col_lines:
  867. # 只要找到一条col包含就不是分割线
  868. if current_line[0] >= col[1] - 3 and current_line[1] <= col[3] + 3:
  869. split_flag = 0
  870. break
  871. if split_flag:
  872. split_line_y.append(current_line[0] + 5)
  873. split_line_y.append(current_line[1] - 5)
  874. last_y = point[1]
  875. # 加上收尾分割线
  876. points.sort(key=lambda x: (x[1], x[0]))
  877. y_min = points[0][1]
  878. y_max = points[-1][1]
  879. if y_min - threshold < 0:
  880. split_line_y.append(0)
  881. else:
  882. split_line_y.append(y_min - threshold)
  883. if y_max + threshold > image_np.shape[0]:
  884. split_line_y.append(image_np.shape[0])
  885. else:
  886. split_line_y.append(y_max + threshold)
  887. split_line_y = list(set(split_line_y))
  888. # 剔除两条相隔太近分割线
  889. temp_split_line_y = []
  890. split_line_y.sort(key=lambda x: x)
  891. last_y = -20
  892. for y in split_line_y:
  893. if y - last_y >= 20:
  894. temp_split_line_y.append(y)
  895. last_y = y
  896. split_line_y = temp_split_line_y
  897. # 生成分割线
  898. split_line = []
  899. for y in split_line_y:
  900. split_line.append([(0, y), (image_np.shape[1], y)])
  901. split_line.append([(0, 0), (image_np.shape[1], 0)])
  902. split_line.append([(0, image_np.shape[0]), (image_np.shape[1], image_np.shape[0])])
  903. split_line.sort(key=lambda x: x[0][1])
  904. return split_line, split_line_y
  905. def get_split_area(split_y, row_line_list, col_line_list, cross_points):
  906. # 分割线纵坐标
  907. if len(split_y) < 2:
  908. return [], [], []
  909. split_y.sort(key=lambda x: x)
  910. # new_split_y = []
  911. # for i in range(1, len(split_y), 2):
  912. # new_split_y.append(int((split_y[i] + split_y[i - 1]) / 2))
  913. area_row_line_list = []
  914. area_col_line_list = []
  915. area_point_list = []
  916. for i in range(1, len(split_y)):
  917. y = split_y[i]
  918. last_y = split_y[i - 1]
  919. split_row = []
  920. for row in row_line_list:
  921. if last_y <= row[3] <= y:
  922. split_row.append(row)
  923. split_col = []
  924. for col in col_line_list:
  925. if last_y <= col[1] <= y or last_y <= col[3] <= y or col[1] < last_y < y < col[3]:
  926. split_col.append(col)
  927. split_point = []
  928. for point in cross_points:
  929. if last_y <= point[1] <= y:
  930. split_point.append(point)
  931. # 满足条件才能形成表格区域
  932. if len(split_row) >= 2 and len(split_col) >= 2 and len(split_point) >= 4:
  933. # print('len(split_row), len(split_col), len(split_point)', len(split_row), len(split_col), len(split_point))
  934. area_row_line_list.append(split_row)
  935. area_col_line_list.append(split_col)
  936. area_point_list.append(split_point)
  937. return area_row_line_list, area_col_line_list, area_point_list
  938. def get_standard_lines(row_line_list, col_line_list):
  939. new_row_line_list = []
  940. for row in row_line_list:
  941. w1 = row[0]
  942. w2 = row[2]
  943. # 横线的两个顶点分别找到最近的竖线
  944. min_distance = [10000, 10000]
  945. min_dis_w = [None, None]
  946. for col in col_line_list:
  947. if abs(col[0] - w1) < min_distance[0]:
  948. min_distance[0] = abs(col[0] - w1)
  949. min_dis_w[0] = col[0]
  950. if abs(col[0] - w2) < min_distance[1]:
  951. min_distance[1] = abs(col[0] - w2)
  952. min_dis_w[1] = col[0]
  953. if min_dis_w[0] is not None:
  954. row[0] = min_dis_w[0]
  955. if min_dis_w[1] is not None:
  956. row[2] = min_dis_w[1]
  957. new_row_line_list.append(row)
  958. new_col_line_list = []
  959. for col in col_line_list:
  960. h1 = col[1]
  961. h2 = col[3]
  962. # 横线的两个顶点分别找到最近的竖线
  963. min_distance = [10000, 10000]
  964. min_dis_w = [None, None]
  965. for row in row_line_list:
  966. if abs(row[1] - h1) < min_distance[0]:
  967. min_distance[0] = abs(row[1] - h1)
  968. min_dis_w[0] = row[1]
  969. if abs(row[1] - h2) < min_distance[1]:
  970. min_distance[1] = abs(row[1] - h2)
  971. min_dis_w[1] = row[1]
  972. if min_dis_w[0] is not None:
  973. col[1] = min_dis_w[0]
  974. if min_dis_w[1] is not None:
  975. col[3] = min_dis_w[1]
  976. new_col_line_list.append(col)
  977. # all_line_list = []
  978. # # 横线竖线两个维度
  979. # for i in range(2):
  980. # axis = i
  981. # cross_points.sort(key=lambda x: (x[axis], x[1-axis]))
  982. # current_axis = cross_points[0][axis]
  983. # points = []
  984. # line_list = []
  985. # for p in cross_points:
  986. # if p[axis] == current_axis:
  987. # points.append(p)
  988. # else:
  989. # if points:
  990. # line_list.append([points[0][0], points[0][1], points[-1][0], points[-1][1]])
  991. # points = [p]
  992. # current_axis = p[axis]
  993. # if points:
  994. # line_list.append([points[0][0], points[0][1], points[-1][0], points[-1][1]])
  995. # all_line_list.append(line_list)
  996. # new_col_line_list, new_row_line_list = all_line_list
  997. return new_col_line_list, new_row_line_list
  998. def add_outline(cross_points, row_line_list, col_line_list):
  999. cross_points.sort(key=lambda x: (x[0], x[1]))
  1000. left_up_p = cross_points[0]
  1001. right_down_p = cross_points[-1]
  1002. row_line_list.append([left_up_p[0], left_up_p[1], right_down_p[0], left_up_p[1]])
  1003. row_line_list.append([left_up_p[0], right_down_p[1], right_down_p[0], right_down_p[1]])
  1004. col_line_list.append([left_up_p[0], left_up_p[1], left_up_p[0], right_down_p[1]])
  1005. col_line_list.append([right_down_p[0], left_up_p[1], right_down_p[0], right_down_p[1]])
  1006. return row_line_list, col_line_list