|
@@ -0,0 +1,1057 @@
|
|
|
+import copy
|
|
|
+import time
|
|
|
+import traceback
|
|
|
+import numpy as np
|
|
|
+import cv2
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+from format_convert.utils import log, pil_resize
|
|
|
+
|
|
|
+
|
|
|
def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
    """Predict table ruling lines for ``img`` and clean them up.

    The image is resized, run through ``model.predict`` and the resulting
    per-pixel scores are turned into axis-aligned segments which are then
    filtered, merged, split into table areas and repaired.

    :param img: input image; handed to ``pil_resize`` (project helper).
    :param model: object exposing ``predict(batch)``; the first element of
        its result is used as the score map.
    :param size: ``(w, h)`` the image is resized to before prediction.
    :param prob: score threshold above which a pixel counts as line.
    :param is_test: truthy to display every intermediate stage via ``show``.
    :return: list of ``[x0, y0, x1, y1]`` lines, rows first then columns,
        expressed in the coordinates of the resized image; ``[]`` when no
        usable table structure is found.
    """
    log("into table_line, prob is " + str(prob))

    # Resize to the model input size.
    w, h = size
    img_new = pil_resize(img, h, w)
    img_show = copy.deepcopy(img_new)
    image_size = (img_new.shape[0], img_new.shape[1])

    # Predict.
    start_time = time.time()
    pred = model.predict(np.array([img_new]))
    pred = pred[0]
    log("otr model predict time " + str(time.time() - start_time))

    # Everything below is post-processing; it gets its own timer so the final
    # log line reports the whole phase rather than only the last stage.
    postprocess_start = time.time()

    show(pred, title='pred', prob=prob, mode=1, is_test=is_test)

    # Turn score pixels into line segments.
    start_time = time.time()
    line_list = points2lines(pred, False, prob=prob)
    log("points2lines " + str(time.time() - start_time))
    if not line_list:
        return []
    show(line_list, title="points2lines", mode=2, is_test=is_test)

    # Drop segments that are too short.
    start_time = time.time()
    line_list = delete_short_lines(line_list, img_new.shape)
    show(line_list, title="delete_short_lines", mode=2, is_test=is_test)
    log("delete_short_lines " + str(time.time() - start_time))

    # Separate vertical (column) and horizontal (row) lines; anything that is
    # neither is discarded.
    start_time = time.time()
    row_line_list = []
    col_line_list = []
    for line in line_list:
        if line[0] == line[2]:
            col_line_list.append(line)
        elif line[1] == line[3]:
            row_line_list.append(line)
    log("divide rows and cols " + str(time.time() - start_time))

    # Both kinds of line are required, otherwise there is no table.
    if not row_line_list or not col_line_list:
        return []

    # Merge lines that were predicted as several slightly offset pieces.
    start_time = time.time()
    row_line_list = merge_line(row_line_list, axis=0)
    col_line_list = merge_line(col_line_list, axis=1)
    show(row_line_list + col_line_list, title="merge_line", mode=2, is_test=is_test)
    log("merge_line " + str(time.time() - start_time))

    # Intersections.
    cross_points = get_points(row_line_list, col_line_list, image_size)
    if not cross_points:
        return []

    # Remove lines without enough intersections; one pass is not sufficient,
    # so the removal is applied twice with the points recomputed in between.
    row_line_list, col_line_list = delete_single_lines(row_line_list, col_line_list, cross_points)
    cross_points = get_points(row_line_list, col_line_list, image_size)
    row_line_list, col_line_list = delete_single_lines(row_line_list, col_line_list, cross_points)
    if not row_line_list or not col_line_list:
        return []

    # Find the separators between several tables and group lines per table.
    start_time = time.time()
    split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
    area_row_line_list, area_col_line_list, area_point_list = get_split_area(
        split_y, row_line_list, col_line_list, cross_points)
    log("get_split_area " + str(time.time() - start_time))

    # Repair the outer frame of every table area.
    start_time = time.time()
    need_split_flag = False
    for i in range(len(area_point_list)):
        sub_row_line_list = area_row_line_list[i]
        sub_col_line_list = area_col_line_list[i]
        sub_point_list = area_point_list[i]

        new_rows, new_cols, long_rows, long_cols = fix_outline(img_new,
                                                               sub_row_line_list,
                                                               sub_col_line_list,
                                                               sub_point_list)

        # Only when a frame line was added:
        if new_rows or new_cols:
            # use the lines that were stretched towards the new frame line ...
            if long_rows:
                sub_row_line_list = long_rows
            if long_cols:
                sub_col_line_list = long_cols
            # ... and add the new frame lines themselves.
            if new_rows:
                sub_row_line_list += new_rows
            if new_cols:
                sub_col_line_list += new_cols
            need_split_flag = True
            area_row_line_list[i] = sub_row_line_list
            area_col_line_list[i] = sub_col_line_list

    row_line_list = [y for x in area_row_line_list for y in x]
    col_line_list = [y for x in area_col_line_list for y in x]

    if need_split_flag:
        # The frame changed, so intersections and areas must be recomputed.
        cross_points = get_points(row_line_list, col_line_list, image_size)
        split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
        area_row_line_list, area_col_line_list, area_point_list = get_split_area(
            split_y, row_line_list, col_line_list, cross_points)

    show(cross_points, title="get_points", img=img_show, mode=4, is_test=is_test)
    show(split_lines, title="split_lines", img=img_show, mode=3, is_test=is_test)
    show(row_line_list + col_line_list, title="fix_outline", mode=2, is_test=is_test)
    log("fix_outline " + str(time.time() - start_time))

    # Second pass over the areas: corners, frame, inner lines.
    for i in range(len(area_point_list)):
        sub_row_line_list = area_row_line_list[i]
        sub_col_line_list = area_col_line_list[i]
        sub_point_list = area_point_list[i]

        # Make sure the four corner intersections of the outline exist.
        sub_row_line_list, sub_col_line_list = fix_4_points(sub_point_list, sub_row_line_list, sub_col_line_list)

        # Add the four frame lines once more.
        sub_point_list = get_points(sub_row_line_list, sub_col_line_list, image_size)
        sub_row_line_list, sub_col_line_list = add_outline(sub_point_list, sub_row_line_list, sub_col_line_list)

        # Repair inner lines that stop short of their neighbour.
        start_time = time.time()
        sub_row_line_list, sub_col_line_list = fix_inner(sub_row_line_list, sub_col_line_list, sub_point_list)
        log("fix_inner " + str(time.time() - start_time))
        show(sub_row_line_list + sub_col_line_list, title="fix_inner1", mode=2, is_test=is_test)

        # Merge offset pieces again.
        start_time = time.time()
        sub_row_line_list = merge_line(sub_row_line_list, axis=0)
        sub_col_line_list = merge_line(sub_col_line_list, axis=1)
        log("merge_line " + str(time.time() - start_time))
        show(sub_row_line_list + sub_col_line_list, title="merge_line", mode=2, is_test=is_test)

        # Intersections after the inner repair (only used for display).
        cross_points = get_points(sub_row_line_list, sub_col_line_list, image_size)
        show(cross_points, title="get_points3", img=img_show, mode=4, is_test=is_test)

        # Trim overhanging ends to obtain the normalised lines.
        area_row_line_list[i], area_col_line_list[i] = get_standard_lines(sub_row_line_list, sub_col_line_list)
        show(area_row_line_list[i] + area_col_line_list[i], title="get_standard_lines", mode=2, is_test=is_test)

    row_line_list = [y for x in area_row_line_list for y in x]
    col_line_list = [y for x in area_col_line_list for y in x]

    line_list = row_line_list + col_line_list
    # Display the final lines.
    show(line_list, title="all", img=img_show, mode=5, is_test=is_test)
    log("otr postprocess table_line " + str(time.time() - postprocess_start))
    return line_list
|
|
|
+
|
|
|
+
|
|
|
def show(pred_or_lines, title='', prob=0.2, img=None, mode=1, is_test=0):
    """Debug helper: display an intermediate result; no-op unless ``is_test``.

    :param pred_or_lines: data to display, interpreted according to ``mode``.
    :param title: figure / window title.
    :param prob: score threshold, only used by ``mode=1``.
    :param img: image drawn onto (in place) by modes 3, 4 and 5.
    :param mode: 1 - score map indexed ``[row][col][channel]``: channel 0
        above ``prob`` is painted (0, 0, 255), otherwise channel 1 above
        ``prob`` is painted (255, 0, 0), everything else white;
        2 - ``[x0, y0, x1, y1]`` lines plotted with matplotlib;
        3 - ``((x0, y0), (x1, y1))`` lines drawn on ``img``;
        4 - ``(x, y)`` points drawn on ``img``;
        5 - ``[x0, y0, x1, y1]`` lines drawn on ``img``.
    :param is_test: falsy to skip all display work.
    :return: None
    """
    if not is_test:
        return

    if mode == 1:
        scores = np.asarray(pred_or_lines)
        canvas = np.full(scores.shape[:2] + (3,), 255)
        # Channel 0 has priority, so it is painted last.
        canvas[scores[..., 1] > prob] = (255, 0, 0)
        canvas[scores[..., 0] > prob] = (0, 0, 255)
        plt.figure()
        plt.title(title)
        plt.imshow(canvas)
        plt.show()

    elif mode == 2:
        plt.figure()
        plt.title(title)
        for _line in pred_or_lines:
            x0, y0, x1, y1 = _line
            plt.plot([x0, x1], [y0, y1])
        plt.show()

    elif mode in (3, 4, 5):
        # Points are passed to OpenCV as tuples (as mode 4 always did);
        # older bindings do not accept lists for point arguments.
        if mode == 3:
            for _line in pred_or_lines:
                x0, y0 = _line[0]
                x1, y1 = _line[1]
                cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), (0, 0, 255), 2)
        elif mode == 4:
            for point in pred_or_lines:
                point = [int(x) for x in point]
                cv2.circle(img, (point[0], point[1]), 1, (0, 255, 0), 2)
        else:
            for _line in pred_or_lines:
                x0, y0, x1, y1 = _line
                cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), (0, 255, 0), 2)
        cv2.namedWindow(title, cv2.WINDOW_NORMAL)
        cv2.imshow(title, img)
        cv2.waitKey(0)
|
|
|
+
|
|
|
+
|
|
|
def points2lines(pred, sourceP_LB=True, prob=0.2, line_width=8, padding=3, min_len=10,
                 cell_width=13):
    """Convert a per-pixel score map into horizontal and vertical segments.

    Rows (channel 0) and columns (channel 1) containing at least ``min_len``
    pixels above ``prob`` are sampled with a coarse stride of ``line_width``
    while on a line and backed off by half a stride when the line is lost.
    The raw segments are then merged with ``lines_cluster``.

    :param pred: numpy array indexed ``[row, col, channel]``; channel 0 is
        read as the horizontal-line score, channel 1 as the vertical one.
    :param sourceP_LB: when true the recorded y value is
        ``height - 1 - row_index`` instead of ``row_index``.
        NOTE(review): the horizontal pass filters on the sum of row
        ``h_index`` but samples row ``h_i``, and the vertical pass samples
        row ``h_index`` but records ``h_i``; the two passes only agree for
        ``sourceP_LB=False`` - confirm the intent before using ``True``.
    :param prob: score threshold.
    :param line_width: sampling stride, also forwarded to ``lines_cluster``.
    :param padding: unused, kept for interface compatibility.
    :param min_len: minimum number of above-threshold pixels a row/column
        needs before it is scanned at all.
    :param cell_width: unused, kept for interface compatibility.
    :return: list of ``[x0, y0, x1, y1]``; horizontal lines first.
    """
    _time = time.time()

    log("starting points2lines")
    height = len(pred)
    width = len(pred[0])

    # Guard against a zero stride / zero back-off, which would never advance.
    _step = max(1, line_width)
    _back = max(1, _step // 2)

    # ---- horizontal pass -------------------------------------------------
    _sum = list(np.sum(np.array((pred[..., 0] > prob)).astype(int), axis=1))
    h_lines = []
    for h_index in range(height):
        h_i = height - 1 - h_index if sourceP_LB else h_index
        if _sum[h_index] < min_len:
            continue
        _start = None
        last_back = 0
        # Start at the first column; starting at -1 would sample the last
        # column through negative indexing.
        w_index = 0
        while 1:
            if w_index >= width:
                if _start is not None:
                    h_lines.append({"bbox": [_start, h_i, w_index - 1, h_i]})
                break
            if pred[h_i][w_index][0] > prob:
                if _start is None:
                    _start = w_index
                w_index += _step
            else:
                if _start is not None:
                    h_lines.append({"bbox": [_start, h_i, w_index - 1, h_i]})
                    _start = None
                # Step back half a stride to look for the line again, but
                # never to or before the previous back-off position so the
                # scan always makes progress.
                w_index -= _back
                if w_index <= last_back:
                    w_index = last_back + _back
                last_back = w_index

    log("starting points2lines 1")

    # ---- vertical pass ---------------------------------------------------
    _sum = list(np.sum(np.array((pred[..., 1] > prob)).astype(int), axis=0))
    v_lines = []
    for w_index in range(width):
        if _sum[w_index] < min_len:
            continue
        _start = None
        last_back = 0
        h_index = 0
        last_h = 0  # y value of the previous sample; the end of a segment
        while 1:
            if h_index >= height:
                if _start is not None:
                    v_lines.append({"bbox": [w_index, _start, w_index, last_h]})
                break
            h_i = height - 1 - h_index if sourceP_LB else h_index

            if pred[h_index][w_index][1] > prob:
                if _start is None:
                    _start = h_i
                h_index += _step
            else:
                if _start is not None:
                    v_lines.append({"bbox": [w_index, _start, w_index, last_h]})
                    _start = None
                h_index -= _back
                if h_index <= last_back:
                    h_index = last_back + _back
                last_back = h_index

            last_h = h_i
    log("starting points2lines 2")

    # Pad both ends by 2 and collapse each segment onto its centre line
    # before clustering.
    for _line in h_lines:
        _bbox = _line["bbox"]
        _mid = (_bbox[1] + _bbox[3]) / 2
        _line["bbox"] = [max(_bbox[0] - 2, 0), _mid, _bbox[2] + 2, _mid]

    for _line in v_lines:
        _bbox = _line["bbox"]
        _mid = (_bbox[0] + _bbox[2]) / 2
        _line["bbox"] = [_mid, max(_bbox[1] - 2, 0), _mid, _bbox[3] + 2]

    h_lines = lines_cluster(h_lines, line_width=line_width)
    v_lines = lines_cluster(v_lines, line_width=line_width)

    # Clustered boxes may have a thickness again: collapse onto the centre
    # line once more and pad the ends by 1.
    list_line = []
    for _line in h_lines:
        _bbox = _line["bbox"]
        _mid = (_bbox[1] + _bbox[3]) / 2
        list_line.append([max(_bbox[0] - 1, 0), _mid, _bbox[2] + 1, _mid])
    for _line in v_lines:
        _bbox = _line["bbox"]
        _mid = (_bbox[0] + _bbox[2]) / 2
        list_line.append([_mid, max(_bbox[1] - 1, 0), _mid, _bbox[3] + 1])

    log("points2lines cost %.2fs" % (time.time() - _time))

    return list_line
|
|
|
+
|
|
|
+
|
|
|
def lines_cluster(list_lines, line_width):
    """Repeatedly merge segments whose padded boxes touch.

    Each pass walks ``list_lines`` and merges a segment into the most
    recently created cluster it overlaps with (according to ``getIOU`` on
    boxes padded by ``line_width // 2``); passes are repeated until the
    number of clusters no longer changes.

    :param list_lines: list of ``{"bbox": [x0, y0, x1, y1]}`` dicts; the
        dicts that become cluster heads are updated in place.
    :param line_width: padding is half of this value (integer division).
    :return: list of cluster dicts from the final pass.
    """
    original_count = len(list_lines)
    margin = line_width // 2

    def padded(box):
        # Grow the box by ``margin`` on every side, clamping the top-left at 0.
        return [max(0, box[0] - margin), max(0, box[1] - margin),
                box[2] + margin, box[3] + margin]

    previous_count = 0
    while True:
        clusters = []
        for candidate in list_lines:
            box = candidate["bbox"]
            box_padded = padded(box)
            # Newest clusters are tried first.
            for cluster in reversed(clusters):
                other = cluster["bbox"]
                if getIOU(box_padded, padded(other)) > 0:
                    xs = (box[0], box[2], other[0], other[2])
                    ys = (box[1], box[3], other[1], other[3])
                    cluster["bbox"] = [min(xs), min(ys), max(xs), max(ys)]
                    break
            else:
                clusters.append(candidate)

        if len(clusters) == previous_count:
            break
        previous_count = len(clusters)
        list_lines = clusters

    log("cluster lines from %d to %d" % (original_count, len(list_lines)))
    return clusters
|
|
|
+
|
|
|
+
|
|
|
def getIOU(bbox0, bbox1):
    """Return an overlap score for two ``[x0, y0, x1, y1]`` boxes.

    Despite the name this is not intersection-over-union: for boxes that
    overlap or touch on both axes the result is the overlap area divided by
    the area of the smaller box, plus ``0.1`` (so touching boxes still score
    above zero). Boxes that are separated on either axis score ``0``.

    A zero-area box (e.g. an unpadded line segment) that touches the other
    box scores ``0.1`` instead of raising ``ZeroDivisionError``.
    """
    # Negative (or zero) values mean the projections overlap (or touch).
    width = abs(max(bbox0[2], bbox1[2]) - min(bbox0[0], bbox1[0])) - (
        abs(bbox0[2] - bbox0[0]) + abs(bbox1[2] - bbox1[0]))
    height = abs(max(bbox0[3], bbox1[3]) - min(bbox0[1], bbox1[1])) - (
        abs(bbox0[3] - bbox0[1]) + abs(bbox1[3] - bbox1[1]))
    if width <= 0 and height <= 0:
        smaller_area = min(abs((bbox0[2] - bbox0[0]) * (bbox0[3] - bbox0[1])),
                           abs((bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])))
        if smaller_area == 0:
            # Degenerate box: no area ratio can be formed, report "touching".
            return 0.1
        iou = abs(width * height / smaller_area)
        return iou + 0.1
    return 0
|
|
|
+
|
|
|
+
|
|
|
def delete_short_lines(list_lines, image_shape, scale=100):
    """Return the lines of ``list_lines`` that are not too short.

    A line with equal x coordinates is kept when its y extent is at least
    ``max(5, int(image_shape[1] / scale))``; every other line is kept when
    its x extent is at least ``max(5, int(image_shape[0] / scale))``.

    NOTE(review): for a numpy-style ``(height, width, ...)`` shape this ties
    the horizontal minimum to the image height and the vertical minimum to
    the width - confirm that this is intended.

    :param list_lines: iterable of ``[x0, y0, x1, y1]``.
    :param image_shape: indexable whose items 0 and 1 are the image extents.
    :param scale: divisor applied to the image extents.
    :return: new list with the surviving lines, original order preserved.
    """
    x_min_len = max(5, int(image_shape[0] / scale))
    y_min_len = max(5, int(image_shape[1] / scale))

    def long_enough(segment):
        if segment[0] == segment[2]:
            return abs(segment[3] - segment[1]) >= y_min_len
        return abs(segment[2] - segment[0]) >= x_min_len

    return [segment for segment in list_lines if long_enough(segment)]
|
|
|
+
|
|
|
+
|
|
|
def delete_single_lines(row_line_list, col_line_list, point_list):
    """Drop lines that carry fewer than two intersection points.

    A column line is kept when at least two points share its x value
    (``line[0]``); a row line is kept when at least two points share its y
    value (``line[1]``). Only the coordinate is compared, not whether the
    point lies within the line's extent.

    :param row_line_list: horizontal lines ``[x0, y0, x1, y1]``.
    :param col_line_list: vertical lines ``[x0, y0, x1, y1]``.
    :param point_list: intersection points ``(x, y)``.
    :return: ``(kept_rows, kept_cols)`` as new lists.
    """
    min_point_cnt = 2

    def has_enough_points(segment, coord):
        hits = 0
        for pt in point_list:
            if segment[coord] == pt[coord]:
                hits += 1
                if hits >= min_point_cnt:
                    return True
        return False

    kept_cols = [seg for seg in col_line_list if has_enough_points(seg, 0)]
    kept_rows = [seg for seg in row_line_list if has_enough_points(seg, 1)]
    return kept_rows, kept_cols
|
|
|
+
|
|
|
+
|
|
|
def merge_line(lines, axis, threshold=5):
    """Merge lines that the model predicted as several offset pieces.

    Lines are grouped when their cross-axis position differs by at most
    ``threshold`` from the group's first line and one of their end points
    falls inside the span already covered by the group. Each group becomes
    one line at the (truncated) mean cross-axis position, running from the
    smallest start to the largest end of the group.

    :param lines: list of ``[x0, y0, x1, y1]``; sorted in place.
    :param axis: 0 for horizontal lines, 1 for vertical lines.
    :param threshold: maximum cross-axis offset in pixels.
    :return: list of merged ``[x0, y0, x1, y1]`` lines.
    """
    cross = 1 - axis
    end = axis + 2

    # Every line only looks at lines after it in this order (rows look
    # downwards, columns look to the right).
    lines.sort(key=lambda x: (x[axis], x[cross]))

    groups = []  # (members, span_start, span_end)
    used = set()
    for line1 in lines:
        key1 = tuple(line1)
        if key1 in used:
            continue
        used.add(key1)

        members = [line1]
        # Span covered by the group so far, tracked incrementally instead of
        # being recomputed from fixed sentinels for every candidate.
        span_start = line1[axis]
        span_end = line1[end]
        for line2 in lines:
            key2 = tuple(line2)
            if key2 in used:
                continue
            if not (line1[cross] - threshold <= line2[cross] <= line1[cross] + threshold):
                continue
            # Does line2 touch the span of the group?
            if span_start <= line2[axis] <= span_end \
                    or span_start <= line2[end] <= span_end:
                members.append(line2)
                used.add(key2)
                span_start = min(span_start, line2[axis])
                span_end = max(span_end, line2[end])
        groups.append((members, span_start, span_end))

    result_lines = []
    for members, span_start, span_end in groups:
        # Mean cross-axis position of the group, truncated to int.
        axis_average = int(sum(line[cross] for line in members) / len(members))
        if axis:
            result_lines.append([axis_average, span_start, axis_average, span_end])
        else:
            result_lines.append([span_start, axis_average, span_end, axis_average])
    return result_lines
|
|
|
+
|
|
|
+
|
|
|
def get_points(row_lines, col_lines, image_size):
    """Return the pixels where row lines and column lines cross.

    Rows and columns are rasterised onto two separate blank images (each
    line lengthened by 5 pixels at both ends so near misses still cross);
    the bitwise AND of the two images marks the intersections.

    :param row_lines: horizontal lines ``[x0, y0, x1, y1]``.
    :param col_lines: vertical lines ``[x0, y0, x1, y1]``.
    :param image_size: shape passed to ``np.zeros`` for the blank images.
    :return: list of ``(x, y)`` tuples sorted by x, then y.
    """
    overshoot = 5
    white = (255, 255, 255)

    row_canvas = np.zeros(image_size, np.uint8)
    col_canvas = np.zeros(image_size, np.uint8)

    for seg in row_lines:
        start = (int(seg[0] - overshoot), int(seg[1]))
        stop = (int(seg[2] + overshoot), int(seg[3]))
        cv2.line(row_canvas, start, stop, white, 1)
    for seg in col_lines:
        start = (int(seg[0]), int(seg[1] - overshoot))
        stop = (int(seg[2]), int(seg[3] + overshoot))
        cv2.line(col_canvas, start, stop, white, 1)

    # Pixels set in both canvases are the intersections.
    crossing = np.bitwise_and(row_canvas, col_canvas)
    ys, xs = np.where(crossing > 0)

    points = list(zip(xs, ys))
    points.sort(key=lambda x: (x[0], x[1]))
    return points
|
|
|
+
|
|
|
+
|
|
|
def fix_outline(image, row_line_list, col_line_list, point_list, scale=25):
    """Add missing outer frame lines where the frame's neighbours overhang.

    When both outermost columns stick out beyond the top (or bottom) row by
    more than a threshold, a new row is added at the end of the columns; the
    same is done for a missing left/right column using the outermost rows.
    After a line is added, lines whose end is within one cell of it are
    stretched to it (in place).

    :param image: array with a ``shape`` attribute; items 0 and 1 of the
        shape set the minimum lengths.
    :param row_line_list: horizontal lines; sorted and possibly modified in
        place.
    :param col_line_list: vertical lines; sorted and possibly modified in
        place.
    :param point_list: unused.
    :param scale: divisor applied to the image extents.
    :return: ``(new_rows, new_cols, rows, cols)`` - the added lines and the
        (possibly stretched) existing lines. With fewer than two rows or
        columns nothing is done and the input lists themselves are returned.
    """
    log("into fix_outline")
    x_min_len = max(10, int(image.shape[0] / scale))
    y_min_len = max(10, int(image.shape[1] / scale))

    if len(row_line_list) <= 1 or len(col_line_list) <= 1:
        return [], [], row_line_list, col_line_list

    # The four outermost predicted lines (they may overhang the table).
    row_line_list.sort(key=lambda x: (x[1], x[0]))
    up_line = row_line_list[0]
    bottom_line = row_line_list[-1]
    col_line_list.sort(key=lambda x: x[0])
    left_line = col_line_list[0]
    right_line = col_line_list[-1]

    # Cell height: the most frequent gap (>= 10) between neighbouring rows,
    # the smallest one on a tie; falls back to y_min_len without such a gap.
    gap_votes = {}
    for upper, lower in zip(row_line_list, row_line_list[1:]):
        gap = abs(int(upper[3] - lower[3]))
        if gap >= 10:
            gap_votes[gap] = gap_votes.get(gap, 0) + 1
    if gap_votes:
        box_height = max(gap_votes, key=lambda g: (gap_votes[g], -g))
    else:
        box_height = y_min_len

    # Cell width: distance between the two leftmost columns.
    box_width = abs(col_line_list[1][2] - col_line_list[0][2])

    # How far a line must overhang before a frame line is added.
    fix_h_len = y_min_len if box_height >= 2 * y_min_len else box_height * 2 / 3
    fix_w_len = x_min_len if box_width >= 2 * x_min_len else box_width * 2 / 3

    def stretch(segments, idx, target, tolerance, choose):
        # Move coordinate ``idx`` of every segment that ends within
        # ``tolerance`` of ``target`` onto whichever of the two ``choose``
        # prefers (min extends towards the origin, max away from it).
        for seg in segments:
            if abs(target - seg[idx]) <= tolerance:
                seg[idx] = choose([target, seg[idx]])

    new_row_lines = []
    new_col_lines = []

    # Columns overhanging the top row -> add a row at the top.
    over_left = up_line[1] - left_line[1]
    over_right = up_line[1] - right_line[1]
    if over_left >= fix_h_len and over_right >= fix_h_len:
        new_col_y = left_line[1] if over_left >= over_right else right_line[1]
        new_row_lines.append([left_line[0], new_col_y, right_line[0], new_col_y])
        stretch(col_line_list, 1, new_col_y, box_height, min)

    # Columns overhanging the bottom row -> add a row at the bottom.
    over_left = left_line[3] - bottom_line[3]
    over_right = right_line[3] - bottom_line[3]
    if over_left >= fix_h_len and over_right >= fix_h_len:
        new_col_y = left_line[3] if over_left >= over_right else right_line[3]
        new_row_lines.append([left_line[2], new_col_y, right_line[2], new_col_y])
        stretch(col_line_list, 3, new_col_y, box_height, max)

    # Rows overhanging the left column -> add a column on the left.
    over_up = left_line[0] - up_line[0]
    over_bottom = left_line[0] - bottom_line[0]
    if over_up >= fix_w_len and over_bottom >= fix_w_len:
        new_row_x = up_line[0] if over_up >= over_bottom else bottom_line[0]
        new_col_lines.append([new_row_x, up_line[1], new_row_x, bottom_line[1]])
        stretch(row_line_list, 0, new_row_x, box_width, min)

    # Rows overhanging the right column -> add a column on the right.
    over_up = up_line[2] - right_line[2]
    over_bottom = bottom_line[2] - right_line[2]
    if over_up >= fix_w_len and over_bottom >= fix_w_len:
        new_row_x = up_line[2] if over_up >= over_bottom else bottom_line[2]
        new_col_lines.append([new_row_x, up_line[3], new_row_x, bottom_line[3]])
        stretch(row_line_list, 2, new_row_x, box_width, max)

    # Shallow copies: the caller may append to them without touching the
    # lists that were passed in.
    return new_row_lines, new_col_lines, list(row_line_list), list(col_line_list)
|
|
|
+
|
|
|
+
|
|
|
def fix_inner(row_line_list, col_line_list, point_list):
    """Extend inner lines that stop short of the line they should reach.

    Columns are repaired first (against the rows), then rows (against the
    possibly updated columns). Lists are modified in place and returned. If
    anything goes wrong the traceback is printed and deep copies of the
    inputs, taken before any change, are returned instead.

    :param row_line_list: horizontal lines ``[x0, y0, x1, y1]`` (lists).
    :param col_line_list: vertical lines ``[x0, y0, x1, y1]`` (lists).
    :param point_list: intersection points ``(x, y)``.
    :return: ``(row_line_list, col_line_list)``.
    """

    def extension_point(end_point, cross_point, assist_line, axis):
        # The dangling part (intersection -> end point) must cover between a
        # third and all of the cell side (intersection -> assist line);
        # returns the point on the assist line to extend to, else None.
        bbox_len = abs(cross_point[axis] - assist_line[axis])
        line_distance = abs(cross_point[axis] - end_point[axis])
        if bbox_len / 3 <= line_distance <= bbox_len:
            if axis == 1:
                return (end_point[1 - axis], assist_line[axis])
            return (assist_line[axis], end_point[1 - axis])
        return None

    def fix(fix_lines, assist_lines, split_points, axis):
        # ``axis`` is the direction the fix_lines run in (1: columns,
        # 0: rows); assist_lines are the perpendicular lines.
        cross = 1 - axis
        new_line_point_list = []
        for line1 in fix_lines:
            min_assist_line = [[], []]
            min_distance = [1000, 1000]
            if_find = [0, 0]

            # Intersections lying on line1. The end points themselves are
            # not necessarily among them.
            fix_line_points = [
                point for point in split_points
                if abs(point[cross] - line1[cross]) <= 2
                and line1[axis] <= point[axis] <= line1[axis + 2]
            ]

            # For each end point look for the nearest assist line that does
            # not already cross line1; stop as soon as the end point is
            # found to sit on an assist line.
            line1_point = [line1[:2], line1[2:]]
            for i in range(2):
                point = line1_point[i]
                for line2 in assist_lines:
                    if not if_find[i] and abs(point[axis] - line2[axis]) <= 2:
                        if line1[cross] <= point[cross] <= line2[cross + 2]:
                            if_find[i] = 1
                            break
                    else:
                        if abs(point[axis] - line2[axis]) < min_distance[i] \
                                and line2[cross] <= point[cross] <= line2[cross + 2]:
                            if line1[axis] <= line2[axis] <= line1[axis + 2]:
                                continue
                            min_distance[i] = abs(line1[axis] - line2[axis])
                            min_assist_line[i] = line2

            if len(min_assist_line[0]) == 0 and len(min_assist_line[1]) == 0:
                continue

            # Intersection on line1 closest to each chosen assist line.
            min_distance = [1000, 1000]
            min_col_point = [[], []]
            for i in range(2):
                if min_assist_line[i]:
                    for point in fix_line_points:
                        if abs(point[axis] - min_assist_line[i][axis]) < min_distance[i]:
                            min_distance[i] = abs(point[axis] - min_assist_line[i][axis])
                            min_col_point[i] = point

            if len(min_col_point[0]) == 0 and len(min_col_point[1]) == 0:
                continue

            if min_assist_line[0] and min_assist_line[0] == min_assist_line[1]:
                # Both ends picked the same assist line: only the end that
                # faces it may be extended.
                if min_assist_line[0][axis] < line1_point[0][axis]:
                    candidates = [0]
                elif min_assist_line[1][axis] > line1_point[1][axis]:
                    candidates = [1]
                else:
                    candidates = []
            else:
                candidates = [i for i in range(2) if min_col_point[i]]

            for i in candidates:
                add_point = extension_point(line1_point[i], min_col_point[i],
                                            min_assist_line[i], axis)
                if add_point is not None:
                    new_line_point_list.append([line1, add_point])

        return new_line_point_list

    def apply(lines, additions, axis):
        # Replace each line by its extended version. A line that has already
        # been replaced is no longer found and is left alone.
        for line, new_point in additions:
            if line in lines:
                index = lines.index(line)
                point1 = line[:2]
                point2 = line[2:]
                if new_point[axis] >= point2[axis]:
                    lines[index] = [point1[0], point1[1], new_point[0], new_point[1]]
                elif new_point[axis] <= point1[axis]:
                    lines[index] = [new_point[0], new_point[1], point2[0], point2[1]]

    row_line_list_copy = copy.deepcopy(row_line_list)
    col_line_list_copy = copy.deepcopy(col_line_list)
    try:
        apply(col_line_list, fix(col_line_list, row_line_list, point_list, axis=1), axis=1)
        apply(row_line_list, fix(row_line_list, col_line_list, point_list, axis=0), axis=0)
        return row_line_list, col_line_list
    except Exception:
        # Best effort: keep the untouched lines, but do not swallow
        # KeyboardInterrupt / SystemExit.
        traceback.print_exc()
        return row_line_list_copy, col_line_list_copy
|
|
|
+
|
|
|
+
|
|
|
def fix_4_points(cross_points, row_line_list, col_line_list):
    """Add frame lines for outline corners that have no intersection.

    The four extreme intersections are compared with the corners of the
    bounding box of those intersections; for every corner that does not
    coincide, the row and the column meeting at that corner are appended.

    :param cross_points: intersection points ``(x, y)``; sorted in place.
    :param row_line_list: horizontal lines; appended to in place.
    :param col_line_list: vertical lines; appended to in place.
    :return: ``(row_line_list, col_line_list)``. Nothing is changed when
        there are fewer than two rows or columns, or no intersections.
    """
    if not (len(row_line_list) >= 2 and len(col_line_list) >= 2):
        return row_line_list, col_line_list
    if len(cross_points) == 0:
        # No intersections: there is no outline to check.
        return row_line_list, col_line_list

    cross_points.sort(key=lambda x: (x[0], x[1]))
    left_up_p = cross_points[0]
    right_down_p = cross_points[-1]
    cross_points.sort(key=lambda x: (-x[0], x[1]))
    right_up_p = cross_points[0]
    left_down_p = cross_points[-1]

    corners = (left_up_p, left_down_p, right_down_p, right_up_p)
    min_x = min(p[0] for p in corners)
    max_x = max(p[0] for p in corners)
    min_y = min(p[1] for p in corners)
    max_y = max(p[1] for p in corners)

    top_row = [min_x, min_y, max_x, min_y]
    bottom_row = [min_x, max_y, max_x, max_y]
    left_col = [min_x, min_y, min_x, max_y]
    right_col = [max_x, min_y, max_x, max_y]

    if left_up_p[0] != min_x or left_up_p[1] != min_y:
        log('轮廓左上角交点有问题')
        row_line_list.append(list(top_row))
        col_line_list.append(list(left_col))
    if left_down_p[0] != min_x or left_down_p[1] != max_y:
        log('轮廓左下角交点有问题')
        row_line_list.append(list(bottom_row))
        col_line_list.append(list(left_col))
    if right_up_p[0] != max_x or right_up_p[1] != min_y:
        log('轮廓右上角交点有问题')
        # The top-right corner needs the TOP row (y = min_y).
        row_line_list.append(list(top_row))
        col_line_list.append(list(right_col))
    if right_down_p[0] != max_x or right_down_p[1] != max_y:
        log('轮廓右下角交点有问题')
        row_line_list.append(list(bottom_row))
        col_line_list.append(list(right_col))

    return row_line_list, col_line_list
|
|
|
+
|
|
|
+
|
|
|
def get_split_line(points, col_lines, image_np, threshold=5):
    """Find the horizontal lines that separate vertically stacked regions.

    Points are walked in ascending y order. Whenever two consecutive y values
    are not both covered by a single column line, the gap between them is
    treated as a separation and a split is placed just inside each side of it.
    A split above the first point and one below the last point are always
    added.

    Args:
        points: list of points read as ``p[0]`` (x) and ``p[1]`` (y). Sorted
            in place by ``(y, x)``.
        col_lines: list of mutable ``[x1, y1, x2, y2]`` vertical lines. Ends
            lying within 5 units of the top/bottom image border are pulled
            inwards IN PLACE, otherwise no split could be found next to them.
        image_np: object whose ``shape[0]`` is the image height and
            ``shape[1]`` the image width.
        threshold: padding added around the first/last point, and the
            distance below the latest split inside which points are skipped.

    Returns:
        ``(split_line, split_line_y)``: ``split_line`` is a list of
        ``[(0, y), (width, y)]`` segments sorted by y which always contains
        the top (y=0) and bottom (y=height) borders; ``split_line_y`` is the
        ascending list of split ordinates without those forced borders. With
        no points, only the two borders and an empty list are returned.
    """
    edge_margin = 5      # column ends this close to a border get pulled in
    col_tolerance = 3    # slack when testing whether a column covers a gap
    split_offset = 5     # distance of a split from the points bounding a gap
    min_gap = 20         # splits closer than this to the kept one are dropped

    height = image_np.shape[0]
    width = image_np.shape[1]

    # Columns glued to the image border would swallow the outer splits.
    for col in col_lines:
        if col[3] >= height - edge_margin:
            col[3] = height - edge_margin - 1
        if col[1] <= edge_margin:
            col[1] = edge_margin + 1

    borders = [[(0, 0), (width, 0)], [(0, height), (width, height)]]
    if not points:
        # Nothing to separate; previously this crashed on points[0].
        return borders, []

    points.sort(key=lambda p: (p[1], p[0]))

    split_line_y = []
    last_y = None
    for point in points:
        y = point[1]

        # Resume scanning only once both the previous and the current point
        # lie clearly below the most recent split.
        if split_line_y:
            resume_y = split_line_y[-1] + threshold
            if y <= resume_y or last_y <= resume_y:
                last_y = y
                continue

        # The very first point only initialises the reference ordinate.
        if last_y is None:
            last_y = y
            continue

        # A single column spanning both ordinates means they are connected.
        connected = any(
            last_y >= col[1] - col_tolerance and y <= col[3] + col_tolerance
            for col in col_lines
        )
        if not connected:
            split_line_y.append(last_y + split_offset)
            split_line_y.append(y - split_offset)

        last_y = y

    # Closing splits above the first and below the last point, clamped to the
    # image.
    y_min = points[0][1]
    y_max = points[-1][1]
    split_line_y.append(max(y_min - threshold, 0))
    split_line_y.append(min(y_max + threshold, height))

    # Drop splits that sit too close to the previously kept one.
    # NOTE(review): the reference ordinate is advanced only when a split is
    # kept -- confirm against the original indentation of this loop.
    kept_y = []
    last_kept = -min_gap
    for y in sorted(set(split_line_y)):
        if y - last_kept >= min_gap:
            kept_y.append(y)
            last_kept = y

    split_line = [[(0, y), (width, y)] for y in kept_y] + borders
    split_line.sort(key=lambda line: line[0][1])
    return split_line, kept_y
|
|
|
+
|
|
|
+
|
|
|
def get_split_area(split_y, row_line_list, col_line_list, cross_points):
    """Group rows, columns and cross points into the bands between splits.

    Args:
        split_y: list of split ordinates; sorted in place (ascending).
        row_line_list: list of ``[x1, y1, x2, y2]`` horizontal lines.
        col_line_list: list of ``[x1, y1, x2, y2]`` vertical lines.
        cross_points: list of points read as ``p[1]`` for their y ordinate.

    Returns:
        Three parallel lists ``(area_rows, area_cols, area_points)``, one
        entry per band that can form a table, i.e. a band holding at least
        two rows, two columns and four cross points. All three are empty when
        fewer than two split ordinates are given.
    """
    area_row_line_list, area_col_line_list, area_point_list = [], [], []
    if len(split_y) < 2:
        return area_row_line_list, area_col_line_list, area_point_list

    split_y.sort()

    # Each pair of neighbouring splits bounds one candidate band; the bounds
    # themselves are inclusive.
    for upper, lower in zip(split_y, split_y[1:]):
        band_rows = [row for row in row_line_list if upper <= row[3] <= lower]

        # A column belongs to the band when either end lies inside it or when
        # it runs straight through it.
        band_cols = [
            col for col in col_line_list
            if upper <= col[1] <= lower
            or upper <= col[3] <= lower
            or col[1] < upper < lower < col[3]
        ]

        band_points = [p for p in cross_points if upper <= p[1] <= lower]

        # Too few lines or crossings: this band cannot be a table.
        if len(band_rows) < 2 or len(band_cols) < 2 or len(band_points) < 4:
            continue

        area_row_line_list.append(band_rows)
        area_col_line_list.append(band_cols)
        area_point_list.append(band_points)

    return area_row_line_list, area_col_line_list, area_point_list
|
|
|
+
|
|
|
+
|
|
|
def get_standard_lines(row_line_list, col_line_list):
    """Snap every line end onto the nearest perpendicular line.

    Both x ends of each row are replaced by the x of the closest column, and
    both y ends of each column by the y of the closest row. The line lists
    are modified IN PLACE. When the perpendicular list is empty the lines are
    left as they are.

    Args:
        row_line_list: list of mutable ``[x1, y1, x2, y2]`` horizontal lines.
        col_line_list: list of mutable ``[x1, y1, x2, y2]`` vertical lines.

    Returns:
        ``(new_col_line_list, new_row_line_list)``: new lists holding the
        same (mutated) line objects. Note the order: columns first, rows
        second, i.e. the reverse of the parameter order.
    """
    # Only x ends of rows and y ends of columns are rewritten, so the
    # reference coordinates (col x, row y) stay valid throughout.
    col_xs = [col[0] for col in col_line_list]
    new_row_line_list = []
    for row in row_line_list:
        if col_xs:
            # Each end of the horizontal line moves to its nearest column.
            row[0], row[2] = (_nearest_coordinate(row[0], col_xs),
                              _nearest_coordinate(row[2], col_xs))
        new_row_line_list.append(row)

    row_ys = [row[1] for row in row_line_list]
    new_col_line_list = []
    for col in col_line_list:
        if row_ys:
            # Each end of the vertical line moves to its nearest row.
            col[1], col[3] = (_nearest_coordinate(col[1], row_ys),
                              _nearest_coordinate(col[3], row_ys))
        new_col_line_list.append(col)

    return new_col_line_list, new_row_line_list


def _nearest_coordinate(value, candidates):
    """Return the candidate closest to ``value`` (the first one on ties)."""
    # No distance cap: the former 10000 sentinel silently skipped snapping
    # when the nearest line was 10000 or more units away.
    return min(candidates, key=lambda candidate: abs(candidate - value))
|
|
|
+
|
|
|
+
|
|
|
def add_outline(cross_points, row_line_list, col_line_list):
    """Append the four edges of the rectangle spanned by the cross points.

    The rectangle corners are the lexicographically smallest and largest
    ``(x, y)`` cross points, not a bounding box over all points.

    Args:
        cross_points: list of points read as ``p[0]`` (x) and ``p[1]`` (y).
            Sorted in place by ``(x, y)``.
        row_line_list: list of ``[x1, y1, x2, y2]`` horizontal lines; the top
            and bottom edges are appended in place.
        col_line_list: list of ``[x1, y1, x2, y2]`` vertical lines; the left
            and right edges are appended in place.

    Returns:
        The tuple ``(row_line_list, col_line_list)`` (the same list objects).
        Both are returned unchanged when there are no cross points.
    """
    # No cross points means no outline (and avoids IndexError below).
    if not cross_points:
        return row_line_list, col_line_list

    cross_points.sort(key=lambda p: (p[0], p[1]))
    left, top = cross_points[0][0], cross_points[0][1]
    right, bottom = cross_points[-1][0], cross_points[-1][1]

    row_line_list.extend([
        [left, top, right, top],
        [left, bottom, right, bottom],
    ])
    col_line_list.extend([
        [left, top, left, bottom],
        [right, top, right, bottom],
    ])
    return row_line_list, col_line_list
|
|
|
+
|