|
@@ -4,7 +4,7 @@ import traceback
|
|
|
import numpy as np
|
|
|
import cv2
|
|
|
import matplotlib.pyplot as plt
|
|
|
-from format_convert.utils import log, pil_resize
|
|
|
+from format_convert.utils import log, pil_resize, memory_decorator
|
|
|
|
|
|
|
|
|
def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
|
|
@@ -166,12 +166,17 @@ def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
|
|
|
return line_list
|
|
|
|
|
|
|
|
|
+@memory_decorator
|
|
|
def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
+ log('into table_line_pdf_post_process')
|
|
|
for i, line in enumerate(line_list):
|
|
|
line_list[i] = [int(x) for x in line]
|
|
|
|
|
|
+ # log('pdf img_new h w ' + str(int(page_h+1)) + ' ' + str(int(page_w+1)))
|
|
|
img_new = np.full([int(page_h+1), int(page_w+1), 3], 255, dtype=np.uint8)
|
|
|
+ # log('pdf np.full')
|
|
|
img_show = copy.deepcopy(img_new)
|
|
|
+ # log('pdf copy.deepcopy')
|
|
|
|
|
|
show(line_list, title="table_line_pdf start", mode=2, is_test=is_test)
|
|
|
|
|
@@ -198,6 +203,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
# 合并线
|
|
|
row_line_list = merge_line(row_line_list, axis=0)
|
|
|
col_line_list = merge_line(col_line_list, axis=1)
|
|
|
+ # log("pdf merge_line1 " + str(time.time() - start_time))
|
|
|
show(row_line_list + col_line_list, title="merge", mode=2, is_test=is_test)
|
|
|
|
|
|
# 计算交点
|
|
@@ -211,7 +217,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
start_time = time.time()
|
|
|
split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
|
|
|
area_row_line_list, area_col_line_list, area_point_list = get_split_area(split_y, row_line_list, col_line_list, cross_points)
|
|
|
- log("pdf get_split_area " + str(time.time() - start_time))
|
|
|
+ # log("pdf get_split_area " + str(time.time() - start_time))
|
|
|
show(split_lines, title="split_lines", img=img_show, mode=3, is_test=is_test)
|
|
|
|
|
|
# 根据区域循环
|
|
@@ -227,6 +233,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
sub_row_line_list,
|
|
|
sub_col_line_list,
|
|
|
sub_point_list)
|
|
|
+ # log("pdf fix_outline1 " + str(time.time() - start_time))
|
|
|
|
|
|
# 如有补线
|
|
|
if new_rows or new_cols:
|
|
@@ -252,7 +259,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
cross_points = get_points(row_line_list, col_line_list, (img_new.shape[0], img_new.shape[1]))
|
|
|
split_lines, split_y = get_split_line(cross_points, col_line_list, img_new)
|
|
|
area_row_line_list, area_col_line_list, area_point_list = get_split_area(split_y, row_line_list, col_line_list, cross_points)
|
|
|
- # log("pdf fix_outline " + str(time.time() - start_time))
|
|
|
+ # log("pdf fix_outline2 " + str(time.time() - start_time))
|
|
|
|
|
|
# 根据区域循环
|
|
|
for i in range(len(area_point_list)):
|
|
@@ -262,6 +269,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
|
|
|
# 验证轮廓的4个交点
|
|
|
sub_row_line_list, sub_col_line_list = fix_4_points(sub_point_list, sub_row_line_list, sub_col_line_list)
|
|
|
+ # log("pdf fix_4_points " + str(time.time() - start_time))
|
|
|
|
|
|
# Add the four border lines once more
|
|
|
sub_point_list = get_points(sub_row_line_list, sub_col_line_list, (img_new.shape[0], img_new.shape[1]))
|
|
@@ -282,6 +290,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
# 合并线
|
|
|
area_row_line_list[i] = merge_line(sub_row_line_list, axis=0)
|
|
|
area_col_line_list[i] = merge_line(sub_col_line_list, axis=1)
|
|
|
+ # log("pdf merge_line2 " + str(time.time() - start_time))
|
|
|
|
|
|
row_line_list = [y for x in area_row_line_list for y in x]
|
|
|
col_line_list = [y for x in area_col_line_list for y in x]
|
|
@@ -289,7 +298,7 @@ def table_line_pdf_post_process(line_list, page_w, page_h, is_test=0):
|
|
|
line_list = row_line_list + col_line_list
|
|
|
# 打印处理后线
|
|
|
show(line_list, title="all", img=img_show, mode=5, is_test=is_test)
|
|
|
- # log("table_line_pdf cost: " + str(time.time() - start_time))
|
|
|
+ log("table_line_pdf cost: " + str(time.time() - start_time))
|
|
|
return line_list
|
|
|
|
|
|
|
|
@@ -590,6 +599,7 @@ def delete_single_lines(row_line_list, col_line_list, point_list):
|
|
|
return new_row_line_list, new_col_line_list
|
|
|
|
|
|
|
|
|
+@memory_decorator
|
|
|
def merge_line(lines, axis, threshold=5):
|
|
|
"""
|
|
|
解决模型预测一条直线错开成多条直线,合并成一条直线
|
|
@@ -600,6 +610,7 @@ def merge_line(lines, axis, threshold=5):
|
|
|
:return: 合并后的线条列表
|
|
|
"""
|
|
|
# 任意一条line获取该合并的line,横线往下找,竖线往右找
|
|
|
+ start_time = time.time()
|
|
|
lines.sort(key=lambda x: (x[axis], x[1 - axis]))
|
|
|
merged_lines = []
|
|
|
used_lines = []
|
|
@@ -649,6 +660,7 @@ def merge_line(lines, axis, threshold=5):
|
|
|
result_lines.append([axis_average, axis_start, axis_average, axis_end])
|
|
|
else:
|
|
|
result_lines.append([axis_start, axis_average, axis_end, axis_average])
|
|
|
+ log('merge_line2 cost: ' + str(time.time()-start_time))
|
|
|
return result_lines
|
|
|
|
|
|
|