|
@@ -503,11 +503,12 @@ def slash_replace(_str, reverse=False):
|
|
return _str
|
|
return _str
|
|
|
|
|
|
|
|
|
|
-class LineTable:
|
|
|
|
- def recognize_table(self, list_textbox, list_line):
|
|
|
|
|
|
+class LineTable():
|
|
|
|
+ def recognize_table(self,list_textbox, list_line,sourceP_LB=True):
|
|
self.list_line = list_line
|
|
self.list_line = list_line
|
|
self.list_crosspoints = self.recognize_crosspoints(list_line)
|
|
self.list_crosspoints = self.recognize_crosspoints(list_line)
|
|
|
|
|
|
|
|
+
|
|
# 聚类
|
|
# 聚类
|
|
cluster_crosspoints = []
|
|
cluster_crosspoints = []
|
|
for _point in self.list_crosspoints:
|
|
for _point in self.list_crosspoints:
|
|
@@ -539,7 +540,7 @@ class LineTable:
|
|
in_objs = set()
|
|
in_objs = set()
|
|
list_tables = []
|
|
list_tables = []
|
|
for l_rect in list_l_rect:
|
|
for l_rect in list_l_rect:
|
|
- _ta = self.rect2table(list_textbox,l_rect,in_objs)
|
|
|
|
|
|
+ _ta = self.rect2table(list_textbox,l_rect,in_objs,sourceP_LB=sourceP_LB)
|
|
if _ta:
|
|
if _ta:
|
|
list_tables.append(_ta)
|
|
list_tables.append(_ta)
|
|
self._plot(list_line, list_textbox)
|
|
self._plot(list_line, list_textbox)
|
|
@@ -876,7 +877,7 @@ class LineTable:
|
|
ta = {"bbox":table_bbox,"table":_table}
|
|
ta = {"bbox":table_bbox,"table":_table}
|
|
return ta
|
|
return ta
|
|
|
|
|
|
- def rect2table(self, list_textbox, list_rect, in_objs, margin=0.2, fixspan=True):
|
|
|
|
|
|
+ def rect2table(self, list_textbox, list_rect, in_objs, margin=0.2, fixspan=True,sourceP_LB=True):
|
|
_table = []
|
|
_table = []
|
|
set_x = set()
|
|
set_x = set()
|
|
set_y = set()
|
|
set_y = set()
|
|
@@ -896,7 +897,7 @@ class LineTable:
|
|
clusters_rects.append([_rect])
|
|
clusters_rects.append([_rect])
|
|
|
|
|
|
print("clusters_rects", len(clusters_rects))
|
|
print("clusters_rects", len(clusters_rects))
|
|
- clusters_rects.sort(key=lambda x:x[0].bbox[3],reverse=True)
|
|
|
|
|
|
+ clusters_rects.sort(key=lambda x:x[0].bbox[3],reverse=sourceP_LB)
|
|
for l_cr in clusters_rects:
|
|
for l_cr in clusters_rects:
|
|
l_cr.sort(key=lambda x:x.bbox[0])
|
|
l_cr.sort(key=lambda x:x.bbox[0])
|
|
|
|
|
|
@@ -914,7 +915,7 @@ class LineTable:
|
|
list_y = list(set_y)
|
|
list_y = list(set_y)
|
|
|
|
|
|
list_x.sort(key=lambda x:x)
|
|
list_x.sort(key=lambda x:x)
|
|
- list_y.sort(key=lambda x:x,reverse=True)
|
|
|
|
|
|
+ list_y.sort(key=lambda x:x,reverse=sourceP_LB)
|
|
|
|
|
|
pop_x = []
|
|
pop_x = []
|
|
for i in range(len(list_x)-1):
|
|
for i in range(len(list_x)-1):
|
|
@@ -951,7 +952,7 @@ class LineTable:
|
|
_table.append(table_line)
|
|
_table.append(table_line)
|
|
|
|
|
|
list_textbox.sort(key=lambda x:x.bbox[0])
|
|
list_textbox.sort(key=lambda x:x.bbox[0])
|
|
- list_textbox.sort(key=lambda x:x.bbox[3],reverse=True)
|
|
|
|
|
|
+ list_textbox.sort(key=lambda x:x.bbox[3],reverse=sourceP_LB)
|
|
for textbox in list_textbox:
|
|
for textbox in list_textbox:
|
|
(x0,y0,x1,y1) = textbox.bbox
|
|
(x0,y0,x1,y1) = textbox.bbox
|
|
_text = textbox.get_text()
|
|
_text = textbox.get_text()
|