|
@@ -362,6 +362,11 @@ def tableToText(soup):
|
|
|
|
|
|
last_head = h
|
|
|
|
|
|
+ # print("h", h)
|
|
|
+ # print("last_is_same_value", last_is_same_value)
|
|
|
+ # print("is_same_value", is_same_value)
|
|
|
+ # print("is_all_key", is_all_key)
|
|
|
+ # print("is_same_with_lastHead", is_same_with_lastHead)
|
|
|
if last_is_same_value:
|
|
|
last_is_same_value = is_same_value
|
|
|
continue
|
|
@@ -372,8 +377,9 @@ def tableToText(soup):
|
|
|
continue
|
|
|
if not is_all_key:
|
|
|
if not is_same_with_lastHead:
|
|
|
- head_list.append(h)
|
|
|
-
|
|
|
+ # A block that contains only the header row is invalid
|
|
|
+ if h - head_list[-1] > 1:
|
|
|
+ head_list.append(h)
|
|
|
|
|
|
head_list.append(height)
|
|
|
return head_list
|
|
@@ -1012,6 +1018,7 @@ def tableToText(soup):
|
|
|
inner_table, head_list = set_head_model(inner_table)
|
|
|
# inner_table,head_list = setHead_incontext(inner_table,pat_head)
|
|
|
# print("table_head", inner_table)
|
|
|
+ # print("head_list", head_list)
|
|
|
# for begin in range(len(head_list[:-1])):
|
|
|
# for item in inner_table[head_list[begin]:head_list[begin+1]]:
|
|
|
# print(item)
|