|
@@ -343,7 +343,7 @@ def tableToText(soup):
|
|
|
same_value = inner_table[h][0][0]
|
|
|
for w in range(width):
|
|
|
if last_head is not None:
|
|
|
- if inner_table[h-1][w][0]!=fix_value and inner_table[h-1][w][1] == 0:
|
|
|
+ if inner_table[h-1][w][0] != fix_value and inner_table[h-1][w][0] != "" and inner_table[h-1][w][1] == 0:
|
|
|
is_all_key = False
|
|
|
|
|
|
if inner_table[h][w][0]==1:
|
|
@@ -372,9 +372,11 @@ def tableToText(soup):
|
|
|
continue
|
|
|
|
|
|
if is_same_value:
|
|
|
- head_list.append(h)
|
|
|
- last_is_same_value = is_same_value
|
|
|
- continue
|
|
|
+ # 该块只有表头一行不合法
|
|
|
+ if h - head_list[-1] > 1:
|
|
|
+ head_list.append(h)
|
|
|
+ last_is_same_value = is_same_value
|
|
|
+ continue
|
|
|
if not is_all_key:
|
|
|
if not is_same_with_lastHead:
|
|
|
# 该块只有表头一行不合法
|
|
@@ -426,6 +428,7 @@ def tableToText(soup):
|
|
|
return inner_table,head_list
|
|
|
|
|
|
def set_head_model(inner_table):
|
|
|
+ origin_inner_table = copy.deepcopy(inner_table)
|
|
|
for i in range(len(inner_table)):
|
|
|
for j in range(len(inner_table[i])):
|
|
|
# 删掉单格前后符号,以免影响表头预测
|
|
@@ -440,7 +443,7 @@ def tableToText(soup):
|
|
|
# 组合结果
|
|
|
for i in range(len(inner_table)):
|
|
|
for j in range(len(inner_table[i])):
|
|
|
- inner_table[i][j] = [inner_table[i][j], int(predict_list[i][j])]
|
|
|
+ inner_table[i][j] = [origin_inner_table[i][j][0], int(predict_list[i][j])]
|
|
|
head_list = sliceTable(inner_table)
|
|
|
return inner_table, head_list
|
|
|
|