Parcourir la source

Merge remote-tracking branch 'origin/master'

luojiehua il y a 2 ans
Parent
commit
40764c19f4
1 fichier modifié avec 8 ajouts et 5 suppressions
  1. BiddingKG/dl/interface/Preprocessing.py (+8 -5)

+ 8 - 5
BiddingKG/dl/interface/Preprocessing.py

@@ -343,7 +343,7 @@ def tableToText(soup):
             same_value = inner_table[h][0][0]
             for w in range(width):
                 if last_head is not None:
-                    if inner_table[h-1][w][0]!=fix_value and inner_table[h-1][w][1] == 0:
+                    if inner_table[h-1][w][0] != fix_value and inner_table[h-1][w][0] != "" and inner_table[h-1][w][1] == 0:
                         is_all_key = False
 
                     if inner_table[h][w][0]==1:
@@ -372,9 +372,11 @@ def tableToText(soup):
                 continue
 
             if is_same_value:
-                head_list.append(h)
-                last_is_same_value = is_same_value
-                continue
+                # 该块只有表头一行不合法
+                if h - head_list[-1] > 1:
+                    head_list.append(h)
+                    last_is_same_value = is_same_value
+                    continue
             if not is_all_key:
                 if not is_same_with_lastHead:
                     # 该块只有表头一行不合法
@@ -426,6 +428,7 @@ def tableToText(soup):
         return inner_table,head_list
 
     def set_head_model(inner_table):
+        origin_inner_table = copy.deepcopy(inner_table)
         for i in range(len(inner_table)):
             for j in range(len(inner_table[i])):
                 # 删掉单格前后符号,以免影响表头预测
@@ -440,7 +443,7 @@ def tableToText(soup):
         # 组合结果
         for i in range(len(inner_table)):
             for j in range(len(inner_table[i])):
-                inner_table[i][j] = [inner_table[i][j], int(predict_list[i][j])]
+                inner_table[i][j] = [origin_inner_table[i][j][0], int(predict_list[i][j])]
         head_list = sliceTable(inner_table)
         return inner_table, head_list