Prechádzať zdrojové kódy

表格分块sliceTable加入限制,块行数>1

fangjiasheng pred 2 rokmi
rodič
commit
1d0891da2f
1 zmenený súbor s 9 pridaniami a 2 odobraniami
  1. 9 2
      BiddingKG/dl/interface/Preprocessing.py

+ 9 - 2
BiddingKG/dl/interface/Preprocessing.py

@@ -362,6 +362,11 @@ def tableToText(soup):
 
             last_head = h
 
+            # print("h", h)
+            # print("last_is_same_value", last_is_same_value)
+            # print("is_same_value", is_same_value)
+            # print("is_all_key", is_all_key)
+            # print("is_same_with_lastHead", is_same_with_lastHead)
             if last_is_same_value:
                 last_is_same_value = is_same_value
                 continue
@@ -372,8 +377,9 @@ def tableToText(soup):
                 continue
             if not is_all_key:
                 if not is_same_with_lastHead:
-                    head_list.append(h)
-
+                    # 该块只有表头一行不合法
+                    if h - head_list[-1] > 1:
+                        head_list.append(h)
 
         head_list.append(height)
         return head_list
@@ -1012,6 +1018,7 @@ def tableToText(soup):
             inner_table, head_list = set_head_model(inner_table)
             # inner_table,head_list = setHead_incontext(inner_table,pat_head)
             # print("table_head", inner_table)
+            # print("head_list", head_list)
             # for begin in range(len(head_list[:-1])):
             #     for item in inner_table[head_list[begin]:head_list[begin+1]]:
             #         print(item)