|
@@ -2575,7 +2575,7 @@ class ProductAttributesPredictor():
|
|
break
|
|
break
|
|
# print(set(col0_l))
|
|
# print(set(col0_l))
|
|
# print('head: ',set(col0_l) & self.header_set)
|
|
# print('head: ',set(col0_l) & self.header_set)
|
|
- if len(set(col0_l) & self.header_set) > len(col0_l) * 0.2:
|
|
|
|
|
|
+ if len(set(col0_l) & self.header_set) > len(col0_l) * 0.2 and len(col0_l)==len(col1_l): # 保证两个列数一致
|
|
header_list2 = []
|
|
header_list2 = []
|
|
product = demand = budget = order_begin = order_end = ""
|
|
product = demand = budget = order_begin = order_end = ""
|
|
for i in range(len(col0_l)):
|
|
for i in range(len(col0_l)):
|
|
@@ -4849,7 +4849,7 @@ class TablePremExtractor(object):
|
|
else:
|
|
else:
|
|
table_items.append(trs[j])
|
|
table_items.append(trs[j])
|
|
else:
|
|
else:
|
|
- print('表头,内容 列数不一致', len(trs[i]), len(trs[j]))
|
|
|
|
|
|
+ # print('表头,内容 列数不一致', len(trs[i]), len(trs[j]))
|
|
break
|
|
break
|
|
if len(table_items) > 0:
|
|
if len(table_items) > 0:
|
|
df = pd.DataFrame(table_items)
|
|
df = pd.DataFrame(table_items)
|