Ver código fonte

修复表格要素及候选人提取表头属性不一致中断匹配逻辑

lsm 2 anos atrás
pai
commit
c845a71130
1 arquivo alterado com 2 adições e 2 exclusões
  1. 2 2
      BiddingKG/dl/interface/predictor.py

+ 2 - 2
BiddingKG/dl/interface/predictor.py

@@ -4711,7 +4711,7 @@ class TablePremExtractor(object):
 
             if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) & self.headerset != set(): # 只要有一项为表头 停止匹配
                 break
-            if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) - set(['', ' ']) == set():  # 全部为空 停止匹配
+            if len(set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort])- set(['', ' '])) < 2:  # 内容为空或全部一样 停止匹配
                 break
 
             if re.search('详见', project_name):  # 去除某些表达: 详见招标文件
@@ -4969,7 +4969,7 @@ class CandidateExtractor(object):
 
             if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) & self.headerset != set(): # 包含表头, 停止匹配
                 break
-            if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) - set(['', ' ']) == set():  # 全部为空 停止匹配
+            if len(set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) - set(['', ' '])) < 2:  # 全部为空或内容一样 停止匹配
                 break
 
             if candidate_ != "" and win_sort == "" and headers['candidate'][0] > 0: # 修复某些表头不说 排名,直接用候选人代替