|
@@ -4711,7 +4711,7 @@ class TablePremExtractor(object):
|
|
|
|
|
|
if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) & self.headerset != set(): # 只要有一项为表头 停止匹配
|
|
if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) & self.headerset != set(): # 只要有一项为表头 停止匹配
|
|
break
|
|
break
|
|
- if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) - set(['', ' ']) == set(): # 全部为空 停止匹配
|
|
|
|
|
|
+ if len(set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort])- set(['', ' '])) < 2: # 内容为空或全部一样 停止匹配
|
|
break
|
|
break
|
|
|
|
|
|
if re.search('详见', project_name): # 去除某些表达: 详见招标文件
|
|
if re.search('详见', project_name): # 去除某些表达: 详见招标文件
|
|
@@ -4969,7 +4969,7 @@ class CandidateExtractor(object):
|
|
|
|
|
|
if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) & self.headerset != set(): # 包含表头, 停止匹配
|
|
if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) & self.headerset != set(): # 包含表头, 停止匹配
|
|
break
|
|
break
|
|
- if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) - set(['', ' ']) == set(): # 全部为空 停止匹配
|
|
|
|
|
|
+ if len(set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) - set(['', ' '])) < 2: # 全部为空或内容一样 停止匹配
|
|
break
|
|
break
|
|
|
|
|
|
if candidate_ != "" and win_sort == "" and headers['candidate'][0] > 0: # 修复某些表头不说 排名,直接用候选人代替
|
|
if candidate_ != "" and win_sort == "" and headers['candidate'][0] > 0: # 修复某些表头不说 排名,直接用候选人代替
|