|
@@ -4672,6 +4672,11 @@ class TablePremExtractor(object):
|
|
bid_amount_ = df.loc[i, headers['bid_amount'][0]] if "bid_amount" in headers else ""
|
|
bid_amount_ = df.loc[i, headers['bid_amount'][0]] if "bid_amount" in headers else ""
|
|
win_sort = df.loc[i, headers['win_sort'][0]] if "win_sort" in headers else ""
|
|
win_sort = df.loc[i, headers['win_sort'][0]] if "win_sort" in headers else ""
|
|
|
|
|
|
|
|
+ if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) & self.headerset != set(): # 只要有一项为表头 停止匹配
|
|
|
|
+ break
|
|
|
|
+ if set([project_code, package_code_raw, project_name,tenderee,tenderer,budget_,bid_amount_,win_sort]) - set(['', ' ']) == set(): # 全部为空 停止匹配
|
|
|
|
+ break
|
|
|
|
+
|
|
if package_code_raw == "" and re.search('第?[0-9一二三四五六七八九十a-zZ-Z]{1,4}(标[段号的包项]|([分子]?包|包[组件号]))$|^(标[段号的包项]|([分子]?包|包[组件号]))号?:?[0-9一二三四五六七八九十a-zZ-Z]{1,4}$', project_name):
|
|
if package_code_raw == "" and re.search('第?[0-9一二三四五六七八九十a-zZ-Z]{1,4}(标[段号的包项]|([分子]?包|包[组件号]))$|^(标[段号的包项]|([分子]?包|包[组件号]))号?:?[0-9一二三四五六七八九十a-zZ-Z]{1,4}$', project_name):
|
|
package_code_raw = project_name
|
|
package_code_raw = project_name
|
|
project_name = ""
|
|
project_name = ""
|
|
@@ -4923,6 +4928,11 @@ class CandidateExtractor(object):
|
|
second_tenderer = df.loc[i, headers['second_tenderer'][0]] if "second_tenderer" in headers else ""
|
|
second_tenderer = df.loc[i, headers['second_tenderer'][0]] if "second_tenderer" in headers else ""
|
|
third_tenderer = df.loc[i, headers['third_tenderer'][0]] if "third_tenderer" in headers else ""
|
|
third_tenderer = df.loc[i, headers['third_tenderer'][0]] if "third_tenderer" in headers else ""
|
|
|
|
|
|
|
|
+ if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) & self.headerset != set(): # 包含表头, 停止匹配
|
|
|
|
+ break
|
|
|
|
+ if set([package_code_raw, candidate_, win_or_not, bid_amount_, win_sort, win_tenderer, second_tenderer, third_tenderer]) - set(['', ' ']) == set(): # 全部为空 停止匹配
|
|
|
|
+ break
|
|
|
|
+
|
|
if candidate_ != "" and win_sort == "" and headers['candidate'][0] > 0: # 修复某些表头不说 排名,直接用候选人代替
|
|
if candidate_ != "" and win_sort == "" and headers['candidate'][0] > 0: # 修复某些表头不说 排名,直接用候选人代替
|
|
col_indx = headers['candidate'][0] -1
|
|
col_indx = headers['candidate'][0] -1
|
|
pre_col = df.loc[i, col_indx]
|
|
pre_col = df.loc[i, col_indx]
|