|
@@ -705,7 +705,7 @@ class PREMPredict():
|
|
|
elif re.search('第[4-9四五六]中标候选人', front): #修复第4以上的预测错为中标人
|
|
|
label = 5
|
|
|
values[label] = 0.5
|
|
|
- elif re.search('(序号|排名|排序|名次):[4-9],', front): # 293225236 附件中 排名预测错误
|
|
|
+ elif re.search('(序号|排名|排序|名次):([4-9]|\d{2,}),', front): # 293225236 附件中 排名预测错误
|
|
|
values[2] = 0.5
|
|
|
label = 5
|
|
|
elif re.search('是否中标:是,供应商', front) and label == 5:
|
|
@@ -4722,7 +4722,7 @@ class TablePremExtractor(object):
|
|
|
header_dic['budget'] = (i, text)
|
|
|
break
|
|
|
if ('project_code' in header_dic or 'package_code' in header_dic or 'project_name' in header_dic) and (
|
|
|
- 'tenderee' in header_dic or 'tenderer' in header_dic or'budget' in header_dic): # 包含标段及招标人或招标金额或中标人的进行提取
|
|
|
+ 'tenderer' in header_dic or'budget' in header_dic): # 包含标段及招标金额或中标人的进行提取
|
|
|
return flag, contain_header, header_dic
|
|
|
elif ('tenderer' in header_dic) and ('bid_amount' in header_dic): # 包含中标人及中标金额的进行提取
|
|
|
return flag,contain_header, header_dic
|
|
@@ -4901,6 +4901,17 @@ class TablePremExtractor(object):
|
|
|
|
|
|
rs_dic = {}
|
|
|
for table in tables:
|
|
|
+
|
|
|
+ text = table.text.strip()
|
|
|
+ previous = table.findPreviousSibling()
|
|
|
+ text2 = previous .text.strip() if previous else ""
|
|
|
+ # text2 = table.findPreviousSibling().text.strip() if table.findPreviousSibling() != None else ""
|
|
|
+ if re.search('项目业主|业\s*主', text) and re.search('业\s*绩', text+text2): # 包含业绩的表格过滤掉,不进行处理
|
|
|
+ tb_ex = table.extract()
|
|
|
+ if previous:
|
|
|
+ sib = previous.extract()
|
|
|
+ continue
|
|
|
+
|
|
|
trs = self.tb.table2list(table)
|
|
|
# table.extract()
|
|
|
i = 0
|