2 anos atrás · a4135a8673
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -3514,6 +3514,13 @@ def update_prem(old_prem, new_prem):
 
				             for k in del_k:
			
 
				                 old_prem.pop(k)
			
 
				 
			
 
				+        if 'Project' in old_prem:
			
 
				+            for d in list(old_prem['Project']['roleList']):  # iterate over a snapshot: entries are removed from the live list below, and removing while iterating the same list skips the following element
			
 
				+                if d['role_name'] in ['tenderee', 'agency']:
			
 
				+                    tenderree_ = d['role_text']
			
 
				+                    if tenderree_ in str(new_prem) and re.search('公司', tenderree_):
			
 
				+                        old_prem['Project']['roleList'].remove(d)  # drop an old tenderee/agency prediction whose company name also appears in the table-based result, so the table extraction takes precedence and a wrong recall is avoided
			
 
				+
			
 
				         for k, v in new_prem.items():
			
 
				             if k == 'Project':
			
 
				                 if 'Project' in old_prem:
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -705,7 +705,7 @@ class PREMPredict():
 
				                 elif re.search('第[4-9四五六]中标候选人', front):  #修复第4以上的预测错为中标人
			
 
				                     label = 5
			
 
				                     values[label] = 0.5
			
 
				-                elif re.search('(序号|排名|排序|名次)：[4-9]，', front): # 293225236 附件中 排名预测错误
			
 
				+                elif re.search(r'(序号|排名|排序|名次)：([4-9]|\d{2,})，', front):  # rank/serial number of 4-9 or any number with two or more digits; case 293225236 (presumably a document id): ranking inside an attachment was predicted wrongly
			
 
				                     values[2] = 0.5
			
 
				                     label = 5
			
 
				             elif re.search('是否中标：是，供应商', front) and label == 5:
			
@@ -4722,7 +4722,7 @@ class TablePremExtractor(object):
 
				                             header_dic['budget'] = (i, text)
			
 
				                             break
			
 
				             if ('project_code' in header_dic or 'package_code' in header_dic or 'project_name' in header_dic) and (
			
 
				-                     'tenderee' in header_dic or 'tenderer' in header_dic or'budget' in header_dic): # extract when the header has a lot/section column plus a tenderee, budget amount or winning-bidder column
			
 
				+                     'tenderer' in header_dic or'budget' in header_dic): # extract when the header has a lot/section column plus a budget amount or winning-bidder column
			
 
				                 return flag, contain_header, header_dic
			
 
				             elif ('tenderer' in header_dic) and ('bid_amount' in header_dic): # extract when the header has both a winning-bidder column and a winning-bid amount column
			
 
				                 return flag,contain_header, header_dic
			
@@ -4901,6 +4901,17 @@ class TablePremExtractor(object):
 
				 
			
 
				         rs_dic = {}
			
 
				         for table in tables:
			
 
				+
			
 
				+            text = table.text.strip()
			
 
				+            previous = table.findPreviousSibling()
			
 
				+            text2 = previous.text.strip() if previous else ""
			
 
				+            # Tables that mention an owner ("业主") and, in their own text or the preceding sibling's text, performance records ("业绩") are filtered out and not processed.
			
 
				+            if re.search(r'项目业主|业\s*主', text) and re.search(r'业\s*绩', text + text2):
			
 
				+                table.extract()  # called only for its effect on the tree; the returned node is not needed
			
 
				+                if previous:
			
 
				+                    previous.extract()  # also take out the preceding sibling whose text was checked above
			
 
				+                continue
			
 
				+
			
 
				             trs = self.tb.table2list(table)
			
 
				             # table.extract()
			
 
				             i = 0