|
@@ -1311,7 +1311,7 @@ class RoleRulePredictor():
|
|
|
self.pattern_winTenderer_left = "(?P<winTenderer_left>" \
|
|
|
"(乙|竞得|受让|买受|签约|施工|供货|供应?|合作|承做|承包|承建|承销|承保|承接|承制|承租((包))?)(候选)?(人|单位|机构|供应商|方|公司|企业|厂商|商|社会资本方?)(:?单位名称|:?名称|盖章)?[::是为]+$" \
|
|
|
"|(选定单位|指定的中介服务机构|实施主体|中标银行|中标通知书,致)[::是为]+$" \
|
|
|
- "|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::是为]+$" \
|
|
|
+ "|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::是为]+$|成交供应商信息[,:]?(序号1)?:?" \
|
|
|
"|单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))[::是为]+$" \
|
|
|
"|现(公布|宣布|公示)中标单位如下:$|现将中标单位(公布|公示)如下:$|现宣布以下(企业|单位|公司)中标:$)" # 承办单位:不作为中标 83914772
|
|
|
self.pattern_winTenderer_left_w0 = "(?P<winTenderer_left_w0>" \
|
|
@@ -2241,7 +2241,7 @@ class MoneyGrade():
|
|
|
if ser:
|
|
|
groupdict = pattern.split('>')[0].replace('(?P<', '')
|
|
|
_role, _direct, _prob = groupdict.split('_')
|
|
|
- if re.search('单价', context[-4:]) or float(entity.entity_text):
|
|
|
+ if re.search('单价', context[-4:]) or re.search('(最低|风险)控制价', context) or float(entity.entity_text)<100:
|
|
|
_prob = 6
|
|
|
_label = role2id.get(_role)
|
|
|
if _label != entity.label:
|
|
@@ -2249,16 +2249,16 @@ class MoneyGrade():
|
|
|
_prob = int(_prob) * 0.1
|
|
|
# print('规则修改金额概率前:', entity.entity_text, entity.label, entity.values)
|
|
|
if in_att:
|
|
|
- _prob = _prob - 0.2
|
|
|
+ _prob = max(0.5, _prob - 0.2)
|
|
|
entity.values[_label] = _prob + entity.values[_label] / 20
|
|
|
not_found = 0
|
|
|
# print('规则修改金额概率后:', entity.entity_text, entity.label, entity.values)
|
|
|
break
|
|
|
if not_found and entity.values[entity.label] > min_prob:
|
|
|
- if re.search('单价', context[-4:]) or float(entity.entity_text)<100:
|
|
|
+ if re.search('单价', context[-4:]) or re.search('(最低|风险)控制价', context) or float(entity.entity_text)<100:
|
|
|
_prob = 0.6
|
|
|
elif in_att:
|
|
|
- _prob = min_prob - 0.1
|
|
|
+ _prob = max(0.5, min_prob - 0.1)
|
|
|
else:
|
|
|
_prob = min_prob
|
|
|
# _prob = min_prob - 0.1 if in_att else min_prob
|
|
@@ -4963,7 +4963,11 @@ class TableTag2List():
|
|
|
# insert into self._output
|
|
|
try:
|
|
|
if text_process != None:
|
|
|
- text = [re.sub('\xa0','',text_process(cell,final=False)),0]
|
|
|
+ # text = [re.sub('\xa0', '', text_process(cell, final=False)), 0]
|
|
|
+ td_text = re.sub('\xa0', '', text_process(cell, final=False))
|
|
|
+ if td_text == "":
|
|
|
+ td_text = ' '
|
|
|
+ text = [td_text,0]
|
|
|
else:
|
|
|
text = str(cell.get_text()).replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '').replace("(", "(").replace(')', ')').replace('?', '')
|
|
|
text = re.sub('\s', '', text)[:200] # 只需取前200字即可
|