|
@@ -35,7 +35,8 @@ dict_predictor = {"codeName":{"predictor":None,"Lock":RLock()},
|
|
|
"punish":{"predictor":None,"Lock":RLock()},
|
|
|
"product":{"predictor":None,"Lock":RLock()},
|
|
|
"product_attrs":{"predictor":None,"Lock":RLock()},
|
|
|
- "channel": {"predictor": None, "Lock": RLock()}}
|
|
|
+ "channel": {"predictor": None, "Lock": RLock()},
|
|
|
+ "deposit_payment_way": {"predictor": None, "Lock": RLock()}}
|
|
|
|
|
|
|
|
|
def getPredictor(_type):
|
|
@@ -62,6 +63,8 @@ def getPredictor(_type):
|
|
|
dict_predictor[_type]["predictor"] = ProductAttributesPredictor()
|
|
|
if _type == "channel":
|
|
|
dict_predictor[_type]["predictor"] = DocChannel()
|
|
|
+ if _type == 'deposit_payment_way':
|
|
|
+ dict_predictor[_type]["predictor"] = DepositPaymentWay()
|
|
|
return dict_predictor[_type]["predictor"]
|
|
|
raise NameError("no this type of predictor")
|
|
|
|
|
@@ -542,6 +545,7 @@ class PREMPredict():
|
|
|
list_entitys:文章的entitys
|
|
|
@return:角色模型的输入数据
|
|
|
'''
|
|
|
+ text_list = []
|
|
|
data_x = []
|
|
|
points_entitys = []
|
|
|
for list_entity,list_sentence in zip(list_entitys,list_sentences):
|
|
@@ -556,6 +560,7 @@ class PREMPredict():
|
|
|
while(p_sentences<len(list_sentence)):
|
|
|
sentence = list_sentence[p_sentences]
|
|
|
if entity.doc_id==sentence.doc_id and entity.sentence_index==sentence.sentence_index:
|
|
|
+ text_list.append(sentence.sentence_text[max(0, entity.wordOffset_begin-10):entity.wordOffset_end+10])
|
|
|
#item_x = embedding(spanWindow(tokens=sentence.tokens,begin_index=entity.begin_index,end_index=entity.end_index,size=settings.MODEL_ROLE_INPUT_SHAPE[1]),shape=settings.MODEL_ROLE_INPUT_SHAPE)
|
|
|
item_x = self.model_role.encode(tokens=sentence.tokens,begin_index=entity.begin_index,end_index=entity.end_index,entity_text=entity.entity_text)
|
|
|
data_x.append(item_x)
|
|
@@ -568,7 +573,7 @@ class PREMPredict():
|
|
|
if len(points_entitys)==0:
|
|
|
return None
|
|
|
|
|
|
- return [data_x,points_entitys]
|
|
|
+ return [data_x,points_entitys, text_list]
|
|
|
|
|
|
|
|
|
def search_money_data(self,list_sentences,list_entitys):
|
|
@@ -579,6 +584,7 @@ class PREMPredict():
|
|
|
list_entitys:文章的entitys
|
|
|
@return:金额模型的输入数据
|
|
|
'''
|
|
|
+ text_list = []
|
|
|
data_x = []
|
|
|
points_entitys = []
|
|
|
for list_entity,list_sentence in zip(list_entitys,list_sentences):
|
|
@@ -594,6 +600,7 @@ class PREMPredict():
|
|
|
while(p_sentences<len(list_sentence)):
|
|
|
sentence = list_sentence[p_sentences]
|
|
|
if entity.doc_id==sentence.doc_id and entity.sentence_index==sentence.sentence_index:
|
|
|
+ text_list.append(sentence.sentence_text[max(0, entity.wordOffset_begin - 8):entity.wordOffset_end])
|
|
|
#item_x = embedding(spanWindow(tokens=sentence.tokens,begin_index=entity.begin_index,end_index=entity.end_index,size=settings.MODEL_MONEY_INPUT_SHAPE[1]),shape=settings.MODEL_MONEY_INPUT_SHAPE)
|
|
|
#item_x = embedding_word(spanWindow(tokens=sentence.tokens, begin_index=entity.begin_index, end_index=entity.end_index, size=10, center_include=True, word_flag=True),shape=settings.MODEL_MONEY_INPUT_SHAPE)
|
|
|
item_x = self.model_money.encode(tokens=sentence.tokens,begin_index=entity.begin_index,end_index=entity.end_index)
|
|
@@ -606,7 +613,7 @@ class PREMPredict():
|
|
|
if len(points_entitys)==0:
|
|
|
return None
|
|
|
|
|
|
- return [data_x,points_entitys]
|
|
|
+ return [data_x,points_entitys, text_list]
|
|
|
|
|
|
def predict_role(self,list_sentences, list_entitys):
|
|
|
datas = self.search_role_data(list_sentences, list_entitys)
|
|
@@ -614,6 +621,7 @@ class PREMPredict():
|
|
|
if datas is None:
|
|
|
return
|
|
|
points_entitys = datas[1]
|
|
|
+ text_list = datas[2]
|
|
|
|
|
|
|
|
|
if USE_PAI_EAS:
|
|
@@ -641,17 +649,24 @@ class PREMPredict():
|
|
|
for i in range(len(predict_y)):
|
|
|
entity = points_entitys[i]
|
|
|
label = np.argmax(predict_y[i])
|
|
|
- values = []
|
|
|
- for item in predict_y[i]:
|
|
|
- values.append(item)
|
|
|
- entity.set_Role(label,values)
|
|
|
-
|
|
|
+ values = predict_y[i]
|
|
|
+ text = text_list[i]
|
|
|
+ if label == 2:
|
|
|
+ if re.search('中标单位和.{,25}签订合同', text):
|
|
|
+ label = 0
|
|
|
+ values[label] = 0.501
|
|
|
+ elif re.search('尊敬的供应商:.{,25}我公司', text):
|
|
|
+ label = 0
|
|
|
+ values[label] = 0.801
|
|
|
+ entity.set_Role(label, values)
|
|
|
+
|
|
|
def predict_money(self,list_sentences,list_entitys):
|
|
|
datas = self.search_money_data(list_sentences, list_entitys)
|
|
|
if datas is None:
|
|
|
return
|
|
|
points_entitys = datas[1]
|
|
|
_data = datas[0]
|
|
|
+ text_list = datas[2]
|
|
|
if USE_PAI_EAS:
|
|
|
_data = np.transpose(np.array(_data),(1,0,2,3))
|
|
|
request = tf_predict_pb2.PredictRequest()
|
|
@@ -677,7 +692,10 @@ class PREMPredict():
|
|
|
entity = points_entitys[i]
|
|
|
label = np.argmax(predict_y[i])
|
|
|
values = predict_y[i]
|
|
|
- if label ==0 and entity.notes=="投资":
|
|
|
+ text = text_list[i]
|
|
|
+ if label == 1 and re.search('[::,。](总金额|总价|单价)', text):
|
|
|
+ values[label] = 0.49
|
|
|
+ elif label ==0 and entity.notes in ["投资", "工程造价"]:
|
|
|
values[label] = 0.49
|
|
|
entity.set_Money(label, values)
|
|
|
|
|
@@ -1065,17 +1083,17 @@ class FormPredictor():
|
|
|
class RoleRulePredictor():
|
|
|
|
|
|
def __init__(self):
|
|
|
- self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|转让|招租|甲|议标|合同主体|比选)(?:人|公司|单位|组织|用户|业主|方|部门)|文章来源|业主名称|需方|询价单位)(是|为|信息|:|:|\s*)$)"
|
|
|
+ self.pattern_tenderee_left = "(?P<tenderee_left>((遴选|采购|招标|项目|竞价|议价|需求|最终|建设|业主|转让|招租|甲|议标|合同主体|比选|委托|询价)(?:人|公司|单位|组织|用户|业主|方|部门)|文章来源|需方)(名称)?(是|为|信息|:|:|\s*)$)"
|
|
|
self.pattern_tenderee_center = "(?P<tenderee_center>(受.{,20}委托))"
|
|
|
self.pattern_tenderee_right = "(?P<tenderee_right>^(\((以下简称)?[\"”]?(招标|采购)(人|单位|机构)\)?))" #|(^[^.。,,::](采购|竞价|招标|施工|监理|中标|物资)(公告|公示|项目|结果|招标))|的.*正在进行询比价)
|
|
|
|
|
|
self.pattern_agency_left = "(?P<agency_left>(代理(?:人|机构|公司|单位|组织)|专业采购机构|集中采购机构|集采机构|招标机构)(.{,4}名,?称|全称|是|为|:|:|[,,]?\s*)$|(受.{,20}委托))"
|
|
|
self.pattern_agency_right = "(?P<agency_right>^(\((以下简称)?[\"”]?(代理)(人|单位|机构)\))|受.{,15}委托)"
|
|
|
# 2020//11/24 大网站规则 中标关键词添加 选定单位|指定的中介服务机构
|
|
|
- self.pattern_winTenderer_left = "(?P<winTenderer_left>((中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$|(选定单位|指定的中介服务机构))[::是为,]+$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))[::是为]+$|((评审结果|名次|排名)[::]第?[一1]名?)$|单一来源(采购)?方式向$|((中标|成交)(结果|信息))(是|为|:|:)$|(单一来源采购(供应商|供货商|服务商))$|[^候选]((分包|标包){,5}供应商|供货商|服务商|供应商名称|服务机构|供方)[::]$)"
|
|
|
+ self.pattern_winTenderer_left = "(?P<winTenderer_left>((中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|各?供应商|方|公司|厂商|商)[::是为]+$|(选定单位|指定的中介服务机构))[::是为,]+$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))[::是为]+$|((评审结果|名次|排名)[::]第?[一1]名?)$|单一来源(采购)?方式向$|((中标|成交)(结果|信息))(是|为|:|:)$|(单一来源采购(供应商|供货商|服务商))$|[^候选]((分包|标包){,5}供应商|供货商|服务商|供应商名称|服务机构|供方)[::]$)"
|
|
|
# self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
|
|
|
- self.pattern_winTenderer_right = "(?P<winTenderer_right>^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))"
|
|
|
- self.pattern_winTenderer_whole = "(?P<winTenderer_center>贵公司.{,15}以.{,15}中标|最终由.{,15}竞买成功|经.{,15}决定[以由].{,15}公司中标|谈判结果:由.{5,20}供货)" # 2020//11/24 大网站规则 中标关键词添加 谈判结果:由.{5,20}供货
|
|
|
+ self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
|
|
|
+ self.pattern_winTenderer_whole = "(?P<winTenderer_center>贵公司.{,15}以.{,15}中标|最终由.{,15}竞买成功|经.{,15}决定[以由].{,15}公司中标|谈判结果:由.{5,20}供货)|中标通知书.{,15}你方" # 2020//11/24 大网站规则 中标关键词添加 谈判结果:由.{5,20}供货
|
|
|
|
|
|
# self.pattern_winTenderer_location = "(中标|中选|中价|乙|成交|承做|施工|供货|承包|竞得|受让)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)|(供应商|供货商|服务商)[::]?$|(第[一1](名|((中标|中选|中价|成交)?(候选)?(人|单位|机构|供应商))))(是|为|:|:|\s*$)|((评审结果|名次|排名)[::]第?[一1]名?)|(单一来源(采购)?方式向.?$)"
|
|
|
|
|
@@ -1193,7 +1211,7 @@ class RoleRulePredictor():
|
|
|
_role = _group.split("_")[0]
|
|
|
_direct = _group.split("_")[1]
|
|
|
_label = {"tenderee":0,"agency":1,"winTenderer":2,"secondTenderer":3,"thirdTenderer":4}.get(_role)
|
|
|
- if _i_span==0 and _direct=="left":
|
|
|
+ if _i_span==0 and _direct=="left" and '各供应商' not in _v_group: #2021/12/22 修正错误中标召回 例子208668937
|
|
|
_flag = True
|
|
|
_distance = abs((len(list_spans[_i_span])-_iter.span()[1]))
|
|
|
list_distance[int(_label)] = min(_distance,list_distance[int(_label)])
|
|
@@ -2186,6 +2204,39 @@ class DocChannel():
|
|
|
# return self.id2type[id], prob
|
|
|
return [{'docchannel':self.id2type[id]}]
|
|
|
|
|
|
+# 保证金支付方式提取
|
|
|
class DepositPaymentWay():
    """Rule-based extractor for how a bid deposit (保证金) is to be paid.

    Two regular expressions locate a short text span that talks about the
    deposit payment method; a keyword alternation is then run over that span
    and every keyword hit is reported.
    """

    def __init__(self,):
        # Heading form, e.g. "保证金缴纳方式:...". Group 3 captures up to 60
        # characters after the heading, stopping at ',' or '。'.
        self.pt = '(保证金的?(交纳|缴纳|应按下列|入账|支付)方式)[::]*([^,。]{,60})'
        # Inline form, e.g. "保证金须以...方式". Group 2 captures up to 8
        # characters between the lead-in word and "方式".
        self.pt2 = '保证金(必?须以|必?须?通过|以)(.{,8})方式'
        kws = ['银行转账', '公?对公方?式?转账', '对公转账', '柜台转账', '(线上|网上)自?行?(缴纳|交纳|缴退|收退)',
               '网上银行支付', '现金存入', '直接缴纳', '支票', '汇票', '本票', '电汇', '转账', '汇款', '随机码',
               '入账', '基本账户转出', '基本账户汇入', '诚信库中登记的账户转出',
               '银行保函', '电子保函', '担保函', '保证保险', '合法担保机构出具的担保', '金融机构、担保机构出具的保函']
        # Longest pattern first: regex alternation takes the first branch that
        # matches, so e.g. '银行转账' must be tried before its substring '转账'.
        self.kws = sorted(kws, key=len, reverse=True)

    def predict(self, content):
        """Extract the deposit payment method keywords from ``content``.

        The heading pattern (``self.pt``) is tried first, then the inline
        pattern (``self.pt2``). The first pattern whose captured span contains
        at least one keyword wins. If the heading pattern matches but its span
        holds no keyword, the inline pattern is still tried instead of
        returning an empty result.

        :param content: document text to search; an empty string or ``None``
            yields the empty result.
        :return: ``{'deposit_patment_way': value}`` where ``value`` is the
            keyword hits joined with ';' in order of appearance, or ``''``
            when nothing is found. The key spelling ("patment") is kept
            unchanged because it is part of the output format.
        """
        pay_way = {'deposit_patment_way': ''}
        if not content:
            return pay_way

        keyword_pattern = '|'.join(self.kws)
        # (locator pattern, index of the group holding the candidate span)
        for locator, group_index in ((self.pt, 3), (self.pt2, 2)):
            located = re.search(locator, content)
            if not located:
                continue
            hits = [kw.group(0)
                    for kw in re.finditer(keyword_pattern, located.group(group_index))]
            if hits:
                pay_way['deposit_patment_way'] = ';'.join(hits)
                break
        return pay_way
|
|
|
+
|
|
|
def getSavedModel():
|
|
|
#predictor = FormPredictor()
|
|
|
graph = tf.Graph()
|