Эх сурвалжийг харах

Merge remote-tracking branch 'origin/master'

lsm 3 долоо хоног өмнө
parent
commit
feec997d4c

+ 17 - 4
BiddingKG/dl/channel/channel_bert.py

@@ -443,6 +443,17 @@ def channel_predict(title,text):
     # to torch data
     text = [text]
     text_max_len = 2000
+    # text = [tokenizer.encode_plus(
+    #     _t,
+    #     add_special_tokens=True,  # 添加特殊标记,如[CLS]和[SEP]
+    #     max_length=text_max_len,  # 设置最大长度
+    #     padding='max_length',  # 填充到最大长度
+    #     truncation=True,  # 截断超过最大长度的文本
+    #     return_attention_mask=True,  # 返回attention_mask
+    #     return_tensors='pt'  # 返回PyTorch张量
+    # ) for _t in text]
+    # text = [torch.LongTensor(np.array([_t['input_ids'].numpy()[0] for _t in text])).to(device),
+    #      torch.LongTensor(np.array([_t['attention_mask'].numpy()[0] for _t in text])).to(device)]
     text = [tokenizer.encode_plus(
         _t,
         add_special_tokens=True,  # 添加特殊标记,如[CLS]和[SEP]
@@ -450,10 +461,11 @@ def channel_predict(title,text):
         padding='max_length',  # 填充到最大长度
         truncation=True,  # 截断超过最大长度的文本
         return_attention_mask=True,  # 返回attention_mask
-        return_tensors='pt'  # 返回PyTorch张量
+        # return_tensors='pt'  # 返回PyTorch张量
+        return_tensors=None  #不返回PyTorch张量
     ) for _t in text]
-    text = [torch.LongTensor(np.array([_t['input_ids'].numpy()[0] for _t in text])).to(device),
-         torch.LongTensor(np.array([_t['attention_mask'].numpy()[0] for _t in text])).to(device)]
+    text = [torch.LongTensor(np.array([_t['input_ids'] for _t in text])).to(device),
+            torch.LongTensor(np.array([_t['attention_mask'] for _t in text])).to(device)]
     # predict
     with torch.no_grad():
         outputs = model(None, text)
@@ -513,6 +525,7 @@ def merge_channel(list_articles,channel_dic,original_docchannel):
         front_text_len = len(text)//3 if len(text)>300 else 100
         front_text = text[:front_text_len]
         pred_channel = class_dict[pred_channel]
+        # print('pred_channel',pred_channel,'docchannel',docchannel,'original_docchannel',original_docchannel)
         if pred_channel == docchannel:
             channel_dic['docchannel']['use_original_docchannel'] = 0
         else:
@@ -580,9 +593,9 @@ def merge_channel(list_articles,channel_dic,original_docchannel):
             # print(text, '\n pred_res', pred)
             if pred is not None and original_docchannel: # 无original_docchannel时不进行对比校正
                 channel_dic = merge_rule(title,text,docchannel,pred,channel_dic,original_docchannel)
-
     elif doctype=='采招数据' and docchannel=="":
         pred = channel_predict(title, text)
+        # print(text, '\n pred_res', pred)
         if pred is not None:
             pred = class_dict[pred]
             channel_dic['docchannel']['docchannel'] = pred

+ 5 - 1
BiddingKG/dl/interface/Preprocessing.py

@@ -3950,7 +3950,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                            begin_index_temp, end_index_temp,in_attachment=in_attachment))
 
             # 时间实体格式补充
-            re_time_new = re.compile("20\d{2}-\d{1,2}-\d{1,2}|20\d{2}/\d{1,2}/\d{1,2}|20\d{2}\.\d{1,2}\.\d{1,2}|20\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])")
+            re_time_new = re.compile("20\d{2}-\d{1,2}-\d{1,2}|20\d{2}-(:?0[1-9]|1[0-2]|[1-9])|20\d{2}/\d{1,2}/\d{1,2}|20\d{2}\.\d{1,2}\.\d{1,2}|20\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])")
             entity_type = "time"
             for _time in re.finditer(re_time_new,sentence_text):
                 entity_text = _time.group()
@@ -3970,6 +3970,10 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                     if _time.end()!=len(sentence_text) and re.search("[\da-zA-z]",sentence_text[_time.end():_time.end()+1]):
                         continue
                     entity_text = entity_text[:4] + "-" + entity_text[4:6] + "-" + entity_text[6:8]
+                # 例:2025-05
+                if re.search("^20\d{2}-(:?0[1-9]|1[0-2]|[1-9])$",entity_text):
+                    if _time.end()!=len(sentence_text) and re.search("[\da-zA-z]",sentence_text[_time.end():_time.end()+1]):
+                        continue
                 if not timeFormat(entity_text):
                     continue
 

+ 4 - 3
BiddingKG/dl/interface/extract.py

@@ -318,10 +318,11 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     start_time = time.time()
     sentence2_list, sentence2_list_attach = extract_sentence_list(list_sentences[0])
     parse_document = ParseDocument(text, True,list_obj=sentence2_list)
-    requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy = extract_parameters(parse_document)
+    requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy, winter_scope = extract_parameters(parse_document)
+
     if sentence2_list_attach!=[] and requirement_text == '' and aptitude_text == '' and addr_bidopen_text=="":
         parse_document = ParseDocument(text, True, list_obj=sentence2_list_attach)
-        requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy = extract_parameters(parse_document)
+        requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy, winter_scope = extract_parameters(parse_document)
     # print('out_lines',out_lines)
     # if addr_bidopen_text == '':
     #     addr_bidopen_text = extract_addr(list_articles[0].content)
@@ -430,7 +431,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     entityLink.link_entitys(list_entitys)
     doctitle_refine = entityLink.doctitle_refine(title)
     nlp_enterprise,nlp_enterprise_attachment, dict_enterprise = entityLink.get_nlp_enterprise(list_entitys[0])
-    prem = getAttributes.getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time)
+    prem = getAttributes.getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time,winter_scope)
     log("get attributes done of doc_id%s"%(doc_id))
     cost_time["attrs"] = round(time.time()-start_time,2)
 

+ 230 - 38
BiddingKG/dl/interface/getAttributes.py

@@ -886,7 +886,12 @@ def getPackagesFromArticle(list_sentence, list_entity):
 # km配对方法
 def dispatch(match_list):
     main_roles = list(set([match.main_role for match in match_list]))
+    # print('main_roles',[i.entity_text for i in main_roles])
     attributes = list(set([match.attribute for match in match_list]))
+    # try:
+    #     print('attributes',[i.entity_text for i in attributes])
+    # except:
+    #     pass
 
     label = np.zeros(shape=(len(main_roles), len(attributes)))
     for match in match_list:
@@ -905,7 +910,7 @@ def dispatch(match_list):
 from BiddingKG.dl.common.Utils import getUnifyMoney
 from BiddingKG.dl.interface.modelFactory import Model_relation_extraction
 relationExtraction_model = Model_relation_extraction()
-def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_sentence,list_entity,list_outline,on_value = 0.5,on_value_person=0.5,sentence_len=4):
+def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_sentence,list_entity,list_outline,winter_scope,on_value = 0.5,on_value_person=0.5,sentence_len=4):
     '''
     @param:
         PackDict:文章包dict
@@ -1284,6 +1289,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
     agency_contact = set()
     agency_phone = set()
     winter_contact = set()
+    rule_winter_phone = set()
     for _person in person_list:
         if _person.label == 1:
             tenderee_contact.add(_person.entity_text)
@@ -1303,22 +1309,39 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                    '[\(|\(]0[1-9]\d{1,2}[\)|\)]-?[2-9]\d{6}\d?-?\d{,4}|' \
                    '[2-9]\d{6,7})'
     re_tenderee_phone = re.compile(
-        "(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。代理]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
+        # "(?:(?:(?:采购|招标|议价|议标|比选|业主|委托)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)(?:单位)?[^。代理]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
+        "(?:(?:(?:遴选|寻源|采购|招标|竞价|议价|比选|(?:[^受被]|^)委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|项目|需求?|甲方?|转让|招租|议标|合同主体|挂牌|出租|出让|出售|标卖|处置|发包|最终|建设|业主|竞卖|申购|公选)"
+        "(?:人|方|商|单位|组织|用户|业主|主体|部门|公司|企业))(?:单位)?[^。代理]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
         # 电话号码
         + phone_pattern)
     # 例:"采购人地址和联系方式:峨边彝族自治县教育局,0833-5226788,"
     re_tenderee_phone2 = re.compile(
-        "(?:(?:(?:采购|招标|议价|议标|比选)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)[^。代理]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,20}?)"
+        # "(?:(?:(?:采购|招标|议价|议标|比选|业主)(?:人|公司|单位|组织|部门)|建设(?:单位|业主)|(?:采购|招标|甲)方|询价单位|项目业主|业主)(?:单位)?[^。代理]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,20}?)"
+        "(?:(?:(?:遴选|寻源|采购|招标|竞价|议价|比选|(?:[^受被]|^)委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|项目|需求?|甲方?|转让|招租|议标|合同主体|挂牌|出租|出让|出售|标卖|处置|发包|最终|建设|业主|竞卖|申购|公选)"
+        "(?:人|方|商|单位|组织|用户|业主|主体|部门|公司|企业))(?:单位)?[^。代理]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话|联系人和联系方式)[::]?[^。]{0,20}?)"
         # 电话号码
         + phone_pattern)
     re_agent_phone = re.compile(
-        "(?:(?:代理(?:人|机构|公司|单位|组织|方)|采购机构|集中采购机构|集采机构|招标机构)[^。]{0,5}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,7}?)"
+        "(?:(?:(?:代理|[受被]委托)(?:人|方|商|机构|公司|单位|组织|企业)|采购机构|集中采购机构|集采机构|招标机构)[^。]{0,5}(?:电话|联系方式|联系人|联系电话|联系人和联系方式)[::]?[^。]{0,7}?)"
         # 电话号码
         + phone_pattern)
     re_agent_phone2 = re.compile(
-        "(?:(?:代理(?:人|机构|公司|单位|组织|方)|采购机构|集中采购机构|集采机构|招标机构)[^。]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话)[::]?[^。]{0,20}?)"
+        "(?:(?:(?:代理|[受被]委托)(?:人|方|商|机构|公司|单位|组织|企业)|采购机构|集中采购机构|集采机构|招标机构)[^。]{0,3}(?:地址)[^。]{0,3}(?:电话|联系方式|联系人|联系电话|联系人和联系方式)[::]?[^。]{0,20}?)"
         # 电话号码
         + phone_pattern)
+    re_win_tenderer_phone = re.compile(
+        "(?:(?:(?:乙|竞得|受让|买受|签约|供货|供应?|合作|承做|承包|承建|承销|承保|承接|承制|承担|承修|承租(?:(包))?|入围|入选|竞买|中标|中选|中价|中签|成交|候选)"
+        "(?:候选|投标)?(?:人|单位|(?:中介)?(?:服务)?机构|供应商|客户|方|公司|企业|厂商|商|社会资本方?)|选定单位|中[标选]银行|成交对象)[^。审核]{0,5}(?:负责人|联系人|项目)?(?:经理|电话|联系方式|联系人|负责人|联系电话|联系人和联系方式)[::]?[^。]{0,7}?)"
+        + phone_pattern)
+    re_win_tenderer_phone2 = re.compile(
+        "(?:(?:(?:乙|竞得|受让|买受|签约|供货|供应?|合作|承做|承包|承建|承销|承保|承接|承制|承担|承修|承租(?:(包))?|入围|入选|竞买|中标|中选|中价|中签|成交|候选)"
+        "(?:候选|投标)?(?:人|单位|(?:中介)?(?:服务)?机构|供应商|客户|方|公司|企业|厂商|商|社会资本方?)|选定单位|中[标选]银行|成交对象)[^。]{0,3}(?:地址)[^。审核]{0,3}(?:负责人|联系人|项目)?(?:经理|电话|联系方式|联系人|负责人|联系电话|联系人和联系方式)[::]?[^。]{0,20}?)"
+        + phone_pattern)
+    not_win_tenderer_contact = re.compile("纪检|监察|质疑|投诉|监督|受理|请.{0,4}(联系|与)"
+                                          "|(遴选|寻源|采购|招标|竞价|议价|比选|委托|询比?价|比价|评选|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|项目|需求?|甲方?|转让|招租|议标|合同主体|挂牌|出租|出让|出售|标卖|处置|发包|最终|建设|业主|竞卖|申购|公选|发布|代理|拍卖|转出){1,2}"
+                                          "(人|方|商|单位|组织|用户|业主|主体|部门|公司|企业|工厂|银行|机构){0,2}"
+                                          "[\u4e00-\u9fa5]{0,4}(联系|咨询|电话)(人|电话|方式)?")
+
     content = ""
     for _sentence in list_sentence:
         content += "".join(_sentence.tokens)
@@ -1364,6 +1387,20 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                     for one_phone in _phone:
                         PackDict["Project"]["roleList"][i].linklist.append(("", one_phone))
                         agency_phone.add(one_phone)
+    # 中标人联系方式规则筛选
+    _winter_phone = re.findall(re_win_tenderer_phone, content)
+    if _winter_phone:
+        for _phone in _winter_phone:
+            _phone = _phone.split("/")
+            for one_phone in _phone:
+                rule_winter_phone.add(one_phone)
+    _winter_phone2 = re.findall(re_win_tenderer_phone2, content)
+    if _winter_phone2:
+        for _phone in _winter_phone2:
+            _phone = _phone.split("/")
+            for one_phone in _phone:
+                rule_winter_phone.add(one_phone)
+
     # 正则提取电话号码实体
     # key_word = re.compile('((?:电话|联系方式|联系人).{0,4}?)([0-1]\d{6,11})')
     phone = re.compile('1[3-9][0-9][-—-―]?\d{4}[-—-―]?\d{4}|'
@@ -1441,7 +1478,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         error_numStr_index.append(numStr_index)
                         last_phone_mask = False
                         continue
-                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|费率|标价|证号|价格|型号|附件|代码|列号|行号|税号|[\(\(]万?元[\)\)]|[a-zA-Z]+\d*$", re.sub(",","",phone_left)):
+                if re.search("身份证号?码?|注册[证号]|帐号|编[号码]|报价|费率|标价|证号|资格证|资质|价格|金额|型号|附件|代码|列号|行号|税号|[\(\(]万?元[\)\)]|[a-zA-Z]+\d*$", re.sub(",","",phone_left)):
                     error_numStr_index.append(numStr_index)
                     last_phone_mask = False
                     continue
@@ -1609,7 +1646,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         if (_subject.label==0 and _object.entity_text in agency_contact ) or (_subject.label==1 and _object.entity_text in tenderee_contact):
                             continue
                         # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
-                        if _subject.label in [2,3,4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系|^联系人|请.{0,4}联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
+                        # if _subject.label in [2,3,4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系|^联系人|请.{0,4}联系",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-10):_object.wordOffset_begin]):
+                        if _subject.label in [2,3,4] and re.search(not_win_tenderer_contact,list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin-15):_object.wordOffset_begin]):
+                            # print('not_win_tenderer_contact1')
                             continue
                         # 角色为招标/代理人,排除"纪检|监察"相关的联系人
                         if _subject.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[_object.sentence_index].sentence_text[max(0,_object.wordOffset_begin - 10):_object.wordOffset_begin]):
@@ -1676,7 +1715,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             if temp.begin_index>combo[0].begin_index:
                                 is_continue = True
                                 break
-                if is_continue: continue
+                if is_continue:
+                    continue
                 combo[0].person_phone.append(combo[1])
                 linked_connetPerson.add(combo[0])
                 linked_phone.add(combo[1])
@@ -1699,7 +1739,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
         if entity.entity_type in ['company', 'org'] and entity.label!=5:
             match_nums = 0
             company_nums = 0  # 经过其他公司的数量
-            location_nums = 0  # 经过电话的数量
+            location_nums = 0  # 经过住址的数量
             for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
                 after_entity = company_lacation_EntityList[after_index]
                 if after_entity.entity_type == "location":
@@ -1711,7 +1751,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                     sentence_distance = after_entity.sentence_index - entity.sentence_index
                     value = (-1 / 2 * (distance ** 2)) / 10000
                     if sentence_distance == 0:
-                        if distance < 80:
+                        if distance < 60:
                             t_match_list.append(Match(entity, after_entity, value))
                             match_nums += 1
                             if company_nums:
@@ -1745,7 +1785,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
             # print('loc_relation2',_company.entity_text,_relation.entity_text)
             _company.pointer_address = _relation
     # "联系人——联系电话" 链接规则补充
-    person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['company','org','location']]
+    # person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['company','org','location']]
+    person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['location']]
     person_phone_EntityList = sorted(person_phone_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
     t_match_list = []
     for ent_idx in range(len(person_phone_EntityList)):
@@ -1775,7 +1816,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             else:
                                 break
                     else:
-                        if distance < 40:
+                        if distance < 30:
                             # value = (-1 / 2 * (distance ** 2)) / 10000
                             t_match_list.append(Match(entity, after_entity, value))
                             match_nums += 1
@@ -1783,8 +1824,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 byNotPerson_match_nums += 1
                             else:
                                 break
-                else:
+                elif after_entity.entity_type == "person":
                     person_nums += 1
+                elif after_entity.entity_type in ["company","org"]:
+                    break
             # 前向查找属性
             if ent_idx != 0 and (not match_nums or not byNotPerson_match_nums):
                 previous_entity = person_phone_EntityList[ent_idx - 1]
@@ -1792,12 +1835,13 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                     # if previous_entity.sentence_index == entity.sentence_index:
                     distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
                             tokens_num_dict[previous_entity.sentence_index] + previous_entity.end_index)
-                    if distance < 40:
+                    if distance < 30:
                         # 前向 没有 /10000
                         value = (-1 / 2 * (distance ** 2))
                         t_match_list.append(Match(entity, previous_entity, value))
     # km算法分配求解(person-phone)
     t_match_list = [mat for mat in t_match_list if mat.main_role not in linked_connetPerson and mat.attribute not in linked_phone]
+    # print([(mat.main_role.entity_text,mat.attribute.entity_text) for mat in t_match_list])
     personphone_result = dispatch(t_match_list)
     personphone_result = sorted(personphone_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
     for match in personphone_result:
@@ -1911,9 +1955,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             person_phone = [phone for phone in per.person_phone] if per.person_phone else []
                             if not person_phone:
                                 if per.entity_text not in tenderee_contact and per.entity_text not in agency_contact:
-                                    PackDict[k]["roleList"][i].linklist.append((per.entity_text, ""))
-                                    winter_contact.add(per.entity_text)
-                                    continue
+                                    # 角色为中标候选人,联系人无号码且上文没有联系关键词时排除
+                                    if re.search("联系人|联系方式|电话|负责人|经理|法人|法定代表人",list_sentence[per.sentence_index].sentence_text[max(0, per.wordOffset_begin - 10):per.wordOffset_begin]):
+                                        PackDict[k]["roleList"][i].linklist.append((per.entity_text, ""))
+                                        winter_contact.add(per.entity_text)
+                                        continue
                             for _p in person_phone:
                                 if per.entity_text not in tenderee_contact and _p.entity_text not in tenderee_phone and \
                                         per.entity_text not in agency_contact and _p.entity_text not in agency_phone:
@@ -1945,6 +1991,122 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             for _p in person_phone:
                                 if per.entity_text not in tenderee_contact and _p.entity_text not in tenderee_phone and per.entity_text not in winter_contact:
                                     PackDict[k]["roleList"][i].linklist.append((per.entity_text, _p.entity_text))
+
+    # 使用中标信息大纲提取联系人
+    winter_scope_group = []
+    if winter_scope:
+        winter_scope_begin = winter_scope[0]
+        winter_scope_end = winter_scope[1]
+        # print(list_sentence[winter_scope_begin[0]].sentence_text[winter_scope_begin[1]:winter_scope_end[1]])
+        winter_temporary_list = []
+        for entity in list_entity:
+            if entity.entity_type in ['org', 'company', 'person']:
+                winter_temporary_list.append(entity)
+        winter_temporary_list = sorted(winter_temporary_list, key=lambda x: (x.sentence_index, x.begin_index))
+        winter_temporary_list2 = []
+        for _entity in winter_temporary_list:
+            if _entity.sentence_index>=winter_scope_begin[0] and _entity.sentence_index<=winter_scope_end[0]:
+                if (_entity.sentence_index==winter_scope_begin[0] and _entity.wordOffset_begin>=winter_scope_begin[1]) or \
+                        _entity.sentence_index>winter_scope_begin[0]:
+                    if (_entity.sentence_index == winter_scope_end[0] and _entity.wordOffset_end<=winter_scope_end[1]) or \
+                            _entity.sentence_index<winter_scope_end[0]:
+                        winter_temporary_list2.append(_entity)
+        # print('winter_scope_entity',[i.entity_text for i in winter_temporary_list2])
+        winter_scope_group = winter_temporary_list2
+
+        match_list_winter = []
+        for index in range(len(winter_scope_group)):
+            entity = winter_scope_group[index]
+            if entity.entity_type in ['company','org']:
+                match_nums = 0
+                for after_index in range(index + 1, min(len(winter_scope_group), index + 4)):
+                    after_entity = winter_scope_group[after_index]
+                    if match_nums > 2:
+                        break
+                    if after_entity.entity_type == 'person':
+                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
+                                tokens_num_dict[entity.sentence_index] + entity.end_index)
+                        # 实体为中标人/候选人,联系人已确定类别【1,2】
+                        if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
+                            break
+                        if entity.label in [2, 3, 4] and distance >= 30:
+                            break
+                        # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
+                        if entity.label in [2, 3, 4] and re.search(not_win_tenderer_contact, list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 15):after_entity.wordOffset_begin]):
+                            break
+                        # 角色为中标候选人,联系人无号码且上文没有联系关键词时排除
+                        if entity.label in [2, 3, 4] and not after_entity.person_phone and not re.search(
+                                "联系人|联系方式|电话|负责人|经理|法人|法定代表人", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                            continue
+                        # 角色为招标/代理人,排除"纪检|监察"相关的联系人
+                        if entity.label in [0, 1] and re.search("纪检|监察|乙方|中标", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                            break
+                        if after_entity.sentence_index != 0 and after_entity.wordOffset_begin <= 10:
+                            if entity.label in [2, 3, 4] and re.search("请.{0,5}联系",list_sentence[after_entity.sentence_index - 1].sentence_text[-10:] +
+                                                                       list_sentence[after_entity.sentence_index].sentence_text[0:after_entity.wordOffset_begin]):
+                                continue
+                        if distance < 80:
+                            if (entity.label == 0 and after_entity.label == 1) or (
+                                    entity.label == 1 and after_entity.label == 2):
+                                distance = distance / 100
+                            value = (-1 / 2 * (distance ** 2)) / 10000
+                            match_list_winter.append(Match(entity, after_entity, value))
+                            match_nums += 1
+                # 前向查找匹配
+                if index != 0:
+                    previous_entity = winter_scope_group[index - 1]
+                    if previous_entity.entity_type == 'person' and previous_entity.label in [1,2,3]:
+                        if entity.label in [2, 3, 4] and previous_entity.label in [1, 2]:
+                            continue
+                        # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
+                        if entity.label in [2, 3, 4] and re.search(not_win_tenderer_contact, list_sentence[previous_entity.sentence_index].sentence_text[
+                                                                                             max(0,previous_entity.wordOffset_begin - 15):previous_entity.wordOffset_begin]):
+                            break
+                        # 角色为中标候选人,联系人无号码且上文没有联系关键词时排除
+                        if entity.label in [2, 3, 4] and not previous_entity.person_phone and not re.search(
+                                "联系人|联系方式|电话|负责人|经理|法人|法定代表人",list_sentence[previous_entity.sentence_index].sentence_text[
+                                max(0, previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
+                            continue
+                        # 角色为招标/代理人,排除"纪检|监察"相关的联系人
+                        if entity.label in [0, 1] and re.search("纪检|监察|乙方|中标", list_sentence[previous_entity.sentence_index].sentence_text[
+                                                                               max(0,previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
+                            break
+                        if previous_entity.sentence_index == entity.sentence_index:
+                            distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
+                                    tokens_num_dict[
+                                        previous_entity.sentence_index] + previous_entity.end_index)
+                            if distance < 30:
+                                # 距离相等时,前向添加处罚值
+                                # distance += 1
+                                # 前向 没有 /10000
+                                value = (-1 / 2 * (distance ** 2))
+                                match_list_winter.append(Match(entity, previous_entity, value))
+        # test
+        # match_list_winter = company_contact_link([winter_scope_group])
+        # km算法分配求解
+        result_winter = dispatch(match_list_winter)
+        for match in result_winter:
+            _company = match[0]
+            _person = match[1]
+            _person = _person.entity_text
+            # 更新中标人联系方式
+            if _company.label==2:
+                phone_ = [i.entity_text for i in match[1].person_phone] if match[1].person_phone else []
+                for k in PackDict.keys():
+                    for i in range(len(PackDict[k]["roleList"])):
+                        if PackDict[k]["roleList"][i].role_name == "win_tenderer":
+                            if PackDict[k]["roleList"][i].entity_text == _company.entity_text:
+                                if _person not in tenderee_contact and len(set(phone_) & set(tenderee_phone)) == 0 and \
+                                        _person not in agency_contact and len(set(phone_) & set(agency_phone)) == 0:
+                                    if not phone_:
+                                        PackDict[k]["roleList"][i].linklist.append((_person, ""))
+                                    for p in phone_:
+                                        PackDict[k]["roleList"][i].linklist.append((_person, p))
+                if phone_:
+                    for p in phone_:
+                        rule_winter_phone.add(p)
+                    # print('rule_winter_phone',rule_winter_phone)
+
     re_split = re.compile("[^\u4e00-\u9fa5、](十一|十二|十三|十四|十五|一|二|三|四|五|六|七|八|九|十)、")
     split_list = [0] * 16
     split_dict = {
@@ -2043,11 +2205,17 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 # 实体为中标人/候选人,联系人已确定类别【1,2】
                                 if entity.label in [2, 3, 4] and after_entity.label in [1, 2]:
                                     break
-                                if entity.label in [2, 3, 4] and distance>=20:
+                                if entity.label in [2, 3, 4] and distance>=30:
                                     break
                                 # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
-                                if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系|(采购|招标)人?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                # if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位|业主)?联系|(采购|招标)人?联系|请.{0,4}联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                if entity.label in [2, 3, 4] and re.search(not_win_tenderer_contact, list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 15):after_entity.wordOffset_begin]):
+                                    # print('not_win_tenderer_contact2')
                                     break
+                                # 角色为中标候选人,联系人无号码且上文没有联系关键词时排除
+                                # print('test',after_entity.entity_text,after_entity.person_phone,list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin])
+                                if entity.label in [2, 3, 4] and not after_entity.person_phone and not re.search("联系人|联系方式|电话|负责人|经理|法人|法定代表人",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                    continue
                                 # 角色为招标/代理人,排除"纪检|监察"相关的联系人
                                 if entity.label in [0,1] and re.search("纪检|监察|乙方|中标",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
                                     break
@@ -2134,7 +2302,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                             new_split_list[split_index][1]:
                                         mid_sentence = mid_sentence[max(0, phone_begin - 15):phone_begin].replace(",", "")
                                         if re.search(key_phone, mid_sentence):
-                                            if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系",mid_sentence[-8:]):
+                                            # if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位|业主)?联系|(采购|招标)人?联系|请.{0,4}联系",mid_sentence[-10:]):
+                                            if entity.label in [2, 3, 4] and re.search(not_win_tenderer_contact,mid_sentence[-15:]):
+                                                # print('not_win_tenderer_contact3')
                                                 pass
                                             else:
                                                 distance = 1
@@ -2187,7 +2357,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         p_phone = [p.entity_text for p in next_entity.person_phone] if next_entity.person_phone else []
                                         if next_entity.entity_type == 'person' and _phone in p_phone:
                                             pass
-                                        elif entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系", mid_sentence[-8:]):
+                                        # elif entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位|业主)?联系|(采购|招标)人?联系|请.{0,4}联系", mid_sentence[-10:]):
+                                        elif entity.label in [2, 3, 4] and re.search(not_win_tenderer_contact, mid_sentence[-15:]):
+                                            # print('not_win_tenderer_contact4')
                                             pass
                                         else:
                                             distance = (tokens_num_dict[
@@ -2211,6 +2383,19 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if previous_entity.entity_type == 'person' and previous_entity.label in [1, 2, 3]:
                                     if entity.label in [2, 3, 4] and previous_entity.label in [1, 2]:
                                         continue
+                                    # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
+                                    if entity.label in [2, 3, 4] and re.search(not_win_tenderer_contact,list_sentence[previous_entity.sentence_index].sentence_text[
+                                                                               max(0,previous_entity.wordOffset_begin - 15):previous_entity.wordOffset_begin]):
+                                        # print('not_win_tenderer_contact2')
+                                        break
+                                    # 角色为中标候选人,联系人无号码且上文没有联系关键词时排除
+                                    if entity.label in [2, 3,4] and not previous_entity.person_phone and not re.search("联系人|联系方式|电话|负责人|经理|法人|法定代表人",
+                                            list_sentence[previous_entity.sentence_index].sentence_text[max(0,previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
+                                        continue
+                                    # 角色为招标/代理人,排除"纪检|监察"相关的联系人
+                                    if entity.label in [0, 1] and re.search("纪检|监察|乙方|中标", list_sentence[previous_entity.sentence_index].sentence_text[
+                                                                                           max(0,previous_entity.wordOffset_begin - 10):previous_entity.wordOffset_begin]):
+                                        break
                                     if previous_entity.sentence_index == entity.sentence_index:
                                         distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
                                                 tokens_num_dict[
@@ -2382,10 +2567,12 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
     # "roleList"中联系人电话去重
     tenderee_agency_phone = []
+    tenderee_agency_contact = []
     for k in PackDict.keys():
         for i in range(len(PackDict[k]["roleList"])):
             if PackDict[k]["roleList"][i].role_name in ['agency','tenderee']:
                 tenderee_agency_phone.extend([person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[1]])
+                tenderee_agency_contact.extend([person_phone[0]+'-'+person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist])
             # 带有联系人的电话
             with_person = [person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[0]]
             # 带有电话的联系人
@@ -2405,22 +2592,25 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
     for k in PackDict.keys():
         for i in range(len(PackDict[k]["roleList"])):
             if PackDict[k]["roleList"][i].role_name in ['win_tenderer', 'second_tenderer','third_tenderer']:
-                if tenderee_agency_phone:
+                if tenderee_agency_phone or tenderee_agency_contact:
                     remove_list = []
                     for item in PackDict[k]["roleList"][i].linklist:
                         if item[1] and item[1] in tenderee_agency_phone:
                             remove_list.append(item)
+                        elif item[0]+'-'+item[1] in tenderee_agency_contact:
+                            remove_list.append(item)
+                    for _item in remove_list:
+                        PackDict[k]["roleList"][i].linklist.remove(_item)
+                elif not tenderee_agency_phone:
+                    # 公告中无招标代理联系方式时,可排除中标联系方式
+                    remove_list = []
+                    for _item in PackDict[k]["roleList"][i].linklist:
+                        # 排除非正则规则识别的联系方式
+                        if _item[1] not in rule_winter_phone:
+                            remove_list.append(_item)
+                    # print('remove_list',remove_list)
                     for _item in remove_list:
                         PackDict[k]["roleList"][i].linklist.remove(_item)
-                # else:
-                #     # 公告中无招标代理联系方式时,可排除中标联系方式
-                #     remove_list = []
-                #     for _item in PackDict[k]["roleList"][i].linklist:
-                #         # 有联系方式
-                #         if _item[1]:
-                #             remove_list.append(_item)
-                #     for _item in remove_list:
-                #         PackDict[k]["roleList"][i].linklist.remove(_item)
     # PackDict更新company/org地址
     last_role_prob = {}
     for ent in pre_entity:
@@ -2932,7 +3122,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
     # 公告中只有"招标人"且无"联系人"链接时
     if len(PackDict)==1:
         k = list(PackDict.keys())[0]
-        tenderee_agency_role = [role for role in PackDict[k]["roleList"] if role.role_name in ['tenderee','agency']]
+        tenderee_agency_role = [role for role in PackDict[k]["roleList"] if role.role_name in ['tenderee','agency','win_tenderer']]
         if len(tenderee_agency_role)==1:
             exist_person = []
             exist_phone = []
@@ -2949,7 +3139,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                     if not get_contacts:
                         # 根据大纲Outline类召回联系人
                         for outline in list_outline:
-                            if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系",outline.outline_summary):
+                            if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系",outline.outline_summary) and \
+                                    not re.search("代理|乙方|竞得|受让|买受|签约|供货|供应|承做|承包|承建|承销|承保|承接|承制|承担|承修|承租(?:(包))?|入围|入选|竞买|中标|中选|中价|中签|成交|候选",outline.outline_summary):
                                 for t_person in [p for p in temporary_list2 if p.entity_type=='person' and p.label==3]:
                                     if words_num_dict[t_person.sentence_index] + t_person.wordOffset_begin >= words_num_dict[outline.sentence_begin_index] + outline.wordOffset_begin and words_num_dict[
                                         t_person.sentence_index] + t_person.wordOffset_end < words_num_dict[outline.sentence_end_index] + outline.wordOffset_end:
@@ -2993,7 +3184,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                             for _start, _end in new_split_list:
                                 temp_sentence = _content[_start:_end]
                                 sentence_outline = temp_sentence.split(",::")[0]
-                                if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline):
+                                if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline) and \
+                                        not re.search("代理|乙方|竞得|受让|买受|签约|供货|供应|承做|承包|承建|承销|承保|承接|承制|承担|承修|承租(?:(包))?|入围|入选|竞买|中标|中选|中价|中签|成交|候选",sentence_outline):
                                     sentence_phone = phone.findall(temp_sentence)
                                     if sentence_phone:
                                         if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in ",".join(exist_phone):
@@ -3067,7 +3259,7 @@ def initPackageAttr(RoleList,PackageSet,win_tenderer_set,tenderee_or_agency_set,
         packDict[item.packageName]["roleList"].append(Role(item.role_name,item.entity_text,item.role_prob,0,0.0,[],set(item.multi_winner)-win_tenderer_set-tenderee_or_agency_set)) #Role(角色名称,实体名称,角色阈值,金额,金额阈值,连接列表,多中标人)
     return packDict
                 
-def getPackageRoleMoney(list_sentence,list_entity,list_outline):
+def getPackageRoleMoney(list_sentence,list_entity,list_outline,winter_scope):
     '''
     @param:
         list_sentence:文章的句子list
@@ -3087,7 +3279,7 @@ def getPackageRoleMoney(list_sentence,list_entity,list_outline):
     # PackDict = initPackageAttr(RoleList, PackageSet)
     PackDict = initPackageAttr(RoleList, PackageSet, win_tenderer_set,tenderee_or_agency_set,main_body_pack)
 
-    PackDict = findAttributeAfterEntity(PackDict, RoleSet, PackageList, PackageSet, list_sentence, list_entity, list_outline)
+    PackDict = findAttributeAfterEntity(PackDict, RoleSet, PackageList, PackageSet, list_sentence, list_entity, list_outline, winter_scope)
     return PackDict
 
 def turnBidWay(bidway):
@@ -4270,7 +4462,7 @@ def getProjectContacts(list_entity, list_sentence):
 
     return {'project_contacts':project_contacts_list}
 
-def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
+def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time,winter_scope):
     '''
     @param:
         list_sentence:所有文章的句子list
@@ -4279,7 +4471,7 @@ def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
     '''
     result = []
     for list_sentence,list_entity,list_article,list_outline in zip(list_sentences,list_entitys,list_articles,list_outlines):
-        RoleList = getPackageRoleMoney(list_sentence,list_entity,list_outline)
+        RoleList = getPackageRoleMoney(list_sentence,list_entity,list_outline,winter_scope)
         result.append(dict({"prem": RoleList, "docid": list_article.doc_id},
                            **getTimeAttributes(list_entity, list_sentence,page_time),
                            **getProjectContacts(list_entity, list_sentence),

+ 26 - 1
BiddingKG/dl/interface/outline_extractor.py

@@ -57,6 +57,8 @@ def extract_sentence_list(sentence_list):
 requirement_pattern = "(采购需求|需求分析|项目说明|(采购|合同|招标|询比?价|项目|服务|工程|标的|需求|建设)(的?(主要|简要|基本|具体|名称及))?" \
                           "(内容|概况|概述|范围|信息|规模|简介|介绍|说明|摘要|情况)([及与和]((其它|\w{,2})[要需]求|发包范围|数量))?" \
                       "|招标项目技术要求|服务要求|服务需求|项目目标|需求内容如下|建设规模)为?([::,]|$)"
+winter_pattern = "((乙方|竞得|受让|买受|签约|供货|供应|承做|承包|承建|承销|承保|承接|承制|承担|承修|承租(?:(包))?|入围|入选|竞买|中标|中选|中价|中签|成交|候选)[\u4e00-\u9fa5]{0,5}" \
+                        "(公示)?(信息|概况|情况|名称|联系人|联系方式|负责人)|中标公示单位)为?([::,、]|$)"
 aptitude_pattern = "资质(资格)要求|资格(资质)要求|单位要求|资质及业绩要求|((资格|资质|准入)[的及]?(要求|条件|标准|限定|门槛)|竞买资格及要求|供应商报价须知)|按以下要求参与竞买|((报名|应征|竞买|投标|竞投|受让|报价|竞价|竞包|竞租|承租|申请|参与|参选|遴选)的?(人|方|单位|企业|客户|机构)?|供应商|受让方)((必?须|需|应[该当]?)(具备|满足|符合|提供)+以?下?)?的?(一般|基本|主要)?(条件|要求|资格(能力)?|资质)+|乙方应当符合下列要求|参与比选条件|合格的投标人|询价要求"
 
 addr_bidopen_pattern = "([开评]标|开启|评选|比选|磋商|遴选|寻源|采购|招标|竞价|议价|委托|询比?价|比价|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|递交\w{,4}文件)[))]?(时间[与及和、])?(地址|地点)([与及和、]时间)?([::,]|$)|开启([::,]|$)"
@@ -77,6 +79,7 @@ def extract_parameters(parse_document):
     addr_bidopen_text = '' # 开标地址
     addr_bidsend_text = '' # 投标地址
     requirement_scope = [] # 采购内容始末位置
+    winter_scope = [] # 中标信息始末位置
     pinmu_name = '' # 品目名称
     list_policy = [] # 政策法规
     out_lines = []
@@ -107,6 +110,28 @@ def extract_parameters(parse_document):
                     requirement_scope.append(e)
                     _data_i += len(childs)
                     _data_i -= 1
+
+    _data_i = -1
+    # 中标信息
+    while _data_i<len(list_data)-1:
+        _data_i += 1
+        _data = list_data[_data_i]
+        _type = _data["type"]
+        _text = _data["text"].strip()
+        # print(_data.keys())
+        if _type=="sentence":
+            # print('_text',_text)
+            # print('sentence_title',_data["sentence_title"])
+            if _data["sentence_title"] is not None:
+                if re.search(winter_pattern,_text[:30]) is not None:
+                    b = (_data['sentence_index'], _data['wordOffset_begin'])
+                    childs = get_childs([_data])
+                    e = (childs[-1]['sentence_index'], childs[-1]["wordOffset_end"]) if len(childs)>0 else (_data['sentence_index'], _data['wordOffset_end'])
+                    winter_scope.append(b)
+                    winter_scope.append(e)
+                    _data_i += len(childs)
+                    _data_i -= 1
+
     _data_i = -1
     while _data_i<len(list_data)-1:
         _data_i += 1
@@ -196,7 +221,7 @@ def extract_parameters(parse_document):
         pinmu_name = pinmu_name[ser.end():]
         if re.search('[^\w]$', pinmu_name):
             pinmu_name = pinmu_name[:-1]
-    return requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy
+    return requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy, winter_scope
 
 def extract_addr(content):
     '''

+ 3 - 2
BiddingKG/dl/interface/predictor.py

@@ -3781,6 +3781,7 @@ class ProductAttributesPredictor():
             order_times = []
             for entity in list_entity:
                 if entity.entity_type=='time':
+                    # print('time',entity.entity_text)
                     sentence = list_sentence[entity.sentence_index]
                     s = spanWindow(tokens=sentence.tokens, begin_index=entity.begin_index,
                                    end_index=entity.end_index,size=20)
@@ -4184,7 +4185,7 @@ class DocChannel():
           '采购意向': '采购意向|招标意向|选取意向|意向公告|意向公示',
           '采购意向neg': '发布政府采购意向|采购意向公告已于',
           '招标预告': '(预计|计划)(招标|采购|发标|发包)(时间|日期)|采购(计划编号|需求方案|预告|预案)|(预|需求)公示|需求(方案|信息|论证|公告|公示)',
-          '招标公告': '(采购|招标|竞选|报名)条件|报名(时间|流程|方法|要求|\w{,5}材料)[:\s]|[^\w]成交规则|参加竞价采购交易资格|(申请人|投标人|供应商|报价人|参选人)的?资格(要求|条件)|获取(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件的?(获取|领取)|评选方式:?\s*价格最低',
+          '招标公告': '(采购|招标|竞选|报名)条件|报名(时间|流程|方法|要求|\w{,5}材料)[:\s]|[^\w]成交规则|参加竞价采购交易资格|(申请人|投标人|供应商|报价人|参选人)的?资格(要求|条件)|获取(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件的?(获取|领取)|评选方式:?\s*价格最低|求购信息',
           '资审结果': '资审及业绩公示|资审结果及业绩|资格后审情况报告|资格(后审|预审|审查)结果(公告|公示)|(预审|审查)工作已经?结束|未通过原因', #|资格
           '招标答疑': '现澄清(为|如下)|答疑补遗|澄清内容如下|第[0-9一二三四五]次澄清|答疑澄清|(最高(投标)?限价|控制价|拦标价)公示',  # |异议的回复
           '公告变更': '第[\d一二]次变更|(更正|变更)(公告|公示|信息|内容|事项|原因|理由|日期|时间|如下)|原公告((主要)?(信息|内容)|发布时间)|(变更|更正)[前后]内容|现?在?(变更|更正|修改|更改)(内容)?为|(公告|如下|信息|内容|事项|结果|文件|发布|时间|日期)(更正|变更)',
@@ -4197,7 +4198,7 @@ class DocChannel():
           '中标信息neg': '按项目控制价下浮\d%即为成交价|成交原则|不得确定为(中标|成交)|招标人按下列原则选择中标人|评选成交供应商:|拟邀请供应商|除单一来源采购项目外|单一来源除外|(各.{,5}|尊敬的)(供应商|供货商)[:\s]|竞拍起止时间:|询价结果[\s\n::]*不公开|本项目已具备招标条件|现对该项目进行招标公告|发布\w{2}结果后\d天内送达|本次\w{2}结果不对外公示|供应商\s*资格要求|成交情况:\s*[流废]标|中标单位:本次招标拟?中标单位\d家|通知中标单位|影响(成交|中标)结果|确定为成交供应商|(成交|中标|中选)公[告示](发布|\w{,2})后|竞价成交后', # 503076535 按照服务方案的优劣 确定为成交供应商
       # |确定成交供应商[:,\s]
           '合同公告': '合同(公告|公示|信息|内容)|合同(编号|名称|主体|基本情况|完成(日期|时间))|(供应商乙方|乙方供应商):|合同总?金额|履约信息',
-          '废标公告': '(终止|中止|废标|流标|流采|失败|作废|异常|撤销)(结果)?(公告|公示|招标|采购|竞价)|(谈判结果为|结果类型):?废标|((本|该)(项目|标段|合同|合同包|采购包|次)\w{,5})((失败|终止|流标|废标)|予以废标|(按|做|作)?(流标|废标|废置)处理)|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答|项目)(终止|中止|废标|流标|失败|作废|异常|撤销)',
+          '废标公告': '(终止|中止|废标|流标|流采|失败|作废|异常|撤销)(结果)?(公告|公示|招标|采购|竞价)|(谈判结果为|结果类型|采购结果):?废标|((本|该)(项目|标段|合同|合同包|采购包|次)\w{,5})((失败|终止|流标|废标)|予以废标|(按|做|作)?(流标|废标|废置)处理)|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答|项目)(终止|中止|废标|流标|失败|作废|异常|撤销)',
           '废标公告2': '(无效|中止|终止|废标|流标|失败|作废|异常|撤销)的?(原因|理由)|本项目因故取消|本(项目|次)(公开)?\w{2}失败|已终止\s*原因:|(人|人数|供应商|单位)(不足|未达\w{,3}数量)|已终止|不足[3三]家|无(废标)|成交情况:\s*[流废]标|现予以废置|报名未够三家',
           '废标公告neg': '超过此报价将作为[废流]标处理|否则按[废流]标处理|终止规则:|成交规则:|视为流标|竞价失败的一切其他情形|是否废标:否|若不足三家公司参与|供应商数量:?\s*报名供应商不足三家|有效报价不足三家,\s*系统自动废标|如遇项目流[标采]' # 503076535 供应商数量: 报名供应商不足三家。
       }