Преглед изворног кода

公告关键词规则优化

znj пре 3 месеци
родитељ
комит
0c340975f8

+ 1 - 1
BiddingKG/dl/channel/channel_bert.py

@@ -596,7 +596,7 @@ def merge_channel(list_articles,channel_dic,original_docchannel):
             main_text = text
         main_text = text_process(main_text)
         # if re.search("采购实施月份|采购月份|预计(招标|采购|发标|发包)(时间|月份)|招标公告预计发布时间",main_text[:max(500,len(main_text)//2)]):
-        if re.search("采购实施月份|采购月份|预计(招标|采购|发标|发包)(时间|月份)|招标公告预计发布时间",main_text):
+        if re.search("采购实施月份|采购月份|(计划|预计|预期)(招标|采购|发标|发包)(时间|月份)|招标公告预计发布时间",main_text):
             front_text_len = len(main_text) // 3 if len(main_text) > 300 else 100
             front_text = main_text[:front_text_len]
             if re.search("意向|意愿",title) or re.search("意向|意愿",front_text):

+ 8 - 8
BiddingKG/dl/interface/getAttributes.py

@@ -2043,7 +2043,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if entity.label in [2, 3, 4] and distance>=20:
                                     break
                                 # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
-                                if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系|(采购|招标)人?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
                                     break
                                 # 角色为招标/代理人,排除"纪检|监察"相关的联系人
                                 if entity.label in [0,1] and re.search("纪检|监察",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
@@ -2953,7 +2953,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         if t_person.person_phone:
                                             _phone = [p.entity_text for p in t_person.person_phone]
                                             for _p in _phone:
-                                                if t_person.entity_text not in exist_person and _p not in exist_phone:
+                                                if t_person.entity_text not in exist_person and _p not in ",".join(exist_phone):
                                                     tenderee_agency_role[0].linklist.append((t_person.entity_text, _p))
                                                     get_contacts = True
                                             break
@@ -2963,7 +2963,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if not get_contacts:
                                     sentence_phone = phone.findall(outline.outline_text)
                                     if sentence_phone:
-                                        if sentence_phone[0] not in exist_phone:
+                                        if sentence_phone[0] not in ",".join(exist_phone):
                                             tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                             get_contacts = True
                                             break
@@ -2974,14 +2974,14 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if _entity.person_phone:
                                     _phone = [p.entity_text for p in _entity.person_phone]
                                     for _p in _phone:
-                                        if _entity.entity_text not in exist_person and _p not in exist_phone:
+                                        if _entity.entity_text not in exist_person and _p not in ",".join(exist_phone):
                                             tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
                                             get_contacts = True
                                     break
                     if not get_contacts:
                         # 如果文中只有一个“phone”实体,则直接取为联系人电话
                         if len(phone_entitys) == 1:
-                            if phone_entitys[0].entity_text not in exist_phone:
+                            if phone_entitys[0].entity_text not in ",".join(exist_phone):
                                 tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
                                 get_contacts = True
                     if not get_contacts:
@@ -2993,7 +2993,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline):
                                     sentence_phone = phone.findall(temp_sentence)
                                     if sentence_phone:
-                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in exist_phone:
+                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in ",".join(exist_phone):
                                             tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                             get_contacts = True
                                             break
@@ -3008,11 +3008,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         for _pattern in contact_pattern_list:
                             get_tenderee_contacts = False
                             for regular_match in re.finditer(_pattern, _content):
-                                match_text = _content[regular_match.end():regular_match.end() + 40]
+                                match_text = _content[regular_match.end():regular_match.end() + 50]
                                 match_text = match_text.split("。")[0]
                                 sentence_phone = phone.findall(match_text)
                                 if sentence_phone:
-                                    if sentence_phone[0] not in exist_phone:
+                                    if sentence_phone[0] not in ",".join(exist_phone):
                                         tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                         get_tenderee_contacts = True
                                         break

+ 13 - 4
BiddingKG/dl/interface/predictor.py

@@ -4147,14 +4147,14 @@ class DocChannel():
           '产权交易': '经营权|承包权|使用权|租赁权|股权|债权|排污权|化学需氧量|储备量|竞价销售|销售结果|出租|招租|拍租|竞租|续租|挂牌|出让|废[旧弃]?(物资|设备|资源|金属|钢筋|料)处[置理]',
           '产权交易2': '使用权|租赁权|股权|债权|排污权|竞价销售|销售结果|出租|招租|拍租|竞租|续租|挂牌|出让|废[旧弃]?(物资|设备|资源|金属|钢筋|料)处[置理]',
           # '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|征询|调研)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|(设备|服务|\w{2})[直采]购|(建设|改造)项目|工程|拦标价|控制价|银行|资格选定|资金|公款|存款|存放|现金管理|招募|入围|入库',
-          '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|征询|调研)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|(设备|服务|\w{2})[直采]购|(建设|改造)项目|拦标价|控制价|资格选定|资格认定|资金|公款|存款|现金管理|招募|入库',
+          '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|征询|调研)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|(设备|服务|\w{2})[直采]购|(建设|改造)项目|拦标价|控制价|资格选定|资格认定|资金|公款|存款|现金管理|招募|入库|遴选.{,25}(服务|事务所|机构)',
           # |竞价 采招/产权都有竞价方式 # 意向|需求|预公?告|报建|总承包|工程|施工|设计|勘察|代理|监理 |变更|答疑|澄清|中标|成交|合同|废标|流标
           '新闻资讯': '(考试|面试|笔试)成绩|成绩的?(公告|公示|公布)|公开招聘|招聘(公告|简章|启事|合同制)|疫情防控\s{,5}(通知|情况|提示)|行政审批结果'
       }
       self.life_dic = {
           '采购意向': '采购意向|招标意向|选取意向|意向公告|意向公示',
           '采购意向neg': '发布政府采购意向|采购意向公告已于',
-          '招标预告': '(预计|计划)(采购|招标)(时间|日期)|采购(计划编号|需求方案|预告|预案)|(预|需求)公示|需求(方案|信息|论证|公告|公示)',
+          '招标预告': '(预计|计划)(招标|采购|发标|发包)(时间|日期)|采购(计划编号|需求方案|预告|预案)|(预|需求)公示|需求(方案|信息|论证|公告|公示)',
           '招标公告': '(采购|招标|竞选|报名)条件|报名(时间|流程|方法|要求|\w{,5}材料)[:\s]|[^\w]成交规则|参加竞价采购交易资格|(申请人|投标人|供应商|报价人|参选人)的?资格(要求|条件)|获取(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件的?(获取|领取)|评选方式:?\s*价格最低',
           '资审结果': '资审及业绩公示|资审结果及业绩|资格后审情况报告|资格(后审|预审|审查)结果(公告|公示)|(预审|审查)工作已经?结束|未通过原因', #|资格
           '招标答疑': '现澄清(为|如下)|答疑补遗|澄清内容如下|第[0-9一二三四五]次澄清|答疑澄清|(最高(投标)?限价|控制价|拦标价)公示',  # |异议的回复
@@ -4779,11 +4779,14 @@ class DocChannel():
               return False
 
       tenderee = ""
+      agency = ""
       try:
           for k, v in prem['prem'].items():
               for link in v['roleList']:
                   if link['role_name'] == 'tenderee' and tenderee == "":
                       tenderee = link['role_text']
+                  if link['role_name'] == 'agency' and agency == "":
+                      agency = link['role_text']
       except Exception as e:
           # print('解析prem 获取招标人、代理人出错')
           pass
@@ -4794,6 +4797,9 @@ class DocChannel():
       if tenderee:
           title = title.replace(tenderee, " ")
           text = text.replace(tenderee, " ")
+      if agency:
+          title = title.replace(agency, " ")
+          text = text.replace(agency, " ")
       prem_json = json.dumps(prem, ensure_ascii=False)
       if result['docchannel']['docchannel'] in ['中标信息', '合同公告'] and origin_dic.get(
               original_docchannel, '') in ['招标公告', '采购意向', '招标预告', '公告变更'] and is_contain_winner(
@@ -4807,7 +4813,10 @@ class DocChannel():
           msc += '最终规则修改:中标公告无中标人且包含新闻资讯关键词,返回新闻资讯类型'
       elif result['docchannel']['docchannel'] == '废标公告' and is_contain_winner(prem_json) and re.search(
               self.title_life_dic['废标公告'], title) == None:
-          result['docchannel']['docchannel'] = '中标信息'
+          if re.search(self.title_life_dic['合同公告'], title):
+            result['docchannel']['docchannel'] = '合同公告'
+          else:
+            result['docchannel']['docchannel'] = '中标信息'
           msc += '最终规则修改:预测为废标却有中标人且标题无废标关键词改为中标信息;'
       elif result['docchannel']['docchannel'] in ['招标答疑'] and re.search(
               self.title_life_dic['招标答疑'], title) == None and origin_dic.get(
@@ -8356,7 +8365,7 @@ class EntityTypeRulePredictor():
         self.pattern_addr_delivery = '(交货|交付|收货|提货|交接|送货(安装)?|送达|到货|供货|卸货)((期|时间)[及和、])?)?地[点址区]?[:为]'
         self.pattern_addr_project = '(项目|施工|实施|建设|工程|服务|展示|看样|拍卖)(实施|服务|现场)?(地[点址区]|位置|所在地区?)(位于)?[:为]|项目位于|所在(区域|地区):|存放地[点址]?[:为]'
         self.pattern_addr_contact = '(联系|收件人?|邮寄)地[点址区][:为]|行政区:'
-        self.pattern_time_planned = '(计划|预计|预期)(采购|招标|发包)时间|招标(公告|文件)(预计|预期|计划)发布时间'
+        self.pattern_time_planned = '(计划|预计|预期)(招标|采购|发标|发包)时间|招标(公告|文件)(预计|预期|计划)发布时间'
         self.pattern_code_investment = '投资(审批)?项目[编代]码[:为]'
         self.pattern_addr_dic = {'addr_bidopen': self.pattern_addr_bidopen,
                                  'addr_bidsend': self.pattern_addr_bidsend,