
Merge remote-tracking branch 'origin/master'

lsm, 3 months ago
parent commit 9aa6785256

+ 1 - 1
BiddingKG/dl/channel/channel_bert.py

@@ -596,7 +596,7 @@ def merge_channel(list_articles,channel_dic,original_docchannel):
             main_text = text
         main_text = text_process(main_text)
         # if re.search("采购实施月份|采购月份|预计(招标|采购|发标|发包)(时间|月份)|招标公告预计发布时间",main_text[:max(500,len(main_text)//2)]):
-        if re.search("采购实施月份|采购月份|预计(招标|采购|发标|发包)(时间|月份)|招标公告预计发布时间",main_text):
+        if re.search("采购实施月份|采购月份|(计划|预计|预期)(招标|采购|发标|发包)(时间|月份)|招标公告预计发布时间",main_text):
             front_text_len = len(main_text) // 3 if len(main_text) > 300 else 100
             front_text = main_text[:front_text_len]
             if re.search("意向|意愿",title) or re.search("意向|意愿",front_text):

+ 16 - 10
BiddingKG/dl/interface/extract.py

File diff suppressed because it is too large

+ 8 - 8
BiddingKG/dl/interface/getAttributes.py

@@ -2043,7 +2043,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if entity.label in [2, 3, 4] and distance>=20:
                                     break
                                 # 角色为中标候选人,排除"质疑|投诉|监督|受理"相关的联系人
-                                if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
+                                if entity.label in [2, 3, 4] and re.search("纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系|(采购|招标)人?联系", list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
                                     break
                                 # 角色为招标/代理人,排除"纪检|监察"相关的联系人
                                 if entity.label in [0,1] and re.search("纪检|监察",list_sentence[after_entity.sentence_index].sentence_text[max(0,after_entity.wordOffset_begin - 10):after_entity.wordOffset_begin]):
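The alternative "(采购|招标)人?联系" added above widens the check on the 10-character window before the person entity, so contacts introduced as the purchaser's or tenderer's contact are no longer linked to winner-candidate roles (labels 2/3/4). Illustrative check on an invented window:

import re

window = "采购人联系人:"   # text right before the person entity (made-up example)
old_pat = "纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系"
new_pat = "纪检|监察|质疑|投诉|监督|受理|项目(单位)?联系|(采购|招标)人?联系"

print(bool(re.search(old_pat, window)))  # False -> the person could be linked to the candidate
print(bool(re.search(new_pat, window)))  # True  -> the loop now breaks before linking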
@@ -2953,7 +2953,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         if t_person.person_phone:
                                             _phone = [p.entity_text for p in t_person.person_phone]
                                             for _p in _phone:
-                                                if t_person.entity_text not in exist_person and _p not in exist_phone:
+                                                if t_person.entity_text not in exist_person and _p not in ",".join(exist_phone):
                                                     tenderee_agency_role[0].linklist.append((t_person.entity_text, _p))
                                                     get_contacts = True
                                             break
@@ -2963,7 +2963,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if not get_contacts:
                                     sentence_phone = phone.findall(outline.outline_text)
                                     if sentence_phone:
-                                        if sentence_phone[0] not in exist_phone:
+                                        if sentence_phone[0] not in ",".join(exist_phone):
                                             tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                             get_contacts = True
                                             break
@@ -2974,14 +2974,14 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if _entity.person_phone:
                                     _phone = [p.entity_text for p in _entity.person_phone]
                                     for _p in _phone:
-                                        if _entity.entity_text not in exist_person and _p not in exist_phone:
+                                        if _entity.entity_text not in exist_person and _p not in ",".join(exist_phone):
                                             tenderee_agency_role[0].linklist.append((_entity.entity_text, _p))
                                             get_contacts = True
                                     break
                     if not get_contacts:
                         # 如果文中只有一个“phone”实体,则直接取为联系人电话
                         if len(phone_entitys) == 1:
-                            if phone_entitys[0].entity_text not in exist_phone:
+                            if phone_entitys[0].entity_text not in ",".join(exist_phone):
                                 tenderee_agency_role[0].linklist.append(("", phone_entitys[0].entity_text))
                                 get_contacts = True
                     if not get_contacts:
@@ -2993,7 +2993,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                 if re.search("联系人|联系方|联系方式|联系电话|电话|负责人|与.{2,4}联系", sentence_outline):
                                     sentence_phone = phone.findall(temp_sentence)
                                     if sentence_phone:
-                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in exist_phone:
+                                        if sentence_phone[0] in [ent.entity_text for ent in phone_entitys] and sentence_phone[0] not in ",".join(exist_phone):
                                             tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                             get_contacts = True
                                             break
@@ -3008,11 +3008,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         for _pattern in contact_pattern_list:
                             get_tenderee_contacts = False
                             for regular_match in re.finditer(_pattern, _content):
-                                match_text = _content[regular_match.end():regular_match.end() + 40]
+                                match_text = _content[regular_match.end():regular_match.end() + 50]
                                 match_text = match_text.split("。")[0]
                                 sentence_phone = phone.findall(match_text)
                                 if sentence_phone:
-                                    if sentence_phone[0] not in exist_phone:
+                                    if sentence_phone[0] not in ",".join(exist_phone):
                                         tenderee_agency_role[0].linklist.append(("", sentence_phone[0]))
                                         get_tenderee_contacts = True
                                         break
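The recurring change in the hunks above replaces the exact list-membership test against exist_phone with a substring test against the comma-joined string, so a number that already appears inside a recorded phone entry (for example with an area code or extension) is treated as present and not linked a second time. A small illustration with invented values:

exist_phone = ["0571-88888888"]
p = "88888888"

print(p not in exist_phone)            # True  -> old check would append the number again
print(p not in ",".join(exist_phone))  # False -> new check treats it as already linked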

+ 13 - 4
BiddingKG/dl/interface/predictor.py

@@ -4151,14 +4151,14 @@ class DocChannel():
           '产权交易': '经营权|承包权|使用权|租赁权|股权|债权|排污权|化学需氧量|储备量|竞价销售|销售结果|出租|招租|拍租|竞租|续租|挂牌|出让|废[旧弃]?(物资|设备|资源|金属|钢筋|料)处[置理]',
           '产权交易2': '使用权|租赁权|股权|债权|排污权|竞价销售|销售结果|出租|招租|拍租|竞租|续租|挂牌|出让|废[旧弃]?(物资|设备|资源|金属|钢筋|料)处[置理]',
           # '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|征询|调研)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|(设备|服务|\w{2})[直采]购|(建设|改造)项目|工程|拦标价|控制价|银行|资格选定|资金|公款|存款|存放|现金管理|招募|入围|入库',
-          '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|征询|调研)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|(设备|服务|\w{2})[直采]购|(建设|改造)项目|拦标价|控制价|资格选定|资格认定|资金|公款|存款|现金管理|招募|入库',
+          '采招数据': '(采购|招标|询价|议价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|征询|调研)的?(公告|公示|中标|成交|结果|$)|工程招标|定点服务|(设备|服务|\w{2})[直采]购|(建设|改造)项目|拦标价|控制价|资格选定|资格认定|资金|公款|存款|现金管理|招募|入库|遴选.{,25}(服务|事务所|机构)',
           # |竞价 采招/产权都有竞价方式 # 意向|需求|预公?告|报建|总承包|工程|施工|设计|勘察|代理|监理 |变更|答疑|澄清|中标|成交|合同|废标|流标
           '新闻资讯': '(考试|面试|笔试)成绩|成绩的?(公告|公示|公布)|公开招聘|招聘(公告|简章|启事|合同制)|疫情防控\s{,5}(通知|情况|提示)|行政审批结果'
       }
       self.life_dic = {
           '采购意向': '采购意向|招标意向|选取意向|意向公告|意向公示',
           '采购意向neg': '发布政府采购意向|采购意向公告已于',
-          '招标预告': '(预计|计划)(采购|招标)(时间|日期)|采购(计划编号|需求方案|预告|预案)|(预|需求)公示|需求(方案|信息|论证|公告|公示)',
+          '招标预告': '(预计|计划)(招标|采购|发标|发包)(时间|日期)|采购(计划编号|需求方案|预告|预案)|(预|需求)公示|需求(方案|信息|论证|公告|公示)',
           '招标公告': '(采购|招标|竞选|报名)条件|报名(时间|流程|方法|要求|\w{,5}材料)[:\s]|[^\w]成交规则|参加竞价采购交易资格|(申请人|投标人|供应商|报价人|参选人)的?资格(要求|条件)|获取(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件的?(获取|领取)|评选方式:?\s*价格最低',
           '资审结果': '资审及业绩公示|资审结果及业绩|资格后审情况报告|资格(后审|预审|审查)结果(公告|公示)|(预审|审查)工作已经?结束|未通过原因', #|资格
           '招标答疑': '现澄清(为|如下)|答疑补遗|澄清内容如下|第[0-9一二三四五]次澄清|答疑澄清|(最高(投标)?限价|控制价|拦标价)公示',  # |异议的回复
@@ -4783,11 +4783,14 @@ class DocChannel():
               return False
 
       tenderee = ""
+      agency = ""
       try:
           for k, v in prem['prem'].items():
               for link in v['roleList']:
                   if link['role_name'] == 'tenderee' and tenderee == "":
                       tenderee = link['role_text']
+                  if link['role_name'] == 'agency' and agency == "":
+                      agency = link['role_text']
       except Exception as e:
           # print('解析prem 获取招标人、代理人出错')
           pass
@@ -4798,6 +4801,9 @@ class DocChannel():
       if tenderee:
           title = title.replace(tenderee, " ")
           text = text.replace(tenderee, " ")
+      if agency:
+          title = title.replace(agency, " ")
+          text = text.replace(agency, " ")
       prem_json = json.dumps(prem, ensure_ascii=False)
       if result['docchannel']['docchannel'] in ['中标信息', '合同公告'] and origin_dic.get(
               original_docchannel, '') in ['招标公告', '采购意向', '招标预告', '公告变更'] and is_contain_winner(
@@ -4811,7 +4817,10 @@ class DocChannel():
           msc += '最终规则修改:中标公告无中标人且包含新闻资讯关键词,返回新闻资讯类型'
       elif result['docchannel']['docchannel'] == '废标公告' and is_contain_winner(prem_json) and re.search(
               self.title_life_dic['废标公告'], title) == None:
-          result['docchannel']['docchannel'] = '中标信息'
+          if re.search(self.title_life_dic['合同公告'], title):
+            result['docchannel']['docchannel'] = '合同公告'
+          else:
+            result['docchannel']['docchannel'] = '中标信息'
           msc += '最终规则修改:预测为废标却有中标人且标题无废标关键词改为中标信息;'
       elif result['docchannel']['docchannel'] in ['招标答疑'] and re.search(
               self.title_life_dic['招标答疑'], title) == None and origin_dic.get(
@@ -8583,7 +8592,7 @@ class EntityTypeRulePredictor():
         self.pattern_addr_delivery = '(交货|交付|收货|提货|交接|送货(安装)?|送达|到货|供货|卸货)((期|时间)[及和、])?)?(地[点址区]?|区域)[:为]'
         self.pattern_addr_project = '(项目|施工|实施|建设|工程|服务|展示|看样|拍卖)(实施|服务|现场)?(地[点址区]|位置|所在地区?)(位于)?[:为]|项目位于|[^\w]所[属在](区域|地区?):|存放地[点址]?[:为]' # 银行所属区域:北京市西城区 不作项目地址
         self.pattern_addr_contact = '(联系|收件人?|邮寄)地[点址区][:为]|行政区:'
-        self.pattern_time_planned = '(计划|预计|预期)(采购|招标|发包)时间|招标(公告|文件)(预计|预期|计划)发布时间'
+        self.pattern_time_planned = '(计划|预计|预期)(招标|采购|发标|发包)时间|招标(公告|文件)(预计|预期|计划)发布时间'
         self.pattern_code_investment = '投资(审批)?项目[编代]码[:为]'
         self.pattern_addr_dic = {'addr_bidopen': self.pattern_addr_bidopen,
                                  'addr_bidsend': self.pattern_addr_bidsend,
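In the DocChannel rules above, the agency name extracted from prem is now blanked out of the title and body alongside the tenderee before the keyword checks run, and a 废标公告 prediction that actually contains a winner is re-labelled 合同公告 when the title carries contract keywords (otherwise 中标信息, as before). A minimal sketch of the masking step only; the role names and title are invented:

tenderee = "某某医院"
agency = "某某招标代理有限公司"
title = "某某医院设备采购项目(某某招标代理有限公司代理)结果公告"

for name in (tenderee, agency):
    if name:
        title = title.replace(name, " ")
print(title)  # " 设备采购项目( 代理)结果公告"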

BIN
BiddingKG/dl/table_head/model_40_2_0.959.pth


+ 94 - 0
BiddingKG/dl/table_head/models/model_torch.py

@@ -73,6 +73,100 @@ class TableHeadModel(nn.Module):
         cnn3d_x = torch.permute(cnn3d_x, [2, 3, 1, 0])
         cnn3d_x = cnn3d_x.contiguous().view(row, col, char_num * self.char_embed_expand)
 
+        # dnn
+        x = self.dense3(cnn3d_x)
+        x = self.ln_dnn_2(x)
+        x = self.relu(x)
+        x = self.dense4(x)
+        x = self.sigmoid(x)
+        x = torch.squeeze(x, -1)
+        return x
+
+
+class TableHeadModel2(nn.Module):
+    def __init__(self):
+        super(TableHeadModel2, self).__init__()
+        self.char_num = 20
+        self.char_embed = 60
+        self.char_embed_expand = 128
+
+        self.dense0 = nn.Linear(self.char_embed, self.char_embed_expand)
+
+        self.dense3 = nn.Linear(self.char_num * self.char_embed_expand, 64)
+        self.dense4 = nn.Linear(64, 1)
+
+        self.sigmoid = nn.Sigmoid()
+
+        self.ln_dnn_2 = nn.LayerNorm([64])
+
+        self.device = torch.device("cpu")
+
+        self.relu = nn.LeakyReLU()
+        self.dropout = nn.Dropout(0.6)
+
+        # self.cnn1d_0 = nn.Conv1d(self.char_embed_expand,
+        #                          self.char_embed_expand,
+        #                          (3,), padding=self.get_padding(3))
+        # self.cnn1d_1 = nn.Conv1d(self.char_embed_expand,
+        #                          self.char_embed_expand,
+        #                          (3,), padding=self.get_padding(3))
+
+        encoder_layer1 = nn.TransformerEncoderLayer(d_model=self.char_embed_expand, nhead=2,
+                                                    dim_feedforward=128, batch_first=True)
+        self.transformer1 = nn.TransformerEncoder(encoder_layer1, 2)
+        self.ln_encoder_0 = nn.LayerNorm([self.char_embed_expand])
+
+        self.cnn3d_0 = nn.Conv3d(self.char_embed_expand, self.char_embed_expand,
+                                 (3, 3, 3), padding=self.get_padding(3))
+        self.cnn3d_1 = nn.Conv3d(self.char_embed_expand, self.char_embed_expand,
+                                 (3, 3, 3), padding=self.get_padding(3))
+        # self.cnn3d_2 = nn.Conv3d(self.char_embed, self.char_embed,
+        #                          (3, 3, 3), padding=self.get_padding(3))
+
+    def get_padding(self, kernel_size, stride=1):
+        return (kernel_size - 1) // 2 * stride
+
+    def forward(self, x):
+        batch, row, col, char_num, char_embed = x.shape
+
+        # Embedding
+        x = torch.squeeze(x, 0)
+        x = x.view([row*col, char_num, char_embed])
+        x = self.dense0(x)
+
+        # transformer
+        box_attention = self.transformer1(x)
+        box_attention = self.ln_encoder_0(box_attention)
+        box_attention = torch.permute(box_attention, [0, 2, 1])
+        box_attention = box_attention.contiguous().view(row, col, char_num, self.char_embed_expand)
+        box_attention = torch.unsqueeze(box_attention, 0)
+
+        # cnn1d_x = torch.permute(cnn1d_x, [0, 2, 1])
+        # cnn1d_x = self.cnn1d_0(cnn1d_x)
+        # cnn1d_x = self.relu(cnn1d_x)
+        # cnn1d_x = self.dropout(cnn1d_x)
+        # cnn1d_x = self.cnn1d_1(cnn1d_x)
+        # cnn1d_x = self.relu(cnn1d_x)
+        # cnn1d_x = self.dropout(cnn1d_x)
+        #
+        # cnn1d_x = torch.permute(cnn1d_x, [0, 2, 1])
+        # cnn1d_x = cnn1d_x.contiguous().view(row, col, char_num, self.char_embed_expand)
+        # cnn1d_x = torch.unsqueeze(cnn1d_x, 0)
+        # print(cnn1d_x.shape)
+
+        # cnn 3d
+        cnn3d_x = torch.permute(box_attention, [0, 4, 3, 1, 2])
+        cnn3d_x = self.cnn3d_0(cnn3d_x)
+        cnn3d_x = self.relu(cnn3d_x)
+        cnn3d_x = self.dropout(cnn3d_x)
+        cnn3d_x = self.cnn3d_1(cnn3d_x)
+        cnn3d_x = self.relu(cnn3d_x)
+        cnn3d_x = self.dropout(cnn3d_x)
+
+        cnn3d_x = torch.squeeze(cnn3d_x, 0)
+        cnn3d_x = torch.permute(cnn3d_x, [2, 3, 1, 0])
+        cnn3d_x = cnn3d_x.contiguous().view(row, col, char_num * self.char_embed_expand)
+
         # dnn
         x = self.dense3(cnn3d_x)
         x = self.ln_dnn_2(x)
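A minimal shape smoke test for the newly added TableHeadModel2 (the 3x4 table size and random inputs are arbitrary; the import path follows the one commented out in predict_torch.py below):

import torch
from BiddingKG.dl.table_head.models.model_torch import TableHeadModel2

# dummy batch: 1 table, 3 rows x 4 cols, 20 chars per cell, 60-dim char embeddings
dummy = torch.rand(1, 3, 4, 20, 60)

model = TableHeadModel2()
model.eval()                      # disables the 0.6 dropout
with torch.no_grad():
    scores = model(dummy)

print(scores.shape)               # torch.Size([3, 4]) -- one head probability per cell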

+ 4 - 1
BiddingKG/dl/table_head/predict_torch.py

@@ -6,10 +6,12 @@ from torch.utils.data import DataLoader
 
 sys.path.append(os.path.abspath(os.path.dirname(__file__) + "/../../../"))
 from BiddingKG.dl.table_head.models.model_torch import TableHeadModel
+# from BiddingKG.dl.table_head.models.model_torch import TableHeadModel2
 from BiddingKG.dl.table_head.pre_process_torch import CustomDatasetTiny40, set_same_table_head, set_label
 
 device = torch.device("cpu")
-model_path = os.path.abspath(os.path.dirname(__file__)) + '/model_40_0.951.pth'
+model_path = os.path.abspath(os.path.dirname(__file__)) + '/model_40_0.959.pth'
+# model_path = os.path.abspath(os.path.dirname(__file__)) + '/model_40_2_0.959.pth'
 batch_size = 1
 
 
@@ -18,6 +20,7 @@ def predict(table_text_list):
         print("="*15, "init table_head model", "="*15)
         # 实例化模型
         model = TableHeadModel()
+        # model = TableHeadModel2()
         model.to(device)
         model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
         # 将模型设置为评估模式

Some files were not shown in this diff because too many files have changed