3 năm trước cách đây · 057e6eb28a
--- a/BiddingKG/dl/common/Utils.py
+++ b/BiddingKG/dl/common/Utils.py
@@ -297,7 +297,7 @@ def changeIndexFromWordToWords(tokens,word_index):
 
				     after_index = 0
			
 
				     for i in range(len(tokens)):
			
 
				         after_index = after_index+len(tokens[i])
			
 
				-        if before_index<=word_index and after_index>=word_index:
			
 
				+        if before_index<=word_index and after_index>word_index:
			
 
				             return i
			
 
				         before_index = after_index
			
 
				         
			
--- a/BiddingKG/dl/entityLink/entityLink.py
+++ b/BiddingKG/dl/entityLink/entityLink.py
@@ -205,6 +205,7 @@ def calibrateEnterprise(list_articles,list_sentences,list_entitys):
 
				                 break
			
 
				         for p_sentence in list_sentence:
			
 
				             sentence = p_sentence.sentence_text
			
 
				+            sentence_entitys = [(ent.entity_text,ent.wordOffset_begin,ent.wordOffset_end) for ent in list_entity if ent.sentence_index==p_sentence.sentence_index and ent.entity_type in ['org','company']]
			
 
				             list_match = match_enterprise_max_first(sentence)
			
 
				             # print("list_match", list_match)
			
 
				 
			
@@ -247,7 +248,7 @@ def calibrateEnterprise(list_articles,list_sentences,list_entitys):
 
				                             match_replace = True
			
 
				                             match_add = True
			
 
				                             begin_index = changeIndexFromWordToWords(tokens,_match["begin_index"])
			
 
				-                            end_index = changeIndexFromWordToWords(tokens,_match["end_index"])
			
 
				+                            end_index = changeIndexFromWordToWords(tokens,_match["end_index"]-1)
			
 
				                             list_calibrate.append({"type":"update","from":p_entity.entity_text,"to":_match["entity_text"]})
			
 
				                             p_entity.entity_text = _match["entity_text"]
			
 
				                             p_entity.wordOffset_begin = _match["begin_index"]
			
@@ -262,7 +263,7 @@ def calibrateEnterprise(list_articles,list_sentences,list_entitys):
 
				                                 entity_type = "company"
			
 
				 
			
 
				                                 begin_index = changeIndexFromWordToWords(tokens,list_match[_match_h]["begin_index"])
			
 
				-                                end_index = changeIndexFromWordToWords(tokens,list_match[_match_h]["end_index"])
			
 
				+                                end_index = changeIndexFromWordToWords(tokens,list_match[_match_h]["end_index"]-1)
			
 
				                                 entity_id = "%s_%d_%d_%d"%(doc_id,sentence_index,begin_index,end_index)
			
 
				                                 add_entity = Entity(p_sentence.doc_id,entity_id,entity_text,entity_type,sentence_index,begin_index,end_index,list_match[_match_h]["begin_index"],list_match[_match_h]["end_index"])
			
 
				                                 add_entity.if_dict_match = 1
			
@@ -286,7 +287,7 @@ def calibrateEnterprise(list_articles,list_sentences,list_entitys):
 
				                                 else:
			
 
				                                     match_replace = True
			
 
				                                     begin_index = changeIndexFromWordToWords(tokens,_match["begin_index"])
			
 
				-                                    end_index = changeIndexFromWordToWords(tokens,_match["end_index"])
			
 
				+                                    end_index = changeIndexFromWordToWords(tokens,_match["end_index"]-1)
			
 
				                                     list_calibrate.append({"type":"update","from":p_entity.entity_text,"to":_match["entity_text"]})
			
 
				                                     p_entity.entity_text = _match["entity_text"]
			
 
				                                     p_entity.wordOffset_begin = _match["begin_index"]
			
@@ -295,23 +296,25 @@ def calibrateEnterprise(list_articles,list_sentences,list_entitys):
 
				                                     p_entity.end_index = end_index
			
 
				                                     p_entity.if_dict_match = 1
			
 
				                         elif _match["end_index"]>=p_entity.wordOffset_end:
			
 
				-                            match_replace = True
			
 
				-                            begin_index = changeIndexFromWordToWords(tokens,_match["begin_index"])
			
 
				-                            end_index = changeIndexFromWordToWords(tokens,_match["end_index"])
			
 
				-                            list_calibrate.append({"type":"update","from":p_entity.entity_text,"to":_match["entity_text"]})
			
 
				-                            p_entity.entity_text = _match["entity_text"]
			
 
				-                            p_entity.wordOffset_begin = _match["begin_index"]
			
 
				-                            p_entity.wordOffset_end = _match["end_index"]
			
 
				-                            p_entity.begin_index = begin_index
			
 
				-                            p_entity.end_index = end_index
			
 
				-                            p_entity.entity_type = "company"
			
 
				-                            p_entity.if_dict_match = 1
			
 
				+                            # 原entity列表已有实体，则不重复添加
			
 
				+                            if (_match["entity_text"],_match["begin_index"],_match["end_index"]) not in sentence_entitys:
			
 
				+                                match_replace = True
			
 
				+                                begin_index = changeIndexFromWordToWords(tokens,_match["begin_index"])
			
 
				+                                end_index = changeIndexFromWordToWords(tokens,_match["end_index"]-1)
			
 
				+                                list_calibrate.append({"type":"update","from":p_entity.entity_text,"to":_match["entity_text"]})
			
 
				+                                p_entity.entity_text = _match["entity_text"]
			
 
				+                                p_entity.wordOffset_begin = _match["begin_index"]
			
 
				+                                p_entity.wordOffset_end = _match["end_index"]
			
 
				+                                p_entity.begin_index = begin_index
			
 
				+                                p_entity.end_index = end_index
			
 
				+                                p_entity.entity_type = "company"
			
 
				+                                p_entity.if_dict_match = 1
			
 
				                     elif _match["begin_index"]<p_entity.wordOffset_end and _match["end_index"]>p_entity.wordOffset_end:
			
 
				                         find_flag = True
			
 
				                         if p_entity.entity_type in ("org","company"):
			
 
				                             match_replace = True
			
 
				                             begin_index = changeIndexFromWordToWords(tokens,_match["begin_index"])
			
 
				-                            end_index = changeIndexFromWordToWords(tokens,_match["end_index"])
			
 
				+                            end_index = changeIndexFromWordToWords(tokens,_match["end_index"]-1)
			
 
				                             list_calibrate.append({"type":"update","from":p_entity.entity_text,"to":_match["entity_text"]})
			
 
				                             p_entity.entity_text = _match["entity_text"]
			
 
				                             p_entity.wordOffset_begin = _match["begin_index"]
			
@@ -325,7 +328,7 @@ def calibrateEnterprise(list_articles,list_sentences,list_entitys):
 
				                     entity_type = "company"
			
 
				 
			
 
				                     begin_index = changeIndexFromWordToWords(tokens,_match["begin_index"])
			
 
				-                    end_index = changeIndexFromWordToWords(tokens,_match["end_index"])
			
 
				+                    end_index = changeIndexFromWordToWords(tokens,_match["end_index"]-1)
			
 
				                     entity_id = "%s_%d_%d_%d"%(doc_id,sentence_index,begin_index,end_index)
			
 
				                     add_entity = Entity(p_sentence.doc_id,entity_id,entity_text,entity_type,sentence_index,begin_index,end_index,_match["begin_index"],_match["end_index"])
			
 
				                     list_entity.append(add_entity)
			
--- a/BiddingKG/dl/interface/Entitys.py
+++ b/BiddingKG/dl/interface/Entitys.py
@@ -172,6 +172,8 @@ class Entity():
 
				         self.if_dict_match = 0  # 2021/12/21 新增，判断公司实体是否由字典识别得到
			
 
				         self.is_total_money = 0  # 2021/12/29 新增，判断金额是否总价
			
 
				         self.is_unit_money = 0  # 2021/12/29 新增，判断金额是否单价
			
 
				+        self.pointer_serviceTime = None  # 2022/01/05 新增，中标人对应链接"服务期限(工期)"
			
 
				+        self.pointer_ratio = None  # 2022/01/05 新增，中标人对应链接"中投标金额->费率、下浮率"
			
 
				         self.origin_entity_text = ''  # 2022/1/5 新增，记录字典替换的原来的实体名
			
 
				 
			
 
				     def set_Role(self,role_label,role_values):
			
@@ -262,11 +264,15 @@ class Role():
 
				         self.money_prob = money_prob
			
 
				         self.linklist = linklist
			
 
				         self.money_unit = '' # 2021/8/17 新增 保存金额单位
			
 
				-        
			
 
				+        # 中投标人属性
			
 
				+        self.ratio = "" #2022/01/06 新增 保存中投标金额相关费率
			
 
				+        self.serviceTime = "" #2021/01/06 新增 保存服务期限(工期)
			
 
				+
			
 
				     def getString(self):
			
 
				         self.linklist = [item for item in set(self.linklist)]
			
 
				         # result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist]
			
 
				-        result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist, self.money_unit]
			
 
				+        # result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist, self.money_unit]
			
 
				+        result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist, self.money_unit,self.ratio,self.serviceTime]
			
 
				         return result
			
 
				 
			
 
				 # 用于KM算法的组合配对
			
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1984,7 +1984,8 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				             # "联系人"正则补充提取  2021/11/15 新增
			
 
				             list_person_text = [entity.entity_text for entity in list_sentence_entitys if entity.entity_type=='person']
			
 
				             error_text = ['交易','机构','教育','项目','公司','中标','开标','截标','监督','政府','国家','中国','技术','投标','传真','网址','电子邮',
			
 
				-                          '联系','联系电','联系地','采购代','邮政编','邮政','电话','手机','手机号','联系人','地址','地点','邮箱','邮编','联系方','招标','招标人','代理','代理人','采购','附件']
			
 
				+                          '联系','联系电','联系地','采购代','邮政编','邮政','电话','手机','手机号','联系人','地址','地点','邮箱','邮编','联系方','招标','招标人','代理',
			
 
				+                          '代理人','采购','附件','注意','登录','报名','踏勘']
			
 
				             list_person_text = set(list_person_text + error_text)
			
 
				             re_person = re.compile("联系人[:：]([\u4e00-\u9fa5]工)|"
			
 
				                                    "联系人[:：]([\u4e00-\u9fa5]{2,3})(?=联系)|"
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -768,6 +768,26 @@ def getPackagesFromArticle(list_sentence,list_entity):
 
				                 PackageList.append(copy_pack)
			
 
				     return PackageList,PackageSet,dict_packageCode
			
 
				 
			
 
				+# km配对方法
			
 
				+def dispatch(match_list):
			
 
				+    main_roles = list(set([match.main_role for match in match_list]))
			
 
				+    attributes = list(set([match.attribute for match in match_list]))
			
 
				+
			
 
				+    label = np.zeros(shape=(len(main_roles), len(attributes)))
			
 
				+    for match in match_list:
			
 
				+        main_role = match.main_role
			
 
				+        attribute = match.attribute
			
 
				+        value = match.value
			
 
				+        label[main_roles.index(main_role), attributes.index(attribute)] = value + 10000
			
 
				+    # print(label)
			
 
				+    gragh = -label
			
 
				+    # km算法
			
 
				+    row, col = linear_sum_assignment(gragh)
			
 
				+    max_dispatch = [(i, j) for i, j, value in zip(row, col, gragh[row, col]) if value]
			
 
				+    # return [Match(main_roles[row], attributes[col]) for row, col in max_dispatch]
			
 
				+    return [(main_roles[row], attributes[col]) for row, col in max_dispatch]
			
 
				+
			
 
				+from BiddingKG.dl.common.Utils import getUnifyMoney
			
 
				 from BiddingKG.dl.interface.modelFactory import Model_relation_extraction
			
 
				 relationExtraction_model = Model_relation_extraction()
			
 
				 def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity,list_sentence,on_value = 0.5,on_value_person=0.5,sentence_len=4):
			
@@ -812,7 +832,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				                     packDict[packageName]["roleList"][i].money_unit = money.money_unit
			
 
				                 # print('链接中的金额：{0}, 单位：{1}'.format(money.entity_text, money.money_unit))
			
 
				         return packDict
			
 
				-    
			
 
    def addRatioByEntity(packDict,packageName,entity,ratio):
        """Store ``ratio.entity_text`` on the matching roles of one package.

        Every role in ``packDict[packageName]["roleList"]`` whose
        ``entity_text`` equals ``entity`` gets its ``ratio`` attribute set.
        Nothing is returned; ``packDict`` is modified in place.
        """
        for role in packDict[packageName]["roleList"]:
            if role.entity_text != entity:
                continue
            # Read lazily so `ratio` is only touched when a role matches.
            role.ratio = ratio.entity_text
			
 
    def addServiceTimeByEntity(packDict,packageName,entity,serviceTime):
        """Store ``serviceTime.entity_text`` on the matching roles of one package.

        Every role in ``packDict[packageName]["roleList"]`` whose
        ``entity_text`` equals ``entity`` gets its ``serviceTime`` attribute
        set. Nothing is returned; ``packDict`` is modified in place.
        """
        for role in packDict[packageName]["roleList"]:
            if role.entity_text != entity:
                continue
            # Read lazily so `serviceTime` is only touched when a role matches.
            role.serviceTime = serviceTime.entity_text
			
 
				+
			
 
				     #根据实体名称得到角色
			
 
				     def getRoleWithText(packDict,entity_text):
			
 
				         for pack in packDict.keys():
			
@@ -838,8 +866,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				                 # print('连接前修改大于50亿金额：前面是后面的一万倍则把前面金额/10000')
			
 
				     
			
 
				     #遍历所有实体
			
 
				-    while(p_entity<len(list_entity)):
			
 
				-        entity = list_entity[p_entity]
			
 
				+    # while(p_entity<len(list_entity)):
			
 
				+    #     entity = list_entity[p_entity]
			
 
				         '''
			
 
				         #招标金额从后往前找
			
 
				         if entity.entity_type=="money":
			
@@ -902,88 +930,206 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				 
			
 
				 
			
 
				         #如果实体属于角色集合，则往后找属性
			
 
				-        if doesEntityOrLinkedEntity_inRoleSet(entity, roleSet):
			
 
				-            
			
 
				-            p_entity += 1
			
 
				-            #循环查找符合的属性
			
 
				-            while(p_entity<len(list_entity)):
			
 
				-                
			
 
				-                entity_after = list_entity[p_entity]
			
 
				-                if entity_after.sentence_index-entity.sentence_index>=sentence_len:
			
 
				-                    p_entity -= 1
			
 
				-                    break
			
 
				-                #若是遇到公司实体，则跳出循环
			
 
				-                if entity_after.entity_type in ['org','company']:
			
 
				-                    p_entity -= 1
			
 
				-                    break
			
 
				-                if entity_after.values is not None:
			
 
				-                    if entity_after.entity_type=="money":
			
 
				-                        if entity_after.values[entity_after.label]>=on_value:
			
 
				-                            '''
			
 
				-                            #招标金额从后往前找
			
 
				-                            if str(entity_after.label)=="0":
			
 
				-                                packagePointer,_ = getPackage(PackageList,entity.sentence_index,entity.begin_index,"money-"+str(entity.label))
			
 
				-                                if packagePointer is None:
			
 
				-                                    packageName = "Project"
			
 
				+        # if doesEntityOrLinkedEntity_inRoleSet(entity, roleSet):
			
 
				+        #
			
 
				+        #     p_entity += 1
			
 
				+        #     #循环查找符合的属性
			
 
				+        #     while(p_entity<len(list_entity)):
			
 
				+        #
			
 
				+        #         entity_after = list_entity[p_entity]
			
 
				+        #         if entity_after.sentence_index-entity.sentence_index>=sentence_len:
			
 
				+        #             p_entity -= 1
			
 
				+        #             break
			
 
				+        #         #若是遇到公司实体，则跳出循环
			
 
				+        #         if entity_after.entity_type in ['org','company']:
			
 
				+        #             p_entity -= 1
			
 
				+        #             break
			
 
				+        #         if entity_after.values is not None:
			
 
				+        #             if entity_after.entity_type=="money":
			
 
				+        #                 if entity_after.values[entity_after.label]>=on_value:
			
 
				+        #                     '''
			
 
				+        #                     #招标金额从后往前找
			
 
				+        #                     if str(entity_after.label)=="0":
			
 
				+        #                         packagePointer,_ = getPackage(PackageList,entity.sentence_index,entity.begin_index,"money-"+str(entity.label))
			
 
				+        #                         if packagePointer is None:
			
 
				+        #                             packageName = "Project"
			
 
				+        #                         else:
			
 
				+        #                             packageName = packagePointer.entity_text
			
 
				+        #                         addMoneyByRoleid(PackDict, packageName, "0", entity_after.entity_text, entity_after.values[entity_after.label])
			
 
				+        #                     '''
			
 
				+        #                     if str(entity_after.label)=="1":
			
 
				+        #                         #print(entity_after.entity_text,entity.entity_text)
			
 
				+        #                         _list_entitys = [entity]+entity.linked_entitys
			
 
				+        #                         if len(PackageSet)>0:
			
 
				+        #                             packagePointer,_ = getPackage(PackageList,entity_after.sentence_index,entity_after.begin_index,"money-"+str(entity_after.entity_text)+"-"+str(entity_after.label))
			
 
				+        #                             if packagePointer is None:
			
 
				+        #                                 packageName_entity = "Project"
			
 
				+        #                             else:
			
 
				+        #                                 packageName_entity = packagePointer.entity_text
			
 
				+        #                         else:
			
 
				+        #                             packageName_entity = "Project"
			
 
				+        #                         if str(entity.label) in ["2","3","4"]:
			
 
				+        #                             # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after.entity_text, entity_after.values[entity_after.label])
			
 
				+        #                             if entity_after.notes == '单价' or float(entity_after.entity_text)<5000: #2021/12/17 调整小金额阈值，避免203608823.html 两次金额一次万元没提取到的情况
			
 
				+        #                                 addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
			
 
				+        #                                                  0.5)
			
 
				+        #                                 entity.pointer_money = entity_after
			
 
				+        #                                 # print('role zhao money', entity.entity_text, '中标金额：', entity_after.entity_text)
			
 
				+        #                             else:
			
 
				+        #                                 addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
			
 
				+        #                                                  entity_after.values[entity_after.label])
			
 
				+        #                                 entity.pointer_money = entity_after
			
 
				+        #                                 # print('role zhao money', entity.entity_text, '中标金额：', entity_after.entity_text)
			
 
				+        #                                 if entity_after.values[entity_after.label]>0.6:
			
 
				+        #                                     break # 2021/7/16 新增，找到中标金额，非单价即停止，不再往后找金额
			
 
				+        #                             #add pointer_money
			
 
				+        #                             # entity.pointer_money = entity_after
			
 
				+        #                             # print('role zhao money', entity.entity_text, '中标金额：', entity_after.entity_text)
			
 
				+        #                             # if entity_after.notes!='单价':
			
 
				+        #                             #     break  # 2021/7/16 新增，找到中标金额即停止，不再往后找金额
			
 
				+        #                 '''
			
 
				+        #             if entity_after.entity_type=="person":
			
 
				+        #                 if entity_after.values[entity_after.label]>=on_value_person:
			
 
				+        #                     if str(entity_after.label)=="1":
			
 
				+        #                         for i in range(len(roleList)):
			
 
				+        #                             if roleList[i].role_name=="tenderee":
			
 
				+        #                                 roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
			
 
				+        #                     elif str(entity_after.label)=="2":
			
 
				+        #                         for i in range(len(roleList)):
			
 
				+        #                             if roleList[i].role_name=="agency":
			
 
				+        #                                 roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
			
 
				+        #                     elif str(entity_after.label)=="3":
			
 
				+        #                         _list_entitys = [entity]+entity.linked_entitys
			
 
				+        #                         for _entity in _list_entitys:
			
 
				+        #                             for i in range(len(roleList)):
			
 
				+        #                                 if roleList[i].entity_text==_entity.entity_text:
			
 
				+        #                                     if entity_after.sentence_index-_entity.sentence_index>1 and len(roleList[i].linklist)>0:
			
 
				+        #                                         break
			
 
				+        #                                     roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
			
 
				+        #             '''
			
 
				+        #
			
 
				+        #         p_entity += 1
			
 
				+        #
			
 
				+        # p_entity += 1
			
 
				+    # 记录每句的分词数量
			
 
				+    tokens_num_dict = dict()
			
 
				+    last_tokens_num = 0
			
 
				+    for sentence in list_sentence:
			
 
				+        _index = sentence.sentence_index
			
 
				+        if _index == 0:
			
 
				+            tokens_num_dict[_index] = 0
			
 
				+        else:
			
 
				+            tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
			
 
				+        last_tokens_num = len(sentence.tokens)
			
 
				+    attribute_type = ['money','serviceTime','ratio']# 'money'仅指“中投标金额”
			
 
				+    for link_attribute in attribute_type:
			
 
				+        temp_entity_list = []
			
 
				+        if link_attribute=="money":
			
 
				+            temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
			
 
				+                                (ent.entity_type=='money' and ent.label==1 and ent.values[ent.label]>=0.5)]
			
 
				+            # 删除重复的‘中投标金额’，一般为大小写两种样式
			
 
				+            drop_tendererMoney = []
			
 
				+            for ent_idx in range(len(temp_entity_list)-1):
			
 
				+                entity = temp_entity_list[ent_idx]
			
 
				+                if entity.entity_type=='money':
			
 
				+                    next_entity = temp_entity_list[ent_idx+1]
			
 
				+                    if next_entity.entity_type=='money':
			
 
				+                        if getUnifyMoney(entity.entity_text)==getUnifyMoney(next_entity.entity_text):
			
 
				+                            if (tokens_num_dict[next_entity.sentence_index] + next_entity.begin_index) - (
			
 
				+                                               tokens_num_dict[entity.sentence_index] + entity.end_index) < 10:
			
 
				+                                drop_tendererMoney.append(next_entity)
			
 
				+            for _drop in drop_tendererMoney:
			
 
				+                temp_entity_list.remove(_drop)
			
 
				+        elif link_attribute=="serviceTime":
			
 
				+            temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
			
 
				+                                ent.entity_type=='serviceTime']
			
 
				+        elif link_attribute=="ratio":
			
 
				+            temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
			
 
				+                                ent.entity_type=='ratio']
			
 
				+        temp_entity_list = sorted(temp_entity_list,key=lambda x: (x.sentence_index, x.begin_index))
			
 
				+        temp_match_list = []
			
 
				+        for ent_idx in range(len(temp_entity_list)):
			
 
				+            entity = temp_entity_list[ent_idx]
			
 
				+            if entity.entity_type in ['org','company']:
			
 
				+                match_nums = 0
			
 
				+                tenderer_nums = 0 #经过其他中投标人的数量
			
 
				+                byNotTenderer_match_nums = 0 #跟在中投标人后面的属性
			
 
				+                for after_index in range(ent_idx + 1, min(len(temp_entity_list), ent_idx + 4)):
			
 
				+                    after_entity = temp_entity_list[after_index]
			
 
				+                    if after_entity.entity_type == link_attribute:
			
 
				+                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				+                                           tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				+                        sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				+                        if sentence_distance == 0:
			
 
				+                            if distance < 100:
			
 
				+                                value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				+                                temp_match_list.append(Match(entity, after_entity, value))
			
 
				+                                match_nums += 1
			
 
				+                                if not tenderer_nums:
			
 
				+                                    byNotTenderer_match_nums += 1
			
 
				                                 else:
			
 
				-                                    packageName = packagePointer.entity_text
			
 
				-                                addMoneyByRoleid(PackDict, packageName, "0", entity_after.entity_text, entity_after.values[entity_after.label])
			
 
				-                            '''
			
 
				-                            if str(entity_after.label)=="1":
			
 
				-                                #print(entity_after.entity_text,entity.entity_text)
			
 
				-                                _list_entitys = [entity]+entity.linked_entitys
			
 
				-                                if len(PackageSet)>0:
			
 
				-                                    packagePointer,_ = getPackage(PackageList,entity_after.sentence_index,entity_after.begin_index,"money-"+str(entity_after.entity_text)+"-"+str(entity_after.label))
			
 
				-                                    if packagePointer is None:
			
 
				-                                        packageName_entity = "Project"
			
 
				-                                    else:
			
 
				-                                        packageName_entity = packagePointer.entity_text
			
 
				+                                    break
			
 
				+                        else:
			
 
				+                            if distance < 60:
			
 
				+                                value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				+                                temp_match_list.append(Match(entity, after_entity, value))
			
 
				+                                match_nums += 1
			
 
				+                                if not tenderer_nums:
			
 
				+                                    byNotTenderer_match_nums += 1
			
 
				                                 else:
			
 
				-                                    packageName_entity = "Project"
			
 
				-                                if str(entity.label) in ["2","3","4"]:
			
 
				-                                    # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after.entity_text, entity_after.values[entity_after.label])
			
 
				-                                    if entity_after.notes == '单价' or float(entity_after.entity_text)<5000: #2021/12/17 调整小金额阈值，避免203608823.html 两次金额一次万元没提取到的情况
			
 
				-                                        addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
			
 
				-                                                         0.5)
			
 
				-                                        entity.pointer_money = entity_after
			
 
				-                                        # print('role zhao money', entity.entity_text, '中标金额：', entity_after.entity_text)
			
 
				-                                    else:
			
 
				-                                        addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
			
 
				-                                                         entity_after.values[entity_after.label])
			
 
				-                                        entity.pointer_money = entity_after
			
 
				-                                        # print('role zhao money', entity.entity_text, '中标金额：', entity_after.entity_text)
			
 
				-                                        if entity_after.values[entity_after.label]>0.6:
			
 
				-                                            break # 2021/7/16 新增，找到中标金额，非单价即停止，不再往后找金额
			
 
				-                                    #add pointer_money
			
 
				-                                    # entity.pointer_money = entity_after
			
 
				-                                    # print('role zhao money', entity.entity_text, '中标金额：', entity_after.entity_text)
			
 
				-                                    # if entity_after.notes!='单价':
			
 
				-                                    #     break  # 2021/7/16 新增，找到中标金额即停止，不再往后找金额
			
 
				-                        '''
			
 
				-                    if entity_after.entity_type=="person":
			
 
				-                        if entity_after.values[entity_after.label]>=on_value_person:
			
 
				-                            if str(entity_after.label)=="1":
			
 
				-                                for i in range(len(roleList)):
			
 
				-                                    if roleList[i].role_name=="tenderee":
			
 
				-                                        roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
			
 
				-                            elif str(entity_after.label)=="2":
			
 
				-                                for i in range(len(roleList)):
			
 
				-                                    if roleList[i].role_name=="agency":
			
 
				-                                        roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
			
 
				-                            elif str(entity_after.label)=="3":
			
 
				-                                _list_entitys = [entity]+entity.linked_entitys
			
 
				-                                for _entity in _list_entitys:
			
 
				-                                    for i in range(len(roleList)):
			
 
				-                                        if roleList[i].entity_text==_entity.entity_text:
			
 
				-                                            if entity_after.sentence_index-_entity.sentence_index>1 and len(roleList[i].linklist)>0:
			
 
				-                                                break
			
 
				-                                            roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
			
 
				-                    '''
			
 
				-                    
			
 
				-                p_entity += 1  
			
 
				-                
			
 
				-        p_entity += 1
			
 
				-    
			
 
				+                                    break
			
 
				+                    else:
			
 
				+                        tenderer_nums += 1
			
 
				+                #前向查找属性
			
 
				+                if not match_nums or not byNotTenderer_match_nums:
			
 
				+                    previous_entity = temp_entity_list[ent_idx - 1]
			
 
				+                    if previous_entity.entity_type == link_attribute:
			
 
				+                        if previous_entity.sentence_index == entity.sentence_index:
			
 
				+                            distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
			
 
				+                                    tokens_num_dict[
			
 
				+                                        previous_entity.sentence_index] + previous_entity.end_index)
			
 
				+                            if distance < 20:
			
 
				+                                # 前向 没有 /10000
			
 
				+                                value = (-1 / 2 * (distance ** 2))
			
 
				+                                temp_match_list.append(Match(entity, previous_entity, value))
			
 
				+        # km算法分配求解
			
 
				+        dispatch_result = dispatch(temp_match_list)
			
 
				+        # print(dispatch_result)
			
 
				+        for match in dispatch_result:
			
 
				+            _entity = match[0]
			
 
				+            _attribute = match[1]
			
 
				+            if link_attribute=='money':
			
 
				+                _entity.pointer_money = _attribute
			
 
				+                packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
			
 
				+                                               "money-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
			
 
				+                if packagePointer is None:
			
 
				+                    packageName_entity = "Project"
			
 
				+                else:
			
 
				+                    packageName_entity = packagePointer.entity_text
			
 
				+                if _attribute.notes == '单价' or float(_attribute.entity_text) < 5000:  # 2021/12/17 调整小金额阈值，避免203608823.html 两次金额一次万元没提取到的情况
			
 
				+                    addMoneyByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute,0.5)
			
 
				+                else:
			
 
				+                    addMoneyByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute,
			
 
				+                                     _attribute.values[_attribute.label])
			
 
				+            elif link_attribute=='serviceTime':
			
 
				+                _entity.pointer_serviceTime = _attribute
			
 
				+                packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
			
 
				+                                               "serviceTime-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
			
 
				+                if packagePointer is None:
			
 
				+                    packageName_entity = "Project"
			
 
				+                else:
			
 
				+                    packageName_entity = packagePointer.entity_text
			
 
				+                addServiceTimeByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
			
 
				+            elif link_attribute=='ratio':
			
 
				+                _entity.pointer_ratio = _attribute
			
 
				+                packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
			
 
				+                                               "ratio-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
			
 
				+                if packagePointer is None:
			
 
				+                    packageName_entity = "Project"
			
 
				+                else:
			
 
				+                    packageName_entity = packagePointer.entity_text
			
 
				+                addRatioByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
			
 
				+
			
 
				     ''''''
			
 
				     # 通过模型分类的招标/代理联系人
			
 
				     list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
			
@@ -1073,24 +1219,6 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				                     for one_phone in _phone:
			
 
				                         PackDict["Project"]["roleList"][i].linklist.append(("", one_phone))
			
 
				                         agency_phone.add(one_phone)
			
 
				-    # km配对方法
			
 
				-    def dispatch(match_list):
			
 
				-        main_roles = list(set([match.main_role for match in match_list]))
			
 
				-        attributes = list(set([match.attribute for match in match_list]))
			
 
				-
			
 
				-        label = np.zeros(shape=(len(main_roles), len(attributes)))
			
 
				-        for match in match_list:
			
 
				-            main_role = match.main_role
			
 
				-            attribute = match.attribute
			
 
				-            value = match.value
			
 
				-            label[main_roles.index(main_role), attributes.index(attribute)] = value + 10000
			
 
				-        # print(label)
			
 
				-        gragh = -label
			
 
				-        # km算法
			
 
				-        row, col = linear_sum_assignment(gragh)
			
 
				-        max_dispatch = [(i, j) for i, j, value in zip(row, col, gragh[row, col]) if value]
			
 
				-        # return [Match(main_roles[row], attributes[col]) for row, col in max_dispatch]
			
 
				-        return [(main_roles[row], attributes[col]) for row, col in max_dispatch]
			
 
				 
			
 
				     # 正则提取电话号码实体
			
 
				     # key_word = re.compile('((?:电话|联系方式|联系人).{0,4}?)([0-1]\d{6,11})')
			
@@ -1193,15 +1321,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				         # 去重结果
			
 
				         relation_list = list(set(relation_list))
			
 
				     # print(relation_list)
			
 
				-    tokens_num_dict = dict()
			
 
				-    last_tokens_num = 0
			
 
				-    for sentence in list_sentence:
			
 
				-        _index = sentence.sentence_index
			
 
				-        if _index == 0:
			
 
				-            tokens_num_dict[_index] = 0
			
 
				-        else:
			
 
				-            tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
			
 
				-        last_tokens_num = len(sentence.tokens)
			
 
				+    # tokens_num_dict = dict()
			
 
				+    # last_tokens_num = 0
			
 
				+    # for sentence in list_sentence:
			
 
				+    #     _index = sentence.sentence_index
			
 
				+    #     if _index == 0:
			
 
				+    #         tokens_num_dict[_index] = 0
			
 
				+    #     else:
			
 
				+    #         tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
			
 
				+    #     last_tokens_num = len(sentence.tokens)
			
 
				     right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
			
 
				     linked_company = set()
			
 
				     linked_person = set()
			
@@ -1570,7 +1698,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				                                                 match_nums += 1
			
 
				                         # 实体无匹配时，尝试前向查找匹配
			
 
				                         if not match_nums:
			
 
				-                            if entity.label != 5 and entity.values[entity.label] > 0.5 and index != 0:
			
 
				+                            if (entity.label != 5 or entity.entity_text in roleSet) and entity.values[entity.label] >= 0.5 and index != 0:
			
 
				                                 previous_entity = split_entitys[index - 1]
			
 
				                                 if previous_entity.entity_type == 'person' and previous_entity.label in [1, 2, 3]:
			
 
				                                     if entity.label in [2, 3, 4] and previous_entity.label in [1, 2]:
			
@@ -2127,14 +2255,27 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
				                     PackDict[pack]["roleList"][i].money = float(PackDict[pack]["roleList"][i].money) / 10000
			
 
				                     # print('通过其他中标人投标金额校正中标金额')
			
 
				 
			
 
				+    for item in list_pop:
			
 
				+        PackDict.pop(item)
			
 
				+
			
 
				+    # 公告中只有"招标人"且无"联系人"链接时，直接取文中倒数第一个联系人
			
 
				+    if len(PackDict)==1:
			
 
				+        k = list(PackDict.keys())[0]
			
 
				+        if len(PackDict[k]["roleList"])==1:
			
 
				+            if PackDict[k]["roleList"][0].role_name == "tenderee":
			
 
				+                if not PackDict[k]["roleList"][0].linklist:
			
 
				+                    for _entity in temporary_list2[::-1]:
			
 
				+                        if _entity.entity_type=='person' and _entity.label==3:
			
 
				+                            if _entity.person_phone:
			
 
				+                                _phone = [p.entity_text for p in _entity.person_phone]
			
 
				+                                for _p in _phone:
			
 
				+                                    PackDict[k]["roleList"][0].linklist.append((_entity.entity_text, _p))
			
 
				+                                break
			
 
				 
			
 
				     for pack in PackDict.keys():
			
 
				         for i in range(len(PackDict[pack]["roleList"])):
			
 
				             PackDict[pack]["roleList"][i] = PackDict[pack]["roleList"][i].getString()
			
 
				 
			
 
				-    for item in list_pop:
			
 
				-        PackDict.pop(item)
			
 
				-        
			
 
				     return PackDict 
			
 
				 
			
 
				 def initPackageAttr(RoleList,PackageSet):
			
@@ -2241,9 +2382,9 @@ def getTimeAttributes(list_entity,list_sentence):
 
				     time_entitys = sorted(time_entitys,key=lambda x:(x.sentence_index, x.begin_index))
			
 
				     list_sentence = sorted(list_sentence,key=lambda x:x.sentence_index)
			
 
				     dict_time = {
			
 
				-        "time_release": [],
			
 
				-        "time_bidopen": [],
			
 
				-        "time_bidclose": [],
			
 
				+        "time_release": [], # 1 发布时间
			
 
				+        "time_bidopen": [], # 2 开标时间
			
 
				+        "time_bidclose": [], # 3 截标时间
			
 
				         'time_bidstart': [],  # 12 投标（开始）时间、响应文件接收（开始）时间
			
 
				 
			
 
				         'time_publicityStart': [],  # 4 公示开始时间（公示时间、公示期）
			
@@ -2424,18 +2565,11 @@ def getTimeAttributes(list_entity,list_sentence):
 
				 def getOtherAttributes(list_entity):
			
 
				     dict_other = {"moneysource":"",
			
 
				                   "person_review":[],
			
 
				-                  # "time_release":"",
			
 
				-                  # "time_bidopen":"",
			
 
				-                  # "time_bidclose":"",
			
 
				                   "serviceTime":"",
			
 
				                   "product":[],
			
 
				                   "total_tendereeMoney":0,
			
 
				                   "total_tendereeMoneyUnit":''}
			
 
				-    # dict_time = {
			
 
				-    #     "time_release": [],
			
 
				-    #     "time_bidopen": [],
			
 
				-    #     "time_bidclose": []
			
 
				-    # }
			
 
				+
			
 
				     for entity in list_entity:
			
 
				         if entity.entity_type == 'bidway':
			
 
				             dict_other["bidway"] = turnBidWay(entity.entity_text)
			
@@ -2443,18 +2577,6 @@ def getOtherAttributes(list_entity):
 
				             dict_other["moneysource"] = entity.entity_text
			
 
				         elif entity.entity_type=='serviceTime':
			
 
				             dict_other["serviceTime"] = entity.entity_text
			
 
				-        # elif entity.entity_type == 'time' and entity.label==1:
			
 
				-        #     if entity.values[entity.label]>0.6:
			
 
				-        #         dict_time['time_release'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				-        #     # dict_other["time_release"] = timeFormat(entity.entity_text)
			
 
				-        # elif entity.entity_type == 'time' and entity.label==2:
			
 
				-        #     if entity.values[entity.label]>0.6:
			
 
				-        #         dict_time['time_bidopen'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				-        #     # dict_other["time_bidopen"] = timeFormat(entity.entity_text)
			
 
				-        # elif entity.entity_type == 'time' and entity.label == 3:
			
 
				-        #     if entity.values[entity.label]>0.6:
			
 
				-        #         dict_time['time_bidclose'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
			
 
				-        #     # dict_other["time_bidclose"] = timeFormat(entity.entity_text)
			
 
				         elif entity.entity_type=="person" and entity.label ==4:
			
 
				             dict_other["person_review"].append(entity.entity_text)
			
 
				         elif entity.entity_type=='product':
			
@@ -2462,12 +2584,7 @@ def getOtherAttributes(list_entity):
 
				         elif entity.entity_type=='money' and entity.notes=='总投资' and dict_other["total_tendereeMoney"]<float(entity.entity_text):
			
 
				                 dict_other["total_tendereeMoney"] = float(entity.entity_text)
			
 
				                 dict_other["total_tendereeMoneyUnit"] = entity.money_unit
			
 
				-    # 时间类别
			
 
				-    # for time_type,value in dict_time.items():
			
 
				-    #     list_time = dict_time[time_type]
			
 
				-    #     if list_time:
			
 
				-    #         list_time.sort(key=lambda x:x[1],reverse=True)
			
 
				-    #         dict_other[time_type] = list_time[0][0]
			
 
				+
			
 
				     dict_other["product"] = list(set(dict_other["product"]))
			
 
				     return dict_other
			
 
				 
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -794,7 +794,7 @@ class EPCPredict():
 
				                 values.append(item)
			
 
				             # phone_number = phone[i]
			
 
				             # entity.set_Person(label,values,phone_number)
			
 
				-            entity.set_Person(label,values,None)
			
 
				+            entity.set_Person(label,values,[])
			
 
				         # 为联系人匹配电话
			
 
				         # self.person_search_phone(list_sentences, list_entitys)
			
 
				 
			
--- a/BiddingKG/dl/metrics/extractMetric.py
+++ b/BiddingKG/dl/metrics/extractMetric.py
@@ -259,7 +259,6 @@ class ExtractMetric():
 
				     def extractFromInterface(self,content):
			
 
				         return json.loads(test("",content))
			
 
				 
			
 
				-
			
 
				     def getDiff(self,_inter,_inter2):
			
 
				         _dict = {}
			
 
				         for k in ["code","product","person_review"]:
			
@@ -367,7 +366,7 @@ class ExtractMetric():
 
				 
			
 
				     def getMetrics(self,list_diff):
			
 
				         dict_key_count = {}
			
 
				-        print("all_count:",list_diff)
			
 
				+        # print("all_count:",list_diff)
			
 
				         for _diff in list_diff:
			
 
				             for k,v in _diff.items():
			
 
				                 if k not in dict_key_count:
			
--- a/BiddingKG/dl/test/test4.py
+++ b/BiddingKG/dl/test/test4.py
@@ -46,7 +46,7 @@ def test(name,content):
 
				 if __name__=="__main__":
			
 
				     # filename = "比地_52_79929693.html"
			
 
				     #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
			
 
				-    text = codecs.open("C:\\Users\\\Administrator\\Desktop\\2.html","r",encoding="utf8").read()
			
 
				+    text = codecs.open("C:\\Users\\\Administrator\\Desktop\\test12354.txt","r",encoding="utf8").read()
			
 
				     content = str(BeautifulSoup(text).find("div",id="pcontent"))
			
 
				     # df_a = {"html":[]}
			
 
				     # df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
			
@@ -74,10 +74,9 @@ if __name__=="__main__":
 
				     # 广州比地数据科技有限公司翻译服务工程招标
			
 
				     # '''
			
 
				     # print(predict("12",content,title="关于人防工程技术咨询服务项目【重新招标】单一来源谈判的通知"))
			
 
				-
			
 
				     # print(predict("12", content,"打印机"))
			
 
				-    a = time.time()
			
 
				-    print(predict("12", content,"打印机"))
			
 
				+    # print(predict("12", text,"打印机"))
			
 
				     # test(12,content)
			
 
				-    print("takes",time.time()-a)
			
 
				+    test(12,text)
			
 
				+    print("takes",time.time()-_time1)
			
 
				     pass
			
--- a/BiddingKG/dl/test/测试整个要素提取流程.py
+++ b/BiddingKG/dl/test/测试整个要素提取流程.py
@@ -136,21 +136,34 @@ def predict(doc_id,text):
 
				                 print(entity.entity_text, entity.begin_index, entity.end_index)
			
 
				             elif entity.entity_type in ['org','company']:
			
 
				                 _sentence = list_sentences[0][entity.sentence_index]
			
 
				+                print(entity.entity_type)
			
 
				                 if entity.pointer_person:
			
 
				                     print("公司->联系人1：",end=' ')
			
 
				                     print(entity.entity_text,[i.entity_text for i in entity.pointer_person],entity.label,entity.values)
			
 
				                     # print(entity.entity_text,entity.label,entity.values)
			
 
				-                    # print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
			
 
				+                    print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
			
 
				                 else:
			
 
				                     print("公司->联系人2：", end=' ')
			
 
				                     print(entity.entity_text, entity.pointer_person,entity.label,entity.values)
			
 
				-                    # print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
			
 
				+                    print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
			
 
				                     pass
			
 
				+                if entity.label in [2,3,4]:
			
 
				+                    if entity.pointer_money:
			
 
				+                        print("公司->中投标金额：", end=' ')
			
 
				+                        print(entity.entity_text, entity.pointer_money.entity_text)
			
 
				+                    if entity.pointer_serviceTime:
			
 
				+                        print("公司->工期：", end=' ')
			
 
				+                        print(entity.entity_text, entity.pointer_serviceTime.entity_text)
			
 
				+                    if entity.pointer_ratio:
			
 
				+                        print("公司->费率：", end=' ')
			
 
				+                        print(entity.entity_text, entity.pointer_ratio.entity_text)
			
 
				                 # print(entity.pointer_pack)
			
 
				             # elif entity.entity_type =='serviceTime':
			
 
				             #     print(entity.entity_text)
			
 
				             #     if entity.pointer_pack:
			
 
				             #         print('pointer_pack_name:',entity.pointer_pack.entity_text)
			
 
				+            # elif entity.entity_type =='money':
			
 
				+            #     print('money',entity.entity_text,entity.label)
			
 
				             # elif entity.entity_type in ['package']:
			
 
				             #     print('pack_entity:',entity.entity_text)
			
 
				             # print(entity.entity_text,entity.entity_type,entity.label,entity.values,entity.sentence_index,entity.wordOffset_begin,entity.wordOffset_end)