Jelajahi Sumber

公司-中投标金额链接方法

znj 3 tahun lalu
induk
melakukan
4b6dacbed4

+ 7 - 1
BiddingKG/dl/interface/Entitys.py

@@ -172,6 +172,8 @@ class Entity():
         self.if_dict_match = 0  # 2021/12/21 新增,判断公司实体是否由字典识别得到
         self.is_total_money = 0  # 2021/12/29 新增,判断金额是否总价
         self.is_unit_money = 0  # 2021/12/29 新增,判断金额是否单价
+        self.pointer_serviceTime = None  # 2022/01/05 新增,中标人对应链接"服务期限(工期)"
+        self.pointer_ratio = None  # 2022/01/05 新增,中标人对应链接"中投标金额->费率、下浮率"
 
     def set_Role(self,role_label,role_values):
         self.label = int(role_label)
@@ -261,11 +263,15 @@ class Role():
         self.money_prob = money_prob
         self.linklist = linklist
         self.money_unit = '' # 2021/8/17 新增 保存金额单位
+        # 中投标人属性
+        self.ratio = "" #2022/01/06 新增 保存中投标金额相关费率
+        self.serviceTime = "" #2022/01/06 新增 保存服务期限(工期)
         
     def getString(self):
         self.linklist = [item for item in set(self.linklist)]
         # result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist]
-        result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist, self.money_unit]
+        # result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist, self.money_unit]
+        result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist, self.money_unit,self.ratio,self.serviceTime]
         return result
 
 # 用于KM算法的组合配对

+ 3 - 2
BiddingKG/dl/interface/Preprocessing.py

@@ -1686,7 +1686,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                     if k == 'text':
                         entity = v
                 b = it.start() + len(keyword)
-                e = it.end() - 1
+                e = it.end()-1
                 if (b, e, 'org', entity) not in ner_entitys and (b, e, 'company', entity) not in ner_entitys:
                     ner_entitys.append((b, e, 'company', entity))
 
@@ -1951,7 +1951,8 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                         else:
                             entity_text = str(getUnifyMoney(entity_text))
 
-                    if float(entity_text)<100 or float(entity_text)>100000000000:
+                    # if float(entity_text)<100 or float(entity_text)>100000000000:
+                    if float(entity_text)<50 or float(entity_text)>100000000000:
                         # print('过滤掉金额:float(entity_text)<100 or float(entity_text)>100000000000', entity_text, unit)
                         continue
 

+ 243 - 139
BiddingKG/dl/interface/getAttributes.py

@@ -768,6 +768,26 @@ def getPackagesFromArticle(list_sentence,list_entity):
                 PackageList.append(copy_pack)
     return PackageList,PackageSet,dict_packageCode
 
+# km配对方法
+def dispatch(match_list):
+    main_roles = list(set([match.main_role for match in match_list]))
+    attributes = list(set([match.attribute for match in match_list]))
+
+    label = np.zeros(shape=(len(main_roles), len(attributes)))
+    for match in match_list:
+        main_role = match.main_role
+        attribute = match.attribute
+        value = match.value
+        label[main_roles.index(main_role), attributes.index(attribute)] = value + 10000
+    # print(label)
+    gragh = -label
+    # km算法
+    row, col = linear_sum_assignment(gragh)
+    max_dispatch = [(i, j) for i, j, value in zip(row, col, gragh[row, col]) if value]
+    # return [Match(main_roles[row], attributes[col]) for row, col in max_dispatch]
+    return [(main_roles[row], attributes[col]) for row, col in max_dispatch]
+
+from BiddingKG.dl.common.Utils import getUnifyMoney
 from BiddingKG.dl.interface.modelFactory import Model_relation_extraction
 relationExtraction_model = Model_relation_extraction()
 def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity,list_sentence,on_value = 0.5,on_value_person=0.5,sentence_len=4):
@@ -812,7 +832,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
                     packDict[packageName]["roleList"][i].money_unit = money.money_unit
                 # print('链接中的金额:{0}, 单位:{1}'.format(money.entity_text, money.money_unit))
         return packDict
-    
+    def addRatioByEntity(packDict,packageName,entity,ratio):
+        for i in range(len(packDict[packageName]["roleList"])):
+            if packDict[packageName]["roleList"][i].entity_text==entity:
+                packDict[packageName]["roleList"][i].ratio = ratio.entity_text
+    def addServiceTimeByEntity(packDict,packageName,entity,serviceTime):
+        for i in range(len(packDict[packageName]["roleList"])):
+            if packDict[packageName]["roleList"][i].entity_text==entity:
+                packDict[packageName]["roleList"][i].serviceTime = serviceTime.entity_text
+
     #根据实体名称得到角色
     def getRoleWithText(packDict,entity_text):
         for pack in packDict.keys():
@@ -838,8 +866,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
                 # print('连接前修改大于50亿金额:前面是后面的一万倍则把前面金额/10000')
     
     #遍历所有实体
-    while(p_entity<len(list_entity)):
-        entity = list_entity[p_entity]
+    # while(p_entity<len(list_entity)):
+    #     entity = list_entity[p_entity]
         '''
         #招标金额从后往前找
         if entity.entity_type=="money":
@@ -902,88 +930,206 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
 
 
         #如果实体属于角色集合,则往后找属性
-        if doesEntityOrLinkedEntity_inRoleSet(entity, roleSet):
-            
-            p_entity += 1
-            #循环查找符合的属性
-            while(p_entity<len(list_entity)):
-                
-                entity_after = list_entity[p_entity]
-                if entity_after.sentence_index-entity.sentence_index>=sentence_len:
-                    p_entity -= 1
-                    break
-                #若是遇到公司实体,则跳出循环
-                if entity_after.entity_type in ['org','company']:
-                    p_entity -= 1
-                    break
-                if entity_after.values is not None:
-                    if entity_after.entity_type=="money":
-                        if entity_after.values[entity_after.label]>=on_value:
-                            '''
-                            #招标金额从后往前找
-                            if str(entity_after.label)=="0":
-                                packagePointer,_ = getPackage(PackageList,entity.sentence_index,entity.begin_index,"money-"+str(entity.label))
-                                if packagePointer is None:
-                                    packageName = "Project"
+        # if doesEntityOrLinkedEntity_inRoleSet(entity, roleSet):
+        #
+        #     p_entity += 1
+        #     #循环查找符合的属性
+        #     while(p_entity<len(list_entity)):
+        #
+        #         entity_after = list_entity[p_entity]
+        #         if entity_after.sentence_index-entity.sentence_index>=sentence_len:
+        #             p_entity -= 1
+        #             break
+        #         #若是遇到公司实体,则跳出循环
+        #         if entity_after.entity_type in ['org','company']:
+        #             p_entity -= 1
+        #             break
+        #         if entity_after.values is not None:
+        #             if entity_after.entity_type=="money":
+        #                 if entity_after.values[entity_after.label]>=on_value:
+        #                     '''
+        #                     #招标金额从后往前找
+        #                     if str(entity_after.label)=="0":
+        #                         packagePointer,_ = getPackage(PackageList,entity.sentence_index,entity.begin_index,"money-"+str(entity.label))
+        #                         if packagePointer is None:
+        #                             packageName = "Project"
+        #                         else:
+        #                             packageName = packagePointer.entity_text
+        #                         addMoneyByRoleid(PackDict, packageName, "0", entity_after.entity_text, entity_after.values[entity_after.label])
+        #                     '''
+        #                     if str(entity_after.label)=="1":
+        #                         #print(entity_after.entity_text,entity.entity_text)
+        #                         _list_entitys = [entity]+entity.linked_entitys
+        #                         if len(PackageSet)>0:
+        #                             packagePointer,_ = getPackage(PackageList,entity_after.sentence_index,entity_after.begin_index,"money-"+str(entity_after.entity_text)+"-"+str(entity_after.label))
+        #                             if packagePointer is None:
+        #                                 packageName_entity = "Project"
+        #                             else:
+        #                                 packageName_entity = packagePointer.entity_text
+        #                         else:
+        #                             packageName_entity = "Project"
+        #                         if str(entity.label) in ["2","3","4"]:
+        #                             # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after.entity_text, entity_after.values[entity_after.label])
+        #                             if entity_after.notes == '单价' or float(entity_after.entity_text)<5000: #2021/12/17 调整小金额阈值,避免203608823.html 两次金额一次万元没提取到的情况
+        #                                 addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
+        #                                                  0.5)
+        #                                 entity.pointer_money = entity_after
+        #                                 # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
+        #                             else:
+        #                                 addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
+        #                                                  entity_after.values[entity_after.label])
+        #                                 entity.pointer_money = entity_after
+        #                                 # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
+        #                                 if entity_after.values[entity_after.label]>0.6:
+        #                                     break # 2021/7/16 新增,找到中标金额,非单价即停止,不再往后找金额
+        #                             #add pointer_money
+        #                             # entity.pointer_money = entity_after
+        #                             # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
+        #                             # if entity_after.notes!='单价':
+        #                             #     break  # 2021/7/16 新增,找到中标金额即停止,不再往后找金额
+        #                 '''
+        #             if entity_after.entity_type=="person":
+        #                 if entity_after.values[entity_after.label]>=on_value_person:
+        #                     if str(entity_after.label)=="1":
+        #                         for i in range(len(roleList)):
+        #                             if roleList[i].role_name=="tenderee":
+        #                                 roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
+        #                     elif str(entity_after.label)=="2":
+        #                         for i in range(len(roleList)):
+        #                             if roleList[i].role_name=="agency":
+        #                                 roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
+        #                     elif str(entity_after.label)=="3":
+        #                         _list_entitys = [entity]+entity.linked_entitys
+        #                         for _entity in _list_entitys:
+        #                             for i in range(len(roleList)):
+        #                                 if roleList[i].entity_text==_entity.entity_text:
+        #                                     if entity_after.sentence_index-_entity.sentence_index>1 and len(roleList[i].linklist)>0:
+        #                                         break
+        #                                     roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
+        #             '''
+        #
+        #         p_entity += 1
+        #
+        # p_entity += 1
+    # 记录每句的分词数量
+    tokens_num_dict = dict()
+    last_tokens_num = 0
+    for sentence in list_sentence:
+        _index = sentence.sentence_index
+        if _index == 0:
+            tokens_num_dict[_index] = 0
+        else:
+            tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
+        last_tokens_num = len(sentence.tokens)
+    attribute_type = ['money','serviceTime','ratio']# 'money'仅指“中投标金额”
+    for link_attribute in attribute_type:
+        temp_entity_list = []
+        if link_attribute=="money":
+            temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
+                                (ent.entity_type=='money' and ent.label==1)]
+            # 删除重复的‘中投标金额’,一般为大小写两种样式
+            drop_tendererMoney = []
+            for ent_idx in range(len(temp_entity_list)-1):
+                entity = temp_entity_list[ent_idx]
+                if entity.entity_type=='money':
+                    next_entity = temp_entity_list[ent_idx+1]
+                    if next_entity.entity_type=='money':
+                        if getUnifyMoney(entity.entity_text)==getUnifyMoney(next_entity.entity_text):
+                            if (tokens_num_dict[next_entity.sentence_index] + next_entity.begin_index) - (
+                                               tokens_num_dict[entity.sentence_index] + entity.end_index) < 10:
+                                drop_tendererMoney.append(next_entity)
+            for _drop in drop_tendererMoney:
+                temp_entity_list.remove(_drop)
+        elif link_attribute=="serviceTime":
+            temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
+                                ent.entity_type=='serviceTime']
+        elif link_attribute=="ratio":
+            temp_entity_list = [ent for ent in list_entity if (ent.entity_type in ['org','company'] and ent.label in [2,3,4]) or
+                                ent.entity_type=='ratio']
+        temp_entity_list = sorted(temp_entity_list,key=lambda x: (x.sentence_index, x.begin_index))
+        temp_match_list = []
+        for ent_idx in range(len(temp_entity_list)):
+            entity = temp_entity_list[ent_idx]
+            if entity.entity_type in ['org','company']:
+                match_nums = 0
+                tenderer_nums = 0 #经过其他中投标人的数量
+                byNotTenderer_match_nums = 0 #跟在中投标人后面的属性
+                for after_index in range(ent_idx + 1, min(len(temp_entity_list), ent_idx + 4)):
+                    after_entity = temp_entity_list[after_index]
+                    if after_entity.entity_type == link_attribute:
+                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
+                                           tokens_num_dict[entity.sentence_index] + entity.end_index)
+                        sentence_distance = after_entity.sentence_index - entity.sentence_index
+                        if sentence_distance == 0:
+                            if distance < 100:
+                                value = (-1 / 2 * (distance ** 2)) / 10000
+                                temp_match_list.append(Match(entity, after_entity, value))
+                                match_nums += 1
+                                if not tenderer_nums:
+                                    byNotTenderer_match_nums += 1
                                 else:
-                                    packageName = packagePointer.entity_text
-                                addMoneyByRoleid(PackDict, packageName, "0", entity_after.entity_text, entity_after.values[entity_after.label])
-                            '''
-                            if str(entity_after.label)=="1":
-                                #print(entity_after.entity_text,entity.entity_text)
-                                _list_entitys = [entity]+entity.linked_entitys
-                                if len(PackageSet)>0:
-                                    packagePointer,_ = getPackage(PackageList,entity_after.sentence_index,entity_after.begin_index,"money-"+str(entity_after.entity_text)+"-"+str(entity_after.label))
-                                    if packagePointer is None:
-                                        packageName_entity = "Project"
-                                    else:
-                                        packageName_entity = packagePointer.entity_text
+                                    break
+                        else:
+                            if distance < 60:
+                                value = (-1 / 2 * (distance ** 2)) / 10000
+                                temp_match_list.append(Match(entity, after_entity, value))
+                                match_nums += 1
+                                if not tenderer_nums:
+                                    byNotTenderer_match_nums += 1
                                 else:
-                                    packageName_entity = "Project"
-                                if str(entity.label) in ["2","3","4"]:
-                                    # addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after.entity_text, entity_after.values[entity_after.label])
-                                    if entity_after.notes == '单价' or float(entity_after.entity_text)<5000: #2021/12/17 调整小金额阈值,避免203608823.html 两次金额一次万元没提取到的情况
-                                        addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
-                                                         0.5)
-                                        entity.pointer_money = entity_after
-                                        # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
-                                    else:
-                                        addMoneyByEntity(PackDict, packageName_entity, entity.entity_text, entity_after,
-                                                         entity_after.values[entity_after.label])
-                                        entity.pointer_money = entity_after
-                                        # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
-                                        if entity_after.values[entity_after.label]>0.6:
-                                            break # 2021/7/16 新增,找到中标金额,非单价即停止,不再往后找金额
-                                    #add pointer_money
-                                    # entity.pointer_money = entity_after
-                                    # print('role zhao money', entity.entity_text, '中标金额:', entity_after.entity_text)
-                                    # if entity_after.notes!='单价':
-                                    #     break  # 2021/7/16 新增,找到中标金额即停止,不再往后找金额
-                        '''
-                    if entity_after.entity_type=="person":
-                        if entity_after.values[entity_after.label]>=on_value_person:
-                            if str(entity_after.label)=="1":
-                                for i in range(len(roleList)):
-                                    if roleList[i].role_name=="tenderee":
-                                        roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
-                            elif str(entity_after.label)=="2":
-                                for i in range(len(roleList)):
-                                    if roleList[i].role_name=="agency":
-                                        roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
-                            elif str(entity_after.label)=="3":
-                                _list_entitys = [entity]+entity.linked_entitys
-                                for _entity in _list_entitys:
-                                    for i in range(len(roleList)):
-                                        if roleList[i].entity_text==_entity.entity_text:
-                                            if entity_after.sentence_index-_entity.sentence_index>1 and len(roleList[i].linklist)>0:
-                                                break
-                                            roleList[i].linklist.append((entity_after.entity_text,entity_after.person_phone))
-                    '''
-                    
-                p_entity += 1  
-                
-        p_entity += 1
-    
+                                    break
+                    else:
+                        tenderer_nums += 1
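+                # Backward lookup: if nothing matched ahead, or only after passing another tenderer, try the attribute just before this entity.
+                # NOTE(review): when ent_idx == 0, temp_entity_list[ent_idx - 1] wraps to the LAST element; if that is an attribute in the same sentence the distance is negative and passes `distance < 20`, adding a spurious match - confirm and guard with `ent_idx > 0`.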
+                if not match_nums or not byNotTenderer_match_nums:
+                    previous_entity = temp_entity_list[ent_idx - 1]
+                    if previous_entity.entity_type == link_attribute:
+                        if previous_entity.sentence_index == entity.sentence_index:
+                            distance = (tokens_num_dict[entity.sentence_index] + entity.begin_index) - (
+                                    tokens_num_dict[
+                                        previous_entity.sentence_index] + previous_entity.end_index)
+                            if distance < 20:
+                                # 前向 没有 /10000
+                                value = (-1 / 2 * (distance ** 2))
+                                temp_match_list.append(Match(entity, previous_entity, value))
+        # km算法分配求解
+        dispatch_result = dispatch(temp_match_list)
+        # print(dispatch_result)
+        for match in dispatch_result:
+            _entity = match[0]
+            _attribute = match[1]
+            if link_attribute=='money':
+                _entity.pointer_money = _attribute
+                packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
+                                               "money-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
+                if packagePointer is None:
+                    packageName_entity = "Project"
+                else:
+                    packageName_entity = packagePointer.entity_text
+                if _attribute.notes == '单价' or float(_attribute.entity_text) < 5000:  # 2021/12/17 调整小金额阈值,避免203608823.html 两次金额一次万元没提取到的情况
+                    addMoneyByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute,0.5)
+                else:
+                    addMoneyByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute,
+                                     _attribute.values[_attribute.label])
+            elif link_attribute=='serviceTime':
+                _entity.pointer_serviceTime = _attribute
+                packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
+                                               "serviceTime-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
+                if packagePointer is None:
+                    packageName_entity = "Project"
+                else:
+                    packageName_entity = packagePointer.entity_text
+                addServiceTimeByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
+            elif link_attribute=='ratio':
+                _entity.pointer_ratio = _attribute
+                packagePointer, _ = getPackage(PackageList, _attribute.sentence_index, _attribute.begin_index,
+                                               "ratio-" + str(_attribute.entity_text) + "-" + str(_attribute.label))
+                if packagePointer is None:
+                    packageName_entity = "Project"
+                else:
+                    packageName_entity = packagePointer.entity_text
+                addRatioByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
+
     ''''''
     # 通过模型分类的招标/代理联系人
     list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
@@ -1073,24 +1219,6 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
                     for one_phone in _phone:
                         PackDict["Project"]["roleList"][i].linklist.append(("", one_phone))
                         agency_phone.add(one_phone)
-    # km配对方法
-    def dispatch(match_list):
-        main_roles = list(set([match.main_role for match in match_list]))
-        attributes = list(set([match.attribute for match in match_list]))
-
-        label = np.zeros(shape=(len(main_roles), len(attributes)))
-        for match in match_list:
-            main_role = match.main_role
-            attribute = match.attribute
-            value = match.value
-            label[main_roles.index(main_role), attributes.index(attribute)] = value + 10000
-        # print(label)
-        gragh = -label
-        # km算法
-        row, col = linear_sum_assignment(gragh)
-        max_dispatch = [(i, j) for i, j, value in zip(row, col, gragh[row, col]) if value]
-        # return [Match(main_roles[row], attributes[col]) for row, col in max_dispatch]
-        return [(main_roles[row], attributes[col]) for row, col in max_dispatch]
 
     # 正则提取电话号码实体
     # key_word = re.compile('((?:电话|联系方式|联系人).{0,4}?)([0-1]\d{6,11})')
@@ -1193,15 +1321,15 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_entity
         # 去重结果
         relation_list = list(set(relation_list))
     # print(relation_list)
-    tokens_num_dict = dict()
-    last_tokens_num = 0
-    for sentence in list_sentence:
-        _index = sentence.sentence_index
-        if _index == 0:
-            tokens_num_dict[_index] = 0
-        else:
-            tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
-        last_tokens_num = len(sentence.tokens)
+    # tokens_num_dict = dict()
+    # last_tokens_num = 0
+    # for sentence in list_sentence:
+    #     _index = sentence.sentence_index
+    #     if _index == 0:
+    #         tokens_num_dict[_index] = 0
+    #     else:
+    #         tokens_num_dict[_index] = tokens_num_dict[_index - 1] + last_tokens_num
+    #     last_tokens_num = len(sentence.tokens)
     right_combination = [('org','person'),('company','person'),('company','location'),('org','location'),('person','phone')]
     linked_company = set()
     linked_person = set()
@@ -2241,9 +2369,9 @@ def getTimeAttributes(list_entity,list_sentence):
     time_entitys = sorted(time_entitys,key=lambda x:(x.sentence_index, x.begin_index))
     list_sentence = sorted(list_sentence,key=lambda x:x.sentence_index)
     dict_time = {
-        "time_release": [],
-        "time_bidopen": [],
-        "time_bidclose": [],
+        "time_release": [], # 1 发布时间
+        "time_bidopen": [], # 2 开标时间
+        "time_bidclose": [], # 3 截标时间
         'time_bidstart': [],  # 12 投标(开始)时间、响应文件接收(开始)时间
 
         'time_publicityStart': [],  # 4 公示开始时间(公示时间、公示期)
@@ -2424,18 +2552,11 @@ def getTimeAttributes(list_entity,list_sentence):
 def getOtherAttributes(list_entity):
     dict_other = {"moneysource":"",
                   "person_review":[],
-                  # "time_release":"",
-                  # "time_bidopen":"",
-                  # "time_bidclose":"",
                   "serviceTime":"",
                   "product":[],
                   "total_tendereeMoney":0,
                   "total_tendereeMoneyUnit":''}
-    # dict_time = {
-    #     "time_release": [],
-    #     "time_bidopen": [],
-    #     "time_bidclose": []
-    # }
+
     for entity in list_entity:
         if entity.entity_type == 'bidway':
             dict_other["bidway"] = turnBidWay(entity.entity_text)
@@ -2443,18 +2564,6 @@ def getOtherAttributes(list_entity):
             dict_other["moneysource"] = entity.entity_text
         elif entity.entity_type=='serviceTime':
             dict_other["serviceTime"] = entity.entity_text
-        # elif entity.entity_type == 'time' and entity.label==1:
-        #     if entity.values[entity.label]>0.6:
-        #         dict_time['time_release'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
-        #     # dict_other["time_release"] = timeFormat(entity.entity_text)
-        # elif entity.entity_type == 'time' and entity.label==2:
-        #     if entity.values[entity.label]>0.6:
-        #         dict_time['time_bidopen'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
-        #     # dict_other["time_bidopen"] = timeFormat(entity.entity_text)
-        # elif entity.entity_type == 'time' and entity.label == 3:
-        #     if entity.values[entity.label]>0.6:
-        #         dict_time['time_bidclose'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
-        #     # dict_other["time_bidclose"] = timeFormat(entity.entity_text)
         elif entity.entity_type=="person" and entity.label ==4:
             dict_other["person_review"].append(entity.entity_text)
         elif entity.entity_type=='product':
@@ -2462,12 +2571,7 @@ def getOtherAttributes(list_entity):
         elif entity.entity_type=='money' and entity.notes=='总投资' and dict_other["total_tendereeMoney"]<float(entity.entity_text):
                 dict_other["total_tendereeMoney"] = float(entity.entity_text)
                 dict_other["total_tendereeMoneyUnit"] = entity.money_unit
-    # 时间类别
-    # for time_type,value in dict_time.items():
-    #     list_time = dict_time[time_type]
-    #     if list_time:
-    #         list_time.sort(key=lambda x:x[1],reverse=True)
-    #         dict_other[time_type] = list_time[0][0]
+
     dict_other["product"] = list(set(dict_other["product"]))
     return dict_other
 

+ 3 - 2
BiddingKG/dl/test/test4.py

@@ -42,7 +42,7 @@ def test(name,content):
 if __name__=="__main__":
     # filename = "比地_52_79929693.html"
     #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
-    text = codecs.open("C:\\Users\\\Administrator\\Desktop\\2.html","r",encoding="utf8").read()
+    text = codecs.open("C:\\Users\\\Administrator\\Desktop\\test12354.txt","r",encoding="utf8").read()
     content = str(BeautifulSoup(text).find("div",id="pcontent"))
     # df_a = {"html":[]}
     # df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
@@ -70,7 +70,8 @@ if __name__=="__main__":
     # 广州比地数据科技有限公司翻译服务工程招标
     # '''
     # print(predict("12",content,title="关于人防工程技术咨询服务项目【重新招标】单一来源谈判的通知"))
-    print(predict("12", content,"打印机"))
+    # print(predict("12", content,"打印机"))
+    print(predict("12", text,"打印机"))
     # test(12,content)
     print("takes",time.time()-_time1)
     pass

+ 15 - 2
BiddingKG/dl/test/测试整个要素提取流程.py

@@ -136,21 +136,34 @@ def predict(doc_id,text):
                 print(entity.entity_text, entity.begin_index, entity.end_index)
             elif entity.entity_type in ['org','company']:
                 _sentence = list_sentences[0][entity.sentence_index]
+                print(entity.entity_type)
                 if entity.pointer_person:
                     print("公司->联系人1:",end=' ')
                     print(entity.entity_text,[i.entity_text for i in entity.pointer_person],entity.label,entity.values)
                     # print(entity.entity_text,entity.label,entity.values)
-                    # print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
+                    print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
                 else:
                     print("公司->联系人2:", end=' ')
                     print(entity.entity_text, entity.pointer_person,entity.label,entity.values)
-                    # print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
+                    print(_sentence.sentence_text,_sentence.tokens[entity.begin_index:entity.end_index+1])
                     pass
+                if entity.label in [2,3,4]:
+                    if entity.pointer_money:
+                        print("公司->中投标金额:", end=' ')
+                        print(entity.entity_text, entity.pointer_money.entity_text)
+                    if entity.pointer_serviceTime:
+                        print("公司->工期:", end=' ')
+                        print(entity.entity_text, entity.pointer_serviceTime.entity_text)
+                    if entity.pointer_ratio:
+                        print("公司->费率:", end=' ')
+                        print(entity.entity_text, entity.pointer_ratio.entity_text)
                 # print(entity.pointer_pack)
             # elif entity.entity_type =='serviceTime':
             #     print(entity.entity_text)
             #     if entity.pointer_pack:
             #         print('pointer_pack_name:',entity.pointer_pack.entity_text)
+            # elif entity.entity_type =='money':
+            #     print('money',entity.entity_text,entity.label)
             # elif entity.entity_type in ['package']:
             #     print('pack_entity:',entity.entity_text)
             # print(entity.entity_text,entity.entity_type,entity.label,entity.values,entity.sentence_index,entity.wordOffset_begin,entity.wordOffset_end)