znj преди 1 година
родител
ревизия
32e835d4b1
променени са 2 файла, в които са добавени 27 реда и са изтрити 17 реда
  1. 25 14
      BiddingKG/dl/interface/getAttributes.py
  2. 2 3
      BiddingKG/dl/interface/predictor.py

+ 25 - 14
BiddingKG/dl/interface/getAttributes.py

@@ -1137,6 +1137,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                 byNotTenderer_match_nums = 0 #跟在中投标人后面的属性
                 for after_index in range(ent_idx + 1, min(len(temp_entity_list), ent_idx + 4)):
                     after_entity = temp_entity_list[after_index]
+                    if entity.in_attachment != after_entity.in_attachment:
+                        break
                     if after_entity.entity_type == link_attribute:
                         distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
                                            tokens_num_dict[entity.sentence_index] + entity.end_index)
@@ -1539,6 +1541,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
             _subject = relation[0]
             _object = relation[2]
             if isinstance(_subject,Entity) and isinstance(_object,Entity) and (_subject.entity_type,_object.entity_type) in right_combination:
+                if _subject.in_attachment != _object.in_attachment:
+                    continue
                 if relation[1]==predicate:
                     distance = (tokens_num_dict[_object.sentence_index] + _object.begin_index) - (
                             tokens_num_dict[_subject.sentence_index] + _subject.end_index)
@@ -1960,6 +1964,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                         match_nums = 0
                         for after_index in range(index + 1, min(len(split_entitys), index + 4)):
                             after_entity = split_entitys[after_index]
+                            if entity.in_attachment != after_entity.in_attachment:
+                                break
                             if after_entity.entity_type in ['person']:
                                 distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
                                                    tokens_num_dict[entity.sentence_index] + entity.end_index)
@@ -2049,21 +2055,24 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                             new_split_list[split_index][1]:
                                         mid_sentence = mid_sentence[max(0, phone_begin - 15):phone_begin].replace(",", "")
                                         if re.search(key_phone, mid_sentence):
-                                            distance = 1
-                                            if is_same_sentence:
-                                                if phone_begin <= 200:
-                                                    if entity.label in [2,3,4] and phone_begin>80:
-                                                        break
-                                                    value = (-1 / 2 * (distance ** 2)) / 10000
-                                                    match_list2.append(Match(entity, (entity, _phone), value))
-                                                    match_nums += 1
+                                            if entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系",mid_sentence[-8:]):
+                                                pass
                                             else:
-                                                if phone_begin <= 60:
-                                                    if entity.label in [2,3,4] and phone_begin>40:
-                                                        break
-                                                    value = (-1 / 2 * (distance ** 2)) / 10000
-                                                    match_list2.append(Match(entity, (entity, _phone), value))
-                                                    match_nums += 1
+                                                distance = 1
+                                                if is_same_sentence:
+                                                    if phone_begin <= 200:
+                                                        if entity.label in [2,3,4] and phone_begin>80:
+                                                            break
+                                                        value = (-1 / 2 * (distance ** 2)) / 10000
+                                                        match_list2.append(Match(entity, (entity, _phone), value))
+                                                        match_nums += 1
+                                                else:
+                                                    if phone_begin <= 60:
+                                                        if entity.label in [2,3,4] and phone_begin>40:
+                                                            break
+                                                        value = (-1 / 2 * (distance ** 2)) / 10000
+                                                        match_list2.append(Match(entity, (entity, _phone), value))
+                                                        match_nums += 1
                         else:
                             next_entity = split_entitys[index + 1]
                             if next_entity.entity_type in ["org","company"]:
@@ -2099,6 +2108,8 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                                         p_phone = [p.entity_text for p in next_entity.person_phone] if next_entity.person_phone else []
                                         if next_entity.entity_type == 'person' and _phone in p_phone:
                                             pass
+                                        elif entity.label in [2, 3, 4] and re.search("质疑|投诉|监督|受理|项目(单位)?联系", mid_sentence[-8:]):
+                                            pass
                                         else:
                                             distance = (tokens_num_dict[
                                                             next_entity.sentence_index] + next_entity.begin_index) - (

+ 2 - 3
BiddingKG/dl/interface/predictor.py

@@ -3183,7 +3183,7 @@ class ProductAttributesPredictor():
                                         re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
                                         if re_price:
                                             _unitPrice = re_price[0]
-                                            if '万元' in header_list[2] and '万' not in _unitPrice:
+                                            if '万元' in header_list[3] and '万' not in _unitPrice:
                                                 _unitPrice += '万元'
                                             unitPrice = getUnifyMoney(_unitPrice)
                                             if unitPrice>=10000*10000:
@@ -3216,7 +3216,7 @@ class ProductAttributesPredictor():
                                             if float(budget)>= 100000*10000:
                                                 budget = ""
                                 if id8 != "":
-                                    if re.search('\w', deal_list[id8]):
+                                    if re.search('\w', deal_list[id8]) and re.search("采购(实施)?(时间|月份|日期)",header_list2[3]):
                                         order_time = deal_list[id8].strip()
                                         order_begin, order_end = self.fix_time(order_time, html, page_time)
                                 # print(quantity,unitPrice,brand,specs)
@@ -3255,7 +3255,6 @@ class ProductAttributesPredictor():
                         demand_dic = {'demand_info': {'data': demand_link, 'header': headers_demand, 'header_col': header_col}}
                     else:
                         demand_dic = {'demand_info': {'data': [], 'header': [], 'header_col': []}}
-
                     product_attrs[0] = attr_dic
                     if len(product_attrs[1]['demand_info']['data']) == 0:
                         product_attrs[1] = demand_dic