Forráskód Böngészése

采购意向金额识别修复

znj 3 éve
szülő
commit
7f3dee2b39

+ 5 - 1
BiddingKG/dl/interface/getAttributes.py

@@ -1287,7 +1287,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
             item = res_set[item_idx]
             phone_left = sentence_text[max(0, item[1] - 10):item[1]]
             phone_right = sentence_text[item[2]:item[2] + 8]
-            if re.search("电话|手机|联系人|联系方式",re.sub(",","",phone_left)):
+            if re.search("电话|手机|联系[方]|联系方式",re.sub(",","",phone_left)):
                 pass
             else:
                 # 排除“传真号”和其它错误项
@@ -1301,6 +1301,10 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                 if re.search("^\d{0,4}[.,]\d{2,}|^[0-9a-zA-Z\.]*@|^\d*[a-zA-Z]+|元", phone_right):
                     last_phone_mask = False
                     continue
+                # 号码含有0过多,不符合规则
+                if re.search("0{5,}",item[0]):
+                    last_phone_mask = False
+                    continue
                 # 前后跟着字母
                 if re.search("[a-zA-Z/]+$", phone_left) or re.search("^[a-zA-Z/]+", phone_right):
                     last_phone_mask = False

+ 3 - 3
BiddingKG/dl/interface/predictor.py

@@ -2285,7 +2285,7 @@ class ProductAttributesPredictor():
                         elif re.search('采购预算|预算金额', col0_l[i]):
                             header_list2.append(col0_l[i])
                             budget = col1_l[i]
-                            re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?", budget)
+                            re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?", budget)
                             if re_price:
                                 budget = re_price[0]
                                 if '万元' in col0_l[i] and '万' not in budget:
@@ -2359,7 +2359,7 @@ class ProductAttributesPredictor():
                         if id3 != "":
                             if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', tds[id3]):
                                 unitPrice = tds[id3]
-                                re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?",unitPrice)
+                                re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",unitPrice)
                                 if re_price:
                                     unitPrice = re_price[0]
                                     if '万元' in header_list[2] and '万' not in unitPrice:
@@ -2388,7 +2388,7 @@ class ProductAttributesPredictor():
                         if id7 != "":
                             if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', tds[id7]):
                                 budget = tds[id7]
-                                re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?", budget)
+                                re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?", budget)
                                 if re_price:
                                     budget = re_price[0]
                                     if '万元' in header_list[2] and '万' not in budget: