2 سال پیش · bba127d991
--- a/BiddingKG/dl/interface/Entitys.py
+++ b/BiddingKG/dl/interface/Entitys.py
@@ -204,6 +204,7 @@ class Entity():
 
				         self.origin_entity_text = ''  # 2022/1/5 新增，记录字典替换的原来的实体名
			
 
				         self.in_attachment = in_attachment  # 2022/02/10添加，实体是否在附件中
			
 
				         self.prob = prob  # 2022/06/20添加，实体的概率
			
 
				+        self.ratio_value = None # 2022/10/18 新增费率处理数据，(value,ratio_type) 费率数值,类型
			
 
				 
			
 
				     def set_Role(self,role_label,role_values):
			
 
				         self.label = int(role_label)
			
@@ -294,7 +295,7 @@ class Role():
 
				         self.linklist = linklist
			
 
				         self.money_unit = '' # 2021/8/17 新增 保存金额单位
			
 
				         # 中投标人属性
			
 
				-        self.ratio = "" #2022/01/06 新增 保存中投标金额相关费率
			
 
				+        self.ratio = None #2022/01/06 新增 保存中投标金额相关费率 (ratio_value,ratio_type)
			
 
				         self.serviceTime = "" #2021/01/06 新增 保存服务期限(工期)
			
 
				         self.address = ""  #2022/08/08 新增 角色地址
			
 
				 
			
@@ -307,17 +308,35 @@ class Role():
 
				         downward_floating_ratio = "" # 下浮率
			
 
				         discount_ratio = "" # 折扣率/费率
			
 
				         if self.ratio:
			
 
				-            num_value = re.search("[\d\.]+",self.ratio).group()
			
 
				-            num_value = float(num_value)
			
 
				-            if re.search("%|百分之",self.ratio):
			
 
				-                num_value = num_value / 100
			
 
				-            num_value = str('%.4f'%(num_value))
			
 
				-            if re.search("上浮",self.ratio):
			
 
				-                floating_ratio = num_value
			
 
				-            elif re.search("下浮",self.ratio):
			
 
				-                downward_floating_ratio = num_value
			
 
				-            else:
			
 
				-                discount_ratio = num_value
			
 
				+            # num_value = re.search("\d+(?:\.\d+)?",self.ratio).group()
			
 
				+            # num_value = float(num_value)
			
 
				+            # _decimal = str(num_value).split('.')[1]
			
 
				+            # if _decimal=='0':
			
 
				+            #     round_len = 0
			
 
				+            # else:
			
 
				+            #     round_len = len(_decimal)
			
 
				+            # if re.search("%|百分之",self.ratio):
			
 
				+            #     num_value = num_value * 0.01
			
 
				+            #     round_len += 2
			
 
				+            # elif re.search("‰|千分之",self.ratio):
			
 
				+            #     num_value = num_value * 0.001
			
 
				+            #     round_len += 3
			
 
				+            # num_value = str(round(num_value,round_len))
			
 
				+            #
			
 
				+            # if re.search("上浮",self.ratio):
			
 
				+            #     floating_ratio = num_value
			
 
				+            # elif re.search("下浮",self.ratio):
			
 
				+            #     downward_floating_ratio = num_value
			
 
				+            # else:
			
 
				+            #     discount_ratio = num_value
			
 
				+            ratio_type = self.ratio[1]
			
 
				+            ratio_value = str(self.ratio[0])
			
 
				+            if ratio_type=='floating_ratio':
			
 
				+                floating_ratio = ratio_value
			
 
				+            elif ratio_type=='downward_floating_ratio':
			
 
				+                downward_floating_ratio = ratio_value
			
 
				+            elif ratio_type=='discount_ratio':
			
 
				+                discount_ratio = ratio_value
			
 
				         result = {'role_name':self.role_name,'role_text':fitDataByRule(self.entity_text),
			
 
				                   'role_money': {'money':self.money,'money_unit':self.money_unit,'floating_ratio':floating_ratio,'downward_floating_ratio':downward_floating_ratio,'discount_ratio':discount_ratio},
			
 
				                   'linklist': self.linklist,'serviceTime':self.serviceTime,'address':self.address}
			
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1004,8 +1004,10 @@ def tableToText(soup):
 
				             for _tr in _tbody.find_all(recursive=False):
			
 
				                 len_td = len(_tr.find_all(recursive=False))
			
 
				                 _td_len_list.append(len_td)
			
 
				-            if len(list(set(_td_len_list)))>8:
			
 
				-                return None
			
 
				+            if _td_len_list:
			
 
				+                if len(list(set(_td_len_list)))>=8 or max(_td_len_list)>100:
			
 
				+                    return None
			
 
				+
			
 
				         fixSpan(tbody)
			
 
				         inner_table = getTable(tbody)
			
 
				         inner_table = fixTable(inner_table)
			
@@ -1060,12 +1062,20 @@ def tableToText(soup):
 
				     # 遍历表格中的每个tbody
			
 
				     tbodies = []
			
 
				     in_attachment = False
			
 
				+    tmp_part_list = []
			
 
				     for _part in soup.find_all():
			
 
				         if _part.name=='table':
			
 
				-            tbodies.append((_part,in_attachment))
			
 
				+            if _part in tmp_part_list:
			
 
				+                # 表格在合并的附件中
			
 
				+                tbodies.append((_part,True))
			
 
				+            else:
			
 
				+                tbodies.append((_part,in_attachment))
			
 
				         elif _part.name=='div':
			
 
				             if 'class' in _part.attrs and "richTextFetch" in _part['class']:
			
 
				                 in_attachment = True
			
 
				+            # 记录被合并到正文的附件信息 find_all
			
 
				+            if 'filemd5' in _part.attrs and in_attachment==False:
			
 
				+                tmp_part_list = _part.find_all()
			
 
				     #逆序处理嵌套表格
			
 
				     for tbody_index in range(1,len(tbodies)+1):
			
 
				         tbody,_in_attachment = tbodies[len(tbodies)-tbody_index]
			
@@ -2467,9 +2477,9 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				             #                       "front_m":"((?P<text_front_m>(?:[（\(]?\s*(?P<unit_front_m_before>[万元]+)\s*[）\)])\s*[,，:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千万亿元]*)())",
			
 
				             #                       "behind_m":"(()()(?P<money_behind_m>[0-9][\d,，]*(?:\.\d+)?(?:，?)[百千万亿]*)[\(（]?(?P<unit_behind_m>[万元]+(?P<filter_unit3>[台个只]*))[\)）]?)"}
			
 
				             list_money_pattern = {"cn":"(()()(?P<money_cn>[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,})())",
			
 
				-                                  "key_word": "((?P<text_key_word>(?:[￥¥]+，?|[单报标限总]价|金额|成交报?价|价格|预算(金额)?|(监理|设计|勘察)(服务)?费|标的基本情况|CNY|成交结果|成交额|中标额)(?:[,，（\(]*\s*(人民币)?(?P<unit_key_word_before>[万亿]?[美日欧]?元?(?P<filter_unit2>[台个只吨]*))\s*(/?费率)?(人民币)?[）\)]?)\s*[，,:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元编号时间]{,8}?))(第[123一二三]名[：:])?(\d+(\*\d+%)+=)?(?P<money_key_word>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]{,1})(?:[（\(]?(?P<filter_>[%])*\s*(单位[:：])?(?P<unit_key_word_behind>[万亿]?[美日欧]?元?(?P<filter_unit1>[台只吨斤棵株页亩方条天]*))\s*[）\)]?))",
			
 
				-                                  "front_m":"((?P<text_front_m>(?:[（\(]?\s*(?P<unit_front_m_before>[万亿]?[美日欧]?元)\s*[）\)])\s*[,，:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)())",
			
 
				-                                  "behind_m":"(()()(?P<money_behind_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)(人民币)?[\(（]?(?P<unit_behind_m>[万亿]?[美日欧]?元(?P<filter_unit3>[台个只吨斤棵株页亩方条米]*))[\)）]?)"}
			
 
				+                                  "key_word": "((?P<text_key_word>(?:[￥¥]+，?|[单报标限总]价|金额|成交报?价|价格|预算(金额)?|(监理|设计|勘察)(服务)?费|标的基本情况|CNY|成交结果|成交额|中标额)(?:[,，（\(]*\s*(人民币)?(?P<unit_key_word_before>[万亿]?(?:[美日欧]元|元)?(?P<filter_unit2>[台个只吨]*))\s*(/?费率)?(人民币)?[）\)]?)\s*[，,:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元编号时间]{,8}?))(第[123一二三]名[：:])?(\d+(\*\d+%)+=)?(?P<money_key_word>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]{,1})(?:[（\(]?(?P<filter_>[%])*\s*(单位[:：])?(?P<unit_key_word_behind>[万亿]?(?:[美日欧]元|元)?(?P<filter_unit1>[台只吨斤棵株页亩方条天]*))\s*[）\)]?))",
			
 
				+                                  "front_m":"((?P<text_front_m>(?:[（\(]?\s*(?P<unit_front_m_before>[万亿]?(?:[美日欧]元|元))\s*[）\)])\s*[,，:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)())",
			
 
				+                                  "behind_m":"(()()(?P<money_behind_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)(人民币)?[\(（]?(?P<unit_behind_m>[万亿]?(?:[美日欧]元|元)(?P<filter_unit3>[台个只吨斤棵株页亩方条米]*))[\)）]?)"}
			
 
				             # 2021/7/19 调整金额，单位提取正则，修复部分金额因为单位提取失败被过滤问题。
			
 
				 
			
 
				             pattern_money = re.compile("%s|%s|%s|%s"%(list_money_pattern["cn"],list_money_pattern["key_word"],list_money_pattern["behind_m"],list_money_pattern["front_m"]))
			
@@ -2675,15 +2685,6 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				                         # print('过滤掉金额 notSure and unit=="" and float(entity_text)>100*10000：', entity_text, unit)
			
 
				                         continue
			
 
				 
			
 
				-                    if re.search("美元",_match.group()):
			
 
				-                        Dollar2RMB = 7
			
 
				-                        entity_text = str(float(entity_text)*Dollar2RMB)
			
 
				-                    elif re.search("日元",_match.group()):
			
 
				-                        JPyen2RMB = 0.05
			
 
				-                        entity_text = str(float(entity_text)*JPyen2RMB)
			
 
				-                    elif re.search("欧元",_match.group()):
			
 
				-                        Euro2RMB = 6.9
			
 
				-                        entity_text = str(float(entity_text)*Euro2RMB)
			
 
				 
			
 
				                     _exists = False
			
 
				                     for item in list_sentence_entitys:
			
@@ -2863,9 +2864,11 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				                         break
			
 
				                 entity_id = "%s_%d_%d_%d" % (doc_id, sentence_index, begin_index, end_index)
			
 
				                 entity_text = ratio['body']
			
 
				-                list_sentence_entitys.append(
			
 
				-                    Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
			
 
				-                           begin_index_temp, end_index_temp,in_attachment=in_attachment))
			
 
				+                ratio_value = (ratio['value'],ratio['type'])
			
 
				+                _entity = Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
			
 
				+                           begin_index_temp, end_index_temp,in_attachment=in_attachment)
			
 
				+                _entity.ratio_value = ratio_value
			
 
				+                list_sentence_entitys.append(_entity)
			
 
				 
			
 
				             list_sentence_entitys.sort(key=lambda x:x.begin_index)
			
 
				             list_entitys_temp = list_entitys_temp+list_sentence_entitys
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -31,6 +31,12 @@ dict_role_id = {"0":"tenderee",
 
				                 "3":"second_tenderer",
			
 
				                 "4":"third_tenderer"}
			
 
				 
			
 
				+role2id_dict = {"tenderee":0,
			
 
				+                "agency":1,
			
 
				+                "win_tenderer":2,
			
 
				+                "second_tenderer":3,
			
 
				+                "third_tenderer":4}
			
 
				+
			
 
				 def getPackage(packageList,sentence_index,begin_index,roleid,MAX_DIS=None,DIRECT=None):
			
 
				     '''
			
 
				     @param:
			
@@ -851,7 +857,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				     def addRatioByEntity(packDict,packageName,entity,ratio):
			
 
				         for i in range(len(packDict[packageName]["roleList"])):
			
 
				             if packDict[packageName]["roleList"][i].entity_text==entity:
			
 
				-                packDict[packageName]["roleList"][i].ratio = ratio.entity_text
			
 
				+                packDict[packageName]["roleList"][i].ratio = ratio.ratio_value
			
 
				     def addServiceTimeByEntity(packDict,packageName,entity,serviceTime):
			
 
				         for i in range(len(packDict[packageName]["roleList"])):
			
 
				             if packDict[packageName]["roleList"][i].entity_text==entity:
			
@@ -1253,8 +1259,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                        '0[1-9]\d{1,2}[-—－―]?[1-9]\d{6}\d?(?=[1-9]\d{6,7})|'
			
 
				                        '0[1-9]\d{1,2}[-—－―]?[1-9]\d{6}\d?|'
			
 
				                        '[\（|\(]0[1-9]\d{1,2}[\）|\)]-?\d{7,8}-?\d{,4}|'
			
 
				+                       '400\d{7}转\d{1,4}|'
			
 
				                        '[2-9]\d{6,7}')
			
 
				-    url_pattern = re.compile("http[s]?://(?:[a-zA-Z]|[0-9]|[$\-_@.&+=\?:/]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
			
 
				+    url_pattern = re.compile("http[s]?://(?:[a-zA-Z]|[0-9]|[#$\-_@.&+=\?:/]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
			
 
				     email_pattern = re.compile("[a-zA-Z0-9][a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*@"
			
 
				                             "[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*(?:\.[a-zA-Z]{2,})")
			
 
				     phone_entitys = []
			
@@ -1308,7 +1315,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                 pass
			
 
				             else:
			
 
				                 # 排除“传真号”和其它错误项
			
 
				-                if re.search("传，?真|信，?箱|邮，?[箱件]|QQ|qq", phone_left):
			
 
				+                if re.search("传，?真|信，?箱|邮，?[编箱件]|QQ|qq", phone_left):
			
 
				                     if not re.search("电，?话", phone_left):
			
 
				                         error_numStr_index.append(numStr_index)
			
 
				                         last_phone_mask = False
			
@@ -1350,6 +1357,20 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                             error_numStr_index.append(numStr_index)
			
 
				                             last_phone_mask = False
			
 
				                             continue
			
 
				+                left_context = re.search("[\da-zA-Z\-—－―]+$",sentence_text[:item[1]])
			
 
				+                if left_context:
			
 
				+                    if len(left_context.group()) != len("".join(re.findall(phone, left_context.group()))):
			
 
				+                    # if not re.search("(" + phone.pattern + ")$", left_context.group()):
			
 
				+                        error_numStr_index.append(numStr_index)
			
 
				+                        last_phone_mask = False
			
 
				+                        continue
			
 
				+                right_context = re.search("^[\da-zA-Z\-—－―]+", sentence_text[item[2]:])
			
 
				+                if right_context:
			
 
				+                    if len(right_context.group()) != len("".join(re.findall(phone, right_context.group()))):
			
 
				+                    # if not re.search("^(" + phone.pattern + ")", right_context.group()):
			
 
				+                        error_numStr_index.append(numStr_index)
			
 
				+                        last_phone_mask = False
			
 
				+                        continue
			
 
				                 # if:上一个phone实体不符合条件
			
 
				                 if not last_phone_mask:
			
 
				                     item_start = item[1]
			
@@ -1525,52 +1546,58 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 break
			
 
				                 # print(3,combo[0].entity_text,combo[1].entity_text)
			
 
				 
			
 
				-        # "公司——地址" 链接规则补充
			
 
				-        company_lacation_EntityList = [ent for ent in pre_entity if ent.entity_type in ['company', 'org', 'location']]
			
 
				-        company_lacation_EntityList = sorted(company_lacation_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
			
 
				-        t_match_list = []
			
 
				-        for ent_idx in range(len(company_lacation_EntityList)):
			
 
				-            entity = company_lacation_EntityList[ent_idx]
			
 
				-            if entity.entity_type in ['company', 'org']:
			
 
				-                match_nums = 0
			
 
				-                company_nums = 0  # 经过其他公司的数量
			
 
				-                location_nums = 0  # 经过电话的数量
			
 
				-                for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
			
 
				-                    after_entity = company_lacation_EntityList[after_index]
			
 
				-                    if after_entity.entity_type == "location":
			
 
				-                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				-                                tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				-                        location_nums += 1
			
 
				-                        if distance > 100 or location_nums >= 3:
			
 
				-                            break
			
 
				-                        sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				-                        value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				-                        if sentence_distance == 0:
			
 
				-                            if distance < 80:
			
 
				-                                t_match_list.append(Match(entity, after_entity, value))
			
 
				-                                match_nums += 1
			
 
				-                                if company_nums:
			
 
				-                                    break
			
 
				-                        else:
			
 
				-                            if distance < 50:
			
 
				-                                t_match_list.append(Match(entity, after_entity, value))
			
 
				-                                match_nums += 1
			
 
				-                                if company_nums:
			
 
				-                                    break
			
 
				+    # "公司——地址" 链接规则补充
			
 
				+    company_lacation_EntityList = [ent for ent in pre_entity if ent.entity_type in ['company', 'org', 'location']]
			
 
				+    # company_lacation_EntityList = [ent for ent in pre_entity if (ent.entity_type in ['company', 'org'] and ent.label!=5) or ent.entity_type=="location"]
			
 
				+    company_lacation_EntityList = sorted(company_lacation_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
			
 
				+    t_match_list = []
			
 
				+    for ent_idx in range(len(company_lacation_EntityList)):
			
 
				+        entity = company_lacation_EntityList[ent_idx]
			
 
				+        if entity.entity_type in ['company', 'org'] and entity.label!=5:
			
 
				+            match_nums = 0
			
 
				+            company_nums = 0  # 经过其他公司的数量
			
 
				+            location_nums = 0  # number of location entities encountered after this company/org
			
 
				+            for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
			
 
				+                after_entity = company_lacation_EntityList[after_index]
			
 
				+                if after_entity.entity_type == "location":
			
 
				+                    distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				+                            tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				+                    location_nums += 1
			
 
				+                    if distance > 100 or location_nums >= 3:
			
 
				+                        break
			
 
				+                    sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				+                    value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				+                    if sentence_distance == 0:
			
 
				+                        if distance < 80:
			
 
				+                            t_match_list.append(Match(entity, after_entity, value))
			
 
				+                            match_nums += 1
			
 
				+                            if company_nums:
			
 
				+                                break
			
 
				                     else:
			
 
				-                        # type:company/org
			
 
				-                        company_nums += 1
			
 
				-                        if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
			
 
				-                            break
			
 
				+                        if distance < 50:
			
 
				+                            t_match_list.append(Match(entity, after_entity, value))
			
 
				+                            match_nums += 1
			
 
				+                            if company_nums:
			
 
				+                                break
			
 
				+                else:
			
 
				+                    # type:company/org
			
 
				+                    company_nums += 1
			
 
				+                    if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
			
 
				+                        break
			
 
				+                    if entity.label in [0, 1] and after_entity.label in [2, 3, 4]:
			
 
				+                        break
			
 
				 
			
 
				-        # km算法分配求解
			
 
				-        relate_location_result = dispatch(t_match_list)
			
 
				-        relate_location_result = sorted(relate_location_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
			
 
				-        for match in relate_location_result:
			
 
				-            _company = match[0]
			
 
				-            _relation = match[1]
			
 
				-            if not _company.pointer_address:
			
 
				-                _company.pointer_address = _relation
			
 
				+    # km算法分配求解
			
 
				+    # for item in t_match_list:
			
 
				+    #     print("loc_rela",item.main_role.entity_text,item.attribute.entity_text)
			
 
				+    relate_location_result = dispatch(t_match_list)
			
 
				+    relate_location_result = sorted(relate_location_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
			
 
				+    for match in relate_location_result:
			
 
				+        _company = match[0]
			
 
				+        _relation = match[1]
			
 
				+        # print("loc_rela2", _company.entity_text, _relation.entity_text, )
			
 
				+        if not _company.pointer_address:
			
 
				+            _company.pointer_address = _relation
			
 
				     # "联系人——联系电话" 链接规则补充
			
 
				     person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['company','org','location']]
			
 
				     person_phone_EntityList = sorted(person_phone_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
			
@@ -2182,6 +2209,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                 PackDict[k]["roleList"][i].linklist.remove(_item)
			
 
				 
			
 
				     # PackDict更新company/org地址
			
 
				+    last_role_prob = {}
			
 
				     for ent in pre_entity:
			
 
				         if ent.entity_type in ['company','org']:
			
 
				             if ent.pointer_address:
			
@@ -2190,9 +2218,16 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         if PackDict[k]["roleList"][i].entity_text == ent.entity_text:
			
 
				                             if not PackDict[k]["roleList"][i].address:
			
 
				                                 PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
			
 
				+                                last_role_prob[PackDict[k]["roleList"][i].role_name] = ent.values[role2id_dict[PackDict[k]["roleList"][i].role_name]]
			
 
				                             else:
			
 
				-                                if len(ent.pointer_address.entity_text) > len(PackDict[k]["roleList"][i].address):
			
 
				-                                    PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
			
 
				+                                if PackDict[k]["roleList"][i].role_name in ['tenderee','agency']:
			
 
				+                                    # 角色为招标/代理人时，取其实体概率高的链接地址作为角色address
			
 
				+                                    if ent.values[role2id_dict[PackDict[k]["roleList"][i].role_name]] > last_role_prob[PackDict[k]["roleList"][i].role_name]:
			
 
				+                                        PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
			
 
				+                                        last_role_prob[PackDict[k]["roleList"][i].role_name] = ent.values[role2id_dict[PackDict[k]["roleList"][i].role_name]]
			
 
				+                                else:
			
 
				+                                    if len(ent.pointer_address.entity_text) > len(PackDict[k]["roleList"][i].address):
			
 
				+                                        PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
			
 
				 
			
 
				     # 联系人——电子邮箱链接
			
 
				     temporary_list3 = [entity for entity in list_entity if entity.entity_type=='email' or (entity.entity_type=='person' and entity.label in [1,2,3])]
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -1659,7 +1659,7 @@ class TendereeRuleRecall():
 
				                             self.get_tenderee = True
			
 
				                     else:
			
 
				                         if re.search('医院|学校|大学|中学|小学|幼儿园|政府|部|委员会|署|行|局|厅|处|室|科|股|站', ent.entity_text
			
 
				-                                     ) or not re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent.entity_text) or re.search("自行.?采购",list_sentences[0][ent.sentence_index]):
			
 
				+                                     ) or not re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent.entity_text) or re.search("自行.?采购",list_sentences[0][ent.sentence_index].sentence_text):
			
 
				                             ent.label = 0
			
 
				                             ent.values[0] = 0.5 + ent.values[0] / 10
			
 
				                             self.get_tenderee = True
			
@@ -2804,88 +2804,88 @@ class ProductAttributesPredictor():
 
				                             headers.append('_'.join(header_list))
			
 
				                             headers_demand.append('_'.join(header_list2))
			
 
				                             header_col.append('_'.join(tmp_head_list))
			
 
				-                        # print('header_dic: ',header_dic)
			
 
				-                        id1 = header_dic.get('名称', "")
			
 
				-                        id2 = header_dic.get('数量', "")
			
 
				-                        id3 = header_dic.get('单价', "")
			
 
				-                        id4 = header_dic.get('品牌', "")
			
 
				-                        id5 = header_dic.get('规格', "")
			
 
				-
			
 
				-                        id6 = header_dic.get('需求', "")
			
 
				-                        id7 = header_dic.get('预算', "")
			
 
				-                        id8 = header_dic.get('时间', "")
			
 
				-                        if re.search('[a-zA-Z\u4e00-\u9fa5]', deal_list[id1]) and deal_list[id1] not in self.header_set and \
			
 
				-                                re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', deal_list[id1]) == None:
			
 
				-                            product = deal_list[id1]
			
 
				-                            if id2 != "":
			
 
				-                                if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', deal_list[id2]):
			
 
				-                                    quantity = deal_list[id2]
			
 
				-                                else:
			
 
				-                                    quantity = ""
			
 
				-                            if id3 != "":
			
 
				-                                if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id3]):
			
 
				-                                    _unitPrice = deal_list[id3]
			
 
				-                                    re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
			
 
				-                                    if re_price:
			
 
				-                                        _unitPrice = re_price[0]
			
 
				-                                        if '万元' in header_list[2] and '万' not in _unitPrice:
			
 
				-                                            _unitPrice += '万元'
			
 
				-                                        unitPrice = str(getUnifyMoney(_unitPrice))
			
 
				-                            if id4 != "":
			
 
				-                                if re.search('\w', deal_list[id4]):
			
 
				-                                    brand = deal_list[id4]
			
 
				-                                else:
			
 
				-                                    brand = ""
			
 
				-                            if id5 != "":
			
 
				-                                if re.search('\w', deal_list[id5]):
			
 
				-                                    specs = deal_list[id5]
			
 
				-                                else:
			
 
				-                                    specs = ""
			
 
				-                            if id6 != "":
			
 
				-                                if re.search('\w', deal_list[id6]):
			
 
				-                                    demand = deal_list[id6]
			
 
				-                                else:
			
 
				-                                    demand = ""
			
 
				-                            if id7 != "":
			
 
				-                                if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id7]):
			
 
				-                                    _budget = deal_list[id7]
			
 
				-                                    re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
			
 
				-                                    if re_price:
			
 
				-                                        _budget = re_price[0]
			
 
				-                                        if '万元' in header_list2[2] and '万' not in _budget:
			
 
				-                                            _budget += '万元'
			
 
				-                                        budget = str(getUnifyMoney(_budget))
			
 
				-
			
 
				-                            if id8 != "":
			
 
				-                                if re.search('\w', deal_list[id8]):
			
 
				-                                    order_time = deal_list[id8].strip()
			
 
				-                                    order_begin, order_end = self.fix_time(order_time, html, page_time)
			
 
				-                            # print(quantity,unitPrice,brand,specs)
			
 
				-                            if quantity != "" or unitPrice != "" or brand != "" or specs != "":
			
 
				-                                link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
			
 
				-                                        'brand': brand[:50], 'specs': specs}
			
 
				-                                if link not in product_link:
			
 
				-                                    product_link.append(link)
			
 
				-                                    # mat = re.match('([0-9.,]+)[(（]?\w{,3}[)）]?$', link['quantity'])
			
 
				-                                    # if link['unitPrice'] != "" and mat:
			
 
				-                                    #     try:
			
 
				-                                    #         total_product_money += float(link['unitPrice']) * float(
			
 
				-                                    #             mat.group(1).replace(',', ''))
			
 
				-                                    #     except:
			
 
				-                                    #         log('产品属性单价数量相乘出错, 单价： %s, 数量： %s' % (
			
 
				-                                    #         link['unitPrice'], link['quantity']))
			
 
				-                            if order_begin != "" and order_end != "":
			
 
				-                                order_begin_year = int(order_begin.split("-")[0])
			
 
				-                                order_end_year = int(order_end.split("-")[0])
			
 
				-                                # 限制附件错误识别时间
			
 
				-                                if order_begin_year >= 2050 or order_end_year >= 2050:
			
 
				-                                    order_begin = order_end = ""
			
 
				-                            # print(budget, order_time)
			
 
				-                            if budget != "" and order_time != "":
			
 
				-                                link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
			
 
				-                                        'order_begin': order_begin, 'order_end': order_end}
			
 
				-                                if link not in demand_link:
			
 
				-                                    demand_link.append(link)
			
 
				+                            # print('header_dic: ',header_dic)
			
 
				+                            id1 = header_dic.get('名称', "")
			
 
				+                            id2 = header_dic.get('数量', "")
			
 
				+                            id3 = header_dic.get('单价', "")
			
 
				+                            id4 = header_dic.get('品牌', "")
			
 
				+                            id5 = header_dic.get('规格', "")
			
 
				+
			
 
				+                            id6 = header_dic.get('需求', "")
			
 
				+                            id7 = header_dic.get('预算', "")
			
 
				+                            id8 = header_dic.get('时间', "")
			
 
				+                            if re.search('[a-zA-Z\u4e00-\u9fa5]', deal_list[id1]) and deal_list[id1] not in self.header_set and \
			
 
				+                                    re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', deal_list[id1]) == None:
			
 
				+                                product = deal_list[id1]
			
 
				+                                if id2 != "":
			
 
				+                                    if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', deal_list[id2]):
			
 
				+                                        quantity = deal_list[id2]
			
 
				+                                    else:
			
 
				+                                        quantity = ""
			
 
				+                                if id3 != "":
			
 
				+                                    if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id3]):
			
 
				+                                        _unitPrice = deal_list[id3]
			
 
				+                                        re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
			
 
				+                                        if re_price:
			
 
				+                                            _unitPrice = re_price[0]
			
 
				+                                            if '万元' in header_list[2] and '万' not in _unitPrice:
			
 
				+                                                _unitPrice += '万元'
			
 
				+                                            unitPrice = str(getUnifyMoney(_unitPrice))
			
 
				+                                if id4 != "":
			
 
				+                                    if re.search('\w', deal_list[id4]):
			
 
				+                                        brand = deal_list[id4]
			
 
				+                                    else:
			
 
				+                                        brand = ""
			
 
				+                                if id5 != "":
			
 
				+                                    if re.search('\w', deal_list[id5]):
			
 
				+                                        specs = deal_list[id5]
			
 
				+                                    else:
			
 
				+                                        specs = ""
			
 
				+                                if id6 != "":
			
 
				+                                    if re.search('\w', deal_list[id6]):
			
 
				+                                        demand = deal_list[id6]
			
 
				+                                    else:
			
 
				+                                        demand = ""
			
 
				+                                if id7 != "":
			
 
				+                                    if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id7]):
			
 
				+                                        _budget = deal_list[id7]
			
 
				+                                        re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
			
 
				+                                        if re_price:
			
 
				+                                            _budget = re_price[0]
			
 
				+                                            if '万元' in header_list2[2] and '万' not in _budget:
			
 
				+                                                _budget += '万元'
			
 
				+                                            budget = str(getUnifyMoney(_budget))
			
 
				+
			
 
				+                                if id8 != "":
			
 
				+                                    if re.search('\w', deal_list[id8]):
			
 
				+                                        order_time = deal_list[id8].strip()
			
 
				+                                        order_begin, order_end = self.fix_time(order_time, html, page_time)
			
 
				+                                # print(quantity,unitPrice,brand,specs)
			
 
				+                                if quantity != "" or unitPrice != "" or brand != "" or specs != "":
			
 
				+                                    link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
			
 
				+                                            'brand': brand[:50], 'specs': specs}
			
 
				+                                    if link not in product_link:
			
 
				+                                        product_link.append(link)
			
 
				+                                        # mat = re.match('([0-9.,]+)[(（]?\w{,3}[)）]?$', link['quantity'])
			
 
				+                                        # if link['unitPrice'] != "" and mat:
			
 
				+                                        #     try:
			
 
				+                                        #         total_product_money += float(link['unitPrice']) * float(
			
 
				+                                        #             mat.group(1).replace(',', ''))
			
 
				+                                        #     except:
			
 
				+                                        #         log('产品属性单价数量相乘出错, 单价： %s, 数量： %s' % (
			
 
				+                                        #         link['unitPrice'], link['quantity']))
			
 
				+                                if order_begin != "" and order_end != "":
			
 
				+                                    order_begin_year = int(order_begin.split("-")[0])
			
 
				+                                    order_end_year = int(order_end.split("-")[0])
			
 
				+                                    # 限制附件错误识别时间
			
 
				+                                    if order_begin_year >= 2050 or order_end_year >= 2050:
			
 
				+                                        order_begin = order_end = ""
			
 
				+                                # print(budget, order_time)
			
 
				+                                if budget != "" and order_time != "":
			
 
				+                                    link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
			
 
				+                                            'order_begin': order_begin, 'order_end': order_end}
			
 
				+                                    if link not in demand_link:
			
 
				+                                        demand_link.append(link)
			
 
				 
			
 
				                     if len(product_link) > 0:
			
 
				                         attr_dic = {'product_attrs': {'data': product_link, 'header': list(set(headers)), 'header_col': list(set(header_col))}}
			
--- a/BiddingKG/dl/ratio/re_ratio.py
+++ b/BiddingKG/dl/ratio/re_ratio.py
@@ -1,14 +1,16 @@
 
				 import re
			
 
				-
			
 
				+from decimal import Decimal
			
 
				 # ratio = '([（(]?(上浮|下浮)(率|)(报价|)([(（]?%[）)]?|)[)）]?[：: ，]{0,3}[0-9]+.?[0-9]*[(（]?%?[）)]?)'
			
 
				 # ratio = '(([（(]?(上浮|下浮)费?(率|)(报价|)[)）]?|([中投]标|报价|总价)?费率|折扣率)([(（]?%[）)]?|)[）)]?[为：: ，]{0,3}[0-9]+\.?[0-9]{0,3}[(（]?%?[）)]?)'
			
 
				-ratio = re.compile('(([（(]?(上浮|下浮)费?(率|)(报价|)[)）]?|([中投]标|报价|总价)?费率|折扣率)([(（]?%[）)]?|)[）)]?[为：: ，]{0,3}[0-9]+\.?[0-9]{0,3}[(（]?%?[）)]?'
			
 
				-                   '|[0-9]+\.?[0-9]{0,3}[(（]?%?[）)]?[(（]?(费率|折扣率|(上浮|下浮)费?率)[）)]?)')
			
 
				-ratio = ratio.pattern
			
 
				 
			
 
				+ratio = re.compile('(([（(]?(上浮|下浮)费?(率|)(报价|)[)）]?|([中投]标|报价|总价)?费率|折扣率|优惠率)([(（]?[%‰][）)]?|)(报价|取值|)([(（].{1,20}[）)])?[）)]?[为是：: ，]{0,3}'
			
 
				+                   '([0-9]{1,2}(?:\.[0-9]+)?[(（]?[%‰]?[）)]?|[百千]分之[零壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]+(?:点[零壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]+)?)'
			
 
				+                   '|[0-9]{1,2}(?:\.[0-9]+)?[(（]?[%‰][）)]?[(（]?[\u4e00-\u9fa5]{,2}(?:费率|折扣率|优惠率|(上浮|下浮)费?率)[）)]?)')
			
 
				+ratio = ratio.pattern
			
 
				+# print(ratio)
			
 
				 
			
 
				-# 基准利率上浮率）：大写：百分之叁拾点零零，小写：30.00%，
			
 
				-# 基准利率上浮率：百分之三十（30%）
			
 
				+# 基准利率上浮率）：大写：百分之叁拾点零零，小写：30.00%， X
			
 
				+# 基准利率上浮率：百分之三十（30%） X
			
 
				 # 租金上浮率
			
 
				 # 上浮率活期20%
			
 
				 # 上浮率：活期20%、一年定期35%
			
@@ -25,7 +27,12 @@ def re_standard_ratio(_str):
 
				             m_span = m.span()
			
 
				             keyword_index = [m_span[0], m_span[1]]
			
 
				             keyword = m_dict.get("value")
			
 
				-            ratio_list.append([keyword, keyword_index])
			
 
				+            left = _str[max(0,m_span[0]-15):m_span[0]]
			
 
				+            right = _str[m_span[1]:m_span[1]+10]
			
 
				+            context = left + keyword + right
			
 
				+            print(1,keyword)
			
 
				+            if not re.search("利率",context) and not re.search("^[万元]",right):
			
 
				+                ratio_list.append([keyword, keyword_index])
			
 
				 
			
 
				     return ratio_list
			
 
				 
			
@@ -39,20 +46,172 @@ def re_ratio(text):
 
				 def extract_ratio(text):
			
 
				     result_list = []
			
 
				     total_money_list = re_ratio(text)
			
 
				+    # print(total_money_list)
			
 
				     if total_money_list:
			
 
				         for word, text_index in total_money_list:
			
 
				-            d = {"body": word, "begin_index": text_index[0],
			
 
				-                 "end_index": text_index[1]}
			
 
				-            result_list.append(d)
			
 
				+            num_value = re.search("\d+(?:\.\d+)?[(（]?[%‰]?|[零壹贰叁肆伍陆柒捌玖拾佰百一二三四五六七八九十]+(?:点[零壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]+)?(?!分之)", word).group()
			
 
				+            if re.search("[零壹贰叁肆伍陆柒捌玖拾佰一二三四五六七八九十]",num_value):
			
 
				+                if '点' in num_value:
			
 
				+                    num_split = num_value.split("点")
			
 
				+                    round_len = len(num_split[1])
			
 
				+                    num_integer = num_split[0]
			
 
				+                    if re.search("^[十拾佰百]",num_integer):
			
 
				+                        num_integer = "壹" + num_integer
			
 
				+                    num_value = getUnifyNum(num_integer)
			
 
				+                    for index,num_word in enumerate(list(num_split[1])):
			
 
				+                        num_value = float(num_value) + getDigitsDic(num_word) * 0.1**(index+1)
			
 
				+                else:
			
 
				+                    round_len = 0
			
 
				+                    num_value = getUnifyNum(num_value)
			
 
				+                    num_value = float(num_value)
			
 
				+                if re.search("%|百分之", word):
			
 
				+                    num_value = num_value / 100
			
 
				+                    round_len += 2
			
 
				+                elif re.search("‰|千分之", word):
			
 
				+                    num_value = num_value / 1000
			
 
				+                    round_len += 3
			
 
				+            else:
			
 
				+                # if not re.search("[%‰]",word):
			
 
				+                #     continue
			
 
				+                match_text = num_value
			
 
				+                num_value = float(re.sub('[(（]|[%‰]','',num_value))
			
 
				+                _decimal = str(num_value).split('.')[1]
			
 
				+                if _decimal == '0':
			
 
				+                    round_len = 0
			
 
				+                else:
			
 
				+                    round_len = len(_decimal)
			
 
				+                if num_value<1 and not re.search('[%‰]',match_text):
			
 
				+                    pass
			
 
				+
			
 
				+                else:
			
 
				+                    if re.search("%|百分之",word):
			
 
				+                        num_value = num_value / 100
			
 
				+                        round_len += 2
			
 
				+                    elif re.search("‰|千分之",word):
			
 
				+                        num_value = num_value / 1000
			
 
				+                        round_len += 3
			
 
				+                    else:
			
 
				+                        num_value = num_value / 100
			
 
				+                        round_len += 2
			
 
				+
			
 
				+            num_value = round(num_value, round_len)
			
 
				+            # print(word,num_value)
			
 
				+            if re.search("上浮",word):
			
 
				+                ratio_type = 'floating_ratio'
			
 
				+            elif re.search("下浮|优惠",word):
			
 
				+                ratio_type = 'downward_floating_ratio'
			
 
				+            elif re.search("折扣",word):
			
 
				+                if num_value>0.5:
			
 
				+                    ratio_type = 'discount_ratio'
			
 
				+                else:
			
 
				+                    ratio_type = 'downward_floating_ratio'
			
 
				+            else:
			
 
				+                ratio_type = 'discount_ratio'
			
 
				+            if num_value<=1:
			
 
				+                d = {"body": word, "begin_index": text_index[0],
			
 
				+                     "end_index": text_index[1],"value":num_value,"type":ratio_type}
			
 
				+                result_list.append(d)
			
 
				     return result_list
			
 
				 
			
 
				 
			
 
				+def getDigitsDic(unit):
			
 
				+    '''
			
 
				+    @summary:拿到中文对应的数字
			
 
				+    '''
			
 
				+    DigitsDic = {"零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4, "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
			
 
				+                 "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9}
			
 
				+    return DigitsDic.get(unit)
			
 
				+
			
 
				+
			
 
				+def getMultipleFactor(unit):
			
 
				+    '''
			
 
				+    @summary:拿到单位对应的值
			
 
				+    '''
			
 
				+    MultipleFactor = {"兆": Decimal(1000000000000), "亿": Decimal(100000000), "万": Decimal(10000), "仟": Decimal(1000),
			
 
				+                      "千": Decimal(1000), "佰": Decimal(100), "百": Decimal(100), "拾": Decimal(10), "十": Decimal(10),
			
 
				+                      "元": Decimal(1), "圆": Decimal(1), "角": round(Decimal(0.1), 1), "分": round(Decimal(0.01), 2)}
			
 
				+    return MultipleFactor.get(unit)
			
 
				+
			
 
				+
			
 
				+def getUnifyNum(money):
			
 
				+    '''
			
 
				+    @summary:将中文金额字符串转换为数字金额
			
 
				+    @param:
			
 
				+        money:中文金额字符串
			
 
				+    @return: decimal,数据金额
			
 
				+    '''
			
 
				+
			
 
				+    MAX_MONEY = 1000000000000
			
 
				+    MAX_NUM = 12
			
 
				+    # 去掉逗号
			
 
				+    money = re.sub("[，,]", "", money)
			
 
				+    money = re.sub("[^0-9.一二三四五六七八九零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]", "", money)
			
 
				+    result = Decimal(0)
			
 
				+    chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖","一","二","三","四","五","六","七","八","九"]
			
 
				+    chnFactorUnits = ["圆", "元", "兆", "亿", "万", "仟", "佰", "拾", "角", "分", '十', '百', '千']
			
 
				+
			
 
				+    LowMoneypattern = re.compile("^[\d,]+(\.\d+)?$")
			
 
				+    BigMoneypattern = re.compile("^零?(?P<BigMoney>[%s])$" % ("".join(chnDigits)))
			
 
				+    try:
			
 
				+        if re.search(LowMoneypattern, money) is not None:
			
 
				+            return Decimal(money)
			
 
				+        elif re.search(BigMoneypattern, money) is not None:
			
 
				+            return getDigitsDic(re.search(BigMoneypattern, money).group("BigMoney"))
			
 
				+        for factorUnit in chnFactorUnits:
			
 
				+            if re.search(re.compile(".*%s.*" % (factorUnit)), money) is not None:
			
 
				+                subMoneys = re.split(re.compile("%s(?!.*%s.*)" % (factorUnit, factorUnit)), money)
			
 
				+                if re.search(re.compile("^(\d+)(\.\d+)?$"), subMoneys[0]) is not None:
			
 
				+                    if MAX_MONEY / getMultipleFactor(factorUnit) < Decimal(subMoneys[0]):
			
 
				+                        return Decimal(0)
			
 
				+                    result += Decimal(subMoneys[0]) * (getMultipleFactor(factorUnit))
			
 
				+                elif len(subMoneys[0]) == 1:
			
 
				+                    if re.search(re.compile("^[%s]$" % ("".join(chnDigits))), subMoneys[0]) is not None:
			
 
				+                        result += Decimal(getDigitsDic(subMoneys[0])) * (getMultipleFactor(factorUnit))
			
 
				+                # subMoneys[0]中无金额单位，不可再拆分
			
 
				+                elif subMoneys[0] == "":
			
 
				+                    result += 0
			
 
				+                elif re.search(re.compile("[%s]" % ("".join(chnFactorUnits))), subMoneys[0]) is None:
			
 
				+                    # print(subMoneys)
			
 
				+                    # subMoneys[0] = subMoneys[0][0]
			
 
				+                    result += Decimal(getUnifyNum(subMoneys[0])) * (getMultipleFactor(factorUnit))
			
 
				+                else:
			
 
				+                    result += Decimal(getUnifyNum(subMoneys[0])) * (getMultipleFactor(factorUnit))
			
 
				+                if len(subMoneys) > 1:
			
 
				+                    if re.search(re.compile("^(\d+(,)?)+(\.\d+)?[百千万亿]?\s?(元)?$"), subMoneys[1]) is not None:
			
 
				+                        result += Decimal(subMoneys[1])
			
 
				+                    elif len(subMoneys[1]) == 1:
			
 
				+                        if re.search(re.compile("^[%s]$" % ("".join(chnDigits))), subMoneys[1]) is not None:
			
 
				+                            result += Decimal(getDigitsDic(subMoneys[1]))
			
 
				+                    else:
			
 
				+                        result += Decimal(getUnifyNum(subMoneys[1]))
			
 
				+                break
			
 
				+    except Exception as e:
			
 
				+        # traceback.print_exc()
			
 
				+        return Decimal(0)
			
 
				+    return result
			
 
				+
			
 
				+
			
 
def test_str():
    """Manual smoke check: print what extract_ratio returns for a sample text.

    Nothing is asserted; the output has to be inspected by eye.
    """
    # Earlier samples.  Each one is overwritten by the next assignment, so
    # only the last value bound to `s` is actually passed to extract_ratio.
    s = '政府采购项目招标方式：公开招标，联系人：黎明。代理机构地址：广州市天河区'
    s = '年利率较基准利率的上浮率（%）： 30 活期存款下浮率：0.455% 协定存的下浮率，（1-下浮率）' \
        ' 上浮率....  上浮率30（%）  (下浮率%):43  下浮率报价0.5%'
    # Current sample: one candidate phrase per line.
    s = '''费率%）61.20万
费率（精确到小数点后两位）60.00%
下浮率取值13%
下浮率报价13%
下浮率 百分之十点零陆(10.00%
下浮率 大写:无 下浮率百分之贰拾陆 无 小写: 下浮26%
下浮率% 30
成交优惠率% 5.00
下浮率 0.25
下浮率 0.25%
中标金额：57.75%（商业优惠率）
费率）:1800
费率）:12
折扣率（%）：99.2063
投标报价：96.00%（折扣率
'''
    # Single-phrase variant for debugging one case at a time:
    # s = '下浮率 百分之十点零陆(10.00%'
    print(extract_ratio(s))
			
 
				 
			
 
				 
			
--- a/BiddingKG/dl/table_head/predict.py
+++ b/BiddingKG/dl/table_head/predict.py