2 rokov pred · bba127d991
--- a/BiddingKG/dl/interface/Entitys.py
+++ b/BiddingKG/dl/interface/Entitys.py
@@ -204,6 +204,7 @@ class Entity():
 
															         self.origin_entity_text = ''  # 2022/1/5 新增，记录字典替换的原来的实体名
														
 
															         self.in_attachment = in_attachment  # 2022/02/10添加，实体是否在附件中
														
 
															         self.prob = prob  # 2022/06/20添加，实体的概率
														
 
															+        self.ratio_value = None # 2022/10/18 新增费率处理数据，(value,ratio_type) 费率数值,类型
														
 
															     def set_Role(self,role_label,role_values):
														
 
															         self.label = int(role_label)
														
@@ -294,7 +295,7 @@ class Role():
 
															         self.linklist = linklist
														
 
															         self.money_unit = '' # 2021/8/17 新增 保存金额单位
														
 
															         # 中投标人属性
														
 
															-        self.ratio = "" #2022/01/06 新增 保存中投标金额相关费率
														
 
															+        self.ratio = None #2022/01/06 新增 保存中投标金额相关费率 (ratio_value,ratio_type)
														
 
															         self.serviceTime = "" #2021/01/06 新增 保存服务期限(工期)
														
 
															         self.address = ""  #2022/08/08 新增 角色地址
														
@@ -307,17 +308,35 @@ class Role():
 
															         downward_floating_ratio = "" # 下浮率
														
 
															         discount_ratio = "" # 折扣率/费率
														
 
															         if self.ratio:
														
 
															-            num_value = re.search("[\d\.]+",self.ratio).group()
														
 
															-            num_value = float(num_value)
														
 
															-            if re.search("%|百分之",self.ratio):
														
 
															-                num_value = num_value / 100
														
 
															-            num_value = str('%.4f'%(num_value))
														
 
															-            if re.search("上浮",self.ratio):
														
 
															-                floating_ratio = num_value
														
 
															-            elif re.search("下浮",self.ratio):
														
 
															-                downward_floating_ratio = num_value
														
 
															-            else:
														
 
															-                discount_ratio = num_value
														
 
															+            # num_value = re.search("\d+(?:\.\d+)?",self.ratio).group()
														
 
															+            # num_value = float(num_value)
														
 
															+            # _decimal = str(num_value).split('.')[1]
														
 
															+            # if _decimal=='0':
														
 
															+            #     round_len = 0
														
 
															+            # else:
														
 
															+            #     round_len = len(_decimal)
														
 
															+            # if re.search("%|百分之",self.ratio):
														
 
															+            #     num_value = num_value * 0.01
														
 
															+            #     round_len += 2
														
 
															+            # elif re.search("‰|千分之",self.ratio):
														
 
															+            #     num_value = num_value * 0.001
														
 
															+            #     round_len += 3
														
 
															+            # num_value = str(round(num_value,round_len))
														
 
															+            #
														
 
															+            # if re.search("上浮",self.ratio):
														
 
															+            #     floating_ratio = num_value
														
 
															+            # elif re.search("下浮",self.ratio):
														
 
															+            #     downward_floating_ratio = num_value
														
 
															+            # else:
														
 
															+            #     discount_ratio = num_value
														
 
															+            ratio_type = self.ratio[1]
														
 
															+            ratio_value = str(self.ratio[0])
														
 
															+            if ratio_type=='floating_ratio':
														
 
															+                floating_ratio = ratio_value
														
 
															+            elif ratio_type=='downward_floating_ratio':
														
 
															+                downward_floating_ratio = ratio_value
														
 
															+            elif ratio_type=='discount_ratio':
														
 
															+                discount_ratio = ratio_value
														
 
															         result = {'role_name':self.role_name,'role_text':fitDataByRule(self.entity_text),
														
 
															                   'role_money': {'money':self.money,'money_unit':self.money_unit,'floating_ratio':floating_ratio,'downward_floating_ratio':downward_floating_ratio,'discount_ratio':discount_ratio},
														
 
															                   'linklist': self.linklist,'serviceTime':self.serviceTime,'address':self.address}
														
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1004,8 +1004,10 @@ def tableToText(soup):
 
															             for _tr in _tbody.find_all(recursive=False):
														
 
															                 len_td = len(_tr.find_all(recursive=False))
														
 
															                 _td_len_list.append(len_td)
														
 
															-            if len(list(set(_td_len_list)))>8:
														
 
															-                return None
														
 
															+            if _td_len_list:
														
 
															+                if len(list(set(_td_len_list)))>=8 or max(_td_len_list)>100:
														
 
															+                    return None
														
 
															+
														
 
															         fixSpan(tbody)
														
 
															         inner_table = getTable(tbody)
														
 
															         inner_table = fixTable(inner_table)
														
@@ -1060,12 +1062,20 @@ def tableToText(soup):
 
															     # 遍历表格中的每个tbody
														
 
															     tbodies = []
														
 
															     in_attachment = False
														
 
															+    tmp_part_list = []
														
 
															     for _part in soup.find_all():
														
 
															         if _part.name=='table':
														
 
															-            tbodies.append((_part,in_attachment))
														
 
															+            if _part in tmp_part_list:
														
 
															+                # 表格在合并的附件中
														
 
															+                tbodies.append((_part,True))
														
 
															+            else:
														
 
															+                tbodies.append((_part,in_attachment))
														
 
															         elif _part.name=='div':
														
 
															             if 'class' in _part.attrs and "richTextFetch" in _part['class']:
														
 
															                 in_attachment = True
														
 
															+            # 记录被合并到正文的附件信息 find_all
														
 
															+            if 'filemd5' in _part.attrs and in_attachment==False:
														
 
															+                tmp_part_list = _part.find_all()
														
 
															     #逆序处理嵌套表格
														
 
															     for tbody_index in range(1,len(tbodies)+1):
														
 
															         tbody,_in_attachment = tbodies[len(tbodies)-tbody_index]
														
@@ -2467,9 +2477,9 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
															             #                       "front_m":"((?P<text_front_m>(?:[（\(]?\s*(?P<unit_front_m_before>[万元]+)\s*[）\)])\s*[,，:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千万亿元]*)())",
														
 
															             #                       "behind_m":"(()()(?P<money_behind_m>[0-9][\d,，]*(?:\.\d+)?(?:，?)[百千万亿]*)[\(（]?(?P<unit_behind_m>[万元]+(?P<filter_unit3>[台个只]*))[\)）]?)"}
														
 
															             list_money_pattern = {"cn":"(()()(?P<money_cn>[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,})())",
														
 
															-                                  "key_word": "((?P<text_key_word>(?:[￥¥]+，?|[单报标限总]价|金额|成交报?价|价格|预算(金额)?|(监理|设计|勘察)(服务)?费|标的基本情况|CNY|成交结果|成交额|中标额)(?:[,，（\(]*\s*(人民币)?(?P<unit_key_word_before>[万亿]?[美日欧]?元?(?P<filter_unit2>[台个只吨]*))\s*(/?费率)?(人民币)?[）\)]?)\s*[，,:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元编号时间]{,8}?))(第[123一二三]名[：:])?(\d+(\*\d+%)+=)?(?P<money_key_word>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]{,1})(?:[（\(]?(?P<filter_>[%])*\s*(单位[:：])?(?P<unit_key_word_behind>[万亿]?[美日欧]?元?(?P<filter_unit1>[台只吨斤棵株页亩方条天]*))\s*[）\)]?))",
														
 
															-                                  "front_m":"((?P<text_front_m>(?:[（\(]?\s*(?P<unit_front_m_before>[万亿]?[美日欧]?元)\s*[）\)])\s*[,，:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)())",
														
 
															-                                  "behind_m":"(()()(?P<money_behind_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)(人民币)?[\(（]?(?P<unit_behind_m>[万亿]?[美日欧]?元(?P<filter_unit3>[台个只吨斤棵株页亩方条米]*))[\)）]?)"}
														
 
															+                                  "key_word": "((?P<text_key_word>(?:[￥¥]+，?|[单报标限总]价|金额|成交报?价|价格|预算(金额)?|(监理|设计|勘察)(服务)?费|标的基本情况|CNY|成交结果|成交额|中标额)(?:[,，（\(]*\s*(人民币)?(?P<unit_key_word_before>[万亿]?(?:[美日欧]元|元)?(?P<filter_unit2>[台个只吨]*))\s*(/?费率)?(人民币)?[）\)]?)\s*[，,:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元编号时间]{,8}?))(第[123一二三]名[：:])?(\d+(\*\d+%)+=)?(?P<money_key_word>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]{,1})(?:[（\(]?(?P<filter_>[%])*\s*(单位[:：])?(?P<unit_key_word_behind>[万亿]?(?:[美日欧]元|元)?(?P<filter_unit1>[台只吨斤棵株页亩方条天]*))\s*[）\)]?))",
														
 
															+                                  "front_m":"((?P<text_front_m>(?:[（\(]?\s*(?P<unit_front_m_before>[万亿]?(?:[美日欧]元|元))\s*[）\)])\s*[,，:：]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)())",
														
 
															+                                  "behind_m":"(()()(?P<money_behind_m>[0-9][\d,]*(?:\.\d+)?(?:，?)[百千]*)(人民币)?[\(（]?(?P<unit_behind_m>[万亿]?(?:[美日欧]元|元)(?P<filter_unit3>[台个只吨斤棵株页亩方条米]*))[\)）]?)"}
														
 
															             # 2021/7/19 调整金额，单位提取正则，修复部分金额因为单位提取失败被过滤问题。
														
 
															             pattern_money = re.compile("%s|%s|%s|%s"%(list_money_pattern["cn"],list_money_pattern["key_word"],list_money_pattern["behind_m"],list_money_pattern["front_m"]))
														
@@ -2675,15 +2685,6 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
															                         # print('过滤掉金额 notSure and unit=="" and float(entity_text)>100*10000：', entity_text, unit)
														
 
															                         continue
														
 
															-                    if re.search("美元",_match.group()):
														
 
															-                        Dollar2RMB = 7
														
 
															-                        entity_text = str(float(entity_text)*Dollar2RMB)
														
 
															-                    elif re.search("日元",_match.group()):
														
 
															-                        JPyen2RMB = 0.05
														
 
															-                        entity_text = str(float(entity_text)*JPyen2RMB)
														
 
															-                    elif re.search("欧元",_match.group()):
														
 
															-                        Euro2RMB = 6.9
														
 
															-                        entity_text = str(float(entity_text)*Euro2RMB)
														
 
															                     _exists = False
														
 
															                     for item in list_sentence_entitys:
														
@@ -2863,9 +2864,11 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
															                         break
														
 
															                 entity_id = "%s_%d_%d_%d" % (doc_id, sentence_index, begin_index, end_index)
														
 
															                 entity_text = ratio['body']
														
 
															-                list_sentence_entitys.append(
														
 
															-                    Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
														
 
															-                           begin_index_temp, end_index_temp,in_attachment=in_attachment))
														
 
															+                ratio_value = (ratio['value'],ratio['type'])
														
 
															+                _entity = Entity(doc_id, entity_id, entity_text, entity_type, sentence_index, begin_index, end_index,
														
 
															+                           begin_index_temp, end_index_temp,in_attachment=in_attachment)
														
 
															+                _entity.ratio_value = ratio_value
														
 
															+                list_sentence_entitys.append(_entity)
														
 
															             list_sentence_entitys.sort(key=lambda x:x.begin_index)
														
 
															             list_entitys_temp = list_entitys_temp+list_sentence_entitys
														
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -31,6 +31,12 @@ dict_role_id = {"0":"tenderee",
 
															                 "3":"second_tenderer",
														
 
															                 "4":"third_tenderer"}
														
 
															+role2id_dict = {"tenderee":0,
														
 
															+                "agency":1,
														
 
															+                "win_tenderer":2,
														
 
															+                "second_tenderer":3,
														
 
															+                "third_tenderer":4}
														
 
															+
														
 
															 def getPackage(packageList,sentence_index,begin_index,roleid,MAX_DIS=None,DIRECT=None):
														
 
															     '''
														
 
															     @param:
														
@@ -851,7 +857,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															     def addRatioByEntity(packDict,packageName,entity,ratio):
														
 
															         for i in range(len(packDict[packageName]["roleList"])):
														
 
															             if packDict[packageName]["roleList"][i].entity_text==entity:
														
 
															-                packDict[packageName]["roleList"][i].ratio = ratio.entity_text
														
 
															+                packDict[packageName]["roleList"][i].ratio = ratio.ratio_value
														
 
															     def addServiceTimeByEntity(packDict,packageName,entity,serviceTime):
														
 
															         for i in range(len(packDict[packageName]["roleList"])):
														
 
															             if packDict[packageName]["roleList"][i].entity_text==entity:
														
@@ -1253,8 +1259,9 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                        '0[1-9]\d{1,2}[-—－―]?[1-9]\d{6}\d?(?=[1-9]\d{6,7})|'
														
 
															                        '0[1-9]\d{1,2}[-—－―]?[1-9]\d{6}\d?|'
														
 
															                        '[\（|\(]0[1-9]\d{1,2}[\）|\)]-?\d{7,8}-?\d{,4}|'
														
 
															+                       '400\d{7}转\d{1,4}|'
														
 
															                        '[2-9]\d{6,7}')
														
 
															-    url_pattern = re.compile("http[s]?://(?:[a-zA-Z]|[0-9]|[$\-_@.&+=\?:/]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
														
 
															+    url_pattern = re.compile("http[s]?://(?:[a-zA-Z]|[0-9]|[#$\-_@.&+=\?:/]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
														
 
															     email_pattern = re.compile("[a-zA-Z0-9][a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*@"
														
 
															                             "[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*(?:\.[a-zA-Z]{2,})")
														
 
															     phone_entitys = []
														
@@ -1308,7 +1315,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                 pass
														
 
															             else:
														
 
															                 # 排除“传真号”和其它错误项
														
 
															-                if re.search("传，?真|信，?箱|邮，?[箱件]|QQ|qq", phone_left):
														
 
															+                if re.search("传，?真|信，?箱|邮，?[编箱件]|QQ|qq", phone_left):
														
 
															                     if not re.search("电，?话", phone_left):
														
 
															                         error_numStr_index.append(numStr_index)
														
 
															                         last_phone_mask = False
														
@@ -1350,6 +1357,20 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                             error_numStr_index.append(numStr_index)
														
 
															                             last_phone_mask = False
														
 
															                             continue
														
 
															+                left_context = re.search("[\da-zA-Z\-—－―]+$",sentence_text[:item[1]])
														
 
															+                if left_context:
														
 
															+                    if len(left_context.group()) != len("".join(re.findall(phone, left_context.group()))):
														
 
															+                    # if not re.search("(" + phone.pattern + ")$", left_context.group()):
														
 
															+                        error_numStr_index.append(numStr_index)
														
 
															+                        last_phone_mask = False
														
 
															+                        continue
														
 
															+                right_context = re.search("^[\da-zA-Z\-—－―]+", sentence_text[item[2]:])
														
 
															+                if right_context:
														
 
															+                    if len(right_context.group()) != len("".join(re.findall(phone, right_context.group()))):
														
 
															+                    # if not re.search("^(" + phone.pattern + ")", right_context.group()):
														
 
															+                        error_numStr_index.append(numStr_index)
														
 
															+                        last_phone_mask = False
														
 
															+                        continue
														
 
															                 # if:上一个phone实体不符合条件
														
 
															                 if not last_phone_mask:
														
 
															                     item_start = item[1]
														
@@ -1525,52 +1546,58 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                                 break
														
 
															                 # print(3,combo[0].entity_text,combo[1].entity_text)
														
 
															-        # "公司——地址" 链接规则补充
														
 
															-        company_lacation_EntityList = [ent for ent in pre_entity if ent.entity_type in ['company', 'org', 'location']]
														
 
															-        company_lacation_EntityList = sorted(company_lacation_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
														
 
															-        t_match_list = []
														
 
															-        for ent_idx in range(len(company_lacation_EntityList)):
														
 
															-            entity = company_lacation_EntityList[ent_idx]
														
 
															-            if entity.entity_type in ['company', 'org']:
														
 
															-                match_nums = 0
														
 
															-                company_nums = 0  # 经过其他公司的数量
														
 
															-                location_nums = 0  # 经过电话的数量
														
 
															-                for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
														
 
															-                    after_entity = company_lacation_EntityList[after_index]
														
 
															-                    if after_entity.entity_type == "location":
														
 
															-                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
														
 
															-                                tokens_num_dict[entity.sentence_index] + entity.end_index)
														
 
															-                        location_nums += 1
														
 
															-                        if distance > 100 or location_nums >= 3:
														
 
															-                            break
														
 
															-                        sentence_distance = after_entity.sentence_index - entity.sentence_index
														
 
															-                        value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															-                        if sentence_distance == 0:
														
 
															-                            if distance < 80:
														
 
															-                                t_match_list.append(Match(entity, after_entity, value))
														
 
															-                                match_nums += 1
														
 
															-                                if company_nums:
														
 
															-                                    break
														
 
															-                        else:
														
 
															-                            if distance < 50:
														
 
															-                                t_match_list.append(Match(entity, after_entity, value))
														
 
															-                                match_nums += 1
														
 
															-                                if company_nums:
														
 
															-                                    break
														
 
															+    # "公司——地址" 链接规则补充
														
 
															+    company_lacation_EntityList = [ent for ent in pre_entity if ent.entity_type in ['company', 'org', 'location']]
														
 
															+    # company_lacation_EntityList = [ent for ent in pre_entity if (ent.entity_type in ['company', 'org'] and ent.label!=5) or ent.entity_type=="location"]
														
 
															+    company_lacation_EntityList = sorted(company_lacation_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
														
 
															+    t_match_list = []
														
 
															+    for ent_idx in range(len(company_lacation_EntityList)):
														
 
															+        entity = company_lacation_EntityList[ent_idx]
														
 
															+        if entity.entity_type in ['company', 'org'] and entity.label!=5:
														
 
															+            match_nums = 0
														
 
															+            company_nums = 0  # 经过其他公司的数量
														
 
															+            location_nums = 0  # number of location entities encountered so far
														
 
															+            for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
														
 
															+                after_entity = company_lacation_EntityList[after_index]
														
 
															+                if after_entity.entity_type == "location":
														
 
															+                    distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
														
 
															+                            tokens_num_dict[entity.sentence_index] + entity.end_index)
														
 
															+                    location_nums += 1
														
 
															+                    if distance > 100 or location_nums >= 3:
														
 
															+                        break
														
 
															+                    sentence_distance = after_entity.sentence_index - entity.sentence_index
														
 
															+                    value = (-1 / 2 * (distance ** 2)) / 10000
														
 
															+                    if sentence_distance == 0:
														
 
															+                        if distance < 80:
														
 
															+                            t_match_list.append(Match(entity, after_entity, value))
														
 
															+                            match_nums += 1
														
 
															+                            if company_nums:
														
 
															+                                break
														
 
															                     else:
														
 
															-                        # type:company/org
														
 
															-                        company_nums += 1
														
 
															-                        if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
														
 
															-                            break
														
 
															+                        if distance < 50:
														
 
															+                            t_match_list.append(Match(entity, after_entity, value))
														
 
															+                            match_nums += 1
														
 
															+                            if company_nums:
														
 
															+                                break
														
 
															+                else:
														
 
															+                    # type:company/org
														
 
															+                    company_nums += 1
														
 
															+                    if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
														
 
															+                        break
														
 
															+                    if entity.label in [0, 1] and after_entity.label in [2, 3, 4]:
														
 
															+                        break
														
 
															-        # km算法分配求解
														
 
															-        relate_location_result = dispatch(t_match_list)
														
 
															-        relate_location_result = sorted(relate_location_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
														
 
															-        for match in relate_location_result:
														
 
															-            _company = match[0]
														
 
															-            _relation = match[1]
														
 
															-            if not _company.pointer_address:
														
 
															-                _company.pointer_address = _relation
														
 
															+    # km算法分配求解
														
 
															+    # for item in t_match_list:
														
 
															+    #     print("loc_rela",item.main_role.entity_text,item.attribute.entity_text)
														
 
															+    relate_location_result = dispatch(t_match_list)
														
 
															+    relate_location_result = sorted(relate_location_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
														
 
															+    for match in relate_location_result:
														
 
															+        _company = match[0]
														
 
															+        _relation = match[1]
														
 
															+        # print("loc_rela2", _company.entity_text, _relation.entity_text, )
														
 
															+        if not _company.pointer_address:
														
 
															+            _company.pointer_address = _relation
														
 
															     # "联系人——联系电话" 链接规则补充
														
 
															     person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['company','org','location']]
														
 
															     person_phone_EntityList = sorted(person_phone_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
														
@@ -2182,6 +2209,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                 PackDict[k]["roleList"][i].linklist.remove(_item)
														
 
															     # PackDict更新company/org地址
														
 
															+    last_role_prob = {}
														
 
															     for ent in pre_entity:
														
 
															         if ent.entity_type in ['company','org']:
														
 
															             if ent.pointer_address:
														
@@ -2190,9 +2218,16 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
															                         if PackDict[k]["roleList"][i].entity_text == ent.entity_text:
														
 
															                             if not PackDict[k]["roleList"][i].address:
														
 
															                                 PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
														
 
															+                                last_role_prob[PackDict[k]["roleList"][i].role_name] = ent.values[role2id_dict[PackDict[k]["roleList"][i].role_name]]
														
 
															                             else:
														
 
															-                                if len(ent.pointer_address.entity_text) > len(PackDict[k]["roleList"][i].address):
														
 
															-                                    PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
														
 
															+                                if PackDict[k]["roleList"][i].role_name in ['tenderee','agency']:
														
 
															+                                    # 角色为招标/代理人时，取其实体概率高的链接地址作为角色address
														
 
															+                                    if ent.values[role2id_dict[PackDict[k]["roleList"][i].role_name]] > last_role_prob[PackDict[k]["roleList"][i].role_name]:
														
 
															+                                        PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
														
 
															+                                        last_role_prob[PackDict[k]["roleList"][i].role_name] = ent.values[role2id_dict[PackDict[k]["roleList"][i].role_name]]
														
 
															+                                else:
														
 
															+                                    if len(ent.pointer_address.entity_text) > len(PackDict[k]["roleList"][i].address):
														
 
															+                                        PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
														
 
															     # 联系人——电子邮箱链接
														
 
															     temporary_list3 = [entity for entity in list_entity if entity.entity_type=='email' or (entity.entity_type=='person' and entity.label in [1,2,3])]
														
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -1659,7 +1659,7 @@ class TendereeRuleRecall():
 
															                             self.get_tenderee = True
														
 
															                     else:
														
 
															                         if re.search('医院|学校|大学|中学|小学|幼儿园|政府|部|委员会|署|行|局|厅|处|室|科|股|站', ent.entity_text
														
 
															-                                     ) or not re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent.entity_text) or re.search("自行.?采购",list_sentences[0][ent.sentence_index]):
														
 
															+                                     ) or not re.search('(采购|招标|投标|交易|代理|拍卖|咨询|顾问|管理)', ent.entity_text) or re.search("自行.?采购",list_sentences[0][ent.sentence_index].sentence_text):
														
 
															                             ent.label = 0
														
 
															                             ent.values[0] = 0.5 + ent.values[0] / 10
														
 
															                             self.get_tenderee = True
														
@@ -2804,88 +2804,88 @@ class ProductAttributesPredictor():
 
															                             headers.append('_'.join(header_list))
														
 
															                             headers_demand.append('_'.join(header_list2))
														
 
															                             header_col.append('_'.join(tmp_head_list))
														
 
															-                        # print('header_dic: ',header_dic)
														
 
															-                        id1 = header_dic.get('名称', "")
														
 
															-                        id2 = header_dic.get('数量', "")
														
 
															-                        id3 = header_dic.get('单价', "")
														
 
															-                        id4 = header_dic.get('品牌', "")
														
 
															-                        id5 = header_dic.get('规格', "")
														
 
															-
														
 
															-                        id6 = header_dic.get('需求', "")
														
 
															-                        id7 = header_dic.get('预算', "")
														
 
															-                        id8 = header_dic.get('时间', "")
														
 
															-                        if re.search('[a-zA-Z\u4e00-\u9fa5]', deal_list[id1]) and deal_list[id1] not in self.header_set and \
														
 
															-                                re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', deal_list[id1]) == None:
														
 
															-                            product = deal_list[id1]
														
 
															-                            if id2 != "":
														
 
															-                                if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', deal_list[id2]):
														
 
															-                                    quantity = deal_list[id2]
														
 
															-                                else:
														
 
															-                                    quantity = ""
														
 
															-                            if id3 != "":
														
 
															-                                if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id3]):
														
 
															-                                    _unitPrice = deal_list[id3]
														
 
															-                                    re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
														
 
															-                                    if re_price:
														
 
															-                                        _unitPrice = re_price[0]
														
 
															-                                        if '万元' in header_list[2] and '万' not in _unitPrice:
														
 
															-                                            _unitPrice += '万元'
														
 
															-                                        unitPrice = str(getUnifyMoney(_unitPrice))
														
 
															-                            if id4 != "":
														
 
															-                                if re.search('\w', deal_list[id4]):
														
 
															-                                    brand = deal_list[id4]
														
 
															-                                else:
														
 
															-                                    brand = ""
														
 
															-                            if id5 != "":
														
 
															-                                if re.search('\w', deal_list[id5]):
														
 
															-                                    specs = deal_list[id5]
														
 
															-                                else:
														
 
															-                                    specs = ""
														
 
															-                            if id6 != "":
														
 
															-                                if re.search('\w', deal_list[id6]):
														
 
															-                                    demand = deal_list[id6]
														
 
															-                                else:
														
 
															-                                    demand = ""
														
 
															-                            if id7 != "":
														
 
															-                                if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id7]):
														
 
															-                                    _budget = deal_list[id7]
														
 
															-                                    re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
														
 
															-                                    if re_price:
														
 
															-                                        _budget = re_price[0]
														
 
															-                                        if '万元' in header_list2[2] and '万' not in _budget:
														
 
															-                                            _budget += '万元'
														
 
															-                                        budget = str(getUnifyMoney(_budget))
														
 
															-
														
 
															-                            if id8 != "":
														
 
															-                                if re.search('\w', deal_list[id8]):
														
 
															-                                    order_time = deal_list[id8].strip()
														
 
															-                                    order_begin, order_end = self.fix_time(order_time, html, page_time)
														
 
															-                            # print(quantity,unitPrice,brand,specs)
														
 
															-                            if quantity != "" or unitPrice != "" or brand != "" or specs != "":
														
 
															-                                link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
														
 
															-                                        'brand': brand[:50], 'specs': specs}
														
 
															-                                if link not in product_link:
														
 
															-                                    product_link.append(link)
														
 
															-                                    # mat = re.match('([0-9.,]+)[(（]?\w{,3}[)）]?$', link['quantity'])
														
 
															-                                    # if link['unitPrice'] != "" and mat:
														
 
															-                                    #     try:
														
 
															-                                    #         total_product_money += float(link['unitPrice']) * float(
														
 
															-                                    #             mat.group(1).replace(',', ''))
														
 
															-                                    #     except:
														
 
															-                                    #         log('产品属性单价数量相乘出错, 单价： %s, 数量： %s' % (
														
 
															-                                    #         link['unitPrice'], link['quantity']))
														
 
															-                            if order_begin != "" and order_end != "":
														
 
															-                                order_begin_year = int(order_begin.split("-")[0])
														
 
															-                                order_end_year = int(order_end.split("-")[0])
														
 
															-                                # 限制附件错误识别时间
														
 
															-                                if order_begin_year >= 2050 or order_end_year >= 2050:
														
 
															-                                    order_begin = order_end = ""
														
 
															-                            # print(budget, order_time)
														
 
															-                            if budget != "" and order_time != "":
														
 
															-                                link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
														
 
															-                                        'order_begin': order_begin, 'order_end': order_end}
														
 
															-                                if link not in demand_link:
														
 
															-                                    demand_link.append(link)
														
 
															+                            # print('header_dic: ',header_dic)
														
 
															+                            id1 = header_dic.get('名称', "")
														
 
															+                            id2 = header_dic.get('数量', "")
														
 
															+                            id3 = header_dic.get('单价', "")
														
 
															+                            id4 = header_dic.get('品牌', "")
														
 
															+                            id5 = header_dic.get('规格', "")
														
 
															+
														
 
															+                            id6 = header_dic.get('需求', "")
														
 
															+                            id7 = header_dic.get('预算', "")
														
 
															+                            id8 = header_dic.get('时间', "")
														
 
															+                            if re.search('[a-zA-Z\u4e00-\u9fa5]', deal_list[id1]) and deal_list[id1] not in self.header_set and \
														
 
															+                                    re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', deal_list[id1]) == None:
														
 
															+                                product = deal_list[id1]
														
 
															+                                if id2 != "":
														
 
															+                                    if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', deal_list[id2]):
														
 
															+                                        quantity = deal_list[id2]
														
 
															+                                    else:
														
 
															+                                        quantity = ""
														
 
															+                                if id3 != "":
														
 
															+                                    if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id3]):
														
 
															+                                        _unitPrice = deal_list[id3]
														
 
															+                                        re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
														
 
															+                                        if re_price:
														
 
															+                                            _unitPrice = re_price[0]
														
 
															+                                            if '万元' in header_list[2] and '万' not in _unitPrice:
														
 
															+                                                _unitPrice += '万元'
														
 
															+                                            unitPrice = str(getUnifyMoney(_unitPrice))
														
 
															+                                if id4 != "":
														
 
															+                                    if re.search('\w', deal_list[id4]):
														
 
															+                                        brand = deal_list[id4]
														
 
															+                                    else:
														
 
															+                                        brand = ""
														
 
															+                                if id5 != "":
														
 
															+                                    if re.search('\w', deal_list[id5]):
														
 
															+                                        specs = deal_list[id5]
														
 
															+                                    else:
														
 
															+                                        specs = ""
														
 
															+                                if id6 != "":
														
 
															+                                    if re.search('\w', deal_list[id6]):
														
 
															+                                        demand = deal_list[id6]
														
 
															+                                    else:
														
 
															+                                        demand = ""
														
 
															+                                if id7 != "":
														
 
															+                                    if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id7]):
														
 
															+                                        _budget = deal_list[id7]
														
 
															+                                        re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
														
 
															+                                        if re_price:
														
 
															+                                            _budget = re_price[0]
														
 
															+                                            if '万元' in header_list2[2] and '万' not in _budget:
														
 
															+                                                _budget += '万元'
														
 
															+                                            budget = str(getUnifyMoney(_budget))
														
 
															+
														
 
															+                                if id8 != "":
														
 
															+                                    if re.search('\w', deal_list[id8]):
														
 
															+                                        order_time = deal_list[id8].strip()
														
 
															+                                        order_begin, order_end = self.fix_time(order_time, html, page_time)
														
 
															+                                # print(quantity,unitPrice,brand,specs)
														
 
															+                                if quantity != "" or unitPrice != "" or brand != "" or specs != "":
														
 
															+                                    link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
														
 
															+                                            'brand': brand[:50], 'specs': specs}
														
 
															+                                    if link not in product_link:
														
 
															+                                        product_link.append(link)
														
 
															+                                        # mat = re.match('([0-9.,]+)[(（]?\w{,3}[)）]?$', link['quantity'])
														
 
															+                                        # if link['unitPrice'] != "" and mat:
														
 
															+                                        #     try:
														
 
															+                                        #         total_product_money += float(link['unitPrice']) * float(
														
 
															+                                        #             mat.group(1).replace(',', ''))
														
 
															+                                        #     except:
														
 
															+                                        #         log('产品属性单价数量相乘出错, 单价： %s, 数量： %s' % (
														
 
															+                                        #         link['unitPrice'], link['quantity']))
														
 
															+                                if order_begin != "" and order_end != "":
														
 
															+                                    order_begin_year = int(order_begin.split("-")[0])
														
 
															+                                    order_end_year = int(order_end.split("-")[0])
														
 
															+                                    # 限制附件错误识别时间
														
 
															+                                    if order_begin_year >= 2050 or order_end_year >= 2050:
														
 
															+                                        order_begin = order_end = ""
														
 
															+                                # print(budget, order_time)
														
 
															+                                if budget != "" and order_time != "":
														
 
															+                                    link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
														
 
															+                                            'order_begin': order_begin, 'order_end': order_end}
														
 
															+                                    if link not in demand_link:
														
 
															+                                        demand_link.append(link)
														
 
															                     if len(product_link) > 0:
														
 
															                         attr_dic = {'product_attrs': {'data': product_link, 'header': list(set(headers)), 'header_col': list(set(header_col))}}
														
--- a/BiddingKG/dl/ratio/re_ratio.py
+++ b/BiddingKG/dl/ratio/re_ratio.py
@@ -1,14 +1,16 @@
 
															 import re
														
 
															-
														
 
															+from decimal import Decimal
														
 
															 # ratio = '([（(]?(上浮|下浮)(率|)(报价|)([(（]?%[）)]?|)[)）]?[：: ，]{0,3}[0-9]+.?[0-9]*[(（]?%?[）)]?)'
														
 
															 # ratio = '(([（(]?(上浮|下浮)费?(率|)(报价|)[)）]?|([中投]标|报价|总价)?费率|折扣率)([(（]?%[）)]?|)[）)]?[为：: ，]{0,3}[0-9]+\.?[0-9]{0,3}[(（]?%?[）)]?)'
														
 
															-ratio = re.compile('(([（(]?(上浮|下浮)费?(率|)(报价|)[)）]?|([中投]标|报价|总价)?费率|折扣率)([(（]?%[）)]?|)[）)]?[为：: ，]{0,3}[0-9]+\.?[0-9]{0,3}[(（]?%?[）)]?'
														
 
															-                   '|[0-9]+\.?[0-9]{0,3}[(（]?%?[）)]?[(（]?(费率|折扣率|(上浮|下浮)费?率)[）)]?)')
														
 
															-ratio = ratio.pattern
														
 
															+ratio = re.compile('(([（(]?(上浮|下浮)费?(率|)(报价|)[)）]?|([中投]标|报价|总价)?费率|折扣率|优惠率)([(（]?[%‰][）)]?|)(报价|取值|)([(（].{1,20}[）)])?[）)]?[为是：: ，]{0,3}'
														
 
															+                   '([0-9]{1,2}(?:\.[0-9]+)?[(（]?[%‰]?[）)]?|[百千]分之[零壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]+(?:点[零壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]+)?)'
														
 
															+                   '|[0-9]{1,2}(?:\.[0-9]+)?[(（]?[%‰][）)]?[(（]?[\u4e00-\u9fa5]{,2}(?:费率|折扣率|优惠率|(上浮|下浮)费?率)[）)]?)')
														
 
															+ratio = ratio.pattern
														
 
															+# print(ratio)
														
 
															-# 基准利率上浮率）：大写：百分之叁拾点零零，小写：30.00%，
														
 
															-# 基准利率上浮率：百分之三十（30%）
														
 
															+# 基准利率上浮率）：大写：百分之叁拾点零零，小写：30.00%， X
														
 
															+# 基准利率上浮率：百分之三十（30%） X
														
 
															 # 租金上浮率
														
 
															 # 上浮率活期20%
														
 
															 # 上浮率：活期20%、一年定期35%
														
@@ -25,7 +27,12 @@ def re_standard_ratio(_str):
 
															             m_span = m.span()
														
 
															             keyword_index = [m_span[0], m_span[1]]
														
 
															             keyword = m_dict.get("value")
														
 
															-            ratio_list.append([keyword, keyword_index])
														
 
															+            left = _str[max(0,m_span[0]-15):m_span[0]]
														
 
															+            right = _str[m_span[1]:m_span[1]+10]
														
 
															+            context = left + keyword + right
														
 
															+            print(1,keyword)
														
 
															+            if not re.search("利率",context) and not re.search("^[万元]",right):
														
 
															+                ratio_list.append([keyword, keyword_index])
														
 
															     return ratio_list
														
@@ -39,20 +46,172 @@ def re_ratio(text):
 
															 def extract_ratio(text):
														
 
															     result_list = []
														
 
															     total_money_list = re_ratio(text)
														
 
															+    # print(total_money_list)
														
 
															     if total_money_list:
														
 
															         for word, text_index in total_money_list:
														
 
															-            d = {"body": word, "begin_index": text_index[0],
														
 
															-                 "end_index": text_index[1]}
														
 
															-            result_list.append(d)
														
 
															+            num_value = re.search("\d+(?:\.\d+)?[(（]?[%‰]?|[零壹贰叁肆伍陆柒捌玖拾佰百一二三四五六七八九十]+(?:点[零壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]+)?(?!分之)", word).group()
														
 
															+            if re.search("[零壹贰叁肆伍陆柒捌玖拾佰一二三四五六七八九十]",num_value):
														
 
															+                if '点' in num_value:
														
 
															+                    num_split = num_value.split("点")
														
 
															+                    round_len = len(num_split[1])
														
 
															+                    num_integer = num_split[0]
														
 
															+                    if re.search("^[十拾佰百]",num_integer):
														
 
															+                        num_integer = "壹" + num_integer
														
 
															+                    num_value = getUnifyNum(num_integer)
														
 
															+                    for index,num_word in enumerate(list(num_split[1])):
														
 
															+                        num_value = float(num_value) + getDigitsDic(num_word) * 0.1**(index+1)
														
 
															+                else:
														
 
															+                    round_len = 0
														
 
															+                    num_value = getUnifyNum(num_value)
														
 
															+                    num_value = float(num_value)
														
 
															+                if re.search("%|百分之", word):
														
 
															+                    num_value = num_value / 100
														
 
															+                    round_len += 2
														
 
															+                elif re.search("‰|千分之", word):
														
 
															+                    num_value = num_value / 1000
														
 
															+                    round_len += 3
														
 
															+            else:
														
 
															+                # if not re.search("[%‰]",word):
														
 
															+                #     continue
														
 
															+                match_text = num_value
														
 
															+                num_value = float(re.sub('[(（]|[%‰]','',num_value))
														
 
															+                _decimal = str(num_value).split('.')[1]
														
 
															+                if _decimal == '0':
														
 
															+                    round_len = 0
														
 
															+                else:
														
 
															+                    round_len = len(_decimal)
														
 
															+                if num_value<1 and not re.search('[%‰]',match_text):
														
 
															+                    pass
														
 
															+
														
 
															+                else:
														
 
															+                    if re.search("%|百分之",word):
														
 
															+                        num_value = num_value / 100
														
 
															+                        round_len += 2
														
 
															+                    elif re.search("‰|千分之",word):
														
 
															+                        num_value = num_value / 1000
														
 
															+                        round_len += 3
														
 
															+                    else:
														
 
															+                        num_value = num_value / 100
														
 
															+                        round_len += 2
														
 
															+
														
 
															+            num_value = round(num_value, round_len)
														
 
															+            # print(word,num_value)
														
 
															+            if re.search("上浮",word):
														
 
															+                ratio_type = 'floating_ratio'
														
 
															+            elif re.search("下浮|优惠",word):
														
 
															+                ratio_type = 'downward_floating_ratio'
														
 
															+            elif re.search("折扣",word):
														
 
															+                if num_value>0.5:
														
 
															+                    ratio_type = 'discount_ratio'
														
 
															+                else:
														
 
															+                    ratio_type = 'downward_floating_ratio'
														
 
															+            else:
														
 
															+                ratio_type = 'discount_ratio'
														
 
															+            if num_value<=1:
														
 
															+                d = {"body": word, "begin_index": text_index[0],
														
 
															+                     "end_index": text_index[1],"value":num_value,"type":ratio_type}
														
 
															+                result_list.append(d)
														
 
															     return result_list
														
 
															+def getDigitsDic(unit):
														
 
															+    '''
														
 
															+    @summary:拿到中文对应的数字
														
 
															+    '''
														
 
															+    DigitsDic = {"零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4, "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
														
 
															+                 "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9}
														
 
															+    return DigitsDic.get(unit)
														
 
															+
														
 
															+
														
 
															+def getMultipleFactor(unit):
														
 
															+    '''
														
 
															+    @summary:拿到单位对应的值
														
 
															+    '''
														
 
															+    MultipleFactor = {"兆": Decimal(1000000000000), "亿": Decimal(100000000), "万": Decimal(10000), "仟": Decimal(1000),
														
 
															+                      "千": Decimal(1000), "佰": Decimal(100), "百": Decimal(100), "拾": Decimal(10), "十": Decimal(10),
														
 
															+                      "元": Decimal(1), "圆": Decimal(1), "角": round(Decimal(0.1), 1), "分": round(Decimal(0.01), 2)}
														
 
															+    return MultipleFactor.get(unit)
														
 
															+
														
 
															+
														
 
															+def getUnifyNum(money):
														
 
															+    '''
														
 
															+    @summary:将中文金额字符串转换为数字金额
														
 
															+    @param:
														
 
															+        money:中文金额字符串
														
 
															+    @return: decimal,数据金额
														
 
															+    '''
														
 
															+
														
 
															+    MAX_MONEY = 1000000000000
														
 
															+    MAX_NUM = 12
														
 
															+    # 去掉逗号
														
 
															+    money = re.sub("[，,]", "", money)
														
 
															+    money = re.sub("[^0-9.一二三四五六七八九零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]", "", money)
														
 
															+    result = Decimal(0)
														
 
															+    chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖","一","二","三","四","五","六","七","八","九"]
														
 
															+    chnFactorUnits = ["圆", "元", "兆", "亿", "万", "仟", "佰", "拾", "角", "分", '十', '百', '千']
														
 
															+
														
 
															+    LowMoneypattern = re.compile("^[\d,]+(\.\d+)?$")
														
 
															+    BigMoneypattern = re.compile("^零?(?P<BigMoney>[%s])$" % ("".join(chnDigits)))
														
 
															+    try:
														
 
															+        if re.search(LowMoneypattern, money) is not None:
														
 
															+            return Decimal(money)
														
 
															+        elif re.search(BigMoneypattern, money) is not None:
														
 
															+            return getDigitsDic(re.search(BigMoneypattern, money).group("BigMoney"))
														
 
															+        for factorUnit in chnFactorUnits:
														
 
															+            if re.search(re.compile(".*%s.*" % (factorUnit)), money) is not None:
														
 
															+                subMoneys = re.split(re.compile("%s(?!.*%s.*)" % (factorUnit, factorUnit)), money)
														
 
															+                if re.search(re.compile("^(\d+)(\.\d+)?$"), subMoneys[0]) is not None:
														
 
															+                    if MAX_MONEY / getMultipleFactor(factorUnit) < Decimal(subMoneys[0]):
														
 
															+                        return Decimal(0)
														
 
															+                    result += Decimal(subMoneys[0]) * (getMultipleFactor(factorUnit))
														
 
															+                elif len(subMoneys[0]) == 1:
														
 
															+                    if re.search(re.compile("^[%s]$" % ("".join(chnDigits))), subMoneys[0]) is not None:
														
 
															+                        result += Decimal(getDigitsDic(subMoneys[0])) * (getMultipleFactor(factorUnit))
														
 
															+                # subMoneys[0]中无金额单位，不可再拆分
														
 
															+                elif subMoneys[0] == "":
														
 
															+                    result += 0
														
 
															+                elif re.search(re.compile("[%s]" % ("".join(chnFactorUnits))), subMoneys[0]) is None:
														
 
															+                    # print(subMoneys)
														
 
															+                    # subMoneys[0] = subMoneys[0][0]
														
 
															+                    result += Decimal(getUnifyNum(subMoneys[0])) * (getMultipleFactor(factorUnit))
														
 
															+                else:
														
 
															+                    result += Decimal(getUnifyNum(subMoneys[0])) * (getMultipleFactor(factorUnit))
														
 
															+                if len(subMoneys) > 1:
														
 
															+                    if re.search(re.compile("^(\d+(,)?)+(\.\d+)?[百千万亿]?\s?(元)?$"), subMoneys[1]) is not None:
														
 
															+                        result += Decimal(subMoneys[1])
														
 
															+                    elif len(subMoneys[1]) == 1:
														
 
															+                        if re.search(re.compile("^[%s]$" % ("".join(chnDigits))), subMoneys[1]) is not None:
														
 
															+                            result += Decimal(getDigitsDic(subMoneys[1]))
														
 
															+                    else:
														
 
															+                        result += Decimal(getUnifyNum(subMoneys[1]))
														
 
															+                break
														
 
															+    except Exception as e:
														
 
															+        # traceback.print_exc()
														
 
															+        return Decimal(0)
														
 
															+    return result
														
 
															+
														
 
															+
														
 
															 def test_str():
														
 
															     s = '政府采购项目招标方式：公开招标，联系人：黎明。代理机构地址：广州市天河区'
														
 
															     s = '年利率较基准利率的上浮率（%）： 30 活期存款下浮率：0.455% 协定存的下浮率，（1-下浮率）' \
														
 
															         ' 上浮率....  上浮率30（%）  (下浮率%):43  下浮率报价0.5%'
														
 
															-    s = '费率或单价等：报价：94.00%， 幕墙工程费率为25.08%， 投标成本警戒费率为90%， 下浮率3.15%'
														
 
															-
														
 
															+    s = '''费率%）61.20万
														
 
															+费率（精确到小数点后两位）60.00%
														
 
															+下浮率取值13%
														
 
															+下浮率报价13%
														
 
															+下浮率 百分之十点零陆(10.00%
														
 
															+下浮率 大写:无 下浮率百分之贰拾陆 无 小写: 下浮26%
														
 
															+下浮率% 30
														
 
															+成交优惠率% 5.00
														
 
															+下浮率 0.25
														
 
															+下浮率 0.25%
														
 
															+中标金额：57.75%（商业优惠率）
														
 
															+费率）:1800
														
 
															+费率）:12
														
 
															+折扣率（%）：99.2063
														
 
															+投标报价：96.00%（折扣率
														
 
															+'''
														
 
															+    # s = '下浮率 百分之十点零陆(10.00%'
														
 
															     print(extract_ratio(s))
														
--- a/BiddingKG/dl/table_head/predict.py
+++ b/BiddingKG/dl/table_head/predict.py