luojiehua
/
BIDI_ML_INFO_EXTRACTION


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
							import re

# Regex fragment describing a floating-ratio phrase. The pieces below are
# joined by implicit string concatenation into a single pattern.
ratio = (
    "("
    "(上浮|下浮)"  # direction keyword: "float up" or "float down"
    "(率|)"  # optional "rate" suffix
    ".{0,2}"  # up to two arbitrary characters (e.g. a colon)
    "[0-9.]+%"  # digits/dots followed by a percent sign
    ")"
)


def re_standard_ratio(_str):
    """Find every floating-ratio phrase in ``_str``.

    The module-level ``ratio`` pattern is wrapped in a named group
    ``value`` and applied with ``re.finditer``.

    Args:
        _str: Text to search.

    Returns:
        A list with one ``[matched_text, [start, end]]`` entry per match,
        in order of appearance, where ``start``/``end`` come from
        ``Match.span()``. The list is empty when nothing matches.
    """
    pattern = "(?P<value>" + ratio + ")"
    return [
        [hit.group("value"), list(hit.span())]
        for hit in re.finditer(pattern, _str)
    ]


def re_ratio(text):
    """Return the floating-ratio matches found in ``text``.

    Delegates to ``re_standard_ratio`` and hands back its result unchanged.
    """
    return re_standard_ratio(text)


def extract_ratio(text):
    """Extract floating-ratio phrases from ``text`` as a list of dicts.

    Args:
        text: Text to search.

    Returns:
        One dict per match reported by ``re_ratio``, with the keys
        ``"body"`` (matched text), ``"begin_index"`` and ``"end_index"``
        (the two elements of the match's index pair). The list is empty
        when ``re_ratio`` reports nothing.
    """
    # A falsy result from re_ratio is treated as "no matches".
    found = re_ratio(text) or []
    return [
        {"body": phrase, "begin_index": bounds[0], "end_index": bounds[1]}
        for phrase, bounds in found
    ]


def test_str():
    """Run ``extract_ratio`` on a hard-coded sample sentence and print the result.

    The sample contains one phrase with the "上浮" keyword ("上浮率：30%")
    and one plain percentage without it ("0.455%").
    """
    # The earlier sample string was overwritten before use (dead store),
    # so only the sample that was actually exercised is kept.
    s = '年利率较基准利率的上浮率：30% 活期存款年利率：0.455% 协定存'
    print(extract_ratio(s))


def test_html(html_path="C:/Users/Administrator/Desktop/3.html",
              encoding="utf-8"):
    """Read a file, run ``extract_ratio`` on its content and print the result.

    Args:
        html_path: Path of the file to read. Defaults to the location that
            was previously hard-coded, so calling ``test_html()`` with no
            arguments still reads the same file.
        encoding: Text encoding used to decode the file. It is passed
            explicitly so decoding does not depend on the platform's
            locale encoding; pass e.g. ``"gbk"`` for files saved in that
            encoding.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(html_path, "r", encoding=encoding) as f:
        s = f.read()

    print(extract_ratio(s))


if __name__ == "__main__":
    # Manual check: run the extractor on the built-in sample sentence.
    # test_html() can be called here instead to check a local HTML file.
    test_str()