1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- import re
# Regular expression for a floating-ratio quotation: the keyword 上浮/下浮
# ("float up" / "float down"), optionally followed by 率 ("rate"),
# 报价 ("quotation") and a bracketed percent sign, then up to three
# separator characters, a number and an optional (bracketed) percent sign.
#
# Notes on the pattern:
#   * The decimal part is written as (?:\.[0-9]+)? -- the previous bare `.`
#     matched ANY character, so "上浮率30元" wrongly captured the trailing "元".
#   * Brackets, colon, comma and percent sign accept both their ASCII and
#     their full-width forms, which are common in Chinese documents.
#   * The capturing groups are kept so group numbering stays stable for any
#     other user of this constant.
#
# Sample phrases seen in real documents (not all of them are matched, e.g.
# when the number is written in Chinese numerals or no number follows):
#   基准利率上浮率):大写:百分之叁拾点零零,小写:30.00%,
#   基准利率上浮率:百分之三十(30%)
#   租金上浮率
#   上浮率活期20%
#   上浮率:活期20%、一年定期35%
#   下浮率报价0.5%
ratio = (
    r'([((]?(上浮|下浮)(率|)(报价|)([((]?[%%][))]?|)[))]?'
    r'[:: ,,]{0,3}'
    r'[0-9]+(?:\.[0-9]+)?'
    r'[((]?[%%]?[))]?)'
)
def re_standard_ratio(_str):
    """Find every floating-ratio expression in ``_str``.

    The module-level ``ratio`` pattern is wrapped in a named group
    ``value`` and applied with ``re.finditer``.

    Args:
        _str: Text to scan.

    Returns:
        A list with one ``[matched_text, [start, end]]`` entry per match,
        in order of appearance; ``start``/``end`` are the match span
        offsets into ``_str``. The list is empty when nothing matches.
    """
    reg_standard = "(?P<value>" + ratio + ")"
    # re.finditer returns an iterator, which is always truthy, so no
    # emptiness check is needed: zero matches simply yield an empty list.
    return [
        [m.group("value"), [m.start(), m.end()]]
        for m in re.finditer(reg_standard, _str)
    ]
def re_ratio(text):
    """Return the ratio expressions found in ``text``.

    Thin wrapper that delegates to ``re_standard_ratio`` and returns its
    result unchanged: a list of ``[matched_text, [start, end]]`` entries.
    """
    return re_standard_ratio(text)
def extract_ratio(text):
    """Extract ratio expressions from ``text`` as a list of dicts.

    Each match reported by ``re_ratio`` becomes one dict with the keys
    ``"body"`` (the matched text), ``"begin_index"`` and ``"end_index"``
    (the two span offsets of the match). An empty list is returned when
    there are no matches.
    """
    return [
        {"body": body, "begin_index": span[0], "end_index": span[1]}
        for body, span in re_ratio(text)
    ]
def test_str():
    """Smoke test: run ``extract_ratio`` on a sample string and print the result."""
    # The sample mixes phrases that carry a number (expected to match) with
    # phrases that have the keyword but no number (expected to be skipped).
    s = (
        '年利率较基准利率的上浮率(%): 30 活期存款下浮率:0.455% 协定存的下浮率,(1-下浮率)'
        ' 上浮率.... 上浮率30(%) (下浮率%):43 下浮率报价0.5%'
    )
    print(extract_ratio(s))
def test_html(html_path="C:/Users/Administrator/Desktop/3.html", encoding=None):
    """Smoke test: run ``extract_ratio`` on the contents of a file and print the result.

    Args:
        html_path: Path of the file to read. Defaults to the developer's
            original hard-coded sample location.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform default encoding, as before; pass e.g.
            ``"utf-8"`` when the file is not in the locale encoding.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file cannot be decoded with the
            selected encoding.
    """
    with open(html_path, "r", encoding=encoding) as f:
        s = f.read()
    print(extract_ratio(s))
if __name__ == "__main__":
    # When executed as a script, run the inline string smoke test.
    test_str()
|