re_ratio.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import re
  2. ratio = '((上浮|下浮)(率|).{0,2}[0-9.]+%)'
  3. def re_standard_ratio(_str):
  4. reg_standard = "(?P<value>" + ratio + ")"
  5. match = re.finditer(reg_standard, _str)
  6. ratio_list = []
  7. if match:
  8. for m in match:
  9. m_dict = m.groupdict()
  10. m_span = m.span()
  11. keyword_index = [m_span[0], m_span[1]]
  12. keyword = m_dict.get("value")
  13. ratio_list.append([keyword, keyword_index])
  14. return ratio_list
  15. def re_ratio(text):
  16. # 查找符合标准形式的 总价
  17. ratio_list = re_standard_ratio(text)
  18. return ratio_list
  19. def extract_ratio(text):
  20. result_list = []
  21. total_money_list = re_ratio(text)
  22. if total_money_list:
  23. for word, text_index in total_money_list:
  24. d = {"body": word, "begin_index": text_index[0],
  25. "end_index": text_index[1]}
  26. result_list.append(d)
  27. return result_list
  28. def test_str():
  29. s = '政府采购项目招标方式:公开招标,联系人:黎明。代理机构地址:广州市天河区'
  30. s = '年利率较基准利率的上浮率:30% 活期存款年利率:0.455% 协定存'
  31. print(extract_ratio(s))
  32. def test_html():
  33. html_path = "C:/Users/Administrator/Desktop/3.html"
  34. with open(html_path, "r") as f:
  35. s = f.read()
  36. print(extract_ratio(s))
  37. if __name__ == "__main__":
  38. # extract_bidway(s)
  39. # path = "D:\\BIDI_DOC\\比地_文档\\比率_result.csv"
  40. test_str()
  41. # test_html(path)
  42. pass