re_ratio.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import re
  2. # ratio = '([((]?(上浮|下浮)(率|)(报价|)([((]?%[))]?|)[))]?[:: ,]{0,3}[0-9]+.?[0-9]*[((]?%?[))]?)'
  3. # ratio = '(([((]?(上浮|下浮)费?(率|)(报价|)[))]?|([中投]标|报价|总价)?费率|折扣率)([((]?%[))]?|)[))]?[为:: ,]{0,3}[0-9]+\.?[0-9]{0,3}[((]?%?[))]?)'
  4. ratio = re.compile('(([((]?(上浮|下浮)费?(率|)(报价|)[))]?|([中投]标|报价|总价)?费率|折扣率)([((]?%[))]?|)[))]?[为:: ,]{0,3}[0-9]+\.?[0-9]{0,3}[((]?%?[))]?'
  5. '|[0-9]+\.?[0-9]{0,3}[((]?%?[))]?[((]?(费率|折扣率|(上浮|下浮)费?率)[))]?)')
  6. ratio = ratio.pattern
  7. # 基准利率上浮率):大写:百分之叁拾点零零,小写:30.00%,
  8. # 基准利率上浮率:百分之三十(30%)
  9. # 租金上浮率
  10. # 上浮率活期20%
  11. # 上浮率:活期20%、一年定期35%
  12. # 下浮率报价0.5%
  13. def re_standard_ratio(_str):
  14. reg_standard = "(?P<value>" + ratio + ")"
  15. match = re.finditer(reg_standard, _str)
  16. ratio_list = []
  17. if match:
  18. for m in match:
  19. m_dict = m.groupdict()
  20. m_span = m.span()
  21. keyword_index = [m_span[0], m_span[1]]
  22. keyword = m_dict.get("value")
  23. ratio_list.append([keyword, keyword_index])
  24. return ratio_list
  25. def re_ratio(text):
  26. # 查找符合标准形式的 总价
  27. ratio_list = re_standard_ratio(text)
  28. return ratio_list
  29. def extract_ratio(text):
  30. result_list = []
  31. total_money_list = re_ratio(text)
  32. if total_money_list:
  33. for word, text_index in total_money_list:
  34. d = {"body": word, "begin_index": text_index[0],
  35. "end_index": text_index[1]}
  36. result_list.append(d)
  37. return result_list
  38. def test_str():
  39. s = '政府采购项目招标方式:公开招标,联系人:黎明。代理机构地址:广州市天河区'
  40. s = '年利率较基准利率的上浮率(%): 30 活期存款下浮率:0.455% 协定存的下浮率,(1-下浮率)' \
  41. ' 上浮率.... 上浮率30(%) (下浮率%):43 下浮率报价0.5%'
  42. s = '费率或单价等:报价:94.00%, 幕墙工程费率为25.08%, 投标成本警戒费率为90%, 下浮率3.15%'
  43. print(extract_ratio(s))
  44. def test_html():
  45. html_path = "C:/Users/Administrator/Desktop/3.html"
  46. with open(html_path, "r") as f:
  47. s = f.read()
  48. print(extract_ratio(s))
  49. if __name__ == "__main__":
  50. # extract_bidway(s)
  51. # path = "D:\\BIDI_DOC\\比地_文档\\比率_result.csv"
  52. test_str()
  53. # test_html(path)
  54. pass