# commonutil.py (6.2 KB)
  1. #coding:utf8
  2. import re
  3. from decimal import *
  4. import logging
  5. logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  6. logger = logging.getLogger(__name__)
  7. def log(msg):
  8. logger.info(msg)
  9. def find_index(list,text):
  10. for item in list:
  11. index = text.find(item)
  12. if index>=0:
  13. return index
  14. return -1
  15. def get_index_in_tokens(index,tokes):
  16. length = 0
  17. i = 0
  18. for item in tokes:
  19. if length>=index:
  20. return i
  21. length += len(item)
  22. i += 1
  23. return -1
  24. def combine(list1,list2):
  25. result = []
  26. for item1 in list1:
  27. for item2 in list2:
  28. result.append(str(item1)+str(item2))
  29. return result
  30. def getUnifyMoney(money):
  31. #print(money)
  32. money = re.sub("[,,]","",money)
  33. result = Decimal(0)
  34. chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖"]
  35. chnFactorUnits = ["兆", "亿", "万", "仟", "佰", "拾","元","角","分"]
  36. lcChnDigits = ["〇", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
  37. lcChnFactorUnits = ["兆", "亿", "万", "千", "百", "十","元","角","分"]
  38. DigitsDic = {"零":0, "壹":1, "贰":2, "叁":3, "肆":4, "伍":5, "陆":6, "柒":7, "捌":8, "玖":9,
  39. "〇":0, "一":1, "二":2, "三":3, "四":4, "五":5, "六":6, "七":7, "八":8, "九":9}
  40. MultipleFactor = {"兆":Decimal(1000000000000),"亿":Decimal(100000000),"万":Decimal(10000),"仟":Decimal(1000),"千":Decimal(1000),"佰":Decimal(100),"百":Decimal(100),"拾":Decimal(10),"十":Decimal(10),"元":Decimal(1),"角":round(Decimal(0.1),1),"分":round(Decimal(0.01),2)}
  41. LowMoneypattern = re.compile("^(\d+,?)+(\.\d+)?$")
  42. BigMoneypattern = re.compile("^[%s]$"%("".join(chnDigits)))
  43. if re.search(LowMoneypattern,money) is not None:
  44. return Decimal(money)
  45. elif re.search(BigMoneypattern,money) is not None:
  46. return DigitsDic.get(money)
  47. for factorUnit in chnFactorUnits:
  48. if re.search(re.compile(".*%s.*"%(factorUnit)),money) is not None:
  49. subMoneys = re.split(re.compile("%s(?!.*%s.*)"%(factorUnit,factorUnit)),money)
  50. if re.search(re.compile("^(\d+(,)?)+(\.\d+)?$"),subMoneys[0]) is not None:
  51. result += Decimal(subMoneys[0])*(MultipleFactor.get(factorUnit))
  52. elif len(subMoneys[0])==1:
  53. if re.search(re.compile("^[%s]$"%("".join(chnDigits))),subMoneys[0]) is not None:
  54. result += Decimal(DigitsDic.get(subMoneys[0]))*(MultipleFactor.get(factorUnit))
  55. else:
  56. result += Decimal(getUnifyMoney(subMoneys[0]))*(MultipleFactor.get(factorUnit))
  57. if len(subMoneys)>1:
  58. if re.search(re.compile("^(\d+(,)?)+(\.\d+)?[百千万亿]?\s?(元)?$"),subMoneys[1]) is not None:
  59. result += Decimal(subMoneys[1])
  60. elif len(subMoneys[1])==1:
  61. if re.search(re.compile("^[%s]$"%("".join(chnDigits))),subMoneys[1]) is not None:
  62. result += Decimal(DigitsDic.get(subMoneys[1]))
  63. else:
  64. result += Decimal(getUnifyMoney(subMoneys[1]))
  65. break
  66. return result
  67. def getDigitsDic(unit):
  68. DigitsDic = {"零":0, "壹":1, "贰":2, "叁":3, "肆":4, "伍":5, "陆":6, "柒":7, "捌":8, "玖":9,
  69. "〇":0, "一":1, "二":2, "三":3, "四":4, "五":5, "六":6, "七":7, "八":8, "九":9}
  70. return DigitsDic.get(unit)
  71. def getMultipleFactor(unit):
  72. MultipleFactor = {"兆":Decimal(1000000000000),"亿":100000000,"万":10000,"仟":1000,"千":1000,"佰":100,"百":100,"拾":10,"十":10,"元":1,"角":0.1,"分":0.01}
  73. return MultipleFactor.get(unit)
  74. def getUnifyMoney2(money1):
  75. chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖"]
  76. chnFactorUnits = ["兆", "亿", "万", "仟", "佰", "拾","元","角","分"]
  77. DigitsDic = {"零":0, "壹":1, "贰":2, "叁":3, "肆":4, "伍":5, "陆":6, "柒":7, "捌":8, "玖":9,
  78. "〇":0, "一":1, "二":2, "三":3, "四":4, "五":5, "六":6, "七":7, "八":8, "九":9}
  79. MultipleFactor = {"兆":float(1000000000000),"亿":100000000,"万":10000,"仟":1000,"千":1000,"佰":100,"百":100,"拾":10,"十":10,"元":1,"角":0.1,"分":0.01}
  80. LowMoneypattern = re.compile("(\d+,?)+(\.\d+)?")
  81. BigMoneypattern = re.compile("^[%s]$"%("".join(chnDigits)))
  82. if re.search(LowMoneypattern,money1) is not None:
  83. result = money1
  84. else:
  85. money = re.sub(re.compile("[^%s]"%("".join(chnDigits)+"".join(chnFactorUnits))),'',money1)
  86. result = 0
  87. index = 0
  88. last_Factor_index = -1
  89. last_factor = -1
  90. while(index<len(money)):
  91. if re.search("^[%s]$"%("".join(chnFactorUnits)),money[index]) is not None:
  92. factor_index = chnFactorUnits.index(money[index])
  93. if factor_index<last_Factor_index:
  94. factor = money[index]
  95. temp_result = 0
  96. for i in range(last_factor+1,index):
  97. if re.search("^[%s]$"%("".join(chnDigits)),money[i]) is not None:
  98. if i<index-1:
  99. if re.search("^[%s]$"%("".join(chnFactorUnits)),money[i+1]) is not None:
  100. temp_result += DigitsDic.get(money[i])
  101. else:
  102. temp_result += DigitsDic.get(money[i])
  103. result += temp_result*MultipleFactor.get(money[index])
  104. last_factor = index
  105. last_Factor_index = -1
  106. else:
  107. last_Factor_index = factor_index
  108. index += 1
  109. temp_result = 0
  110. for i in range(last_factor+1,index):
  111. if re.search("^[%s]$"%("".join(chnDigits)),money[i]) is not None:
  112. if i<index-1:
  113. if re.search("^[%s]$"%("".join(chnFactorUnits)),money[i+1]) is not None:
  114. temp_result *= MultipleFactor.get(money[i+1])
  115. else:
  116. temp_result += MultipleFactor.get(money[i])
  117. result += temp_result
  118. return str(result)