123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- #coding:utf8
- import re
- from decimal import *
- import logging
# Configure root logging at import time (INFO and above, timestamped records)
# and create the logger shared by the helpers in this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def log(msg):
    """Emit *msg* at INFO level through this module's ``logger``."""
    logger.log(logging.INFO, msg)
def find_index(list, text):
    """Return where the first matching entry of *list* occurs in *text*.

    Entries are tried in their given order; the first one that is a
    substring of *text* decides the result, which is the offset reported by
    ``text.find``.  Returns -1 when no entry occurs in *text*.
    """
    # Lazy generator: stop searching as soon as one entry is found.
    offsets = (text.find(candidate) for candidate in list)
    return next((offset for offset in offsets if offset >= 0), -1)
def get_index_in_tokens(index, tokes):
    """Map a character offset to a position in the token sequence *tokes*.

    Returns the position of the first token whose starting offset (the summed
    lengths of all tokens before it) is greater than or equal to *index*.
    An offset that falls inside a token therefore maps to the following
    token.  Returns -1 when no token starts at or after *index*.
    """
    consumed = 0  # characters covered by the tokens seen so far
    for position, token in enumerate(tokes):
        if consumed >= index:
            return position
        consumed += len(token)
    return -1
def combine(list1, list2):
    """Return every ``str(a) + str(b)`` pairing of the two inputs.

    The result is ordered with *list1* as the outer loop and *list2* as the
    inner loop.
    """
    return [str(left) + str(right) for left in list1 for right in list2]
def getUnifyMoney(money):
    """Convert a money string to a ``Decimal`` amount in yuan.

    Accepted forms are plain Arabic numbers ("1,234.50"), Chinese numerals in
    the financial or everyday spelling ("壹万贰仟元", "三千五百万") and mixtures
    of both ("3亿5000万", "3.5万元").  ASCII and fullwidth commas and any
    whitespace are ignored.

    Parsing is recursive: the largest unit present splits the text at its
    last occurrence into a multiplier (left) and a remainder (right), so
    "壹仟贰佰万" is (1200) * 10000.  Text that cannot be interpreted
    contributes zero rather than raising.

    Args:
        money: the amount as text.

    Returns:
        The amount as a ``Decimal`` (``Decimal(0)`` when nothing is
        recognised).
    """
    digit_values = {
        "零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4,
        "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
        "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4,
        "五": 5, "六": 6, "七": 7, "八": 8, "九": 9,
    }
    # Largest unit first.  Both spellings of a unit share one rank so that
    # "仟" and "千" are interchangeable.
    unit_ranks = (
        ("兆", Decimal(1000000000000)),
        ("亿", Decimal(100000000)),
        ("万", Decimal(10000)),
        ("仟千", Decimal(1000)),
        ("佰百", Decimal(100)),
        ("拾十", Decimal(10)),
        ("元", Decimal(1)),
        ("角", Decimal("0.1")),
        ("分", Decimal("0.01")),
    )
    plain_number = re.compile(r"^\d+(\.\d+)?$")

    def parse(text):
        # A leading zero is only a placeholder ("壹万零伍佰"); drop it so the
        # digits behind it are not lost.
        text = text.lstrip("零〇")
        if not text:
            return Decimal(0)
        if plain_number.match(text):
            return Decimal(text)
        if text in digit_values:
            return Decimal(digit_values[text])
        for spellings, factor in unit_ranks:
            cut = max(text.rfind(glyph) for glyph in spellings)
            if cut < 0:
                continue
            head, tail = text[:cut], text[cut + 1:]
            if not head and factor == 10:
                # "十五" means 15: a leading ten carries an implicit one.
                multiplier = Decimal(1)
            else:
                multiplier = parse(head)
            # The remainder is always parsed recursively; it may itself end
            # in a unit ("5000万", "500元") and is not necessarily a number.
            return multiplier * factor + parse(tail)
        return Decimal(0)

    return parse(re.sub(r"[,\uff0c\s]", "", money))
def getDigitsDic(unit):
    """Return the integer 0-9 for a single Chinese digit character.

    Both the financial spelling (零壹贰…玖) and the everyday spelling
    (〇一二…九) are recognised.  Returns ``None`` for anything else.
    """
    financial = "零壹贰叁肆伍陆柒捌玖"
    everyday = "〇一二三四五六七八九"
    # Each character's position in its string is its numeric value.
    values = {
        glyph: value
        for glyphs in (financial, everyday)
        for value, glyph in enumerate(glyphs)
    }
    return values.get(unit)
def getMultipleFactor(unit):
    """Return the multiplier that a Chinese money unit character stands for.

    "兆" yields a ``Decimal``; "亿" down to "元" yield ``int``; "角" and "分"
    yield ``float``.  Returns ``None`` for an unknown unit.
    """
    factors = {"兆": Decimal(10 ** 12)}
    # Whole-number units, keyed by power of ten; both spellings map alike.
    powers = ((8, "亿"), (4, "万"), (3, "仟千"), (2, "佰百"), (1, "拾十"), (0, "元"))
    for exponent, glyphs in powers:
        for glyph in glyphs:
            factors[glyph] = 10 ** exponent
    factors["角"] = 0.1
    factors["分"] = 0.01
    return factors.get(unit)
-
def getUnifyMoney2(money1):
    """Return a money string as text, converting Chinese numerals to digits.

    If *money1* contains any Arabic digit it is taken to be numeric already
    and is returned unchanged.  Otherwise every character that is not a
    Chinese digit or money unit is discarded ("人民币伍佰元整" becomes
    "伍佰元") and the rest is evaluated.

    Evaluation is recursive: the largest unit present splits the text at its
    last occurrence into a multiplier (left) and a remainder (right).

    Args:
        money1: the amount as text.

    Returns:
        The amount as a plain decimal string such as "12000" or "3.5";
        "0" when no numeral is recognised.
    """
    # Any Arabic digit means the amount is already numeric: hand it back.
    if re.search(r"\d", money1) is not None:
        return str(money1)

    digit_values = {
        "零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4,
        "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
        "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4,
        "五": 5, "六": 6, "七": 7, "八": 8, "九": 9,
    }
    # Largest unit first; both spellings of a unit share one rank.
    unit_ranks = (
        ("兆", Decimal(1000000000000)),
        ("亿", Decimal(100000000)),
        ("万", Decimal(10000)),
        ("仟千", Decimal(1000)),
        ("佰百", Decimal(100)),
        ("拾十", Decimal(10)),
        ("元", Decimal(1)),
        ("角", Decimal("0.1")),
        ("分", Decimal("0.01")),
    )
    known = set(digit_values)
    for spellings, _ in unit_ranks:
        known.update(spellings)
    money = "".join(ch for ch in money1 if ch in known)

    def parse(text):
        # A leading zero is only a placeholder ("壹万零伍佰"); drop it.
        text = text.lstrip("零〇")
        if not text:
            return Decimal(0)
        if text in digit_values:
            return Decimal(digit_values[text])
        for spellings, factor in unit_ranks:
            cut = max(text.rfind(glyph) for glyph in spellings)
            if cut < 0:
                continue
            head, tail = text[:cut], text[cut + 1:]
            if not head and factor == 10:
                # "十五" means 15: a leading ten carries an implicit one.
                multiplier = Decimal(1)
            else:
                multiplier = parse(head)
            return multiplier * factor + parse(tail)
        return Decimal(0)

    # Fixed-point formatting keeps large amounts out of exponent notation.
    return format(parse(money), "f")
|