#coding:utf8
"""Small text helpers plus converters from Chinese money strings to numbers."""

import re
from decimal import *
import logging

logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Value of every supported Chinese digit, in both the formal ("壹") and the
# everyday ("一") writing.
_DIGIT_VALUES = {
    "零": 0, "壹": 1, "贰": 2, "叁": 3, "肆": 4,
    "伍": 5, "陆": 6, "柒": 7, "捌": 8, "玖": 9,
    "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4,
    "五": 5, "六": 6, "七": 7, "八": 8, "九": 9,
}

# Exact Decimal multipliers used by getUnifyMoney.
_UNIT_FACTORS = {
    "兆": Decimal(1000000000000),
    "亿": Decimal(100000000),
    "万": Decimal(10000),
    "仟": Decimal(1000),
    "千": Decimal(1000),
    "佰": Decimal(100),
    "百": Decimal(100),
    "拾": Decimal(10),
    "十": Decimal(10),
    "元": Decimal(1),
    "角": Decimal("0.1"),
    "分": Decimal("0.01"),
}

# Units from largest to smallest; the first one present in the text is the
# split point, so the order matters.
_UNIT_ORDER = ("兆", "亿", "万", "仟", "千", "佰", "百", "拾", "十", "元", "角", "分")

# A bare "拾"/"十" with nothing in front of it means ten ("十五" == 15).
_IMPLICIT_ONE_UNITS = ("拾", "十")

# Multipliers returned by getMultipleFactor. The mixed Decimal/int/float types
# are kept exactly as they were because callers may depend on them.
_LEGACY_UNIT_FACTORS = {
    "兆": Decimal(1000000000000),
    "亿": 100000000,
    "万": 10000,
    "仟": 1000,
    "千": 1000,
    "佰": 100,
    "百": 100,
    "拾": 10,
    "十": 10,
    "元": 1,
    "角": 0.1,
    "分": 0.01,
}

# ASCII and fullwidth thousands separators.
_THOUSANDS_SEPARATORS = re.compile("[,\uff0c]")
# Plain decimal number. No nested quantifier, so matching stays linear.
_PLAIN_NUMBER = re.compile(r"^\d+(\.\d+)?$")
# One Chinese digit, optionally preceded by a filler zero ("零贰").
_SINGLE_DIGIT = re.compile("^[零〇]?([%s])$" % "".join(_DIGIT_VALUES))
# Arabic number with an optional magnitude unit and an optional "元".
_TAIL_NUMBER = re.compile(r"^(\d+(?:\.\d+)?)([百千万亿]?)\s?元?$")


def log(msg):
    """Write ``msg`` to the module logger at INFO level."""
    logger.info(msg)


def find_index(list, text):
    """Return where the first matching candidate occurs in ``text``.

    Candidates are tried in the order given, so the result is the position of
    the first candidate that occurs at all, not the leftmost match overall.

    Args:
        list: Iterable of candidate substrings. The name shadows the builtin
            but is kept so that keyword callers keep working.
        text: String to search in.

    Returns:
        The ``str.find`` position of the first candidate found, or -1.
    """
    for candidate in list:
        position = text.find(candidate)
        if position >= 0:
            return position
    return -1


def get_index_in_tokens(index, tokes):
    """Map a character offset to a position in a token sequence.

    Walks the tokens while summing their lengths and returns the position of
    the first token at which the length consumed so far is >= ``index``.

    Args:
        index: Character offset to look up.
        tokes: Sequence of tokens (anything supporting ``len``).

    Returns:
        The token position, or -1 when the offset is not reached before the
        tokens run out.
    """
    consumed = 0
    for position, token in enumerate(tokes):
        if consumed >= index:
            return position
        consumed += len(token)
    return -1


def combine(list1, list2):
    """Return ``str(a) + str(b)`` for every pair, ``list1`` varying slowest."""
    return [str(left) + str(right) for left in list1 for right in list2]


def _money_before_unit(head, unit):
    """Return the amount written in front of ``unit``, scaled by that unit."""
    if not head and unit in _IMPLICIT_ONE_UNITS:
        count = Decimal(1)
    else:
        count = getUnifyMoney(head)
    return count * _UNIT_FACTORS[unit]


def _money_after_unit(tail):
    """Return the amount written after the split unit."""
    match = _TAIL_NUMBER.match(tail)
    if match is None:
        return getUnifyMoney(tail)
    number, unit = match.groups()
    # Only the numeric part goes to Decimal; passing the whole match
    # (e.g. "5000元") would raise InvalidOperation.
    return Decimal(number) * _UNIT_FACTORS.get(unit, Decimal(1))


def getUnifyMoney(money):
    """Convert a money string to a ``Decimal`` amount.

    Accepts plain numbers ("1,000.5"), Chinese numerals in formal or everyday
    writing ("叁万元", "三千五百元") and mixtures of both ("1万5000元").

    The text is split at the last occurrence of the largest unit it contains;
    the part before the unit is multiplied by the unit and the part after it
    is added, each part being parsed recursively.

    Args:
        money: The money string. ASCII and fullwidth commas are ignored.

    Returns:
        The amount as a ``Decimal``. Text that contains no recognisable
        amount yields ``Decimal(0)``.
    """
    money = _THOUSANDS_SEPARATORS.sub("", money)

    if _PLAIN_NUMBER.match(money):
        return Decimal(money)

    digit = _SINGLE_DIGIT.match(money)
    if digit:
        return Decimal(_DIGIT_VALUES[digit.group(1)])

    unit = next((u for u in _UNIT_ORDER if u in money), None)
    if unit is None:
        return Decimal(0)

    # rpartition splits at the LAST occurrence, so a repeated unit such as the
    # second "万" in "万万" stays in the head and is handled by the recursion.
    head, _, tail = money.rpartition(unit)
    return _money_before_unit(head, unit) + _money_after_unit(tail)


def getDigitsDic(unit):
    """Return the integer value of one Chinese digit, or None if unknown."""
    return _DIGIT_VALUES.get(unit)


def getMultipleFactor(unit):
    """Return the multiplier for one Chinese unit, or None if unknown.

    NOTE: the value types are mixed (Decimal for "兆", int for the other
    whole units, float for "角" and "分"), unchanged from the original table.
    """
    return _LEGACY_UNIT_FACTORS.get(unit)


# NOTE(review): getUnifyMoney2 is cut off in the reviewed source (it stops
# mid-statement at "while(index"), so it is neither reformatted nor completed
# here. Its visible text is kept verbatim on the next line; restore the full
# definition from the original file before relying on it.
# def getUnifyMoney2(money1): chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖"] chnFactorUnits = ["兆", "亿", "万", "仟", "佰", "拾","元","角","分"] DigitsDic = {"零":0, "壹":1, "贰":2, "叁":3, "肆":4, "伍":5, "陆":6, "柒":7, "捌":8, "玖":9, "〇":0, "一":1, "二":2, "三":3, "四":4, "五":5, "六":6, "七":7, "八":8, "九":9} MultipleFactor = {"兆":float(1000000000000),"亿":100000000,"万":10000,"仟":1000,"千":1000,"佰":100,"百":100,"拾":10,"十":10,"元":1,"角":0.1,"分":0.01} LowMoneypattern = re.compile("(\d+,?)+(\.\d+)?") BigMoneypattern = re.compile("^[%s]$"%("".join(chnDigits))) if re.search(LowMoneypattern,money1) is not None: result = money1 else: money = re.sub(re.compile("[^%s]"%("".join(chnDigits)+"".join(chnFactorUnits))),'',money1) result = 0 index = 0 last_Factor_index = -1 last_factor = -1 while(index