123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- #!/usr/bin/python
- #coding=utf-8
- #Program:
- # transform the full name in pos* files into short name
- #Date:
- # 2016-3-16
- from entity_match import *
# Path of the mapping file handed to loaddict1 (presumably company full
# names paired with their short names, judging by the file name).
- ENTITY_FILE = "./company_full_short.csv"
# Mapping built once, at import time; transformFile passes it to link().
# NOTE(review): loaddict1 is called here before the `def loaddict1` further
# down has executed, so this line only works if `from entity_match import *`
# already supplies a loaddict1 -- otherwise it raises NameError. TODO confirm.
- entity_dict = loaddict1(ENTITY_FILE)
def loaddict1(filename):
    """Load a full-name -> short-name mapping from a text file.

    The first line of the file is treated as a header and skipped. Every
    following line is split on whitespace: the first field is the full
    name and the second field is the short name; any further fields are
    ignored. Lines with fewer than two fields (for example blank lines)
    are skipped instead of raising ``IndexError``. When a full name occurs
    more than once, the last occurrence wins.

    Args:
        filename: Path of the mapping file to read.

    Returns:
        A dict mapping each full name to its short name.
    """
    mapping = {}
    # The context manager guarantees the handle is closed, even on error.
    with open(filename, "r") as handle:
        next(handle, None)  # discard the header line (no-op on an empty file)
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                # Blank or incomplete line: nothing to map.
                continue
            mapping[fields[0]] = fields[1]
    return mapping
def transformFile(filename, dict):
    """Print linked name pairs for each row of a comma-separated file.

    Each line of ``filename`` is split on ``,``. The first two fields are
    passed through ``link(name, entity_dict)`` and the resulting pair is
    printed as ``first,second``.

    * If ``link`` returns ``None`` or ``''`` for the first field, the whole
      row is skipped; if it does so for the second field, that pair is
      skipped.
    * If ``link`` raises, the unlinked name is kept (best effort).
    * Rows with fewer than two fields (for example blank lines) are
      skipped instead of raising ``IndexError``.

    Args:
        filename: Path of the comma-separated input file.
        dict: Unused. Kept so existing callers keep working.
            NOTE(review): lookups use the module-level ``entity_dict``,
            not this argument -- confirm that is intended.

    Returns:
        None. Results are written to standard output.
    """
    # The context manager guarantees the input file is closed.
    with open(filename, "r") as fin:
        for line in fin:
            coms = line.split(",")
            if len(coms) < 2:
                # No second name to pair with; nothing to print.
                continue
            com1 = coms[0]
            com2 = coms[1]

            try:
                linked = link(com1, entity_dict)
            except Exception:
                # Best effort: fall back to the raw name when linking fails.
                # (Exception, not a bare except, so Ctrl-C still works.)
                pass
            else:
                if linked is None or linked == '':
                    continue
                com1 = linked

            # NOTE(review): com2 is a single field of the ','-split above,
            # so as written this split always yields exactly one item --
            # confirm the separator that was intended here.
            for c in com2.split(','):
                try:
                    linked = link(c, entity_dict)
                except Exception:
                    # Same best-effort fallback as for the first name.
                    pass
                else:
                    if linked is None or linked == '':
                        continue
                    c = linked

                print(com1 + ',' + c)
|