transform.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. #!/usr/bin/python
  2. #coding=utf-8
  3. #Program:
  4. # transform the full name in pos* files into short name
  5. #Date:
  6. # 2016-3-16
  7. from entity_match import *
# Relative path of the data file that is loaded into entity_dict below.
ENTITY_FILE = "./company_full_short.csv"
# Lookup table built once, at import time, from ENTITY_FILE; transformFile
# hands it to link().
# NOTE(review): this call runs before the `def loaddict1` that appears later
# in this file, so the name must already be bound at this point (presumably
# by the star import above) or this line raises NameError -- confirm.
entity_dict = loaddict1(ENTITY_FILE)
  10. def loaddict1(filename):
  11. dict = {}
  12. file = open(filename, "r")
  13. file.readline()
  14. for line in file.readlines():
  15. coms = line.split()
  16. full = coms[0]
  17. short = coms[1]
  18. dict[full] = short
  19. return dict
  20. def transformFile(filename, dict):
  21. comp = ['总公司','公司','有限','集团','股份','投资','发展','责任','合伙','销售','合作']
  22. symbol = ['(',')','《','》','(',')']
  23. fin = open(filename, "r")
  24. #fout = open(filename.split('.')[0] + ".out.csv", "w")
  25. for line in fin.readlines():
  26. coms = line.split(",")
  27. com1 = coms[0]
  28. com2 = coms[1]
  29. #for word in comp:
  30. # com1 = com1.replace(word, '');
  31. # com2 = com2.replace(word, '');
  32. #for word in symbol:
  33. # com1 = com1.replace(word, '');
  34. # com2 = com2.replace(word, '');
  35. try:
  36. com1 = link(com1, entity_dict)
  37. if com1 == None or com1 == '':
  38. continue
  39. except:
  40. pass
  41. for c in com2.split(','):
  42. try:
  43. c = link(c, entity_dict)
  44. if c == None or c == '':
  45. continue
  46. except:
  47. pass
  48. print(com1+','+c)
  49. #fout.write((com1 + "," + c))
  50. #fout.close()