testEntityLink.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. #coding:utf8
  2. import fool
  3. import numpy as np
  4. from keras.layers.wrappers import TimeDistributed
  5. a = ["广东省南方电网集团有限责任公司陕西分公司","广东南网","南网"]
  6. print(set(a))
  7. class EntityLink():
  8. def __init__(self):
  9. companyUnitList = ["有限公司","公司","有限责任公司","有限","责任","集团","分公司"]
  10. posUnitList = ["ns"]
  11. self.companyUnit = set(companyUnitList)
  12. self.posUnit = set(posUnitList)
  13. def removeUnit(self,list):
  14. print(np.shape(list))
  15. assert np.shape(list)[0]==2
  16. result = []
  17. for i in range(2):
  18. l_item = []
  19. for item in list[i]:
  20. if item[0] not in self.companyUnit:
  21. if item[1] not in self.posUnit:
  22. l_item.append(item[0])
  23. result.append(l_item)
  24. return result
  25. def getCompability(self,list):
  26. list = self.removeUnit(list)
  27. set1 = set("".join(list[0]))
  28. set2 = set("".join(list[1]))
  29. print(list)
  30. print(set1)
  31. print(set2)
  32. common = set1&set2
  33. compability = []
  34. compability.append(len(common)/len(set1))
  35. compability.append(len(common)/len(set2))
  36. return max(compability)
  37. el = EntityLink()
  38. pos = fool.pos_cut(a)
  39. print(el.getCompability(fool.pos_cut(a)))