1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- #coding:utf8
- import fool
- import numpy as np
- from keras.layers.wrappers import TimeDistributed
# Sample company-name strings; the demo code later in this file tags them
# with fool.pos_cut().
a = [
    "广东省南方电网集团有限责任公司陕西分公司",
    "广东南网",
    "南网",
]
# Display the distinct sample names.
print({*a})
class EntityLink():
    """Score how similar two tagged company names are.

    Each name is a sequence of ``(word, pos_tag)`` items. Words listed in
    ``companyUnit`` and words whose tag is listed in ``posUnit`` are ignored;
    the characters of the remaining words are compared as sets.
    """

    def __init__(self):
        """Build the sets of ignored words and ignored POS tags."""
        # Generic company-form words that carry no identifying information.
        companyUnitList = ["有限公司", "公司", "有限责任公司", "有限", "责任", "集团", "分公司"]
        # POS tags whose words are skipped.
        # NOTE(review): "ns" is presumably the tagger's place-name tag -- confirm.
        posUnitList = ["ns"]
        self.companyUnit = set(companyUnitList)
        self.posUnit = set(posUnitList)

    def removeUnit(self, list):
        """Drop ignored words from a pair of tagged names.

        Args:
            list: Exactly two tagged names, each an iterable of items whose
                first element is the word and whose second element is its
                POS tag. The parameter name shadows the builtin but is kept
                so existing keyword callers keep working.

        Returns:
            A two-element list. Element ``i`` holds, in order, the words of
            name ``i`` that are not in ``self.companyUnit`` and whose tag is
            not in ``self.posUnit``.

        Raises:
            ValueError: If ``list`` does not contain exactly two names.
        """
        tagged_names = list
        # len() rather than np.shape(): the two names usually have different
        # token counts, and recent NumPy refuses to build an array from such
        # ragged input. An explicit raise also survives ``python -O``,
        # unlike an assert.
        if len(tagged_names) != 2:
            raise ValueError(
                "expected exactly 2 tagged names, got %d" % len(tagged_names)
            )
        return [
            [
                item[0]
                for item in name
                if item[0] not in self.companyUnit
                and item[1] not in self.posUnit
            ]
            for name in tagged_names
        ]

    def getCompability(self, list):
        """Return the character-overlap score of two tagged names.

        The score is the number of distinct characters the two filtered
        names share, divided by the distinct-character count of each name;
        the larger of the two ratios is returned.

        Args:
            list: Exactly two tagged names, as accepted by ``removeUnit``.

        Returns:
            A float in ``[0.0, 1.0]``. ``0.0`` is returned when either name
            has no characters left after filtering.

        Raises:
            ValueError: If ``list`` does not contain exactly two names.
        """
        kept_words = self.removeUnit(list)
        chars_a = set("".join(kept_words[0]))
        chars_b = set("".join(kept_words[1]))
        # A name made up only of ignored words has nothing to compare;
        # report "no overlap" instead of dividing by zero.
        if not chars_a or not chars_b:
            return 0.0
        shared = len(chars_a & chars_b)
        return max(shared / len(chars_a), shared / len(chars_b))
-
# Demo: tag the sample names once and score the first two against each other.
# getCompability() accepts exactly two tagged names, so only the first two
# entries of the tagging result are passed in.
el = EntityLink()
pos = fool.pos_cut(a)
print(el.getCompability(pos[:2]))
|