#coding:utf8
"""Score how similar two company names are by the characters they share.

Each name is expected to be pre-segmented into (word, tag) pairs; generic
company words and tokens carrying a filtered tag are dropped before the
remaining characters are compared.
"""
import logging

import fool
import numpy as np
from keras.layers.wrappers import TimeDistributed

logger = logging.getLogger(__name__)

a = ["广东省南方电网集团有限责任公司陕西分公司","广东南网","南网"]
print(set(a))


class EntityLink():
    """Compare two segmented company names after stripping generic tokens."""

    def __init__(self):
        # Words that say nothing about which company is meant.
        companyUnitList = ["有限公司","公司","有限责任公司","有限","责任","集团","分公司"]
        # Tags whose tokens are ignored; "ns" is presumably the segmenter's
        # place-name tag -- TODO confirm against the tagger's tag set.
        posUnitList = ["ns"]
        self.companyUnit = set(companyUnitList)
        self.posUnit = set(posUnitList)

    def removeUnit(self, list):
        """Return the informative words of each of the two segmented names.

        Args:
            list: A sequence of exactly two token sequences. Every token is
                indexable, with the word at index 0 and its tag at index 1.
                (The parameter name shadows the builtin; it is kept so that
                existing keyword callers continue to work.)

        Returns:
            A list of two lists of words. A word is kept only when it is not
            in ``self.companyUnit`` and its tag is not in ``self.posUnit``.

        Raises:
            ValueError: If ``list`` does not contain exactly two entries.
        """
        segmented = list  # alias immediately; ``list`` hides the builtin here
        # A plain length check replaces ``np.shape``: the two token lists are
        # usually of different lengths, and NumPy refuses to build an array
        # from such ragged input. Raising explicitly also survives ``-O``.
        if len(segmented) != 2:
            raise ValueError(
                "expected exactly 2 segmented names, got %d" % len(segmented)
            )
        return [
            [
                token[0]
                for token in tokens
                if token[0] not in self.companyUnit
                and token[1] not in self.posUnit
            ]
            for tokens in segmented
        ]

    def getCompability(self, list):
        """Return the character-overlap ratio of two segmented names.

        The names are first filtered with ``removeUnit``. The score is the
        number of distinct characters common to both names divided by the
        size of the smaller character set, i.e. the larger of the two
        per-name overlap ratios.

        Args:
            list: Same structure as accepted by ``removeUnit``.

        Returns:
            A float in ``[0.0, 1.0]``. ``0.0`` is returned when either name
            has no characters left after filtering, instead of dividing by
            zero.

        Raises:
            ValueError: If ``list`` does not contain exactly two entries.
        """
        kept = self.removeUnit(list)
        chars_first = set("".join(kept[0]))
        chars_second = set("".join(kept[1]))
        logger.debug(
            "kept=%s chars_first=%s chars_second=%s",
            kept, chars_first, chars_second,
        )
        # Nothing left to compare on at least one side: no evidence of a match.
        if not chars_first or not chars_second:
            return 0.0
        shared = len(chars_first & chars_second)
        return max(shared / len(chars_first), shared / len(chars_second))


el = EntityLink()
pos = fool.pos_cut(a)
# NOTE(review): ``a`` holds three names, but getCompability accepts exactly
# two segmented names, so this call raises as written -- confirm which pair
# was meant to be compared.
print(el.getCompability(pos))