@@ -155,6 +155,148 @@ class Model_money_classify():
         with self.graph.as_default():
             return self.getModel().predict([x[0],x[1],x[2]])
     '''
+
+from itertools import groupby
+from BiddingKG.dl.relation_extraction.model import get_words_matrix
+
+class Model_relation_extraction():
+
+    def __init__(self,lazyLoad=getLazyLoad()):
+        if USE_PAI_EAS:
+            lazyLoad = True
+        self.subject_model_file = os.path.dirname(__file__)+"/../relation_extraction/models2/subject_model"
+        self.object_model_file = os.path.dirname(__file__)+"/../relation_extraction/models2/object_model"
+        self.model_subject = None
+        self.model_object = None
+        # one private graph/session per SavedModel keeps the two models isolated
+        self.sess_subject = tf.Session(graph=tf.Graph())
+        self.sess_object = tf.Session(graph=tf.Graph())
+        if not lazyLoad:
+            self.getModel1()
+            self.getModel2()
+        # placeholder tokens substituted for entity mentions during encoding
+        self.entity_type_dict = {
+            'org': '<company/org>',
+            'company': '<company/org>',
+            'location': '<location>',
+            'phone': '<phone>',
+            'person': '<contact_person>'
+        }
+        self.id2predicate = {
+            0: "rel_person",   # company - contact person
+            1: "rel_phone",    # contact person - phone
+            2: "rel_address"   # company - address
+        }
+        # dimension of the word vectors returned by get_words_matrix
+        self.words_size = 128
+
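+    # Two-stage extraction: the subject model scores every token position as a
+    # candidate subject; the object model then scores object positions for each
+    # relation class, conditioned on a given subject position.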
+    # subject_model: load the SavedModel once and cache its signature tensors
+    def getModel1(self):
+        if self.model_subject is None:
+            with self.sess_subject.as_default() as sess:
+                with sess.graph.as_default():
+                    meta_graph_def = tf.saved_model.loader.load(sess, tags=["serve"], export_dir=self.subject_model_file)
+                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+                    signature_def = meta_graph_def.signature_def
+                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
+                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
+                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
+                    self.model_subject = [[input0,input1],output]
+        return self.model_subject
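+    # getModel1/getModel2 return [[input_tensors...], output_tensor]; predict()
+    # feeds these through limitRun with an explicit feed_dict rather than a
+    # Keras-style predict call.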
+    # object_model: same pattern, with an extra input for the subject position
+    def getModel2(self):
+        if self.model_object is None:
+            with self.sess_object.as_default() as sess:
+                with sess.graph.as_default():
+                    meta_graph_def = tf.saved_model.loader.load(sess, tags=["serve"], export_dir=self.object_model_file)
+                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+                    signature_def = meta_graph_def.signature_def
+                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
+                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
+                    input2 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
+                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
+                    self.model_object = [[input0,input1,input2],output]
+        return self.model_object
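+    # Note: tf.Session and tf.saved_model.loader are TF1-style APIs; under
+    # TensorFlow 2.x these calls would need their tf.compat.v1 equivalents
+    # (assumption: this module targets TF 1.x like the surrounding code).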
+
+    def encode(self,entity_list,list_sentence):
+        list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
+        entity_list = sorted(entity_list, key=lambda x: (x.sentence_index, x.begin_index))
+        pre_data = []   # tokens, with each entity mention collapsed to its type placeholder
+        text_data = []  # index-aligned with pre_data: 0 for plain tokens, the Entity object at entity slots
+        last_sentence_index = -1
+        for key, group in groupby(entity_list, key=lambda x: x.sentence_index):
+            # copy through any entity-free sentences that were skipped over
+            if key - last_sentence_index > 1:
+                for i in range(last_sentence_index + 1, key):
+                    pre_data.extend(list_sentence[i].tokens)
+                    text_data.extend([0] * len(list_sentence[i].tokens))
+            group = list(group)
+            for i in range(len(group)):
+                ent = group[i]
+                _tokens = list_sentence[key].tokens
+                if i == 0:
+                    # tokens before the first entity of the sentence
+                    pre_data.extend(_tokens[:ent.begin_index])
+                    text_data.extend([0] * len(_tokens[:ent.begin_index]))
+                pre_data.append(self.entity_type_dict[ent.entity_type])
+                text_data.append(ent)
+                if i == len(group) - 1:
+                    # tokens after the last entity of the sentence
+                    pre_data.extend(_tokens[ent.end_index + 1:])
+                    text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
+                    break
+                # tokens between this entity and the next one
+                pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
+                text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
+            last_sentence_index = key
+        return text_data, pre_data
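+    # Worked example (hypothetical sentence with a person and a phone entity):
+    #   tokens    = ["联系人", ":", "张三", ",", "电话", "13800000000"]
+    #   pre_data  = ["联系人", ":", "<contact_person>", ",", "电话", "<phone>"]
+    #   text_data = [0, 0, <Entity 张三>, 0, 0, <Entity 13800000000>]
+    # pre_data feeds the models; text_data maps predicted positions back to the
+    # original Entity objects.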
+
+    def predict(self,text_in, words, rate=0.5):
+        R = []
+        # embed the placeholder-substituted token sequence, one vector per position
+        _t2 = np.zeros((len(words), self.words_size))
+        for i in range(len(words)):
+            _t2[i] = np.array(get_words_matrix(words[i]))
+        _t2 = np.array([_t2])
+        _t3 = np.array([[1 for _ in words]])  # all-ones auxiliary input (presumably a padding mask)
+        # subject model: per-position score for being a relation subject;
+        # go through the getter so lazily loaded models are materialized
+        model_subject = self.getModel1()
+        _k1 = limitRun(self.sess_subject,[model_subject[1]],feed_dict={model_subject[0][0]:_t2,
+                                                                       model_subject[0][1]:_t3})[0]
+        _k1 = _k1[0, :, 0]
+        # keep positions whose subject score clears the threshold
+        _k1 = np.where(_k1 > rate)[0]
+        _subjects = []
+        for i in _k1:
+            _subject = text_in[i]
+            _subjects.append((_subject, i, i))
+        if _subjects:
+            # tile the inputs so the object model runs once per candidate subject
+            _t2 = np.repeat(_t2, len(_subjects), 0)
+            _t3 = np.repeat(_t3, len(_subjects), 0)
+            # subject position indices, shaped (n_subjects, 1), as the third model input
+            _k1, _ = np.array([_s[1:] for _s in _subjects]).T.reshape((2, -1, 1))
+            # object model: for each subject, a [position, relation-class] score grid
+            model_object = self.getModel2()
+            _o1 = limitRun(self.sess_object, [model_object[1]], feed_dict={model_object[0][0]: _t2,
+                                                                           model_object[0][1]: _t3,
+                                                                           model_object[0][2]: _k1})[0]
+            for i, _subject in enumerate(_subjects):
+                _oo1 = np.where(_o1[i] > 0.5)
+                for _ooo1, _c1 in zip(*_oo1):
+                    _object = text_in[_ooo1]
+                    _predicate = self.id2predicate[_c1]
+                    R.append((_subject[0], _predicate, _object))
+            return R
+        else:
+            return []
+
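+# Minimal usage sketch (entity_list / list_sentence are the Entity and Sentence
+# objects produced elsewhere in BiddingKG; the variable names are illustrative):
+#   model = Model_relation_extraction()
+#   text_data, pre_data = model.encode(entity_list, list_sentence)
+#   triples = model.predict(text_data, pre_data)
+#   # -> [(subject_entity, "rel_person", object_entity), ...]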
 
 class Model_person_classify():
 
     def __init__(self,lazyLoad=getLazyLoad()):