'''
Created on 2019-05-16

@author: User

Wrappers around the pre-trained models used by the bidding-document pipeline.
Each Model_* class owns its model file path and (for SavedModels) a private
tf.Session/graph, loads lazily via getModel*(), and exposes encode()/predict().
SavedModel handles are stored as [list_of_input_tensors, output_tensor].
'''
import os
import sys
sys.path.append(os.path.abspath("../.."))

from keras import models
from keras import layers
# from keras_contrib.layers import CRF
from keras.preprocessing.sequence import pad_sequences
from keras import optimizers, losses, metrics
from BiddingKG.dl.common.Utils import *
import tensorflow as tf


class Model_role_classify():
    """Role classifier over char-token context windows (Keras .h5 model)."""

    def __init__(self, lazyLoad=getLazyLoad()):
        # self.model_role_file = os.path.abspath("../role/models/model_role.model.hdf5")
        self.model_role_file = os.path.dirname(__file__)+"/../role/log/new_biLSTM-ep012-loss0.028-val_loss0.040-f10.954.h5"
        self.model_role = None
        self.graph = tf.get_default_graph()
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        """Load the Keras model on first use; custom metrics must be registered."""
        if self.model_role is None:
            self.model_role = models.load_model(
                self.model_role_file,
                custom_objects={'precision': precision, 'recall': recall, 'f1_score': f1_score})
        return self.model_role

    def encode(self, tokens, begin_index, end_index, **kwargs):
        # Embed a (left, right) window of 10 tokens around the entity span.
        return embedding(spanWindow(tokens=tokens, begin_index=begin_index,
                                    end_index=end_index, size=10),
                         shape=(2, 10, 128))

    def predict(self, x):
        # x: batch of encode() outputs; transpose to (input_slot, batch, len, dim)
        # so x[0]/x[1] feed the two model inputs.
        x = np.transpose(np.array(x), (1, 0, 2, 3))
        with self.graph.as_default():
            return self.getModel().predict([x[0], x[1]])


class Model_role_classify_word():
    """Role classifier over word-level windows, served from a TF SavedModel."""

    def __init__(self, lazyLoad=getLazyLoad()):
        if USE_PAI_EAS:
            # On PAI-EAS the model is loaded on demand only.
            lazyLoad = True
        # self.model_role_file = os.path.abspath("../role/log/ep071-loss0.107-val_loss0.122-f10.956.h5")
        self.model_role_file = os.path.dirname(__file__)+"/../role/models/ep038-loss0.140-val_loss0.149-f10.947.h5"
        # self.model_role_file = os.path.abspath("../role/log/textcnn_ep017-loss0.088-val_loss0.125-f10.955.h5")
        self.model_role = None
        self.sess_role = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        """Load the SavedModel once and cache [[input tensors], output tensor]."""
        if self.model_role is None:
            with self.sess_role.as_default() as sess:
                with self.sess_role.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(
                        sess=self.sess_role, tags=["serve"],
                        export_dir=os.path.dirname(__file__)+"/role_savedmodel")
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    input2 = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
                    output = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_role = [[input0, input1, input2], output]
        return self.model_role

    def encode(self, tokens, begin_index, end_index, entity_text, **kwargs):
        # Word-level window of 12 tokens per side, entity text included.
        _span = spanWindow(tokens=tokens, begin_index=begin_index, end_index=end_index,
                           size=12, center_include=True, word_flag=True, text=entity_text)
        _encode_span = encodeInput(_span, word_len=20, word_flag=True, userFool=False)
        return _encode_span

    def predict(self, x):
        # Transpose batch to (input_slot, batch, len) and feed each slot tensor.
        x = np.transpose(np.array(x), (1, 0, 2))
        model_role = self.getModel()
        assert len(x) == len(model_role[0])
        feed_dict = {}
        for _x, _t in zip(x, model_role[0]):
            feed_dict[_t] = _x
        # limitRun batches the session run to bound memory usage.
        list_result = limitRun(self.sess_role, [model_role[1]], feed_dict)[0]
        return list_result


class Model_money_classify():
    """Money-entity classifier served from a TF SavedModel."""

    def __init__(self, lazyLoad=getLazyLoad()):
        if USE_PAI_EAS:
            lazyLoad = True
        self.model_money_file = os.path.dirname(__file__)+"/../money/models/model_money_word.h5"
        self.model_money = None
        self.sess_money = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        """Load the SavedModel once and cache [[input tensors], output tensor]."""
        if self.model_money is None:
            with self.sess_money.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(
                        sess, tags=["serve"],
                        export_dir=os.path.dirname(__file__)+"/money_savedmodel")
                    # meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/money_savedmodel_bilstmonly")
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    input2 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_money = [[input0, input1, input2], output]
        return self.model_money

    def encode(self, tokens, begin_index, end_index, **kwargs):
        # Word-level window of 10 tokens per side, entity included.
        _span = spanWindow(tokens=tokens, begin_index=begin_index, end_index=end_index,
                           size=10, center_include=True, word_flag=True)
        # NOTE(review): a second, unreachable `return embedding_word(_span, shape=(3,100,60))`
        # followed this return in the original and was removed.
        return encodeInput(_span, word_len=30, word_flag=True, userFool=False)

    def predict(self, x):
        x = np.transpose(np.array(x), (1, 0, 2))
        model_money = self.getModel()
        assert len(x) == len(model_money[0])
        feed_dict = {}
        for _x, _t in zip(x, model_money[0]):
            feed_dict[_t] = _x
        list_result = limitRun(self.sess_money, [model_money[1]], feed_dict)[0]
        return list_result


from itertools import groupby
from BiddingKG.dl.relation_extraction.model import get_words_matrix


class Model_relation_extraction():
    """Subject/object relation extractor (two SavedModels: subject, object)."""

    def __init__(self, lazyLoad=getLazyLoad()):
        if USE_PAI_EAS:
            lazyLoad = True
        self.subject_model_file = os.path.dirname(__file__)+"/../relation_extraction/models2/subject_model"
        self.object_model_file = os.path.dirname(__file__)+"/../relation_extraction/models2/object_model"
        self.model_subject = None
        self.model_object = None
        self.sess_subject = tf.Session(graph=tf.Graph())
        self.sess_object = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel1()
            self.getModel2()
        # Marker token inserted in place of each entity's tokens.
        # NOTE(review): the marker strings appear empty in this copy of the
        # source — confirm against the training-time vocabulary.
        self.entity_type_dict = {
            'org': '',
            'company': '',
            'location': '',
            'phone': '',
            'person': ''
        }
        self.id2predicate = {
            0: "rel_person",   # company -- contact person
            1: "rel_phone",    # contact person -- phone
            2: "rel_address"   # company -- address
        }
        self.words_size = 128

    def getModel1(self):
        """Load (once) and return the subject SavedModel handle."""
        if self.model_subject is None:
            with self.sess_subject.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(sess, tags=["serve"], export_dir=self.subject_model_file)
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_subject = [[input0, input1], output]
        return self.model_subject

    def getModel2(self):
        """Load (once) and return the object SavedModel handle."""
        if self.model_object is None:
            with self.sess_object.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(sess, tags=["serve"], export_dir=self.object_model_file)
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    input2 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_object = [[input0, input1, input2], output]
        return self.model_object

    def encode(self, entity_list, list_sentence):
        """Flatten sentences into one token sequence, replacing each entity's
        tokens with its type marker.

        Returns (text_data, pre_data): pre_data is the token/marker sequence,
        text_data holds the entity object at each marker position and 0
        elsewhere (positions are aligned one-to-one).
        NOTE(review): indexes list_sentence by sentence_index — assumes the
        sorted list positions match sentence_index values; confirm upstream.
        """
        list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
        entity_list = sorted(entity_list, key=lambda x: (x.sentence_index, x.begin_index))
        pre_data = []
        text_data = []
        last_sentence_index = -1
        for key, group in groupby(entity_list, key=lambda x: x.sentence_index):
            if key - last_sentence_index > 1:
                # Emit the tokens of entity-free sentences in between.
                for i in range(last_sentence_index + 1, key):
                    pre_data.extend(list_sentence[i].tokens)
                    text_data.extend([0] * len(list_sentence[i].tokens))
            group = list(group)
            for i in range(len(group)):
                ent = group[i]
                _tokens = list_sentence[key].tokens
                if i == len(group) - 1:
                    # Last entity in this sentence: emit through end of sentence.
                    if i == 0:
                        pre_data.extend(_tokens[:ent.begin_index])
                        text_data.extend([0] * len(_tokens[:ent.begin_index]))
                        pre_data.append(self.entity_type_dict[ent.entity_type])
                        text_data.append(ent)
                        pre_data.extend(_tokens[ent.end_index + 1:])
                        text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
                        break
                    else:
                        pre_data.append(self.entity_type_dict[ent.entity_type])
                        text_data.append(ent)
                        pre_data.extend(_tokens[ent.end_index + 1:])
                        text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
                        break
                if i == 0:
                    # First entity: emit the sentence prefix before it.
                    pre_data.extend(_tokens[:ent.begin_index])
                    text_data.extend([0] * len(_tokens[:ent.begin_index]))
                    pre_data.append(self.entity_type_dict[ent.entity_type])
                    text_data.append(ent)
                    pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
                    text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
                else:
                    pre_data.append(self.entity_type_dict[ent.entity_type])
                    text_data.append(ent)
                    pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
                    text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
            last_sentence_index = key
        return text_data, pre_data

    def predict(self, text_in, words, rate=0.5):
        """Run subject then object models; return (subject, predicate, object)
        triples. `rate` is the subject-probability threshold.
        """
        # FIX: the original dereferenced self.model_subject/self.model_object
        # directly, which crashes (None) under lazy loading (e.g. USE_PAI_EAS).
        model_subject = self.getModel1()
        model_object = self.getModel2()
        triple_list = []
        # Word-embedding matrix (1, len, words_size) plus an all-ones mask row.
        _t2 = np.zeros((len(words), self.words_size))
        for i in range(len(words)):
            _t2[i] = np.array(get_words_matrix(words[i]))
        _t2 = np.array([_t2])
        _t3 = [1 for _ in words]
        _t3 = np.array([_t3])
        _k1 = limitRun(self.sess_subject, [model_subject[1]],
                       feed_dict={model_subject[0][0]: _t2,
                                  model_subject[0][1]: _t3})[0]
        _k1 = _k1[0, :, 0]
        _k1 = np.where(_k1 > rate)[0]
        _subjects = []
        for i in _k1:
            _subject = text_in[i]
            _subjects.append((_subject, i, i))
        if _subjects:
            # One object-model row per detected subject position.
            _t2 = np.repeat(_t2, len(_subjects), 0)
            _t3 = np.repeat(_t3, len(_subjects), 0)
            _k1, _ = np.array([_s[1:] for _s in _subjects]).T.reshape((2, -1, 1))
            _o1 = limitRun(self.sess_object, [model_object[1]],
                           feed_dict={model_object[0][0]: _t2,
                                      model_object[0][1]: _t3,
                                      model_object[0][2]: _k1})[0]
            for i, _subject in enumerate(_subjects):
                _oo1 = np.where(_o1[i] > 0.5)
                for _ooo1, _c1 in zip(*_oo1):
                    _object = text_in[_ooo1]
                    _predicate = self.id2predicate[_c1]
                    triple_list.append((_subject[0], _predicate, _object))
            return triple_list
        else:
            return []


class Model_person_classify():
    """Person-entity classifier served from a TF SavedModel."""

    def __init__(self, lazyLoad=getLazyLoad()):
        if USE_PAI_EAS:
            lazyLoad = True
        self.model_person_file = os.path.dirname(__file__)+"/../person/models/model_person.model.hdf5"
        self.model_person = None
        self.sess_person = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        """Load the SavedModel once and cache [[input tensors], output tensor]."""
        if self.model_person is None:
            with self.sess_person.as_default() as sess:
                with sess.graph.as_default():
                    # meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/person_savedmodel_new")
                    meta_graph_def = tf.saved_model.loader.load(
                        sess, tags=["serve"],
                        export_dir=os.path.dirname(__file__)+"/person_savedmodel_new_znj")
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_person = [[input0, input1], output]
        return self.model_person

    def encode(self, tokens, begin_index, end_index, **kwargs):
        # return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=10),shape=(2,10,128))
        return embedding(spanWindow(tokens=tokens, begin_index=begin_index,
                                    end_index=end_index, size=20),
                         shape=(2, 20, 128))

    def predict(self, x):
        x = np.transpose(np.array(x), (1, 0, 2, 3))
        model_person = self.getModel()
        assert len(x) == len(model_person[0])
        feed_dict = {}
        for _x, _t in zip(x, model_person[0]):
            feed_dict[_t] = _x
        list_result = limitRun(self.sess_person, [model_person[1]], feed_dict)[0]
        return list_result


class Model_form_line():
    """Form-line classifier (Keras .h5 model on word-vector sequences)."""

    def __init__(self, lazyLoad=getLazyLoad()):
        self.model_file = os.path.dirname(__file__)+"/../form/model/model_form.model - 副本.hdf5"
        self.model_form = None
        self.graph = tf.get_default_graph()
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        if self.model_form is None:
            self.model_form = models.load_model(
                self.model_file,
                custom_objects={"precision": precision, "recall": recall, "f1_score": f1_score})
        return self.model_form

    def encode(self, data, shape=(100, 60), expand=False, **kwargs):
        """Embed up to shape[0] tokens of `data` with the shared word model;
        out-of-vocabulary tokens stay zero.  `expand` adds a batch axis."""
        embedding = np.zeros(shape)
        word_model = getModel_word()
        for i in range(len(data)):
            if i >= shape[0]:
                break
            if data[i] in word_model.vocab:
                embedding[i] = word_model[data[i]]
        if expand:
            embedding = np.expand_dims(embedding, 0)
        return embedding

    def predict(self, x):
        with self.graph.as_default():
            return self.getModel().predict(x)


class Model_form_item():
    """Form-item (cell) classifier served from a TF SavedModel."""

    def __init__(self, lazyLoad=getLazyLoad()):
        self.model_file = os.path.dirname(__file__)+"/../form/log/ep039-loss0.038-val_loss0.064-f10.9783.h5"
        self.model_form = None
        self.sess_form = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        """Load the SavedModel once and cache [[input tensor], output tensor]."""
        if self.model_form is None:
            with self.sess_form.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(
                        sess, tags=["serve"],
                        export_dir="%s/form_savedmodel" % (os.path.dirname(__file__)))
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    inputs = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["inputs"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_form = [[inputs], output]
        return self.model_form

    def encode(self, data, **kwargs):
        # NOTE(review): an unreachable `return encodeInput_form(data)` followed
        # this return in the original and was removed.
        return encodeInput([data], word_len=50, word_flag=True, userFool=False)[0]

    def predict(self, x):
        model_form = self.getModel()
        list_result = limitRun(self.sess_form, [model_form[1]],
                               feed_dict={model_form[0][0]: x})[0]
        return list_result


class Model_form_context():
    """Form-cell classifier using each cell's 3x3 neighbourhood as context."""

    def __init__(self, lazyLoad=getLazyLoad()):
        self.model_form = None
        self.sess_form = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel()

    def getModel(self):
        """Load the SavedModel once and cache [[input tensor], output tensor]."""
        if self.model_form is None:
            with self.sess_form.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(
                        sess, tags=["serve"],
                        export_dir="%s/form_context_savedmodel" % (os.path.dirname(__file__)))
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    inputs = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["inputs"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_form = [[inputs], output]
        return self.model_form

    def encode_table(self, inner_table, size=30):
        """Encode every cell of `inner_table` with its 3x3 neighbourhood.

        inner_table: rows of [text, label] cells.  Returns
        (data_x, data_y, data_text, data_position) where data_y is one-hot
        over {0,1} and data_position holds [col, row] of each cell.
        """
        def encode_item(_table, i, j):
            # 3x3 window centred on (column i, row j) of the padded table.
            _x = [_table[j-1][i-1], _table[j-1][i], _table[j-1][i+1],
                  _table[j][i-1], _table[j][i], _table[j][i+1],
                  _table[j+1][i-1], _table[j+1][i], _table[j+1][i+1]]
            e_x = [encodeInput_form(_temp[0], MAX_LEN=30) for _temp in _x]
            _label = _table[j][i][1]
            return e_x, _label, _x

        def copytable(inner_table):
            # Deep-enough copy, truncating each cell's text to `size` chars.
            table = []
            for line in inner_table:
                list_line = []
                for item in line:
                    list_line.append([item[0][:size], item[1]])
                table.append(list_line)
            return table

        table = copytable(inner_table)
        padding = ["#"*30, 0]
        width = len(table[0])
        height = len(table)
        # Pad one ring of "#" cells around the table so every real cell
        # has a full 3x3 neighbourhood.
        table.insert(0, [padding for i in range(width)])
        table.append([padding for i in range(width)])
        for item in table:
            item.insert(0, padding.copy())
            item.append(padding.copy())
        data_x = []
        data_y = []
        data_text = []
        data_position = []
        # Interior coordinates 1..width / 1..height address the original cells.
        for _i in range(1, width+1):
            for _j in range(1, height+1):
                _x, _y, _text = encode_item(table, _i, _j)
                data_x.append(_x)
                _label = [0, 0]
                _label[_y] = 1
                data_y.append(_label)
                data_text.append(_text)
                data_position.append([_i-1, _j-1])
        return data_x, data_y, data_text, data_position

    def encode(self, inner_table, **kwargs):
        data_x, _, _, data_position = self.encode_table(inner_table)
        return data_x, data_position

    def predict(self, x):
        model_form = self.getModel()
        list_result = limitRun(self.sess_form, [model_form[1]],
                               feed_dict={model_form[0][0]: x})[0]
        return list_result