'''
Created on Feb 25, 2019

@author: User
'''
import keras
from keras import models
from keras import layers
from keras import optimizers, losses, metrics
from keras.callbacks import ModelCheckpoint
from Utils import *
import keras.backend as K
import tensorflow as tf


class Attention(layers.Layer):
    '''Scores each timestep of a sequence and returns normalized attention weights.'''

    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # W: (EMBED_SIZE, 1)
        # b: (MAX_TIMESTEPS, 1)
        self.W = self.add_weight(name="W_{:s}".format(self.name),
                                 shape=(input_shape[-1], 1),
                                 initializer="uniform")
        self.b = self.add_weight(name="b_{:s}".format(self.name),
                                 shape=(input_shape[1], 1),
                                 initializer="uniform")
        super(Attention, self).build(input_shape)

    def call(self, x, mask=None):
        # x:  (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
        # et: (BATCH_SIZE, MAX_TIMESTEPS) -- one raw score per timestep
        et = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # normalize the scores so they sum to 1 along the time axis
        et /= K.cast(K.sum(et, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        return et

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def compute_output_shape(self, input_shape):
        # output shape: (BATCH_SIZE, MAX_TIMESTEPS)
        return (input_shape[0], input_shape[1])

    def get_config(self):
        return super(Attention, self).get_config()


def getBiRNNModel(input_shape=[None, 36], out_len=2):
    '''
    @summary: get the model
    '''
    input = layers.Input(shape=input_shape, dtype="float32")
    mask = layers.Masking(mask_value=0)(input)

    lstm_0 = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(mask)
    lstm_1 = layers.Bidirectional(layers.LSTM(12, return_sequences=True))(lstm_0)
    output = layers.Dense(out_len, activation="softmax")(lstm_1)

    model = models.Model(inputs=[input], outputs=output)
    # my_loss, acc, precision, recall and f1_score come from Utils (star import above)
    model.compile(optimizer=optimizers.Adam(lr=0.01),
                  loss=my_loss,
                  metrics=[acc, precision, recall, f1_score])
    model.summary()
    return model


if __name__ == "__main__":
    getBiRNNModel()
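

# ----------------------------------------------------------------------
# Illustrative sketch (not part of the pipeline above): one way the custom
# Attention layer could be wired into a model. The weighted-sum pooling,
# the fixed sequence length of 100, and the compile settings are assumptions
# made for this example, not taken from this file.
def getBiRNNAttentionModel(input_shape=[100, 36], out_len=2):
    input = layers.Input(shape=input_shape, dtype="float32")
    # no Masking layer here: the Attention layer does not declare mask support,
    # so this sketch assumes fixed-length, unpadded sequences
    lstm = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(input)
    # at: (BATCH_SIZE, MAX_TIMESTEPS) attention weights from the layer above
    at = Attention()(lstm)
    # weight each timestep by its attention score, then sum over time
    weighted = layers.Lambda(lambda t: t[0] * K.expand_dims(t[1], axis=-1))([lstm, at])
    context = layers.Lambda(lambda t: K.sum(t, axis=1))(weighted)  # (BATCH_SIZE, 64)
    output = layers.Dense(out_len, activation="softmax")(context)
    model = models.Model(inputs=[input], outputs=output)
    model.compile(optimizer=optimizers.Adam(lr=0.01),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model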