'''
Created on Feb 25, 2019

@author: User
'''
import keras
from keras import models
from keras import layers
from keras import optimizers, losses, metrics
from keras.callbacks import ModelCheckpoint
from Utils import *
import keras.backend as K
import tensorflow as tf


class Attention(layers.Layer):
    '''Scores each timestep of a sequence and returns normalized attention weights.'''

    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # W: (EMBED_SIZE, 1)
        # b: (MAX_TIMESTEPS, 1)
        self.W = self.add_weight(name="W_{:s}".format(self.name),
                                 shape=(input_shape[-1], 1),
                                 initializer="uniform")
        self.b = self.add_weight(name="b_{:s}".format(self.name),
                                 shape=(input_shape[1], 1),
                                 initializer="uniform")
        super(Attention, self).build(input_shape)

    def call(self, x, mask=None):
        # x:  (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
        # et: (BATCH_SIZE, MAX_TIMESTEPS) -- one raw score per timestep
        et = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # normalize the scores so they sum to 1 along the time axis
        et /= K.cast(K.sum(et, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        return et

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def compute_output_shape(self, input_shape):
        # output shape: (BATCH_SIZE, MAX_TIMESTEPS)
        return (input_shape[0], input_shape[1])

    def get_config(self):
        return super(Attention, self).get_config()


def getBiRNNModel(input_shape=[None, 36], out_len=2):
    '''
    @summary: get the model
    '''
    input = layers.Input(shape=input_shape, dtype="float32")
    mask = layers.Masking(mask_value=0)(input)

    lstm_0 = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(mask)
    lstm_1 = layers.Bidirectional(layers.LSTM(12, return_sequences=True))(lstm_0)
    output = layers.Dense(out_len, activation="softmax")(lstm_1)

    model = models.Model(inputs=[input], outputs=output)
    # my_loss, acc, precision, recall and f1_score come from Utils (star import above)
    model.compile(optimizer=optimizers.Adam(lr=0.01),
                  loss=my_loss,
                  metrics=[acc, precision, recall, f1_score])
    model.summary()
    return model


if __name__ == "__main__":
    getBiRNNModel()
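

# ----------------------------------------------------------------------
# Illustrative sketch (not part of the pipeline above): one way the custom
# Attention layer could be wired into a model. The weighted-sum pooling,
# the fixed sequence length of 100, and the compile settings are assumptions
# made for this example, not taken from this file.
def getBiRNNAttentionModel(input_shape=[100, 36], out_len=2):
    input = layers.Input(shape=input_shape, dtype="float32")
    # no Masking layer here: the Attention layer does not declare mask support,
    # so this sketch assumes fixed-length, unpadded sequences
    lstm = layers.Bidirectional(layers.LSTM(32, return_sequences=True))(input)
    # at: (BATCH_SIZE, MAX_TIMESTEPS) attention weights from the layer above
    at = Attention()(lstm)
    # weight each timestep by its attention score, then sum over time
    weighted = layers.Lambda(lambda t: t[0] * K.expand_dims(t[1], axis=-1))([lstm, at])
    context = layers.Lambda(lambda t: K.sum(t, axis=1))(weighted)  # (BATCH_SIZE, 64)
    output = layers.Dense(out_len, activation="softmax")(context)
    model = models.Model(inputs=[input], outputs=output)
    model.compile(optimizer=optimizers.Adam(lr=0.01),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model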