# encoding=utf-8
import pickle

import keras.backend as K
from keras.layers import Input, Embedding, Bidirectional, GRU, Dropout, Dense
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.engine.topology import Layer

from data_precess import get_train_test_data
from data_util import (precision, recall, f1_score, get_remove_word, get_embedding,
                       get_label, get_dic, clean_word_with_tokenizer, model_in)


class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # W: (EMBED_SIZE, 1)
        # b: (MAX_TIMESTEPS, 1)
        # u: (MAX_TIMESTEPS, MAX_TIMESTEPS)
        self.W = self.add_weight(name="W_{:s}".format(self.name),
                                 shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="b_{:s}".format(self.name),
                                 shape=(input_shape[1], 1),
                                 initializer="zeros")
        self.u = self.add_weight(name="u_{:s}".format(self.name),
                                 shape=(input_shape[1], input_shape[1]),
                                 initializer="normal")
        super(Attention, self).build(input_shape)

    def call(self, x, mask=None):
        # input: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
        # et: (BATCH_SIZE, MAX_TIMESTEPS)
        et = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # at: (BATCH_SIZE, MAX_TIMESTEPS)
        at = K.dot(et, self.u)
        at = K.exp(at)
        if mask is not None:
            at *= K.cast(mask, K.floatx())
        # ot: (BATCH_SIZE, MAX_TIMESTEPS, EMBED_SIZE)
        at /= K.cast(K.sum(at, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        atx = K.expand_dims(at, axis=-1)
        ot = atx * x
        # output: (BATCH_SIZE, EMBED_SIZE)
        return K.sum(ot, axis=1)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def compute_output_shape(self, input_shape):
        # output shape: (BATCH_SIZE, EMBED_SIZE)
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        return super(Attention, self).get_config()


def bigru_attention_softmax(input_size, word_index, embedding_matrix, classes):
    sent_inputs = Input(shape=(input_size,), dtype="float64")
    sent_emb = Embedding(input_dim=len(word_index) + 1,
                         output_dim=100,
                         mask_zero=True,
                         weights=[embedding_matrix])(sent_inputs)
    sent_enc = Bidirectional(GRU(128, dropout=0.2, recurrent_dropout=0.2,
                                 return_sequences=True))(sent_emb)
    embeddings = Dropout(0.2)(sent_enc)
    sent_att1 = Attention()(embeddings)
    fc1_dropout = Dropout(0.2)(sent_att1)
    fc1 = Dense(422, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.2)(fc1)
    sent_pred = Dense(classes, activation="softmax")(fc2_dropout)
    model = Model(inputs=sent_inputs, outputs=sent_pred)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=[precision, recall, f1_score])
    model.summary()
    return model


def bigru_attention_softmax_weights(input_size, word_index, embedding_matrix, labels, weight):
    model_gru_attention = bigru_attention_softmax(input_size, word_index, embedding_matrix, labels)
    model_gru_attention.load_weights(weight)
    return model_gru_attention


def train():
    with open('padded_sequences.pkl', 'rb') as f:
        padded_sequences = pickle.load(f)      # load the vectorized training x
    with open('labels_np.pkl', 'rb') as f:
        labels_np = pickle.load(f)             # load the one-hot encoded training y
    with open('padded_sequences_te.pkl', 'rb') as f:
        padded_sequences_te = pickle.load(f)   # load the vectorized test x
    with open('test_label.pkl', 'rb') as f:
        test_label = pickle.load(f)            # load the one-hot encoded test y
    with open('word_index.pkl', 'rb') as f:
        word_index = pickle.load(f)            # load the word:id dictionary
    with open('embedding_matrix.pkl', 'rb') as f:
        embedding_matrix = pickle.load(f)      # load the word-embedding matrix
    # padded_sequences, labels_np, padded_sequences_te, test_label, word_index, embedding_matrix = get_train_test_data()
    checkpoint_gru_attention = ModelCheckpoint('model/New_attentionLSTM_weights1_100_em21.h5',
                                               monitor="val_f1_score", verbose=1,
                                               save_best_only=True, mode='max')
    model_gru_attention = bigru_attention_softmax(100, word_index, embedding_matrix, 211)
    model_gru_attention.fit(padded_sequences, labels_np,
                            callbacks=[checkpoint_gru_attention],
                            shuffle=True,
                            validation_data=(padded_sequences_te, test_label),
                            epochs=25, batch_size=1024)  # batch_size 128


def test():
    remove_word = get_remove_word()  # load stop words and unimportant words
    word_index, tokenizer, embedding_matrix = get_embedding()  # load word:id dict, Keras Tokenizer and embedding matrix
    label_mapping, labels = get_label()  # load label dict, e.g. {0: '安防系统', 1: '安全保护服务', 2: '安全保护设备', ...}; labels holds the Chinese names of all classes
    labels = 211  # number of classes
    gru_weights = 'model/New_attentionLSTM_weights1_100_em21.h5'
    model_gru_attention = bigru_attention_softmax_weights(100, word_index, embedding_matrix, labels, gru_weights)
    file = 'F:\\工作文档\\近义词\\text_zhaobiao\\比地_1 畜禽养殖工程_2.txt'
    with open(file, 'r', encoding='utf-8') as f:
        k = file
        content = f.read()
    q = {"id": k, "content": content}
    ContentIDs = []
    ContentIDs.append([q['id'], q['content']])
    x_train, id = clean_word_with_tokenizer(ContentIDs, remove_word, tokenizer)
    gru_te = model_gru_attention.predict(x_train)
    union = model_in(gru_te, label_mapping, id)
    print(union)


if __name__ == '__main__':
    train()
    # test()
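

# --- Optional sanity check (illustrative sketch, not part of the original pipeline) ---
# A minimal check that wraps the Attention layer in a tiny functional model with
# made-up shapes (10 timesteps, 16 features) to confirm it pools
# (batch, timesteps, features) down to (batch, features). The function name and
# shapes are assumptions for demonstration only; nothing in train()/test() calls it.
def _attention_smoke_test():
    import numpy as np
    inp = Input(shape=(10, 16))          # assumed toy sequence: 10 steps, 16 features
    pooled = Attention()(inp)            # should reduce the timestep axis
    m = Model(inputs=inp, outputs=pooled)
    out = m.predict(np.random.rand(2, 10, 16))
    assert out.shape == (2, 16)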