'''
Created on 2019-03-26

@author: User
'''
import sys
import os
os.environ['KERAS_BACKEND'] = 'tensorflow'
sys.path.append(os.path.abspath("../../.."))

from BiddingKG.dl.common.models import *
from keras.callbacks import ModelCheckpoint
import numpy as np
import time
from BiddingKG.dl.common.Utils import *
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from generateData import *


def train():
    # Train the TextCNN form classifier on the Excel train/test sets.
    model = getTextCNNModel()
    train_x, train_y = getData("train.xls")
    test_x, test_y = getData("test.xls")
    callback = ModelCheckpoint('log/' + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}-f1{val_f1_score:.4f}.h5',
                               monitor="val_loss", verbose=1, save_best_only=True, mode="min")
    model.fit(x=train_x, y=train_y, batch_size=96, epochs=400, callbacks=[callback], shuffle=True,
              validation_data=(test_x, test_y))


def train1():
    # Train the BiLSTM form classifier, loading a pickled dataset if one exists.
    data_pk = "./traindata/all_data.pk"
    if os.path.exists(data_pk):
        train_x, train_y, test_x, test_y, test_text = load(data_pk)
    else:
        # train_x, train_y, test_x, test_y, test_text = getTrainData()
        # save((train_x, train_y, test_x, test_y, test_text), data_pk)
        train_x, train_y, test_x, test_y = getTrainData_jsonTable()
    with tf.Session(graph=tf.Graph()) as sess:
        with sess.graph.as_default():
            print("11111111")
            vocab, matrix = getVocabAndMatrix(getModel_word())
            model = getBiLSTMModel(input_shape=(1, 50, 60), vocab=vocab, embedding_weights=matrix, classes=2)
            # model = getTextCNNModel(input_shape=(1,30,60), vocab=vocab, embedding_weights=weights, classes=2)
            print("22222222")
            callback = ModelCheckpoint('log/' + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}-f1{val_f1_score:.4f}.h5',
                                       monitor="val_loss", verbose=1, save_best_only=True, save_weights_only=True,
                                       mode="min")
            model.fit(x=train_x, y=train_y, batch_size=128, epochs=400, callbacks=[callback], shuffle=True,
                      validation_data=(test_x, test_y))


def vali():
    # Validate a saved model and export correct and wrong predictions to Excel.
    data_pk = "all_data.pk"
    train_x, train_y, test_x, test_y, test_text = load(data_pk)
    model = models.load_model("log/loss_ep106-loss0.008-val_loss0.134-f10.9768.h5",
                              custom_objects={"precision": precision, "recall": recall,
                                              "f1_score": f1_score, "Attention": Attention})
    predict = model.predict(test_x)
    predict_y = np.argmax(predict, 1)

    # Correctly classified samples
    list_filename = []
    list_text = []
    list_label = []
    list_predict = []
    list_prob = []
    data = []
    for y, y_, text, prob in zip(np.argmax(test_y, 1), predict_y, test_text, predict):
        if y == y_:
            data.append([text[0], text[1], y, y_, prob[y_]])
    data.sort(key=lambda x: x[2])
    for item in data:
        list_filename.append(item[0])
        list_text.append(item[1])
        list_label.append(item[2])
        list_predict.append(item[3])
        list_prob.append(item[4])
    df = pd.DataFrame({"list_filename": list_filename, "list_text": list_text, "list_label": list_label,
                       "list_predict": list_predict, "list_prob": list_prob})
    df.to_excel("vali_true.xls", columns=["list_filename", "list_text", "list_label", "list_predict", "list_prob"])

    # Misclassified samples
    list_filename = []
    list_text = []
    list_label = []
    list_predict = []
    list_prob = []
    data = []
    for y, y_, text, prob in zip(np.argmax(test_y, 1), predict_y, test_text, predict):
        if y != y_:
            data.append([text[0], text[1], y, y_, prob[y_]])
    data.sort(key=lambda x: x[2])
    for item in data:
        list_filename.append(item[0])
        list_text.append(item[1])
        list_label.append(item[2])
        list_predict.append(item[3])
        list_prob.append(item[4])
    df = pd.DataFrame({"list_filename": list_filename, "list_text": list_text, "list_label": list_label,
                       "list_predict": list_predict, "list_prob": list_prob})
    df.to_excel("vali_wrong.xls", columns=["list_filename", "list_text", "list_label", "list_predict", "list_prob"])


def test(list_text):
    # Predict labels for a list of texts with pre-trained TextCNN weights.
    x = []
    for text in list_text:
        x.append(encoding(text))
    x = np.array(x)
    # x = np.expand_dims(encoding(text),0)
    # test_x,test_y = getData("test.xls")
    model = getTextCNNModel()
    model.load_weights("log/ep082-loss0.044-val_loss0.126-f10.9592.h5")
    a = time.time()
    predict_y = model.predict(x)
    print("cost", time.time() - a)
    # model.save("model/model_form.model.hdf5")
    return predict_y


def getBestThreshold():
    # Sweep the decision threshold and plot the resulting F1 score.
    def getAccurancyRecall(predict, threshold, test_y):
        nums = 0
        counts = 0
        for item, _max, y in zip(predict, np.argmax(predict, 1), np.argmax(test_y, 1)):
            if item[_max] > threshold:
                if _max == y:
                    nums += 1
                counts += 1
        precision = nums / counts
        recall = nums / len(test_y)
        return 2 * ((precision * recall) / (precision + recall))
        # return precision,recall

    model = getTextCNNModel()
    model.load_weights("model/model_form.model.hdf5")
    test_x, test_y = getData("test.xls")
    predict_y = model.predict(test_x)
    threshold = 0.5
    x = []
    y = []
    while threshold < 1:
        x.append(threshold)
        t0 = getAccurancyRecall(predict_y, threshold, test_y)
        y.append(t0)
        print(threshold, t0)
        threshold += 0.001
    plt.plot(x, y)
    plt.show()


def save_form_model():
    # Export the trained BiLSTM form model as a TensorFlow SavedModel.
    with tf.Session(graph=tf.Graph()).as_default() as sess:
        with sess.graph.as_default():
            vocab, matrix = getVocabAndMatrix(getModel_word())
            model = getBiLSTMModel(input_shape=(1, 50, 60), vocab=vocab, embedding_weights=matrix, classes=2)
            model.load_weights(filepath="log/ep029-loss0.044-val_loss0.057-f10.9788.h5")
            tf.saved_model.simple_save(sess, "./form_savedmodel/",
                                       inputs={"inputs": model.input},
                                       outputs={"outputs": model.output})


from BiddingKG.dl.form.generateData import getTrainData_jsonTable
from BiddingKG.dl.form.model import get_context_form_model


def train_context():
    # Train the context form model with a raw TensorFlow training loop.
    vocab, vocab_matrix = getVocabAndMatrix(getModel_word(), Embedding_size=60)
    sess = tf.Session(graph=tf.Graph())
    epochs = 100
    batch_size = 1000
    with sess.graph.as_default():
        node_dict = get_context_form_model(vocab_len=len(vocab_matrix), char_dim=60, lstm_dim=24,
                                           context_dim=36, res_dim=16, residual_stacks=5)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.assign(node_dict["embedding"], np.array(vocab_matrix)))
        train_x, train_y, _ = getTrainData_jsonTable(begin=0, end=20000)
        test_x, test_y, text_text = getTrainData_jsonTable(begin=-2000, end=-1, return_text=True)
        saver = tf.train.Saver(max_to_keep=epochs)
        for _e in range(epochs):
            batch_begin = 0
            avg_loss = 0
            avg_precision = 0
            _count = 0
            while(batch_begin