import tensorflow as tf
import numpy as np
from utils import *
from metrics import *
from models import GCN_Align
import gc
import time
import os


def loadBestModel():
    dir_best_model = os.path.join(os.getcwd(), "data1", "100000", "zh_en", "model.ckpt")
    sess = tf.Session()

    # Define placeholders
    num_supports = 1
    ph_ae = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.sparse_placeholder(tf.float32),  # tf.placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder_with_default(0, shape=())
    }
    ph_se = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder_with_default(0, shape=())
    }

    # Hyper-parameter flags
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_string('lang', 'zh_en', 'Dataset string.')  # 'zh_en', 'ja_en', 'fr_en'
    flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 500, 'Number of epochs to train.')
    flags.DEFINE_float('dropout', 0.3, 'Dropout rate (1 - keep probability).')
    flags.DEFINE_float('gamma', 3.0, 'Hyper-parameter for margin based loss.')
    flags.DEFINE_integer('k', 4, 'Number of negative samples for each positive seed.')
    flags.DEFINE_float('beta', 0.3, 'Weight for structure embeddings.')
    flags.DEFINE_integer('se_dim', 100, 'Dimension for SE.')
    flags.DEFINE_integer('ae_dim', 100, 'Dimension for AE.')
    flags.DEFINE_integer('seed', 9, 'Proportion of seeds, 3 means 30%.')

    # Load and preprocess data
    adj, ae_input, train, test = load_data(FLAGS.lang)
    support = [preprocess_adj(adj)]

    # Bind concrete values to the placeholders defined above
    feed_dict_ae = construct_feed_dict(ae_input, support, ph_ae)
    feed_dict_ae.update({ph_ae['dropout']: FLAGS.dropout})
    feed_dict_se = construct_feed_dict(1.0, support, ph_se)
    feed_dict_se.update({ph_se['dropout']: FLAGS.dropout})

    # Fill the negative-sample placeholders. No negative samples are needed
    # at inference time, so t = k = 0 yields empty arrays of the right shape.
    t = 0
    k = 0
    e = ae_input[2][0]
    L = np.ones((t, k))
    neg_left = L.reshape((t * k,))
    L = np.ones((t, k))
    neg2_right = L.reshape((t * k,))
    neg2_left = np.random.choice(e, t * k)
    neg_right = np.random.choice(e, t * k)
    feed_dict_ae.update({'neg_left:0': neg_left, 'neg_right:0': neg_right,
                         'neg2_left:0': neg2_left, 'neg2_right:0': neg2_right})
    feed_dict_se.update({'neg_left:0': neg_left, 'neg_right:0': neg_right,
                         'neg2_left:0': neg2_left, 'neg2_right:0': neg2_right})

    # Create models
    model_func = GCN_Align
    # Attribute embedding (AE) model
    model_ae = model_func(ph_ae, input_dim=ae_input[2][1], output_dim=FLAGS.ae_dim,
                          ILL=train, sparse_inputs=True, featureless=False, logging=True)
    # Structure embedding (SE) model
    model_se = model_func(ph_se, input_dim=ae_input[2][0], output_dim=FLAGS.se_dim,
                          ILL=train, sparse_inputs=False, featureless=True, logging=True)

    # Restore the best checkpoint
    saver = tf.train.Saver()
    saver.restore(sess, dir_best_model)

    # Run the last layer to get the embedding vectors
    # print(len(feed_dict_ae))
    # for i in feed_dict_ae.keys():
    #     print(i)
    vec_ae = sess.run(model_ae.outputs, feed_dict=feed_dict_ae)
    vec_se = sess.run(model_se.outputs, feed_dict=feed_dict_se)

    # Free memory
    print("Freeing memory")
    del saver
    del model_ae
    del model_se
    del model_func
    del feed_dict_ae
    del feed_dict_se
    del adj
    del ae_input
    del train
    # del test
    del support
    del sess
    gc.collect()

    # print("AE")
    # get_hits(vec_ae, test)
    # print("SE")
    # get_hits(vec_se, test)
    # print("SE+AE")
    # get_combine_hits(vec_se, vec_ae, FLAGS.beta, test)

    # # Calculate similarity
    # print("AE Similarity")
    # print(len(vec_ae), len(test))
    # predict(vec_ae, test)
    # print("SE Similarity")
    # predict(vec_se, test)
    # print("AE+SE Similarity")
    # predict(np.concatenate([vec_se * FLAGS.beta, vec_ae * (1.0 - FLAGS.beta)], axis=1), test)

    print("Predict New Align Orgs")
    start_time = time.time()
    predict_new(np.concatenate([vec_se * FLAGS.beta, vec_ae * (1.0 - FLAGS.beta)], axis=1))
    print("use time", time.time() - start_time)
    print("e" + str(FLAGS.epochs), "d" + str(FLAGS.dropout), "k" + str(FLAGS.k),
          "s" + str(FLAGS.seed), "lr" + str(FLAGS.learning_rate), "b" + str(FLAGS.beta))


if __name__ == '__main__':
    loadBestModel()
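

# --- Hedged sketch: ranking alignments from the combined embedding ---
# A minimal, standalone illustration (not part of the pipeline above) of
# how test pairs can be scored once the combined embedding
# np.concatenate([vec_se * beta, vec_ae * (1 - beta)], axis=1) is in hand.
# GCN-Align-style evaluation ranks each left test entity's true counterpart
# by embedding distance; this sketch assumes cityblock (L1) distance and a
# `test` list of (left_id, right_id) index pairs. The helper name
# `hits_at_k_sketch` is hypothetical; get_hits / predict_new in metrics.py
# remain the authoritative implementations.
def hits_at_k_sketch(vec, test_pairs, top_k=(1, 10)):
    # Imports kept local so the sketch stays self-contained.
    import numpy as np
    from scipy.spatial.distance import cdist

    left = np.array([pair[0] for pair in test_pairs])
    right = np.array([pair[1] for pair in test_pairs])
    # Pairwise L1 distances from every left test entity to every right one.
    sim = cdist(vec[left], vec[right], metric='cityblock')
    hits = {k: 0 for k in top_k}
    for i in range(len(test_pairs)):
        # Rank of the true counterpart (column i) among all right candidates.
        rank = np.argsort(sim[i]).tolist().index(i)
        for k in top_k:
            if rank < k:
                hits[k] += 1
    for k in top_k:
        print('Hits@%d: %.2f%%' % (k, 100.0 * hits[k] / len(test_pairs)))

# Example use (hypothetical), mirroring the combination above:
#   hits_at_k_sketch(np.concatenate([vec_se * 0.3, vec_ae * 0.7], axis=1), test)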