|
@@ -6,112 +6,122 @@ import gc
|
|
import time
|
|
import time
|
|
import os
|
|
import os
|
|
|
|
|
|
-dir_best_model = os.getcwd()+"\\data1\\100000\\zh_en\\model.ckpt"
|
|
|
|
-sess = tf.Session()
|
|
|
|
-
|
|
|
|
-# Define placeholders
|
|
|
|
-num_supports = 1
|
|
|
|
-ph_ae = {
|
|
|
|
- 'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
|
|
|
|
- 'features': tf.sparse_placeholder(tf.float32), #tf.placeholder(tf.float32),
|
|
|
|
- 'dropout': tf.placeholder_with_default(0., shape=()),
|
|
|
|
- 'num_features_nonzero': tf.placeholder_with_default(0, shape=())
|
|
|
|
-}
|
|
|
|
-ph_se = {
|
|
|
|
- 'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
|
|
|
|
- 'features': tf.placeholder(tf.float32),
|
|
|
|
- 'dropout': tf.placeholder_with_default(0., shape=()),
|
|
|
|
- 'num_features_nonzero': tf.placeholder_with_default(0, shape=())
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-# some flags
|
|
|
|
-flags = tf.app.flags
|
|
|
|
-FLAGS = flags.FLAGS
|
|
|
|
-flags.DEFINE_string('lang', 'zh_en', 'Dataset string.') # 'zh_en', 'ja_en', 'fr_en'
|
|
|
|
-flags.DEFINE_float('learning_rate', 20, 'Initial learning rate.')
|
|
|
|
-flags.DEFINE_integer('epochs', 20, 'Number of epochs to train.')
|
|
|
|
-flags.DEFINE_float('dropout', 0.3, 'Dropout rate (1 - keep probability).')
|
|
|
|
-flags.DEFINE_float('gamma', 3.0, 'Hyper-parameter for margin based loss.')
|
|
|
|
-flags.DEFINE_integer('k', 5, 'Number of negative samples for each positive seed.')
|
|
|
|
-flags.DEFINE_float('beta', 0.3, 'Weight for structure embeddings.')
|
|
|
|
-flags.DEFINE_integer('se_dim', 100, 'Dimension for SE.')
|
|
|
|
-flags.DEFINE_integer('ae_dim', 100, 'Dimension for AE.')
|
|
|
|
-flags.DEFINE_integer('seed', 5, 'Proportion of seeds, 3 means 30%')
|
|
|
|
-
|
|
|
|
-# data process
|
|
|
|
-adj, ae_input, train, test = load_data(FLAGS.lang)
|
|
|
|
-support = [preprocess_adj(adj)]
|
|
|
|
-
|
|
|
|
-# 把具体值赋给事先定义好的placeholder
|
|
|
|
-feed_dict_ae = construct_feed_dict(ae_input, support, ph_ae)
|
|
|
|
-feed_dict_ae.update({ph_ae['dropout']: FLAGS.dropout})
|
|
|
|
-feed_dict_se = construct_feed_dict(1.0, support, ph_se)
|
|
|
|
-feed_dict_se.update({ph_se['dropout']: FLAGS.dropout})
|
|
|
|
-
|
|
|
|
-# 负样本填充placeholder
|
|
|
|
-t = 0
|
|
|
|
-k = 0
|
|
|
|
-e = ae_input[2][0]
|
|
|
|
-L = np.ones((t, k))
|
|
|
|
-neg_left = L.reshape((t * k,))
|
|
|
|
-L = np.ones((t, k))
|
|
|
|
-neg2_right = L.reshape((t * k,))
|
|
|
|
-neg2_left = np.random.choice(e, t * k)
|
|
|
|
-neg_right = np.random.choice(e, t * k)
|
|
|
|
-feed_dict_ae.update({'neg_left:0': neg_left, 'neg_right:0': neg_right, 'neg2_left:0': neg2_left, 'neg2_right:0': neg2_right})
|
|
|
|
-feed_dict_se.update({'neg_left:0': neg_left, 'neg_right:0': neg_right, 'neg2_left:0': neg2_left, 'neg2_right:0': neg2_right})
|
|
|
|
-
|
|
|
|
-# Create model
|
|
|
|
-model_func = GCN_Align
|
|
|
|
-# attribute embedding model
|
|
|
|
-model_ae = model_func(ph_ae, input_dim=ae_input[2][1], output_dim=FLAGS.ae_dim, ILL=train, sparse_inputs=True, featureless=False, logging=True)
|
|
|
|
-# structure embedding model
|
|
|
|
-model_se = model_func(ph_se, input_dim=ae_input[2][0], output_dim=FLAGS.se_dim, ILL=train, sparse_inputs=False, featureless=True, logging=True)
|
|
|
|
-
|
|
|
|
-# load model
|
|
|
|
-saver = tf.train.Saver()
|
|
|
|
-saver.restore(sess, dir_best_model)
|
|
|
|
-
|
|
|
|
-# run the last layer, get vector
|
|
|
|
-# print(len(feed_dict_ae))
|
|
|
|
-# for i in feed_dict_ae.keys():
|
|
|
|
-# print(i)
|
|
|
|
-vec_ae = sess.run(model_ae.outputs, feed_dict=feed_dict_ae)
|
|
|
|
-vec_se = sess.run(model_se.outputs, feed_dict=feed_dict_se)
|
|
|
|
-
|
|
|
|
-# 清内存
|
|
|
|
-print("清内存")
|
|
|
|
-del saver
|
|
|
|
-del model_ae
|
|
|
|
-del model_se
|
|
|
|
-del model_func
|
|
|
|
-del feed_dict_ae
|
|
|
|
-del feed_dict_se
|
|
|
|
-del adj
|
|
|
|
-del ae_input
|
|
|
|
-del train
|
|
|
|
-# del test
|
|
|
|
-del support
|
|
|
|
-del sess
|
|
|
|
-gc.collect()
|
|
|
|
-
|
|
|
|
-# print("AE")
|
|
|
|
-# get_hits(vec_ae, test)
|
|
|
|
-# print("SE")
|
|
|
|
-# get_hits(vec_se, test)
|
|
|
|
-# print("SE+AE")
|
|
|
|
-# get_combine_hits(vec_se, vec_ae, FLAGS.beta, test)
|
|
|
|
-#
|
|
|
|
-# calculate similarity
|
|
|
|
-# print("AE Similarity")
|
|
|
|
-# print(len(vec_ae), len(test))
|
|
|
|
-# predict(vec_ae, test)
|
|
|
|
-# print("SE Similarity")
|
|
|
|
-# predict(vec_se, test)
|
|
|
|
-# print("AE+SE Similarity")
|
|
|
|
-# predict(np.concatenate([vec_se*FLAGS.beta, vec_ae*(1.0-FLAGS.beta)], axis=1), test)
|
|
|
|
-
|
|
|
|
-print("Predict New Align Orgs")
|
|
|
|
-start_time = time.time()
|
|
|
|
-predict_new(np.concatenate([vec_se*FLAGS.beta, vec_ae*(1.0-FLAGS.beta)], axis=1))
|
|
|
|
-print("use time", time.time()-start_time)
|
|
|
|
|
|
def loadBestModel():
    """Restore the best GCN-Align checkpoint and predict new entity alignments.

    Rebuilds the TF graph (AE/SE placeholders and models), restores the
    trained weights from the saved checkpoint, runs one forward pass to get
    the attribute (AE) and structure (SE) embeddings, frees intermediates,
    and feeds the beta-weighted concatenation of both embeddings to
    ``predict_new``.  Intended to be called exactly once per process: the
    ``tf.app.flags`` definitions below raise on a second call.
    """
    # Checkpoint path. os.path.join keeps this portable; the original
    # hard-coded Windows "\\" separators.
    dir_best_model = os.path.join(os.getcwd(), "data1", "100000", "zh_en", "model.ckpt")
    sess = tf.Session()

    # Define placeholders (a single support matrix: the preprocessed adjacency).
    num_supports = 1
    ph_ae = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.sparse_placeholder(tf.float32),  # sparse attribute features
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder_with_default(0, shape=())
    }
    ph_se = {
        'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
        'features': tf.placeholder(tf.float32),  # dense (featureless) input
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features_nonzero': tf.placeholder_with_default(0, shape=())
    }

    # Hyper-parameter flags. NOTE(review): these presumably must match the
    # values used at training time for the restored weights to make sense —
    # confirm against the training script.
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_string('lang', 'zh_en', 'Dataset string.')  # 'zh_en', 'ja_en', 'fr_en'
    flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 500, 'Number of epochs to train.')
    flags.DEFINE_float('dropout', 0.3, 'Dropout rate (1 - keep probability).')
    flags.DEFINE_float('gamma', 3.0, 'Hyper-parameter for margin based loss.')
    flags.DEFINE_integer('k', 4, 'Number of negative samples for each positive seed.')
    flags.DEFINE_float('beta', 0.3, 'Weight for structure embeddings.')
    flags.DEFINE_integer('se_dim', 100, 'Dimension for SE.')
    flags.DEFINE_integer('ae_dim', 100, 'Dimension for AE.')
    flags.DEFINE_integer('seed', 9, 'Proportion of seeds, 3 means 30%')

    # Data processing: load the dataset and build the normalized adjacency.
    adj, ae_input, train, test = load_data(FLAGS.lang)
    support = [preprocess_adj(adj)]

    # Bind the concrete values to the placeholders defined above.
    feed_dict_ae = construct_feed_dict(ae_input, support, ph_ae)
    feed_dict_ae.update({ph_ae['dropout']: FLAGS.dropout})
    feed_dict_se = construct_feed_dict(1.0, support, ph_se)
    feed_dict_se.update({ph_se['dropout']: FLAGS.dropout})

    # Fill the negative-sample placeholders.  t == k == 0 deliberately:
    # inference needs no negative samples, so empty arrays merely satisfy
    # the graph's input requirements.
    t = 0
    k = 0
    e = ae_input[2][0]  # assumes ae_input[2] is the (rows, cols) shape — TODO confirm
    neg_left = np.ones((t, k)).reshape((t * k,))
    neg2_right = np.ones((t, k)).reshape((t * k,))
    neg2_left = np.random.choice(e, t * k)
    neg_right = np.random.choice(e, t * k)
    neg_feed = {'neg_left:0': neg_left, 'neg_right:0': neg_right,
                'neg2_left:0': neg2_left, 'neg2_right:0': neg2_right}
    feed_dict_ae.update(neg_feed)
    feed_dict_se.update(neg_feed)

    # Create models.
    model_func = GCN_Align
    # Attribute embedding model (sparse attribute features).
    model_ae = model_func(ph_ae, input_dim=ae_input[2][1], output_dim=FLAGS.ae_dim,
                          ILL=train, sparse_inputs=True, featureless=False, logging=True)
    # Structure embedding model (featureless: identity features).
    model_se = model_func(ph_se, input_dim=ae_input[2][0], output_dim=FLAGS.se_dim,
                          ILL=train, sparse_inputs=False, featureless=True, logging=True)

    # Load the trained weights from the checkpoint.
    saver = tf.train.Saver()
    saver.restore(sess, dir_best_model)

    # Run the last layer to obtain the embedding vectors.
    vec_ae = sess.run(model_ae.outputs, feed_dict=feed_dict_ae)
    vec_se = sess.run(model_se.outputs, feed_dict=feed_dict_se)

    # Free memory before the (large) similarity computation.
    print("清内存")
    sess.close()  # release the session's native resources before dropping it
    del saver
    del model_ae
    del model_se
    del model_func
    del feed_dict_ae
    del feed_dict_se
    del adj
    del ae_input
    del train
    # del test  # `test` is kept alive for the (disabled) evaluation below
    del support
    del sess
    gc.collect()

    # Optional evaluation (disabled): get_hits(vec_ae, test),
    # get_hits(vec_se, test), get_combine_hits(vec_se, vec_ae, FLAGS.beta, test),
    # and predict(...) similarity on the held-out `test` pairs.

    print("Predict New Align Orgs")
    start_time = time.time()
    predict_new(np.concatenate([vec_se * FLAGS.beta, vec_ae * (1.0 - FLAGS.beta)], axis=1))
    print("use time", time.time() - start_time)

    print("e" + str(FLAGS.epochs), "d" + str(FLAGS.dropout), "k" + str(FLAGS.k),
          "s" + str(FLAGS.seed), "lr" + str(FLAGS.learning_rate), "b" + str(FLAGS.beta))


if __name__ == '__main__':
    loadBestModel()
|