import os
import sys
import re

import numpy as np
# from BiddingKG.dl.common.models import *
import tensorflow as tf
import jieba
import gensim
# from keras.layers import *
# from keras.models import Model
# from keras.utils import Sequence,to_categorical
# import keras.backend as K
# from keras.callbacks import Callback
import pandas as pd

maxlen = 512
words_size = 128

# w2v_filepath = os.path.dirname(__file__) + "/../wiki_128_word_embedding_new.vector"
w2v_filepath = "wiki_128_word_embedding_new.vector"
model_w2v = gensim.models.KeyedVectors.load_word2vec_format(w2v_filepath, binary=True)


def get_words_matrix(words):
    # Look up the word vector; fall back to the 'unk' vector for
    # out-of-vocabulary tokens. Note: KeyedVectors.vocab requires gensim < 4.0.
    if words in model_w2v.vocab:
        return model_w2v[words]
    else:
        return model_w2v['unk']


class Model_relation_extraction():
    def __init__(self):
        self.model_file = os.path.dirname(__file__) + "/models/model_attachment_classify"
        # print(self.model_file)
        self.sess = tf.Session(graph=tf.Graph())
        self.classes_dict = {
            0: '其他',         # other
            1: '招标文件',     # tender document
            2: '限价(控制价)', # price limit (control price)
            3: '工程量清单',   # bill of quantities
            4: '采购清单',     # procurement list
            5: '评标办法'      # bid evaluation method
        }
        self.getModel()

    def getModel(self):
        # Load the SavedModel and resolve its input/output tensors from the
        # default serving signature.
        with self.sess.as_default() as sess:
            with sess.graph.as_default():
                meta_graph_def = tf.saved_model.loader.load(sess, tags=["serve"], export_dir=self.model_file)
                signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                signature_def = meta_graph_def.signature_def
                input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                print(input0.shape)
                output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                self.model = [input0, output]
                return self.model

    def text_process(self, attachmentcon):
        # Normalize line breaks, whitespace, and filler punctuation, truncate
        # to 2500 characters, then tokenize with jieba.
        text = attachmentcon
        text = re.sub(r"\n+", ',', text)
        text = re.sub(r"\s+|？+", '', text)  # full-width '？' (was mis-encoded in source)
        text = re.sub(r"[\.·_]{2,}", ',', text)
        text = re.sub(r"_", '', text)
        text = text[:2500]
        tokens = list(jieba.cut(text))
        return tokens

    def evaluate(self, attachmentcon):
        text = str(attachmentcon)
        tokens = self.text_process(text)
        tokens = tokens[:maxlen]
        # Build a (maxlen, words_size) embedding matrix, zero-padded on the right.
        words_matrix = np.zeros((maxlen, words_size))
        for i in range(len(tokens)):
            words_matrix[i] = np.array(get_words_matrix(tokens[i]))
        words_matrix = np.array([words_matrix])
        pred = limitRun(self.sess, [self.model[1]], feed_dict={self.model[0]: words_matrix})[0]
        pred_label = np.argmax(pred[0])
        cn_label = self.classes_dict[pred_label]
        return pred_label, cn_label


def limitRun(sess, list_output, feed_dict, MAX_BATCH=1024):
    # Run the graph; when the input exceeds MAX_BATCH samples, split it into
    # chunks to bound memory usage and concatenate the per-chunk results.
    len_sample = 0
    if len(feed_dict.keys()) > 0:
        len_sample = len(feed_dict[list(feed_dict.keys())[0]])
    if len_sample > MAX_BATCH:
        list_result = [[] for _ in range(len(list_output))]
        _begin = 0
        while _begin < len_sample:
            _end = min(_begin + MAX_BATCH, len_sample)
            batch_feed = {k: v[_begin:_end] for k, v in feed_dict.items()}
            batch_result = sess.run(list_output, feed_dict=batch_feed)
            for i in range(len(list_output)):
                list_result[i].extend(batch_result[i])
            _begin = _end
        return [np.array(r) for r in list_result]
    else:
        return sess.run(list_output, feed_dict=feed_dict)
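

# Minimal usage sketch (not part of the original module). It assumes the
# SavedModel directory and the word2vec vector file exist at the paths
# configured above; sample_text is a hypothetical attachment body used only
# for illustration.
if __name__ == "__main__":
    classifier = Model_relation_extraction()
    sample_text = "招标文件 第一章 招标公告 ..."  # hypothetical input text
    pred_label, cn_label = classifier.evaluate(sample_text)
    print("predicted class id:", pred_label)
    print("predicted class name:", cn_label)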