''' Created on 2019年4月11日 @author: User ''' import os import sys import h5py sys.path.append(os.path.abspath("../../..")) import pandas as pd import gensim import numpy as np import math from keras.callbacks import ModelCheckpoint from BiddingKG.dl.common.Utils import * from BiddingKG.dl.common.models import getBiLSTMModel import tensorflow as tf from keras.models import load_model def embedding(datas,shape): ''' @summary:查找词汇对应的词向量 @param: datas:词汇的list shape:结果的shape @return: array,返回对应shape的词嵌入 ''' model_w2v = getModel_word() embed = np.zeros(shape) length = shape[1] out_index = 0 #print(datas) for data in datas: index = 0 for item in str(data)[-shape[1]:]: if index>=length: break if item in model_w2v.vocab: embed[out_index][index] = model_w2v[item] index += 1 else: #embed[out_index][index] = model_w2v['unk'] index += 1 out_index += 1 return embed def labeling(label,out_len=3): out = np.zeros((out_len)) out[label] = 1 return out def getTrainData(percent=0.9): df = pd.read_excel("批量.xls") train_x = [] train_y = [] test_x = [] test_y = [] for before,text,after,label,turn in zip(df["list_before"],df["list_text"],df["list_after"],df["list_label"],df["turn"]): before = str(before) if str(before)!="nan" else "" text = str(text) after = str(after) if str(after)!="nan" else "" the_label = None if math.isnan(turn): the_label = int(label) else: the_label = int(turn) if the_label not in [0,1,2]: print(after,text) continue x = encodeInput([before,text,after], word_len=50, word_flag=True,userFool=False) y = labeling(the_label) if np.random.random()