1234567891011121314151617181920212223242526272829303132333435363738394041 |
'''
Created on 2019-03-26

@author: User
'''
- import numpy as np
- import pandas as pd
- import math
- from BiddingKG.dl.common.Utils import *
def encoding(text, shape=(100, 60), expand=False):
    """Turn ``text`` into a zero-padded embedding array of the given ``shape``.

    Each element of ``text`` is looked up in the model returned by
    ``getModel_word()``; when the element is present in ``word_model.vocab``
    its vector is written to the row with the same position. Elements missing
    from the vocabulary leave their row as zeros, and elements at positions
    ``>= shape[0]`` are ignored.

    Args:
        text: Indexable sequence of tokens (callers in this file pass a str,
            so each token is a single character).
        shape: Shape of the output array before the optional expansion.
        expand: When true, a leading axis of length 1 is added to the result.

    Returns:
        The embedding array, with shape ``shape`` or ``(1, *shape)``.
    """
    embedding = np.zeros(shape)
    # NOTE(review): getModel_word is not defined in this file; presumably it
    # comes from the BiddingKG.dl.common.Utils star import -- confirm.
    word_model = getModel_word()
    for position, token in enumerate(text):
        # Stop once every available row has been considered.
        if position >= shape[0]:
            break
        if token in word_model.vocab:
            # Assumes the model's vector fits one row of ``embedding``.
            embedding[position] = word_model[token]
    return np.expand_dims(embedding, 0) if expand else embedding
def getData(path="train.xls"):
    """Load labelled samples from an Excel sheet and encode them for training.

    The sheet is read with ``pandas.read_excel`` and must contain the columns
    ``list_item`` (the text to encode) and ``list_label`` (the class label).
    Rows whose item is not a string are skipped together with their label, so
    the two returned arrays always stay aligned.

    Args:
        path: Path of the Excel file to read.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays: ``x`` stacks the result of
        ``encoding(item)`` for every kept row, and ``y`` stacks the matching
        two-element one-hot vectors built by comparing ``label`` against
        ``[0, 1]``.
    """
    df = pd.read_excel(path)
    x = []
    y = []
    for item, label in zip(df["list_item"], df["list_label"]):
        # Non-text cells (pandas typically yields float NaN for empty cells)
        # cannot be encoded; isinstance replaces the fragile comparison of
        # str(type(item)) against a hard-coded repr.
        if not isinstance(item, str):
            continue
        x.append(encoding(item))
        # np.int_ is the concrete default integer type; passing the abstract
        # np.integer as a dtype is deprecated by NumPy.
        y.append((np.arange(2) == label).astype(np.int_))
    return np.array(x), np.array(y)
-
if __name__ == "__main__":
    # Manual smoke check; the getData() call is intentionally left disabled.
    # getData()
    # NOTE(review): encodeInput is not defined in this file; presumably it is
    # provided by the BiddingKG.dl.common.Utils star import -- confirm.
    sample_text = "招标单位"
    print(encodeInput(sample_text))
|