# feature.py
'''
Created on 2019-03-26
@author: User
'''
import math

import numpy as np
import pandas as pd

from BiddingKG.dl.common.Utils import *
  9. def encoding(text,shape=(100,60),expand=False):
  10. embedding = np.zeros(shape)
  11. word_model = getModel_word()
  12. for i in range(len(text)):
  13. if i>=shape[0]:
  14. break
  15. if text[i] in word_model.vocab:
  16. embedding[i] = word_model[text[i]]
  17. if expand:
  18. embedding = np.expand_dims(embedding,0)
  19. return embedding
  20. def getData(path = "train.xls"):
  21. x = []
  22. y = []
  23. df = pd.read_excel(path)
  24. for item,label in zip(df["list_item"],df["list_label"]):
  25. if str(type(item))!="<class 'str'>":
  26. continue
  27. x.append(encoding(item))
  28. y.append((np.arange(2)==label).astype(np.integer))
  29. return np.array(x),np.array(y)
  30. if __name__=="__main__":
  31. #getData()
  32. print(encodeInput("招标单位"))