gen_vec.py 857 B

1234567891011121314151617181920212223242526272829303132333435
  # coding: utf8
"""Convert a whitespace-separated word-vector text file into a TSV file.

Each data line of the input is ``<word> <v1> <v2> ...``.  Each output row is
``<word><TAB>{<v1>,<v2>,...}`` followed by a newline.  The first non-blank
input line is treated as a header and is not copied to the output
(presumably ``<vocabulary size> <dimension>`` -- TODO confirm against the
actual input file).
"""
import codecs

# NOTE(review): pandas is not referenced anywhere in this script; the import
# is kept only so nothing that relies on it breaks -- confirm and remove.
import pandas as pd

SOURCE_PATH = "C:\\Users\\User\\Desktop\\sgns.merge.word.txt"
OUTPUT_PATH = "vec.tsv"


def convert_vectors(source_path, output_path, skip_header=True):
    """Rewrite the vectors in *source_path* as TSV rows in *output_path*.

    Args:
        source_path: Path of the UTF-8 input file, one word vector per line.
        output_path: Path of the UTF-8 TSV file to create or overwrite.
        skip_header: When true (the default), the first non-blank line is
            discarded instead of being converted.

    Returns:
        The number of rows written to *output_path*.

    Raises:
        IOError / OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    rows_written = 0
    header_pending = skip_header
    with codecs.open(source_path, "r", encoding="utf8") as reader:
        with codecs.open(output_path, "w", encoding="utf8") as writer:
            # Iterate to the real end of file.  A blank line must not stop
            # the conversion, otherwise every vector after it is lost.
            for raw_line in reader:
                fields = raw_line.split()
                if not fields:
                    # Blank or whitespace-only line: nothing to convert.
                    continue
                if header_pending:
                    header_pending = False
                    continue
                # One write per row; the ``with`` block flushes and closes
                # the file, so no explicit flush()/close() is needed.
                writer.write(
                    fields[0] + "\t{" + ",".join(fields[1:]) + "}\n"
                )
                rows_written += 1
    return rows_written


def main():
    """Convert the default input file into the default output file."""
    convert_vectors(SOURCE_PATH, OUTPUT_PATH)


if __name__ == "__main__":
    main()