download_third_party_data.py 1.0 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. import logging
  2. import errno
  3. import os
  4. import nltk.data
  5. from iepy.preprocess.tagger import download as download_tagger
  6. from iepy.preprocess.ner.stanford import download as download_ner
  7. from iepy.preprocess.corenlp import download as download_corenlp
  8. from iepy.utils import DIRS
  9. def download_third_party_data():
  10. print("Making sure configuration folder exists")
  11. try:
  12. os.makedirs(DIRS.user_data_dir)
  13. except OSError as exc:
  14. if exc.errno == errno.EEXIST and os.path.isdir(DIRS.user_data_dir):
  15. pass
  16. else:
  17. raise
  18. print("Downloading punkt tokenizer")
  19. nltk.download("punkt")
  20. print("Downloading wordnet")
  21. nltk.download("wordnet")
  22. download_tagger()
  23. download_ner()
  24. download_corenlp()
  25. if __name__ == "__main__":
  26. logging.basicConfig(level=logging.INFO,
  27. format=u"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
  28. print("Downloading third party software...")
  29. download_third_party_data()
  30. print("Done.")