dictionary.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. #!/usr/bin/env python
  2. # -*-coding:utf-8-*-
  3. from BiddingKG.dl.foolnltk.selffool import trie
  4. class Dictionary():
  5. def __init__(self):
  6. self.trie = trie.Trie()
  7. self.weights = {}
  8. self.sizes = 0
  9. def delete_dict(self):
  10. self.trie = trie.Trie()
  11. self.weights = {}
  12. self.sizes = 0
  13. def add_dict(self, path):
  14. words = []
  15. with open(path,encoding="utf8") as f:
  16. for i, line in enumerate(f):
  17. line = line.strip("\n").strip()
  18. if not line:
  19. continue
  20. line = line.split()
  21. word = line[0].strip()
  22. self.trie.add_keyword(word)
  23. if len(line) != 3:
  24. weight = 1.0
  25. else:
  26. #print(line)
  27. weight = float(line[2])
  28. weight = float(weight)
  29. self.weights[word] = weight
  30. words.append(word)
  31. self.sizes += len(self.weights)
  32. def parse_words(self, text):
  33. matchs = self.trie.parse_text(text)
  34. return matchs
  35. def get_weight(self, word):
  36. return self.weights.get(word, 0.1)