# test_tagger.py (1.9 KB)
# Note: the pasted listing carried a file-viewer line-number gutter (1-44); it is not part of the module.
from itertools import chain
from unittest import TestCase

from iepy.data.models import IEDocument
from iepy.preprocess.pipeline import PreProcessSteps
from iepy.preprocess.tagger import TaggerRunner, StanfordTaggerRunner
from tests.factories import SentencedIEDocFactory

from .manager_case import ManagerTestCase


  7. class TestTaggerRunner(ManagerTestCase):
  8. ManagerClass = IEDocument
  9. def test_tagger_runner_is_calling_postagger(self):
  10. doc = SentencedIEDocFactory(text='Some sentence. And some other. Indeed!')
  11. expected_postags = [['DT', 'NN', '.'], ['CC', 'DT', 'JJ', '.'], ['RB', '.']]
  12. i = iter(expected_postags)
  13. def postagger(sents):
  14. return (zip(sent, next(i)) for sent in sents)
  15. tag = TaggerRunner(postagger)
  16. tag(doc)
  17. self.assertTrue(doc.was_preprocess_step_done(PreProcessSteps.tagging))
  18. self.assertEqual(doc.postags, sum(expected_postags, []))
  19. def test_tagger_runner_not_overriding_by_default(self):
  20. doc = SentencedIEDocFactory(text='Some sentence. And some other. Indeed!')
  21. postagger1 = lambda sents: [[(x, 'A') for x in sent] for sent in sents]
  22. postagger2 = lambda sents: [[(x, 'B') for x in sent] for sent in sents]
  23. tag = TaggerRunner(postagger1)
  24. tag(doc)
  25. tag.postagger = postagger2 # XXX: accessing implementation
  26. tag(doc)
  27. self.assertTrue(all(x == 'A' for x in doc.postags))
  28. def test_tagger_runner_overriding_when_selected(self):
  29. doc = SentencedIEDocFactory(text='Some sentence. And some other. Indeed!')
  30. postagger1 = lambda sents: [[(x, 'A') for x in sent] for sent in sents]
  31. postagger2 = lambda sents: [[(x, 'B') for x in sent] for sent in sents]
  32. tag = TaggerRunner(postagger1, override=True)
  33. tag(doc)
  34. tag.postagger = postagger2 # XXX: accessing implementation
  35. tag(doc)
  36. self.assertTrue(all(x == 'B' for x in doc.postags))