123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- from unittest import mock
- from operator import attrgetter
- from unittest import TestCase
- from iepy.preprocess.ner.base import BaseNERRunner
- from iepy.preprocess.ner.combiner import (
- CombinedNERRunner, NoOverlapCombinedNERRunner,
- KindPreferenceCombinedNERRunner)
- from iepy.preprocess.pipeline import PreProcessSteps
class BaseTestCombined(TestCase):
    """Shared fixture for the combined-NER tests.

    Provides two mocked sub-runners and a mocked document on which every
    preprocess step except ``PreProcessSteps.ner`` is reported as done.
    """

    def setUp(self):
        """Create fresh ``runner1``, ``runner2`` and ``doc`` mocks."""
        self.runner1, self.runner2 = mock.MagicMock(), mock.MagicMock()
        self.doc = mock.MagicMock()
        # Answer "was this step done?" with True for anything but NER.
        self.doc.was_preprocess_step_done.side_effect = (
            lambda step: step != PreProcessSteps.ner
        )
class TestCombinedNERRunner(BaseTestCombined):
    """Tests for how ``CombinedNERRunner`` calls its sub-runners.

    Covers when the sub-runners' ``run_ner`` is (not) invoked depending on
    the document's NER-done status and the ``override`` flag, and that the
    entities returned by every sub-runner reach ``doc.set_ner_result``.
    """

    @staticmethod
    def _entity_at(offset):
        """Return a mock entity whose ``offset`` attribute is ``offset``."""
        entity = mock.MagicMock()
        entity.offset = offset
        return entity

    def test_runners_called_when_not_done_before(self):
        """Each sub-runner is called once when NER was not done yet."""
        runner = CombinedNERRunner([self.runner1, self.runner2])
        runner(self.doc)
        self.runner1.run_ner.assert_called_once_with(self.doc)
        self.runner2.run_ner.assert_called_once_with(self.doc)

    def test_runners_called_when_override(self):
        """With ``override=True`` sub-runners run even if NER is done."""
        # Report every step, NER included, as already done.
        self.doc.was_preprocess_step_done.side_effect = lambda step: True
        runner = CombinedNERRunner(
            [self.runner1, self.runner2], override=True)
        runner(self.doc)
        self.runner1.run_ner.assert_called_once_with(self.doc)
        self.runner2.run_ner.assert_called_once_with(self.doc)

    def test_runners_not_called_when_done_before(self):
        """Without override, a document with NER done is left alone."""
        # Report every step, NER included, as already done.
        self.doc.was_preprocess_step_done.side_effect = lambda step: True
        runner = CombinedNERRunner([self.runner1, self.runner2])
        runner(self.doc)
        self.assertFalse(self.runner1.run_ner.called)
        self.assertFalse(self.runner2.run_ner.called)

    def test_no_entities_are_lost(self):
        """Entities from both sub-runners are stored on the document."""
        first, second = self._entity_at(1), self._entity_at(2)
        self.runner1.run_ner.side_effect = lambda doc: [first]
        self.runner2.run_ner.side_effect = lambda doc: [second]
        runner = CombinedNERRunner([self.runner1, self.runner2])
        runner(self.doc)
        self.doc.set_ner_result.assert_called_once_with([first, second])

    def test_can_define_combiner_for_only_one_ner(self):
        """A combiner wrapping a single sub-runner still stores a result."""
        runner = CombinedNERRunner([self.runner1])
        runner(self.doc)
        self.assertTrue(self.doc.set_ner_result.called)

    def test_can_define_combiner_for_lots_of_ners(self):
        """Four sub-runners, one entity each: all four entities are stored."""
        runners = [
            self.runner1, self.runner2, mock.MagicMock(), mock.MagicMock(),
        ]
        entities = []
        for position, sub_runner in enumerate(runners, start=1):
            entity = self._entity_at(position)
            entities.append(entity)
            # Bind ``entity`` as a default so each lambda keeps its own
            # entity instead of the last one assigned in the loop.
            sub_runner.run_ner.side_effect = (
                lambda doc, found=entity: [found]
            )
        runner = CombinedNERRunner(runners)
        runner(self.doc)
        self.doc.set_ner_result.assert_called_once_with(entities)
class TestNEROverlappingHandling(BaseTestCombined):
    """Tests for how the combined runners treat overlapping occurrences.

    The two mocked sub-runners return occurrence lists whose
    ``(offset, offset_end)`` spans overlap pairwise, so each combiner's
    conflict policy can be observed through ``doc.set_ner_result``.
    """

    def setUp(self):
        """Extend the base fixture with two overlapping result sets."""
        super().setUp()
        self.result1 = self.construct_occurrences(
            [(1, 3, 'X'), (6, 8, 'W'), (8, 9, 'X'), (11, 12, 'W')])
        self.result2 = self.construct_occurrences(
            [(2, 4, 'Y'), (5, 7, 'Z'), (8, 9, 'Y'), (9, 13, 'Z')])
        self.runner1.run_ner.side_effect = lambda doc: self.result1
        self.runner2.run_ner.side_effect = lambda doc: self.result2

    def construct_occurrences(self, data):
        """Build one occurrence per ``(offset, offset_end, kind)`` tuple.

        Occurrences are created with ``BaseNERRunner.build_occurrence``,
        always using ``'blah'`` as key and alias, and are returned as a list
        in the same order as ``data``.
        """
        builder = BaseNERRunner()
        return [
            builder.build_occurrence(
                key='blah',
                kind_name=kind,
                alias='blah',
                offset=offset,
                offset_end=offset_end,
            )
            for offset, offset_end, kind in data
        ]

    def _kind_preference_combiner(self, rank):
        """Return a ``KindPreferenceCombinedNERRunner`` over both runners."""
        return KindPreferenceCombinedNERRunner(
            [self.runner1, self.runner2],
            rank=rank,
        )

    def test_overlapped_are_stored_like_that_on_default_combiner(self):
        """The default combiner stores every occurrence, overlaps included."""
        runner = CombinedNERRunner([self.runner1, self.runner2])
        runner(self.doc)
        expected = sorted(
            self.result1 + self.result2,
            key=attrgetter('offset', 'offset_end', 'kind_name'),
        )
        self.doc.set_ner_result.assert_called_once_with(expected)

    def test_simple_overlap_solver_prefers_from_former_subners(self):
        """On overlap, the sub-runner listed first wins."""
        ner = NoOverlapCombinedNERRunner([self.runner1, self.runner2])
        ner(self.doc)
        self.doc.set_ner_result.assert_called_once_with(self.result1)
        # Again, with the sub-runners in the opposite order.
        ner = NoOverlapCombinedNERRunner([self.runner2, self.runner1])
        self.doc.reset_mock()
        ner(self.doc)
        self.doc.set_ner_result.assert_called_once_with(self.result2)

    def test_overlaps_is_solved_prefering_some_kind_over_other(self):
        """On overlap, the occurrence whose kind ranks better wins."""
        cases = [
            (['X', 'W', 'Y', 'Z'], self.result1),
            # Kinds missing from the rank lose against ranked ones.
            (['X', 'W'], self.result1),
            (['Z', 'Y'], self.result2),
        ]
        for rank, expected in cases:
            self._kind_preference_combiner(rank)(self.doc)
            self.assertEqual(
                self.doc.set_ner_result.call_args_list[-1],
                mock.call(expected))

    def test_kindpreference_must_be_instantiated_with_tuple_or_list(self):
        """``rank`` must be a tuple or list; anything else is a ValueError."""
        for invalid_rank in ('something', None, 1):
            with self.subTest(rank=invalid_rank):
                with self.assertRaises(ValueError):
                    self._kind_preference_combiner(invalid_rank)
        # These must not raise.
        self._kind_preference_combiner(('some', 'thing'))
        self._kind_preference_combiner(['some', 'thing'])
|