123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- # -*- coding: utf-8 -*-
- from unittest import mock
- from refo.patterns import Pattern
- from refo import Question, Star, Any
- from iepy.data.db import CandidateEvidenceManager
- from iepy.extraction.rules import rule, Token
- from iepy.extraction.rules_core import RuleBasedCore
- from .factories import (
- EntityKindFactory, RelationFactory, TextSegmentFactory,
- IEDocFactory, EntityOccurrenceFactory, EntityFactory,
- )
- from .manager_case import ManagerTestCase
class TestRuleBasedCore(ManagerTestCase):
    """Exercise ``RuleBasedCore`` against one person/date candidate evidence.

    ``setUp`` creates a "born in" relation between a person kind and a date
    kind, plus a single text segment with one occurrence of each, and stores
    the resulting candidate evidences in ``self._candidates``.
    """

    def setUp(self):
        """Create the relation, document, segment and entity occurrences."""
        super(TestRuleBasedCore, self).setUp()
        kind_person = EntityKindFactory(name="person")
        kind_date = EntityKindFactory(name="date")
        self.person_date_relation = RelationFactory(
            name="born in",
            left_entity_kind=kind_person,
            right_entity_kind=kind_date,
        )

        text = (
            "John Soplete ( 15 august 1990 - 26 september 2058 ) "
            "was a software developer"
        )
        document = self._create_simple_document(text)
        # A single segment spanning every token of the document.
        segment = TextSegmentFactory(
            document=document,
            offset=0,
            offset_end=len(document.tokens),
        )
        self.segment = segment

        # Tokens 0-1 ("John Soplete") are the person occurrence.
        e_john = EntityFactory(key="John Soplete", kind=kind_person)
        eo1 = EntityOccurrenceFactory(
            entity=e_john, document=document,
            offset=0, offset_end=2,
            alias="j0hn",
        )
        eo1.segments.add(segment)

        # Tokens 3-5 ("15 august 1990") are the date occurrence.
        e_date = EntityFactory(key="15 august 1990", kind=kind_date)
        eo2 = EntityOccurrenceFactory(
            entity=e_date, document=document,
            offset=3, offset_end=6,
            alias="1990-08-15",
        )
        eo2.segments.add(segment)

        self._candidates = self.get_candidates(self.person_date_relation)

    def get_candidates(self, relation):
        """Return the candidate evidences of ``relation`` as a list."""
        return list(CandidateEvidenceManager.candidates_for_relation(relation))

    def _create_simple_document(self, text):
        """Create and save a document whose tokens are ``text.split()``.

        Each token is paired with its positional index, every lemma is the
        empty string and every POS tag is the placeholder ``"POSTAG"``.
        """
        tokens = tuple(text.split())
        lemmas = [""] * len(tokens)
        postags = ["POSTAG"] * len(tokens)
        document = IEDocFactory(text=text)
        document.set_tokenization_result(list(enumerate(tokens)))
        document.set_lemmatization_result(lemmas)
        document.set_tagging_result(postags)
        document.save()
        return document

    def test_rule_that_matches(self):
        """A positive rule matching the segment yields a truthy prediction."""
        @rule(True)
        def test_rule(Subject, Object):
            anything = Question(Star(Any()))
            return Subject + Token("(") + Object + Token("-") + anything

        pipeline = RuleBasedCore(self.person_date_relation, [test_rule])
        pipeline.start()
        pipeline.process()
        facts = pipeline.predict(self._candidates)
        candidate = self._candidates[0]
        self.assertTrue(facts[candidate])

    def test_rule_that_not_matches(self):
        """A rule that does not match yields a falsy prediction."""
        @rule(True)
        def test_rule(Subject, Object):
            return Subject + Object + Token("something here")

        pipeline = RuleBasedCore(self.person_date_relation, [test_rule])
        pipeline.start()
        pipeline.process()
        facts = pipeline.predict(self._candidates)
        candidate = self._candidates[0]
        self.assertFalse(facts[candidate])

    def test_empty_rules(self):
        """With no rules, no candidate is predicted as a fact."""
        pipeline = RuleBasedCore(self.person_date_relation, [])
        pipeline.start()
        pipeline.process()
        facts = pipeline.predict(self._candidates)
        positives = [candidate for candidate in facts if facts[candidate]]
        self.assertEqual(len(positives), 0)

    def test_match_run_on_every_rule(self):
        """Every rule is called with pattern arguments."""
        # Build ten *distinct* mocks. ``[mock] * 10`` would repeat a single
        # object, so one call would satisfy the assertion for all ten entries
        # and the test could not detect a skipped rule.
        mocked_rules = [
            rule(True)(mock.MagicMock(return_value=Token("asd")))
            for _ in range(10)
        ]
        pipeline = RuleBasedCore(self.person_date_relation, mocked_rules)
        pipeline.start()
        pipeline.process()
        pipeline.predict(self._candidates)
        for mock_rule in mocked_rules:
            self.assertTrue(mock_rule.called)
            Subject, Object = mock_rule.call_args[0]
            self.assertIsInstance(Subject, Pattern)

    def test_rule_priority(self):
        """Only the higher-priority rule's regex reaches ``refo.match``."""
        def matcher(*args):
            return True

        def not_matcher(*args):
            return None

        rule_should_run = rule(True, priority=1)(
            mock.MagicMock(return_value=matcher))
        rule_should_not_run = rule(True, priority=0)(
            mock.MagicMock(return_value=not_matcher))
        # The lower-priority rule is listed first on purpose.
        pipeline = RuleBasedCore(self.person_date_relation,
                                 [rule_should_not_run, rule_should_run])
        pipeline.start()
        # All rules are compiled on start
        self.assertTrue(rule_should_run.called)
        self.assertTrue(rule_should_not_run.called)
        pipeline.process()

        import refo
        with mock.patch.object(refo, 'match') as fake_refo_match:
            # Stand-in for refo.match: call the "regex" (one of the matcher
            # functions above) and return its result.
            fake_refo_match.side_effect = lambda regex, evidence: regex()
            pipeline.predict(self._candidates)
            self.assertEqual(fake_refo_match.call_count,
                             len(self._candidates))
            # Check that every call received the high-priority matcher.
            for c_args in fake_refo_match.call_args_list:
                args, kwargs = c_args
                self.assertEqual(args[0], matcher)

    def test_rule_incorrect_answer(self):
        """``rule`` raises ``ValueError`` for the answer ``"YE"``."""
        with self.assertRaises(ValueError):
            @rule("YE")
            def rule_match(Subject, Object):
                anything = Question(Star(Any()))
                return Subject + Token("(") + Object + Token("-") + anything

    def test_rule_with_negative_answer(self):
        """A matching rule declared with ``False`` yields a falsy prediction."""
        @rule(False)
        def test_rule(Subject, Object):
            anything = Question(Star(Any()))
            return Subject + Token("(") + Object + Token("-") + anything

        pipeline = RuleBasedCore(self.person_date_relation, [test_rule])
        pipeline.start()
        pipeline.process()
        facts = pipeline.predict(self._candidates)
        candidate = self._candidates[0]
        self.assertFalse(facts[candidate])
|