# luojiehua / iepy-develop
# -*- coding: utf-8 -*-

from unittest import mock

from refo.patterns import Pattern
from refo import Question, Star, Any

from iepy.data.db import CandidateEvidenceManager
from iepy.extraction.rules import rule, Token
from iepy.extraction.rules_core import RuleBasedCore
from .factories import (
    EntityKindFactory, RelationFactory, TextSegmentFactory,
    IEDocFactory, EntityOccurrenceFactory, EntityFactory,
)
from .manager_case import ManagerTestCase


class TestRuleBasedCore(ManagerTestCase):
    """Tests for ``RuleBasedCore`` driven by ``@rule``-decorated functions.

    Every test runs against the fixture built in ``setUp``: one document, one
    segment spanning all of its tokens, and a person / date pair of entity
    occurrences that are candidates for the "born in" relation.
    """

    def setUp(self):
        """Build the relation, document, segment and entity occurrences."""
        super().setUp()

        kind_person = EntityKindFactory(name="person")
        kind_date = EntityKindFactory(name="date")
        self.person_date_relation = RelationFactory(
            name="born in",
            left_entity_kind=kind_person,
            right_entity_kind=kind_date,
        )
        text = "John Soplete ( 15 august 1990 - 26 september 2058 ) " \
               "was a software developer"
        document = self._create_simple_document(text)
        # A single segment covering every token of the document.
        segment = TextSegmentFactory(
            document=document,
            offset=0,
            offset_end=len(document.tokens)
        )
        self.segment = segment

        # Tokens 0-1: "John Soplete".
        e_john = EntityFactory(key="John Soplete", kind=kind_person)
        eo1 = EntityOccurrenceFactory(
            entity=e_john, document=document,
            offset=0, offset_end=2,
            alias="j0hn",
        )
        eo1.segments.add(segment)

        # Tokens 3-5: "15 august 1990" (token 2 is the opening parenthesis).
        e_date = EntityFactory(key="15 august 1990", kind=kind_date)
        eo2 = EntityOccurrenceFactory(
            entity=e_date, document=document,
            offset=3, offset_end=6,
            alias="1990-08-15",
        )
        eo2.segments.add(segment)
        self._candidates = self.get_candidates(self.person_date_relation)

    def get_candidates(self, relation):
        """Return the candidate evidences for ``relation`` as a list."""
        return list(CandidateEvidenceManager.candidates_for_relation(relation))

    def _create_simple_document(self, text):
        """Create and save a document whose tokens are ``text.split()``.

        Each token is paired with its position in the token sequence, gets an
        empty lemma and the placeholder POS tag ``"POSTAG"``.
        """
        tokens = tuple(text.split())
        lemmas = [""] * len(tokens)
        postags = ["POSTAG"] * len(tokens)
        document = IEDocFactory(text=text)
        # (index, token) pairs, the same shape zip(range(n), tokens) produces.
        document.set_tokenization_result(list(enumerate(tokens)))
        document.set_lemmatization_result(lemmas)
        document.set_tagging_result(postags)
        document.save()
        return document

    def _predict_with_rules(self, rules):
        """Run a ``RuleBasedCore`` built from ``rules`` over the fixture.

        Returns whatever ``predict`` returns for ``self._candidates``.
        """
        pipeline = RuleBasedCore(self.person_date_relation, rules)
        pipeline.start()
        pipeline.process()
        return pipeline.predict(self._candidates)

    def test_rule_that_matches(self):
        """A positive rule whose pattern fits the text labels the candidate."""

        @rule(True)
        def test_rule(Subject, Object):
            anything = Question(Star(Any()))
            return Subject + Token("(") + Object + Token("-") + anything

        facts = self._predict_with_rules([test_rule])
        candidate = self._candidates[0]
        self.assertTrue(facts[candidate])

    def test_rule_that_not_matches(self):
        """A rule whose pattern does not fit leaves the candidate falsy."""

        @rule(True)
        def test_rule(Subject, Object):
            return Subject + Object + Token("something here")

        facts = self._predict_with_rules([test_rule])
        candidate = self._candidates[0]
        self.assertFalse(facts[candidate])

    def test_empty_rules(self):
        """With no rules at all, no candidate gets a truthy prediction."""
        facts = self._predict_with_rules([])
        positives = [evidence for evidence in facts if facts[evidence]]
        self.assertEqual(len(positives), 0)

    def test_match_run_on_every_rule(self):
        """Every rule in the list must be invoked by the pipeline."""
        # Build ten *distinct* mocks. ``[mocked_rule] * 10`` would repeat a
        # single object ten times, and the loop below would then pass as soon
        # as just one rule had been called.
        mocked_rules = [
            rule(True)(mock.MagicMock(return_value=Token("asd")))
            for _ in range(10)
        ]
        self._predict_with_rules(mocked_rules)

        for mock_rule in mocked_rules:
            self.assertTrue(mock_rule.called)
            # The unpacking also checks the rule got exactly two positionals.
            Subject, Object = mock_rule.call_args[0]
            self.assertIsInstance(Subject, Pattern)

    def test_rule_priority(self):
        """Only the priority=1 rule's compiled result reaches ``refo.match``."""
        # Local import: the module object itself is needed for patching.
        import refo

        def matcher(*args):
            return True

        def not_matcher(*args):
            return None

        # The mocks stand in for rule functions; their return values play the
        # role of the compiled patterns handed to ``refo.match``.
        high_priority_rule = rule(True, priority=1)(
            mock.MagicMock(return_value=matcher))
        low_priority_rule = rule(True, priority=0)(
            mock.MagicMock(return_value=not_matcher))

        # The low-priority rule is listed first on purpose, so list order
        # alone cannot explain which rule ends up being evaluated.
        pipeline = RuleBasedCore(self.person_date_relation,
                                 [low_priority_rule, high_priority_rule])
        pipeline.start()
        # All rules are compiled on start
        self.assertTrue(high_priority_rule.called)
        self.assertTrue(low_priority_rule.called)
        pipeline.process()
        with mock.patch.object(refo, 'match') as fake_refo_match:
            # Replacement for refo.match: just call the "compiled" rule.
            fake_refo_match.side_effect = lambda regex, evidence: regex()
            pipeline.predict(self._candidates)
            # One call per candidate: the first rule tried already matched.
            self.assertEqual(fake_refo_match.call_count, len(self._candidates))
            # On every call, the pattern passed in must be ``matcher``.
            for c_args in fake_refo_match.call_args_list:
                args, kwargs = c_args
                self.assertEqual(args[0], matcher)

    def test_rule_incorrect_answer(self):
        """``rule`` rejects an answer that is not a boolean."""
        with self.assertRaises(ValueError):
            @rule("YE")
            def rule_match(Subject, Object):
                anything = Question(Star(Any()))
                return Subject + Token("(") + Object + Token("-") + anything

    def test_rule_with_negative_answer(self):
        """A matching rule declared with ``False`` yields a falsy prediction."""

        @rule(False)
        def test_rule(Subject, Object):
            anything = Question(Star(Any()))
            return Subject + Token("(") + Object + Token("-") + anything

        facts = self._predict_with_rules([test_rule])
        candidate = self._candidates[0]
        self.assertFalse(facts[candidate])