# luojiehua / iepy-develop
							from collections import OrderedDict
import logging

from colorama import Fore, Style, init as colorama_init
from future.builtins import input, str

from iepy.data.db import CandidateEvidenceManager
from iepy.data.models import SegmentToTag


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class Answers(object):
    """Keys that can be typed as an answer, plus a numeric value for some of them."""

    YES, NO, DONT_KNOW, STOP = u'y', u'n', u'd', u'stop'

    # All the keys defined above, as a list.
    options = [YES, NO, DONT_KNOW, STOP]

    # Numeric value associated with each judgement key; STOP has no value.
    values = dict(zip((YES, NO, DONT_KNOW), (1.0, 0.0, 0.5)))


# Prompt shown to the human for each evidence. It is filled with %-formatting
# from a mapping that provides the ``fact``, ``text`` and ``keys`` entries.
# The text ends with ": " (no newline), so the reply is typed on the same line.
# ``str`` here is the name imported from ``future.builtins``.
QUESTION_TEMPLATE = str(u"""
Is the following text evidence of the Fact %(fact)s?
    %(text)s
(%(keys)s): """)


class TerminalInterviewer(object):
    """
    Asks a Human, over a text terminal, whether pieces of text are valid
    evidence for some facts.

    Questions are consumed in the received order. Each item is handed unchanged
    to the formatter's ``colored_fact_and_text`` and, for yes/no answers, to the
    provided callback together with a boolean (True for "yes").
    NOTE(review): older documentation described the items as (Evidence, score)
    tuples, but the code passes each item straight to the formatter -- confirm
    the expected shape against the callers.

    Extra options can be defined (key, explanation) like this:
        extra_options=[('stop', 'Stop algorithm')]
    When the user picks one of them, its key is returned to the caller right
    away and nothing is recorded, so calling the interviewer again resumes at
    that very same question.
    """
    # FIXME: this "options" shall be merged with the Answers class defined above.
    YES = u'y'
    NO = u'n'
    DONT_KNOW = u'd'
    RUN = u'run'
    base_options = OrderedDict((
        (YES, u'Valid Evidence'),
        (NO, u'Not valid Evidence'),
        (DONT_KNOW, u'Discard, not sure'),
        (RUN, u'Tired of answering for now. Run with what I gave you.'),
    ))
    template = QUESTION_TEMPLATE

    def __init__(self, questions, store_answer_callback,
                 extra_options=None):
        """
        Prepares an interviewer for the given questions.

        questions: sliceable sequence of items to ask about, in order.
        store_answer_callback: called as ``callback(item, is_valid)`` for each
            yes/no answer.
        extra_options: optional sequence of (key, explanation) pairs adding
            answers besides the builtin ones.

        Raises ValueError if an extra key is also a builtin key.
        """
        self.questions = questions
        self.raw_answers = []  # one entry per question already answered
        self.store_answer_callback = store_answer_callback
        self.extra_options = OrderedDict(extra_options or [])
        clashing = set(self.base_options) & set(self.extra_options)
        if clashing:
            raise ValueError(u"Can't define extra answers with the builtin keys")
        self.keys = list(self.base_options) + list(self.extra_options)
        self.formatter = TerminalEvidenceFormatter()

    def explain(self):
        """Prints the instructions for the person answering the questions:
        a short introduction followed by every accepted key (builtin ones
        first, then the extra ones) with its explanation.
        """
        intro = (
            u"You'll be presented with pieces of text that have a good chance to be "
            u"evidences of the known facts. Please confirm or reject each.\n"
            u"Pay attention to the colors.\n"
            u"Possible answers are:\n"
        )
        described = list(self.base_options.items()) + list(self.extra_options.items())
        option_lines = ['   %s: %s' % pair for pair in described]
        print(intro + u'\n'.join(option_lines))

    def __call__(self):
        """Prompts the Human for each question not answered yet.

        Returns None when every question has been answered, or when the Human
        picks the "run" answer. When the Human picks one of the extra options,
        that key is returned immediately; neither case records the answer, so
        the pending question is asked again on the next invocation.
        Any other answer is recorded, and yes/no answers are also forwarded to
        the callback.
        """
        colorama_init()
        self.explain()
        # Skip as many questions as answers were already recorded.
        pending = self.questions[len(self.raw_answers):]
        for evidence in pending:
            answer = self.get_human_answer(evidence)
            if answer in self.extra_options:
                # Will not be handled here but in the caller.
                return answer
            if answer == self.RUN:
                # No more questions and answers for now. Use what is available.
                return None
            self.raw_answers.append(answer)
            if answer in (self.YES, self.NO):
                self.store_answer_callback(evidence, answer == self.YES)

    def get_human_answer(self, evidence):
        """Shows the question for ``evidence`` and keeps prompting until the
        typed reply is one of the accepted keys; returns that key.
        """
        joined_keys = u'/'.join(self.keys)
        fact, text = self.formatter.colored_fact_and_text(evidence)
        prompt = self.template % dict(keys=joined_keys, fact=fact, text=text)
        reply = input(prompt)
        while reply not in self.keys:
            reply = input('Invalid answer. (%s): ' % joined_keys)
        return reply


def human_oracle(evidence, possible_answers):
    """Simple text interface to query a human for fact generation.

    Prints the colored segment of ``evidence``, then prompts with its colored
    fact and the accepted answers joined by "/". The same prompt is repeated
    until the reply is one of ``possible_answers``; that reply is returned.
    """
    fact_repr, segment_repr = evidence.colored_fact_and_text()
    print(u'SEGMENT: %s' % segment_repr)
    choices = u'/'.join(possible_answers)
    prompt = ' FACT: {0}? ({1}) '.format(fact_repr, choices)
    while True:
        reply = input(prompt)
        if reply in possible_answers:
            return reply


class TerminalEvidenceFormatter(object):
    """Renders an evidence for the terminal, wrapping both entity occurrences
    in colour codes. The first colour is used for the right occurrence and the
    second one for the left occurrence.
    """
    default_color_1 = Fore.RED
    default_color_2 = Fore.GREEN

    def _resolve_colors(self, color_1, color_2):
        """Returns both colours, replacing each falsy one by its class default."""
        if not color_1:
            color_1 = self.default_color_1
        if not color_2:
            color_2 = self.default_color_2
        return color_1, color_2

    def colored_text(self, ev, color_1=None, color_2=None):
        """Returns the segment tokens joined by single spaces, with each entity
        occurrence placed between its colour code and a style reset.
        Assumes that occurrences does not overlap.
        """
        first_color, second_color = self._resolve_colors(color_1, color_2)

        # right and left entity-occurrences. "Right" and "Left" are just ideas, but
        # are not necessary their true position on the text
        right_occurrence = ev.right_entity_occurrence
        left_occurrence = ev.left_entity_occurrence
        ev.segment.hydrate()
        right_occurrence.hydrate_for_segment(ev.segment)
        left_occurrence.hydrate_for_segment(ev.segment)
        tokens = ev.segment.tokens[:]

        # Markers of the occurrence that starts later are inserted first, so the
        # offsets of the other occurrence are still valid when its turn comes.
        if right_occurrence.segment_offset < left_occurrence.segment_offset:
            insertion_order = [(left_occurrence, second_color),
                               (right_occurrence, first_color)]
        else:
            insertion_order = [(right_occurrence, first_color),
                               (left_occurrence, second_color)]
        for occurrence, color in insertion_order:
            # Closing marker first: it sits after the opening one.
            tokens.insert(occurrence.segment_offset_end, Style.RESET_ALL)
            tokens.insert(occurrence.segment_offset, color)
        return u' '.join(tokens)

    def colored_fact(self, ev, color_1=None, color_2=None):
        """Returns the fact as "(right_key <kind>, relation_name, left_key <kind>)"
        with each entity key wrapped in its colour.
        """
        first_color, second_color = self._resolve_colors(color_1, color_2)
        right = ev.right_entity_occurrence.entity
        left = ev.left_entity_occurrence.entity
        pieces = (
            first_color + right.key + Style.RESET_ALL,
            right.kind,
            ev.relation.name,
            second_color + left.key + Style.RESET_ALL,
            left.kind,
        )
        return u'(%s <%s>, %s, %s <%s>)' % pieces

    def colored_fact_and_text(self, ev, color_1=None, color_2=None):
        """Returns the tuple (colored fact, colored text) for the evidence,
        using the same pair of colours for both.
        """
        first_color, second_color = self._resolve_colors(color_1, color_2)
        fact = self.colored_fact(ev, first_color, second_color)
        text = self.colored_text(ev, first_color, second_color)
        return (fact, text)


class TerminalAdministration(object):
    """Terminal/Console interface for administrating the run of a iepy extraction.

    The builtin commands are "refresh" and "run". More commands can be added
    through ``extra_options``, a sequence of (key, explanation) pairs.
    """
    REFRESH = u'refresh'
    RUN = u'run'
    base_options = OrderedDict(
        [(REFRESH, u'Refresh - check how many new labels were created.'),
         (RUN, u'Run Process - run the process again with the info obtained'),
         ])

    def __init__(self, relation, extra_options=None):
        """
        relation: stored as-is; used to look up and create SegmentToTag objects
            and to count the labeled candidates.
        extra_options: optional sequence of (key, explanation) pairs. None or an
            empty sequence means that only the builtin commands are available.

        Raises ValueError if an extra key is also a builtin key.
        """
        self.relation = relation
        self.extra_options = OrderedDict(extra_options or [])
        if set(self.base_options).intersection(self.extra_options.keys()):
            raise ValueError(u"Can't define extra options with the builtin keys")
        self.keys = list(self.base_options.keys()) + list(self.extra_options.keys())

    def update_candidate_evidences_to_label(self, evidence_candidates):
        """Makes sure there is a SegmentToTag for the segment of every candidate
        and re-saves those that are not done yet.
        """
        # Will let the UI know which are the segments that have evidence to label.
        # Needs to respect the provided ordering, so the created SegmentToTag objects
        # when sorted by date respect the evidence_candidates provided.
        logger.info('Creating segments to tag')
        segments_to_tag = []
        for ev_c in evidence_candidates:
            # A list (not a set) is used so the first-seen order is preserved.
            if ev_c.segment not in segments_to_tag:
                segments_to_tag.append(ev_c.segment)

        # Fetch in one query the objects that already exist for these segments.
        existent_stt = {stt.segment_id: stt for stt in SegmentToTag.objects.filter(
            relation=self.relation, segment__in=segments_to_tag)}
        for segment in segments_to_tag:
            if segment.pk in existent_stt:
                stt = existent_stt[segment.pk]
            else:
                stt, _ = SegmentToTag.objects.get_or_create(
                    segment=segment,
                    relation=self.relation,
                )
            if not stt.done:
                stt.save()  # always saving, so modification_date is updated
        logger.info('Done creating segments to tag')

    def explain(self):
        """Prints the instructions for the person administering the extraction:
        every available command (builtin ones first, then the extra ones) with
        its explanation.
        """
        r = "Waiting for candidate evidences to be labeled. \n"
        r += "Available commands are:\n"
        options = list(self.base_options.items()) + list(self.extra_options.items())
        r += u'\n'.join('   %s: %s' % (key, explanation) for key, explanation in options)
        print(r)

    def __call__(self):
        """Reads commands until "run" or one of the extra options is typed, and
        returns that command. "refresh" prints the current label count and
        keeps waiting.
        """
        self.explain()
        while True:
            # Forever loop until the administrator decides to stop it
            cmd = self.get_command()
            if cmd in self.extra_options or cmd == self.RUN:
                return cmd
            if cmd == self.REFRESH:
                self.refresh_info()

    def refresh_info(self):
        """Prints how many labeled candidates the manager reports for the relation."""
        c = CandidateEvidenceManager.value_labeled_candidates_count_for_relation(
            self.relation)
        print('There are %s labels with yes/no answers' % c)

    def get_command(self):
        """Prompts until the typed text is one of the accepted keys; returns it."""
        keys = u'/'.join(self.keys)
        answer = input('Waiting... what to do: ')
        while answer not in self.keys:
            answer = input('"%s" is an invalid answer. (%s): ' % (answer, keys))
        return answer