terminal.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. from collections import OrderedDict
  2. import logging
  3. from colorama import Fore, Style, init as colorama_init
  4. from future.builtins import input, str
  5. from iepy.data.db import CandidateEvidenceManager
  6. from iepy.data.models import SegmentToTag
  7. logger = logging.getLogger(__name__)
  8. class Answers(object):
  9. YES = u'y'
  10. NO = u'n'
  11. DONT_KNOW = u'd'
  12. STOP = u'stop'
  13. options = [YES, NO, DONT_KNOW, STOP]
  14. values = {YES: 1.0, NO: 0.0, DONT_KNOW: 0.5}
# Prompt used by TerminalInterviewer for each question. It is filled through
# %-formatting with a mapping holding the keys 'fact', 'text' and 'keys'.
QUESTION_TEMPLATE = str(u"""
Is the following text evidence of the Fact %(fact)s?
%(text)s
(%(keys)s): """)
  19. class TerminalInterviewer(object):
  20. """
  21. Capable of asking Human to validate evidence for some facts over a text terminal.
  22. Questions is a list of tuples of (Evidence, score), that will be consumed in
  23. the received order.
  24. Each time an evidence is validated or rejected by the human, correspondent
  25. boolean answer is stored by calling the provided callback.
  26. Extra options can be defined (key, explanation) like this:
  27. extra_options=[('stop', 'Stop algorithm')]
  28. when user picks such answers, the control is returned to the caller,
  29. leaving the internal state untouched, so it's possible to resume execution.
  30. """
  31. # FIXME: this "options" shall be merged with the Answers class defined above.
  32. YES = u'y'
  33. NO = u'n'
  34. DONT_KNOW = u'd'
  35. RUN = u'run'
  36. base_options = OrderedDict(
  37. [(YES, u'Valid Evidence'),
  38. (NO, u'Not valid Evidence'),
  39. (DONT_KNOW, u'Discard, not sure'),
  40. (RUN, u'Tired of answering for now. Run with what I gave you.')
  41. ])
  42. template = QUESTION_TEMPLATE
  43. def __init__(self, questions, store_answer_callback,
  44. extra_options=None):
  45. """
  46. Creates an object capable of asking Human to validate evidence for some facts.
  47. Questions is a list of tuples of (Evidence, score), that will be consumed in
  48. the received order.
  49. Each time an evidence is validated or rejected by the human, correspondent
  50. boolean answer is stored by calling the provided callback.
  51. Extra options can be defined (key, explanation) like this:
  52. extra_options=[('stop', 'Stop algorithm')]
  53. when user use such answers, flow is returned to the caller,
  54. and question is discarded (so it's possible to resume execution)
  55. """
  56. self.questions = questions
  57. self.raw_answers = [] # list of answers
  58. self.store_answer_callback = store_answer_callback
  59. self.extra_options = OrderedDict(extra_options or [])
  60. if set(self.base_options).intersection(self.extra_options.keys()):
  61. raise ValueError(u"Can't define extra answers with the builtin keys")
  62. self.keys = list(self.base_options.keys()) + list(self.extra_options.keys())
  63. self.formatter = TerminalEvidenceFormatter()
  64. def explain(self):
  65. """Returns string that explains how to use the tool for the person
  66. answering questions.
  67. """
  68. r = u"You'll be presented with pieces of text that have a good chance to be "
  69. r += u"evidences of the known facts. Please confirm or reject each.\n"
  70. r += u"Pay attention to the colors.\n"
  71. r += u"Possible answers are:\n"
  72. options = list(self.base_options.items()) + list(self.extra_options.items())
  73. r += u'\n'.join(' %s: %s' % (key, explanation) for key, explanation in options)
  74. print(r)
  75. def __call__(self):
  76. """For each available question prompts the Human if it's valid evidence or not.
  77. Returns None in case that all question has been answered (or when the Human
  78. indicates that he's tired of answering).
  79. Each time that Human replies with a custom answer (not in the base list) that
  80. answer will be returned instantaneously (and no further question will be shown
  81. except the terminal is invoked again).
  82. """
  83. colorama_init()
  84. self.explain()
  85. for evidence in self.questions[len(self.raw_answers):]:
  86. answer = self.get_human_answer(evidence)
  87. if answer in self.extra_options:
  88. # Will not be handled here but in the caller.
  89. return answer
  90. elif answer == self.RUN:
  91. # No more questions and answers for now. Use what is available.
  92. return None
  93. else:
  94. self.raw_answers.append(answer)
  95. if answer in [self.YES, self.NO]:
  96. self.store_answer_callback(evidence, answer == self.YES)
  97. def get_human_answer(self, evidence):
  98. keys = u'/'.join(self.keys)
  99. c_fact, c_text = self.formatter.colored_fact_and_text(evidence)
  100. question = self.template % {
  101. 'keys': keys, 'fact': c_fact,
  102. 'text': c_text
  103. }
  104. answer = input(question)
  105. while answer not in self.keys:
  106. answer = input('Invalid answer. (%s): ' % keys)
  107. return answer
  108. def human_oracle(evidence, possible_answers):
  109. """Simple text interface to query a human for fact generation."""
  110. colored_fact, colored_segment = evidence.colored_fact_and_text()
  111. print(u'SEGMENT: %s' % colored_segment)
  112. question = ' FACT: {0}? ({1}) '.format(colored_fact,
  113. u'/'.join(possible_answers))
  114. answer = input(question)
  115. while answer not in possible_answers:
  116. answer = input(question)
  117. return answer
  118. class TerminalEvidenceFormatter(object):
  119. default_color_1 = Fore.RED
  120. default_color_2 = Fore.GREEN
  121. def colored_text(self, ev, color_1=None, color_2=None):
  122. """Will return a naive formated text with entities remarked.
  123. Assumes that occurrences does not overlap.
  124. """
  125. color_1 = color_1 or self.default_color_1
  126. color_2 = color_2 or self.default_color_2
  127. # right and left entity-occurrences. "Right" and "Left" are just ideas, but
  128. # are not necessary their true position on the text
  129. r_eo = ev.right_entity_occurrence
  130. l_eo = ev.left_entity_occurrence
  131. ev.segment.hydrate()
  132. r_eo.hydrate_for_segment(ev.segment)
  133. l_eo.hydrate_for_segment(ev.segment)
  134. tkns = ev.segment.tokens[:]
  135. if r_eo.segment_offset < l_eo.segment_offset:
  136. tkns.insert(l_eo.segment_offset_end, Style.RESET_ALL)
  137. tkns.insert(l_eo.segment_offset, color_2)
  138. tkns.insert(r_eo.segment_offset_end, Style.RESET_ALL)
  139. tkns.insert(r_eo.segment_offset, color_1)
  140. else: # must be solved in the reverse order
  141. tkns.insert(r_eo.segment_offset_end, Style.RESET_ALL)
  142. tkns.insert(r_eo.segment_offset, color_1)
  143. tkns.insert(l_eo.segment_offset_end, Style.RESET_ALL)
  144. tkns.insert(l_eo.segment_offset, color_2)
  145. return u' '.join(tkns)
  146. def colored_fact(self, ev, color_1=None, color_2=None):
  147. color_1 = color_1 or self.default_color_1
  148. color_2 = color_2 or self.default_color_2
  149. right_entity = ev.right_entity_occurrence.entity
  150. left_entity = ev.left_entity_occurrence.entity
  151. return u'(%s <%s>, %s, %s <%s>)' % (
  152. color_1 + right_entity.key + Style.RESET_ALL,
  153. right_entity.kind,
  154. ev.relation.name,
  155. color_2 + left_entity.key + Style.RESET_ALL,
  156. left_entity.kind,
  157. )
  158. def colored_fact_and_text(self, ev, color_1=None, color_2=None):
  159. color_1 = color_1 or self.default_color_1
  160. color_2 = color_2 or self.default_color_2
  161. return (
  162. self.colored_fact(ev, color_1, color_2),
  163. self.colored_text(ev, color_1, color_2)
  164. )
  165. class TerminalAdministration(object):
  166. """Terminal/Console interface for administrating the run of a iepy extraction.
  167. """
  168. REFRESH = u'refresh'
  169. RUN = u'run'
  170. base_options = OrderedDict(
  171. [(REFRESH, u'Refresh - check how many new labels were created.'),
  172. (RUN, u'Run Process - run the process again with the info obtained'),
  173. ])
  174. def __init__(self, relation, extra_options):
  175. self.relation = relation
  176. self.extra_options = OrderedDict(extra_options or [])
  177. if set(self.base_options).intersection(self.extra_options.keys()):
  178. raise ValueError(u"Can't define extra options with the builtin keys")
  179. self.keys = list(self.base_options.keys()) + list(self.extra_options.keys())
  180. def update_candidate_evidences_to_label(self, evidence_candidates):
  181. # Will let the UI know which are the segments that have evidence to label.
  182. # Needs to respect the provided ordering, so the created SegmentToTag objects
  183. # when sorted by date respect the evidence_candidates provided.
  184. logger.info('Creating segments to tag')
  185. segments_to_tag = []
  186. for ev_c in evidence_candidates:
  187. if ev_c.segment not in segments_to_tag:
  188. segments_to_tag.append(ev_c.segment)
  189. existent_stt = {stt.segment_id: stt for stt in SegmentToTag.objects.filter(
  190. relation=self.relation, segment__in=segments_to_tag)}
  191. for segment in segments_to_tag:
  192. if segment.pk in existent_stt:
  193. stt = existent_stt[segment.pk]
  194. else:
  195. stt, created = SegmentToTag.objects.get_or_create(
  196. segment=segment,
  197. relation=self.relation,
  198. )
  199. if not stt.done:
  200. stt.save() # always saving, so modification_date is updated
  201. logger.info('Done creating segments to tag')
  202. def explain(self):
  203. """Returns string that explains how to use the tool for the person
  204. administering the extraction.
  205. """
  206. r = "Waiting for candidate evidences to be labeled. \n"
  207. r += "Available commands are:\n"
  208. options = list(self.base_options.items()) + list(self.extra_options.items())
  209. r += u'\n'.join(' %s: %s' % (key, explanation) for key, explanation in options)
  210. print(r)
  211. def __call__(self):
  212. self.explain()
  213. while True:
  214. # Forever loop until the administrator decides to stop it
  215. cmd = self.get_command()
  216. if cmd in self.extra_options or cmd == self.RUN:
  217. return cmd
  218. if cmd == self.REFRESH:
  219. self.refresh_info()
  220. def refresh_info(self):
  221. c = CandidateEvidenceManager.value_labeled_candidates_count_for_relation(
  222. self.relation)
  223. print ('There are %s labels with yes/no answers' % c)
  224. def get_command(self):
  225. keys = u'/'.join(self.keys)
  226. answer = input('Waiting... what to do: ')
  227. while answer not in self.keys:
  228. answer = input('"%s" is an invalid answer. (%s): ' % (answer, keys))
  229. return answer