views.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. import json
  2. from collections import defaultdict
  3. from django.db.models import Q
  4. from django.contrib import messages
  5. from django.contrib.auth.decorators import login_required
  6. from django.core.urlresolvers import reverse
  7. from django.shortcuts import get_object_or_404, render_to_response, redirect
  8. from django.utils.decorators import method_decorator
  9. from django.utils import formats
  10. from django.views.generic.base import TemplateView
  11. from django.http import HttpResponse, HttpResponseBadRequest
  12. from extra_views import ModelFormSetView
  13. from corpus.forms import EvidenceForm, EvidenceOnDocumentForm, EvidenceToolboxForm
  14. from corpus.models import (
  15. Relation, TextSegment, IEDocument,
  16. EvidenceLabel, SegmentToTag, EntityKind
  17. )
  18. from iepy.data.db import EntityOccurrenceManager
  19. def _judge(request):
  20. return request.user.username
  21. class Home(TemplateView):
  22. template_name = 'corpus/home.html'
  23. def get_context_data(self, **kwargs):
  24. context = super().get_context_data(**kwargs)
  25. context["relations"] = Relation.objects.all()
  26. segments_to_tag = SegmentToTag.objects.filter(done=False)
  27. relation_ids_to_tag = list(set(segments_to_tag.values_list("relation", flat=True)))
  28. relations_to_tag = Relation.objects.filter(id__in=relation_ids_to_tag)
  29. context["iepy_runs"] = relations_to_tag
  30. return context
  31. home = login_required(Home.as_view())
  32. def next_segment_to_label(request, relation_id):
  33. relation = get_object_or_404(Relation, pk=relation_id)
  34. segment = relation.get_next_segment_to_label(_judge(request))
  35. if segment is None:
  36. return render_to_response('message.html',
  37. {'msg': 'There are no more evidence to label'})
  38. return redirect('corpus:label_evidence_for_segment', relation.pk, segment.pk)
  39. def next_document_to_label(request, relation_id):
  40. relation = get_object_or_404(Relation, pk=relation_id)
  41. doc = relation.get_next_document_to_label(_judge(request))
  42. if doc is None:
  43. return render_to_response('message.html',
  44. {'msg': 'There are no more evidence to label'})
  45. return redirect('corpus:label_evidence_for_document', relation.pk, doc.pk)
  46. def _navigate_labeled_items(request, relation_id, current_id, direction, type_, judgeless):
  47. # The parameter current_id indicates where the user is situated when asking
  48. # to move back or forth
  49. type_name = 'document' if type_ == IEDocument else 'segment'
  50. url_name = 'corpus:label_evidence_for_%s' % type_name
  51. relation = get_object_or_404(Relation, pk=relation_id)
  52. current = get_object_or_404(type_, pk=current_id)
  53. current_id = int(current_id)
  54. going_back = direction.lower() == 'back'
  55. judge = _judge(request) if not judgeless else None
  56. obj_id_to_show = relation.labeled_neighbor(current, judge, going_back)
  57. if obj_id_to_show is None:
  58. # Internal logic couldn't decide what other obj to show. Better to
  59. # forward to the one already shown
  60. response = redirect(url_name, relation.pk, current_id)
  61. messages.add_message(request, messages.WARNING,
  62. 'No other %s to show.' % type_name)
  63. return response
  64. else:
  65. response = redirect(url_name, relation.pk, obj_id_to_show)
  66. if obj_id_to_show == current_id:
  67. direction_str = "previous" if going_back else "next"
  68. messages.add_message(
  69. request, messages.WARNING,
  70. 'No {0} {1} to show.'.format(direction_str, type_name))
  71. return response
  72. def navigate_labeled_segments(request, relation_id, segment_id, direction, judgeless=False):
  73. return _navigate_labeled_items(
  74. request, relation_id, segment_id, direction, TextSegment, judgeless
  75. )
  76. def navigate_labeled_documents(request, relation_id, document_id, direction, judgeless=False):
  77. return _navigate_labeled_items(
  78. request, relation_id, document_id, direction, IEDocument, judgeless
  79. )
  80. class _BaseLabelEvidenceView(ModelFormSetView):
  81. form_class = EvidenceForm
  82. model = EvidenceLabel
  83. extra = 0
  84. max_num = None
  85. can_order = False
  86. can_delete = False
  87. @method_decorator(login_required)
  88. def dispatch(self, *args, **kwargs):
  89. return super().dispatch(*args, **kwargs)
  90. @property
  91. def judge(self):
  92. return _judge(self.request)
  93. class LabelEvidenceOnSegmentBase(_BaseLabelEvidenceView):
  94. template_name = 'corpus/segment_questions.html'
  95. def get_context_data(self, **kwargs):
  96. context = super().get_context_data(**kwargs)
  97. self.segment.hydrate()
  98. title = "Labeling Evidence for Relation {0}".format(self.relation)
  99. subtitle = 'For Document "{0}", Text Segment id {1}'.format(
  100. self.segment.document.human_identifier,
  101. self.segment.id)
  102. context.update({
  103. 'title': title,
  104. 'subtitle': subtitle,
  105. 'segment': self.segment,
  106. 'segment_rich_tokens': list(self.segment.get_enriched_tokens()),
  107. 'relation': self.relation,
  108. 'draw_navigation': True,
  109. })
  110. return context
  111. def get_segment_and_relation(self):
  112. if hasattr(self, 'segment') and hasattr(self, 'relation'):
  113. return self.segment, self.relation
  114. self.segment = get_object_or_404(TextSegment, pk=self.kwargs['segment_id'])
  115. self.segment.hydrate()
  116. self.relation = get_object_or_404(Relation, pk=self.kwargs['relation_id'])
  117. evidences = list(self.segment.get_evidences_for_relation(self.relation))
  118. for ev in evidences:
  119. ev.get_or_create_label_for_judge(self.relation, self.judge) # creating EvidenceLabels
  120. return self.segment, self.relation
  121. def get_queryset(self):
  122. segment, relation = self.get_segment_and_relation()
  123. return super().get_queryset().filter(
  124. judge=self.judge, evidence_candidate__segment=self.segment,
  125. relation=self.relation,
  126. labeled_by_machine=False,
  127. )
  128. def get_success_url(self):
  129. return reverse('corpus:next_segment_to_label', args=[self.relation.pk])
  130. def formset_valid(self, formset):
  131. """
  132. Add message to the user, and set who made this labeling (judge).
  133. """
  134. for form in formset:
  135. if form.has_changed():
  136. form.instance.judge = str(self.request.user)
  137. result = super().formset_valid(formset)
  138. messages.add_message(self.request, messages.INFO,
  139. 'Changes saved for segment {0}.'.format(self.segment.id))
  140. return result
  141. class LabelEvidenceOnSegmentView(LabelEvidenceOnSegmentBase):
  142. def get_context_data(self, *args, **kwargs):
  143. context = super().get_context_data(*args, **kwargs)
  144. for formset in context["formset"]:
  145. instance = formset.instance
  146. evidence = instance.evidence_candidate
  147. instance.all_labels = evidence.labels.all()
  148. context["draw_navigation"] = True
  149. context["draw_postags"] = True
  150. return context
  151. def human_in_the_loop(request, relation_id):
  152. relation = get_object_or_404(Relation, pk=relation_id)
  153. segments_to_tag = SegmentToTag.objects.filter(
  154. relation=relation,
  155. done=False,
  156. ).order_by("-modification_date")
  157. if not segments_to_tag:
  158. return render_to_response(
  159. 'message.html',
  160. {'msg': 'There are no more evidence to label'}
  161. )
  162. segment_to_tag = segments_to_tag[0]
  163. return redirect(
  164. 'corpus:human_in_the_loop_segment',
  165. relation.pk, segment_to_tag.segment.pk
  166. )
  167. class HumanInTheLoopView(LabelEvidenceOnSegmentBase):
  168. def get_context_data(self, *args, **kwargs):
  169. context = super().get_context_data(*args, **kwargs)
  170. context["draw_navigation"] = False
  171. return context
  172. def get_success_url(self):
  173. return reverse('corpus:human_in_the_loop', args=[self.relation.pk])
  174. def formset_valid(self, formset):
  175. result = super().formset_valid(formset)
  176. segment = get_object_or_404(TextSegment, pk=self.kwargs["segment_id"])
  177. segment_to_tag = SegmentToTag.objects.get(
  178. segment=segment,
  179. relation=self.relation,
  180. )
  181. segment_to_tag.done = True
  182. segment_to_tag.save()
  183. return result
  184. class LabelEvidenceOnDocumentView(_BaseLabelEvidenceView):
  185. template_name = 'corpus/document_questions.html'
  186. form_class = EvidenceOnDocumentForm
  187. def get_text_segments(self, only_with_evidences=False):
  188. if only_with_evidences:
  189. return self.relation._matching_text_segments().filter(
  190. document_id=self.document.id).order_by('offset').distinct()
  191. else:
  192. return self.document.get_text_segments()
  193. def get_context_data(self, **kwargs):
  194. ctx = super(LabelEvidenceOnDocumentView, self).get_context_data(**kwargs)
  195. title = "Labeling Evidence for Relation {0}".format(self.relation)
  196. subtitle = 'For Document "{0}"'.format(self.document.human_identifier)
  197. segments_with_rich_tokens = []
  198. for segment in self.get_text_segments(only_with_evidences=True):
  199. segment.hydrate()
  200. segments_with_rich_tokens.append(
  201. {'id': segment.id,
  202. 'rich_tokens': list(segment.get_enriched_tokens())}
  203. )
  204. if self.document.syntactic_sentences:
  205. parsed_sentences = [x.pprint() for x in self.document.syntactic_sentences]
  206. else:
  207. parsed_sentences = [""] * len(segments_with_rich_tokens)
  208. if not segments_with_rich_tokens:
  209. ctx = {
  210. 'title': title,
  211. 'document': self.document,
  212. 'relation': self.relation,
  213. 'eos_propperties': {},
  214. 'relations_list': [],
  215. 'forms_values': [],
  216. 'draw_navigation': True,
  217. 'entity_kinds': EntityKind.objects.all(),
  218. }
  219. return ctx
  220. other_judges_labels = defaultdict(list)
  221. for formset in ctx["formset"]:
  222. instance = formset.instance
  223. evidence = instance.evidence_candidate
  224. for label in evidence.labels.filter(
  225. Q(relation=instance.relation) & ~Q(id=instance.id)
  226. ):
  227. other_judges_labels[label.judge].append([
  228. evidence.left_entity_occurrence.id,
  229. evidence.right_entity_occurrence.id,
  230. label.label
  231. ])
  232. forms_values = {}
  233. eos_propperties = {}
  234. relations_list = []
  235. formset = ctx['formset']
  236. for form_idx, form in enumerate(formset):
  237. lbl_evidence = form.instance
  238. evidence = lbl_evidence.evidence_candidate
  239. left_eo_id = evidence.left_entity_occurrence.pk
  240. right_eo_id = evidence.right_entity_occurrence.pk
  241. info = "Labeled as {} by {} on {}".format(
  242. lbl_evidence.label,
  243. lbl_evidence.judge if lbl_evidence.judge else "unknown",
  244. formats.date_format(
  245. lbl_evidence.modification_date, "SHORT_DATETIME_FORMAT"
  246. )
  247. )
  248. relations_list.append({
  249. "relation": [left_eo_id, right_eo_id],
  250. "form_id": form.prefix,
  251. "info": info,
  252. })
  253. forms_values[form.prefix] = lbl_evidence.label
  254. for eo_id in [left_eo_id, right_eo_id]:
  255. if eo_id not in eos_propperties:
  256. eos_propperties[eo_id] = {
  257. 'selectable': True,
  258. 'selected': False,
  259. }
  260. form_toolbox = EvidenceToolboxForm(prefix='toolbox')
  261. question_options = [x[0] for x in form_toolbox.fields["label"].choices]
  262. form_for_others = EvidenceForm(
  263. prefix='for_others', initial={"label": EvidenceLabel.NORELATION}
  264. )
  265. different_kind = self.relation.left_entity_kind != self.relation.right_entity_kind
  266. ctx.update({
  267. 'title': title,
  268. 'subtitle': subtitle,
  269. 'document': self.document,
  270. 'segments': segments_with_rich_tokens,
  271. 'parsed_sentences': parsed_sentences,
  272. 'relation': self.relation,
  273. 'form_for_others': form_for_others,
  274. 'form_toolbox': form_toolbox,
  275. 'initial_tool': EvidenceLabel.YESRELATION,
  276. 'eos_propperties': json.dumps(eos_propperties),
  277. 'relations_list': json.dumps(relations_list),
  278. 'forms_values': json.dumps(forms_values),
  279. 'question_options': question_options,
  280. 'other_judges_labels': json.dumps(other_judges_labels),
  281. 'other_judges': list(other_judges_labels.keys()),
  282. "draw_navigation": True,
  283. 'entity_kinds': EntityKind.objects.all(),
  284. 'different_kind': different_kind,
  285. })
  286. return ctx
  287. def get_document_and_relation(self):
  288. if hasattr(self, 'document') and hasattr(self, 'relation'):
  289. return self.document, self.relation
  290. self.document = get_object_or_404(IEDocument, pk=self.kwargs['document_id'])
  291. self.relation = get_object_or_404(Relation, pk=self.kwargs['relation_id'])
  292. evidences = []
  293. for segment in self.document.get_text_segments():
  294. evidences.extend(
  295. list(segment.get_evidences_for_relation(self.relation))
  296. )
  297. for ev in evidences:
  298. ev.get_or_create_label_for_judge(self.relation, self.judge) # creating EvidenceLabels
  299. return self.document, self.relation
  300. def get_queryset(self):
  301. document, relation = self.get_document_and_relation()
  302. return super().get_queryset().filter(
  303. judge=self.judge, evidence_candidate__segment__document_id=document,
  304. relation=relation,
  305. labeled_by_machine=False,
  306. )
  307. def get_success_url(self):
  308. if self.is_partial_save():
  309. return self.request.META.get('HTTP_REFERER')
  310. return reverse('corpus:next_document_to_label', args=[self.relation.pk])
  311. def get_default_label_value(self):
  312. return self.request.POST.get('for_others-label', None)
  313. def is_partial_save(self):
  314. # "partial saves" is a hack to allow edition of the Preprocess while labeling
  315. return self.request.POST.get('partial_save', '') == 'enabled'
  316. def formset_valid(self, formset):
  317. """
  318. Add message to the user, handle the "for the rest" case, and set
  319. who made this labeling (judge).
  320. """
  321. partial = self.is_partial_save()
  322. if partial:
  323. default_lbl = None
  324. else:
  325. default_lbl = self.get_default_label_value()
  326. for form in formset:
  327. if form.instance.label is None:
  328. form.instance.label = default_lbl
  329. if form.has_changed():
  330. form.instance.judge = str(self.request.user)
  331. result = super().formset_valid(formset)
  332. if not partial:
  333. messages.add_message(
  334. self.request, messages.INFO,
  335. 'Changes saved for document {0}.'.format(self.document.id)
  336. )
  337. return result
  338. def get_formset_kwargs(self):
  339. """
  340. If is a partial save, hacks the forms to match the queryset so it
  341. matches the ones that actually has a CandidateEvidence.
  342. This is to handle the case where an entity occurrence was removed.
  343. """
  344. kwargs = super().get_formset_kwargs()
  345. queryset = kwargs.get("queryset", [])
  346. data = kwargs.get("data", {})
  347. partial = data.get("partial_save")
  348. if partial != "enabled":
  349. return kwargs
  350. new_data = data.copy()
  351. initial_forms_key = "form-INITIAL_FORMS"
  352. total_forms_key = "form-TOTAL_FORMS"
  353. query_ids = [str(x.id) for x in queryset]
  354. included_forms = []
  355. for key, value in data.items():
  356. if key.endswith("-id"):
  357. form_id = key[:-3]
  358. label_key = "{}-label".format(form_id)
  359. if value in query_ids:
  360. label = data[label_key]
  361. included_forms.append((value, label))
  362. new_data.pop(key)
  363. new_data.pop(label_key)
  364. for i, (form_id, label) in enumerate(included_forms):
  365. form_id_key = "form-{}-id".format(i)
  366. form_label_key = "form-{}-label".format(i)
  367. new_data[form_id_key] = form_id
  368. new_data[form_label_key] = label
  369. new_data[total_forms_key] = str(len(included_forms))
  370. new_data[initial_forms_key] = str(len(included_forms))
  371. kwargs["data"] = new_data
  372. return kwargs
  373. def navigate_documents(request, document_id, direction):
  374. if direction == "back":
  375. documents = IEDocument.objects.filter(id__lt=document_id).order_by("-id")
  376. else:
  377. documents = IEDocument.objects.filter(id__gt=document_id).order_by("id")
  378. if documents:
  379. document_id = documents[0].id
  380. else:
  381. messages.add_message(
  382. request, messages.WARNING,
  383. 'No more documents to show'
  384. )
  385. return redirect('corpus:navigate_document', document_id)
  386. class DocumentNavigation(TemplateView):
  387. template_name = 'corpus/document.html'
  388. def get_context_data(self, document_id, **kwargs):
  389. context = super().get_context_data(**kwargs)
  390. document = get_object_or_404(IEDocument, pk=self.kwargs['document_id'])
  391. sentences = [{"rich_tokens": x, "id": i} for i, x in enumerate(document.get_sentences(enriched=True))]
  392. if document.syntactic_sentences:
  393. parsed_sentences = [x.pprint() for x in document.syntactic_sentences]
  394. else:
  395. parsed_sentences = [""] * len(sentences)
  396. context["entity_kinds"] = EntityKind.objects.all()
  397. context["document"] = document
  398. context["segments"] = sentences
  399. context["parsed_sentences"] = parsed_sentences
  400. context["draw_navigation"] = True
  401. return context
  402. def create_entity_occurrence(request):
  403. kind = get_object_or_404(EntityKind, id=request.POST.get("kind"))
  404. document = get_object_or_404(IEDocument, id=request.POST.get("doc_id"))
  405. if "offset" not in request.POST or "offset_end" not in request.POST:
  406. raise HttpResponseBadRequest("Invalid offsets")
  407. try:
  408. offset = int(request.POST["offset"])
  409. offset_end = int(request.POST["offset_end"])
  410. except ValueError:
  411. raise HttpResponseBadRequest("Invalid offsets")
  412. EntityOccurrenceManager.create_with_entity(kind, document, offset, offset_end)
  413. result = json.dumps({"success": True})
  414. return HttpResponse(result, content_type='application/json')