# test_document.py (3.5 KB)

  1. from iepy.preprocess.ner.base import FoundEntity
  2. from .factories import SentencedIEDocFactory, GazetteItemFactory
  3. from .manager_case import ManagerTestCase
  4. class TestDocumentInvariants(ManagerTestCase):
  5. def test_cant_set_different_number_of_synparse_than_sentences(self):
  6. doc = SentencedIEDocFactory()
  7. sents = list(doc.get_sentences())
  8. fake_syn_parse_items = [
  9. '<fake parse tree %i>' % i for i in range(len(sents) + 1)]
  10. with self.assertRaises(ValueError):
  11. doc.set_syntactic_parsing_result(fake_syn_parse_items)
  12. class TestSetNERResults(ManagerTestCase):
  13. def _f_eo(self, key='something', kind_name='ABC', alias='The dog', offset=0,
  14. offset_end=2, from_gazette=False):
  15. # constructs and returns a simple FoundEntity with the args given
  16. return FoundEntity(key=key, kind_name=kind_name, alias=alias, offset=offset,
  17. offset_end=offset_end, from_gazette=from_gazette)
  18. def setUp(self):
  19. self.doc = SentencedIEDocFactory(text="The dog is dead. Long live the dog.")
  20. def test_simple(self):
  21. f_eo = self._f_eo()
  22. self.doc.set_ner_result([f_eo])
  23. self.assertEqual(self.doc.entity_occurrences.count(), 1)
  24. eo = self.doc.entity_occurrences.first()
  25. self.assertEqual(eo.entity.key, f_eo.key)
  26. self.assertEqual(eo.entity.kind.name, f_eo.kind_name)
  27. self.assertEqual(eo.entity.gazette, None)
  28. self.assertEqual(eo.offset, f_eo.offset)
  29. self.assertEqual(eo.offset_end, f_eo.offset_end)
  30. self.assertEqual(eo.alias, f_eo.alias)
  31. def test_offsets_are_checked(self):
  32. f_eo = self._f_eo(offset=-1) # negative offset
  33. self.assertRaises(ValueError, self.doc.set_ner_result, [f_eo])
  34. f_eo = self._f_eo(offset=2, offset_end=2) # end lte start
  35. self.assertRaises(ValueError, self.doc.set_ner_result, [f_eo])
  36. f_eo = self._f_eo(offset=2, offset_end=1) # end lte start
  37. self.assertRaises(ValueError, self.doc.set_ner_result, [f_eo])
  38. doc_tkns = len(self.doc.tokens)
  39. f_eo = self._f_eo(offset=doc_tkns + 1,
  40. offset_end=doc_tkns + 3) # bigger than doc tokens
  41. self.assertRaises(ValueError, self.doc.set_ner_result, [f_eo])
  42. def test_if_from_gazette_is_enabled_gazetteitem_is_set(self):
  43. f_eo = self._f_eo(from_gazette=True)
  44. gz_item = GazetteItemFactory(kind__name=f_eo.kind_name,
  45. text=f_eo.key)
  46. self.doc.set_ner_result([f_eo])
  47. eo = self.doc.entity_occurrences.first()
  48. self.assertEqual(eo.entity.gazette, gz_item)
  49. def test_sending_again_same_found_entity_is_idempotent(self):
  50. f_eo = self._f_eo()
  51. self.doc.set_ner_result([f_eo])
  52. self.doc.set_ner_result([f_eo])
  53. self.assertEqual(self.doc.entity_occurrences.count(), 1)
  54. def test_sending_twice_same_found_entity_doesnt_crash(self):
  55. f_eo = self._f_eo()
  56. self.doc.set_ner_result([f_eo, f_eo])
  57. self.assertEqual(self.doc.entity_occurrences.count(), 1)
  58. def test_same_different_eos_with_same_offsets_and_kind_are_not_allowed(self):
  59. f_eo = self._f_eo()
  60. f_eo_2 = self._f_eo(key=f_eo.key + ' and more') # to be sure is another key
  61. self.doc.set_ner_result([f_eo, f_eo_2])
  62. self.assertEqual(self.doc.entity_occurrences.count(), 1)
  63. eo = self.doc.entity_occurrences.first()
  64. # the one that is saved is the first one
  65. self.assertEqual(eo.entity.key, f_eo.key)