base.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. from collections import namedtuple
  2. from iepy.preprocess.pipeline import BasePreProcessStepRunner, PreProcessSteps
  3. FoundEntity = namedtuple('FoundEntity', 'key kind_name alias offset offset_end from_gazette')
  4. class BaseNERRunner(BasePreProcessStepRunner):
  5. """Base class for defining NER runners"""
  6. step = PreProcessSteps.ner
  7. def __init__(self, override=False):
  8. self.override = override
  9. def ok_for_running(self, doc):
  10. if not doc.was_preprocess_step_done(PreProcessSteps.sentencer):
  11. # Doc needs previous preprocess steps to be done
  12. return False
  13. if not self.override and doc.was_preprocess_step_done(self.step):
  14. # Current step was already done, and not working in override mode
  15. return False
  16. return True
  17. def __call__(self, doc):
  18. # Do not override this method when subclassing. Instead,
  19. # do it on the "run_ner"
  20. if not self.ok_for_running(doc):
  21. return
  22. entities = self.run_ner(doc)
  23. doc.set_ner_result(entities)
  24. doc.save()
  25. def run_ner(self, doc):
  26. # Define logic in here
  27. return []
  28. def build_occurrence(self, key, kind_name, alias, offset, offset_end):
  29. return FoundEntity(key, kind_name.upper(), alias, offset, offset_end, False)