1234567891011121314151617181920212223242526272829303132333435363738 |
- from collections import namedtuple
- from iepy.preprocess.pipeline import BasePreProcessStepRunner, PreProcessSteps
- FoundEntity = namedtuple('FoundEntity', 'key kind_name alias offset offset_end from_gazette')
- class BaseNERRunner(BasePreProcessStepRunner):
- """Base class for defining NER runners"""
- step = PreProcessSteps.ner
- def __init__(self, override=False):
- self.override = override
- def ok_for_running(self, doc):
- if not doc.was_preprocess_step_done(PreProcessSteps.sentencer):
- # Doc needs previous preprocess steps to be done
- return False
- if not self.override and doc.was_preprocess_step_done(self.step):
- # Current step was already done, and not working in override mode
- return False
- return True
- def __call__(self, doc):
- # Do not override this method when subclassing. Instead,
- # do it on the "run_ner"
- if not self.ok_for_running(doc):
- return
- entities = self.run_ner(doc)
- doc.set_ner_result(entities)
- doc.save()
- def run_ner(self, doc):
- # Define logic in here
- return []
- def build_occurrence(self, key, kind_name, alias, offset, offset_end):
- return FoundEntity(key, kind_name.upper(), alias, offset, offset_end, False)
|