rules_core.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. # -*- coding: utf-8 -*-
  2. from operator import attrgetter
  3. import logging
  4. import refo
  5. from iepy.extraction.rules import generate_tokens_to_match, compile_rule
  6. logger = logging.getLogger(__name__)
  7. class RuleBasedCore(object):
  8. """
  9. IEPY's alternative main class. Implements a rule-based information extractor.
  10. From the user's point of view this class is meant to be used like this::
  11. extractor = RuleBasedCore(relation, [<rule-1>, ..., <rule-n>])
  12. extractor.start()
  13. predictions = extractor.predict(candidates) # profit
  14. """
  15. def __init__(self, relation, rules, verbosity=0):
  16. self.relation = relation
  17. self.rules = sorted(rules, key=attrgetter("priority"), reverse=True)
  18. self.learnt = {}
  19. self.verbosity = verbosity
  20. ###
  21. ### IEPY User API
  22. ###
  23. def start(self):
  24. """
  25. Prepares the internal information to start predicting.
  26. """
  27. self.rule_regexes = [
  28. (compile_rule(rule, self.relation), rule.answer) for rule in self.rules
  29. ]
  30. def predict(self, candidates):
  31. """
  32. Using the provided rules, on the given order, applies them to each evidence
  33. candidate, verifying if they match or not.
  34. Returns a dict {evidence: True/False}, where the boolean label indicates if
  35. the relation is present on that evidence or not.
  36. """
  37. logger.info('Predicting using rule based core')
  38. predicted = {}
  39. for i, evidence in enumerate(candidates):
  40. match = self.match(evidence)
  41. predicted[evidence] = match if match is not None else False
  42. if self.verbosity > 0:
  43. if (i + 1) % 1000 == 0:
  44. logger.info('checked {} candidate evidences'.format(i+1))
  45. return predicted
  46. def add_answer(self):
  47. """Dumb method on this extractor, just API compliance"""
  48. pass
  49. def process(self):
  50. """Dumb method on this extractor, just API compliance"""
  51. pass
  52. @property
  53. def questions(self):
  54. """Dumb method on this extractor, just API compliance"""
  55. return []
  56. def match(self, evidence):
  57. tokens_to_match = generate_tokens_to_match(evidence)
  58. for regex, answer in self.rule_regexes:
  59. match = refo.match(regex, tokens_to_match)
  60. if match:
  61. return answer