rules_verifier.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. """
  2. IEPY rules verifier
  3. Usage:
  4. rules_verifier.py <relation> [options]
  5. Options:
  6. --shuffle Chooses the sample randomly and not the first ones
  7. --create-evidences Creates evidences that are missing [default: false]
  8. -r --rule=<rule> Tests only this rule
  9. -l --limit=<limit> Limits the amount of evidences uses
  10. -h --help Show this screen
  11. """
  12. import sys
  13. import logging
  14. from docopt import docopt
  15. import refo
  16. from django.core.exceptions import ObjectDoesNotExist
  17. from colorama import init as colorama_init
  18. import iepy
  19. iepy.setup(__file__)
  20. from iepy.data import models
  21. from iepy.data.models import EvidenceCandidate
  22. from iepy.data.db import CandidateEvidenceManager
  23. from iepy.extraction.terminal import TerminalEvidenceFormatter
  24. from iepy.extraction.rules import (
  25. load_rules, compile_rule, generate_tokens_to_match
  26. )
  27. from iepy.metrics import result_dict_from_predictions
  28. logging.basicConfig(level=logging.INFO, format='%(message)s')
  29. def run_from_command_line():
  30. opts = docopt(__doc__, version=iepy.__version__)
  31. relation_name = opts.get("<relation>")
  32. limit = opts.get("--limit")
  33. rule_name = opts.get("--rule")
  34. shuffle = opts.get("--shuffle")
  35. create_evidences = opts.get("--create-evidences")
  36. if limit is None:
  37. limit = -1
  38. try:
  39. limit = int(limit)
  40. except ValueError:
  41. logging.error("Invalid limit value, it must be a number")
  42. sys.exit(1)
  43. try:
  44. relation = models.Relation.objects.get(name=relation_name)
  45. except ObjectDoesNotExist:
  46. logging.error("Relation {!r} not found".format(relation_name))
  47. sys.exit(1)
  48. # Load rules
  49. rules = get_rules(rule_name)
  50. rule_regexes = [
  51. (rule.__name__, compile_rule(rule, relation), rule.answer) for rule in rules
  52. ]
  53. # Load evidences
  54. if EvidenceCandidate.objects.all().count() == 0:
  55. create_evidences = True
  56. evidences = CandidateEvidenceManager.candidates_for_relation(
  57. relation, create_evidences, seg_limit=limit, shuffle_segs=shuffle
  58. )
  59. conflict_solver = CandidateEvidenceManager.conflict_resolution_newest_wins
  60. answers = CandidateEvidenceManager.labels_for(
  61. relation, evidences, conflict_solver
  62. )
  63. run_tests(rule_regexes, evidences, answers)
  64. def run_tests(rule_regexes, evidences, answers):
  65. predictions = []
  66. real_labels = []
  67. evidences_with_labels = []
  68. colorama_init()
  69. formatter = TerminalEvidenceFormatter()
  70. for name, regex, answer in rule_regexes:
  71. title = "Matches for rule '{}' (value: {})".format(name, answer)
  72. print("\n{}\n{}".format(title, "-" * len(title)))
  73. anything_matched = False
  74. for evidence in evidences:
  75. tokens_to_match = generate_tokens_to_match(evidence)
  76. match = refo.match(regex, tokens_to_match)
  77. if match:
  78. anything_matched = True
  79. print(" * {}".format(formatter.colored_text(evidence)))
  80. if evidence in answers and answers[evidence] is not None:
  81. evidences_with_labels.append(evidence)
  82. real_labels.append(answers[evidence])
  83. if match:
  84. predictions.append(answer)
  85. else:
  86. predictions.append(False)
  87. if not anything_matched:
  88. print(" nothing matched")
  89. print()
  90. if real_labels:
  91. results = result_dict_from_predictions(
  92. evidences_with_labels, real_labels, predictions
  93. )
  94. results.pop("end_time")
  95. keys = [
  96. "true_positives", "true_negatives",
  97. "false_positives", "false_negatives",
  98. "precision", "recall",
  99. "accuracy", "f1",
  100. ]
  101. title = "Metrics"
  102. print("{}\n{}".format(title, "-" * len(title)))
  103. for key in keys:
  104. print("{:>15}: {:.2f}".format(key, results[key]))
  105. def get_rules(rule_name):
  106. # Load rules
  107. rules = load_rules()
  108. if rule_name:
  109. rules = [x for x in rules if x.__name__ == rule_name]
  110. if not rules:
  111. logging.error("rule '{}' does not exists".format(rule_name))
  112. sys.exit(1)
  113. return rules
  114. if __name__ == "__main__":
  115. run_from_command_line()