123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- from sklearn.linear_model import SGDClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.svm import SVC
- from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
- from sklearn.preprocessing import StandardScaler
- from sklearn.pipeline import make_pipeline, make_union
- from featureforge.vectorizer import Vectorizer
- from iepy.extraction.features import parse_features
# Registry mapping the value of the "classifier" configuration option to the
# scikit-learn estimator class that RelationExtractionClassifier instantiates
# for it.
_valid_classifiers = {
    "sgd": SGDClassifier,
    "knn": KNeighborsClassifier,
    "svc": SVC,
    "randomforest": RandomForestClassifier,
    "adaboost": AdaBoostClassifier,
}
# Configuration keys that must all be present when constructing a
# RelationExtractionClassifier; a missing key is reported with ValueError.
_configuration_options = [
    "classifier",
    "classifier_args",
    "sparse_features",
    "dense_features",
]
class RelationExtractionClassifier:
    """
    Feature-extraction pipeline plus a configurable scikit-learn classifier.

    The constructor takes keyword configuration; every key listed in
    `_configuration_options` is required:

    - `classifier`: a key of `_valid_classifiers` selecting the estimator.
    - `classifier_args`: mapping of keyword arguments forwarded to the
      estimator's constructor.
    - `sparse_features` / `dense_features`: feature specifications handed to
      `parse_features` (their exact format is defined by that helper).

    Sparse features are vectorized and collapsed by `ClassifierAsFeature`,
    dense features are vectorized as-is, both are concatenated and then
    standardized before reaching the final classifier.
    """

    def __init__(self, **config):
        """
        Build the feature pipeline and the classifier from `config`.

        Raises `ValueError` if a required option is missing or if
        `config["classifier"]` is not a known algorithm name.
        """
        # Validate options are present
        for option in _configuration_options:
            if option not in config:
                raise ValueError("Missing configuration "
                                 "option {!r}".format(option))

        # Feature extraction: the sparse feature set is reduced through an
        # inner classifier, then joined with the dense feature set.
        sparse_features = parse_features(config["sparse_features"])
        densifier = make_pipeline(Vectorizer(sparse_features, sparse=True),
                                  ClassifierAsFeature())
        dense_features = parse_features(config["dense_features"])
        vectorization = make_union(densifier,
                                   Vectorizer(dense_features, sparse=False))

        # Classifier
        try:
            classifier = _valid_classifiers[config["classifier"]]
        except KeyError:
            raise ValueError("Unknown classification algorithm "
                             "{!r}".format(config["classifier"]))
        classifier = classifier(**config["classifier_args"])

        self.pipeline = make_pipeline(vectorization, StandardScaler())
        self.classifier = classifier

    def fit(self, X, y):
        """
        Fit the feature pipeline on `X`, `y`, then fit the classifier on the
        transformed data. Returns `self`.
        """
        X = self.pipeline.fit_transform(X, y)
        self.classifier.fit(X, y)
        return self

    def _chew(self, evidences):
        """Turn raw evidences into the feature matrix the classifier uses."""
        return self.pipeline.transform(evidences)

    def _predict(self, X):
        """Predict labels for an already-transformed feature matrix."""
        return self.classifier.predict(X)

    def _rank(self, X):
        """
        Return a flat array with one ranking score per row of `X`.

        Uses the classifier's `decision_function` when it has one. Some
        estimators in `_valid_classifiers` (e.g. KNeighborsClassifier and
        RandomForestClassifier) do not expose `decision_function`; for those
        the probability in the last column of `predict_proba` is used
        instead.
        """
        scorer = getattr(self.classifier, "decision_function", None)
        if scorer is not None:
            return scorer(X).ravel()
        # NOTE(review): assumes the last column of predict_proba corresponds
        # to the positive label (scikit-learn orders columns by sorted
        # classes_) -- confirm against the labels used for training.
        return self.classifier.predict_proba(X)[:, -1]

    def predict(self, evidences):
        """Predict a label for each evidence."""
        return self._predict(self._chew(evidences))

    def decision_function(self, evidences):
        """Return a ranking score for each evidence (see `_rank`)."""
        return self._rank(self._chew(evidences))
class ClassifierAsFeature:
    """
    A transformation that essentially implements a form of dimensionality
    reduction.
    This class uses (by default) a fast SGDClassifier configured like a linear
    SVM to produce a feature that is the decision function of the classifier.
    It's useful to reduce the dimension of bag-of-words feature-set into a
    feature that's denser in information.
    """

    def __init__(self, classifier=None):
        """
        `classifier` is any object providing `fit(X, y)` and
        `decision_function(X)`; when omitted a default `SGDClassifier()` is
        created.
        """
        if classifier is None:
            classifier = SGDClassifier()
        self.classifier = classifier

    def fit(self, X, y):
        """
        `X` is expected to be an array-like or a sparse matrix.
        `y` is expected to be an array-like containing the classes to learn.
        Returns `self`.
        """
        self.classifier.fit(X, y)
        return self

    def transform(self, X, y=None):
        """
        `X` is expected to be an array-like or a sparse matrix.
        `y` is accepted for pipeline compatibility and ignored.
        It returns a dense matrix with one row per sample: shape
        (n_samples, 1) when the decision function is one-dimensional, or the
        decision function's own 2-D output otherwise.
        """
        scores = self.classifier.decision_function(X)
        # Only a flat score vector needs to become a column. Blindly
        # reshaping a 2-D (n_samples, n_classes) result to (-1, 1) would
        # multiply the number of rows and misalign samples.
        if scores.ndim == 1:
            scores = scores.reshape(-1, 1)
        return scores
|