-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #34 from tachyonicClock/PassiveAggressiveClassifier
Add `PassiveAggressiveClassifier`
- Loading branch information
Showing
4 changed files
with
131 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,13 @@ | ||
from .classifiers import AdaptiveRandomForest, OnlineBagging, AdaptiveRandomForest | ||
from .efdt import EFDT | ||
from .sklearn import PassiveAggressiveClassifier | ||
from .hoeffding_tree import HoeffdingTree | ||
|
||
__all__ = ["AdaptiveRandomForest", "OnlineBagging", "AdaptiveRandomForest", "EFDT", "HoeffdingTree"] | ||
# Public API of the classifier package. Each exported name is listed exactly
# once (the original list repeated "AdaptiveRandomForest").
__all__ = [
    "AdaptiveRandomForest",
    "OnlineBagging",
    "EFDT",
    "HoeffdingTree",
    "PassiveAggressiveClassifier",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
from typing import Optional, Dict, Union, Literal | ||
from capymoa.learner.learners import Classifier | ||
from sklearn.linear_model import ( | ||
PassiveAggressiveClassifier as skPassiveAggressiveClassifier, | ||
) | ||
from capymoa.stream.instance import Instance, LabeledInstance | ||
from capymoa.stream.stream import Schema | ||
from capymoa.type_alias import LabelIndex, LabelProbabilities | ||
import numpy as np | ||
|
||
|
||
class PassiveAggressiveClassifier(Classifier):
    """Streaming Passive Aggressive Classifier

    This wraps :sklearn:`linear_model.PassiveAggressiveClassifier` for
    ease of use in the streaming context. Some options are missing because
    they are not relevant in the streaming context.

    `Online Passive-Aggressive Algorithms K. Crammer, O. Dekel, J. Keshat, S.
    Shalev-Shwartz, Y. Singer - JMLR (2006)
    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>`_

    >>> from capymoa.datasets import ElectricityTiny
    >>> from capymoa.learner.classifier import PassiveAggressiveClassifier
    >>> from capymoa.evaluation import prequential_evaluation
    >>> stream = ElectricityTiny()
    >>> schema = stream.get_schema()
    >>> learner = PassiveAggressiveClassifier(schema)
    >>> results = prequential_evaluation(stream, learner, max_instances=1000, optimise=False)
    >>> results["cumulative"].accuracy()
    84.3
    """

    sklearner: skPassiveAggressiveClassifier
    """The underlying scikit-learn object. See: :sklearn:`linear_model.PassiveAggressiveClassifier`"""

    def __init__(
        self,
        schema: Schema,
        max_step_size: float = 1.0,
        fit_intercept: bool = True,
        loss: str = "hinge",
        n_jobs: Optional[int] = None,
        class_weight: Union[Dict[int, float], None, Literal["balanced"]] = None,
        average: Union[bool, int] = False,
        random_seed: int = 1,
    ):
        """Construct a passive aggressive classifier.

        :param schema: Stream schema
        :param max_step_size: Maximum step size (regularization).
        :param fit_intercept: Whether the intercept should be estimated or not.
            If False, the data is assumed to be already centered.
        :param loss: The loss function to be used: hinge: equivalent to PA-I in
            the reference paper. squared_hinge: equivalent to PA-II in the
            reference paper.
        :param n_jobs: The number of CPUs to use to do the OVA (One Versus All,
            for multi-class problems) computation. None means 1 unless in a
            ``joblib.parallel_backend`` context. -1 means using all processors.
        :param class_weight: Preset for the ``sklearner.class_weight`` fit
            parameter. Weights associated with classes. If not given, all
            classes are supposed to have weight one.
            The "balanced" mode uses the values of y to automatically adjust
            weights inversely proportional to class frequencies in the input
            data as ``n_samples / (n_classes * np.bincount(y))``.
        :param average: When set to True, computes the averaged SGD weights and
            stores the result in the ``sklearner.coef_`` attribute. If set to
            an int greater than 1, averaging will begin once the total number
            of samples seen reaches average. So ``average=10`` will begin
            averaging after seeing 10 samples.
        :param random_seed: Seed for the random number generator.
        """
        super().__init__(schema, random_seed)

        # Batch-oriented options (early stopping, shuffling, warm start) are
        # pinned off; the model is only ever updated through ``partial_fit``.
        self.sklearner = skPassiveAggressiveClassifier(
            C=max_step_size,
            fit_intercept=fit_intercept,
            early_stopping=False,
            shuffle=False,
            verbose=0,
            loss=loss,
            n_jobs=n_jobs,
            warm_start=False,
            class_weight=class_weight,
            average=average,
            random_state=random_seed,
        )
        # Label indexes from the schema; passed to every ``partial_fit`` call
        # and used to size the vector returned by ``predict_proba``.
        self._classes = schema.get_label_indexes()
        # ``predict`` returns None until the first call to ``train``.
        self._is_fitted = False

    def __str__(self):
        """Return the string representation of the wrapped scikit-learn model."""
        return str(self.sklearner)

    def train(self, instance: LabeledInstance):
        """Update the model with a single labeled instance.

        :param instance: The labeled instance to learn from.
        """
        # scikit-learn expects a 2D feature matrix and a 1D label vector.
        x = instance.x.reshape(1, -1)
        y = np.array(instance.y_index).reshape(1)
        self.sklearner.partial_fit(x, y, classes=self._classes)
        self._is_fitted = True

    def predict(self, instance: Instance) -> Optional[LabelIndex]:
        """Predict the label index of a single instance.

        :param instance: The instance to classify.
        :return: The predicted label index, or ``None`` if ``train`` has not
            been called yet.
        """
        if not self._is_fitted:
            return None
        x = instance.x.reshape(1, -1)
        return self.sklearner.predict(x).item()

    def predict_proba(self, instance: Instance) -> LabelProbabilities:
        """Return a one-hot vector marking the predicted label.

        The vector has one entry per label index in the schema. The entry of
        the predicted label is set to 1 and all others are 0.

        :param instance: The instance to classify.
        :return: A one-hot vector for the predicted label, or an all-zero
            vector if ``train`` has not been called yet.
        """
        proba = np.zeros(len(self._classes))
        label = self.predict(instance)
        # ``predict`` yields None before the first ``train`` call. Indexing
        # with None (``proba[None] = 1``) would broadcast the assignment over
        # the whole array and return all ones, so only index with a real label.
        if label is not None:
            proba[label] = 1
        return proba
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters