Skip to content

Commit

Permalink
docs: adding documentation for regressors
Browse files Browse the repository at this point in the history
  • Loading branch information
Spencer Sun authored and hmgomes committed May 23, 2024
1 parent bf62b0e commit 64c7105
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 14 deletions.
3 changes: 2 additions & 1 deletion src/capymoa/regressor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ._soknl import SOKNL, SOKNLBT
from ._soknl_base_tree import SOKNLBT
from ._soknl import SOKNL
from ._orto import ORTO
from ._knn import KNNRegressor
from ._fimtdd import FIMTDD
Expand Down
30 changes: 29 additions & 1 deletion src/capymoa/regressor/_fimtdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,35 @@


class FIMTDD(MOARegressor):
"""Implementation of the FIMT-DD tree as described by Ikonomovska et al."""
"""Implementation of the FIMT-DD tree as described by Ikonomovska et al.
Fast Incremental Model Tree with Drift Detection (FIMT-DD) is the regression
counterpart of the well-known Hoeffding Tree for data stream learning.
FIMT-DD is implemented in MOA (Massive Online Analysis) and provides several
parameters for customization.
Reference:
`Ikonomovska, Elena, João Gama, and Sašo Džeroski.
Learning model trees from evolving data streams.
Data mining and knowledge discovery 23.1 (2011): 128-168.
PDF available at:
<https://repositorio.inesctec.pt/server/api/core/bitstreams/a0802a15-84a2-493b-885b-a4f9fc4060b7/content>`_
Example usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import FIMTDD
>>> from capymoa.evaluation import prequential_evaluation
>>> stream = Fried()
>>> schema = stream.get_schema()
>>> learner = FIMTDD(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
7.363273627701553
"""

def __init__(
self,
Expand Down
40 changes: 38 additions & 2 deletions src/capymoa/regressor/_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,49 @@


class KNNRegressor(MOARegressor):
"""
"""K Nearest Neighbor for data stream regression with sliding window.
The default number of neighbors (k) is set to 3 instead of 10 (as in MOA).
There is no specific publication for online KNN, please refer to:
`Bifet, Albert, Ricard Gavalda, Geoffrey Holmes, and Bernhard Pfahringer.
Machine learning for data streams: with practical examples in MOA. MIT press, 2023.
HTML version of the book available at:
<https://moa.cms.waikato.ac.nz/book-html/>`_
Example usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import KNNRegressor
>>> from capymoa.evaluation import prequential_evaluation
>>> stream = Fried()
>>> schema = stream.get_schema()
>>> learner = KNNRegressor(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
2.9811398077838542
"""

def __init__(
self, schema=None, CLI=None, random_seed=1, k=3, median=False, window_size=1000
self,
schema=None,
CLI=None,
random_seed=1,
k=3,
median=False,
window_size=1000,
):
"""
Constructing KNN Regressor.
:param k: the number of the neighbours.
:param median: choose to use mean or median as the aggregation for the final prediction.
:param window_size: the size of the sliding window to store the instances.
"""


# Important, should create the MOA object before invoking the super class __init__
self.moa_learner = _moa_kNN()
super().__init__(
Expand Down
26 changes: 25 additions & 1 deletion src/capymoa/regressor/_orto.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,31 @@


class ORTO(MOARegressor):
"""Implementation of the ORTO tree as described by Ikonomovska et al."""
"""Implementation of the Online Regression Tree with Options (ORTO).
ORTO is an extension to FIMT-DD that allows options during the tree's growth and splits.
Reference:
`Ikonomovska, Elena, Joao Gama, Bernard Zenko, and Saso Dzeroski.
"Speeding-up hoeffding-based regression trees with options."
In Proceedings of the 28th International Conference on Machine Learning (ICML-11), pp. 537-544. 2011.
PDF available at:
<https://icml.cc/2011/papers/349_icmlpaper.pdf>`_
Example usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import ORTO
>>> from capymoa.evaluation import prequential_evaluation
>>> stream = Fried()
>>> schema = stream.get_schema()
>>> learner = ORTO(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
9.228075678265904
"""

def __init__(
self,
Expand Down
6 changes: 5 additions & 1 deletion src/capymoa/regressor/_passive_aggressive_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ class PassiveAggressiveRegressor(SKRegressor):
ease of use in the streaming context. Some options are missing because
they are not relevant in the streaming context.
Reference:
`Online Passive-Aggressive Algorithms K. Crammer, O. Dekel, J. Keshat, S.
Shalev-Shwartz, Y. Singer - JMLR (2006)
<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>`_
Example Usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import PassiveAggressiveRegressor
>>> from capymoa.evaluation import prequential_evaluation
Expand All @@ -24,7 +28,7 @@ class PassiveAggressiveRegressor(SKRegressor):
>>> learner = PassiveAggressiveRegressor(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
3.7...
3.7004531627005455
"""

sklearner: _SKPassiveAggressiveRegressor
Expand Down
4 changes: 3 additions & 1 deletion src/capymoa/regressor/_sgd_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class SGDRegressor(SKRegressor):
they are not relevant in the streaming context. Furthermore, the learning rate
is constant.
Example Usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import SGDRegressor
>>> from capymoa.evaluation import prequential_evaluation
Expand All @@ -22,7 +24,7 @@ class SGDRegressor(SKRegressor):
>>> learner = SGDRegressor(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
4.625...
4.625212156832521
"""

sklearner: _SKSGDRegressor
Expand Down
37 changes: 31 additions & 6 deletions src/capymoa/regressor/_soknl.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@
# Library imports
from typing import Optional, Union

from ._soknl_base_tree import SOKNLBT
from moa.classifiers.meta import SelfOptimisingKNearestLeaves as _MOA_SOKNL

from capymoa.base import (
MOARegressor, _extract_moa_learner_CLI
)

from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str
from capymoa.stream._stream import Schema
from moa.classifiers.meta import SelfOptimisingKNearestLeaves as _MOA_SOKNL
from ._soknl_base_tree import SOKNLBT


class SOKNL(MOARegressor):
"""Self-Optimising K-Nearest Leaves (SOKNL) Implementation.
SOKNL is an extension to AdaptiveRandomForestRegressor.
SOKNL stores abstract information for all instances seen by the leaves and uses it to calculate the distance
from a leaf to a given instance. The final prediction is then produced by the k closest leaves in the forest.
Reference:
`Sun, Yibin, Bernhard Pfahringer, Heitor Murilo Gomes, and Albert Bifet.
"SOKNL: A novel way of integrating K-nearest neighbours with adaptive random forest regression for data streams."
Data Mining and Knowledge Discovery 36, no. 5 (2022): 2006-2032.
PDF available at:
<https://researchcommons.waikato.ac.nz/server/api/core/bitstreams/f91959c0-1515-44c3-bd5f-737135ee3e48/content>`_
Example usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import SOKNL
>>> from capymoa.evaluation import prequential_evaluation
>>> stream = Fried()
>>> schema = stream.get_schema()
>>> learner = SOKNL(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
3.3738337530234306
"""

def __init__(
self,
schema=None,
Expand Down
26 changes: 25 additions & 1 deletion src/capymoa/regressor/_soknl_base_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,31 @@


class SOKNLBT(MOARegressor):
"""The base tree for Self-Optimising K Nearest Leaves as described by Sun et al."""
"""The base tree for Self-Optimising K Nearest Leaves as described by Sun et al.
SOKNLBT is a modification of FIMT-DD that allows the tree to store necessary information.
See the reference:
`Sun, Yibin, Bernhard Pfahringer, Heitor Murilo Gomes, and Albert Bifet.
"SOKNL: A novel way of integrating K-nearest neighbours with adaptive random forest regression for data streams."
Data Mining and Knowledge Discovery 36, no. 5 (2022): 2006-2032.
PDF available at:
<https://researchcommons.waikato.ac.nz/server/api/core/bitstreams/f91959c0-1515-44c3-bd5f-737135ee3e48/content>`_
Example usage:
>>> from capymoa.datasets import Fried
>>> from capymoa.regressor import SOKNLBT
>>> from capymoa.evaluation import prequential_evaluation
>>> stream = Fried()
>>> schema = stream.get_schema()
>>> learner = SOKNLBT(schema)
>>> results = prequential_evaluation(stream, learner, max_instances=1000)
>>> results["cumulative"].RMSE()
4.950050301515773
"""

def __init__(
self,
Expand Down

0 comments on commit 64c7105

Please sign in to comment.