Commit
Merge pull request #117 from neurodata/docstring_header_additions
Added functionalities to L2 classes, added option to internally calculate k in KNNClassificationVoter, purged root 'test' folder and unnecessary classes, generalized NeuralClassificationTransformer arguments
jdey4 authored Sep 10, 2020
2 parents dfd2d7f + 7ceb314 commit ade50c6
Showing 8 changed files with 72 additions and 345 deletions.
4 changes: 3 additions & 1 deletion proglearn/deciders.py
@@ -34,7 +34,9 @@ def fit(
         transformer_id_to_voters,
         classes=None,
     ):
-        self.classes = self.classes if len(self.classes) > 0 else np.unique(y)
+        if self.classes is None and len(y) == 0:
+            raise ValueError("Classification Decider classes undefined with no class labels fed to 'fit'")
+        self.classes = self.classes if self.classes is not None else np.unique(y)
         self.transformer_id_to_transformers = transformer_id_to_transformers
         self.transformer_id_to_voters = transformer_id_to_voters
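The guard above now fails fast when neither explicit classes nor labels are available. A standalone sketch of that resolution logic (resolve_classes is a hypothetical helper for illustration, not part of proglearn):

import numpy as np

def resolve_classes(classes, y):
    # Mirrors the decider's fit: prefer explicitly supplied classes,
    # infer them from the labels otherwise, and fail fast if neither exists.
    if classes is None and len(y) == 0:
        raise ValueError("classes undefined with no class labels fed to 'fit'")
    return classes if classes is not None else np.unique(y)

print(resolve_classes(None, np.array([2, 0, 1, 0])))  # inferred: [0 1 2]
print(resolve_classes(np.arange(3), np.array([])))    # explicit: [0 1 2]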
63 changes: 26 additions & 37 deletions proglearn/forest.py
@@ -1,12 +1,16 @@
+'''
+Main Author: Will LeVine
+Corresponding Email: levinewill@icloud.com
+'''
+import numpy as np  # needed for np.unique below
+
 from .progressive_learner import ProgressiveLearner
 from .transformers import TreeClassificationTransformer
 from .voters import TreeClassificationVoter
 from .deciders import SimpleAverage


-class LifelongClassificationForest:
-    def __init__(self, n_estimators=100, finite_sample_correction=False):
+class L2F:
+    def __init__(self, n_estimators=100, tree_construction_proportion=0.67, finite_sample_correction=False):
         self.n_estimators = n_estimators
+        self.tree_construction_proportion = tree_construction_proportion
         self.pl = ProgressiveLearner(
             default_transformer_class=TreeClassificationTransformer,
             default_transformer_kwargs={},
@@ -17,22 +21,33 @@ def __init__(self, n_estimators=100, finite_sample_correction=False):
         )

     def add_task(
-        self, X, y, task_id=None, transformer_voter_decider_split=[0.67, 0.33, 0]
-    ):
+        self, X, y, task_id=None):
         self.pl.add_task(
             X,
             y,
             task_id=task_id,
-            transformer_voter_decider_split=transformer_voter_decider_split,
+            transformer_voter_decider_split=[self.tree_construction_proportion, 1 - self.tree_construction_proportion, 0],
             num_transformers=self.n_estimators,
+            decider_kwargs={"classes": np.unique(y)},
         )
         return self

+    def add_transformer(self, X, y, transformer_id=None):
+        self.pl.add_transformer(
+            X,
+            y,
+            transformer_id=transformer_id,
+            num_transformers=self.n_estimators,
+            transformer_data_proportion=self.tree_construction_proportion,
+        )
+
+        return self
+
-    def predict(self, X, task_id, transformer_ids=None):
-        return self.pl.predict(X, task_id, transformer_ids=transformer_ids)
+    def predict(self, X, task_id):
+        return self.pl.predict(X, task_id)

-    def predict_proba(self, X, task_id, transformer_ids=None):
-        return self.pl.predict_proba(X, task_id, transformer_ids=transformer_ids)
+    def predict_proba(self, X, task_id):
+        return self.pl.predict_proba(X, task_id)


 class UncertaintyForest:
@@ -45,37 +60,11 @@ def fit(self, X, y):
             n_estimators=self.n_estimators,
             finite_sample_correction=self.finite_sample_correction,
         )
-        self.lf.add_task(X, y, task_id=0)
+        self.lf.add_task(X, y, task_id=0, decider_kwargs={"classes": np.unique(y)})
         return self

     def predict(self, X):
         return self.lf.predict(X, 0)

     def predict_proba(self, X):
         return self.lf.predict_proba(X, 0)


-class TransferForest:
-    def __init__(self, n_estimators=100):
-        self.lf = LifelongClassificationForest(n_estimators=n_estimators)
-        self.source_ids = []
-
-    def add_source_task(self, X, y, task_id=None):
-        self.lf.add_task(
-            X, y, task_id=task_id, transformer_voter_decider_split=[0.9, 0.1, 0]
-        )
-        self.source_ids.append(task_id)
-        return self
-
-    def add_target_task(self, X, y, task_id=None):
-        self.lf.add_task(
-            X, y, task_id=task_id, transformer_voter_decider_split=[0.1, 0.9, 0]
-        )
-        self.target_id = task_id
-        return self
-
-    def predict(self, X):
-        return self.lf.predict(X, self.target_id, transformer_ids=self.source_ids)
-
-    def predict_proba(self, X):
-        return self.lf.predict_proba(X, self.target_id, transformer_ids=self.source_ids)
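Taken together, the forest changes replace the per-call split argument with a constructor-level tree_construction_proportion and add forward transfer via add_transformer. A hedged usage sketch against this snapshot of the API (synthetic data; estimator counts illustrative):

import numpy as np
from proglearn.forest import L2F

# Two synthetic binary-classification tasks.
X0, y0 = np.random.rand(200, 4), np.random.randint(0, 2, 200)
X1, y1 = np.random.rand(200, 4), np.random.randint(0, 2, 200)

clf = L2F(n_estimators=10, tree_construction_proportion=0.67)
clf.add_task(X0, y0, task_id=0)  # split is now fixed internally by the constructor
clf.add_transformer(X1, y1)      # new: grow trees on fresh data without a new task
print(clf.predict(X0, task_id=0)[:5])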
125 changes: 26 additions & 99 deletions proglearn/network.py
@@ -1,148 +1,75 @@
+'''
+Main Author: Will LeVine
+Corresponding Email: levinewill@icloud.com
+'''
 import numpy as np

 from .progressive_learner import ProgressiveLearner
-from .transformers import NeuralClassificationTransformer, NeuralRegressionTransformer
-from .voters import KNNClassificationVoter, NeuralRegressionVoter
-from .deciders import SimpleAverage, LinearRegressionDecider, KNNRegressionDecider
+from .transformers import NeuralClassificationTransformer
+from .voters import KNNClassificationVoter
+from .deciders import SimpleAverage

 from sklearn.utils import check_X_y, check_array
 from keras.optimizers import Adam
 from keras.callbacks import EarlyStopping

-class LifelongRegressionNetwork:
-    def __init__(
-        self, network, decider="linear", loss="mse", epochs=100, optimizer=Adam(1e-3), verbose=False
-    ):
-        self.network = network
-        self.decider = decider
-        self.loss = loss
-        self.epochs = epochs
-        self.optimizer = optimizer
-        self.verbose = verbose
-        self.is_first_task = True
-
-    def setup(self):
-
-        # Set transformer network hyperparameters.
-        default_transformer_kwargs = {
-            "network": self.network,
-            "euclidean_layer_idx": -2,
-            "loss": self.loss,
-            "optimizer": self.optimizer,
-            "compile_kwargs": {},
-            "fit_kwargs": {
-                "epochs": self.epochs,
-                "verbose": self.verbose,
-                "callbacks": [EarlyStopping(patience=10, monitor="val_loss")],
-                "validation_split": 0.25,
-            },
-        }
-
-        # Set voter network hyperparameters.
-        default_voter_kwargs = {
-            "validation_split": 0.25,
-            "loss": self.loss,
-            "lr": self.lr,
-            "epochs": self.epochs,
-            "verbose": self.verbose,
-        }
-
-        # Choose decider.
-        if self.decider == "linear":
-            default_decider_class = LinearRegressionDecider
-        elif self.decider == "knn":
-            default_decider_class = KNNRegressionDecider
-        else:
-            raise ValueError("Decider must be 'linear' or 'knn'.")
-
-        self.pl = ProgressiveLearner(
-            default_transformer_class=NeuralRegressionTransformer,
-            default_transformer_kwargs=default_transformer_kwargs,
-            default_voter_class=NeuralRegressionVoter,
-            default_voter_kwargs=default_voter_kwargs,
-            default_decider_class=default_decider_class,
-            default_decider_kwargs={},
-        )
-
-    def add_task(self, X, y, task_id=None):
-
-        if self.is_first_task:
-            self.setup()
-            self.is_first_task = False
-
-        self.pl.add_task(
-            X, y, task_id=task_id, transformer_voter_decider_split=[0.6, 0.3, 0.1]
-        )
-
-        return self
-
-    def predict(self, X, task_id):
-        return self.pl.predict(X, task_id)


-class LifelongClassificationNetwork:
+class L2N:
     def __init__(
         self,
         network,
         loss="categorical_crossentropy",
+        optimizer=Adam(3e-4),
         epochs=100,
-        lr=3e-4,
         batch_size=32,
         verbose=False,
     ):
         self.network = network
         self.loss = loss
         self.epochs = epochs
-        self.optimizer = Adam(lr)
+        self.optimizer = optimizer
         self.verbose = verbose
         self.batch_size = batch_size
         self.is_first_task = True

     def setup(self, num_points_per_task):


         # Set transformer network hyperparameters.
         default_transformer_kwargs = {
             "network": self.network,
             "euclidean_layer_idx": -2,
             "loss": self.loss,
             "optimizer": self.optimizer,
+            "num_classes": 10,
             "compile_kwargs": {},
             "fit_kwargs": {
                 "epochs": self.epochs,
-                # "callbacks": [EarlyStopping(patience=5, monitor="val_loss")],
+                "callbacks": [EarlyStopping(patience=5, monitor="val_loss")],
                 "verbose": self.verbose,
                 "validation_split": 0.33,
                 "batch_size": self.batch_size,
             },
         }

-        # Hyperparameter for KNN voter.
-        default_voter_kwargs = {"k": int(np.log2(num_points_per_task))}

         self.pl = ProgressiveLearner(
             default_transformer_class=NeuralClassificationTransformer,
             default_transformer_kwargs=default_transformer_kwargs,
             default_voter_class=KNNClassificationVoter,
-            default_voter_kwargs=default_voter_kwargs,
+            default_voter_kwargs={},
             default_decider_class=SimpleAverage,
             default_decider_kwargs={},
         )

-    def add_task(self, X, y, task_id=None, decider_kwargs={}):
-
-        if self.is_first_task:
-            num_points_per_task = len(X)
-            self.setup(num_points_per_task)
-            self.is_first_task = False
-
+    def add_task(self, X, y, task_id=None, transformer_voter_decider_split=[0.67, 0.33, 0]):
         self.pl.add_task(
             X,
             y,
             task_id=task_id,
-            transformer_voter_decider_split=[0.67, 0.33, 0.0],
-            decider_kwargs=decider_kwargs,
+            transformer_voter_decider_split=transformer_voter_decider_split,
+            decider_kwargs={"classes": np.unique(y)},
         )

         return self

+    def add_transformer(self, X, y, transformer_id=None):
+        self.pl.add_transformer(
+            X,
+            y,
+            transformer_id=transformer_id,
+        )
+
+        return self
@@ -151,4 +78,4 @@ def predict(self, X, task_id):
         return self.pl.predict(X, task_id)

     def predict_proba(self, X, task_id):
-        return self.pl.predict_proba(X, task_id)
\ No newline at end of file
+        return self.pl.predict_proba(X, task_id)
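The network class follows the same pattern: L2N now takes a Keras optimizer directly instead of a bare learning rate, exposes the split on add_task, and gains add_transformer. A hedged sketch of the call pattern in this snapshot (the architecture is illustrative and sized for the hard-coded num_classes of 10; it assumes a standalone-keras environment matching the imports above, and may not run verbatim against other versions):

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from proglearn.network import L2N

# Illustrative network; the transformer kwargs above assume 10 classes.
network = Sequential([Dense(32, activation="relu", input_shape=(4,)),
                      Dense(10, activation="softmax")])

X, y = np.random.rand(500, 4), np.random.randint(0, 10, 500)
l2n = L2N(network, optimizer=Adam(3e-4), epochs=5, batch_size=32)
l2n.add_task(X, y, task_id=0, transformer_voter_decider_split=[0.67, 0.33, 0])
print(l2n.predict_proba(X, task_id=0).shape)  # expected: (500, 10)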
24 changes: 14 additions & 10 deletions proglearn/progressive_learner.py
@@ -466,15 +466,19 @@ def add_task(
         task_id : obj, default=None
             The id corresponding to the task being added.
         transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0]
-            A 1d array of length 3. The 2nd index indicates the proportion of the data
-            set aside to train the decider - these indices are saved internally and
-            will be used to train all further deciders corresponding to this task for
-            all function calls. The 1st index indicates the proportion of the data
-            set aside to train the voter from the (optional) newly added transformer(s)
-            to the new task. For all other tasks, the aggregate transformer and voter
-            data is used to train the voters corresponding to those tasks. The 0th index
-            indicates the proportions of the input data used to train the (optional) newly
-            added transformer(s).
+            A 1d array of length 3. The 0th index indicates the proportion of the input
+            data used to train the (optional) newly added transformer(s) corresponding
+            to the task_id provided in this call. The 1st index indicates the proportion
+            of the data set aside to train the voter(s) from these newly added
+            transformer(s) to that task_id; for all other tasks, the voter(s) from the
+            new transformer(s) are trained on the aggregate transformer and voter data
+            from those tasks, and the voter(s) from all pre-existing transformers to
+            this task_id are trained on the aggregate transformer and voter data from
+            this call. The 2nd index indicates the proportion of the data set aside to
+            train the decider - this split is saved internally and used to train all
+            further deciders corresponding to this task on every call.
         num_transformers : int, default=1
             The number of transformers to add corresponding to the given inputs.
         transformer_class : BaseTransformer, default=None
@@ -573,4 +577,4 @@ def predict_proba(self, X, task_id, transformer_ids=None):
         else:
             raise AttributeError(
                 "Cannot call `predict_proba` on non-classification decider."
-            )
\ No newline at end of file
+            )
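A worked example of the split semantics documented above, assuming the three proportions are applied in order to a task's n samples:

n = 1000
split = [0.67, 0.33, 0]  # transformer / voter / decider
print([int(p * n) for p in split])  # [670, 330, 0]
# 670 samples grow the new transformer(s), 330 train their voters, and none
# are reserved for the decider here (SimpleAverage needs no held-out data).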
5 changes: 3 additions & 2 deletions proglearn/voters.py
Original file line number Diff line number Diff line change
@@ -97,7 +97,7 @@ def _finite_sample_correction(posteriors, num_points_in_partition, num_classes):


 class KNNClassificationVoter(BaseVoter):
-    def __init__(self, k, kwargs={}):
+    def __init__(self, k=None, kwargs={}):
         """
         Doc strings here.
         """
@@ -110,7 +110,8 @@ def fit(self, X, y):
         Doc strings here.
         """
         X, y = check_X_y(X, y)
-        self.knn = KNeighborsClassifier(self.k, **self.kwargs)
+        k = int(np.log2(len(X))) if self.k is None else self.k
+        self.knn = KNeighborsClassifier(k, **self.kwargs)
         self.knn.fit(X, y)
         self._is_fitted = True
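With the new default, k scales logarithmically with the training set when the caller supplies none. A quick sketch of what that rule yields:

import numpy as np

for n in (100, 1000, 45000):
    print(n, int(np.log2(n)))  # 100 -> 6, 1000 -> 9, 45000 -> 15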
