[enhancement] WIP new finite checking in LinearRegression, Ridge and incremental variants #2206

Draft · wants to merge 28 commits into base: main
onedal/linear_model/incremental_linear_model.py (137 changes: 27 additions & 110 deletions)
@@ -16,35 +16,13 @@

import numpy as np

from daal4py.sklearn._utils import get_dtype

from ..common.hyperparameters import get_hyperparameters
from ..datatypes import from_table, to_table
from ..utils import _check_X_y, _num_features
from .linear_model import BaseLinearRegression


class IncrementalLinearRegression(BaseLinearRegression):
    """
    Incremental Linear Regression oneDAL implementation.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    algorithm : string, default="norm_eq"
        Algorithm used for computation on oneDAL side
    """

    def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"):
        super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, algorithm=algorithm)
        self._reset()
class BaseIncrementalLinear(BaseLinearRegression):

    def _reset(self):
        self._partial_result = self._get_backend(
@@ -77,11 +55,6 @@ def partial_fit(self, X, y, queue=None):
        self._queue = queue
        policy = self._get_policy(queue, X)

        X, y = _check_X_y(
            X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
        )
        y = np.asarray(y, dtype=X.dtype)

        self.n_features_in_ = _num_features(X, fallback_1d=True)

        X_table, y_table = to_table(X, y, queue=queue)
@@ -91,6 +64,7 @@ def partial_fit(self, X, y, queue=None):
            self._params = self._get_onedal_params(self._dtype)

        hparams = get_hyperparameters("linear_regression", "train")

        if hparams is not None and not hparams.is_default:
            self._partial_result = module.partial_train(
                policy,
@@ -146,7 +120,30 @@ def finalize_fit(self, queue=None):
        return self


class IncrementalRidge(BaseLinearRegression):
class IncrementalLinearRegression(BaseIncrementalLinear):
    """
    Incremental Linear Regression oneDAL implementation.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    algorithm : string, default="norm_eq"
        Algorithm used for computation on oneDAL side
    """

    def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"):
        super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, algorithm=algorithm)
        self._reset()


class IncrementalRidge(BaseIncrementalLinear):
    """
    Incremental Ridge Regression oneDAL implementation.

@@ -170,87 +167,7 @@ class IncrementalRidge(BaseLinearRegression):
"""

def __init__(self, alpha=1.0, fit_intercept=True, copy_X=False, algorithm="norm_eq"):
module = self._get_backend("linear_model", "regression")
super().__init__(
fit_intercept=fit_intercept, alpha=alpha, copy_X=copy_X, algorithm=algorithm
)
self._partial_result = module.partial_train_result()

def _reset(self):
module = self._get_backend("linear_model", "regression")
self._partial_result = module.partial_train_result()

def partial_fit(self, X, y, queue=None):
"""
Computes partial data for ridge regression
from data batch X and saves it to `_partial_result`.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data batch, where `n_samples` is the number of samples
in the batch, and `n_features` is the number of features.

y: array-like of shape (n_samples,) or (n_samples, n_targets) in
case of multiple targets
Responses for training data.

queue : dpctl.SyclQueue
If not None, use this queue for computations.
Returns
-------
self : object
Returns the instance itself.
"""
module = self._get_backend("linear_model", "regression")

self._queue = queue
policy = self._get_policy(queue, X)

X, y = _check_X_y(
X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
)
y = np.asarray(y, dtype=X.dtype)

self.n_features_in_ = _num_features(X, fallback_1d=True)

X_table, y_table = to_table(X, y, queue=queue)

if not hasattr(self, "_dtype"):
self._dtype = X_table.dtype
self._params = self._get_onedal_params(self._dtype)

self._partial_result = module.partial_train(
policy, self._params, self._partial_result, X_table, y_table
)

def finalize_fit(self, queue=None):
"""
Finalizes ridge regression computation and obtains coefficients
from the current `_partial_result`.

Parameters
----------
queue : dpctl.SyclQueue
If available, uses provided queue for computations.

Returns
-------
self : object
Returns the instance itself.
"""
module = self._get_backend("linear_model", "regression")
if queue is not None:
policy = self._get_policy(queue)
else:
policy = self._get_policy(self._queue)
result = module.finalize_train(policy, self._params, self._partial_result)

self._onedal_model = result.model

packed_coefficients = from_table(result.model.packed_coefficients)
self.coef_, self.intercept_ = (
packed_coefficients[:, 1:].squeeze(),
packed_coefficients[:, 0].squeeze(),
)

return self
self._reset()
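
For readers skimming the diff: the incremental API these classes expose is to call partial_fit once per data batch, which folds the batch into `_partial_result`, and then to call finalize_fit once to solve for the coefficients. A minimal usage sketch, assuming IncrementalLinearRegression is exported from onedal.linear_model and accepts host NumPy arrays (the import path is an assumption, not taken from this diff):

    # Hedged sketch of the partial_fit/finalize_fit protocol shown above.
    # Assumption: IncrementalLinearRegression is exported from onedal.linear_model.
    import numpy as np

    from onedal.linear_model import IncrementalLinearRegression

    rng = np.random.default_rng(0)
    X = rng.standard_normal((1000, 5))
    y = X @ np.array([1.0, -2.0, 0.5, 3.0, 0.0]) + 0.25  # synthetic targets

    model = IncrementalLinearRegression(fit_intercept=True)

    # Each call accumulates the batch into the backend's partial result.
    for X_batch, y_batch in zip(np.array_split(X, 4), np.array_split(y, 4)):
        model.partial_fit(X_batch, y_batch)

    # Solve the accumulated system once; coef_ and intercept_ are then populated.
    model.finalize_fit()
    print(model.coef_, model.intercept_)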
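IncrementalRidge follows the same protocol; per the diff, its constructor additionally takes the alpha regularization parameter (default 1.0). Continuing the sketch above under the same import assumption:

    # Same protocol with L2 regularization; alpha appears in the diff's __init__.
    from onedal.linear_model import IncrementalRidge

    ridge = IncrementalRidge(alpha=1.0, fit_intercept=True)
    for X_batch, y_batch in zip(np.array_split(X, 4), np.array_split(y, 4)):
        ridge.partial_fit(X_batch, y_batch)
    ridge.finalize_fit()
    print(ridge.coef_, ridge.intercept_)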