Support predict in MNMG Logistic Regression #5516

Merged (2 commits) on Jul 29, 2023
python/cuml/dask/linear_model/logistic_regression.py (6 changes: 2 additions & 4 deletions)
@@ -17,6 +17,7 @@
 from cuml.dask.common.base import DelayedPredictionMixin
 from cuml.dask.common.base import mnmg_import
 from cuml.dask.common.base import SyncFitMixinLinearModel
+from cuml.dask.linear_model import LinearRegression
 from raft_dask.common.comms import get_raft_comm_state
 from dask.distributed import get_worker

@@ -31,7 +32,7 @@
 np = cpu_only_import("numpy")


-class LogisticRegression(BaseEstimator, SyncFitMixinLinearModel):
+class LogisticRegression(LinearRegression):
     def __init__(self, *, client=None, verbose=False, **kwargs):
         super().__init__(client=client, verbose=verbose, **kwargs)

@@ -55,9 +56,6 @@ def fit(self, X, y):

         return self

-    def get_param_names(self):
-        return list(self.kwargs.keys())
-
     @staticmethod
     @mnmg_import
     def _create_model(sessionId, datatype, **kwargs):
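Why swapping the base class is enough to add predict: the Dask LinearRegression estimator already carries the distributed prediction path (presumably through the same DelayedPredictionMixin machinery this module imports), so LogisticRegression inherits a working predict without any new code in this file. Below is a minimal usage sketch of the resulting API; the LocalCUDACluster setup, the dask_cudf data construction, and the column names are illustrative assumptions, not part of this PR.

# Sketch only: cluster setup and data layout are assumptions for illustration.
import cudf
import dask_cudf
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

from cuml.dask.linear_model import LogisticRegression

if __name__ == "__main__":
    cluster = LocalCUDACluster()
    client = Client(cluster)

    # Toy data similar to the test below: two linearly separable classes.
    X = cudf.DataFrame(
        {"f0": [1.0, 1.0, 2.0, 3.0], "f1": [2.0, 3.0, 1.0, 1.0]}
    ).astype("float32")
    y = cudf.Series([1.0, 1.0, 0.0, 0.0], dtype="float32")
    X_df = dask_cudf.from_cudf(X, npartitions=2)
    y_df = dask_cudf.from_cudf(y, npartitions=2)

    lr = LogisticRegression()
    lr.fit(X_df, y_df)

    # predict() is what this PR enables; delayed=True returns a lazy Dask
    # collection that compute() materializes into a cuDF object.
    preds = lr.predict(X_df, delayed=True).compute()
    print(preds.to_numpy())

    client.close()
    cluster.close()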
python/cuml/tests/dask/test_dask_logistic_regression.py (28 changes: 23 additions & 5 deletions)
@@ -16,7 +16,7 @@
 from cuml.internals.safe_imports import gpu_only_import
 import pytest
 from cuml.dask.common import utils as dask_utils
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, mean_squared_error
 from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression as skLR
 from cuml.internals.safe_imports import cpu_only_import

@@ -155,28 +155,35 @@ def imp():
     X = np.array([(1, 2), (1, 3), (2, 1), (3, 1)], datatype)
     y = np.array([1.0, 1.0, 0.0, 0.0], datatype)

-    from cuml.dask.linear_model.logistic_regression import (
-        LogisticRegression as cumlLBFGS_dask,
-    )
+    from cuml.dask.linear_model import LogisticRegression as cumlLBFGS_dask

     X_df, y_df = _prep_training_data(client, X, y, n_parts)

     lr = cumlLBFGS_dask()
+
     lr.fit(X_df, y_df)
+
     lr_coef = lr.coef_.to_numpy()
     lr_intercept = lr.intercept_.to_numpy()

     assert len(lr_coef) == 1
     assert lr_coef[0] == pytest.approx([-0.71483153, 0.7148315], abs=1e-6)
     assert lr_intercept == pytest.approx([-2.2614916e-08], abs=1e-6)
+
+    # test predict
+    preds = lr.predict(X_df, delayed=True).compute().to_numpy()
+    from numpy.testing import assert_array_equal
+
+    assert_array_equal(preds, y, strict=True)


 @pytest.mark.mg
 @pytest.mark.parametrize("nrows", [1e5])
 @pytest.mark.parametrize("ncols", [20])
 @pytest.mark.parametrize("n_parts", [2, 23])
 @pytest.mark.parametrize("datatype", [np.float32])
-def test_lbfgs(nrows, ncols, n_parts, datatype, client):
+@pytest.mark.parametrize("delayed", [True, False])
+def test_lbfgs(nrows, ncols, n_parts, datatype, delayed, client):
     tolerance = 0.005

     def imp():
@@ -210,3 +217,14 @@ def imp():
     for i in range(len(lr_coef)):
         assert lr_coef[i] == pytest.approx(sk_coef[i], abs=tolerance)
     assert lr_intercept == pytest.approx(sk_intercept, abs=tolerance)
+
+    # test predict
+    cu_preds = lr.predict(X_df, delayed=delayed)
+    accuracy_cuml = accuracy_score(y, cu_preds.compute().to_numpy())
+
+    sk_preds = sk_model.predict(X)
+    accuracy_sk = accuracy_score(y, sk_preds)
+
+    assert (accuracy_cuml >= accuracy_sk) | (
+        np.abs(accuracy_cuml - accuracy_sk) < 1e-3
+    )
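The new delayed parametrization runs the same check through both execution modes of the inherited predict. A short sketch of the difference, reusing lr and X_df from a session like the sketch above; the exact return types depend on the input collection, so treat this as an illustration rather than a spec.

# delayed=True builds a lazy task graph; nothing executes until compute().
lazy_preds = lr.predict(X_df, delayed=True)
host_preds = lazy_preds.compute().to_numpy()

# delayed=False is expected to schedule the per-partition predictions
# eagerly on the workers; the result is still a distributed collection,
# which is why the test calls compute() in both cases before scoring.
eager_preds = lr.predict(X_df, delayed=False)
host_preds_eager = eager_preds.compute().to_numpy()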