Enable sklearn 1.2 #712

Merged
merged 4 commits on Dec 22, 2022
4 changes: 3 additions & 1 deletion econml/cate_interpreter/_interpreters.py
@@ -4,6 +4,8 @@
import abc
import numbers
import numpy as np
from packaging import version
import sklearn
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.utils import check_array
from ..policy import PolicyTree
@@ -149,7 +151,7 @@ def __init__(self, *,
self.include_uncertainty = include_model_uncertainty
self.uncertainty_level = uncertainty_level
self.uncertainty_only_on_leaves = uncertainty_only_on_leaves
self.criterion = "mse"
self.criterion = "squared_error" if version.parse(sklearn.__version__) >= version.parse("1.0") else "mse"
self.splitter = splitter
self.max_depth = max_depth
self.min_samples_split = min_samples_split
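For context, the criterion gate added above mirrors scikit-learn's own rename: 1.0 introduced "squared_error" and deprecated "mse" for DecisionTreeRegressor, and 1.2 removes the old alias entirely, so the string has to be chosen at runtime. A minimal standalone sketch of the same pattern:

import sklearn
from packaging import version
from sklearn.tree import DecisionTreeRegressor

# "mse" was deprecated in scikit-learn 1.0 and removed in 1.2; "squared_error"
# does not exist before 1.0, so pick the name based on the installed version.
criterion = ("squared_error"
             if version.parse(sklearn.__version__) >= version.parse("1.0")
             else "mse")
tree = DecisionTreeRegressor(criterion=criterion, max_depth=3)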
42 changes: 42 additions & 0 deletions econml/sklearn_extensions/model_selection.py
@@ -137,6 +137,27 @@ def split(self, X, y, sample_weight=None):
"""
return _split_weighted_sample(self, X, y, sample_weight, is_stratified=False)

def get_n_splits(self, X, y, groups=None):
"""Return the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
Always ignored, exists for compatibility.

y : object
Always ignored, exists for compatibility.

groups : object
Always ignored, exists for compatibility.

Returns
-------
n_splits : int
Returns the number of splitting iterations in the cross-validator.
"""
return self.n_splits

def _get_folds_from_splits(self, splits, sample_size):
folds = []
sample_indices = np.arange(sample_size)
@@ -213,6 +234,27 @@ def split(self, X, y, sample_weight=None):
"""
return _split_weighted_sample(self, X, y, sample_weight, is_stratified=True)

def get_n_splits(self, X, y, groups=None):
"""Return the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
Always ignored, exists for compatibility.

y : object
Always ignored, exists for compatibility.

groups : object
Always ignored, exists for compatibility.

Returns
-------
n_splits : int
Returns the number of splitting iterations in the cross-validator.
"""
return self.n_splits


class GridSearchCVList(BaseEstimator):
""" An extension of GridSearchCV that allows for passing a list of estimators each with their own
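Why the new get_n_splits methods matter: scikit-learn's model-selection machinery (GridSearchCV and friends) calls cv.get_n_splits(X, y, groups) on whatever splitter object it is handed, so a custom splitter that only implements split() is not enough. A hedged sketch with a toy splitter, not the EconML classes above:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV


class EveryKthSplit:
    """Toy cross-validator exposing the minimal interface sklearn expects."""

    def __init__(self, n_splits=3):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        indices = np.arange(len(X))
        for k in range(self.n_splits):
            test = indices[k::self.n_splits]
            yield np.setdiff1d(indices, test), test

    def get_n_splits(self, X=None, y=None, groups=None):
        # Called by GridSearchCV to lay out cv_results_; omitting it raises AttributeError.
        return self.n_splits


X, y = np.random.randn(30, 4), np.random.randn(30)
GridSearchCV(Ridge(), {"alpha": [0.1, 1.0]}, cv=EveryKthSplit()).fit(X, y)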
11 changes: 7 additions & 4 deletions econml/solutions/causal_analysis/_causal_analysis.py
@@ -25,7 +25,7 @@
from ...inference import NormalInferenceResults
from ...sklearn_extensions.linear_model import WeightedLasso
from ...sklearn_extensions.model_selection import GridSearchCVList
from ...utilities import _RegressionWrapper, inverse_onehot
from ...utilities import _RegressionWrapper, get_feature_names_or_default, inverse_onehot

# TODO: this utility is documented but internal; reimplement?
from sklearn.utils import _safe_indexing
@@ -220,13 +220,16 @@ def transform(self, X):
else:
return rest

# TODO: remove once older sklearn support is no longer needed
def get_feature_names(self, names=None):
return self.get_feature_names_out(names)

def get_feature_names_out(self, names=None):
if names is None:
names = [f"x{i}" for i in range(self.d_x)]
rest = _safe_indexing(names, self.passthrough, axis=0)
if self.has_cats:
cats = self.one_hot_encoder.get_feature_names(
_safe_indexing(names, self.categorical, axis=0))
cats = get_feature_names_or_default(self.one_hot_encoder, _safe_indexing(names, self.categorical, axis=0))
return np.concatenate((rest, cats))
else:
return rest
@@ -1445,7 +1448,7 @@ def _tree(self, is_policy, Xtest, feature_index, *, treatment_costs=0,
intrp.interpret(result.estimator, Xtest)
policy_values = None

return intrp, result.X_transformer.get_feature_names(self.feature_names_), treatment_names, policy_values
return intrp, result.X_transformer.get_feature_names_out(self.feature_names_), treatment_names, policy_values

# TODO: it seems like it would be better to just return the tree itself rather than plot it;
# however, the tree can't store the feature and treatment names we compute here...
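The get_feature_names_or_default helper imported above lives in econml/utilities.py, and its body is not part of this diff. Presumably it bridges the API change in which transformers' get_feature_names was deprecated in scikit-learn 1.0 and removed in 1.2 in favor of get_feature_names_out; a hedged sketch of that kind of dispatch (illustrative only, not the actual implementation):

def get_feature_names_or_default_sketch(featurizer, feature_names):
    """Illustrative stand-in for econml.utilities.get_feature_names_or_default."""
    if hasattr(featurizer, "get_feature_names_out"):
        # scikit-learn >= 1.0 transformers (e.g. OneHotEncoder, PolynomialFeatures)
        return featurizer.get_feature_names_out(feature_names)
    if hasattr(featurizer, "get_feature_names"):
        # legacy API, removed in scikit-learn 1.2
        return featurizer.get_feature_names(feature_names)
    # no naming API at all: fall back to the input names
    return feature_names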
14 changes: 7 additions & 7 deletions econml/tests/test_drlearner.py
@@ -20,7 +20,7 @@

from econml.dr import DRLearner, LinearDRLearner, SparseLinearDRLearner, ForestDRLearner
from econml.inference import BootstrapInference, StatsModelsInferenceDiscrete
from econml.utilities import shape, hstack, vstack, reshape, cross_product
from econml.utilities import get_feature_names_or_default, shape, hstack, vstack, reshape, cross_product
from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
import econml.tests.utilities # bugfix for assertWarns

@@ -451,10 +451,10 @@ def test_drlearner_all_attributes(self):
feature_names = ['A', 'B', 'C']
out_feat_names = feature_names
if featurizer is not None:
out_feat_names = featurizer.fit(
X).get_feature_names(feature_names)
out_feat_names = get_feature_names_or_default(featurizer.fit(X),
feature_names)
np.testing.assert_array_equal(
est.featurizer_.n_input_features_, 3)
est.featurizer_.n_features_in_, 3)
np.testing.assert_array_equal(est.cate_feature_names(feature_names),
out_feat_names)

@@ -631,10 +631,10 @@ def test_drlearner_with_inference_all_attributes(self):
out_feat_names = feature_names
if X is not None:
if (featurizer is not None):
out_feat_names = featurizer.fit(
X).get_feature_names(feature_names)
out_feat_names = get_feature_names_or_default(featurizer.fit(X),
feature_names)
np.testing.assert_array_equal(
est.featurizer_.n_input_features_, 2)
est.featurizer_.n_features_in_, 2)
np.testing.assert_array_equal(est.cate_feature_names(feature_names),
out_feat_names)

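The attribute change exercised in these tests tracks another removal: n_input_features_ on transformers such as PolynomialFeatures was deprecated in scikit-learn 1.0 and is gone in 1.2, with the standard n_features_in_ as its replacement. A quick illustration:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

featurizer = PolynomialFeatures(degree=2).fit(np.random.randn(10, 3))
assert featurizer.n_features_in_ == 3  # n_input_features_ no longer exists on sklearn 1.2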
22 changes: 13 additions & 9 deletions econml/tests/test_inference.py
@@ -14,7 +14,7 @@
from econml.inference import (BootstrapInference, NormalInferenceResults,
EmpiricalInferenceResults, PopulationSummaryResults)
from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression, DebiasedLasso
from econml.utilities import get_input_columns
from econml.utilities import get_feature_names_or_default, get_input_columns


class TestInference(unittest.TestCase):
@@ -51,8 +51,9 @@ def test_summary(self):
summary_results = cate_est.summary()
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
default_names = get_input_columns(TestInference.X)
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(default_names)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
default_names)
np.testing.assert_array_equal(coef_rows, fnames)
intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
np.testing.assert_array_equal(intercept_rows, ['cate_intercept'])
@@ -71,8 +72,9 @@ def test_summary(self):
fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
summary_results = cate_est.summary(feature_names=fnames)
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(input_features=fnames)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
fnames)
np.testing.assert_array_equal(coef_rows, fnames)
cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(), featurizer=None)
cate_est.fit(
@@ -145,8 +147,9 @@ def test_summary_discrete(self):
summary_results = cate_est.summary(T=1)
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
default_names = get_input_columns(TestInference.X)
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(default_names)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
default_names)
np.testing.assert_array_equal(coef_rows, fnames)
intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
np.testing.assert_array_equal(intercept_rows, ['cate_intercept'])
@@ -166,8 +169,9 @@ def test_summary_discrete(self):
fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
summary_results = cate_est.summary(T=1, feature_names=fnames)
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(input_features=fnames)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
fnames)
np.testing.assert_array_equal(coef_rows, fnames)
cate_est = LinearDRLearner(model_regression=LinearRegression(),
model_propensity=LogisticRegression(), featurizer=None)
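For reference, the kind of names get_feature_names_out produces for a degree-2 PolynomialFeatures on scikit-learn >= 1.0 (a small illustration, not output taken from the tests):

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False).fit(np.random.randn(5, 2))
print(poly.get_feature_names_out(["A", "B"]))
# something along the lines of: ['A' 'B' 'A^2' 'A B' 'B^2']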
3 changes: 2 additions & 1 deletion econml/tests/test_integration.py
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from econml.utilities import get_feature_names_or_default
import numpy as np
import pandas as pd
import unittest
@@ -76,7 +77,7 @@ def test_dml(self):
est.fit(Y, T, X=X, W=W, inference='statsmodels')
self._check_input_names(
est.summary(),
feat_comp=est.original_featurizer.get_feature_names(X.columns))
feat_comp=get_feature_names_or_default(est.original_featurizer, X.columns))
est.featurizer = FunctionTransformer()
est.fit(Y, T, X=X, W=W, inference='statsmodels')
self._check_input_names(

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.cfg
@@ -34,7 +34,7 @@ packages = find_namespace:
install_requires =
numpy
scipy > 1.4.0
scikit-learn > 0.22.0, < 1.2
scikit-learn > 0.22.0, < 1.3
sparse
joblib >= 0.13.0
statsmodels >= 0.10