Enable sklearn 1.2 #712

Merged
merged 4 commits on Dec 22, 2022
4 changes: 3 additions & 1 deletion econml/cate_interpreter/_interpreters.py
@@ -4,6 +4,8 @@
import abc
import numbers
import numpy as np
from packaging import version
import sklearn
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.utils import check_array
from ..policy import PolicyTree
@@ -149,7 +151,7 @@ def __init__(self, *,
self.include_uncertainty = include_model_uncertainty
self.uncertainty_level = uncertainty_level
self.uncertainty_only_on_leaves = uncertainty_only_on_leaves
self.criterion = "mse"
self.criterion = "squared_error" if version.parse(sklearn.__version__) >= version.parse("1.0") else "mse"
self.splitter = splitter
self.max_depth = max_depth
self.min_samples_split = min_samples_split
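For context, the criterion gate added above mirrors scikit-learn's own rename: 1.0 introduced "squared_error" and deprecated "mse" for DecisionTreeRegressor, and 1.2 removes the old alias entirely, so the string has to be chosen at runtime. A minimal standalone sketch of the same pattern:

import sklearn
from packaging import version
from sklearn.tree import DecisionTreeRegressor

# "mse" was deprecated in scikit-learn 1.0 and removed in 1.2; "squared_error"
# does not exist before 1.0, so pick the name based on the installed version.
criterion = ("squared_error"
             if version.parse(sklearn.__version__) >= version.parse("1.0")
             else "mse")
tree = DecisionTreeRegressor(criterion=criterion, max_depth=3)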
42 changes: 42 additions & 0 deletions econml/sklearn_extensions/model_selection.py
@@ -137,6 +137,27 @@ def split(self, X, y, sample_weight=None):
"""
return _split_weighted_sample(self, X, y, sample_weight, is_stratified=False)

def get_n_splits(self, X, y, groups=None):
"""Return the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
Always ignored, exists for compatibility.

y : object
Always ignored, exists for compatibility.

groups : object
Always ignored, exists for compatibility.

Returns
-------
n_splits : int
Returns the number of splitting iterations in the cross-validator.
"""
return self.n_splits

def _get_folds_from_splits(self, splits, sample_size):
folds = []
sample_indices = np.arange(sample_size)
@@ -213,6 +234,27 @@ def split(self, X, y, sample_weight=None):
"""
return _split_weighted_sample(self, X, y, sample_weight, is_stratified=True)

def get_n_splits(self, X, y, groups=None):
"""Return the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
Always ignored, exists for compatibility.

y : object
Always ignored, exists for compatibility.

groups : object
Always ignored, exists for compatibility.

Returns
-------
n_splits : int
Returns the number of splitting iterations in the cross-validator.
"""
return self.n_splits


class GridSearchCVList(BaseEstimator):
""" An extension of GridSearchCV that allows for passing a list of estimators each with their own
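Why the new get_n_splits methods matter: scikit-learn's model-selection machinery (GridSearchCV and friends) calls cv.get_n_splits(X, y, groups) on whatever splitter object it is handed, so a custom splitter that only implements split() is not enough. A hedged sketch with a toy splitter, not the EconML classes above:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV


class EveryKthSplit:
    """Toy cross-validator exposing the minimal interface sklearn expects."""

    def __init__(self, n_splits=3):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        indices = np.arange(len(X))
        for k in range(self.n_splits):
            test = indices[k::self.n_splits]
            yield np.setdiff1d(indices, test), test

    def get_n_splits(self, X=None, y=None, groups=None):
        # Called by GridSearchCV to lay out cv_results_; omitting it raises AttributeError.
        return self.n_splits


X, y = np.random.randn(30, 4), np.random.randn(30)
GridSearchCV(Ridge(), {"alpha": [0.1, 1.0]}, cv=EveryKthSplit()).fit(X, y)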
11 changes: 7 additions & 4 deletions econml/solutions/causal_analysis/_causal_analysis.py
@@ -25,7 +25,7 @@
from ...inference import NormalInferenceResults
from ...sklearn_extensions.linear_model import WeightedLasso
from ...sklearn_extensions.model_selection import GridSearchCVList
from ...utilities import _RegressionWrapper, inverse_onehot
from ...utilities import _RegressionWrapper, get_feature_names_or_default, inverse_onehot

# TODO: this utility is documented but internal; reimplement?
from sklearn.utils import _safe_indexing
@@ -220,13 +220,16 @@ def transform(self, X):
else:
return rest

# TODO: remove once older sklearn support is no longer needed
def get_feature_names(self, names=None):
return self.get_feature_names_out(names)

def get_feature_names_out(self, names=None):
if names is None:
names = [f"x{i}" for i in range(self.d_x)]
rest = _safe_indexing(names, self.passthrough, axis=0)
if self.has_cats:
cats = self.one_hot_encoder.get_feature_names(
_safe_indexing(names, self.categorical, axis=0))
cats = get_feature_names_or_default(self.one_hot_encoder, _safe_indexing(names, self.categorical, axis=0))
return np.concatenate((rest, cats))
else:
return rest
@@ -1445,7 +1448,7 @@ def _tree(self, is_policy, Xtest, feature_index, *, treatment_costs=0,
intrp.interpret(result.estimator, Xtest)
policy_values = None

return intrp, result.X_transformer.get_feature_names(self.feature_names_), treatment_names, policy_values
return intrp, result.X_transformer.get_feature_names_out(self.feature_names_), treatment_names, policy_values

# TODO: it seems like it would be better to just return the tree itself rather than plot it;
# however, the tree can't store the feature and treatment names we compute here...
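The get_feature_names_or_default helper imported above lives in econml/utilities.py, and its body is not part of this diff. Presumably it bridges the API change in which transformers' get_feature_names was deprecated in scikit-learn 1.0 and removed in 1.2 in favor of get_feature_names_out; a hedged sketch of that kind of dispatch (illustrative only, not the actual implementation):

def get_feature_names_or_default_sketch(featurizer, feature_names):
    """Illustrative stand-in for econml.utilities.get_feature_names_or_default."""
    if hasattr(featurizer, "get_feature_names_out"):
        # scikit-learn >= 1.0 transformers (e.g. OneHotEncoder, PolynomialFeatures)
        return featurizer.get_feature_names_out(feature_names)
    if hasattr(featurizer, "get_feature_names"):
        # legacy API, removed in scikit-learn 1.2
        return featurizer.get_feature_names(feature_names)
    # no naming API at all: fall back to the input names
    return feature_names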
14 changes: 7 additions & 7 deletions econml/tests/test_drlearner.py
@@ -20,7 +20,7 @@

from econml.dr import DRLearner, LinearDRLearner, SparseLinearDRLearner, ForestDRLearner
from econml.inference import BootstrapInference, StatsModelsInferenceDiscrete
from econml.utilities import shape, hstack, vstack, reshape, cross_product
from econml.utilities import get_feature_names_or_default, shape, hstack, vstack, reshape, cross_product
from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
import econml.tests.utilities # bugfix for assertWarns

@@ -451,10 +451,10 @@ def test_drlearner_all_attributes(self):
feature_names = ['A', 'B', 'C']
out_feat_names = feature_names
if featurizer is not None:
out_feat_names = featurizer.fit(
X).get_feature_names(feature_names)
out_feat_names = get_feature_names_or_default(featurizer.fit(X),
feature_names)
np.testing.assert_array_equal(
est.featurizer_.n_input_features_, 3)
est.featurizer_.n_features_in_, 3)
np.testing.assert_array_equal(est.cate_feature_names(feature_names),
out_feat_names)

@@ -631,10 +631,10 @@ def test_drlearner_with_inference_all_attributes(self):
out_feat_names = feature_names
if X is not None:
if (featurizer is not None):
out_feat_names = featurizer.fit(
X).get_feature_names(feature_names)
out_feat_names = get_feature_names_or_default(featurizer.fit(X),
feature_names)
np.testing.assert_array_equal(
est.featurizer_.n_input_features_, 2)
est.featurizer_.n_features_in_, 2)
np.testing.assert_array_equal(est.cate_feature_names(feature_names),
out_feat_names)

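The attribute change exercised in these tests tracks another removal: n_input_features_ on transformers such as PolynomialFeatures was deprecated in scikit-learn 1.0 and is gone in 1.2, with the standard n_features_in_ as its replacement. A quick illustration:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

featurizer = PolynomialFeatures(degree=2).fit(np.random.randn(10, 3))
assert featurizer.n_features_in_ == 3  # n_input_features_ no longer exists on sklearn 1.2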
22 changes: 13 additions & 9 deletions econml/tests/test_inference.py
@@ -14,7 +14,7 @@
from econml.inference import (BootstrapInference, NormalInferenceResults,
EmpiricalInferenceResults, PopulationSummaryResults)
from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression, DebiasedLasso
from econml.utilities import get_input_columns
from econml.utilities import get_feature_names_or_default, get_input_columns


class TestInference(unittest.TestCase):
@@ -51,8 +51,9 @@ def test_summary(self):
summary_results = cate_est.summary()
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
default_names = get_input_columns(TestInference.X)
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(default_names)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
default_names)
np.testing.assert_array_equal(coef_rows, fnames)
intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
np.testing.assert_array_equal(intercept_rows, ['cate_intercept'])
@@ -71,8 +72,9 @@ def test_summary(self):
fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
summary_results = cate_est.summary(feature_names=fnames)
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(input_features=fnames)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
fnames)
np.testing.assert_array_equal(coef_rows, fnames)
cate_est = LinearDML(model_t=LinearRegression(), model_y=LinearRegression(), featurizer=None)
cate_est.fit(
@@ -145,8 +147,9 @@ def test_summary_discrete(self):
summary_results = cate_est.summary(T=1)
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
default_names = get_input_columns(TestInference.X)
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(default_names)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
default_names)
np.testing.assert_array_equal(coef_rows, fnames)
intercept_rows = np.asarray(summary_results.tables[1].data)[1:, 0]
np.testing.assert_array_equal(intercept_rows, ['cate_intercept'])
@@ -166,8 +169,9 @@ def test_summary_discrete(self):
fnames = ['Q' + str(i) for i in range(TestInference.d_x)]
summary_results = cate_est.summary(T=1, feature_names=fnames)
coef_rows = np.asarray(summary_results.tables[0].data)[1:, 0]
fnames = PolynomialFeatures(degree=2, include_bias=False).fit(
TestInference.X).get_feature_names(input_features=fnames)
fnames = get_feature_names_or_default(PolynomialFeatures(degree=2,
include_bias=False).fit(TestInference.X),
fnames)
np.testing.assert_array_equal(coef_rows, fnames)
cate_est = LinearDRLearner(model_regression=LinearRegression(),
model_propensity=LogisticRegression(), featurizer=None)
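For reference, the kind of names get_feature_names_out produces for a degree-2 PolynomialFeatures on scikit-learn >= 1.0 (a small illustration, not output taken from the tests):

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False).fit(np.random.randn(5, 2))
print(poly.get_feature_names_out(["A", "B"]))
# something along the lines of: ['A' 'B' 'A^2' 'A B' 'B^2']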
3 changes: 2 additions & 1 deletion econml/tests/test_integration.py
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from econml.utilities import get_feature_names_or_default
import numpy as np
import pandas as pd
import unittest
@@ -76,7 +77,7 @@ def test_dml(self):
est.fit(Y, T, X=X, W=W, inference='statsmodels')
self._check_input_names(
est.summary(),
feat_comp=est.original_featurizer.get_feature_names(X.columns))
feat_comp=get_feature_names_or_default(est.original_featurizer, X.columns))
est.featurizer = FunctionTransformer()
est.fit(Y, T, X=X, W=W, inference='statsmodels')
self._check_input_names(

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.cfg
@@ -34,7 +34,7 @@ packages = find_namespace:
install_requires =
numpy
scipy > 1.4.0
scikit-learn > 0.22.0, < 1.2
scikit-learn > 0.22.0, < 1.3
sparse
joblib >= 0.13.0
statsmodels >= 0.10