Correct pivot definition

py-why · Aug 13, 2020 · c0529c4 · c0529c4
1 parent 6e1a3fc
commit c0529c4
Show file tree

Hide file tree

Showing 3 changed files with 288 additions and 19 deletions.
diff --git a/econml/bootstrap.py b/econml/bootstrap.py
@@ -6,6 +6,253 @@
 from joblib import Parallel, delayed
 from sklearn.base import clone
 from scipy.stats import norm
+from collections import OrderedDict
+import pandas as pd
+
+
+class BootstrapInferenceResults:
+    """
+    Results class for bootstrap inference.
+
+    Parameters
+    ----------
+    pred_dist : array-like, shape (b, m, d_y, d_t) or (b, m, d_y)
+        the raw predictions of the metric using b times bootstrap.
+        Note that when Y or T is a vector rather than a 2-dimensional array,
+        the corresponding singleton dimensions should be collapsed
+    kind : 'percentile' or 'pivot'
+        Whether to use percentile or pivot-based intervals
+    d_t: int
+        Number of treatments
+    d_y: int
+        Number of outputs
+    inf_type: string
+        The type of inference result.
+        It could be either 'effect', 'coefficient' or 'intercept'.
+    fname_transformer: None or predefined function
+        The transform function to get the corresponding feature names from featurizer
+    """
+
+    def __init__(self, pred_dist, kind, d_y, d_t, inf_type, fname_transformer):
+        self.pred_dist = pred_dist
+        self.kind = kind
+        self.d_t = d_t
+        self.d_y = d_y
+        self.inf_type = inf_type
+        self.fname_transformer = fname_transformer
+
+    @property
+    def point_estimate(self):
+        """
+        Get the point estimate of each treatment on each outcome for each sample X[i].
+
+        Returns
+        -------
+        prediction : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The point estimate of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return np.mean(self.pred_dist, axis=0)
+
+    @property
+    def stderr(self):
+        """
+        Get the standard error of the metric of each treatment on each outcome for each sample X[i].
+
+        Returns
+        -------
+        stderr : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The standard error of the metric of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return np.std(self.pred_dist, axis=0)
+
+    @property
+    def var(self):
+        """
+        Get the variance of the metric of each treatment on each outcome for each sample X[i].
+
+        Returns
+        -------
+        var : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The variance of the metric of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return self.stderr**2
+
+    def conf_int(self, alpha=0.1):
+        """
+        Get the confidence interval of the metric of each treatment on each outcome for each sample X[i].
+
+        Parameters
+        ----------
+        alpha: optional float in [0, 1] (Default=0.1)
+            The overall level of confidence of the reported interval.
+            The alpha/2, 1-alpha/2 confidence interval is reported.
+
+        Returns
+        -------
+        lower, upper: tuple of arrays, shape (m, d_y, d_t) or (m, d_y)
+            The lower and the upper bounds of the confidence interval for each quantity.
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        lower = alpha / 2
+        upper = (1 - alpha) / 2
+        if self.kind == 'percentile':
+            return np.percentile(self.pred_dist, lower, axis=0), np.percentile(self.pred_dist, upper, axis=0)
+        elif self.kind == 'pivot':
+            est = self.point_estimate
+            return (2 * est - np.percentile(self.pred_dist, upper, axis=0),
+                    2 * est - np.percentile(self.pred_dist, lower, axis=0))
+        else:
+            raise ValueError("Unrecognized bootstrap kind; valid kinds are 'percentile' and 'pivot'")
+
+    def pvalue(self, value=0):
+        """
+        Get the p value of the each treatment on each outcome for each sample X[i].
+
+        Parameters
+        ----------
+        value: optinal float (default=0)
+            The mean value of the metric you'd like to test under null hypothesis.
+
+        Returns
+        -------
+        pvalue : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The p value of of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+
+        if self.kind == 'percentile':
+            dist = self.pred_dist
+        elif self.kind == 'pivot':
+            est = np.mean(self.pred_dist, axis=0)
+            dist = 2 * est - pred_dist
+        else:
+            raise ValueError("Unrecognized bootstrap kind; valid kinds are 'percentile' and 'pivot'")
+        return min((dist < value).sum(), (dist > value).sum()) / dist.shape[0]
+
+    def zstat(self, value=0):
+        """
+        Get the z statistic of the metric of each treatment on each outcome for each sample X[i].
+
+        Parameters
+        ----------
+        value: optinal float (default=0)
+            The mean value of the metric you'd like to test under null hypothesis.
+
+        Returns
+        -------
+        zstat : array-like, shape (m, d_y, d_t) or (m, d_y)
+            The z statistic of the metric of each treatment on each outcome for each sample X[i].
+            Note that when Y or T is a vector rather than a 2-dimensional array,
+            the corresponding singleton dimensions in the output will be collapsed
+            (e.g. if both are vectors, then the output of this method will also be a vector)
+        """
+        return (self.point_estimate - value) / self.stderr
+
+    def summary_frame(self, alpha=0.1, value=0, decimals=3, feat_name=None):
+        """
+        Output the dataframe for all the inferences above.
+
+        Parameters
+        ----------
+        alpha: optional float in [0, 1] (default=0.1)
+            The overall level of confidence of the reported interval.
+            The alpha/2, 1-alpha/2 confidence interval is reported.
+        value: optinal float (default=0)
+            The mean value of the metric you'd like to test under null hypothesis.
+        decimals: optinal int (default=3)
+            Number of decimal places to round each column to.
+        feat_name: optional list of strings or None (default is None)
+            The input of the feature names
+
+        Returns
+        -------
+        output: pandas dataframe
+            The output dataframe includes point estimate, standard error, z score, p value and confidence intervals
+            of the estimated metric of each treatment on each outcome for each sample X[i]
+        """
+        ci_mean = self.conf_int(alpha=alpha)
+        to_include = OrderedDict()
+        to_include['point_estimate'] = self._array_to_frame(self.d_t, self.d_y, self.point_estimate)
+        to_include['stderr'] = self._array_to_frame(self.d_t, self.d_y, self.stderr)
+        to_include['zstat'] = self._array_to_frame(self.d_t, self.d_y, self.zstat(value))
+        to_include['pvalue'] = self._array_to_frame(self.d_t, self.d_y, self.pvalue(value))
+        to_include['ci_lower'] = self._array_to_frame(self.d_t, self.d_y, ci_mean[0])
+        to_include['ci_upper'] = self._array_to_frame(self.d_t, self.d_y, ci_mean[1])
+        res = pd.concat(to_include, axis=1, keys=to_include.keys()).round(decimals)
+        if self.d_t == 1:
+            res.columns = res.columns.droplevel(1)
+        if self.d_y == 1:
+            res.index = res.index.droplevel(1)
+        if self.inf_type == 'coefficient':
+            if feat_name is not None and self.fname_transformer:
+                ind = self.fname_transformer(feat_name)
+            else:
+                ct = res.shape[0] // self.d_y
+                ind = ['X' + str(i) for i in range(ct)]
+
+            if self.d_y > 1:
+                res.index = res.index.set_levels(ind, level=0)
+            else:
+                res.index = ind
+        elif self.inf_type == 'intercept':
+            if self.d_y > 1:
+                res.index = res.index.set_levels(['intercept'], level=0)
+            else:
+                res.index = ['intercept']
+        return res
+
+    def population_summary(self, alpha=0.1, value=0, decimals=3, tol=0.001):
+        """
+        Output the object of population summary results.
+
+        Parameters
+        ----------
+        alpha: optional float in [0, 1] (default=0.1)
+            The overall level of confidence of the reported interval.
+            The alpha/2, 1-alpha/2 confidence interval is reported.
+        value: optinal float (default=0)
+            The mean value of the metric you'd like to test under null hypothesis.
+        decimals: optinal int (default=3)
+            Number of decimal places to round each column to.
+        tol:  optinal float (default=0.001)
+            The stopping criterion. The iterations will stop when the outcome is less than ``tol``
+
+        Returns
+        -------
+        PopulationSummaryResults: object
+            The population summary results instance contains the different summary analysis of point estimate
+            for sample X on each treatment and outcome.
+        """
+        if self.inf_type == 'effect':
+            return PopulationSummaryResults(pred=self.point_estimate, pred_stderr=self.stderr,
+                                            d_t=self.d_t, d_y=self.d_y,
+                                            alpha=alpha, value=value, decimals=decimals, tol=tol)
+        else:
+            raise AttributeError(self.inf_type + " inference doesn't support population_summary function!")
+
+    def _array_to_frame(self, d_t, d_y, arr):
+        if np.isscalar(arr):
+            arr = np.array([arr])
+        if self.inf_type == 'coefficient':
+            arr = np.moveaxis(arr, -1, 0)
+        arr = arr.reshape((-1, d_y, d_t))
+        df = pd.concat([pd.DataFrame(x) for x in arr], keys=np.arange(arr.shape[0]))
+        df.index = df.index.set_levels(['Y' + str(i) for i in range(d_y)], level=1)
+        df.columns = ['T' + str(i) for i in range(d_t)]
+        return df
 
 
 class BootstrapEstimator:
@@ -46,9 +293,10 @@ class BootstrapEstimator:
         that should be preferred (meaning this wrapper will compute the mean of it).
         This option only affects behavior if `compute_means` is set to ``True``.
 
-    bootstrap_type: 'percentile' or 'standard', default 'percentile'
+    bootstrap_type: 'percentile', 'pivot', or 'normal', default 'percentile'
         Bootstrap method used to compute results.  'percentile' will result in using the empiracal CDF of
-        the replicated copmutations of the statistics.   'standard' will instead compute a pivot interval
+        the replicated copmutations of the statistics.   'pivot' will also use the replicates but create a pivot
+        interval that also relies on the estimate over the entire dataset.  'normal' will instead compute an interval
         assuming the replicates are normally distributed.
     """
 
@@ -59,7 +307,7 @@ def __init__(self, wrapped, n_bootstrap_samples=1000, n_jobs=None, compute_means
         self._n_jobs = n_jobs
         self._compute_means = compute_means
         self._prefer_wrapped = prefer_wrapped
-        self._boostrap_type = bootstrap_type
+        self._bootstrap_type = bootstrap_type
         self._wrapped = wrapped
 
     # TODO: Add a __dir__ implementation?
@@ -155,12 +403,23 @@ def call_with_bounds(can_call, lower, upper):
                 def percentile_bootstrap(arr, _):
                     return np.percentile(arr, lower, axis=0), np.percentile(arr, upper, axis=0)
 
-                def pivot_bootstrap(arr, est):
+                def pivot_bootstrap(arr, _):
+                    # TODO: do we want the central estimate to be the average of all bootstrap estimates,
+                    #       or the original estimate over the entire non-bootstrapped population?
+                    est = np.mean(arr, axis=0)
+                    return 2 * est - np.percentile(arr, upper, axis=0), 2 * est - np.percentile(arr, lower, axis=0)
+
+                def normal_bootstrap(arr, _):
+                    est = np.mean(arr, axis=0)
                     std = np.std(arr, axis=0)
                     return est - norm.ppf(upper / 100) * std, est - norm.ppf(lower / 100) * std
-                # TODO: studentized bootstrap? would be more accurate in most cases but can we avoid
-                #       second level bootstrap which would be prohibitive computationally
-                fn = {'percentile': percentile_bootstrap, 'standard': pivot_bootstrap}[self._boostrap_type]
+
+                # TODO: studentized bootstrap? this would be more accurate in most cases but can we avoid
+                #       second level bootstrap which would be prohibitive computationally?
+
+                fn = {'percentile': percentile_bootstrap,
+                      'normal': normal_bootstrap,
+                      'pivot': pivot_bootstrap}[self._boostrap_type]
                 return proxy(can_call, prefix, fn)
 
             can_call = callable(getattr(self._instances[0], prefix))
@@ -178,8 +437,7 @@ def call(lower=5, upper=95):
         def get_inference():
             # can't import from econml.inference at top level without creating mutual dependencies
             from .inference import InferenceResults
-            # TODO: consider treating percentile bootstrap differently since we can work directly with
-            #       the empirical distribution
+
             prefix = name[: - len("_inference")]
             if prefix in ['const_marginal_effect', 'effect']:
                 inf_type = 'effect'
@@ -190,17 +448,26 @@ def get_inference():
             else:
                 raise AttributeError("Unsupported inference: " + name)
 
-            def get_inference():
+            d_t = self._wrapped._d_t[0] if self._wrapped._d_t else 1
+            d_t = 1 if prefix == 'effect' else d_t
+            d_y = self._wrapped._d_y[0] if self._wrapped._d_y else 1
+
+            def get_inference_nonparametric(kind):
+                return proxy(callable(getattr(self._instances[0], prefix)), prefix,
+                             lambda arr, _: BootstrapInferenceResults(pred_dist=arr, kind=kind,
+                                                                      d_t=d_t, d_y=d_y, inf_type=inf_type,
+                                                                      fname_transformer=None))
+
+            def get_inference_parametric():
                 pred = getattr(self._wrapped, prefix)
                 stderr = getattr(self, prefix + '_std')
-                d_t = self._wrapped._d_t[0] if self._wrapped._d_t else 1
-                d_t = 1 if prefix == 'effect' else d_t
-                d_y = self._wrapped._d_y[0] if self._wrapped._d_y else 1
                 return InferenceResults(d_t=d_t, d_y=d_y, pred=pred,
                                         pred_stderr=stderr, inf_type=inf_type,
                                         pred_dist=None, fname_transformer=None)
 
-            return get_inference
+            return {'normal': get_inference_parametric,
+                    'percentile': lambda: get_inference_nonparametric('percentile'),
+                    'pivot': lambda: get_inference_nonparametric('pivot')}[self._bootstrap_type]
 
         caught = None
         m = None

diff --git a/econml/inference.py b/econml/inference.py
@@ -45,10 +45,12 @@ class BootstrapInference(Inference):
     n_jobs: int, optional (default -1)
         The maximum number of concurrently running jobs, as in joblib.Parallel.
 
-    bootstrap_type: 'percentile' or 'standard', default 'percentile'
-        Bootstrap method used to compute results.  'percentile' will result in using the empiracal CDF of
-        the replicated copmutations of the statistics.   'standard' will instead compute a pivot interval
-        assuming the replicates are normally distributed.
+    bootstrap_type: 'percentile', 'pivot', or 'normal', default 'percentile'
+        Bootstrap method used to compute results.
+        'percentile' will result in using the empiracal CDF of the replicated copmutations of the statistics.
+        'pivot' will also use the replicates but create a pivot interval that also relies on the estimate
+        over the entire dataset.
+        'normal' will instead compute a pivot interval assuming the replicates are normally distributed.
     """
 
     def __init__(self, n_bootstrap_samples=100, n_jobs=-1, bootstrap_type='percentile'):

diff --git a/econml/tests/test_bootstrap.py b/econml/tests/test_bootstrap.py
@@ -18,7 +18,7 @@ class TestBootstrap(unittest.TestCase):
     def test_with_sklearn(self):
         """Test that we can bootstrap sklearn estimators."""
         for n_jobs in [None, -1]:  # test parallelism
-            for kind in ['percentile', 'standard']:  # test both percentile and pivot intervals
+            for kind in ['percentile', 'pivot', 'normal']:  # test both percentile and pivot intervals
                 x = np.random.normal(size=(1000, 1))
                 y = x * 0.5 + np.random.normal(size=(1000, 1))
                 y = y.flatten()