py-econometrics · s3alfisc · Jun 19, 2024 · Jun 17, 2024 · Jun 17, 2024 · Jun 19, 2024
diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml
@@ -21,4 +21,4 @@ jobs:
           config-name: release-config.yml
           disable-autolabeler: true
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/pyfixest/estimation/multcomp.py b/pyfixest/estimation/multcomp.py
@@ -65,7 +65,11 @@ def bonferroni(models: list[Union[Feols, Fepois]], param: str) -> pd.DataFrame:
 
 
 def rwolf(
-    models: list[Union[Feols, Fepois]], param: str, reps: int, seed: int
+    models: list[Union[Feols, Fepois]],
+    param: str,
+    reps: int,
+    seed: int,
+    sampling_method: str = "wild-bootstrap",
 ) -> pd.DataFrame:
     """
     Compute Romano-Wolf adjusted p-values for multiple hypothesis testing.
@@ -86,6 +90,10 @@ def rwolf(
         The number of bootstrap replications.
     seed : int
         The seed for the random number generator.
+    sampling_method : str
+        Sampling method for computing resampled statistics.
+        Users can choose either bootstrap('wild-bootstrap')
+        or randomization inference('ri')
 
     Returns
     -------
@@ -142,34 +150,50 @@ def rwolf(
     for i in range(S):
         model = models[i]
 
-        wildboot_res_df, bootstrapped_t_stats = model.wildboottest(
-            param=param,
-            reps=reps,
-            return_bootstrapped_t_stats=True,
-            seed=seed,  # all S iterations require the same bootstrap samples, hence seed needs to be reset
-        )
-        t_stats[i] = wildboot_res_df["t value"]
-        boot_t_stats[:, i] = bootstrapped_t_stats
+        if sampling_method == "wild-bootstrap":
+            wildboot_res_df, bootstrapped_t_stats = model.wildboottest(
+                param=param,
+                reps=reps,
+                return_bootstrapped_t_stats=True,
+                seed=seed,  # all S iterations require the same bootstrap samples, hence seed needs to be reset
+            )
+
+            t_stats[i] = wildboot_res_df["t value"]
+            boot_t_stats[:, i] = bootstrapped_t_stats
+
+        elif sampling_method == "ri":
+            rng = np.random.default_rng(seed)
+            model.ritest(
+                resampvar=param,
+                rng=rng,
+                reps=reps,
+                type="randomization-t",
+                store_ritest_statistics=True,
+            )
+
+            t_stats[i] = model._ritest_sample_stat
+            boot_t_stats[:, i] = model._ritest_statistics
+        else:
+            raise ValueError("Invalid sampling method specified")
 
     pval = _get_rwolf_pval(t_stats, boot_t_stats)
 
     all_model_stats.loc["RW Pr(>|t|)"] = pval
     all_model_stats.columns = pd.Index([f"est{i}" for i, _ in enumerate(models)])
-
     return all_model_stats
 
 
 def _get_rwolf_pval(t_stats, boot_t_stats):
     """
-    Compute Romano-Wolf adjusted p-values based on bootstrapped t-statistics.
+    Compute Romano-Wolf adjusted p-values based on bootstrapped(or "ri") t-statistics.
 
     Parameters
     ----------
     t_stats (np.ndarray): A vector of length S - where S is the number of
                         tested hypotheses - containing the original,
                         non-bootstrappe t-statisics.
     boot_t_stats (np.ndarray): A (B x S) matrix containing the
-                            bootstrapped t-statistics.
+                            bootstrapped(or "ri") t-statistics.
 
     Returns
     -------

diff --git a/tests/test_errors.py b/tests/test_errors.py
@@ -187,6 +187,14 @@ def test_multcomp_errors():
         rwolf(fit1.to_list(), param="X2", reps=999, seed=92)
 
 
+def test_multcomp_sampling_errors():
+    data = get_data().dropna()
+    # Sampling method not supported in "rwolf"
+    fit1 = feols("Y + Y2 ~ X1 | f1", data=data)
+    with pytest.raises(ValueError):
+        rwolf(fit1.to_list(), param="X1", reps=999, seed=92, sampling_method="abc")
+
+
 def test_rwolf_error():
     rng = np.random.default_rng(123)
 

diff --git a/tests/test_multcomp.py b/tests/test_multcomp.py
@@ -143,3 +143,47 @@ def test_stepwise_function():
     stepwise_r = wildrwolf.get_rwolf_pval(t_stat, t_boot)
 
     np.testing.assert_allclose(stepwise_py, stepwise_r)
+
+
+# Import data from pyfixest
+
+
+@pytest.mark.parametrize("seed", [1000, 2000, 3000])
+@pytest.mark.parametrize("reps", [999, 1999])
+def test_sampling_scheme(seed, reps):
+    # Compare RW adjusted p-values from RI and WB resampling methods
+    # The p-values should be "largely" similar regardless of resampling methods
+
+    data = get_data().dropna()
+    rng = np.random.default_rng(seed)
+
+    # Perturb covariates(not treatment variable)
+    data["Y2"] = data["Y"] * rng.normal(0.2, 1, size=len(data))
+    data["X2"] = rng.normal(size=len(data))
+
+    fit1 = feols("Y ~ X1 + X2", data=data)
+    fit2 = feols("Y ~ X1 + X2 + Y2", data=data)
+
+    # Run rwolf with "ri" sampling method
+    rwolf_df_ri = rwolf([fit1, fit2], "X1", reps=reps, seed=seed, sampling_method="ri")
+
+    ri_pval = rwolf_df_ri["est1"]["RW Pr(>|t|)"]
+
+    # Run rwolf with "wild-bootstrap" sampling method
+    rwolf_df_wb = rwolf(
+        [fit1, fit2], "X1", reps=reps, seed=seed, sampling_method="wild-bootstrap"
+    )
+
+    wb_pval = rwolf_df_wb["est1"]["RW Pr(>|t|)"]
+
+    # Calculate percentage difference in p-values
+    percent_diff = 100 * (wb_pval - ri_pval) / ri_pval
+
+    print(
+        f"Percentage difference in p-values (seed={seed}, reps={reps}): {percent_diff}"
+    )
+
+    # Assert that the percentage difference is within an acceptable range
+    assert (
+        np.abs(percent_diff) < 1.0
+    ), f"Percentage difference is too large: {percent_diff}%"