diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml
index 26ce8fba..89419696 100644
--- a/.github/workflows/release-drafter.yml
+++ b/.github/workflows/release-drafter.yml
@@ -21,4 +21,4 @@ jobs:
           config-name: release-config.yml
           disable-autolabeler: true
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/pyfixest/estimation/multcomp.py b/pyfixest/estimation/multcomp.py
index 5c2f0526..e3abad8f 100644
--- a/pyfixest/estimation/multcomp.py
+++ b/pyfixest/estimation/multcomp.py
@@ -65,7 +65,11 @@ def bonferroni(models: list[Union[Feols, Fepois]], param: str) -> pd.DataFrame:
 
 
 def rwolf(
-    models: list[Union[Feols, Fepois]], param: str, reps: int, seed: int
+    models: list[Union[Feols, Fepois]],
+    param: str,
+    reps: int,
+    seed: int,
+    sampling_method: str = "wild-bootstrap",
 ) -> pd.DataFrame:
     """
     Compute Romano-Wolf adjusted p-values for multiple hypothesis testing.
@@ -86,6 +90,10 @@ def rwolf(
         The number of bootstrap replications.
     seed : int
         The seed for the random number generator.
+    sampling_method : str
+        Sampling method for computing resampled statistics.
+        Users can choose either bootstrap ('wild-bootstrap')
+        or randomization inference ('ri').
 
     Returns
     -------
@@ -142,26 +150,42 @@ def rwolf(
     for i in range(S):
         model = models[i]
 
-        wildboot_res_df, bootstrapped_t_stats = model.wildboottest(
-            param=param,
-            reps=reps,
-            return_bootstrapped_t_stats=True,
-            seed=seed,  # all S iterations require the same bootstrap samples, hence seed needs to be reset
-        )
-        t_stats[i] = wildboot_res_df["t value"]
-        boot_t_stats[:, i] = bootstrapped_t_stats
+        if sampling_method == "wild-bootstrap":
+            wildboot_res_df, bootstrapped_t_stats = model.wildboottest(
+                param=param,
+                reps=reps,
+                return_bootstrapped_t_stats=True,
+                seed=seed,  # all S iterations require the same bootstrap samples, hence seed needs to be reset
+            )
+
+            t_stats[i] = wildboot_res_df["t value"]
+            boot_t_stats[:, i] = bootstrapped_t_stats
+
+        elif sampling_method == "ri":
+            rng = np.random.default_rng(seed)
+            model.ritest(
+                resampvar=param,
+                rng=rng,
+                reps=reps,
+                type="randomization-t",
+                store_ritest_statistics=True,
+            )
+
+            t_stats[i] = model._ritest_sample_stat
+            boot_t_stats[:, i] = model._ritest_statistics
+        else:
+            raise ValueError("Invalid sampling method specified")
 
     pval = _get_rwolf_pval(t_stats, boot_t_stats)
     all_model_stats.loc["RW Pr(>|t|)"] = pval
 
     all_model_stats.columns = pd.Index([f"est{i}" for i, _ in enumerate(models)])
-
     return all_model_stats
 
 
 def _get_rwolf_pval(t_stats, boot_t_stats):
     """
-    Compute Romano-Wolf adjusted p-values based on bootstrapped t-statistics.
+    Compute Romano-Wolf adjusted p-values based on bootstrapped (or "ri") t-statistics.
 
     Parameters
     ----------
@@ -169,7 +193,7 @@ def _get_rwolf_pval(t_stats, boot_t_stats):
         tested hypotheses - containing the original, non-bootstrappe
         t-statisics.
     boot_t_stats (np.ndarray): A (B x S) matrix containing the
-        bootstrapped t-statistics.
+        bootstrapped (or "ri") t-statistics.
 
     Returns
     -------
diff --git a/tests/test_errors.py b/tests/test_errors.py
index ad706126..eee3192e 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -187,6 +187,14 @@ def test_multcomp_errors():
         rwolf(fit1.to_list(), param="X2", reps=999, seed=92)
 
 
+def test_multcomp_sampling_errors():
+    data = get_data().dropna()
+    # Sampling method not supported in "rwolf"
+    fit1 = feols("Y + Y2 ~ X1 | f1", data=data)
+    with pytest.raises(ValueError):
+        rwolf(fit1.to_list(), param="X1", reps=999, seed=92, sampling_method="abc")
+
+
 def test_rwolf_error():
     rng = np.random.default_rng(123)
 
diff --git a/tests/test_multcomp.py b/tests/test_multcomp.py
index 98f5cda2..aaa85d90 100644
--- a/tests/test_multcomp.py
+++ b/tests/test_multcomp.py
@@ -143,3 +143,47 @@ def test_stepwise_function():
     stepwise_r = wildrwolf.get_rwolf_pval(t_stat, t_boot)
 
     np.testing.assert_allclose(stepwise_py, stepwise_r)
+
+
+# Import data from pyfixest
+
+
+@pytest.mark.parametrize("seed", [1000, 2000, 3000])
+@pytest.mark.parametrize("reps", [999, 1999])
+def test_sampling_scheme(seed, reps):
+    # Compare RW adjusted p-values from RI and WB resampling methods
+    # The p-values should be "largely" similar regardless of resampling methods
+
+    data = get_data().dropna()
+    rng = np.random.default_rng(seed)
+
+    # Perturb covariates (not treatment variable)
+    data["Y2"] = data["Y"] * rng.normal(0.2, 1, size=len(data))
+    data["X2"] = rng.normal(size=len(data))
+
+    fit1 = feols("Y ~ X1 + X2", data=data)
+    fit2 = feols("Y ~ X1 + X2 + Y2", data=data)
+
+    # Run rwolf with "ri" sampling method
+    rwolf_df_ri = rwolf([fit1, fit2], "X1", reps=reps, seed=seed, sampling_method="ri")
+
+    ri_pval = rwolf_df_ri["est1"]["RW Pr(>|t|)"]
+
+    # Run rwolf with "wild-bootstrap" sampling method
+    rwolf_df_wb = rwolf(
+        [fit1, fit2], "X1", reps=reps, seed=seed, sampling_method="wild-bootstrap"
+    )
+
+    wb_pval = rwolf_df_wb["est1"]["RW Pr(>|t|)"]
+
+    # Calculate percentage difference in p-values
+    percent_diff = 100 * (wb_pval - ri_pval) / ri_pval
+
+    print(
+        f"Percentage difference in p-values (seed={seed}, reps={reps}): {percent_diff}"
+    )
+
+    # Assert that the percentage difference is within an acceptable range
+    assert (
+        np.abs(percent_diff) < 1.0
+    ), f"Percentage difference is too large: {percent_diff}%"