Skip to content

Commit

Permalink
fix bug in ritest with randomization-t (#730)
Browse files Browse the repository at this point in the history
  • Loading branch information
s3alfisc authored Dec 3, 2024
1 parent d83b7b7 commit 10a8fb8
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 67 deletions.
2 changes: 2 additions & 0 deletions docs/changelog.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
are dropped as a dependencies.
- Fixes a bug in the `wildboottest` method, which incorrectly used to run a regression on the demeaned dependend variable in case it was
applied after a fixed effects regression. My apologies for that!
- Fixes a bug in the `ritest` method, which would use randomization inference coefficients instead of t-statistics, leading to incorrect results.
This has consequences for the rwolf() function, which, in case of running ri-inference, would default to run the randomization-t. My apolgies!

## PyFixest 0.22.0 - 0.25.4

Expand Down
23 changes: 16 additions & 7 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,15 +2239,8 @@ def ritest(
if cluster is not None and cluster not in _data:
raise ValueError(f"The variable {cluster} is not found in the data.")

sample_coef = np.array(self.coef().xs(resampvar_))
sample_tstat = np.array(self.tstat().xs(resampvar_))

clustervar_arr = _data[cluster].to_numpy().reshape(-1, 1) if cluster else None

rng = np.random.default_rng() if rng is None else rng

sample_stat = sample_tstat if type == "randomization-t" else sample_coef

if clustervar_arr is not None and np.any(np.isnan(clustervar_arr)):
raise ValueError(
"""
Expand All @@ -2256,9 +2249,25 @@ def ritest(
"""
)

# update vcov if cluster provided but not in model
if cluster is not None and not self._is_clustered:
warnings.warn(
"The initial model was not clustered. CRV1 inference is computed and stored in the model object."
)
self.vcov({"CRV1": cluster})

rng = np.random.default_rng() if rng is None else rng

sample_coef = np.array(self.coef().xs(resampvar_))
sample_tstat = np.array(self.tstat().xs(resampvar_))
sample_stat = sample_tstat if type == "randomization-t" else sample_coef

if type not in ["randomization-t", "randomization-c"]:
raise ValueError("type must be 'randomization-t' or 'randomization-c.")

# always run slow algorithm for randomization-t
choose_algorithm = "slow" if type == "randomization-t" else choose_algorithm

assert isinstance(reps, int) and reps > 0, "reps must be a positive integer."

if self._has_weights:
Expand Down
4 changes: 2 additions & 2 deletions pyfixest/estimation/multcomp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ def rwolf(
fit2 = pf.feols("Y ~ X1 + X2", data=data)
rwolf_df = pf.rwolf([fit1, fit2], "X1", reps=9999, seed=123)
# use randomization inference
rwolf_df = pf.rwolf([fit1, fit2], "X1", reps=9999, seed=123, sampling_method = "ri")
# use randomization inference - dontrun as too slow
# rwolf_df = pf.rwolf([fit1, fit2], "X1", reps=9999, seed=123, sampling_method = "ri")
rwolf_df
```
Expand Down
91 changes: 33 additions & 58 deletions tests/test_ritest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,40 +47,41 @@ def test_algos_internally(data, fml, resampvar, reps, cluster):


@pytest.mark.extended
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1", "Y~X1+f3|f1+f2"])
@pytest.mark.parametrize("resampvar", ["X1", "f3"])
@pytest.mark.parametrize("reps", [1000])
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1"])
@pytest.mark.parametrize("resampvar", ["X1"])
@pytest.mark.parametrize("cluster", [None, "group_id"])
def test_randomization_t_vs_c(data, fml, resampvar, reps, cluster):
fit = pf.feols(fml, data=data)

rng1 = np.random.default_rng(1234)
rng2 = np.random.default_rng(1234)

kwargs = {
"resampvar": resampvar,
"reps": reps,
"store_ritest_statistics": True,
"cluster": cluster,
}

kwargs1 = kwargs.copy()
kwargs2 = kwargs.copy()

kwargs1["type"] = "randomization-c"
kwargs1["rng"] = rng1
kwargs2["choose_algorithm"] = "randomization-t"
kwargs2["rng"] = rng2

res1 = fit.ritest(**kwargs1)
ritest_stats1 = fit._ritest_statistics.copy()

res2 = fit.ritest(**kwargs2)
ritest_stats2 = fit._ritest_statistics.copy()
def test_randomization_t_vs_c(fml, resampvar, cluster):
data = pf.get_data(N=300)

fit1 = pf.feols(fml, data=data)
fit2 = pf.feols(fml, data=data)

rng1 = np.random.default_rng(12354)
rng2 = np.random.default_rng(12354)

fit1.ritest(
resampvar="X1",
type="randomization-c",
rng=rng1,
cluster=cluster,
store_ritest_statistics=True,
reps=100,
)
fit2.ritest(
resampvar="X1",
type="randomization-t",
rng=rng2,
cluster=cluster,
store_ritest_statistics=True,
reps=100,
)

assert np.allclose(res1.Estimate, res2.Estimate, atol=1e-8, rtol=1e-8)
assert np.allclose(res1["Pr(>|t|)"], res2["Pr(>|t|)"], atol=1e-2, rtol=1e-2)
assert np.allclose(ritest_stats1, ritest_stats2, atol=1e-2, rtol=1e-2)
# just weak test that both are somewhat close
assert (
np.abs(fit1._ritest_pvalue - fit2._ritest_pvalue) < 0.03
if cluster is None
else 0.06
), f"P-values are too different for randomization-c and randomization-t tests for {fml} and {resampvar} and {cluster}."


@pytest.fixture
Expand Down Expand Up @@ -160,29 +161,3 @@ def test_fepois_ritest():
@pytest.fixture
def data_r_vs_t():
return pf.get_data(N=5000, seed=2999)


@pytest.mark.extended
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1", "Y~X1+f3|f1+f2"])
@pytest.mark.parametrize("resampvar", ["X1", "f3"])
@pytest.mark.parametrize("cluster", [None, "group_id"])
@pytest.mark.skip(reason="This feature is not yet fully implemented.")
def test_randomisation_c_vs_t(data_r_vs_t, fml, resampvar, cluster):
"""Test that the randomization-c and randomization-t tests give similar results."""
reps = 1000
fit = pf.feols(fml, data=data_r_vs_t)

rng = np.random.default_rng(1234)

ri1 = fit.ritest(
resampvar=resampvar, reps=reps, type="randomization-c", rng=rng, cluster=cluster
)
ri2 = fit.ritest(
resampvar=resampvar, reps=reps, type="randomization-t", rng=rng, cluster=cluster
)

assert np.allclose(ri1.Estimate, ri2.Estimate, rtol=0.01, atol=0.01)
assert np.allclose(ri1["Pr(>|t|)"], ri2["Pr(>|t|)"], rtol=0.01, atol=0.01)
assert np.allclose(
ri1["Std. Error (Pr(>|t|))"], ri2["Std. Error (Pr(>|t|))"], rtol=0.01, atol=0.01
)

0 comments on commit 10a8fb8

Please sign in to comment.