Skip to content

Commit

Permalink
PyFixest 0.10.10 (#212)
Browse files Browse the repository at this point in the history
* attempt to fix #211

* cleanup

* clarification

* fix i() bug for i(var), not for i(var1, var2)

* add drop_intercept argument to feols, fepois

* fix error with did2s inference

* add error when 0 or -1 in first stage

* fix tests, output warning for i(var1, var2) syntax

* format code

* update figure + fix small bug

* no 0 in second stage did test

* bump version

* deprecate i(var1, var2) syntax, update news

* format

* bring back i(var1, var2)

* format
  • Loading branch information
s3alfisc authored Nov 10, 2023
1 parent f14e765 commit c683dd2
Show file tree
Hide file tree
Showing 12 changed files with 994 additions and 877 deletions.
4 changes: 4 additions & 0 deletions docs/news.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# News

## PyFixest `0.10.10`

Fixes a bug with variable interactions via `i(var)` syntax. See [issue #211](https://github.com/s3alfisc/pyfixest/issues/211).

## PyFixest `0.10.9`

Makes `etable()` prettier and more informative.
Expand Down
928 changes: 467 additions & 461 deletions figures/event_study.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
367 changes: 181 additions & 186 deletions poetry.lock

Large diffs are not rendered by default.

14 changes: 13 additions & 1 deletion pyfixest/FixestMulti.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def _prepare_estimation(
vcov: Union[None, str, Dict[str, str]] = None,
ssc: Dict[str, str] = {},
fixef_rm: str = "none",
drop_intercept: bool = False,
i_ref1: Optional[Union[List, str]] = None,
i_ref2: Optional[Union[List, str]] = None,
) -> None:
Expand All @@ -70,6 +71,7 @@ def _prepare_estimation(
ssc (Dict[str, str], optional): A dictionary specifying the type of standard errors to use for inference. See `feols()` or `fepois()`.
fixef_rm (str, optional): A string specifying whether singleton fixed effects should be dropped.
Options are "none" (default) and "singleton". If "singleton", singleton fixed effects are dropped.
drop_intercept (bool, optional): Whether to drop the intercept. Default is False.
i_ref1 (Optional[Union[List, str]], optional): A list or string specifying the reference category for the first interaction variable.
i_ref2 (Optional[Union[List, str]], optional): A list or string specifying the reference category for the second interaction variable.
Expand All @@ -85,6 +87,9 @@ def _prepare_estimation(
self._drop_singletons = None
self._fixef_keys = None
self._is_multiple_estimation = None
self._i_ref1 = None
self._i_ref2 = None
self._drop_intercept = None

# set i_ref1 and i_ref2 to list if not None
if i_ref1 is not None:
Expand All @@ -110,8 +115,10 @@ def _prepare_estimation(
self._ssc_dict = ssc
self._drop_singletons = _drop_singletons(fixef_rm)
self._fixef_keys = list(self._fml_dict.keys())

self._i_ref1 = i_ref1
self._i_ref2 = i_ref2
self._drop_intercept = drop_intercept

def _estimate_all_models(
self,
Expand Down Expand Up @@ -147,6 +154,7 @@ def _estimate_all_models(
_method = self._method
_drop_singletons = self._drop_singletons
_ssc_dict = self._ssc_dict
_drop_intercept = self._drop_intercept
_i_ref1 = self._i_ref1
_i_ref2 = self._i_ref2

Expand Down Expand Up @@ -184,7 +192,11 @@ def _estimate_all_models(
_icovars,
X_is_empty,
) = model_matrix_fixest(
fml=fml, data=_data, i_ref1=_i_ref1, i_ref2=_i_ref2
fml=fml,
data=_data,
drop_intercept=_drop_intercept,
i_ref1=_i_ref1,
i_ref2=_i_ref2,
)

weights = np.ones((Y.shape[0], 1))
Expand Down
17 changes: 15 additions & 2 deletions pyfixest/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def feols(
ssc=ssc(),
fixef_rm: str = "none",
collin_tol: float = 1e-10,
drop_intercept: bool = False,
i_ref1: Optional[Union[list, str]] = None,
i_ref2: Optional[Union[list, str]] = None,
) -> Union[Feols, FixestMulti]:
Expand Down Expand Up @@ -70,6 +71,9 @@ def feols(
via the diagonal cholesky decomposition of the correlation matrix of the variables.
If the tolerance is higher, more variables will be dropped.
drop_intercept (bool): Whether to drop the intercept from the model. False by default. If True, the intercept will be dropped **after** creating the model matrix via formulaic.
This implies that reference levels for categorical variables will be dropped as well and are not recovered.
i_ref1 (Optional[Union[list, str]]): A list of strings or a string specifying the reference category for the first set of categorical variables in the formula, interacted via "i()".
i_ref2 (Optional[Union[list, str]]): A list of strings or a string specifying the reference category for the second set of categorical variables in the formula, interacted via "i()".
Expand Down Expand Up @@ -135,7 +139,9 @@ def feols(
_estimation_input_checks(fml, data, vcov, ssc, fixef_rm, collin_tol, i_ref1)

fixest = FixestMulti(data=data)
fixest._prepare_estimation("feols", fml, vcov, ssc, fixef_rm, i_ref1, i_ref2)
fixest._prepare_estimation(
"feols", fml, vcov, ssc, fixef_rm, drop_intercept, i_ref1, i_ref2
)

# demean all models: based on fixed effects x split x missing value combinations
fixest._estimate_all_models(vcov, fixest._fixef_keys, collin_tol=collin_tol)
Expand All @@ -155,6 +161,7 @@ def fepois(
iwls_tol: float = 1e-08,
iwls_maxiter: int = 25,
collin_tol: float = 1e-10,
drop_intercept: bool = False,
i_ref1: Optional[Union[list, str]] = None,
i_ref2: Optional[Union[list, str]] = None,
) -> Union[Fepois, FixestMulti]:
Expand Down Expand Up @@ -210,6 +217,10 @@ def fepois(
collin_tol (float): tolerance for collinearity check. 1e-10 by default. If collinear variables are detected, they will be dropped from the model. The performed check is
via the diagonal cholesky decomposition of the correlation matrix of the variables. If the tolerance is higher, more variables will be dropped.
drop_intercept (bool): Whether to drop the intercept from the model. False by default. If True, the intercept will be dropped **after** creating the model matrix via formulaic.
This implies that reference levels for categorical variables will be dropped as well and are not recovered.
i_ref1 (Optional[Union[list, str]]): A list of strings or a string specifying the reference category for the first set of categorical variables in the formula, interacted via "i()".
i_ref2 (Optional[Union[list, str]]): A list of strings or a string specifying the reference category for the second set of categorical variables in the formula, interacted via "i()".
Expand Down Expand Up @@ -264,7 +275,9 @@ def fepois(

fixest = FixestMulti(data=data)

fixest._prepare_estimation("fepois", fml, vcov, ssc, fixef_rm, i_ref1, i_ref2)
fixest._prepare_estimation(
"fepois", fml, vcov, ssc, fixef_rm, drop_intercept, i_ref1, i_ref2
)
if fixest._is_iv:
raise NotImplementedError(
"IV Estimation is not supported for Poisson Regression"
Expand Down
39 changes: 34 additions & 5 deletions pyfixest/experimental/did.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def _did2s_estimate(
"""

_first_stage_full = f"{yname} {_first_stage}"
_second_stage_full = f"{yname}_hat {_second_stage} + 0"
_second_stage_full = f"{yname}_hat {_second_stage}"

if treatment is not None:
if treatment not in data.columns:
Expand All @@ -382,6 +382,13 @@ def _did2s_estimate(
else:
_not_yet_treated_data = data[data["ATT"] == False]

# check that the first stage formula has fixed effects
if "|" not in _first_stage:
raise ValueError("First stage formula must contain fixed effects.")
# check that the second stage formula has no fixed effects
if "|" in _second_stage:
raise ValueError("Second stage formula must not contain fixed effects.")

# estimate first stage
fit1 = feols(
fml=_first_stage_full,
Expand All @@ -399,8 +406,14 @@ def _did2s_estimate(
_first_u = data[f"{yname}"].to_numpy().flatten() - Y_hat
data[f"{yname}_hat"] = _first_u

# intercept needs to be dropped by hand due to the presence of fixed effects in the first stage
fit2 = feols(
_second_stage_full, data=data, vcov="iid", i_ref1=i_ref1, i_ref2=i_ref2
_second_stage_full,
data=data,
vcov="iid",
drop_intercept=True,
i_ref1=i_ref1,
i_ref2=i_ref2,
)
_second_u = fit2.resid()

Expand Down Expand Up @@ -447,13 +460,23 @@ def _did2s_vcov(
first_stage_fe = "+".join(first_stage_fe)
first_stage = f"{first_stage_x}+{first_stage_fe}"

second_stage = f"{second_stage} + 0"
second_stage = f"{second_stage}"

# note for future Alex: the intercept needs to be dropped! It is not dropped automatically, as the fixed effects
# are converted to dummies, hence the has_fixed checks are False
_, X1, _, _, _, _, _, _, _ = model_matrix_fixest(
fml=f"{yname} {first_stage}", data=data, i_ref1=i_ref1, i_ref2=i_ref2
fml=f"{yname} {first_stage}",
data=data,
drop_intercept=False,
i_ref1=i_ref1,
i_ref2=i_ref2,
)
_, X2, _, _, _, _, _, _, _ = model_matrix_fixest(
fml=f"{yname} {second_stage}", data=data, i_ref1=i_ref1, i_ref2=i_ref2
fml=f"{yname} {second_stage}",
data=data,
drop_intercept=True,
i_ref1=i_ref1,
i_ref2=i_ref2,
) # reference values not dropped, multicollinearity error

X1 = csr_matrix(X1.values)
Expand Down Expand Up @@ -540,6 +563,12 @@ def did2s(
assert first_stage[0] == "~", "First stage must start with ~"
assert second_stage[0] == "~", "Second stage must start with ~"

# raise an error if the second stage formula contains "0" or "-1" (note: "- 1" with a space is not detected by this check)
if "0" in second_stage or "-1" in second_stage:
raise ValueError(
"The second stage formula should not contain '0' or '-1'. Note that the intercept is dropped automatically due to the presence of fixed effects in the first stage."
)

data = data.copy()

fit, first_u, second_u = _did2s_estimate(
Expand Down
Loading

0 comments on commit c683dd2

Please sign in to comment.