Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bugbear to pre-commit and some code improvements #694

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,486 changes: 1,817 additions & 1,669 deletions pixi.lock

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions pyfixest/estimation/FixestMulti_.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,16 @@ def __init__(

# set functions inherited from other modules
_module = import_module("pyfixest.report")
_tmp = getattr(_module, "coefplot")
_tmp = _module.coefplot
self.coefplot = functools.partial(_tmp, models=self.all_fitted_models.values())
self.coefplot.__doc__ = _tmp.__doc__
_tmp = getattr(_module, "iplot")
_tmp = _module.iplot
self.iplot = functools.partial(_tmp, models=self.all_fitted_models.values())
self.iplot.__doc__ = _tmp.__doc__
_tmp = getattr(_module, "summary")
_tmp = _module.summary
self.summary = functools.partial(_tmp, models=self.all_fitted_models.values())
self.summary.__doc__ = _tmp.__doc__
_tmp = getattr(_module, "etable")
_tmp = _module.etable
self.etable = functools.partial(_tmp, models=self.all_fitted_models.values())
self.etable.__doc__ = _tmp.__doc__

Expand All @@ -120,7 +120,7 @@ def _prepare_estimation(
fml: str,
vcov: Union[None, str, dict[str, str]] = None,
weights: Union[None, str] = None,
ssc: dict[str, Union[str, bool]] = {},
ssc: Optional[dict[str, Union[str, bool]]] = None,
fixef_rm: str = "none",
drop_intercept: bool = False,
) -> None:
Expand Down Expand Up @@ -181,7 +181,7 @@ def _prepare_estimation(
self._is_iv = FML.is_iv
# self._fml_dict = fxst_fml.condensed_fml_dict
# self._fml_dict_iv = fxst_fml.condensed_fml_dict_iv
self._ssc_dict = ssc
self._ssc_dict = ssc if ssc is not None else {}
self._drop_singletons = _drop_singletons(fixef_rm)

def _estimate_all_models(
Expand Down
2 changes: 0 additions & 2 deletions pyfixest/estimation/FormulaParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,5 +846,3 @@ def _check_duplicate_key(my_dict: dict, key: str):
only be used once on the rhs of the two-sided formula.
"""
)
else:
None
16 changes: 11 additions & 5 deletions pyfixest/estimation/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
WeightsTypeOptions,
)
from pyfixest.utils.dev_utils import DataFrameType
from pyfixest.utils.utils import ssc
from pyfixest.utils.utils import ssc as ssc_func
Copy link
Contributor Author

@juanitorduz juanitorduz Nov 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a particularly tricky bug as ssc was both a global function and a function parameter.



def feols(
fml: str,
data: DataFrameType, # type: ignore
vcov: Optional[Union[VcovTypeOptions, dict[str, str]]] = None,
weights: Union[None, str] = None,
ssc: dict[str, Union[str, bool]] = ssc(),
ssc: Optional[dict[str, Union[str, bool]]] = None,
fixef_rm: FixedRmOptions = "none",
fixef_tol=1e-08,
collin_tol: float = 1e-10,
Expand Down Expand Up @@ -357,6 +357,8 @@ class for multiple models specified via `fml`.
fit_D.ccv(treatment = "D", cluster = "group_id")
```
"""
if ssc is None:
ssc = ssc_func()
if i_ref1 is not None:
raise FeatureDeprecationError(
"""
Expand Down Expand Up @@ -419,13 +421,13 @@ def fepois(
fml: str,
data: DataFrameType, # type: ignore
vcov: Optional[Union[VcovTypeOptions, dict[str, str]]] = None,
ssc: dict[str, Union[str, bool]] = ssc(),
ssc: Optional[dict[str, Union[str, bool]]] = None,
fixef_rm: FixedRmOptions = "none",
fixef_tol: float = 1e-08,
iwls_tol: float = 1e-08,
iwls_maxiter: int = 25,
collin_tol: float = 1e-10,
separation_check: Optional[list[str]] = ["fe"],
separation_check: Optional[list[str]] = None,
solver: SolverOptions = "np.linalg.solve",
drop_intercept: bool = False,
i_ref1=None,
Expand Down Expand Up @@ -480,7 +482,7 @@ def fepois(

separation_check: list[str], optional
Methods to identify and drop separated observations.
Either "fe" or "ir". Executes "fe" by default.
Either "fe" or "ir". Executes "fe" by default (when None).

solver : SolverOptions, optional.
The solver to use for the regression. Can be either "np.linalg.solve" or
Expand Down Expand Up @@ -549,6 +551,10 @@ def fepois(
For more examples, please take a look at the documentation of the `feols()`
function.
"""
if separation_check is None:
separation_check = ["fe"]
if ssc is None:
ssc = ssc_func()
if i_ref1 is not None:
raise FeatureDeprecationError(
"""
Expand Down
2 changes: 1 addition & 1 deletion pyfixest/estimation/feiv_.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def first_stage(self) -> None:
self._non_exo_instruments = list(set(self._coefnames_z) - set(self._coefnames))

fixest_module = import_module("pyfixest.estimation")
fit_ = getattr(fixest_module, "feols")
fit_ = fixest_module.feols

fml_first_stage = self.FixestFormula.fml_first_stage.replace(" ", "")
if self._has_fixef:
Expand Down
23 changes: 10 additions & 13 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,13 +301,13 @@

# set functions inherited from other modules
_module = import_module("pyfixest.report")
_tmp = getattr(_module, "coefplot")
_tmp = _module.coefplot
self.coefplot = functools.partial(_tmp, models=[self])
self.coefplot.__doc__ = _tmp.__doc__
_tmp = getattr(_module, "iplot")
_tmp = _module.iplot
self.iplot = functools.partial(_tmp, models=[self])
self.iplot.__doc__ = _tmp.__doc__
_tmp = getattr(_module, "summary")
_tmp = _module.summary
self.summary = functools.partial(_tmp, models=[self])
self.summary.__doc__ = _tmp.__doc__

Expand Down Expand Up @@ -757,7 +757,7 @@
beta_center = _beta_hat

vcov_mat = np.zeros((_k, _k))
for ixg, g in enumerate(clustid):
for ixg, _ in enumerate(clustid):
beta_centered = beta_jack[ixg, :] - beta_center
vcov_mat += np.outer(beta_centered, beta_centered)

Expand All @@ -778,10 +778,7 @@

# lazy loading to avoid circular import
fixest_module = import_module("pyfixest.estimation")
if _method == "feols":
fit_ = getattr(fixest_module, "feols")
else:
fit_ = getattr(fixest_module, "fepois")
fit_ = fixest_module.feols if _method == "feols" else fixest_module.fepois

Check warning on line 781 in pyfixest/estimation/feols_.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/estimation/feols_.py#L781

Added line #L781 was not covered by tests

for ixg, g in enumerate(clustid):
# direct leave one cluster out implementation
Expand All @@ -804,7 +801,7 @@
beta_center = _beta_hat

vcov_mat = np.zeros((_k, _k))
for ixg, g in enumerate(clustid):
for ixg, _ in enumerate(clustid):

Check warning on line 804 in pyfixest/estimation/feols_.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/estimation/feols_.py#L804

Added line #L804 was not covered by tests
beta_centered = beta_jack[ixg, :] - beta_center
vcov_mat += np.outer(beta_centered, beta_centered)

Expand Down Expand Up @@ -1412,7 +1409,7 @@
G = len(unique_clusters)

ccv_module = import_module("pyfixest.estimation.ccv")
_compute_CCV = getattr(ccv_module, "_compute_CCV")
_compute_CCV = ccv_module._compute_CCV

Check warning on line 1412 in pyfixest/estimation/feols_.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/estimation/feols_.py#L1412

Added line #L1412 was not covered by tests

vcov_splits = 0.0
for _ in range(n_splits):
Expand Down Expand Up @@ -1465,7 +1462,7 @@
return pd.concat([res_ccv, res_crv1], axis=1).T

ccv_module = import_module("pyfixest.estimation.ccv")
_ccv = getattr(ccv_module, "_ccv")
_ccv = ccv_module._ccv

return _ccv(
data=data,
Expand Down Expand Up @@ -2482,7 +2479,7 @@
elif isinstance(vcov, (list, str)):
vcov_type_detail = vcov
else:
assert False, "arg vcov needs to be a dict, string or list"
raise TypeError("arg vcov needs to be a dict, string or list")

Check warning on line 2482 in pyfixest/estimation/feols_.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/estimation/feols_.py#L2482

Added line #L2482 was not covered by tests

if vcov_type_detail == "iid":
vcov_type = "iid"
Expand Down Expand Up @@ -2520,7 +2517,7 @@

def _apply_fixef_numpy(df_fe_values, fixef_dicts):
fixef_mat = np.zeros_like(df_fe_values, dtype=float)
for i, (fixef, subdict) in enumerate(fixef_dicts.items()):
for i, (_, subdict) in enumerate(fixef_dicts.items()):
unique_levels, inverse = np.unique(df_fe_values[:, i], return_inverse=True)
mapping = np.array([subdict.get(level, np.nan) for level in unique_levels])
fixef_mat[:, i] = mapping[inverse]
Expand Down
2 changes: 1 addition & 1 deletion pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ def _check_for_separation_ir(
"""
# lazy load to avoid circular import
fixest_module = import_module("pyfixest.estimation")
feols = getattr(fixest_module, "feols")
feols = fixest_module.feols
# initialize
separation_na: set[int] = set()
tmp_suffix = "_separationTmp"
Expand Down
26 changes: 17 additions & 9 deletions pyfixest/report/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
def etable(
models: Union[list[Union[Feols, Fepois, Feiv]], FixestMulti],
type: str = "gt",
signif_code: list = [0.001, 0.01, 0.05],
signif_code: Optional[list] = None,
coef_fmt: str = "b \n (se)",
custom_stats: Optional[dict] = None,
keep: Optional[Union[list, str]] = None,
Expand Down Expand Up @@ -46,7 +46,7 @@ def etable(
Type of output. Either "df" for pandas DataFrame, "md" for markdown,
"gt" for great_tables, or "tex" for LaTeX table. Default is "gt".
signif_code : list, optional
Significance levels for the stars. Default is [0.001, 0.01, 0.05].
Significance levels for the stars. Default is None, which sets [0.001, 0.01, 0.05].
If None, no stars are printed.
coef_fmt : str, optional
The format of the coefficient (b), standard error (se), t-stats (t), and
Expand Down Expand Up @@ -118,8 +118,10 @@ def etable(
A styled DataFrame with the coefficients and standard errors of the models.
When output is "tex", the LaTeX code is returned as a string.
""" # noqa: D301
if signif_code is None:
signif_code = [0.001, 0.01, 0.05]
assert (
isinstance([0.1, 0.2, 0.3], list) and len(signif_code) == 3
isinstance(signif_code, list) and len(signif_code) == 3
), "signif_code must be a list of length 3"
if signif_code:
assert all(
Expand Down Expand Up @@ -207,7 +209,7 @@ def etable(
interactionSymbol = " x "
R2code = "R2"

for i, model in enumerate(models):
for model in models:
dep_var_list.append(model._depvar)
n_coefs.append(len(model._coefnames))

Expand Down Expand Up @@ -865,7 +867,7 @@ def make_table(
# When there are row groups then insert midrules and groupname
if row_levels > 1 and len(row_groups) > 1:
# Insert a midrule after each row group
for i, row_group in enumerate(row_groups):
for i in range(len(row_groups)):
if rgroup_display:
# Insert a line with the row group name & same space around it
# lines.insert(line_at+1, "\\addlinespace")
Expand Down Expand Up @@ -1143,12 +1145,12 @@ def _format_mean_std(
def dtable(
df: pd.DataFrame,
vars: list,
stats: list = ["count", "mean", "std"],
stats: Optional[list] = None,
bycol: Optional[list[str]] = None,
byrow: Optional[str] = None,
type: str = "gt",
labels: dict = {},
stats_labels: dict = {},
labels: dict | None = None,
stats_labels: dict | None = None,
digits: int = 2,
notes: str = "",
counts_row_below: bool = False,
Expand All @@ -1166,7 +1168,7 @@ def dtable(
vars : list
List of variables to be included in the table.
stats : list, optional
List of statistics to be calculated. The default is ['count','mean', 'std'].
List of statistics to be calculated. The default is None, that sets ['count','mean', 'std'].
All pandas aggregation functions are supported.
bycol : list, optional
List of variables to be used to group the data by columns. The default is None.
Expand Down Expand Up @@ -1200,6 +1202,12 @@ def dtable(
-------
A table in the specified format.
"""
if stats is None:
stats = ["count", "mean", "std"]
if labels is None:
labels = {}
if stats_labels is None:
stats_labels = {}
assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame."
assert all(
pd.api.types.is_numeric_dtype(df[var]) for var in vars
Expand Down
8 changes: 6 additions & 2 deletions pyfixest/utils/dgps.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@
def get_panel_dgp_stagg(
num_units=1_000,
num_periods=30,
num_treated=[250, 500, 150],
treatment_start_cohorts=[10, 15, 20],
num_treated=None,
treatment_start_cohorts=None,
sigma_unit=1,
sigma_time=0.5,
sigma_epsilon=0.2,
Expand All @@ -168,6 +168,10 @@
ar_coef=0.8,
):
"""Panel DGP with staggered treatment effects and effect heterogeneity."""
if num_treated is None:
num_treated = [250, 500, 150]
if treatment_start_cohorts is None:
treatment_start_cohorts = [10, 15, 20]

Check warning on line 174 in pyfixest/utils/dgps.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/utils/dgps.py#L171-L174

Added lines #L171 - L174 were not covered by tests
if base_treatment_effects is None:
# Cohort 1: mean reversal: big bump that decays to zero within 10 days, then zero
# Cohort 2: shark-fin - logarithmic for the first week, then 0
Expand Down
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ dev = [
"pyarrow>=14.0.0",
"pre-commit==3.6.0",
"doubleml==0.7.1",
"wildboottest>=0.3.2", "ipykernel>=6.29.5,<7",
"wildboottest>=0.3.2",
"ipykernel>=6.29.5,<7",
]

docs = [
Expand Down Expand Up @@ -126,6 +127,7 @@ target-version = "py39"
[tool.ruff.lint]
# docs: https://docs.astral.sh/ruff/rules/
select = [
"B", # bugbear
"F", # Pyflakes
"E", # pycodestyle errors
"W", # pycodestyle warnings
Expand All @@ -139,6 +141,8 @@ select = [
ignore = [
# do not enable if formatting
# docs: https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
"B028", # No explicit `stacklevel` keyword argument found
"B904", # Better exception handling
"W191", # tab indentation
"E111", # indentation
"E114", # indentation
Expand Down
Loading