py-econometrics · s3alfisc · Nov 4, 2024 · Nov 3, 2024 · juanitorduz · Nov 3, 2024
diff --git a/pixi.lock b/pixi.lock
diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
@@ -101,16 +101,16 @@ def __init__(
 
         # set functions inherited from other modules
         _module = import_module("pyfixest.report")
-        _tmp = getattr(_module, "coefplot")
+        _tmp = _module.coefplot
         self.coefplot = functools.partial(_tmp, models=self.all_fitted_models.values())
         self.coefplot.__doc__ = _tmp.__doc__
-        _tmp = getattr(_module, "iplot")
+        _tmp = _module.iplot
         self.iplot = functools.partial(_tmp, models=self.all_fitted_models.values())
         self.iplot.__doc__ = _tmp.__doc__
-        _tmp = getattr(_module, "summary")
+        _tmp = _module.summary
         self.summary = functools.partial(_tmp, models=self.all_fitted_models.values())
         self.summary.__doc__ = _tmp.__doc__
-        _tmp = getattr(_module, "etable")
+        _tmp = _module.etable
         self.etable = functools.partial(_tmp, models=self.all_fitted_models.values())
         self.etable.__doc__ = _tmp.__doc__
 
@@ -120,7 +120,7 @@ def _prepare_estimation(
         fml: str,
         vcov: Union[None, str, dict[str, str]] = None,
         weights: Union[None, str] = None,
-        ssc: dict[str, Union[str, bool]] = {},
+        ssc: Optional[dict[str, Union[str, bool]]] = None,
         fixef_rm: str = "none",
         drop_intercept: bool = False,
     ) -> None:
@@ -181,7 +181,7 @@ def _prepare_estimation(
         self._is_iv = FML.is_iv
         # self._fml_dict = fxst_fml.condensed_fml_dict
         # self._fml_dict_iv = fxst_fml.condensed_fml_dict_iv
-        self._ssc_dict = ssc
+        self._ssc_dict = ssc if ssc is not None else {}
         self._drop_singletons = _drop_singletons(fixef_rm)
 
     def _estimate_all_models(

diff --git a/pyfixest/estimation/FormulaParser.py b/pyfixest/estimation/FormulaParser.py
@@ -846,5 +846,3 @@ def _check_duplicate_key(my_dict: dict, key: str):
                 only be used once on the rhs of the two-sided formula.
                 """
             )
-        else:
-            None
diff --git a/pyfixest/estimation/estimation.py b/pyfixest/estimation/estimation.py
@@ -13,15 +13,15 @@
     WeightsTypeOptions,
 )
 from pyfixest.utils.dev_utils import DataFrameType
-from pyfixest.utils.utils import ssc
+from pyfixest.utils.utils import ssc as ssc_func
 
 
 def feols(
     fml: str,
     data: DataFrameType,  # type: ignore
     vcov: Optional[Union[VcovTypeOptions, dict[str, str]]] = None,
     weights: Union[None, str] = None,
-    ssc: dict[str, Union[str, bool]] = ssc(),
+    ssc: Optional[dict[str, Union[str, bool]]] = None,
     fixef_rm: FixedRmOptions = "none",
     fixef_tol=1e-08,
     collin_tol: float = 1e-10,
@@ -357,6 +357,8 @@ class for multiple models specified via `fml`.
     fit_D.ccv(treatment = "D", cluster = "group_id")
     ```
     """
+    if ssc is None:
+        ssc = ssc_func()
     if i_ref1 is not None:
         raise FeatureDeprecationError(
             """
@@ -419,13 +421,13 @@ def fepois(
     fml: str,
     data: DataFrameType,  # type: ignore
     vcov: Optional[Union[VcovTypeOptions, dict[str, str]]] = None,
-    ssc: dict[str, Union[str, bool]] = ssc(),
+    ssc: Optional[dict[str, Union[str, bool]]] = None,
     fixef_rm: FixedRmOptions = "none",
     fixef_tol: float = 1e-08,
     iwls_tol: float = 1e-08,
     iwls_maxiter: int = 25,
     collin_tol: float = 1e-10,
-    separation_check: Optional[list[str]] = ["fe"],
+    separation_check: Optional[list[str]] = None,
     solver: SolverOptions = "np.linalg.solve",
     drop_intercept: bool = False,
     i_ref1=None,
@@ -480,7 +482,7 @@ def fepois(
 
     separation_check: list[str], optional
         Methods to identify and drop separated observations.
-        Either "fe" or "ir". Executes "fe" by default.
+        Either "fe" or "ir". Executes "fe" by default (when None).
 
     solver : SolverOptions, optional.
         The solver to use for the regression. Can be either "np.linalg.solve" or
@@ -549,6 +551,10 @@ def fepois(
     For more examples, please take a look at the documentation of the `feols()`
     function.
     """
+    if separation_check is None:
+        separation_check = ["fe"]
+    if ssc is None:
+        ssc = ssc_func()
     if i_ref1 is not None:
         raise FeatureDeprecationError(
             """

diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py
@@ -248,7 +248,7 @@ def first_stage(self) -> None:
         self._non_exo_instruments = list(set(self._coefnames_z) - set(self._coefnames))
 
         fixest_module = import_module("pyfixest.estimation")
-        fit_ = getattr(fixest_module, "feols")
+        fit_ = fixest_module.feols
 
         fml_first_stage = self.FixestFormula.fml_first_stage.replace(" ", "")
         if self._has_fixef:

diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
@@ -301,13 +301,13 @@
 
         # set functions inherited from other modules
         _module = import_module("pyfixest.report")
-        _tmp = getattr(_module, "coefplot")
+        _tmp = _module.coefplot
         self.coefplot = functools.partial(_tmp, models=[self])
         self.coefplot.__doc__ = _tmp.__doc__
-        _tmp = getattr(_module, "iplot")
+        _tmp = _module.iplot
         self.iplot = functools.partial(_tmp, models=[self])
         self.iplot.__doc__ = _tmp.__doc__
-        _tmp = getattr(_module, "summary")
+        _tmp = _module.summary
         self.summary = functools.partial(_tmp, models=[self])
         self.summary.__doc__ = _tmp.__doc__
 
@@ -757,7 +757,7 @@
         beta_center = _beta_hat
 
         vcov_mat = np.zeros((_k, _k))
-        for ixg, g in enumerate(clustid):
+        for ixg, _ in enumerate(clustid):
             beta_centered = beta_jack[ixg, :] - beta_center
             vcov_mat += np.outer(beta_centered, beta_centered)
 
@@ -778,10 +778,7 @@
 
         # lazy loading to avoid circular import
         fixest_module = import_module("pyfixest.estimation")
-        if _method == "feols":
-            fit_ = getattr(fixest_module, "feols")
-        else:
-            fit_ = getattr(fixest_module, "fepois")
+        fit_ = fixest_module.feols if _method == "feols" else fixest_module.fepois
 
         for ixg, g in enumerate(clustid):
             # direct leave one cluster out implementation
@@ -804,7 +801,7 @@
         beta_center = _beta_hat
 
         vcov_mat = np.zeros((_k, _k))
-        for ixg, g in enumerate(clustid):
+        for ixg, _ in enumerate(clustid):
             beta_centered = beta_jack[ixg, :] - beta_center
             vcov_mat += np.outer(beta_centered, beta_centered)
 
@@ -1412,7 +1409,7 @@
         G = len(unique_clusters)
 
         ccv_module = import_module("pyfixest.estimation.ccv")
-        _compute_CCV = getattr(ccv_module, "_compute_CCV")
+        _compute_CCV = ccv_module._compute_CCV
 
         vcov_splits = 0.0
         for _ in range(n_splits):
@@ -1465,7 +1462,7 @@
         return pd.concat([res_ccv, res_crv1], axis=1).T
 
         ccv_module = import_module("pyfixest.estimation.ccv")
-        _ccv = getattr(ccv_module, "_ccv")
+        _ccv = ccv_module._ccv
 
         return _ccv(
             data=data,
@@ -2482,7 +2479,7 @@
     elif isinstance(vcov, (list, str)):
         vcov_type_detail = vcov
     else:
-        assert False, "arg vcov needs to be a dict, string or list"
+        raise TypeError("arg vcov needs to be a dict, string or list")
 
     if vcov_type_detail == "iid":
         vcov_type = "iid"
@@ -2520,7 +2517,7 @@
 
 def _apply_fixef_numpy(df_fe_values, fixef_dicts):
     fixef_mat = np.zeros_like(df_fe_values, dtype=float)
-    for i, (fixef, subdict) in enumerate(fixef_dicts.items()):
+    for i, (_, subdict) in enumerate(fixef_dicts.items()):
         unique_levels, inverse = np.unique(df_fe_values[:, i], return_inverse=True)
         mapping = np.array([subdict.get(level, np.nan) for level in unique_levels])
         fixef_mat[:, i] = mapping[inverse]

diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py
@@ -584,7 +584,7 @@ def _check_for_separation_ir(
     """
     # lazy load to avoid circular import
     fixest_module = import_module("pyfixest.estimation")
-    feols = getattr(fixest_module, "feols")
+    feols = fixest_module.feols
     # initialize
     separation_na: set[int] = set()
     tmp_suffix = "_separationTmp"

diff --git a/pyfixest/report/summarize.py b/pyfixest/report/summarize.py
@@ -17,7 +17,7 @@
 def etable(
     models: Union[list[Union[Feols, Fepois, Feiv]], FixestMulti],
     type: str = "gt",
-    signif_code: list = [0.001, 0.01, 0.05],
+    signif_code: Optional[list] = None,
     coef_fmt: str = "b \n (se)",
     custom_stats: Optional[dict] = None,
     keep: Optional[Union[list, str]] = None,
@@ -46,7 +46,7 @@ def etable(
         Type of output. Either "df" for pandas DataFrame, "md" for markdown,
         "gt" for great_tables, or "tex" for LaTeX table. Default is "gt".
     signif_code : list, optional
-        Significance levels for the stars. Default is [0.001, 0.01, 0.05].
+        Significance levels for the stars. Default is None, which sets [0.001, 0.01, 0.05].
         If None, no stars are printed.
     coef_fmt : str, optional
         The format of the coefficient (b), standard error (se), t-stats (t), and
@@ -118,8 +118,10 @@ def etable(
         A styled DataFrame with the coefficients and standard errors of the models.
         When output is "tex", the LaTeX code is returned as a string.
     """  # noqa: D301
+    if signif_code is None:
+        signif_code = [0.001, 0.01, 0.05]
     assert (
-        isinstance([0.1, 0.2, 0.3], list) and len(signif_code) == 3
+        isinstance(signif_code, list) and len(signif_code) == 3
     ), "signif_code must be a list of length 3"
     if signif_code:
         assert all(
@@ -207,7 +209,7 @@ def etable(
         interactionSymbol = " x "
         R2code = "R2"
 
-    for i, model in enumerate(models):
+    for model in models:
         dep_var_list.append(model._depvar)
         n_coefs.append(len(model._coefnames))
 
@@ -865,7 +867,7 @@ def make_table(
         # When there are row groups then insert midrules and groupname
         if row_levels > 1 and len(row_groups) > 1:
             # Insert a midrule after each row group
-            for i, row_group in enumerate(row_groups):
+            for i in range(len(row_groups)):
                 if rgroup_display:
                     # Insert a line with the row group name & same space around it
                     # lines.insert(line_at+1, "\\addlinespace")
@@ -1143,12 +1145,12 @@ def _format_mean_std(
 def dtable(
     df: pd.DataFrame,
     vars: list,
-    stats: list = ["count", "mean", "std"],
+    stats: Optional[list] = None,
     bycol: Optional[list[str]] = None,
     byrow: Optional[str] = None,
     type: str = "gt",
-    labels: dict = {},
-    stats_labels: dict = {},
+    labels: dict | None = None,
+    stats_labels: dict | None = None,
     digits: int = 2,
     notes: str = "",
     counts_row_below: bool = False,
@@ -1166,7 +1168,7 @@ def dtable(
     vars : list
         List of variables to be included in the table.
     stats : list, optional
-        List of statistics to be calculated. The default is ['count','mean', 'std'].
+        List of statistics to be calculated. The default is None, that sets ['count','mean', 'std'].
         All pandas aggregation functions are supported.
     bycol : list, optional
         List of variables to be used to group the data by columns. The default is None.
@@ -1200,6 +1202,12 @@ def dtable(
     -------
     A table in the specified format.
     """
+    if stats is None:
+        stats = ["count", "mean", "std"]
+    if labels is None:
+        labels = {}
+    if stats_labels is None:
+        stats_labels = {}
     assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame."
     assert all(
         pd.api.types.is_numeric_dtype(df[var]) for var in vars

diff --git a/pyfixest/utils/dgps.py b/pyfixest/utils/dgps.py
@@ -157,8 +157,8 @@
 def get_panel_dgp_stagg(
     num_units=1_000,
     num_periods=30,
-    num_treated=[250, 500, 150],
-    treatment_start_cohorts=[10, 15, 20],
+    num_treated=None,
+    treatment_start_cohorts=None,
     sigma_unit=1,
     sigma_time=0.5,
     sigma_epsilon=0.2,
@@ -168,6 +168,10 @@
     ar_coef=0.8,
 ):
     """Panel DGP with staggered treatment effects and effect heterogeneity."""
+    if num_treated is None:
+        num_treated = [250, 500, 150]
+    if treatment_start_cohorts is None:
+        treatment_start_cohorts = [10, 15, 20]
     if base_treatment_effects is None:
         # Cohort 1: mean reversal: big bump that decays to zero within 10 days, then zero
         # Cohort 2: shark-fin - logarithmic for the first week, then 0

diff --git a/pyproject.toml b/pyproject.toml
@@ -37,7 +37,8 @@ dev = [
   "pyarrow>=14.0.0",
   "pre-commit==3.6.0",
   "doubleml==0.7.1",
-  "wildboottest>=0.3.2", "ipykernel>=6.29.5,<7",
+  "wildboottest>=0.3.2",
+  "ipykernel>=6.29.5,<7",
 ]
 
 docs = [
@@ -126,6 +127,7 @@ target-version = "py39"
 [tool.ruff.lint]
 # docs: https://docs.astral.sh/ruff/rules/
 select = [
+  "B",   # bugbear
   "F",   # Pyflakes
   "E",   # pycodestyle errors
   "W",   # pycodestyle warnings
@@ -139,6 +141,8 @@ select = [
 ignore = [
   # do not enable if formatting
   # docs: https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
+  "B028",   # No explicit `stacklevel` keyword argument found
+  "B904",   # Better exception handling
   "W191",   # tab indentation
   "E111",   # indentation
   "E114",   # indentation
-Original file line number
+Diff line change
@@ Expand Up / @@ -846,5 +846,3 @@ def _check_duplicate_key(my_dict: dict, key: str): @@
                     only be used once on the rhs of the two-sided formula.
                     """
                 )
-            else:
-                None