Remove Stargazer Dependency (#568)

* update stargazer nb, set type = df as default
* updates
* minor reformatting
* update etable notebook
* update lock file
* fix test bug
py-econometrics · Jul 31, 2024 · 5e1bd10
1 parent 3bc70e6
commit 5e1bd10
Show file tree

Hide file tree

Showing 9 changed files with 1,285 additions and 509 deletions.
diff --git a/docs/difference-in-differences.ipynb b/docs/difference-in-differences.ipynb
diff --git a/docs/stargazer.ipynb b/docs/stargazer.ipynb
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyfixest/__init__.py b/pyfixest/__init__.py
@@ -14,7 +14,7 @@
     fepois,
     rwolf,
 )
-from pyfixest.report import Stargazer, coefplot, etable, iplot, summary
+from pyfixest.report import coefplot, etable, iplot, summary
 from pyfixest.utils import (
     get_data,
     get_ssc,

diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py
@@ -312,6 +312,7 @@ def predict(
             See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
 
 
+
         Returns
         -------
         np.ndarray

diff --git a/pyfixest/report/__init__.py b/pyfixest/report/__init__.py
@@ -1,4 +1,4 @@
-from pyfixest.report.summarize import Stargazer, etable, summary
+from pyfixest.report.summarize import etable, summary
 from pyfixest.report.visualize import (
     coefplot,
     iplot,
@@ -9,5 +9,4 @@
     "etable",
     "iplot",
     "coefplot",
-    "Stargazer",
 ]
diff --git a/pyfixest/report/summarize.py b/pyfixest/report/summarize.py
@@ -3,8 +3,6 @@
 
 import numpy as np
 import pandas as pd
-from stargazer.stargazer import LineLocation
-from stargazer.stargazer import Stargazer as BaseStargazer
 from tabulate import tabulate
 
 from pyfixest.estimation.feiv_ import Feiv
@@ -14,67 +12,9 @@
 from pyfixest.utils.dev_utils import _select_order_coefs
 
 
-class Stargazer(BaseStargazer):
-    """
-    A wrapper around the Stargazer class from the stargazer package.
-    Adds fixed effects to the regression table. For details,
-    see the Stargazer documentation:
-    https://github.com/StatsReporting/stargazer.
-
-    Parameters
-    ----------
-    models : list
-        A list of regression model objects to be included in the table.
-    """
-
-    def __init__(self, models):
-        """
-        Initialize the Stargazer object with a list of models.
-
-        Parameters
-        ----------
-        models : list
-            A list of regression model objects to be included in the table.
-        """
-        super().__init__(models)
-
-        # temporarily add the _fixef attribute to the models if not present
-        for x in self.models:
-            if not hasattr(x, "_fixef"):
-                x._fixef = None
-
-        if any([x._fixef is not None for x in self.models]):
-            self.add_fixef()
-
-        # delete the _fixef attribute from the models if
-        # not of type Feols, Feiv, Fepois
-        for x in self.models:
-            if (
-                not isinstance(x, Feols)
-                or not isinstance(x, Fepois)
-                or not isinstance(x, Feiv)
-            ):
-                del x._fixef
-
-    def add_fixef(self):
-        """
-        Add information on fixed effects to the regression table.
-
-        This method deparses the fixed effects contained in
-        Feols._fixef and attaches it to the Stargazer
-        regression table.
-        """
-        deparsed_fixef_lists = _deparse_fixef_for_stargazer(
-            [x._fixef for x in self.models]
-        )
-
-        for _, key in enumerate(deparsed_fixef_lists):
-            self.add_line(key, deparsed_fixef_lists[key], LineLocation.FOOTER_TOP)
-
-
 def etable(
     models: Union[list[Union[Feols, Fepois, Feiv]], FixestMulti],
-    type: str = "md",
+    type: str = "df",
     signif_code: list = [0.001, 0.01, 0.05],
     coef_fmt: str = "b (se)",
     custom_stats: Optional[dict] = None,
@@ -135,8 +75,8 @@ def etable(
         A DataFrame with the coefficients and standard errors of the models.
     """  # noqa: D301
     assert (
-        signif_code is None or len(signif_code) == 3
-    ), "signif_code must be a list of length 3 or None"
+        isinstance([0.1, 0.2, 0.3], list) and len(signif_code) == 3
+    ), "signif_code must be a list of length 3"
     if signif_code:
         assert all(
             [0 < i < 1 for i in signif_code]
@@ -315,19 +255,28 @@ def etable(
     res_all = pd.concat([depvars, res, nobs_fixef_df], ignore_index=True)
     res_all.columns = pd.Index([""] + list(res_all.columns[1:]))
 
-    if type == "tex":
-        return res_all.to_latex()
-    elif type == "md":
-        res_all = _tabulate_etable(res_all, len(models), n_fixef)
+    caption = (
+        f"Significance levels: * p < {signif_code[2]}, ** p < {signif_code[1]}, *** p < {signif_code[0]}. "
+        + f"Format of coefficient cell:\n{coef_fmt_title}"
+    )
+
+    if type == "md":
+        res_all = _tabulate_etable_md(res_all, len(models), n_fixef)
         print(res_all)
         if signif_code:
             print(
                 f"Significance levels: * p < {signif_code[2]}, ** p < {signif_code[1]}, *** p < {signif_code[0]}"
             )
-        print(f"Format of coefficient cell:\n{coef_fmt_title}")
+            print(f"Format of coefficient cell:\n{coef_fmt_title}")
         return None
+    elif type in ["df", "tex"]:
+        res_all = _tabulate_etable_df(res_all, n_fixef, caption)
+        if type == "df":
+            return res_all
+        else:
+            return res_all.to_latex()
     else:
-        return res_all
+        raise ValueError("type must be either 'df', 'md' or 'tex'")
 
 
 def summary(
@@ -463,7 +412,57 @@ def _post_processing_input_checks(
     return models
 
 
-def _tabulate_etable(df, n_models, n_fixef):
+def _tabulate_etable_df(df, n_fixef, caption):
+    k, _ = df.shape
+    n_coef = k - 3 - 2 - n_fixef
+
+    line1 = 2 + n_coef
+    line2 = line1 + n_fixef
+    line3 = k
+
+    styler = (
+        df.style.set_properties(**{"text-align": "right"})
+        .set_table_styles(
+            [
+                # {'selector': 'thead th', 'props': 'border-bottom: 2px solid black; text-align: center;'},  # Header row
+                {
+                    "selector": "tbody tr:nth-child(0) td",
+                    "props": "background-color: #f0f0f0",
+                },  # Meant for the first row; NOTE(review): CSS :nth-child() is 1-indexed, so nth-child(0) matches no row
+                {
+                    "selector": "tbody tr:nth-child(1) td",
+                    "props": "border-bottom: 2px solid black",
+                },  # Line below the first body row (nth-child is 1-indexed)
+                {
+                    "selector": f"tbody tr:nth-child({line1}) td",
+                    "props": "border-bottom: 1px solid black;",
+                },  # Line below row `line1` (= 2 + n_coef)
+                {
+                    "selector": f"tbody tr:nth-child({line2}) td",
+                    "props": "border-bottom: 1px solid black;",
+                },  # Line below row `line2` (= line1 + n_fixef)
+                {
+                    "selector": f"tbody tr:nth-child({line3}) td",
+                    "props": "border-bottom: 1px solid black;",
+                },  # Line below the last row (`line3` = k, the number of rows in df)
+                {
+                    "selector": "tbody td",
+                    "props": "background-color: #ffffff;",
+                },  # Background color for all cells
+                {
+                    "selector": "tbody tr td:first-child",
+                    "props": "background-color: #f0f0f0; font-weight: bold;text-align: left;",
+                },  # Set first column to grey and bold
+            ]
+        )
+        .hide(axis="index")
+        .set_caption(caption)
+    )
+
+    return styler
+
+
+def _tabulate_etable_md(df, n_models, n_fixef):
     """
     Format and tabulate a DataFrame.
 
@@ -479,7 +478,10 @@ def _tabulate_etable(df, n_models, n_fixef):
     """
     # Format the DataFrame for tabulate
     table = tabulate(
-        df, headers="keys", showindex=False, colalign=["left"] + n_models * ["right"]
+        df,
+        headers="keys",
+        showindex=False,
+        colalign=["left"] + n_models * ["right"],
     )
 
     # Split the table into header and body
@@ -599,53 +601,3 @@ def _number_formatter(x: float, **kwargs) -> str:
     _int, _float = str(x_str).split(".")
     _float = _float.ljust(digits, "0")
     return _int if digits == 0 else f"{_int}.{_float}"
-
-
-def _deparse_fixef_for_stargazer(fixef_list: list[str]) -> dict[str, list[str]]:
-    """
-    Deparse Feols._fixef to a dict of lists for easy use with Stargazer
-    to add fixed effects to the regression table.
-
-    Parameters
-    ----------
-    fixef_list : list
-        List of fixed effects from Feols._fixef.
-
-    Returns
-    -------
-    dict
-        Dictionary of lists, where each list contains the fixed
-        effects for a given variable.
-
-    Example
-    -------
-        # basic example
-        fixef_list = ['f1', 'f2', 'f1+f2', 'f1', 'f2', 'f1+f2']
-        deparse_fixef_for_stargazer(fixef_list)
-        # Output
-        {'f1': ['f1', '-', 'f1', 'f1', '-', 'f1'],
-        'f2': ['-', 'f2', 'f2', '-', 'f2', 'f2']
-        }
-    """
-
-    def identify_variables(lst):
-        variables = set()
-        for item in lst:
-            if item:
-                parts = item.split("+")
-                for part in parts:
-                    variables.add(part)
-        return list(variables)
-
-    unique_variables = identify_variables(fixef_list)
-
-    variable_lists: dict[str, list[str]] = {var: [] for var in unique_variables}
-
-    for item in fixef_list:
-        for var in unique_variables:
-            if item and var in item:
-                variable_lists[var].append("x")
-            else:
-                variable_lists[var].append("-")
-
-    return variable_lists
diff --git a/pyproject.toml b/pyproject.toml
@@ -33,8 +33,6 @@ wildboottest = ">=0.2.0"
 pre-commit = "^3.6.0"
 doubleml = "^0.7.1"
 marginaleffects = "^0.0.10"
-stargazer = ">=0.0.7"
-statsmodels = "^0.14.2"
 
 [tool.poetry.group.docs.dependencies]
 quartodoc = ">=0.7.2"
@@ -95,6 +93,7 @@ ignore = [
   "SIM110", # Use all instead of `for` loop
   "TRY003", # Avoid specifying long messages outside the exception class
   "D205",   # 1 blank line required between summary line and description
+  "W505",   # Doc line too long
 ]
 
 [tool.ruff.lint.per-file-ignores]

diff --git a/tests/test_summarise.py b/tests/test_summarise.py
@@ -43,7 +43,7 @@ def test_summary():
 
     # Test significance code
     etable([fit1, fit2], signif_code=[0.01, 0.05, 0.1])
-    etable([fit1, fit2], signif_code=None)
+    etable([fit1, fit2], signif_code=[0.02, 0.06, 0.1])
 
     # Test coefficient format
     etable([fit1, fit2], coef_fmt="b (se)\nt [p]")
-Original file line number
+Diff line change
@@ Expand Up / @@ -312,6 +312,7 @@ def predict( @@
                 See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
             Returns
             -------
             np.ndarray
@@ Expand Down @@