Skip to content

Commit

Permalink
Remove Stargazer Dependency (#568)
Browse files Browse the repository at this point in the history
* update the stargazer notebook; make type="df" the default for etable()

* updates

* minor reformatting

* update etable notebook

* update lock file

* fix test bug
  • Loading branch information
s3alfisc authored Jul 31, 2024
1 parent 3bc70e6 commit 5e1bd10
Show file tree
Hide file tree
Showing 9 changed files with 1,285 additions and 509 deletions.
90 changes: 43 additions & 47 deletions docs/difference-in-differences.ipynb

Large diffs are not rendered by default.

1,070 changes: 953 additions & 117 deletions docs/stargazer.ipynb

Large diffs are not rendered by default.

429 changes: 211 additions & 218 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyfixest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
fepois,
rwolf,
)
from pyfixest.report import Stargazer, coefplot, etable, iplot, summary
from pyfixest.report import coefplot, etable, iplot, summary
from pyfixest.utils import (
get_data,
get_ssc,
Expand Down
1 change: 1 addition & 0 deletions pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ def predict(
See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
Returns
-------
np.ndarray
Expand Down
3 changes: 1 addition & 2 deletions pyfixest/report/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyfixest.report.summarize import Stargazer, etable, summary
from pyfixest.report.summarize import etable, summary
from pyfixest.report.visualize import (
coefplot,
iplot,
Expand All @@ -9,5 +9,4 @@
"etable",
"iplot",
"coefplot",
"Stargazer",
]
194 changes: 73 additions & 121 deletions pyfixest/report/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

import numpy as np
import pandas as pd
from stargazer.stargazer import LineLocation
from stargazer.stargazer import Stargazer as BaseStargazer
from tabulate import tabulate

from pyfixest.estimation.feiv_ import Feiv
Expand All @@ -14,67 +12,9 @@
from pyfixest.utils.dev_utils import _select_order_coefs


class Stargazer(BaseStargazer):
    """
    A wrapper around the Stargazer class from the stargazer package.

    Adds fixed effects to the regression table. For details,
    see the Stargazer documentation:
    https://github.com/StatsReporting/stargazer.

    Parameters
    ----------
    models : list
        A list of regression model objects to be included in the table.
    """

    def __init__(self, models):
        """
        Initialize the Stargazer object with a list of models.

        If at least one model carries a non-None ``_fixef`` attribute, the
        fixed-effects rows are appended to the table via ``add_fixef``.

        Parameters
        ----------
        models : list
            A list of regression model objects to be included in the table.
        """
        super().__init__(models)

        # Models lacking a `_fixef` attribute are treated as having no fixed
        # effects. Reading it with a default means the caller's model objects
        # are never mutated: nothing is attached temporarily, and `_fixef` is
        # never deleted from models that legitimately own it.
        if any(getattr(x, "_fixef", None) is not None for x in self.models):
            self.add_fixef()

    def add_fixef(self):
        """
        Add information on fixed effects to the regression table.

        This method deparses the fixed effects contained in each model's
        ``_fixef`` attribute (None when the attribute is absent) and attaches
        one line per fixed-effect variable to the Stargazer regression table
        at ``LineLocation.FOOTER_TOP``.
        """
        deparsed_fixef_lists = _deparse_fixef_for_stargazer(
            [getattr(x, "_fixef", None) for x in self.models]
        )

        for key, values in deparsed_fixef_lists.items():
            self.add_line(key, values, LineLocation.FOOTER_TOP)


def etable(
models: Union[list[Union[Feols, Fepois, Feiv]], FixestMulti],
type: str = "md",
type: str = "df",
signif_code: list = [0.001, 0.01, 0.05],
coef_fmt: str = "b (se)",
custom_stats: Optional[dict] = None,
Expand Down Expand Up @@ -135,8 +75,8 @@ def etable(
A DataFrame with the coefficients and standard errors of the models.
""" # noqa: D301
assert (
signif_code is None or len(signif_code) == 3
), "signif_code must be a list of length 3 or None"
isinstance([0.1, 0.2, 0.3], list) and len(signif_code) == 3
), "signif_code must be a list of length 3"
if signif_code:
assert all(
[0 < i < 1 for i in signif_code]
Expand Down Expand Up @@ -315,19 +255,28 @@ def etable(
res_all = pd.concat([depvars, res, nobs_fixef_df], ignore_index=True)
res_all.columns = pd.Index([""] + list(res_all.columns[1:]))

if type == "tex":
return res_all.to_latex()
elif type == "md":
res_all = _tabulate_etable(res_all, len(models), n_fixef)
caption = (
f"Significance levels: * p < {signif_code[2]}, ** p < {signif_code[1]}, *** p < {signif_code[0]}. "
+ f"Format of coefficient cell:\n{coef_fmt_title}"
)

if type == "md":
res_all = _tabulate_etable_md(res_all, len(models), n_fixef)
print(res_all)
if signif_code:
print(
f"Significance levels: * p < {signif_code[2]}, ** p < {signif_code[1]}, *** p < {signif_code[0]}"
)
print(f"Format of coefficient cell:\n{coef_fmt_title}")
print(f"Format of coefficient cell:\n{coef_fmt_title}")
return None
elif type in ["df", "tex"]:
res_all = _tabulate_etable_df(res_all, n_fixef, caption)
if type == "df":
return res_all
else:
return res_all.to_latex()
else:
return res_all
raise ValueError("type must be either 'df', 'md' or 'tex'")


def summary(
Expand Down Expand Up @@ -463,7 +412,57 @@ def _post_processing_input_checks(
return models


def _tabulate_etable(df, n_models, n_fixef):
def _tabulate_etable_df(df, n_fixef, caption):
k, _ = df.shape
n_coef = k - 3 - 2 - n_fixef

line1 = 2 + n_coef
line2 = line1 + n_fixef
line3 = k

styler = (
df.style.set_properties(**{"text-align": "right"})
.set_table_styles(
[
# {'selector': 'thead th', 'props': 'border-bottom: 2px solid black; text-align: center;'}, # Header row
{
"selector": "tbody tr:nth-child(0) td",
"props": "background-color: #f0f0f0",
}, # First row
{
"selector": "tbody tr:nth-child(1) td",
"props": "border-bottom: 2px solid black",
}, # Line below row 1 (index 1)
{
"selector": f"tbody tr:nth-child({line1}) td",
"props": "border-bottom: 1px solid black;",
}, # Line below fixef_bar row
{
"selector": f"tbody tr:nth-child({line2}) td",
"props": "border-bottom: 1px solid black;",
}, # Line below fixef_bar row
{
"selector": f"tbody tr:nth-child({line3}) td",
"props": "border-bottom: 1px solid black;",
}, # Line below fixef_bar row
{
"selector": "tbody td",
"props": "background-color: #ffffff;",
}, # Background color for all cells
{
"selector": "tbody tr td:first-child",
"props": "background-color: #f0f0f0; font-weight: bold;text-align: left;",
}, # Set first column to grey and bold
]
)
.hide(axis="index")
.set_caption(caption)
)

return styler


def _tabulate_etable_md(df, n_models, n_fixef):
"""
Format and tabulate a DataFrame.
Expand All @@ -479,7 +478,10 @@ def _tabulate_etable(df, n_models, n_fixef):
"""
# Format the DataFrame for tabulate
table = tabulate(
df, headers="keys", showindex=False, colalign=["left"] + n_models * ["right"]
df,
headers="keys",
showindex=False,
colalign=["left"] + n_models * ["right"],
)

# Split the table into header and body
Expand Down Expand Up @@ -599,53 +601,3 @@ def _number_formatter(x: float, **kwargs) -> str:
_int, _float = str(x_str).split(".")
_float = _float.ljust(digits, "0")
return _int if digits == 0 else f"{_int}.{_float}"


def _deparse_fixef_for_stargazer(fixef_list: list[str]) -> dict[str, list[str]]:
    """
    Deparse Feols._fixef to a dict of lists for easy use with Stargazer
    to add fixed effects to the regression table.

    Parameters
    ----------
    fixef_list : list
        List of fixed effects from Feols._fixef, one entry per model. Each
        entry is a "+"-separated string of fixed-effect variable names; a
        falsy entry (None or "") means the model has no fixed effects.

    Returns
    -------
    dict
        Dictionary keyed by fixed-effect variable, in order of first
        appearance. Each value is a list with one marker per model:
        "x" if the model includes that fixed effect, "-" otherwise.

    Example
    -------
    # basic example
    fixef_list = ['f1', 'f2', 'f1+f2', 'f1', 'f2', 'f1+f2']
    _deparse_fixef_for_stargazer(fixef_list)
    # Output
    {'f1': ['x', '-', 'x', 'x', '-', 'x'],
     'f2': ['-', 'x', 'x', '-', 'x', 'x']
    }
    """
    # Split every model's specification once. Comparing whole variable names
    # (rather than testing substrings of the raw string) keeps e.g. "f1" from
    # being reported for a model that only has "f10".
    parts_per_model = [item.split("+") if item else [] for item in fixef_list]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the rows
    # of the resulting table come out in a deterministic order.
    unique_variables = dict.fromkeys(
        part for parts in parts_per_model for part in parts
    )

    return {
        var: ["x" if var in parts else "-" for parts in parts_per_model]
        for var in unique_variables
    }
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ wildboottest = ">=0.2.0"
pre-commit = "^3.6.0"
doubleml = "^0.7.1"
marginaleffects = "^0.0.10"
stargazer = ">=0.0.7"
statsmodels = "^0.14.2"

[tool.poetry.group.docs.dependencies]
quartodoc = ">=0.7.2"
Expand Down Expand Up @@ -95,6 +93,7 @@ ignore = [
"SIM110", # Use all instead of `for` loop
"TRY003", # Avoid specifying long messages outside the exception class
"D205", # 1 blank line required between summary line and description
"W505", # Doc line too long
]

[tool.ruff.lint.per-file-ignores]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_summarise.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_summary():

# Test significance code
etable([fit1, fit2], signif_code=[0.01, 0.05, 0.1])
etable([fit1, fit2], signif_code=None)
etable([fit1, fit2], signif_code=[0.02, 0.06, 0.1])

# Test coefficient format
etable([fit1, fit2], coef_fmt="b (se)\nt [p]")
Expand Down

0 comments on commit 5e1bd10

Please sign in to comment.