Skip to content

Commit

Permalink
Rename Regression Plot in Case of Duplicate Names (#748)
Browse files Browse the repository at this point in the history
* fix #720 by introducing _model_name_plot arg

* fix #720 by introducing _model_name_plot arg

* add warnings

* model name checks only for plots
  • Loading branch information
s3alfisc authored Dec 15, 2024
1 parent aea8123 commit 459cb55
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 6 deletions.
10 changes: 9 additions & 1 deletion pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,14 @@ class Feols:
_data: pd.DataFrame
The data frame used in the estimation. None if arguments `lean = True` or
`store_data = False`.
_model_name: str
The name of the model. Usually just the formula string. If split estimation is used,
the model name will include the split variable and value.
_model_name_plot: str
The name of the model used when plotting and summarizing models. Usually identical to
`_model_name`. This might be different when pf.summary() or pf.coefplot() are called
and models with identical _model_name attributes are passed. In this case,
the _model_name_plot attribute will be modified.
"""

def __init__(
Expand All @@ -213,6 +220,7 @@ def __init__(
self._sample_split_value = sample_split_value
self._sample_split_var = sample_split_var
self._model_name = f"{FixestFormula.fml} (Sample: {self._sample_split_var} = {self._sample_split_value})"
self._model_name_plot = self._model_name
self._method = "feols"
self._is_iv = False
self.FixestFormula = FixestFormula
Expand Down
28 changes: 27 additions & 1 deletion pyfixest/report/summarize.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import re
import warnings
from collections import Counter
from collections.abc import ValuesView
from typing import Optional, Union

Expand Down Expand Up @@ -563,7 +565,7 @@ def summary(models: ModelInputType, digits: int = 3) -> None:


def _post_processing_input_checks(
models: ModelInputType,
models: ModelInputType, check_duplicate_model_names: bool = False
) -> list[Union[Feols, Fepois, Feiv]]:
"""
Perform input checks for post-processing models.
Expand All @@ -573,6 +575,10 @@ def _post_processing_input_checks(
models : Union[List[Union[Feols, Fepois, Feiv]], FixestMulti]
The models to be checked. This can either be a list of models
(Feols, Fepois, Feiv) or a single FixestMulti object.
check_duplicate_model_names : bool, optional
Whether to check for duplicate model names. Default is False.
Mostly used to avoid overlapping models in plots created via
pf.coefplot() and pf.iplot().
Returns
-------
Expand Down Expand Up @@ -601,6 +607,26 @@ def _post_processing_input_checks(
else:
raise TypeError("Invalid type for models argument.")

if check_duplicate_model_names:
# create model_name_plot attribute to differentiate between models with the
# same model_name / model formula
all_model_names = [model._model_name for model in models_list]
for model in models_list:
model._model_name_plot = model._model_name

counter = Counter(all_model_names)
duplicate_model_names = [item for item, count in counter.items() if count > 1]

for duplicate_model in duplicate_model_names:
duplicates = [
model for model in models_list if model._model_name == duplicate_model
]
for i, model in enumerate(duplicates):
model._model_name_plot = f"Model {i}: {model._model_name}"
warnings.warn(
f"The _model_name attribute {model._model_name}' is duplicated for models in the `models` you provided. To avoid overlapping model names / plots, the _model_name_plot attribute has been changed to '{model._model_name_plot}'."
)

return models_list


Expand Down
10 changes: 6 additions & 4 deletions pyfixest/report/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def iplot(
pf.iplot([fit1], joint = "both")
```
"""
models = _post_processing_input_checks(models)
models = _post_processing_input_checks(models, check_duplicate_model_names=True)
if joint not in [False, None] and len(models) > 1:
raise ValueError(
"The 'joint' parameter is only available for a single model, i.e. objects of type FixestMulti are not supported."
Expand Down Expand Up @@ -302,7 +302,7 @@ def coefplot(
```
"""
models = _post_processing_input_checks(models)
models = _post_processing_input_checks(models, check_duplicate_model_names=True)
if joint not in [False, None] and len(models) > 1:
raise ValueError(
"The 'joint' parameter is only available for a single model, i.e. objects of type FixestMulti are not supported."
Expand Down Expand Up @@ -596,7 +596,7 @@ def _get_model_df(
A tidy model frame.
"""
df_model = fxst.tidy(alpha=alpha).reset_index() # Coefficient -> simple column
df_model["fml"] = f"{fxst._model_name}: {(1- alpha) *100:.1f}%"
df_model["fml"] = f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}%"

if joint in ["both", True]:
lb, ub = f"{alpha / 2*100:.1f}%", f"{(1 - alpha / 2)*100:.1f}%"
Expand All @@ -608,7 +608,9 @@ def _get_model_df(
.drop([lb, ub], axis=1)
.merge(df_joint, on="Coefficient", how="left")
)
df_joint_full["fml"] = f"{fxst._model_name}: {(1- alpha) *100:.1f}% joint CIs"
df_joint_full["fml"] = (
f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}% joint CIs"
)
if joint == "both":
df_model = pd.concat([df_model, df_joint_full], axis=0)
else:
Expand Down
6 changes: 6 additions & 0 deletions tests/test_visualize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import matplotlib.pyplot as plt
import pandas as pd

import pyfixest as pf
from pyfixest.did.visualize import (
_plot_panelview,
_plot_panelview_output_plot,
Expand Down Expand Up @@ -38,6 +39,11 @@ def test_visualize():
fit6 = feols("Y + Y2 ~ X1 + X2 | f1", data=data)
fit6.coefplot()

# identical models
fit7 = feols("Y ~ X1 + X2 | f1", data=data)
fit8 = feols("Y ~ X1 + X2 | f1", data=data)
pf.coefplot([fit7, fit8])


def test_panelview():
df_het = pd.read_csv("pyfixest/did/data/df_het.csv")
Expand Down

0 comments on commit 459cb55

Please sign in to comment.