Rename Regression Plot in Case of Duplicate Names (#748)

* fix #720 by introducing _model_name_plot arg * fix #720 by introducing _model_name_plot arg * add warnings * model name checks only for plots
py-econometrics · Dec 15, 2024 · 459cb55 · 459cb55
1 parent aea8123
commit 459cb55
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 6 deletions.
diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
@@ -188,7 +188,14 @@ class Feols:
     _data: pd.DataFrame
         The data frame used in the estimation. None if arguments `lean = True` or
         `store_data = False`.
-
+    _model_name: str
+        The name of the model. Usually just the formula string. If split estimation is used,
+        the model name will include the split variable and value.
+    _model_name_plot: str
+        The name of the model used when plotting and summarizing models. Usually identical to
+        `_model_name`. This might be different when pf.summary() or pf.coefplot() are called
+        and models with identical _model_name attributes are passed. In this case,
+        the _model_name_plot attribute will be modified.
     """
 
     def __init__(
@@ -213,6 +220,7 @@ def __init__(
         self._sample_split_value = sample_split_value
         self._sample_split_var = sample_split_var
         self._model_name = f"{FixestFormula.fml} (Sample: {self._sample_split_var} = {self._sample_split_value})"
+        self._model_name_plot = self._model_name
         self._method = "feols"
         self._is_iv = False
         self.FixestFormula = FixestFormula

diff --git a/pyfixest/report/summarize.py b/pyfixest/report/summarize.py
@@ -1,4 +1,6 @@
 import re
+import warnings
+from collections import Counter
 from collections.abc import ValuesView
 from typing import Optional, Union
 
@@ -563,7 +565,7 @@ def summary(models: ModelInputType, digits: int = 3) -> None:
 
 
 def _post_processing_input_checks(
-    models: ModelInputType,
+    models: ModelInputType, check_duplicate_model_names: bool = False
 ) -> list[Union[Feols, Fepois, Feiv]]:
     """
     Perform input checks for post-processing models.
@@ -573,6 +575,10 @@ def _post_processing_input_checks(
         models : Union[List[Union[Feols, Fepois, Feiv]], FixestMulti]
                 The models to be checked. This can either be a list of models
                 (Feols, Fepois, Feiv) or a single FixestMulti object.
+        check_duplicate_model_names : bool, optional
+                Whether to check for duplicate model names. Default is False.
+                Mostly used to avoid overlapping models in plots created via
+                pf.coefplot() and pf.iplot().
 
     Returns
     -------
@@ -601,6 +607,26 @@ def _post_processing_input_checks(
     else:
         raise TypeError("Invalid type for models argument.")
 
+    if check_duplicate_model_names:
+        # create model_name_plot attribute to differentiate between models with the
+        # same model_name / model formula
+        all_model_names = [model._model_name for model in models_list]
+        for model in models_list:
+            model._model_name_plot = model._model_name
+
+        counter = Counter(all_model_names)
+        duplicate_model_names = [item for item, count in counter.items() if count > 1]
+
+        for duplicate_model in duplicate_model_names:
+            duplicates = [
+                model for model in models_list if model._model_name == duplicate_model
+            ]
+            for i, model in enumerate(duplicates):
+                model._model_name_plot = f"Model {i}: {model._model_name}"
+                warnings.warn(
+                    f"The _model_name attribute {model._model_name}' is duplicated for models in the `models` you provided. To avoid overlapping model names / plots, the _model_name_plot attribute has been changed to '{model._model_name_plot}'."
+                )
+
     return models_list
 
 

diff --git a/pyfixest/report/visualize.py b/pyfixest/report/visualize.py
@@ -156,7 +156,7 @@ def iplot(
     pf.iplot([fit1], joint = "both")
     ```
     """
-    models = _post_processing_input_checks(models)
+    models = _post_processing_input_checks(models, check_duplicate_model_names=True)
     if joint not in [False, None] and len(models) > 1:
         raise ValueError(
             "The 'joint' parameter is only available for a single model, i.e. objects of type FixestMulti are not supported."
@@ -302,7 +302,7 @@ def coefplot(
 
     ```
     """
-    models = _post_processing_input_checks(models)
+    models = _post_processing_input_checks(models, check_duplicate_model_names=True)
     if joint not in [False, None] and len(models) > 1:
         raise ValueError(
             "The 'joint' parameter is only available for a single model, i.e. objects of type FixestMulti are not supported."
@@ -596,7 +596,7 @@ def _get_model_df(
         A tidy model frame.
     """
     df_model = fxst.tidy(alpha=alpha).reset_index()  # Coefficient -> simple column
-    df_model["fml"] = f"{fxst._model_name}: {(1- alpha) *100:.1f}%"
+    df_model["fml"] = f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}%"
 
     if joint in ["both", True]:
         lb, ub = f"{alpha / 2*100:.1f}%", f"{(1 - alpha / 2)*100:.1f}%"
@@ -608,7 +608,9 @@ def _get_model_df(
             .drop([lb, ub], axis=1)
             .merge(df_joint, on="Coefficient", how="left")
         )
-        df_joint_full["fml"] = f"{fxst._model_name}: {(1- alpha) *100:.1f}% joint CIs"
+        df_joint_full["fml"] = (
+            f"{fxst._model_name_plot}: {(1- alpha) *100:.1f}% joint CIs"
+        )
         if joint == "both":
             df_model = pd.concat([df_model, df_joint_full], axis=0)
         else:

diff --git a/tests/test_visualize.py b/tests/test_visualize.py
@@ -1,6 +1,7 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 
+import pyfixest as pf
 from pyfixest.did.visualize import (
     _plot_panelview,
     _plot_panelview_output_plot,
@@ -38,6 +39,11 @@ def test_visualize():
     fit6 = feols("Y + Y2 ~ X1 + X2 | f1", data=data)
     fit6.coefplot()
 
+    # identical models
+    fit7 = feols("Y ~ X1 + X2 | f1", data=data)
+    fit8 = feols("Y ~ X1 + X2 | f1", data=data)
+    pf.coefplot([fit7, fit8])
+
 
 def test_panelview():
     df_het = pd.read_csv("pyfixest/did/data/df_het.csv")