Setup

import pandas as pd
import pyfixest as pf
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

# R packages accessed from Python via rpy2
fixest = importr("fixest")
stats = importr("stats")
broom = importr("broom")
pandas2ri.activate()

data = pf.get_data(model="Feols", N=10_000, seed=99292)

Ordinary Least Squares (OLS)
IID Inference
First, we estimate a model via pyfixest and compute "iid" standard errors.
fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid")
We estimate the same model with weights:
fit_weights = pf.feols(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, weights="weights", vcov="iid"
)
Via r-fixest and rpy2, we get:
r_fit = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov="iid",  # match the "iid" vcov of the pyfixest call
)
R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).
Let’s compare how close the covariance matrices are:
fit_vcov = fit._vcov
r_vcov = stats.vcov(r_fit)
fit_vcov - r_vcov
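Beyond eyeballing the elementwise differences, the agreement can be checked programmatically up to a floating point tolerance. A minimal sketch, assuming numpy is available and that stats.vcov(r_fit) converts to a numpy array under the active pandas2ri converter:

import numpy as np

# raises an AssertionError if any entry differs by more than atol
np.testing.assert_allclose(np.asarray(fit_vcov), np.asarray(r_vcov), atol=1e-12)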
And for WLS:
fit_weights._vcov - stats.vcov(r_fit_weights)
array([[ 1.68051337e-18, -2.11758237e-21],
We conclude by comparing all estimation results via the tidy methods:
fit.tidy()
pd.DataFrame(broom.tidy_fixest(r_fit)).T
fit_weights.tidy()
pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T
Heteroskedastic Errors
We repeat the same exercise with heteroskedastic (HC1) errors:
fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero")
fit_weights = pf.feols(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", weights="weights"
)
r_fit = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov="hetero",  # match the "hetero" vcov of the pyfixest call
)
As before, we compare the variance-covariance matrices:

fit._vcov - stats.vcov(r_fit)
array([[-1.61762964e-16, -2.13305660e-17],
[-2.13306190e-17, -5.39492225e-17]])
fit_weights._vcov - stats.vcov(r_fit_weights)
array([[-2.05022631e-16, -9.53695571e-18],
We conclude by comparing all estimation results via the tidy methods:
fit.tidy()
pd.DataFrame(broom.tidy_fixest(r_fit)).T
fit_weights.tidy()
pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T
Cluster-Robust Errors
We conclude with cluster-robust errors.
fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"})
fit_weights = pf.feols(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, weights="weights"
)
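The corresponding r-fixest calls are elided in this excerpt. A sketch of how they would look via rpy2, with the cluster and weights arguments mirroring the Python calls above (treat the exact arguments as assumptions):

r_fit = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    cluster=ro.Formula("~f1"),   # one-way clustering by f1
)
r_fit_weights = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    weights=ro.Formula("~weights"),
    cluster=ro.Formula("~f1"),
)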
fit._vcov - stats.vcov(r_fit)
array([[ 4.20670443e-16, -6.97565513e-17],
[-6.97565513e-17, -1.42166010e-17]])
fit_weights._vcov - stats.vcov(r_fit_weights)
array([[2.59070109e-16, 4.07324592e-16],
We conclude by comparing all estimation results via the tidy methods:
fit.tidy()
pd.DataFrame(broom.tidy_fixest(r_fit)).T
fit_weights.tidy()
pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T
Poisson Regression
data = pf.get_data(model="Fepois")
fit_iid = pf.fepois(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid", iwls_tol=1e-10)
fit_hetero = pf.fepois(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", iwls_tol=1e-10
)
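The cluster-robust Poisson fit and the three r-fixest reference fits compared below are elided in this excerpt. A sketch of how they would be set up, mirroring the OLS sections above (the vcov choices are assumptions):

fit_crv = pf.fepois(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, iwls_tol=1e-10
)

fit_r_iid = fixest.fepois(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"), data=data, vcov="iid"
)
fit_r_hetero = fixest.fepois(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"), data=data, vcov="hetero"
)
fit_r_crv = fixest.fepois(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"), data=data, vcov="cluster"  # clusters by the first fixed effect
)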
fit_iid._vcov - stats.vcov(fit_r_iid)
array([[ 1.20791284e-08, -6.55604931e-10],
[-6.55604931e-10, 1.69958097e-09]])
fit_hetero._vcov - stats.vcov(fit_r_hetero)
array([[ 2.18101847e-08, -7.38711972e-10],
[-7.38711972e-10, 3.07587753e-09]])
fit_crv._vcov - stats.vcov(fit_r_crv)
array([[ 1.58300904e-08, -1.20806815e-10],
We conclude by comparing all estimation results via the tidy methods:
fit_iid.tidy()
pd.DataFrame(broom.tidy_fixest(fit_r_iid)).T
fit_hetero.tidy()
pd.DataFrame(broom.tidy_fixest(fit_r_hetero)).T
fit_crv.tidy()
pd.DataFrame(broom.tidy_fixest(fit_r_crv)).T
Difference-in-Differences Estimation
See also NBER SI methods lectures on Linear Panel Event Studies.
Setup
from importlib import resources

import pandas as pd
import pyfixest as pf
%load_ext autoreload
%autoreload 2
pandas  : 2.2.3
pyfixest: 0.25.3
# one-shot adoption data - parallel trends is true
df_one_cohort = get_sharkfin()
df_one_cohort.head()
# multi-cohort adoption data
df_multi_cohort = pd.read_csv(
    resources.files("pyfixest.did.data").joinpath("df_het.csv")
)
Examining Treatment Timing
Before any DiD estimation, we need to examine the treatment timing, since it is crucial to our choice of estimator.
pf.panelview(
    df_one_cohort,
    unit="unit",
)
pf.panelview(
    df_multi_cohort,
    unit="unit",
)
We immediately see that we have staggered adoption of treatment in the second case, which implies that a naive application of 2WFE might yield biased estimates under substantial effect heterogeneity.
We can also plot treatment assignment in a disaggregated fashion, which gives us a sense of cohort sizes.
pf.panelview(
    df_multi_cohort,
    unit="unit",
)
Inspecting the Outcome Variable
pf.panelview() further allows us to inspect the "outcome" variable over time:
pf.panelview(
    df_multi_cohort,
    outcome="dep_var",
)
We immediately see that the first cohort is switched into treatment in 2000, while the second cohort is switched into treatment by 2010. Before each cohort is switched into treatment, the trends are parallel.
We can additionally inspect individual units by dropping the collapse_to_cohort argument. Because we have a large sample, we might want to inspect only a subset of units.
pf.panelview(
    df_multi_cohort,
    outcome="dep_var",
)
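A minimal sketch of inspecting only a subset of units by subsetting the DataFrame before plotting; the time and treat argument names are assumptions mirroring the unit and outcome arguments above:

# sample 50 units and plot only those
sample_units = df_multi_cohort["unit"].drop_duplicates().sample(50, random_state=1)
pf.panelview(
    df_multi_cohort[df_multi_cohort["unit"].isin(sample_units)],
    outcome="dep_var",
    unit="unit",
    time="year",    # assumed time column
    treat="treat",  # assumed treatment column
)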
One-shot adoption: Static and Dynamic Specifications
After taking a first look at the data, let's turn to estimation. We return to the df_one_cohort data set (without staggered treatment rollout).
fit_static_twfe = pf.feols(
    "Y ~ treat | unit + year",
    df_one_cohort,
)
fit_dynamic_twfe = pf.feols(
    "Y ~ i(year, ever_treated, ref = 14) | unit + year",
    df_one_cohort,
    vcov={"CRV1": "unit"},
)
fit_dynamic_twfe.iplot(
    coord_flip=False,
    title="Event Study",
    labels=rename_event_study_coefs(fit_dynamic_twfe._coefnames),
)
fit_lpdid.iplot(
    coord_flip=False,
    title="Local-Projections-Estimator",
    xintercept=18.5,
).show()
Marginal Effects and Hypothesis Tests via marginaleffects
Suppose we were interested in testing the hypothesis that X1 = X2. Given the relatively large differences in coefficients and small standard errors, we will likely reject the null that the two parameters are equal.
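Before turning to the packaged test, the statistic can be computed by hand from the estimates and the covariance matrix, which makes the mechanics explicit. A minimal sketch, assuming numpy is available and using the internal _vcov attribute that also appears in the comparison sections above:

import numpy as np

b = fit.coef()                 # pd.Series of coefficient estimates
V = fit._vcov                  # covariance matrix, ordered like b
R = np.zeros(len(b))
R[list(b.index).index("X1")] = 1.0
R[list(b.index).index("X2")] = -1.0

diff = R @ b.values            # estimate of X1 - X2
se = np.sqrt(R @ V @ R)        # its standard error
print(diff, diff / se)         # difference and its t/z ratio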
We can run the formal test via the hypotheses function from the marginaleffects package.

hypotheses(fit, "X1 - X2 = 0")
PyFixest 0.18.0
Additionally, model_matrix_fixest now returns a dictionary instead of a tuple.
Brings back fixed effects reference setting via i(var1, var2, ref) syntax. Deprecates the i_ref1 and i_ref2 function arguments. I.e., it is again possible to e.g. run
import pyfixest as pf

data = pf.get_data()

fit1 = pf.feols("Y ~ i(f1, X2)", data=data)
fit1.coef()[0:8]
Via the ref syntax, we can set the reference level:
fit2 = pf.feols("Y ~ i(f1, X2, ref = 1)", data=data)
fit2.coef()[0:8]
PyFixest 0.17.0
Restructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:
import numpy as np
import pyfixest as pf

data = pf.get_data()
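The excerpt cuts off here; the recommended pattern continues along these lines (a sketch consistent with the rest of this changelog):

fit = pf.feols("Y ~ X1 + X2 | f1", data=data)
fit.summary()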
The update should not introduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!
Adds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!
fit.confint(joint=True)
           2.5%      97.5%
Intercept  0.375929  1.181769
D         -1.762853 -1.042381
f1        -0.014294  0.023843
Adds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.
fit.ccv(treatment="D", cluster="group_id")
/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.
      Estimate             Std. Error  t value    Pr(>|t|)  2.5%       97.5%
CCV   -1.4026168622179929  0.238985    -5.869057  0.000015  -1.904706  -0.900528
CRV1
PyFixest 0.14.0
- Changes all docstrings to numpy format.
- Difference-in-differences estimation functions now need to be imported via the pyfixest.did.estimation module:
from pyfixest.did.estimation import did2s, lpdid, event_study
PyFixest: Fast High-Dimensional Fixed Effects Regression in Python
PyFixest is a Python implementation of the formidable fixest package for fast high-dimensional fixed effects regression. The package aims to mimic fixest syntax and functionality as closely as Python allows: if you know fixest well, the goal is that you won't have to read the docs to get started! In particular, this means that all of fixest's defaults are mirrored by PyFixest - currently with only one small exception.
Nevertheless, for a quick introduction, you can take a look at the quickstart or the regression chapter of Arthur Turrell's book on Coding for Economists.
For questions on PyFixest, head on over to our PyFixest Discourse forum.
Features
Quickstart
Read Sample Data
In a first step, we load the module and some synthetic example data:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyfixest as pf

data = pf.get_data()
data.head()
pandas    : 2.2.3
numpy     : 1.26.4
pyfixest  : 0.25.3
matplotlib: 3.9.2
data.info()
<class 'pandas.core.frame.DataFrame'>
OLS Estimation
We are interested in the relation between the dependent variable Y and the independent variable X1, using a fixed effect model for group_id. Let's see what the data looks like:
ax = data.plot(kind="scatter", x="X1", y="Y", c="group_id", colormap="viridis")
We can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.
fit = pf.feols(fml="Y ~ X1 | group_id", data=data, vcov="HC1")
type(fit)
Inspecting Model Results
To inspect the results, we can use a summary function or method:
fit.summary()
###
Or display a formatted regression table:
pf.etable(fit)
Alternatively, the .summarize module contains a summary function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.
pf.summary(fit)
###
You can access individual elements of the summary via dedicated methods: .tidy() returns a "tidy" pd.DataFrame, .coef() returns estimated parameters, and .se() estimated standard errors. Other methods include .pvalue(), .confint() and .tstat().
fit.tidy()
fit.coef()
Coefficient
Name: Estimate, dtype: float64
fit.se()
Coefficient
Name: Std. Error, dtype: float64
fit.tstat()
Coefficient
Name: t value, dtype: float64
fit.confint()
Last, model results can be visualized via dedicated methods for plotting:
fit.coefplot()  # or pf.coefplot([fit])
did.estimation.did2s
Examples
In a first step, we estimate a classical event study model:
# estimate the model
fit = pf.did2s(
    df_het,
    ...
)
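The remaining did2s arguments are elided in this excerpt. A hedged sketch of a full call, where the argument values are assumptions based on the df_het example data (yname, first_stage, second_stage, treatment, and cluster are did2s's parameter names):

fit = pf.did2s(
    df_het,
    yname="dep_var",                         # outcome column (assumption)
    first_stage="~ 0 | unit + year",         # unit and time fixed effects (assumption)
    second_stage="~ i(rel_year, ref=-1.0)",  # event-study dummies (assumption)
    treatment="treat",                       # treatment indicator (assumption)
    cluster="state",                         # cluster variable (assumption)
)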
We can also inspect the model visually:
fit.iplot(figsize=[1200, 400], coord_flip=False).show()
did.estimation.lpdid
Examples
import pandas as pd
import pyfixest as pf
fit.iplot(figsize=[1200, 400], coord_flip=False).show()
estimation.estimation.feols
Examples
Calling feols() returns an instance of the Feols class. The summary() method can be used to print the results.
An alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.
fit.tidy()
You can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, fit.pval() for the p-values, and fit.confint() for the confidence intervals.
The employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).
fit1 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="iid")
fit2 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="hetero")
fit3 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1"})
Supported inference types are "iid", "hetero", "HC1", "HC2", "HC3", and "CRV1"/"CRV3". Clustered standard errors are specified via a dictionary, e.g. {"CRV1": "f1"} for CRV1 inference with clustering by f1 or {"CRV3": "f1"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {"CRV1": "f1 + f2"} for CRV1 inference with clustering by f1 and f2.
fit4 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1 + f2"})
Inference can be adjusted post estimation via the vcov method:
fit.summary()
fit.vcov("iid").summary()
The ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to "none" whereas the fixest::feols() default is "nested". See here for more details: link to github.
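To make this choice explicit, the correction can be passed via an ssc object; a hedged sketch (pf.ssc and its argument names are assumptions about the current pyfixest API, so check its signature before use):

fit_ssc = pf.feols(
    "Y ~ X1 + X2 | f1 + f2",
    data,
    ssc=pf.ssc(adj=True, cluster_adj=True),  # small-sample correction settings (assumed names)
)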
feols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2, using the sw() syntax.
fit = pf.feols("Y ~ X1 + X2 | sw(f1, f2)", data)
type(fit)
The returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:
pf.etable(fit)
fe
f1    x    -
f2    -    x
stats
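A short usage sketch of fetch_model(), grounded in the description above:

# retrieve the individual models from the FixestMulti container
fit_f1 = fit.fetch_model(0)  # Y ~ X1 + X2 | f1
fit_f2 = fit.fetch_model(1)  # Y ~ X1 + X2 | f2
fit_f1.summary()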
Other supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a "stepwise" fashion, csw() does so cumulatively.
fit = pf.feols("Y ~ X1 + X2 | csw(f1, f2)", data)
pf.etable(fit)
fe
f1    x    x
f2    -    x
The sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():
fit = pf.feols("Y ~ X1 + X2 | sw0(f1, f2)", data)
pf.etable(fit)
fe
f1    -    x    -
f2    -    -    x
stats
The feols() function also supports multiple dependent variables. The following example estimates two models, one with Y as the dependent variable and one with Y2 as the dependent variable.
fit = pf.feols("Y + Y2 ~ X1 | f1 + f2", data)
pf.etable(fit)
fe
f1    x    x
f2    x    x
It is possible to combine different multiple estimation operators:
fit = pf.feols("Y + Y2 ~ X1 | sw(f1, f2)", data)
pf.etable(fit)
fe
f1    x    x    -    -
f2    -    -    x    x
stats
In general, using multiple estimation syntax can improve the estimation time, as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.
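A rough way to see the effect of this cache is to time one multiple-estimation call against the equivalent separate calls; a minimal sketch (timings will vary by machine and data size):

import time

start = time.perf_counter()
pf.feols("Y ~ X1 + X2 | csw(f1, f2)", data)  # demeaned covariates are shared across models
t_multi = time.perf_counter() - start

start = time.perf_counter()
pf.feols("Y ~ X1 + X2 | f1", data)
pf.feols("Y ~ X1 + X2 | f1 + f2", data)      # demeans the same covariates again
t_separate = time.perf_counter() - start

print(f"multiple estimation: {t_multi:.3f}s, separate calls: {t_separate:.3f}s")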
Additionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.
fit = pf.feols("Y ~ X1 + X2 | f1 + f2", data, split = "f1")
pf.etable(fit)
/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide
  cluster_adj_value = G / (G - 1)
fe
f1    x    x    x
f2    x    x    x
Besides OLS, feols() also supports IV estimation via three part formulas:
fit = pf.feols("Y ~ X2 | f1 + f2 | X1 ~ Z1", data)
fit.tidy()
Here, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:
fit = pf.feols("Y ~ X2 | X1 ~ Z1", data)
fit.tidy()
Last, feols() supports interaction of variables via the i() syntax. Documentation on this is tba.
After fitting a model via feols(), you can use the predict() method to get the predicted values:
fit = pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit.predict()[0:5]
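Relatedly, residuals on the estimation sample can be retrieved via the resid() method; a short sketch (resid() is assumed to be part of the same Feols API in recent pyfixest versions):

residuals = fit.resid()   # residuals for the estimation sample
residuals[0:5]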
The predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:
fit = pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit.predict(newdata=data)[0:5]
Last, you can plot the results of a model via the coefplot() method:
fit = pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit.coefplot()
report.coefplot
Examples
import pyfixest as pf
from pyfixest.report.utils import rename_categoricals
pf.coefplot([fit1], joint="both")
Replicating Examples from "The Effect"
This notebook replicates code examples from Nick Huntington-Klein’s book on causal inference, The Effect.
from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
%watermark --iversions
Chapter 4: Describing Relationships
# Read in data
dt = Mroz.load_pandas().data
# Keep just working women
dt = dt.query("lfp")
# Create unlogged earnings
dt.loc[:, "earn"] = dt["lwg"].apply("exp")

# 5. Run multiple linear regression models by successively adding controls
fit = pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
pf.etable(fit)
/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
dt.loc[:, "earn"] = dt["lwg"].apply("exp")
Chapter 13: Regression
Example 1
res = restaurant_inspections.load_pandas().data
res.inspection_score = res.inspection_score.astype(float)
res.NumberofLocations = res.NumberofLocations.astype(float)

fit = pf.feols(fml="inspection_score ~ NumberofLocations", data=res)
pf.etable([fit])
Example 2
df = restaurant_inspections.load_pandas().data

fit1 = pf.feols(
    ...
)
pf.etable([fit1, fit2])
Example 3: HC Standard Errors
pf.feols(fml="inspection_score ~ Year + Weekend", data=df, vcov="HC3").summary()
###
Example 4: Clustered Standard Errors
pf.feols(
    fml="inspection_score ~ Year + Weekend", data=df, vcov={"CRV1": "Weekend"}
).tidy()
Example 5: Bootstrap Inference
fit = pf.feols(fml="inspection_score ~ Year + Weekend", data=df)
fit.wildboottest(reps=999, param="Year")
Example 2
gm = gapminder.load_pandas().data
gm["logGDPpercap"] = gm["gdpPercap"].apply("log")
Example 3: TWFE
# Set our individual and time (index) for our data
fit = pf.feols(fml="lifeExp ~ np.log(gdpPercap) | country + year", data=gm)
fit.summary()
Chapter 18: Difference-in-Differences
Example 1
od = organ_donations.load_pandas().data

# Create Treatment Variable
Example 3: Dynamic Treatment Effect
od = organ_donations.load_pandas().data

# Create Treatment Variable
diff --git a/search.json b/search.json
index eeddcd32..16e2c702 100644
--- a/search.json
+++ b/search.json
@@ -479,7 +479,7 @@
"href": "reference/estimation.estimation.feols.html#examples",
"title": "estimation.estimation.feols",
"section": "Examples",
- "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\n-\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf2\n-\n-\nx\n\n\nf1\n-\nx\n-\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf2\nx\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf2\n-\n-\nx\nx\n\n\nf1\nx\nx\n-\n-\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using muliple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nLast, you can plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nwould run a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the cluster causal variance estimator by Athey et al by using the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.284647\n0.056518\n0.955552\n-0.581934\n0.61411\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
+ "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf1\nx\n-\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf1\n-\nx\n-\n\n\nf2\n-\n-\nx\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf1\nx\nx\n-\n-\n\n\nf2\n-\n-\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using muliple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is to be added.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nYou can also plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nThis runs a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the causal cluster variance estimator by Abadie et al. (QJE, 2023) by using the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.242455\n0.066353\n0.947828\n-0.493292\n0.525467\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
"crumbs": [
"Function Reference",
"Estimation Functions",
@@ -546,7 +546,7 @@
"href": "replicating-the-effect.html#chapter-4-describing-relationships",
"title": "Replicating Examples from “The Effect”",
"section": "Chapter 4: Describing Relationships",
- "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
+ "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
},
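The SettingWithCopyWarning in the Chapter 4 entry above comes from assigning into the slice returned by .query(). A minimal sketch of the usual fix, using toy stand-in data rather than the Mroz dataset: chaining an explicit .copy() makes the assignment target an independent DataFrame, so the warning disappears and the result is unchanged.

```python
import numpy as np
import pandas as pd

# Toy stand-in for the Mroz data (illustrative values only).
df = pd.DataFrame({"lfp": [True, False, True], "lwg": [0.5, 0.7, 1.1]})

# .query() returns a view-like slice; assigning into it can trigger
# pandas' SettingWithCopyWarning, as seen in the output above.
dt = df.query("lfp").copy()      # explicit copy -> no warning
dt["earn"] = np.exp(dt["lwg"])   # equivalent to dt["lwg"].apply("exp")
```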
{
"objectID": "replicating-the-effect.html#chapter-13-regression",
@@ -623,7 +623,7 @@
"href": "difference-in-differences.html#setup",
"title": "Difference-in-Differences Estimation",
"section": "Setup",
- "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npyfixest: 0.25.3\npandas : 2.2.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
+ "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\npyfixest: 0.25.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
},
{
"objectID": "difference-in-differences.html#examining-treatment-timing",
@@ -665,7 +665,7 @@
"href": "quickstart.html",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see how the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Fixest class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and se() estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s have a quick d-tour on the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One possibility to do this is by adding a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis is does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do it is by a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without adding all these dummy variables. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y1 ~ X | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see how the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Fixest class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and se() estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s have a quick d-tour on the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One possibility to do this is by adding a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis is does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do it is by a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without adding all these dummy variables. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y1 ~ X | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
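As a cross-check on the demeaning argument in the quickstart entry above, here is a minimal numpy sketch (synthetic data and helper names of my own choosing, not pyfixest internals) showing that the within-transformation reproduces the dummy-variable estimate of the slope, as the Frisch–Waugh–Lovell theorem guarantees:

```python
import numpy as np

rng = np.random.default_rng(0)
g = rng.integers(0, 5, size=200)            # group labels for 5 groups
x = rng.normal(size=200)
y = 2.0 * x + rng.normal(size=5)[g] + rng.normal(scale=0.1, size=200)

# (1) slope from OLS on x plus a full set of group dummies
Z = np.column_stack([x, np.eye(5)[g]])
beta_dummy = np.linalg.lstsq(Z, y, rcond=None)[0][0]

# (2) slope from OLS after demeaning x and y within groups
def demean(v, g):
    return v - (np.bincount(g, weights=v) / np.bincount(g))[g]

xd, yd = demean(x, g), demean(y, g)
beta_within = (xd @ yd) / (xd @ xd)

assert np.isclose(beta_dummy, beta_within)  # equal up to float error
```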
{
"objectID": "quickstart.html#what-is-a-fixed-effect-model",
@@ -679,7 +679,7 @@
"href": "quickstart.html#read-sample-data",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
+ "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
},
{
"objectID": "quickstart.html#ols-estimation",
@@ -707,7 +707,7 @@
"href": "quickstart.html#updating-regression-coefficients",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
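To convince ourselves that the update rule quoted above is exact for a single new observation, here is a small self-contained numpy check (synthetic data; this is a sketch of the algebra, not pyfixest's update() implementation):

```python
import numpy as np

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 3))
beta_true = np.array([1.0, -0.5, 0.25])
y = X @ beta_true + rng.normal(size=100)

beta_n = np.linalg.lstsq(X, y, rcond=None)[0]   # OLS on the first n rows

x_new = rng.normal(size=3)                      # one new observation (row)
y_new = x_new @ beta_true + rng.normal()

# beta_{n+1} = beta_n + (X'X)^{-1} x' (y - x beta_n), with X'X of the full data
X_full = np.vstack([X, x_new])
XtX_inv = np.linalg.inv(X_full.T @ X_full)
beta_updated = beta_n + XtX_inv @ x_new * (y_new - x_new @ beta_n)

beta_refit = np.linalg.lstsq(X_full, np.append(y, y_new), rcond=None)[0]
assert np.allclose(beta_updated, beta_refit)    # one-step update == full refit
```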
{
"objectID": "quickstart.html#inference-via-the-wild-bootstrap",
@@ -742,7 +742,7 @@
"href": "quickstart.html#joint-confidence-intervals",
"title": "Getting Started with PyFixest",
"section": "Joint Confidence Intervals",
- "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.428799\n1.406710\n\n\nX1\n-1.161391\n-0.737491\n\n\nC(f1)[T.1.0]\n1.380485\n3.784814\n\n\nC(f1)[T.2.0]\n-2.842798\n-0.321070\n\n\nC(f1)[T.3.0]\n-1.612387\n0.987719"
+ "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.424794\n1.402705\n\n\nX1\n-1.160466\n-0.738416\n\n\nC(f1)[T.1.0]\n1.385731\n3.779568\n\n\nC(f1)[T.2.0]\n-2.837296\n-0.326572\n\n\nC(f1)[T.3.0]\n-1.606713\n0.982046"
},
{
"objectID": "pyfixest.html#features",
@@ -847,7 +847,7 @@
"href": "news.html#pyfixest-0.17.0",
"title": "News",
"section": "PyFixest 0.17.0",
- "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.380105\n1.177593\n\n\nD\n-1.759120\n-1.046114\n\n\nf1\n-0.014097\n0.023645\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.28043\n-5.001663\n0.000093\n-1.991779\n-0.813455\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
+ "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.375929\n1.181769\n\n\nD\n-1.762853\n-1.042381\n\n\nf1\n-0.014294\n0.023843\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.238985\n-5.869057\n0.000015\n-1.904706\n-0.900528\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
},
{
"objectID": "news.html#pyfixest-0.16.0",
@@ -1085,14 +1085,14 @@
"href": "table-layout.html#basic-usage",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage",
- "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
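For readers unfamiliar with the stepwise notation: as I read csw() (cumulative stepwise), the one-liner above expands to six explicit models, three nested specifications per dependent variable. A sketch of the equivalent long form, assuming data = pf.get_data() as elsewhere in these docs:

```python
import pyfixest as pf

data = pf.get_data()

# csw() adds regressors cumulatively; with two dependent variables this
# should reproduce the six columns of pf.feols("Y+Y2~csw(X1,X2,X1:X2)", data=data)
fmls = [
    f"{lhs} ~ {rhs}"
    for lhs in ("Y", "Y2")
    for rhs in ("X1", "X1 + X2", "X1 + X2 + X1:X2")
]
pf.etable([pf.feols(fml, data=data) for fml in fmls])
```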
{
"objectID": "table-layout.html#keep-and-drop-variables",
"href": "table-layout.html#keep-and-drop-variables",
"title": "Regression Tables via pf.etable()",
"section": "Keep and drop variables",
- "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
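Since keep is interpreted as a regular expression, X1:X2 survives keep="X1" above because the pattern matches anywhere in the coefficient name; exact_match=True switches to whole-name comparison. A quick plain-Python illustration of that matching logic:

```python
import re

coefs = ["X1", "X2", "X1:X2"]
print([c for c in coefs if re.search("X1", c)])  # ['X1', 'X1:X2'] -- regex match
print([c for c in coefs if c == "X1"])           # ['X1'] -- exact match
```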
{
"objectID": "table-layout.html#hide-fixed-effects-or-se-type-rows",
@@ -1106,49 +1106,49 @@
"href": "table-layout.html#display-p-values-or-confidence-intervals",
"title": "Regression Tables via pf.etable()",
"section": "Display p-values or confidence intervals",
- "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
+ "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
},
{
"objectID": "table-layout.html#significance-levels-and-rounding",
"href": "table-layout.html#significance-levels-and-rounding",
"title": "Regression Tables via pf.etable()",
"section": "Significance levels and rounding",
- "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#other-output-formats",
"href": "table-layout.html#other-output-formats",
"title": "Regression Tables via pf.etable()",
"section": "Other output formats",
- "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf2 - x x - x x\nf1 x x x x x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
+ "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf1 x x x x x x\nf2 - x x - x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
},
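The filename= argument mentioned in the entry above can be combined with type="tex" to write the LaTeX table straight to disk. A short sketch; the output path is illustrative, not from the original docs:

# Write the LaTeX table to a .tex file (path is hypothetical)
pf.etable(
    [fit1, fit2, fit3, fit4, fit5, fit6],
    type="tex",
    filename="latexdocs/sample_table.tex",
)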
{
"objectID": "table-layout.html#rename-variables",
"href": "table-layout.html#rename-variables",
"title": "Regression Tables via pf.etable()",
"section": "Rename variables",
- "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
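As the entry above notes, an interaction term can be relabeled separately by adding it to the labels dictionary. A hedged sketch, assuming the dictionary key follows the "X1:X2" coefficient name shown in the tables:

# Override the automatic interaction label (key format assumed from the table output)
labels_custom = {**labels, "X1:X2": "Age-Schooling Interaction"}
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels_custom)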
{
"objectID": "table-layout.html#custom-model-headlines",
"href": "table-layout.html#custom-model-headlines",
"title": "Regression Tables via pf.etable()",
"section": "Custom model headlines",
- "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#further-custom-model-information",
"href": "table-layout.html#further-custom-model-information",
"title": "Regression Tables via pf.etable()",
"section": "Further custom model information",
- "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
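Rather than hardcoding values such as the cluster counts, they can be computed from the data before being passed to custom_model_stats. A sketch under the assumption that f1 is the clustering variable for all six models (per-model NA handling may shift the exact counts):

# Derive the number of clusters from the data instead of hardcoding it
n_clusters = int(data["f1"].nunique())
pf.etable(
    [fit1, fit2, fit3, fit4, fit5, fit6],
    labels=labels,
    custom_model_stats={"Number of Clusters": [n_clusters] * 6},
)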
{
"objectID": "table-layout.html#custom-table-notes",
"href": "table-layout.html#custom-table-notes",
"title": "Regression Tables via pf.etable()",
"section": "Custom table notes",
- "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
+ "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
},
{
"objectID": "table-layout.html#publication-ready-latex-tables",
@@ -1169,35 +1169,35 @@
"href": "table-layout.html#summarize-by-characteristics-in-columns-and-rows",
"title": "Regression Tables via pf.etable()",
"section": "Summarize by characteristics in columns and rows",
- "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n 0.12\n 2.38\n -0.28\n 2.40\n -0.24\n 2.24\n -0.13\n 2.19\n \n \n Wealth\n -0.33\n 5.61\n -0.25\n 5.36\n -0.27\n 5.69\n -0.41\n 5.71\n \n \n Age\n 1.02\n 0.79\n 1.08\n 0.80\n 1.00\n 0.81\n 1.07\n 0.83\n \n \n Years of Schooling\n -0.25\n 2.93\n -0.18\n 3.31\n -0.04\n 3.08\n -0.03\n 2.89\n \n \n nobs\n \n \n Number of observations\n 264\n \n 233\n \n 244\n \n 256\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n 0.12(2.38)\n -0.28(2.40)\n -0.24(2.24)\n -0.13(2.19)\n \n \n Wealth\n -0.33(5.61)\n -0.25(5.36)\n -0.27(5.69)\n -0.41(5.71)\n \n \n Age\n 1.02(0.79)\n 1.08(0.80)\n 1.00(0.81)\n 1.07(0.83)\n \n \n Years of Schooling\n -0.25(2.93)\n -0.18(3.31)\n -0.04(3.08)\n -0.03(2.89)\n \n \n nobs\n \n \n Number of observations\n 264\n 233\n 244\n 256\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 264\n 0.12\n 2.38\n 244\n -0.24\n 2.24\n \n \n Wealth\n 264\n -0.33\n 5.61\n 244\n -0.27\n 5.69\n \n \n Age\n 264\n 1.02\n 0.79\n 244\n 1.00\n 0.81\n \n \n Years of Schooling\n 264\n -0.25\n 2.93\n 244\n -0.04\n 3.08\n \n \n White collar\n \n \n Wage\n 233\n -0.28\n 2.40\n 256\n -0.13\n 2.19\n \n \n Wealth\n 233\n -0.25\n 5.36\n 256\n -0.41\n 5.71\n \n \n Age\n 233\n 1.08\n 0.80\n 256\n 1.07\n 0.83\n \n \n Years of Schooling\n 233\n -0.18\n 3.31\n 256\n -0.03\n 2.89\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
+ "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n -0.12\n 2.30\n -0.13\n 2.32\n -0.09\n 2.32\n -0.17\n 2.30\n \n \n Wealth\n -0.09\n 5.66\n -0.50\n 5.48\n -0.47\n 5.70\n -0.22\n 5.53\n \n \n Age\n 1.07\n 0.81\n 0.98\n 0.79\n 1.04\n 0.79\n 1.08\n 0.83\n \n \n Years of Schooling\n 0.05\n 3.16\n -0.24\n 3.05\n -0.12\n 2.78\n -0.19\n 3.18\n \n \n nobs\n \n \n Number of observations\n 246\n \n 245\n \n 244\n \n 262\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n -0.12(2.30)\n -0.13(2.32)\n -0.09(2.32)\n -0.17(2.30)\n \n \n Wealth\n -0.09(5.66)\n -0.50(5.48)\n -0.47(5.70)\n -0.22(5.53)\n \n \n Age\n 1.07(0.81)\n 0.98(0.79)\n 1.04(0.79)\n 1.08(0.83)\n \n \n Years of Schooling\n 0.05(3.16)\n -0.24(3.05)\n -0.12(2.78)\n -0.19(3.18)\n \n \n nobs\n \n \n Number of observations\n 246\n 245\n 244\n 262\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 246\n -0.12\n 2.30\n 244\n -0.09\n 2.32\n \n \n Wealth\n 246\n -0.09\n 5.66\n 244\n -0.47\n 5.70\n \n \n Age\n 246\n 1.07\n 0.81\n 244\n 1.04\n 0.79\n \n \n Years of Schooling\n 246\n 0.05\n 3.16\n 244\n -0.12\n 2.78\n \n \n White collar\n \n \n Wage\n 245\n -0.13\n 2.32\n 262\n -0.17\n 2.30\n \n \n Wealth\n 245\n -0.50\n 5.48\n 262\n -0.22\n 5.53\n \n \n Age\n 245\n 0.98\n 0.79\n 262\n 1.08\n 0.83\n \n \n Years of Schooling\n 245\n -0.24\n 3.05\n 262\n -0.19\n 3.18\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
},
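The mean_std aggregation named in the entry above works like mean_newline_std, only without the line break. A minimal sketch combining it with a single grouping column:

# Mean and standard deviation in one cell, no line break
pf.dtable(
    data,
    vars=["Y", "Y2", "X1", "X2"],
    labels=labels,
    bycol=["country"],
    stats=["mean_std", "count"],
)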
{
"objectID": "table-layout.html#basic-usage-of-make_table",
"href": "table-layout.html#basic-usage-of-make_table",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage of make_table",
- "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 1.26\n -0.82\n -1.28\n 0.29\n \n \n 1\n -0.42\n 0.24\n 0.32\n -0.58\n \n \n 2\n 0.19\n 0.72\n -1.27\n -0.07\n \n \n 3\n 0.5\n -1.17\n -0.42\n -0.74\n \n\n \n \n \n These are notes"
+ "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 0.65\n 0.25\n -0.62\n -1.26\n \n \n 1\n -0.2\n -1.18\n -2.6\n 0.2\n \n \n 2\n -0.61\n 0.11\n 1.86\n 0.82\n \n \n 3\n -0.9\n 0.39\n 0.67\n -1.32\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#mutiindex-dataframes",
"href": "table-layout.html#mutiindex-dataframes",
"title": "Regression Tables via pf.etable()",
"section": "Mutiindex DataFrames",
- "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes"
+ "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#example-styling",
"href": "table-layout.html#example-styling",
"title": "Regression Tables via pf.etable()",
"section": "Example Styling",
- "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#defining-table-styles-some-examples",
"href": "table-layout.html#defining-table-styles-some-examples",
"title": "Regression Tables via pf.etable()",
"section": "Defining Table Styles: Some Examples",
- "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "reference/estimation.feols_compressed_.FeolsCompressed.html",
diff --git a/table-layout.html b/table-layout.html
index 55bd8032..82f6377d 100644
--- a/table-layout.html
+++ b/table-layout.html
@@ -245,7 +245,7 @@ Regression Tables via pf.etable()
Table Layout with PyFixest
Pyfixest comes with functions to generate publication-ready tables. Regression tables are generated with pf.etable()
, which can output different formats, for instance using the Great Tables package or generating formatted LaTeX tables using booktabs. There are also the functions pf.dtable()
to display descriptive statistics and pf.make_table()
to generate formatted tables from pandas DataFrames in the same layout.
To begin, we load some libraries and fit a set of regression models.
-
+
import numpy as np
import pandas as pd
import pylatex as pl # for the latex table; note: not a dependency of pyfixest - needs manual installation
@@ -267,7 +267,7 @@ Table Layout wi
= pf.feols("Y2 ~ X1 *X2 | f1 + f2", data=data) fit6
-
+
@@ -301,7 +301,7 @@ Table Layout wi
-
+
@@ -338,55 +338,55 @@ Table Layout wi
Basic Usage
We can compare all regression models via the pyfixest-internal pf.etable()
function:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])
-
+
@@ -445,20 +445,20 @@ Basic Usage
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -510,55 +510,55 @@ Basic Usage
You can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:
-
+
"Y+Y2~csw(X1,X2,X1:X2)", data=data)) pf.etable(pf.feols(
-
+
@@ -673,55 +673,55 @@ Basic Usage
Keep and drop variables
etable
allows us to do a few things out of the box. For example, we can keep only the variables that we'd like: the keep argument retains all variables that match the provided regex pattern.
-
+
="X1") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -771,20 +771,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -836,55 +836,55 @@ Keep and drop vari
We can use the exact_match
argument to select a specific set of variables:
-
+
=["X1", "X2"], exact_match=True) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -934,20 +934,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -999,55 +999,55 @@ Keep and drop vari
We can also easily drop variables via the drop
argument:
-
+
=["X1"]) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop
-
+
@@ -1088,20 +1088,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1156,55 +1156,55 @@ Keep and drop vari
Hide fixed effects or SE-type rows
We can hide the rows showing the relevant fixed effects and those showing the S.E. type by setting show_fe=False
and show_se_type=False
(for instance, when the set of fixed effects or the estimation method for the standard errors is the same for all models and you want to describe this in the text or table notes rather than displaying it in the table).
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], show_fe=False, show_se_type=False)
-
+
@@ -1301,55 +1301,55 @@ Hide fi
Display p-values or confidence intervals
By default, pf.etable()
reports standard errors. But we can also ask it to output p-values or confidence intervals via the coef_fmt
function argument.
-
+
="b \n (se) \n [p]") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt
-
+
@@ -1408,20 +1408,20 @@ D
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1477,55 +1477,55 @@ D
Significance levels and rounding
Additionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code
and digits
function arguments:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)
-
+
@@ -1584,20 +1584,20 @@ Significa
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1652,7 +1652,7 @@ Significa
Other output formats
By default, pf.etable()
returns a GT object (see the Great Tables package), but you can also opt for DataFrame, markdown, or LaTeX output via the type
argument.
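For instance, a minimal sketch of the non-GT outputs (an assumption here: the type argument accepts "df", "md", and "tex", as in recent pyfixest releases):
pf.etable([fit1, fit2], type="df")   # plain pandas DataFrame
pf.etable([fit1, fit2], type="md")   # markdown string
pf.etable([fit1, fit2], type="tex")  # LaTeX/booktabs code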
-
+
# Pandas styler output:
pf.etable(
    [fit1, fit2, fit3, fit4, fit5, fit6],
@@ -1714,20 +1714,20 @@ Other output formats
-0.04082 (0.08093)
-
diff --git a/difference-in-differences.html b/difference-in-differences.html
index 39e70f54..6e6d95c5 100644
--- a/difference-in-differences.html
+++ b/difference-in-differences.html
@@ -257,7 +257,7 @@ Difference-in-Differences Estimation
See also NBER SI methods lectures on Linear Panel Event Studies.
Setup
-
+
from importlib import resources
import pandas as pd
@@ -272,7 +272,7 @@ Setup
%autoreload 2
-
+
@@ -306,7 +306,7 @@ Setup
-
+
-pyfixest: 0.25.3
-pandas : 2.2.3
+pandas : 2.2.3
+pyfixest: 0.25.3
-
+
# one-shot adoption data - parallel trends is true
df_one_cohort = get_sharkfin()
df_one_cohort.head()
@@ -410,7 +410,7 @@ Setup
-
+
# multi-cohort adoption data
df_multi_cohort = pd.read_csv(
    resources.files("pyfixest.did.data").joinpath("df_het.csv")
)
@@ -536,7 +536,7 @@ Setup
Examining Treatment Timing
Before any DiD estimation, we need to examine the treatment timing, since it is crucial to our choice of estimator.
-
+
pf.panelview(
    df_one_cohort,
    unit="unit",
@@ -557,7 +557,7 @@ Examining Treat
-
+
pf.panelview(
    df_multi_cohort,
    unit="unit",
@@ -580,7 +580,7 @@ Examining Treat
We immediately see that we have staggered adoption of treatment in the second case, which implies that a naive application of 2WFE might yield biased estimates under substantial effect heterogeneity.
We can also plot treatment assignment in a disaggregated fashion, which gives us a sense of cohort sizes.
-
+
pf.panelview(
    df_multi_cohort,
    unit="unit",
@@ -604,7 +604,7 @@ Examining Treat
Inspecting the Outcome Variable
pf.panelview()
further allows us to inspect the “outcome” variable over time:
-
+
pf.panelview(
    df_multi_cohort,
    outcome="dep_var",
@@ -625,7 +625,7 @@ Inspecting
We immediately see that the first cohort is switched into treatment in 2000, while the second cohort is switched into treatment by 2010. Before each cohort is switched into treatment, the trends are parallel.
We can additionally inspect individual units by dropping the collapse_to_cohort argument. Because we have a large sample, we might want to inspect only a subset of units.
-
+
pf.panelview(
    df_multi_cohort,
    outcome="dep_var",
@@ -647,7 +647,7 @@ Inspecting
One-shot adoption: Static and Dynamic Specifications
After taking a first look at the data, let’s turn to estimation. We return to the df_one_cohort
data set (without staggered treatment rollout).
-
+
fit_static_twfe = pf.feols(
    "Y ~ treat | unit + year",
    df_one_cohort,
@@ -670,14 +670,14 @@
+
fit_dynamic_twfe = pf.feols(
    "Y ~ i(year, ever_treated, ref = 14) | unit + year",
    df_one_cohort,
    vcov={"CRV1": "unit"},
)
-
+
fit_dynamic_twfe.iplot(
    coord_flip=False,
    title="Event Study",
@@ -687,7 +687,7 @@
    labels=rename_event_study_coefs(fit_dynamic_twfe._coefnames),
)
-
+
-
+
fit_lpdid.iplot(
    coord_flip=False,
    title="Local-Projections-Estimator",
@@ -1166,7 +1166,7 @@ Local Project
    xintercept=18.5,
).show()
-
+
@@ -297,7 +297,7 @@ Marginal Effects and Hypothesis Tests via marginaleffect
-
+
@@ -390,7 +390,7 @@ Marginal Effects and Hypothesis Tests via marginaleffect
Suppose we were interested in testing the hypothesis that X1 = X2. Given the relatively large differences in coefficients and small standard errors, we will likely reject the null that the two parameters are equal.
We can run the formal test via the hypotheses
function from the marginaleffects
package.
-
+
"X1 - X2 = 0") hypotheses(fit,
@@ -546,7 +546,7 @@ PyFixest 0.18.0
Additionally, model_matrix_fixest
now returns a dictionary instead of a tuple.
Brings back fixed effects reference setting via i(var1, var2, ref)
syntax. Deprecates the i_ref1
, i_ref2
function arguments. That is, it is again possible to run
-
+
import pyfixest as pf
data = pf.get_data()
@@ -554,7 +554,7 @@ PyFixest 0.18.0
fit1.coef()[0:8]
Via the ref
syntax, we can set the reference level:
-
+
= pf.feols("Y ~ i(f1, X2, ref = 1)", data=data)
fit2 0:8] fit2.coef()[
@@ -563,7 +563,7 @@ PyFixest 0.18.0
PyFixest 0.17.0
Restructures the codebase and reorganizes how users can interact with the pyfixest
API. It is now recommended to use pyfixest
in the following way:
-
+
import numpy as np
import pyfixest as pf
data = pf.get_data()
@@ -631,7 +631,7 @@ PyFixest 0.17.0
The update should not introduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!
Adds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!
-
+
fit.confint(joint=True)
@@ -648,18 +648,18 @@ PyFixest 0.17.0
Intercept
-0.380105
-1.177593
+0.375929
+1.181769
D
--1.759120
--1.046114
+-1.762853
+-1.042381
f1
--0.014097
-0.023645
+-0.014294
+0.023843
@@ -668,7 +668,7 @@ PyFixest 0.17.0
Adds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv()
method.
-
+
= "D", cluster = "group_id") fit.ccv(treatment
/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.
@@ -694,11 +694,11 @@ PyFixest 0.17.0
CCV
-1.4026168622179929
-0.28043
--5.001663
-0.000093
--1.991779
--0.813455
+0.238985
+-5.869057
+0.000015
+-1.904706
+-0.900528
CRV1
@@ -740,7 +740,7 @@ PyFixest 0.14.0
- Changes all docstrings to
numpy
format.
- Difference-in-differences estimation functions now need to be imported via the
pyfixest.did.estimation
module:
-
+
from pyfixest.did.estimation import did2s, lpdid, event_study
diff --git a/pyfixest.html b/pyfixest.html
index 2173ab59..c92a31d5 100644
--- a/pyfixest.html
+++ b/pyfixest.html
@@ -187,10 +187,11 @@
PyFixest: Fast High-Dimensional Fixed Effects Regression in Python
-
+
PyFixest
is a Python implementation of the formidable fixest package for fast high-dimensional fixed effects regression.
The package aims to mimic fixest
syntax and functionality as closely as Python allows: if you know fixest
well, the goal is that you won’t have to read the docs to get started! In particular, this means that all of fixest's
defaults are mirrored by PyFixest
- currently with only one small exception.
Nevertheless, for a quick introduction, you can take a look at the quickstart or the regression chapter of Arthur Turrell’s book on Coding for Economists.
+For questions on PyFixest
, head on over to our PyFixest Discourse forum.
Features
diff --git a/quarto_example/QuartoExample.pdf b/quarto_example/QuartoExample.pdf
index 375fab6a..f16663ee 100644
Binary files a/quarto_example/QuartoExample.pdf and b/quarto_example/QuartoExample.pdf differ
diff --git a/quickstart.html b/quickstart.html
index 96c32ad2..e91a8996 100644
--- a/quickstart.html
+++ b/quickstart.html
@@ -281,7 +281,7 @@ What is a fix
Read Sample Data
In a first step, we load the module and some synthetic example data:
-
+
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@@ -302,7 +302,7 @@ Read Sample Data
data.head()
-
+
@@ -336,7 +336,7 @@ Read Sample Data
-
+
@@ -370,7 +370,7 @@ Read Sample Data
-
+
-numpy : 1.26.4
+pandas : 2.2.3
+numpy : 1.26.4
pyfixest : 0.25.3
-pandas : 2.2.3
matplotlib: 3.9.2
@@ -507,7 +507,7 @@ Read Sample Data
-
+
data.info()
<class 'pandas.core.frame.DataFrame'>
@@ -535,7 +535,7 @@ Read Sample Data
OLS Estimation
We are interested in the relation between the dependent variable Y
and the independent variables X1
using a fixed effect model for group_id
. Let’s take a look at the data:
-
+
= data.plot(kind="scatter", x="X1", y="Y", c="group_id", colormap="viridis") ax
@@ -546,7 +546,7 @@ OLS Estimation
We can estimate a fixed effects regression via the feols()
function. feols()
has three arguments: a two-sided model formula, the data, and optionally, the type of inference.
-
+
= pf.feols(fml="Y ~ X1 | group_id", data=data, vcov="HC1")
fit type(fit)
@@ -559,7 +559,7 @@ OLS Estimation
Inspecting Model Results
To inspect the results, we can use a summary function or method:
-
+
fit.summary()
###
@@ -577,55 +577,55 @@ Inspecting Model
Or display a formatted regression table:
-
+
pf.etable(fit)
-
+
@@ -688,7 +688,7 @@ Inspecting Model
Alternatively, the .summarize
module contains a summary
function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable()
, please take a look at the dedicated vignette.
-
+
pf.summary(fit)
###
@@ -706,7 +706,7 @@ Inspecting Model
You can access individual elements of the summary via dedicated methods: .tidy()
returns a “tidy” pd.DataFrame
, .coef()
returns estimated parameters, and .se() returns estimated standard errors. Other methods include pvalue()
, confint()
and tstat()
.
-
+
fit.tidy()
@@ -749,7 +749,7 @@ Inspecting Model
-
+
fit.coef()
Coefficient
@@ -757,7 +757,7 @@ Inspecting Model
Name: Estimate, dtype: float64
-
+
fit.se()
Coefficient
@@ -765,7 +765,7 @@ Inspecting Model
Name: Std. Error, dtype: float64
-
+
fit.tstat()
Coefficient
@@ -773,7 +773,7 @@ Inspecting Model
Name: t value, dtype: float64
-
+
fit.confint()
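And, following the method names listed above, a quick sketch of the p-value accessor:
fit.pvalue()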
@@ -800,11 +800,11 @@ Inspecting Model
Last, model results can be visualized via dedicated methods for plotting:
-
+
fit.coefplot()  # or pf.coefplot([fit])
-
+
@@ -522,7 +522,7 @@ Examples
-
+
@@ -671,7 +671,7 @@ Examples
In a first step, we estimate a classical event study model:
-
+
# estimate the model
fit = pf.did2s(
    df_het,
@@ -761,10 +761,10 @@ Examples
We can also inspect the model visually:
-
+
fit.iplot(figsize=[1200, 400], coord_flip=False).show()
-
+
@@ -545,7 +545,7 @@ Examples
-
+
diff --git a/reference/did.estimation.lpdid.html b/reference/did.estimation.lpdid.html
index 6729fcf2..d210e196 100644
--- a/reference/did.estimation.lpdid.html
+++ b/reference/did.estimation.lpdid.html
@@ -505,7 +505,7 @@ Returns
Examples
-
+
import pandas as pd
import pyfixest as pf
@@ -528,7 +528,7 @@ Examples
fit.iplot(figsize=[1200, 400], coord_flip=False).show()
-
+
@@ -562,7 +562,7 @@ Examples
-
+
-
+
@@ -606,7 +606,7 @@ Examples
-
+
@@ -656,7 +656,7 @@ Examples
Calling feols()
returns an instance of the Feols class. The summary()
method can be used to print the results.
An alternative way to retrieve model results is via the tidy()
method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.
-
+
fit.tidy()
@@ -710,17 +710,17 @@ Examples
You can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef()
for the coefficients, fit.se()
for the standard errors, fit.tstat()
for the t-statistics, fit.pval()
for the p-values, and fit.confint()
for the confidence intervals.
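Since tidy() returns a regular pandas DataFrame, individual cells can also be pulled directly; a small sketch:
fit.tidy().loc["X1", "Estimate"]  # a single coefficient, equivalent to fit.coef()["X1"]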
The employed type of inference can be specified via the vcov
argument. If vcov is not provided, PyFixest
employs the fixest
default of iid inference, unless there are fixed effects in the model, in which case feols()
clusters the standard error by the first fixed effect (CRV1 inference).
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="iid")
fit1 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="hetero")
fit2 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1"}) fit3
Supported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {"CRV1": "f1"}
for CRV1 inference with clustering by f1
or {"CRV3": "f1"}
for CRV3 inference with clustering by f1
. For two-way clustering, you can provide a formula string, e.g. {"CRV1": "f1 + f2"}
for CRV1 inference with two-way clustering by f1 and f2.
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1 + f2"}) fit4
Inference can be adjusted post estimation via the vcov
method:
-
+
fit.summary()"iid").summary() fit.vcov(
@@ -754,7 +754,7 @@ Examples
The ssc
argument specifies the small sample correction for inference. In general, feols()
uses all of fixest::feols()
defaults, but sets the fixef.K
argument to "none"
whereas the fixest::feols()
default is "nested"
. See here for more details: link to github.
feols()
supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1
and one with fixed effects for f2
using the sw()
syntax.
-
+
= pf.feols("Y ~ X1 + X2 | sw(f1, f2)", data)
fit type(fit)
@@ -762,55 +762,55 @@ Examples
The returned object is an instance of the FixestMulti
class. You can access the results of the first model via fit.fetch_model(0)
and the results of the second model via fit.fetch_model(1)
. You can compare the model results via the etable()
function:
-
+
pf.etable(fit)
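To work with one of the models individually, a quick sketch using the fetch_model() accessor named above:
fit_f1 = fit.fetch_model(0)  # the model with fixed effects for f1
fit_f1.summary()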
-
+
@@ -852,14 +852,14 @@ Examples
fe
-f2
--
+f1
x
+-
-f1
-x
+f2
-
+x
stats
@@ -893,56 +893,56 @@ Examples
Other supported multiple estimation syntax include sw0()
, csw()
and csw0()
. While sw()
adds variables in a “stepwise” fashion, csw()
does so cumulatively.
-
+
= pf.feols("Y ~ X1 + X2 | csw(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -984,13 +984,13 @@ Examples
fe
-f2
--
+f1
+x
x
-f1
-x
+f2
+-
x
@@ -1025,56 +1025,56 @@ Examples
The sw0()
and csw0()
syntax are similar to sw()
and csw()
, but start with a model that excludes the variables specified in sw()
and csw()
:
-
+
= pf.feols("Y ~ X1 + X2 | sw0(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -1129,16 +1129,16 @@ Examples
fe
-f2
--
+f1
-
x
+-
-f1
+f2
-
-x
-
+x
stats
@@ -1175,56 +1175,56 @@ Examples
The feols()
function also supports multiple dependent variables. The following example estimates two models, one with Y
as the dependent variable and one with Y2
as the dependent variable.
-
+
= pf.feols("Y + Y2 ~ X1 | f1 + f2", data)
fit pf.etable(fit)
-
+
@@ -1260,12 +1260,12 @@ Examples
fe
-f2
+f1
x
x
-f1
+f2
x
x
@@ -1301,56 +1301,56 @@ Examples
It is possible to combine different multiple estimation operators:
-
+
= pf.feols("Y + Y2 ~ X1 | sw(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -1396,18 +1396,18 @@ Examples
fe
-f2
--
--
+f1
x
x
+-
+-
-f1
-x
-x
+f2
-
-
+x
+x
stats
@@ -1448,7 +1448,7 @@ Examples
In general, using multiple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols()
implements a caching mechanism that stores the demeaned covariates.
Additionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, split = "f1")
fit pf.etable(fit)
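fsplit works analogously but prepends the full-sample estimate; a minimal sketch:
fit_full = pf.feols("Y ~ X1 + X2 | f1 + f2", data, fsplit="f1")
pf.etable(fit_full)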
@@ -1514,52 +1514,52 @@ Examples
cluster_adj_value = G / (G - 1)
-
+
@@ -1769,7 +1769,7 @@ Examples
fe
-f2
+f1
x
x
x
@@ -1802,7 +1802,7 @@ Examples
x
-f1
+f2
x
x
x
@@ -1950,7 +1950,7 @@ Examples
Besides OLS, feols()
also supports IV estimation via three-part formulas:
-
+
= pf.feols("Y ~ X2 | f1 + f2 | X1 ~ Z1", data)
fit fit.tidy()
@@ -2004,7 +2004,7 @@ Examples
Here, X1
is the endogenous variable and Z1
is the instrument. f1
and f2
are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:
-
+
= pf.feols("Y ~ X2 | X1 ~ Z1", data)
fit fit.tidy()
@@ -2068,7 +2068,7 @@ Examples
Last, feols()
supports interaction of variables via the i()
syntax. Documentation on this is tba.
After fitting a model via feols()
, you can use the predict()
method to get the predicted values:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit 0:5] fit.predict()[
@@ -2076,7 +2076,7 @@ Examples
The predict()
method also supports a newdata
argument to predict on new data, which returns a numpy array of the predicted values:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit =data)[0:5] fit.predict(newdata
@@ -2084,11 +2084,11 @@ Examples
Last, you can plot the results of a model via the coefplot()
method:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit fit.coefplot()
-
+
@@ -593,7 +593,7 @@ Examples
-
+
diff --git a/reference/report.coefplot.html b/reference/report.coefplot.html
index c3217e91..a373db31 100644
--- a/reference/report.coefplot.html
+++ b/reference/report.coefplot.html
@@ -528,7 +528,7 @@ Returns
Examples
-
+
import pyfixest as pf
from pyfixest.report.utils import rename_categoricals
@@ -544,7 +544,7 @@ Examples
= "both") pf.coefplot([fit1], joint
-
+
@@ -578,7 +578,7 @@ Examples
-
+
-
+
@@ -576,7 +576,7 @@ Examples
-
+
-
+
@@ -497,7 +497,7 @@ Examples
-
+
diff --git a/replicating-the-effect.html b/replicating-the-effect.html
index bf03493a..daff3e8c 100644
--- a/replicating-the-effect.html
+++ b/replicating-the-effect.html
@@ -234,7 +234,7 @@ Replicating Examples from “The Effect”
This notebook replicates code examples from Nick Huntington-Klein’s book on causal inference, The Effect.
-
+
from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
@@ -243,7 +243,7 @@ Replicating Examples from “The Effect”
%watermark --iversions
-
+
@@ -277,7 +277,7 @@ Replicating Examples from “The Effect”
-
+
@@ -317,7 +317,7 @@ Replicating Examples from “The Effect”
Chapter 4: Describing Relationships
-
+
# Read in data
dt = Mroz.load_pandas().data
# Keep just working women
@@ -329,7 +329,7 @@ Chapter
fit = pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
pf.etable(fit)
-/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning:
+/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
@@ -337,52 +337,52 @@ Chapter
dt.loc[:, "earn"] = dt["lwg"].apply("exp")
-
+
@@ -480,7 +480,7 @@ Chapter
Chapter 13: Regression
Example 1
-
+
res = restaurant_inspections.load_pandas().data
res.inspection_score = res.inspection_score.astype(float)
res.NumberofLocations = res.NumberofLocations.astype(float)
@@ -489,52 +489,52 @@ Example 1
fit = pf.feols(fml="inspection_score ~ NumberofLocations", data=res)
pf.etable([fit])
-
+
@@ -597,7 +597,7 @@ Example 1
Example 2
-
+
= restaurant_inspections.load_pandas().data
df
= pf.feols(
@@ -607,52 +607,52 @@ fit1 Example 2
pf.etable([fit1, fit2])
-
+
@@ -749,7 +749,7 @@ Example 2
Example 3: HC Standard Errors
-
+
="inspection_score ~ Year + Weekend", data=df, vcov="HC3").summary() pf.feols(fml
###
@@ -771,7 +771,7 @@ Example 3: HC
Example 4: Clustered Standard Errors
-
+
pf.feols(="inspection_score ~ Year + Weekend", data=df, vcov={"CRV1": "Weekend"}
fml ).tidy()
@@ -837,7 +837,7 @@ Exampl
Example 5: Bootstrap Inference
-
+
= pf.feols(fml="inspection_score ~ Year + Weekend", data=df)
fit =999, param="Year") fit.wildboottest(reps
@@ -860,7 +860,7 @@ Example 1
Example 2
-
+
gm = gapminder.load_pandas().data
gm["logGDPpercap"] = gm["gdpPercap"].apply("log")
@@ -946,7 +946,7 @@ Example 2
Example 3: TWFE
-
+
# Set our individual and time (index) for our data
= pf.feols(fml="lifeExp ~ np.log(gdpPercap) | country + year", data=gm)
fit fit.summary()
@@ -971,7 +971,7 @@ Example 3: TWFE
Chapter 18: Difference-in-Differences
Example 1
-
+
od = organ_donations.load_pandas().data
# Create Treatment Variable
@@ -999,7 +999,7 @@ Example 1
Example 3: Dynamic Treatment Effect
-
+
od = organ_donations.load_pandas().data
# Create Treatment Variable
diff --git a/search.json b/search.json
index eeddcd32..16e2c702 100644
--- a/search.json
+++ b/search.json
@@ -479,7 +479,7 @@
"href": "reference/estimation.estimation.feols.html#examples",
"title": "estimation.estimation.feols",
"section": "Examples",
- "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\n-\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf2\n-\n-\nx\n\n\nf1\n-\nx\n-\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf2\nx\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf2\n-\n-\nx\nx\n\n\nf1\nx\nx\n-\n-\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using muliple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nLast, you can plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nwould run a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the cluster causal variance estimator by Athey et al by using the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.284647\n0.056518\n0.955552\n-0.581934\n0.61411\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
+ "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf1\nx\n-\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf1\n-\nx\n-\n\n\nf2\n-\n-\nx\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf1\nx\nx\n-\n-\n\n\nf2\n-\n-\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using muliple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
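As a minimal, illustrative sketch only (an assumption for illustration, not official documentation: i() treats its first argument, here f1, as a categorical variable, interacted with the continuous X2):\n\nfit_i = pf.feols(\"Y ~ i(f1, X2) | f2\", data)\n\n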
Fuller documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nYou can also plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nThe call above runs a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the causal cluster variance estimator by Abadie et al. (QJE, 2023) via the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.242455\n0.066353\n0.947828\n-0.493292\n0.525467\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
"crumbs": [
"Function Reference",
"Estimation Functions",
@@ -546,7 +546,7 @@
"href": "replicating-the-effect.html#chapter-4-describing-relationships",
"title": "Replicating Examples from “The Effect”",
"section": "Chapter 4: Describing Relationships",
- "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
+ "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
},
{
"objectID": "replicating-the-effect.html#chapter-13-regression",
@@ -623,7 +623,7 @@
"href": "difference-in-differences.html#setup",
"title": "Difference-in-Differences Estimation",
"section": "Setup",
- "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npyfixest: 0.25.3\npandas : 2.2.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
+ "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\npyfixest: 0.25.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
},
{
"objectID": "difference-in-differences.html#examining-treatment-timing",
@@ -665,7 +665,7 @@
"href": "quickstart.html",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see how the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Fixest class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and se() estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s have a quick d-tour on the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One possibility to do this is by adding a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis is does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do it is by a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without adding all these dummy variables. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y1 ~ X | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see what the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Feols class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied to instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and .se() returns estimated standard errors. Other methods include .pvalue(), .confint() and .tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s take a quick detour into the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is slightly smaller (more negative) than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One way to do this is to add a dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do this is via a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without having to estimate a coefficient for each dummy. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y ~ X1 | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1}' (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results as if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
{
"objectID": "quickstart.html#what-is-a-fixed-effect-model",
@@ -679,7 +679,7 @@
"href": "quickstart.html#read-sample-data",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
+ "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
},
{
"objectID": "quickstart.html#ols-estimation",
@@ -707,7 +707,7 @@
"href": "quickstart.html#updating-regression-coefficients",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
{
"objectID": "quickstart.html#inference-via-the-wild-bootstrap",
@@ -742,7 +742,7 @@
"href": "quickstart.html#joint-confidence-intervals",
"title": "Getting Started with PyFixest",
"section": "Joint Confidence Intervals",
- "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.428799\n1.406710\n\n\nX1\n-1.161391\n-0.737491\n\n\nC(f1)[T.1.0]\n1.380485\n3.784814\n\n\nC(f1)[T.2.0]\n-2.842798\n-0.321070\n\n\nC(f1)[T.3.0]\n-1.612387\n0.987719"
+ "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.424794\n1.402705\n\n\nX1\n-1.160466\n-0.738416\n\n\nC(f1)[T.1.0]\n1.385731\n3.779568\n\n\nC(f1)[T.2.0]\n-2.837296\n-0.326572\n\n\nC(f1)[T.3.0]\n-1.606713\n0.982046"
},
{
"objectID": "pyfixest.html#features",
@@ -847,7 +847,7 @@
"href": "news.html#pyfixest-0.17.0",
"title": "News",
"section": "PyFixest 0.17.0",
- "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.380105\n1.177593\n\n\nD\n-1.759120\n-1.046114\n\n\nf1\n-0.014097\n0.023645\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.28043\n-5.001663\n0.000093\n-1.991779\n-0.813455\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
+ "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.375929\n1.181769\n\n\nD\n-1.762853\n-1.042381\n\n\nf1\n-0.014294\n0.023843\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.238985\n-5.869057\n0.000015\n-1.904706\n-0.900528\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
},
{
"objectID": "news.html#pyfixest-0.16.0",
@@ -1085,14 +1085,14 @@
"href": "table-layout.html#basic-usage",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage",
- "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#keep-and-drop-variables",
"href": "table-layout.html#keep-and-drop-variables",
"title": "Regression Tables via pf.etable()",
"section": "Keep and drop variables",
- "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#hide-fixed-effects-or-se-type-rows",
@@ -1106,49 +1106,49 @@
"href": "table-layout.html#display-p-values-or-confidence-intervals",
"title": "Regression Tables via pf.etable()",
"section": "Display p-values or confidence intervals",
- "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
+ "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
},
{
"objectID": "table-layout.html#significance-levels-and-rounding",
"href": "table-layout.html#significance-levels-and-rounding",
"title": "Regression Tables via pf.etable()",
"section": "Significance levels and rounding",
- "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#other-output-formats",
"href": "table-layout.html#other-output-formats",
"title": "Regression Tables via pf.etable()",
"section": "Other output formats",
- "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf2 - x x - x x\nf1 x x x x x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
+ "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf1 x x x x x x\nf2 - x x - x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
etable uses the LaTeX packages booktabs, threeparttable, and makecell for the table layout, so don’t forget to include these packages in your LaTeX document.\n\n# LaTeX output (include the LaTeX packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a PDF containing the regression table, which you can display by clicking the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LaTeX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile LaTeX to PDF & display a button with the hyperlink to the PDF\n# requires a TeX installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
},
{
"objectID": "table-layout.html#rename-variables",
"href": "table-layout.html#rename-variables",
"title": "Regression Tables via pf.etable()",
"section": "Rename variables",
- "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#custom-model-headlines",
"href": "table-layout.html#custom-model-headlines",
"title": "Regression Tables via pf.etable()",
"section": "Custom model headlines",
- "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
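\n\nA minimal sketch of the “dh” ordering, which places the dependent variables above the headlines (output omitted):\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n head_order=\"dh\",\n)"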
},
{
"objectID": "table-layout.html#further-custom-model-information",
"href": "table-layout.html#further-custom-model-information",
"title": "Regression Tables via pf.etable()",
"section": "Further custom model information",
- "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#custom-table-notes",
"href": "table-layout.html#custom-table-notes",
"title": "Regression Tables via pf.etable()",
"section": "Custom table notes",
- "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
+ "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
},
{
"objectID": "table-layout.html#publication-ready-latex-tables",
@@ -1169,35 +1169,35 @@
"href": "table-layout.html#summarize-by-characteristics-in-columns-and-rows",
"title": "Regression Tables via pf.etable()",
"section": "Summarize by characteristics in columns and rows",
- "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n 0.12\n 2.38\n -0.28\n 2.40\n -0.24\n 2.24\n -0.13\n 2.19\n \n \n Wealth\n -0.33\n 5.61\n -0.25\n 5.36\n -0.27\n 5.69\n -0.41\n 5.71\n \n \n Age\n 1.02\n 0.79\n 1.08\n 0.80\n 1.00\n 0.81\n 1.07\n 0.83\n \n \n Years of Schooling\n -0.25\n 2.93\n -0.18\n 3.31\n -0.04\n 3.08\n -0.03\n 2.89\n \n \n nobs\n \n \n Number of observations\n 264\n \n 233\n \n 244\n \n 256\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n 0.12(2.38)\n -0.28(2.40)\n -0.24(2.24)\n -0.13(2.19)\n \n \n Wealth\n -0.33(5.61)\n -0.25(5.36)\n -0.27(5.69)\n -0.41(5.71)\n \n \n Age\n 1.02(0.79)\n 1.08(0.80)\n 1.00(0.81)\n 1.07(0.83)\n \n \n Years of Schooling\n -0.25(2.93)\n -0.18(3.31)\n -0.04(3.08)\n -0.03(2.89)\n \n \n nobs\n \n \n Number of observations\n 264\n 233\n 244\n 256\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 264\n 0.12\n 2.38\n 244\n -0.24\n 2.24\n \n \n Wealth\n 264\n -0.33\n 5.61\n 244\n -0.27\n 5.69\n \n \n Age\n 264\n 1.02\n 0.79\n 244\n 1.00\n 0.81\n \n \n Years of Schooling\n 264\n -0.25\n 2.93\n 244\n -0.04\n 3.08\n \n \n White collar\n \n \n Wage\n 233\n -0.28\n 2.40\n 256\n -0.13\n 2.19\n \n \n Wealth\n 233\n -0.25\n 5.36\n 256\n -0.41\n 5.71\n \n \n Age\n 233\n 1.08\n 0.80\n 256\n 1.07\n 0.83\n \n \n Years of Schooling\n 233\n -0.18\n 3.31\n 256\n -0.03\n 2.89\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
+ "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n -0.12\n 2.30\n -0.13\n 2.32\n -0.09\n 2.32\n -0.17\n 2.30\n \n \n Wealth\n -0.09\n 5.66\n -0.50\n 5.48\n -0.47\n 5.70\n -0.22\n 5.53\n \n \n Age\n 1.07\n 0.81\n 0.98\n 0.79\n 1.04\n 0.79\n 1.08\n 0.83\n \n \n Years of Schooling\n 0.05\n 3.16\n -0.24\n 3.05\n -0.12\n 2.78\n -0.19\n 3.18\n \n \n nobs\n \n \n Number of observations\n 246\n \n 245\n \n 244\n \n 262\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n -0.12(2.30)\n -0.13(2.32)\n -0.09(2.32)\n -0.17(2.30)\n \n \n Wealth\n -0.09(5.66)\n -0.50(5.48)\n -0.47(5.70)\n -0.22(5.53)\n \n \n Age\n 1.07(0.81)\n 0.98(0.79)\n 1.04(0.79)\n 1.08(0.83)\n \n \n Years of Schooling\n 0.05(3.16)\n -0.24(3.05)\n -0.12(2.78)\n -0.19(3.18)\n \n \n nobs\n \n \n Number of observations\n 246\n 245\n 244\n 262\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 246\n -0.12\n 2.30\n 244\n -0.09\n 2.32\n \n \n Wealth\n 246\n -0.09\n 5.66\n 244\n -0.47\n 5.70\n \n \n Age\n 246\n 1.07\n 0.81\n 244\n 1.04\n 0.79\n \n \n Years of Schooling\n 246\n 0.05\n 3.16\n 244\n -0.12\n 2.78\n \n \n White collar\n \n \n Wage\n 245\n -0.13\n 2.32\n 262\n -0.17\n 2.30\n \n \n Wealth\n 245\n -0.50\n 5.48\n 262\n -0.22\n 5.53\n \n \n Age\n 245\n 0.98\n 0.79\n 262\n 1.08\n 0.83\n \n \n Years of Schooling\n 245\n -0.24\n 3.05\n 262\n -0.19\n 3.18\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
},
{
"objectID": "table-layout.html#basic-usage-of-make_table",
"href": "table-layout.html#basic-usage-of-make_table",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage of make_table",
- "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 1.26\n -0.82\n -1.28\n 0.29\n \n \n 1\n -0.42\n 0.24\n 0.32\n -0.58\n \n \n 2\n 0.19\n 0.72\n -1.27\n -0.07\n \n \n 3\n 0.5\n -1.17\n -0.42\n -0.74\n \n\n \n \n \n These are notes"
+ "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 0.65\n 0.25\n -0.62\n -1.26\n \n \n 1\n -0.2\n -1.18\n -2.6\n 0.2\n \n \n 2\n -0.61\n 0.11\n 1.86\n 0.82\n \n \n 3\n -0.9\n 0.39\n 0.67\n -1.32\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#mutiindex-dataframes",
"href": "table-layout.html#mutiindex-dataframes",
"title": "Regression Tables via pf.etable()",
"section": "Mutiindex DataFrames",
- "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes"
+ "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#example-styling",
"href": "table-layout.html#example-styling",
"title": "Regression Tables via pf.etable()",
"section": "Example Styling",
- "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#defining-table-styles-some-examples",
"href": "table-layout.html#defining-table-styles-some-examples",
"title": "Regression Tables via pf.etable()",
"section": "Defining Table Styles: Some Examples",
- "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "reference/estimation.feols_compressed_.FeolsCompressed.html",
diff --git a/table-layout.html b/table-layout.html
index 55bd8032..82f6377d 100644
--- a/table-layout.html
+++ b/table-layout.html
@@ -245,7 +245,7 @@ Regression Tables via pf.etable()
Table Layout with PyFixest
PyFixest comes with functions to generate publication-ready tables. Regression tables are generated with pf.etable()
, which can output different formats, for instance using the Great Tables package or generating formatted LaTeX tables using booktabs. There are also further functions: pf.dtable()
to display descriptive statistics and pf.make_table()
for generating formatted tables from pandas DataFrames in the same layout.
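As a quick, self-contained sketch of the latter (an illustration only: it assumes no more than that pf.make_table() accepts a pandas DataFrame as its first argument, as described above):
import pandas as pd
import pyfixest as pf

# hypothetical summary DataFrame to be rendered in the familiar table layout
df_summary = pd.DataFrame({"group": ["A", "B"], "mean": [0.12, 0.48], "sd": [1.01, 0.97]})
pf.make_table(df_summary)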
To begin, we load some libraries and fit a set of regression models.
-
+
import numpy as np
import pandas as pd
import pylatex as pl # for the latex table; note: not a dependency of pyfixest - needs manual installation
@@ -267,7 +267,7 @@ Table Layout wi
= pf.feols("Y2 ~ X1 *X2 | f1 + f2", data=data) fit6
-
+
@@ -301,7 +301,7 @@ Table Layout wi
-
+
@@ -338,55 +338,55 @@ Table Layout wi
Basic Usage
We can compare all regression models via the pyfixest-internal pf.etable()
function:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])
-
+
@@ -445,20 +445,20 @@ Basic Usage
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -510,55 +510,55 @@ Basic Usage
You can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:
-
+
"Y+Y2~csw(X1,X2,X1:X2)", data=data)) pf.etable(pf.feols(
-
+
@@ -673,55 +673,55 @@ Basic Usage
Keep and drop variables
etable
allows us to do a few things out of the box. For example, we can keep only the variables we’d like: the keep argument retains all variables matching the provided regex.
-
+
="X1") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -771,20 +771,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -836,55 +836,55 @@ Keep and drop vari
We can use the exact_match
argument to select a specific set of variables:
-
+
=["X1", "X2"], exact_match=True) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -934,20 +934,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -999,55 +999,55 @@ Keep and drop vari
We can also easily drop variables via the drop
argument:
-
+
=["X1"]) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop
-
+
@@ -1088,20 +1088,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1156,55 +1156,55 @@ Keep and drop vari
Hide fixed effects or SE-type rows
We can hide the rows showing the relevant fixed effects and those showing the S.E. type by setting show_fe=False
and show_se_type=False
(for instance when the set of fixed effects or the estimation method for the std. errors is the same for all models and you want to describe this in the text or table notes rather than displaying it in the table).
-
+
=False, show_se_type=False) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], show_fe
-
+
@@ -1301,55 +1301,55 @@ Hide fi
Display p-values or confidence intervals
By default, pf.etable()
reports standard errors. But we can also request p-values or confidence intervals via the coef_fmt
function argument.
-
+
="b \n (se) \n [p]") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt
-
+
@@ -1408,20 +1408,20 @@ D
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1477,55 +1477,55 @@ D
Significance levels and rounding
Additionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code
and digits
function arguments:
-
+
=[0.01, 0.05, 0.1], digits=5) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code
-
+
@@ -1584,20 +1584,20 @@ Significa
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1652,7 +1652,7 @@ Significa
Other output formats
By default, pf.etable()
returns a GT object (see the Great Tables package), but you can also opt for dataframe, markdown, or LaTeX output via the type
argument.
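Besides the pandas styler shown in the next cell, here is a hedged sketch of the remaining output types (assuming type accepts "df", "md", and "tex" as values, mirroring the formats named above):
# dataframe, markdown, and LaTeX output, respectively
pf.etable([fit1, fit2], type="df")
pf.etable([fit1, fit2], type="md")
pf.etable([fit1, fit2], type="tex")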
-
+
# Pandas styler output:
pf.etable(
@@ -1714,20 +1714,20 @@ [fit1, fit2, fit3, fit4, fit5, fit6],Other output formats<
-0.04082 (0.08093)
-
Cluster-Robust Errors
We conclude with cluster robust errors.
-= pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"})
fit = pf.feols(
fit_weights ="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, weights="weights"
@@ -821,14 +821,14 @@ fmlCluster-Robust Error
- stats.vcov(r_fit) fit._vcov
array([[ 4.20670443e-16, -6.97565513e-17],
[-6.97565513e-17, -1.42166010e-17]])
- stats.vcov(r_fit_weights) fit_weights._vcov
array([[2.59070109e-16, 4.07324592e-16],
@@ -836,7 +836,7 @@ Cluster-Robust Error
We conclude by comparing all estimation results via the tidy
methods:
fit.tidy()
Cluster-Robust Error
pd.DataFrame(broom.tidy_fixest(r_fit)).T
Cluster-Robust Error
fit_weights.tidy()
Cluster-Robust Error
pd.DataFrame(broom.tidy_fixest(r_fit_weights)).T
Cluster-Robust Error
Poisson Regression
-
+
= pf.get_data(model="Fepois") data
-
+
= pf.fepois(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid", iwls_tol=1e-10)
fit_iid = pf.fepois(
fit_hetero ="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", iwls_tol=1e-10
@@ -1065,21 +1065,21 @@ fmlPoisson Regression
-
+
- stats.vcov(fit_r_iid) fit_iid._vcov
array([[ 1.20791284e-08, -6.55604931e-10],
[-6.55604931e-10, 1.69958097e-09]])
-
+
- stats.vcov(fit_r_hetero) fit_hetero._vcov
array([[ 2.18101847e-08, -7.38711972e-10],
[-7.38711972e-10, 3.07587753e-09]])
-
+
- stats.vcov(fit_r_crv) fit_crv._vcov
array([[ 1.58300904e-08, -1.20806815e-10],
@@ -1087,7 +1087,7 @@ Poisson Regression
We conclude by comparing all estimation results via the tidy
methods:
-
+
fit_iid.tidy()
@@ -1139,7 +1139,7 @@ Poisson Regression
-
+
pd.DataFrame(broom.tidy_fixest(fit_r_iid)).T
@@ -1179,7 +1179,7 @@ Poisson Regression
-
+
fit_hetero.tidy()
@@ -1231,7 +1231,7 @@ Poisson Regression
-
+
pd.DataFrame(broom.tidy_fixest(fit_r_hetero)).T
@@ -1271,7 +1271,7 @@ Poisson Regression
-
+
fit_crv.tidy()
@@ -1323,7 +1323,7 @@ Poisson Regression
-
+
pd.DataFrame(broom.tidy_fixest(fit_r_crv)).T
diff --git a/difference-in-differences.html b/difference-in-differences.html
index 39e70f54..6e6d95c5 100644
--- a/difference-in-differences.html
+++ b/difference-in-differences.html
@@ -257,7 +257,7 @@ Difference-in-Differences Estimation
See also NBER SI methods lectures on Linear Panel Event Studies.
Setup
-
+
from importlib import resources
import pandas as pd
@@ -272,7 +272,7 @@ Setup
%autoreload 2
-
+
@@ -306,7 +306,7 @@ Setup
-
+
-pyfixest: 0.25.3
-pandas : 2.2.3
+pandas : 2.2.3
+pyfixest: 0.25.3
-
+
# one-shot adoption data - parallel trends is true
= get_sharkfin()
df_one_cohort df_one_cohort.head()
@@ -410,7 +410,7 @@ Setup
-
+
# multi-cohort adoption data
= pd.read_csv(
df_multi_cohort "pyfixest.did.data").joinpath("df_het.csv")
@@ -536,7 +536,7 @@ resources.files(Setup
Examining Treatment Timing
Before any DiD estimation, we need to examine the treatment timing, since it is crucial to our choice of estimator.
-
+
pf.panelview(
df_one_cohort,="unit",
@@ -557,7 +557,7 @@ unitExamining Treat
-
+
pf.panelview(
df_multi_cohort,="unit",
@@ -580,7 +580,7 @@ unitExamining Treat
We immediately see that we have staggered adoption of treatment in the second case, which implies that a naive application of 2WFE might yield biased estimates under substantial effect heterogeneity.
We can also plot treatment assignment in a disaggregated fashion, which gives us a sense of cohort sizes.
-
+
pf.panelview(
df_multi_cohort,="unit",
@@ -604,7 +604,7 @@ unitExamining Treat
Inspecting the Outcome Variable
pf.panelview()
further allows us to inspect the “outcome” variable over time:
-
+
pf.panelview(
df_multi_cohort,="dep_var",
@@ -625,7 +625,7 @@ outcomeInspecting
We immediately see that the first cohort is switched into treatment in 2000, while the second cohort is switched into treatment by 2010. Before each cohort is switched into treatment, the trends are parallel.
We can additionally inspect individual units by dropping the collapse_to_cohort argument. Because we have a large sample, we might want to inspect only a subset of units.
-
+
pf.panelview(
df_multi_cohort,="dep_var",
@@ -647,7 +647,7 @@ outcomeInspecting
One-shot adoption: Static and Dynamic Specifications
After taking a first look at the data, let’s turn to estimation. We return to the df_one_cohort
data set (without staggered treatment rollout).
-
+
= pf.feols(
fit_static_twfe "Y ~ treat | unit + year",
@@ -670,14 +670,14 @@ df_one_cohort,
+
= pf.feols(
fit_dynamic_twfe "Y ~ i(year, ever_treated, ref = 14) | unit + year",
df_one_cohort,={"CRV1": "unit"},
vcov )
-
+
fit_dynamic_twfe.iplot(=False,
coord_flip="Event Study",
@@ -687,7 +687,7 @@ title=rename_event_study_coefs(fit_dynamic_twfe._coefnames),
)
labels
-
+
-
+
fit_lpdid.iplot(=False,
coord_flip="Local-Projections-Estimator",
@@ -1166,7 +1166,7 @@ titleLocal Project
=18.5,
xintercept ).show()
-
+
@@ -297,7 +297,7 @@ Marginal Effects and Hypothesis Tests via marginaleffect
-
+
@@ -390,7 +390,7 @@ Marginal Effects and Hypothesis Tests via marginaleffect
Suppose we were interested in testing the hypothesis that \(X_{1} = X_{2}\). Given the relatively large differences in coefficients and small standard errors, we will likely reject the null that the two parameters are equal.
We can run the formal test via the hypotheses
function from the marginaleffects
package.
-
+
"X1 - X2 = 0") hypotheses(fit,
@@ -546,7 +546,7 @@ PyFixest 0.18.0
Additionally, model_matrix_fixest
now returns a dictionary instead of a tuple.
Brings back fixed effects reference setting via i(var1, var2, ref)
syntax. Deprecates the i_ref1
, i_ref2
function arguments. That is, it is again possible to run, for example:
-
+
import pyfixest as pf
= pf.get_data()
data
@@ -554,7 +554,7 @@ PyFixest 0.18.0
0:8] fit1.coef()[
Via the ref
syntax, we can set the reference level:
-
+
= pf.feols("Y ~ i(f1, X2, ref = 1)", data=data)
fit2 0:8] fit2.coef()[
@@ -563,7 +563,7 @@ PyFixest 0.18.0
PyFixest 0.17.0
Restructures the codebase and reorganizes how users can interact with the pyfixest
API. It is now recommended to use pyfixest
in the following way:
-
+
import numpy as np
import pyfixest as pf
= pf.get_data()
@@ -631,7 +631,7 @@ data PyFixest 0.17.0
The update should not introduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!
Adds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!
-
+
= True) fit.confint(joint
@@ -648,18 +648,18 @@ PyFixest 0.17.0
Intercept
-0.380105
-1.177593
+0.375929
+1.181769
D
--1.759120
--1.046114
+-1.762853
+-1.042381
f1
--0.014097
-0.023645
+-0.014294
+0.023843
@@ -668,7 +668,7 @@ PyFixest 0.17.0
Adds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv()
method.
-
+
= "D", cluster = "group_id") fit.ccv(treatment
/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.
@@ -694,11 +694,11 @@ PyFixest 0.17.0
CCV
-1.4026168622179929
-0.28043
--5.001663
-0.000093
--1.991779
--0.813455
+0.238985
+-5.869057
+0.000015
+-1.904706
+-0.900528
CRV1
@@ -740,7 +740,7 @@ PyFixest 0.14.0
- Changes all docstrings to
numpy
format.
- Difference-in-differences estimation functions now need to be imported via the
pyfixest.did.estimation
module:
-
+
from pyfixest.did.estimation import did2s, lpdid, event_study
diff --git a/pyfixest.html b/pyfixest.html
index 2173ab59..c92a31d5 100644
--- a/pyfixest.html
+++ b/pyfixest.html
@@ -187,10 +187,11 @@
PyFixest: Fast High-Dimensional Fixed Effects Regression in Python
-
+
PyFixest
is a Python implementation of the formidable fixest package for fast high-dimensional fixed effects regression.
The package aims to mimic fixest
syntax and functionality as closely as Python allows: if you know fixest
well, the goal is that you won’t have to read the docs to get started! In particular, this means that all of fixest's
defaults are mirrored by PyFixest
- currently with only one small exception.
Nevertheless, for a quick introduction, you can take a look at the quickstart or the regression chapter of Arthur Turrell’s book on Coding for Economists.
+For questions on PyFixest
, head on over to our PyFixest Discourse forum.
Features
diff --git a/quarto_example/QuartoExample.pdf b/quarto_example/QuartoExample.pdf
index 375fab6a..f16663ee 100644
Binary files a/quarto_example/QuartoExample.pdf and b/quarto_example/QuartoExample.pdf differ
diff --git a/quickstart.html b/quickstart.html
index 96c32ad2..e91a8996 100644
--- a/quickstart.html
+++ b/quickstart.html
@@ -281,7 +281,7 @@ What is a fix
Read Sample Data
In a first step, we load the module and some synthetic example data:
-
+
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@@ -302,7 +302,7 @@ Read Sample Data
data.head()
-
+
@@ -336,7 +336,7 @@ Read Sample Data
-
+
@@ -370,7 +370,7 @@ Read Sample Data
-
+
-numpy : 1.26.4
+pandas : 2.2.3
+numpy : 1.26.4
pyfixest : 0.25.3
-pandas : 2.2.3
matplotlib: 3.9.2
@@ -507,7 +507,7 @@ Read Sample Data
-
+
data.info()
<class 'pandas.core.frame.DataFrame'>
@@ -535,7 +535,7 @@ Read Sample Data
OLS Estimation
We are interested in the relationship between the dependent variable Y
and the independent variable X1
, using a fixed effects model for group_id
. Let’s see what the data looks like:
-
+
= data.plot(kind="scatter", x="X1", y="Y", c="group_id", colormap="viridis") ax
@@ -546,7 +546,7 @@ OLS Estimation
We can estimate a fixed effects regression via the feols()
function. feols()
has three arguments: a two-sided model formula, the data, and optionally, the type of inference.
-
+
= pf.feols(fml="Y ~ X1 | group_id", data=data, vcov="HC1")
fit type(fit)
@@ -559,7 +559,7 @@ OLS Estimation
Inspecting Model Results
To inspect the results, we can use a summary function or method:
-
+
fit.summary()
###
@@ -577,55 +577,55 @@ Inspecting Model
Or display a formatted regression table:
-
+
pf.etable(fit)
-
+
@@ -688,7 +688,7 @@ Inspecting Model
Alternatively, the .summarize
module contains a summary
function, which can be applied to instances of regression model objects or lists of regression model objects. For details on how to customize etable()
, please take a look at the dedicated vignette.
-
+
pf.summary(fit)
###
@@ -706,7 +706,7 @@ Inspecting Model
You can access individual elements of the summary via dedicated methods: .tidy()
returns a “tidy” pd.DataFrame
, .coef()
returns estimated parameters, and .se()
returns estimated standard errors. Other methods include .pvalue()
, .confint()
and .tstat()
.
-
+
fit.tidy()
@@ -749,7 +749,7 @@ Inspecting Model
-
+
fit.coef()
Coefficient
@@ -757,7 +757,7 @@ Inspecting Model
Name: Estimate, dtype: float64
-
+
fit.se()
Coefficient
@@ -765,7 +765,7 @@ Inspecting Model
Name: Std. Error, dtype: float64
-
+
fit.tstat()
Coefficient
@@ -773,7 +773,7 @@ Inspecting Model
Name: t value, dtype: float64
-
+
fit.confint()
@@ -800,11 +800,11 @@ Inspecting Model
Last, model results can be visualized via dedicated methods for plotting:
-
+
fit.coefplot()# or pf.coefplot([fit])
-
+
@@ -522,7 +522,7 @@ Examples
-
+
@@ -671,7 +671,7 @@ Examples
In a first step, we estimate a classical event study model:
-
+
# estimate the model
= pf.did2s(
fit
@@ -761,10 +761,10 @@ df_het,Examples
We can also inspect the model visually:
-
+
= [1200, 400], coord_flip=False).show() fit.iplot(figsize
-
+
@@ -545,7 +545,7 @@ Examples
-
+
diff --git a/reference/did.estimation.lpdid.html b/reference/did.estimation.lpdid.html
index 6729fcf2..d210e196 100644
--- a/reference/did.estimation.lpdid.html
+++ b/reference/did.estimation.lpdid.html
@@ -505,7 +505,7 @@ Returns
Examples
-
+
import pandas as pd
import pyfixest as pf
@@ -528,7 +528,7 @@ Examples
= [1200, 400], coord_flip=False).show() fit.iplot(figsize
-
+
@@ -562,7 +562,7 @@ Examples
-
+
-
+
@@ -606,7 +606,7 @@ Examples
-
+
@@ -656,7 +656,7 @@ Examples
Calling feols()
returns an instance of the [Feols](/reference/Feols.qmd) class. The summary()
method can be used to print the results.
An alternative way to retrieve model results is via the tidy()
method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.
-
+
fit.tidy()
@@ -710,17 +710,17 @@ Examples
You can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef()
for the coefficients, fit.se()
for the standard errors, fit.tstat()
for the t-statistics, fit.pval()
for the p-values, and fit.confint()
for the confidence intervals.
The employed type of inference can be specified via the vcov
argument. If vcov is not provided, PyFixest
employs the fixest
default of iid inference, unless there are fixed effects in the model, in which case feols()
clusters the standard error by the first fixed effect (CRV1 inference).
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="iid")
fit1 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="hetero")
fit2 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1"}) fit3
Supported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {"CRV1": "f1"}
for CRV1 inference with clustering by f1
or {"CRV3": "f1"}
for CRV3 inference with clustering by f1
. For two-way clustering, you can provide a formula string, e.g. {"CRV1": "f1 + f2"}
for CRV1 inference with clustering by f1
.
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1 + f2"}) fit4
Inference can be adjusted post estimation via the vcov
method:
-
+
fit.summary()"iid").summary() fit.vcov(
@@ -754,7 +754,7 @@ Examples
The ssc
argument specifies the small sample correction for inference. In general, feols()
uses all of fixest::feols()
defaults, but sets the fixef.K
argument to "none"
whereas the fixest::feols()
default is "nested"
. See here for more details: link to github.
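To override this default, a minimal sketch (assuming pf.ssc() exposes a fixef_k argument mirroring fixest's fixef.K):
# mimic the fixest::feols() default of fixef.K = "nested"
fit_nested = pf.feols(
    "Y ~ X1 + X2 | f1 + f2",
    data,
    ssc=pf.ssc(fixef_k="nested"),
)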
feols()
supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1
and one with fixed effects for f2
using the sw()
syntax.
-
+
= pf.feols("Y ~ X1 + X2 | sw(f1, f2)", data)
fit type(fit)
@@ -762,55 +762,55 @@ Examples
The returned object is an instance of the FixestMulti
class. You can access the results of the first model via fit.fetch_model(0)
and the results of the second model via fit.fetch_model(1)
. You can compare the model results via the etable()
function:
-
+
pf.etable(fit)
-
+
@@ -852,14 +852,14 @@ Examples
fe
-f2
--
+f1
x
+-
-f1
-x
+f2
-
+x
stats
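To work further with a single model from the FixestMulti object, a short sketch using the fetch_model() method mentioned above:
first_model = fit.fetch_model(0)  # the model with fixed effects for f1
first_model.summary()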
@@ -893,56 +893,56 @@ Examples
Other supported multiple estimation syntax include sw0()
, csw()
and csw0()
. While sw()
adds variables in a “stepwise” fashion, csw()
does so cumulatively.
-
+
= pf.feols("Y ~ X1 + X2 | csw(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -984,13 +984,13 @@ Examples
fe
-f2
--
+f1
+x
x
-f1
-x
+f2
+-
x
@@ -1025,56 +1025,56 @@ Examples
The sw0()
and csw0()
syntax are similar to sw()
and csw()
, but start with a model that excludes the variables specified in sw()
and csw()
:
-
+
= pf.feols("Y ~ X1 + X2 | sw0(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -1129,16 +1129,16 @@ Examples
fe
-f2
--
+f1
-
x
+-
-f1
+f2
-
-x
-
+x
stats
@@ -1175,56 +1175,56 @@ Examples
The feols()
function also supports multiple dependent variables. The following example estimates two models, one with Y
as the dependent variable and one with Y2
as the dependent variable.
-
+
= pf.feols("Y + Y2 ~ X1 | f1 + f2", data)
fit pf.etable(fit)
-
+
@@ -1260,12 +1260,12 @@ Examples
fe
-f2
+f1
x
x
-f1
+f2
x
x
@@ -1301,56 +1301,56 @@ Examples
It is possible to combine different multiple estimation operators:
-
+
= pf.feols("Y + Y2 ~ X1 | sw(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -1396,18 +1396,18 @@ Examples
fe
-f2
--
--
+f1
x
x
+-
+-
-f1
-x
-x
+f2
-
-
+x
+x
stats
@@ -1448,7 +1448,7 @@ Examples
In general, using multiple estimation syntax can improve the estimation time, as covariates that are demeaned in one model and used in another do not need to be demeaned again: feols()
implements a caching mechanism that stores the demeaned covariates.
Additionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, split = "f1")
fit pf.etable(fit)
@@ -1514,52 +1514,52 @@ Examples
cluster_adj_value = G / (G - 1)
-
+
@@ -1769,7 +1769,7 @@ Examples
fe
-f2
+f1
x
x
x
@@ -1802,7 +1802,7 @@ Examples
x
-f1
+f2
x
x
x
@@ -1950,7 +1950,7 @@ Examples
Besides OLS, feols()
also supports IV estimation via three-part formulas:
-
+
= pf.feols("Y ~ X2 | f1 + f2 | X1 ~ Z1", data)
fit fit.tidy()
@@ -2004,7 +2004,7 @@ Examples
Here, X1
is the endogenous variable and Z1
is the instrument. f1
and f2
are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:
-
+
= pf.feols("Y ~ X2 | X1 ~ Z1", data)
fit fit.tidy()
@@ -2068,7 +2068,7 @@ Examples
Last, feols()
supports interaction of variables via the i()
syntax. Documentation on this is tba.
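Until the dedicated documentation lands, a minimal sketch following the ref example from the changelog above:
# interact f1 with X2, setting the reference level of f1 to 1
fit_i = pf.feols("Y ~ i(f1, X2, ref = 1)", data)
fit_i.coef()[0:8]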
After fitting a model via feols()
, you can use the predict()
method to get the predicted values:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit 0:5] fit.predict()[
@@ -2076,7 +2076,7 @@ Examples
The predict()
method also supports a newdata
argument to predict on new data, which returns a numpy array of the predicted values:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit =data)[0:5] fit.predict(newdata
@@ -2084,11 +2084,11 @@ Examples
Last, you can plot the results of a model via the coefplot()
method:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit fit.coefplot()
-
+
@@ -593,7 +593,7 @@ Examples
-
+
diff --git a/reference/report.coefplot.html b/reference/report.coefplot.html
index c3217e91..a373db31 100644
--- a/reference/report.coefplot.html
+++ b/reference/report.coefplot.html
@@ -528,7 +528,7 @@ Returns
Examples
-
+
import pyfixest as pf
from pyfixest.report.utils import rename_categoricals
@@ -544,7 +544,7 @@ Examples
= "both") pf.coefplot([fit1], joint
-
+
@@ -578,7 +578,7 @@ Examples
-
+
-
+
@@ -576,7 +576,7 @@ Examples
-
+
-
+
@@ -497,7 +497,7 @@ Examples
-
+
diff --git a/replicating-the-effect.html b/replicating-the-effect.html
index bf03493a..daff3e8c 100644
--- a/replicating-the-effect.html
+++ b/replicating-the-effect.html
@@ -234,7 +234,7 @@ Replicating Examples from “The Effect”
This notebook replicates code examples from Nick Huntington-Klein’s book on causal inference, The Effect.
-
+
from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
@@ -243,7 +243,7 @@ Replicating Examples from “The Effect”
%watermark --iversions
-
+
@@ -277,7 +277,7 @@ Replicating Examples from “The Effect”
-
+
@@ -317,7 +317,7 @@ Replicating Examples from “The Effect”
Chapter 4: Describing Relationships
-
+
# Read in data
= Mroz.load_pandas().data
dt # Keep just working women
@@ -329,7 +329,7 @@ Chapter
= pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
fit pf.etable(fit)
-/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning:
+/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
@@ -337,52 +337,52 @@ Chapter
dt.loc[:, "earn"] = dt["lwg"].apply("exp")
-
+
@@ -480,7 +480,7 @@ Chapter
Chapter 13: Regression
Example 1
-
+
= restaurant_inspections.load_pandas().data
res = res.inspection_score.astype(float)
res.inspection_score = res.NumberofLocations.astype(float)
@@ -489,52 +489,52 @@ res.NumberofLocations Example 1
= pf.feols(fml="inspection_score ~ NumberofLocations", data=res)
fit pf.etable([fit])
-
+
@@ -597,7 +597,7 @@ Example 1
Example 2
-
+
= restaurant_inspections.load_pandas().data
df
= pf.feols(
@@ -607,52 +607,52 @@ fit1 Example 2
pf.etable([fit1, fit2])
-
+
@@ -749,7 +749,7 @@ Example 2
Example 3: HC Standard Errors
-
+
="inspection_score ~ Year + Weekend", data=df, vcov="HC3").summary() pf.feols(fml
###
@@ -771,7 +771,7 @@ Example 3: HC
Example 4: Clustered Standard Errors
-
+
pf.feols(="inspection_score ~ Year + Weekend", data=df, vcov={"CRV1": "Weekend"}
fml ).tidy()
@@ -837,7 +837,7 @@ Exampl
Example 5: Bootstrap Inference
-
+
= pf.feols(fml="inspection_score ~ Year + Weekend", data=df)
fit =999, param="Year") fit.wildboottest(reps
@@ -860,7 +860,7 @@ Example 1
Example 2
-
+
= gapminder.load_pandas().data
gm "logGDPpercap"] = gm["gdpPercap"].apply("log")
gm[
@@ -946,7 +946,7 @@ Example 2
Example 3: TWFE
-
+
# Set our individual and time (index) for our data
= pf.feols(fml="lifeExp ~ np.log(gdpPercap) | country + year", data=gm)
fit fit.summary()
@@ -971,7 +971,7 @@ Example 3: TWFE
Chapter 18: Difference-in-Differences
Example 1
-
+
= organ_donations.load_pandas().data
od
# Create Treatment Variable
@@ -999,7 +999,7 @@ Example 1
Example 3: Dynamic Treatment Effect
-
+
= organ_donations.load_pandas().data
od
# Create Treatment Variable
diff --git a/search.json b/search.json
index eeddcd32..16e2c702 100644
--- a/search.json
+++ b/search.json
@@ -479,7 +479,7 @@
"href": "reference/estimation.estimation.feols.html#examples",
"title": "estimation.estimation.feols",
"section": "Examples",
- "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\n-\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf2\n-\n-\nx\n\n\nf1\n-\nx\n-\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf2\nx\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf2\n-\n-\nx\nx\n\n\nf1\nx\nx\n-\n-\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using muliple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nLast, you can plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nwould run a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the cluster causal variance estimator by Athey et al by using the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.284647\n0.056518\n0.955552\n-0.581934\n0.61411\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
+ "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf1\nx\n-\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf1\n-\nx\n-\n\n\nf2\n-\n-\nx\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf1\nx\nx\n-\n-\n\n\nf2\n-\n-\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using multiple estimation syntax can reduce estimation time, as covariates that are demeaned in one model and used in another do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n(The warning above is emitted once per sample split; the repeated messages are omitted here.)
\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nLast, you can plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nThis runs a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the causal cluster variance estimator by Abadie et al. (QJE, 2023) via the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.242455\n0.066353\n0.947828\n-0.493292\n0.525467\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
"crumbs": [
"Function Reference",
"Estimation Functions",
@@ -546,7 +546,7 @@
"href": "replicating-the-effect.html#chapter-4-describing-relationships",
"title": "Replicating Examples from “The Effect”",
"section": "Chapter 4: Describing Relationships",
- "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
+ "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
},
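The SettingWithCopyWarning above is raised because dt.query("lfp") returns a slice of the original frame that is then written to. A minimal pandas sketch of the usual fix (assuming the same Mroz dataset object as above), which leaves the regression results unchanged:

import numpy as np

# take an explicit copy before adding columns, so no warning is raised
dt = Mroz.load_pandas().data.query("lfp").copy()
dt["earn"] = np.exp(dt["lwg"])  # unlogged earnings, as in the chapter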
{
"objectID": "replicating-the-effect.html#chapter-13-regression",
@@ -623,7 +623,7 @@
"href": "difference-in-differences.html#setup",
"title": "Difference-in-Differences Estimation",
"section": "Setup",
- "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npyfixest: 0.25.3\npandas : 2.2.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
+ "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\npyfixest: 0.25.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
},
{
"objectID": "difference-in-differences.html#examining-treatment-timing",
@@ -665,7 +665,7 @@
"href": "quickstart.html",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see how the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Fixest class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and se() estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s have a quick d-tour on the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One possibility to do this is by adding a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis is does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do it is by a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without adding all these dummy variables. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y1 ~ X | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see what the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Feols class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied to instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and .se() returns estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s take a quick detour into the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One possibility to do this is by adding a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do this is via a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without adding the dummy variables explicitly. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y ~ X1 | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1}' (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results as if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
{
"objectID": "quickstart.html#what-is-a-fixed-effect-model",
@@ -679,7 +679,7 @@
"href": "quickstart.html#read-sample-data",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
+ "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
},
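Since several columns contain NaN values, a quick pandas sketch (on the same data frame as above) can show where they are before estimation; pyfixest drops incomplete rows automatically, which is why the observation counts in the models above fall below 1000:

data.isna().sum()                             # missing values per column
data[["Y", "X1"]].isna().any(axis=1).sum()    # rows lost to NaNs in Y or X1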
{
"objectID": "quickstart.html#ols-estimation",
@@ -707,7 +707,7 @@
"href": "quickstart.html#updating-regression-coefficients",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
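The update rule above is the standard recursive least squares identity: the one-step update reproduces full-sample OLS exactly. A self-contained numpy sketch (with synthetic data, independent of pyfixest) that verifies this:

import numpy as np

rng = np.random.default_rng(0)
X = np.c_[np.ones(100), rng.normal(size=(100, 2))]
y = X @ np.array([1.0, -1.0, -0.2]) + rng.normal(size=100)

# OLS on the first 99 rows
beta = np.linalg.solve(X[:-1].T @ X[:-1], X[:-1].T @ y[:-1])

# one-step update with the last row x (shape 1 x k) and outcome y_new
x, y_new = X[-1:], y[-1]
beta_up = beta + np.linalg.solve(X.T @ X, x.ravel()) * (y_new - x @ beta)

# matches OLS on all 100 rows up to floating point error
assert np.allclose(beta_up, np.linalg.solve(X.T @ X, X.T @ y))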
{
"objectID": "quickstart.html#inference-via-the-wild-bootstrap",
@@ -742,7 +742,7 @@
"href": "quickstart.html#joint-confidence-intervals",
"title": "Getting Started with PyFixest",
"section": "Joint Confidence Intervals",
- "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.428799\n1.406710\n\n\nX1\n-1.161391\n-0.737491\n\n\nC(f1)[T.1.0]\n1.380485\n3.784814\n\n\nC(f1)[T.2.0]\n-2.842798\n-0.321070\n\n\nC(f1)[T.3.0]\n-1.612387\n0.987719"
+ "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.424794\n1.402705\n\n\nX1\n-1.160466\n-0.738416\n\n\nC(f1)[T.1.0]\n1.385731\n3.779568\n\n\nC(f1)[T.2.0]\n-2.837296\n-0.326572\n\n\nC(f1)[T.3.0]\n-1.606713\n0.982046"
},
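For intuition, such joint (sup-t) bands can be sketched directly from the coefficient estimates and their covariance matrix: simulate from the estimated distribution, take the 1 - alpha quantile of the maximum absolute t-statistic across coefficients, and use it as a common critical value. This is a conceptual sketch of the approach in the cited paper, not necessarily pyfixest's exact implementation, and sup_t_bands is a hypothetical helper name:

import numpy as np

def sup_t_bands(beta, vcov, reps=9999, alpha=0.05, seed=1):
    # simultaneous bands: one critical value shared by all coefficients
    rng = np.random.default_rng(seed)
    se = np.sqrt(np.diag(vcov))
    draws = rng.multivariate_normal(np.zeros(len(beta)), vcov, size=reps)
    crit = np.quantile(np.abs(draws / se).max(axis=1), 1 - alpha)
    return beta - crit * se, beta + crit * se

# e.g. compare against fit_ci.confint(joint=True) from above:
# lo, hi = sup_t_bands(fit_ci.coef().values, fit_ci._vcov)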
{
"objectID": "pyfixest.html#features",
@@ -847,7 +847,7 @@
"href": "news.html#pyfixest-0.17.0",
"title": "News",
"section": "PyFixest 0.17.0",
- "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.380105\n1.177593\n\n\nD\n-1.759120\n-1.046114\n\n\nf1\n-0.014097\n0.023645\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.28043\n-5.001663\n0.000093\n-1.991779\n-0.813455\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
+ "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.375929\n1.181769\n\n\nD\n-1.762853\n-1.042381\n\n\nf1\n-0.014294\n0.023843\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.238985\n-5.869057\n0.000015\n-1.904706\n-0.900528\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
},
{
"objectID": "news.html#pyfixest-0.16.0",
@@ -1085,14 +1085,14 @@
"href": "table-layout.html#basic-usage",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage",
- "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
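[Editor's note: for readers who want to reproduce the pattern above outside the page's setup, a minimal self-contained sketch using only the public pyfixest API documented on this page; fit_a and fit_b are illustrative names, not the fit1…fit6 objects estimated earlier in the page.]

import pyfixest as pf

data = pf.get_data()

# Two nested specifications, clustered by f1 as in the tables above
fit_a = pf.feols("Y ~ X1 + X2 | f1", data=data, vcov={"CRV1": "f1"})
fit_b = pf.feols("Y ~ X1 + X2 + X1:X2 | f1 + f2", data=data, vcov={"CRV1": "f1"})

# etable returns a Great Tables (GT) object by default
pf.etable([fit_a, fit_b])

# Stepwise notation: one call estimates and displays all csw() specifications
pf.etable(pf.feols("Y + Y2 ~ csw(X1, X2, X1:X2)", data=data))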
{
"objectID": "table-layout.html#keep-and-drop-variables",
"href": "table-layout.html#keep-and-drop-variables",
"title": "Regression Tables via pf.etable()",
"section": "Keep and drop variables",
- "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
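[Editor's note: since keep and drop are interpreted as regular expressions, a short sketch of the matching behavior; the anchored pattern "X1$" relies on standard Python regex semantics and is an assumption, not a feature documented on this page.]

import pyfixest as pf

data = pf.get_data()
fit_a = pf.feols("Y ~ X1 + X2 | f1", data=data)
fit_b = pf.feols("Y ~ X1 + X2 + X1:X2 | f1 + f2", data=data)

# "X1" is treated as a regex, so it also retains the interaction X1:X2
pf.etable([fit_a, fit_b], keep="X1")

# An anchored pattern keeps X1 but not X1:X2 (assumption: regex search semantics) ...
pf.etable([fit_a, fit_b], keep="X1$")

# ... as does exact_match=True with literal names
pf.etable([fit_a, fit_b], keep=["X1"], exact_match=True)

# drop removes everything matching the pattern (here both X2 and X1:X2)
pf.etable([fit_a, fit_b], drop=["X2"])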
{
"objectID": "table-layout.html#hide-fixed-effects-or-se-type-rows",
@@ -1106,49 +1106,49 @@
"href": "table-layout.html#display-p-values-or-confidence-intervals",
"title": "Regression Tables via pf.etable()",
"section": "Display p-values or confidence intervals",
- "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
+ "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
},
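[Editor's note: a hedged sketch of alternative coef_fmt layouts; it assumes the b/se/t/p statistic tokens of coef_fmt, and the layout strings are illustrative.]

import pyfixest as pf

data = pf.get_data()
fit = pf.feols("Y ~ X1 + X2 | f1 + f2", data=data)

# One statistic per line: coefficient, standard error, p-value
pf.etable([fit], coef_fmt="b \n (se) \n [p]")

# A compact single-line cell with t-statistics instead (assumes the "t" token)
pf.etable([fit], coef_fmt="b (t)")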
{
"objectID": "table-layout.html#significance-levels-and-rounding",
"href": "table-layout.html#significance-levels-and-rounding",
"title": "Regression Tables via pf.etable()",
"section": "Significance levels and rounding",
- "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#other-output-formats",
"href": "table-layout.html#other-output-formats",
"title": "Regression Tables via pf.etable()",
"section": "Other output formats",
- "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf2 - x x - x x\nf1 x x x x x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
+ "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf1 x x x x x x\nf2 - x x - x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
},
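[Editor's note: a short sketch routing one table to each output type; the tables/ path is illustrative, and filename= is the argument named in the text above.]

import pyfixest as pf

data = pf.get_data()
fit = pf.feols("Y ~ X1 + X2 | f1 + f2", data=data)

styler = pf.etable([fit], type="df")  # pandas Styler output
md_tab = pf.etable([fit], type="md")  # markdown / plain-text table
pf.etable([fit], type="tex", filename="tables/table1.tex")  # LaTeX written to disk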
{
"objectID": "table-layout.html#rename-variables",
"href": "table-layout.html#rename-variables",
"title": "Regression Tables via pf.etable()",
"section": "Rename variables",
- "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
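[Editor's note: a small sketch of the parenthetical above, manually overriding the automatic interaction label; the label texts are illustrative.]

import pyfixest as pf

data = pf.get_data()
fit = pf.feols("Y ~ X1 + X2 + X1:X2 | f1 + f2", data=data)

labels = {"X1": "Age", "X2": "Years of Schooling"}
# By default the interaction would be relabeled "Age × Years of Schooling";
# an explicit entry for the interaction term overrides that default.
labels["X1:X2"] = "Age-Schooling Interaction"
pf.etable([fit], labels=labels)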
{
"objectID": "table-layout.html#custom-model-headlines",
"href": "table-layout.html#custom-model-headlines",
"title": "Regression Tables via pf.etable()",
"section": "Custom model headlines",
- "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
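[Editor's note: a sketch of the remaining head_order values described above; the headline strings are illustrative.]

import pyfixest as pf

data = pf.get_data()
fits = [pf.feols(fml, data=data) for fml in ["Y ~ X1 | f1", "Y2 ~ X1 | f1"]]

pf.etable(fits, model_heads=["US", "EU"], head_order="dh")  # depvars, then headlines
pf.etable(fits, head_order="d")                             # depvars only
pf.etable(fits, model_heads=["US", "EU"], head_order="h")   # headlines only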
{
"objectID": "table-layout.html#further-custom-model-information",
"href": "table-layout.html#further-custom-model-information",
"title": "Regression Tables via pf.etable()",
"section": "Further custom model information",
- "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#custom-table-notes",
"href": "table-layout.html#custom-table-notes",
"title": "Regression Tables via pf.etable()",
"section": "Custom table notes",
- "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
+ "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
},
{
"objectID": "table-layout.html#publication-ready-latex-tables",
@@ -1169,35 +1169,35 @@
"href": "table-layout.html#summarize-by-characteristics-in-columns-and-rows",
"title": "Regression Tables via pf.etable()",
"section": "Summarize by characteristics in columns and rows",
- "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n 0.12\n 2.38\n -0.28\n 2.40\n -0.24\n 2.24\n -0.13\n 2.19\n \n \n Wealth\n -0.33\n 5.61\n -0.25\n 5.36\n -0.27\n 5.69\n -0.41\n 5.71\n \n \n Age\n 1.02\n 0.79\n 1.08\n 0.80\n 1.00\n 0.81\n 1.07\n 0.83\n \n \n Years of Schooling\n -0.25\n 2.93\n -0.18\n 3.31\n -0.04\n 3.08\n -0.03\n 2.89\n \n \n nobs\n \n \n Number of observations\n 264\n \n 233\n \n 244\n \n 256\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n 0.12(2.38)\n -0.28(2.40)\n -0.24(2.24)\n -0.13(2.19)\n \n \n Wealth\n -0.33(5.61)\n -0.25(5.36)\n -0.27(5.69)\n -0.41(5.71)\n \n \n Age\n 1.02(0.79)\n 1.08(0.80)\n 1.00(0.81)\n 1.07(0.83)\n \n \n Years of Schooling\n -0.25(2.93)\n -0.18(3.31)\n -0.04(3.08)\n -0.03(2.89)\n \n \n nobs\n \n \n Number of observations\n 264\n 233\n 244\n 256\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 264\n 0.12\n 2.38\n 244\n -0.24\n 2.24\n \n \n Wealth\n 264\n -0.33\n 5.61\n 244\n -0.27\n 5.69\n \n \n Age\n 264\n 1.02\n 0.79\n 244\n 1.00\n 0.81\n \n \n Years of Schooling\n 264\n -0.25\n 2.93\n 244\n -0.04\n 3.08\n \n \n White collar\n \n \n Wage\n 233\n -0.28\n 2.40\n 256\n -0.13\n 2.19\n \n \n Wealth\n 233\n -0.25\n 5.36\n 256\n -0.41\n 5.71\n \n \n Age\n 233\n 1.08\n 0.80\n 256\n 1.07\n 0.83\n \n \n Years of Schooling\n 233\n -0.18\n 3.31\n 256\n -0.03\n 2.89\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
+ "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n -0.12\n 2.30\n -0.13\n 2.32\n -0.09\n 2.32\n -0.17\n 2.30\n \n \n Wealth\n -0.09\n 5.66\n -0.50\n 5.48\n -0.47\n 5.70\n -0.22\n 5.53\n \n \n Age\n 1.07\n 0.81\n 0.98\n 0.79\n 1.04\n 0.79\n 1.08\n 0.83\n \n \n Years of Schooling\n 0.05\n 3.16\n -0.24\n 3.05\n -0.12\n 2.78\n -0.19\n 3.18\n \n \n nobs\n \n \n Number of observations\n 246\n \n 245\n \n 244\n \n 262\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n -0.12(2.30)\n -0.13(2.32)\n -0.09(2.32)\n -0.17(2.30)\n \n \n Wealth\n -0.09(5.66)\n -0.50(5.48)\n -0.47(5.70)\n -0.22(5.53)\n \n \n Age\n 1.07(0.81)\n 0.98(0.79)\n 1.04(0.79)\n 1.08(0.83)\n \n \n Years of Schooling\n 0.05(3.16)\n -0.24(3.05)\n -0.12(2.78)\n -0.19(3.18)\n \n \n nobs\n \n \n Number of observations\n 246\n 245\n 244\n 262\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 246\n -0.12\n 2.30\n 244\n -0.09\n 2.32\n \n \n Wealth\n 246\n -0.09\n 5.66\n 244\n -0.47\n 5.70\n \n \n Age\n 246\n 1.07\n 0.81\n 244\n 1.04\n 0.79\n \n \n Years of Schooling\n 246\n 0.05\n 3.16\n 244\n -0.12\n 2.78\n \n \n White collar\n \n \n Wage\n 245\n -0.13\n 2.32\n 262\n -0.17\n 2.30\n \n \n Wealth\n 245\n -0.50\n 5.48\n 262\n -0.22\n 5.53\n \n \n Age\n 245\n 0.98\n 0.79\n 262\n 1.08\n 0.83\n \n \n Years of Schooling\n 245\n -0.24\n 3.05\n 262\n -0.19\n 3.18\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
},
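[Editor's note: a compact sketch of the mean_std variant mentioned above; the country column is simulated, mirroring the snippet on the page.]

import numpy as np
import pyfixest as pf

data = pf.get_data()
data["country"] = np.random.choice(["US", "EU"], data.shape[0])

# mean_std renders "Mean (Std. Dev.)" in one cell, without a line break
pf.dtable(
    data,
    vars=["Y", "X1"],
    bycol=["country"],
    stats=["mean_std", "count"],
    counts_row_below=True,
    hide_stats=True,  # statistics are then named in an automatic table note
)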
{
"objectID": "table-layout.html#basic-usage-of-make_table",
"href": "table-layout.html#basic-usage-of-make_table",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage of make_table",
- "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 1.26\n -0.82\n -1.28\n 0.29\n \n \n 1\n -0.42\n 0.24\n 0.32\n -0.58\n \n \n 2\n 0.19\n 0.72\n -1.27\n -0.07\n \n \n 3\n 0.5\n -1.17\n -0.42\n -0.74\n \n\n \n \n \n These are notes"
+ "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 0.65\n 0.25\n -0.62\n -1.26\n \n \n 1\n -0.2\n -1.18\n -2.6\n 0.2\n \n \n 2\n -0.61\n 0.11\n 1.86\n 0.82\n \n \n 3\n -0.9\n 0.39\n 0.67\n -1.32\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#mutiindex-dataframes",
"href": "table-layout.html#mutiindex-dataframes",
"title": "Regression Tables via pf.etable()",
"section": "Mutiindex DataFrames",
- "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes"
+ "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#example-styling",
"href": "table-layout.html#example-styling",
"title": "Regression Tables via pf.etable()",
"section": "Example Styling",
- "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#defining-table-styles-some-examples",
"href": "table-layout.html#defining-table-styles-some-examples",
"title": "Regression Tables via pf.etable()",
"section": "Defining Table Styles: Some Examples",
- "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "reference/estimation.feols_compressed_.FeolsCompressed.html",
diff --git a/table-layout.html b/table-layout.html
index 55bd8032..82f6377d 100644
--- a/table-layout.html
+++ b/table-layout.html
@@ -245,7 +245,7 @@ Regression Tables via pf.etable()
Table Layout with PyFixest
Pyfixest comes with functions to generate publication-ready tables. Regression tables are generated with pf.etable()
, which can output different formats, for instance using the Great Tables package or generating formatted LaTeX tables using booktabs. There are also the functions pf.dtable()
to display descriptive statistics and pf.make_table()
for generating formatted tables from pandas dataframes in the same layout.
To begin, we load some libraries and fit a set of regression models.
-
+
import numpy as np
import pandas as pd
import pylatex as pl # for the latex table; note: not a dependency of pyfixest - needs manual installation
@@ -267,7 +267,7 @@ Table Layout wi
= pf.feols("Y2 ~ X1 *X2 | f1 + f2", data=data) fit6
-
+
@@ -301,7 +301,7 @@ Table Layout wi
-
+
@@ -338,55 +338,55 @@ Table Layout wi
Basic Usage
We can compare all regression models via the pyfixest-internal pf.etable()
function:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])
-
+
@@ -445,20 +445,20 @@ Basic Usage
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -510,55 +510,55 @@ Basic Usage
You can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:
-
+
"Y+Y2~csw(X1,X2,X1:X2)", data=data)) pf.etable(pf.feols(
-
+
@@ -673,55 +673,55 @@ Basic Usage
Keep and drop variables
etable
allows us to do a few things out of the box. For example, we can keep only the variables that we’d like: keep retains all variables that match the provided regex.
-
+
="X1") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -771,20 +771,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -836,55 +836,55 @@ Keep and drop vari
We can use the exact_match
argument to select a specific set of variables:
-
+
=["X1", "X2"], exact_match=True) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -934,20 +934,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -999,55 +999,55 @@ Keep and drop vari
We can also easily drop variables via the drop
argument:
-
+
=["X1"]) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop
-
+
@@ -1088,20 +1088,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1156,55 +1156,55 @@ Keep and drop vari
Hide fixed effects or SE-type rows
We can hide the rows showing the relevant fixed effects and those showing the S.E. type by setting show_fe=False
and show_se_type=False
(for instance when the set of fixed effects or the estimation method for the std. errors is the same for all models and you want to describe this in the text or table notes rather than displaying it in the table).
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], show_fe=False, show_se_type=False)
-
+
@@ -1301,55 +1301,55 @@ Hide fi
Display p-values or confidence intervals
By default, pf.etable()
reports standard errors. But we can also ask it to output p-values or confidence intervals via the coef_fmt
function argument.
-
+
="b \n (se) \n [p]") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt
-
+
@@ -1408,20 +1408,20 @@ D
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1477,55 +1477,55 @@ D
Significance levels and rounding
Additionally, we can overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code
and digits
function arguments:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)
-
+
@@ -1584,20 +1584,20 @@ Significa
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1652,7 +1652,7 @@ Significa
Other output formats
By default, pf.etable()
returns a GT object (see the Great Tables package), but you can also opt for dataframe, markdown, or LaTeX output via the type
argument.
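For instance, a minimal sketch of the markdown output (assuming "md" is the markdown option of the type argument; this call is not part of the original vignette):

# markdown output (assumed option name):
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], type="md")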
-
+
# Pandas styler output:
pf.etable(
    [fit1, fit2, fit3, fit4, fit5, fit6],
@@ -1714,20 +1714,20 @@ Other output formats
-0.04082 (0.08093)
-
Poisson Regression
-= pf.get_data(model="Fepois") data
= pf.fepois(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid", iwls_tol=1e-10)
fit_iid = pf.fepois(
fit_hetero ="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", iwls_tol=1e-10
@@ -1065,21 +1065,21 @@ fmlPoisson Regression
fit_iid._vcov - stats.vcov(fit_r_iid)
array([[ 1.20791284e-08, -6.55604931e-10],
[-6.55604931e-10, 1.69958097e-09]])
fit_hetero._vcov - stats.vcov(fit_r_hetero)
array([[ 2.18101847e-08, -7.38711972e-10],
[-7.38711972e-10, 3.07587753e-09]])
fit_crv._vcov - stats.vcov(fit_r_crv)
array([[ 1.58300904e-08, -1.20806815e-10],
@@ -1087,7 +1087,7 @@ Poisson Regression
We conclude by comparing all estimation results via the tidy
methods:
fit_iid.tidy()
Poisson Regression
pd.DataFrame(broom.tidy_fixest(fit_r_iid)).T
Poisson Regression
fit_hetero.tidy()
Poisson Regression
pd.DataFrame(broom.tidy_fixest(fit_r_hetero)).T
Poisson Regression
fit_crv.tidy()
Poisson Regression
pd.DataFrame(broom.tidy_fixest(fit_r_crv)).T
Difference-in-Differences Estimation
See also NBER SI methods lectures on Linear Panel Event Studies.
Setup
-from importlib import resources
import pandas as pd
@@ -272,7 +272,7 @@ Setup
%autoreload 2
Setup
pyfixest: 0.25.3
-pandas : 2.2.3
+pandas : 2.2.3
+pyfixest: 0.25.3
# one-shot adoption data - parallel trends is true
df_one_cohort = get_sharkfin()
df_one_cohort.head()
Setup
# multi-cohort adoption data
df_multi_cohort = pd.read_csv(
    resources.files("pyfixest.did.data").joinpath("df_het.csv")
)
@@ -536,7 +536,7 @@ Setup
Examining Treatment Timing
Before any DiD estimation, we need to examine the treatment timing, since it is crucial to our choice of estimator.
-
+
pf.panelview(
    df_one_cohort,
    unit="unit",
@@ -557,7 +557,7 @@ Examining Treat
-
+
pf.panelview(
    df_multi_cohort,
    unit="unit",
@@ -580,7 +580,7 @@ Examining Treat
We immediately see that we have staggered adoption of treatment in the second case, which implies that a naive application of 2WFE might yield biased estimates under substantial effect heterogeneity.
We can also plot treatment assignment in a disaggregated fashion, which gives us a sense of cohort sizes.
-
+
pf.panelview(
    df_multi_cohort,
    unit="unit",
@@ -604,7 +604,7 @@ Examining Treat
Inspecting the Outcome Variable
pf.panelview()
further allows us to inspect the “outcome” variable over time:
-
+
pf.panelview(
    df_multi_cohort,
    outcome="dep_var",
@@ -625,7 +625,7 @@ Inspecting
We immediately see that the first cohort is switched into treatment in 2000, while the second cohort is switched into treatment by 2010. Before each cohort is switched into treatment, the trends are parallel.
We can additionally inspect individual units by dropping the collapse_to_cohort argument. Because we have a large sample, we might want to inspect only a subset of units.
-
+
pf.panelview(
    df_multi_cohort,
    outcome="dep_var",
@@ -647,7 +647,7 @@ Inspecting
One-shot adoption: Static and Dynamic Specifications
After taking a first look at the data, let’s turn to estimation. We return to the df_one_cohort
data set (without staggered treatment rollout).
-
+
fit_static_twfe = pf.feols(
    "Y ~ treat | unit + year",
    df_one_cohort,
@@ -670,14 +670,14 @@
+
fit_dynamic_twfe = pf.feols(
    "Y ~ i(year, ever_treated, ref = 14) | unit + year",
    df_one_cohort,
    vcov={"CRV1": "unit"},
)
-
+
fit_dynamic_twfe.iplot(
    coord_flip=False,
    title="Event Study",
@@ -687,7 +687,7 @@
    labels=rename_event_study_coefs(fit_dynamic_twfe._coefnames),
)
-
+
-
+
fit_lpdid.iplot(
    coord_flip=False,
    title="Local-Projections-Estimator",
@@ -1166,7 +1166,7 @@ Local Project
    xintercept=18.5,
).show()
-
+
@@ -297,7 +297,7 @@ Marginal Effects and Hypothesis Tests via marginaleffect
-
+
@@ -390,7 +390,7 @@ Marginal Effects and Hypothesis Tests via marginaleffect
Suppose we were interested in testing the hypothesis that \(X_{1} = X_{2}\). Given the relatively large difference between the two coefficients and the small standard errors, we will likely reject the null that the two parameters are equal.
We can run the formal test via the hypotheses
function from the marginaleffects
package.
-
+
"X1 - X2 = 0") hypotheses(fit,
@@ -546,7 +546,7 @@ PyFixest 0.18.0
Additionally, model_matrix_fixest
now returns a dictionary instead of a tuple.
Brings back fixed effects reference setting via i(var1, var2, ref)
syntax. Deprecates the i_ref1
, i_ref2
function arguments. That is, it is again possible to run, e.g.,
-
+
import pyfixest as pf
data = pf.get_data()
@@ -554,7 +554,7 @@ PyFixest 0.18.0
fit1.coef()[0:8]
Via the ref
syntax, we can set the reference level:
-
+
= pf.feols("Y ~ i(f1, X2, ref = 1)", data=data)
fit2 0:8] fit2.coef()[
@@ -563,7 +563,7 @@ PyFixest 0.18.0
PyFixest 0.17.0
Restructures the codebase and reorganizes how users can interact with the pyfixest
API. It is now recommended to use pyfixest
in the following way:
-
+
import numpy as np
import pyfixest as pf
data = pf.get_data()
@@ -631,7 +631,7 @@ PyFixest 0.17.0
The update should not introduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!
Adds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!
-
+
fit.confint(joint=True)
@@ -648,18 +648,18 @@ PyFixest 0.17.0
Intercept
-0.380105
-1.177593
+0.375929
+1.181769
D
--1.759120
--1.046114
+-1.762853
+-1.042381
f1
--0.014097
-0.023645
+-0.014294
+0.023843
@@ -668,7 +668,7 @@ PyFixest 0.17.0
Adds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv()
method.
-
+
= "D", cluster = "group_id") fit.ccv(treatment
/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.
@@ -694,11 +694,11 @@ PyFixest 0.17.0
CCV
-1.4026168622179929
-0.28043
--5.001663
-0.000093
--1.991779
--0.813455
+0.238985
+-5.869057
+0.000015
+-1.904706
+-0.900528
CRV1
@@ -740,7 +740,7 @@ PyFixest 0.14.0
- Changes all docstrings to
numpy
format.
- Difference-in-differences estimation functions now need to be imported via the
pyfixest.did.estimation
module:
-
+
from pyfixest.did.estimation import did2s, lpdid, event_study
diff --git a/pyfixest.html b/pyfixest.html
index 2173ab59..c92a31d5 100644
--- a/pyfixest.html
+++ b/pyfixest.html
@@ -187,10 +187,11 @@
PyFixest: Fast High-Dimensional Fixed Effects Regression in Python
-
+
PyFixest
is a Python implementation of the formidable fixest package for fast high-dimensional fixed effects regression.
The package aims to mimic fixest
syntax and functionality as closely as Python allows: if you know fixest
well, the goal is that you won’t have to read the docs to get started! In particular, this means that all of fixest's
defaults are mirrored by PyFixest
- currently with only one small exception.
Nevertheless, for a quick introduction, you can take a look at the quickstart or the regression chapter of Arthur Turrell’s book on Coding for Economists.
+For questions on PyFixest
, head on over to our PyFixest Discourse forum.
Features
diff --git a/quarto_example/QuartoExample.pdf b/quarto_example/QuartoExample.pdf
index 375fab6a..f16663ee 100644
Binary files a/quarto_example/QuartoExample.pdf and b/quarto_example/QuartoExample.pdf differ
diff --git a/quickstart.html b/quickstart.html
index 96c32ad2..e91a8996 100644
--- a/quickstart.html
+++ b/quickstart.html
@@ -281,7 +281,7 @@ What is a fix
Read Sample Data
In a first step, we load the module and some synthetic example data:
-
+
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@@ -302,7 +302,7 @@ Read Sample Data
data.head()
-
+
@@ -336,7 +336,7 @@ Read Sample Data
-
+
@@ -370,7 +370,7 @@ Read Sample Data
-
+
-numpy : 1.26.4
+pandas : 2.2.3
+numpy : 1.26.4
pyfixest : 0.25.3
-pandas : 2.2.3
matplotlib: 3.9.2
@@ -507,7 +507,7 @@ Read Sample Data
-
+
data.info()
<class 'pandas.core.frame.DataFrame'>
@@ -535,7 +535,7 @@ Read Sample Data
OLS Estimation
We are interested in the relation between the dependent variable Y
and the independent variable X1
using a fixed effect model for group_id
. Let’s see what the data looks like:
-
+
= data.plot(kind="scatter", x="X1", y="Y", c="group_id", colormap="viridis") ax
@@ -546,7 +546,7 @@ OLS Estimation
We can estimate a fixed effects regression via the feols()
function. feols()
has three arguments: a two-sided model formula, the data, and optionally, the type of inference.
-
+
= pf.feols(fml="Y ~ X1 | group_id", data=data, vcov="HC1")
fit type(fit)
@@ -559,7 +559,7 @@ OLS Estimation
Inspecting Model Results
To inspect the results, we can use a summary function or method:
-
+
fit.summary()
###
@@ -577,55 +577,55 @@ Inspecting Model
Or display a formatted regression table:
-
+
pf.etable(fit)
-
+
@@ -688,7 +688,7 @@ Inspecting Model
Alternatively, the .summarize
module contains a summary
function, which can be applied to instances of regression model objects or lists of regression model objects. For details on how to customize etable()
, please take a look at the dedicated vignette.
-
+
pf.summary(fit)
###
@@ -706,7 +706,7 @@ Inspecting Model
You can access individual elements of the summary via dedicated methods: .tidy()
returns a “tidy” pd.DataFrame
, .coef()
returns estimated parameters, and .se()
returns estimated standard errors. Other methods include pvalue()
, confint()
and tstat()
.
-
+
fit.tidy()
@@ -749,7 +749,7 @@ Inspecting Model
-
+
fit.coef()
Coefficient
@@ -757,7 +757,7 @@ Inspecting Model
Name: Estimate, dtype: float64
-
+
fit.se()
Coefficient
@@ -765,7 +765,7 @@ Inspecting Model
Name: Std. Error, dtype: float64
-
+
fit.tstat()
Coefficient
@@ -773,7 +773,7 @@ Inspecting Model
Name: t value, dtype: float64
-
+
fit.confint()
@@ -800,11 +800,11 @@ Inspecting Model
Last, model results can be visualized via dedicated methods for plotting:
-
+
fit.coefplot()  # or pf.coefplot([fit])
-
+
@@ -522,7 +522,7 @@ Examples
-
+
@@ -671,7 +671,7 @@ Examples
In a first step, we estimate a classical event study model:
-
+
# estimate the model
fit = pf.did2s(
    df_het,
@@ -761,10 +761,10 @@ Examples
We can also inspect the model visually:
-
+
fit.iplot(figsize = [1200, 400], coord_flip=False).show()
-
+
@@ -545,7 +545,7 @@ Examples
-
+
diff --git a/reference/did.estimation.lpdid.html b/reference/did.estimation.lpdid.html
index 6729fcf2..d210e196 100644
--- a/reference/did.estimation.lpdid.html
+++ b/reference/did.estimation.lpdid.html
@@ -505,7 +505,7 @@ Returns
Examples
-
+
import pandas as pd
import pyfixest as pf
@@ -528,7 +528,7 @@ Examples
fit.iplot(figsize = [1200, 400], coord_flip=False).show()
-
+
@@ -562,7 +562,7 @@ Examples
-
+
-
+
@@ -606,7 +606,7 @@ Examples
-
+
@@ -656,7 +656,7 @@ Examples
Calling feols()
returns an instance of the [Feols](/reference/Feols.qmd) class. The summary()
method can be used to print the results.
An alternative way to retrieve model results is via the tidy()
method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.
-
+
fit.tidy()
@@ -710,17 +710,17 @@ Examples
You can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef()
for the coefficients, fit.se()
for the standard errors, fit.tstat()
for the t-statistics, fit.pval()
for the p-values, and fit.confint()
for the confidence intervals.
The employed type of inference can be specified via the vcov
argument. If vcov is not provided, PyFixest
employs the fixest
default of iid inference, unless there are fixed effects in the model, in which case feols()
clusters the standard errors by the first fixed effect (CRV1 inference).
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="iid")
fit1 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov="hetero")
fit2 = pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1"}) fit3
Supported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {"CRV1": "f1"}
for CRV1 inference with clustering by f1
or {"CRV3": "f1"}
for CRV3 inference with clustering by f1
. For two-way clustering, you can provide a formula string, e.g. {"CRV1": "f1 + f2"}
for CRV1 inference with clustering by f1 and f2
.
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, vcov={"CRV1": "f1 + f2"}) fit4
Inference can be adjusted post estimation via the vcov
method:
-
+
fit.summary()"iid").summary() fit.vcov(
@@ -754,7 +754,7 @@ Examples
The ssc
argument specifies the small sample correction for inference. In general, feols()
uses all of fixest::feols()
defaults, but sets the fixef.K
argument to "none"
whereas the fixest::feols()
default is "nested"
. See here for more details: link to github.
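As a minimal sketch of how these defaults could be overridden (assuming the pf.ssc() helper and its adj and cluster_adj flags, as in the current pyfixest API; this example is not part of the original docs):

# switch off the small-sample and cluster adjustments entirely (assumed flags)
fit_ssc = pf.feols(
    "Y ~ X1 + X2 | f1 + f2",
    data,
    vcov={"CRV1": "f1"},
    ssc=pf.ssc(adj=False, cluster_adj=False),
)
fit_ssc.summary()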
feols()
supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1
and one with fixed effects for f2
using the sw()
syntax.
-
+
= pf.feols("Y ~ X1 + X2 | sw(f1, f2)", data)
fit type(fit)
@@ -762,55 +762,55 @@ Examples
The returned object is an instance of the FixestMulti
class. You can access the results of the first model via fit.fetch_model(0)
and the results of the second model via fit.fetch_model(1)
. You can compare the model results via the etable()
function:
-
+
pf.etable(fit)
-
+
@@ -852,14 +852,14 @@ Examples
fe
-f2
--
+f1
x
+-
-f1
-x
+f2
-
+x
stats
@@ -893,56 +893,56 @@ Examples
Other supported multiple estimation syntax include sw0()
, csw()
and csw0()
. While sw()
adds variables in a “stepwise” fashion, csw()
does so cumulatively.
-
+
= pf.feols("Y ~ X1 + X2 | csw(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -984,13 +984,13 @@ Examples
fe
-f2
--
+f1
+x
x
-f1
-x
+f2
+-
x
@@ -1025,56 +1025,56 @@ Examples
The sw0()
and csw0()
syntax are similar to sw()
and csw()
, but start with a model that excludes the variables specified in sw()
and csw()
:
-
+
= pf.feols("Y ~ X1 + X2 | sw0(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -1129,16 +1129,16 @@ Examples
fe
-f2
--
+f1
-
x
+-
-f1
+f2
-
-x
-
+x
stats
@@ -1175,56 +1175,56 @@ Examples
The feols()
function also supports multiple dependent variables. The following example estimates two models, one with Y
as the dependent variable and one with Y2
as the dependent variable.
-
+
= pf.feols("Y + Y2 ~ X1 | f1 + f2", data)
fit pf.etable(fit)
-
+
@@ -1260,12 +1260,12 @@ Examples
fe
-f2
+f1
x
x
-f1
+f2
x
x
@@ -1301,56 +1301,56 @@ Examples
It is possible to combine different multiple estimation operators:
-
+
= pf.feols("Y + Y2 ~ X1 | sw(f1, f2)", data)
fit pf.etable(fit)
-
+
@@ -1396,18 +1396,18 @@ Examples
fe
-f2
--
--
+f1
x
x
+-
+-
-f1
-x
-x
+f2
-
-
+x
+x
stats
@@ -1448,7 +1448,7 @@ Examples
In general, using multiple estimation syntax can speed up estimation, as covariates that are demeaned in one model and used in another do not need to be demeaned again: feols()
implements a caching mechanism that stores the demeaned covariates.
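A rough sketch of how one could check this with a timing comparison (illustrative only; time.perf_counter and the formulas below are assumptions, not from the original docs):

import time

start = time.perf_counter()
pf.feols("Y + Y2 ~ X1 + X2 | f1 + f2", data)  # one call: X1 and X2 demeaned once
t_one_call = time.perf_counter() - start

start = time.perf_counter()
pf.feols("Y ~ X1 + X2 | f1 + f2", data)
pf.feols("Y2 ~ X1 + X2 | f1 + f2", data)  # separate calls repeat the demeaning
t_two_calls = time.perf_counter() - start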
Additionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data, split = "f1")
fit pf.etable(fit)
@@ -1514,52 +1514,52 @@ Examples
cluster_adj_value = G / (G - 1)
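The fsplit variant works the same way; a minimal sketch (assuming the same formula and data, with the full-sample model added as the first column):

fit_full = pf.feols("Y ~ X1 + X2 | f1 + f2", data, fsplit = "f1")
pf.etable(fit_full)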
-
+
@@ -1769,7 +1769,7 @@ Examples
fe
-f2
+f1
x
x
x
@@ -1802,7 +1802,7 @@ Examples
x
-f1
+f2
x
x
x
@@ -1950,7 +1950,7 @@ Examples
Besides OLS, feols()
also supports IV estimation via three part formulas:
-
+
= pf.feols("Y ~ X2 | f1 + f2 | X1 ~ Z1", data)
fit fit.tidy()
@@ -2004,7 +2004,7 @@ Examples
Here, X1
is the endogenous variable and Z1
is the instrument. f1
and f2
are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:
-
+
= pf.feols("Y ~ X2 | X1 ~ Z1", data)
fit fit.tidy()
@@ -2068,7 +2068,7 @@ Examples
Last, feols()
supports interaction of variables via the i()
syntax. Documentation on this is tba.
After fitting a model via feols()
, you can use the predict()
method to get the predicted values:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit 0:5] fit.predict()[
@@ -2076,7 +2076,7 @@ Examples
The predict()
method also supports a newdata
argument to predict on new data, which returns a numpy array of the predicted values:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit =data)[0:5] fit.predict(newdata
@@ -2084,11 +2084,11 @@ Examples
Last, you can plot the results of a model via the coefplot()
method:
-
+
= pf.feols("Y ~ X1 + X2 | f1 + f2", data)
fit fit.coefplot()
-
+
@@ -593,7 +593,7 @@ Examples
-
+
diff --git a/reference/report.coefplot.html b/reference/report.coefplot.html
index c3217e91..a373db31 100644
--- a/reference/report.coefplot.html
+++ b/reference/report.coefplot.html
@@ -528,7 +528,7 @@ Returns
Examples
-
+
import pyfixest as pf
from pyfixest.report.utils import rename_categoricals
@@ -544,7 +544,7 @@ Examples
= "both") pf.coefplot([fit1], joint
-
+
@@ -578,7 +578,7 @@ Examples
-
+
-
+
@@ -576,7 +576,7 @@ Examples
-
+
-
+
@@ -497,7 +497,7 @@ Examples
-
+
diff --git a/replicating-the-effect.html b/replicating-the-effect.html
index bf03493a..daff3e8c 100644
--- a/replicating-the-effect.html
+++ b/replicating-the-effect.html
@@ -234,7 +234,7 @@ Replicating Examples from “The Effect”
This notebook replicates code examples from Nick Huntington-Klein’s book on causal inference, The Effect.
-
+
from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import pyfixest as pf
@@ -243,7 +243,7 @@ Replicating Examples from “The Effect”
%watermark --iversions
-
+
@@ -277,7 +277,7 @@ Replicating Examples from “The Effect”
-
+
@@ -317,7 +317,7 @@ Replicating Examples from “The Effect”
Chapter 4: Describing Relationships
-
+
# Read in data
dt = Mroz.load_pandas().data
# Keep just working women
@@ -329,7 +329,7 @@ Chapter
= pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
fit pf.etable(fit)
-/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning:
+/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
@@ -337,52 +337,52 @@ Chapter
dt.loc[:, "earn"] = dt["lwg"].apply("exp")
-
+
@@ -480,7 +480,7 @@ Chapter
Chapter 13: Regression
Example 1
-
+
res = restaurant_inspections.load_pandas().data
res.inspection_score = res.inspection_score.astype(float)
res.NumberofLocations = res.NumberofLocations.astype(float)
@@ -489,52 +489,52 @@ Example 1
fit = pf.feols(fml="inspection_score ~ NumberofLocations", data=res)
pf.etable([fit])
-
+
@@ -597,7 +597,7 @@ Example 1
Example 2
-
+
df = restaurant_inspections.load_pandas().data
fit1 = pf.feols(
@@ -607,52 +607,52 @@ Example 2
pf.etable([fit1, fit2])
-
+
@@ -749,7 +749,7 @@ Example 2
Example 3: HC Standard Errors
-
+
="inspection_score ~ Year + Weekend", data=df, vcov="HC3").summary() pf.feols(fml
###
@@ -771,7 +771,7 @@ Example 3: HC
Example 4: Clustered Standard Errors
-
+
pf.feols(="inspection_score ~ Year + Weekend", data=df, vcov={"CRV1": "Weekend"}
fml ).tidy()
@@ -837,7 +837,7 @@ Exampl
Example 5: Bootstrap Inference
-
+
= pf.feols(fml="inspection_score ~ Year + Weekend", data=df)
fit =999, param="Year") fit.wildboottest(reps
@@ -860,7 +860,7 @@ Example 1
Example 2
-
+
gm = gapminder.load_pandas().data
gm["logGDPpercap"] = gm["gdpPercap"].apply("log")
@@ -946,7 +946,7 @@ Example 2
Example 3: TWFE
-
+
# Set our individual and time (index) for our data
= pf.feols(fml="lifeExp ~ np.log(gdpPercap) | country + year", data=gm)
fit fit.summary()
@@ -971,7 +971,7 @@ Example 3: TWFE
Chapter 18: Difference-in-Differences
Example 1
-
+
od = organ_donations.load_pandas().data
# Create Treatment Variable
@@ -999,7 +999,7 @@ Example 1
Example 3: Dynamic Treatment Effect
-
+
od = organ_donations.load_pandas().data
# Create Treatment Variable
diff --git a/search.json b/search.json
index eeddcd32..16e2c702 100644
--- a/search.json
+++ b/search.json
@@ -479,7 +479,7 @@
"href": "reference/estimation.estimation.feols.html#examples",
"title": "estimation.estimation.feols",
"section": "Examples",
- "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See here for more details: link to github.\nfeols() supports a range of multiple estimation syntax, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2 using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\n-\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation syntax include sw0(), csw() and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf2\n-\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() syntax are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf2\n-\n-\nx\n\n\nf1\n-\nx\n-\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y1 as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf2\nx\nx\n\n\nf1\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf2\n-\n-\nx\nx\n\n\nf1\nx\nx\n-\n-\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using muliple estimation syntax can improve the estimation time as covariates that are demeaned in one model and are used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 1)\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nLast, you can plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nwould run a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions.\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nLast, you can compute the cluster causal variance estimator by Athey et al by using the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.284647\n0.056518\n0.955552\n-0.581934\n0.61411\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
+ "text": "Examples\nAs in fixest, the [Feols(/reference/Feols.qmd) function can be used to estimate a simple linear regression model with fixed effects. The following example regresses Y on X1 and X2 with fixed effects for f1 and f2: fixed effects are specified after the | symbol.\n\nimport pyfixest as pf\n\ndata = pf.get_data()\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.summary()\n\n\n \n \n \n\n\n\n \n \n \n\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nCalling feols() returns an instance of the [Feols(/reference/Feols.qmd) class. The summary() method can be used to print the results.\nAn alternative way to retrieve model results is via the tidy() method, which returns a pandas dataframe with the estimated coefficients, standard errors, t-statistics, and p-values.\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-0.924046\n0.060934\n-15.164621\n2.664535e-15\n-1.048671\n-0.799421\n\n\nX2\n-0.174107\n0.014608\n-11.918277\n1.069367e-12\n-0.203985\n-0.144230\n\n\n\n\n\n\n\nYou can also access all elements in the tidy data frame by dedicated methods, e.g. fit.coef() for the coefficients, fit.se() for the standard errors, fit.tstat() for the t-statistics, and fit.pval() for the p-values, and fit.confint() for the confidence intervals.\nThe employed type of inference can be specified via the vcov argument. If vcov is not provided, PyFixest employs the fixest default of iid inference, unless there are fixed effects in the model, in which case feols() clusters the standard error by the first fixed effect (CRV1 inference).\n\nfit1 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"iid\")\nfit2 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov=\"hetero\")\nfit3 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1\"})\n\nSupported inference types are “iid”, “hetero”, “HC1”, “HC2”, “HC3”, and “CRV1”/“CRV3”. Clustered standard errors are specified via a dictionary, e.g. {\"CRV1\": \"f1\"} for CRV1 inference with clustering by f1 or {\"CRV3\": \"f1\"} for CRV3 inference with clustering by f1. For two-way clustering, you can provide a formula string, e.g. {\"CRV1\": \"f1 + f2\"} for CRV1 inference with clustering by f1.\n\nfit4 = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, vcov={\"CRV1\": \"f1 + f2\"})\n\nInference can be adjusted post estimation via the vcov method:\n\nfit.summary()\nfit.vcov(\"iid\").summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: CRV1\nObservations: 997\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.061 | -15.165 | 0.000 | -1.049 | -0.799 |\n| X2 | -0.174 | 0.015 | -11.918 | 0.000 | -0.204 | -0.144 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: f1+f2\nInference: iid\nObservations: 997\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -0.924 | 0.054 | -16.995 | 0.000 | -1.031 | -0.817 |\n| X2 | -0.174 | 0.014 | -12.081 | 0.000 | -0.202 | -0.146 |\n---\nRMSE: 1.346 R2: 0.659 R2 Within: 0.303 \n\n\nThe ssc argument specifies the small sample correction for inference. In general, feols() uses all of the fixest::feols() defaults, but sets the fixef.K argument to \"none\" whereas the fixest::feols() default is \"nested\". See the pyfixest GitHub repository for more details.\nfeols() supports a range of multiple estimation syntaxes, i.e. you can estimate multiple models in one call. The following example estimates two models, one with fixed effects for f1 and one with fixed effects for f2, using the sw() syntax.\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw(f1, f2)\", data)\ntype(fit)\n\npyfixest.estimation.FixestMulti_.FixestMulti\n\n\nThe returned object is an instance of the FixestMulti class. You can access the results of the first model via fit.fetch_model(0) and the results of the second model via fit.fetch_model(1). You can compare the model results via the etable() function:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nfe\n\n\nf1\nx\n-\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. type\nby: f1\nby: f2\n\n\nR2\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nOther supported multiple estimation operators include sw0(), csw(), and csw0(). While sw() adds variables in a “stepwise” fashion, csw() does so cumulatively.\n\nfit = pf.feols(\"Y ~ X1 + X2 | csw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.950***\n(0.067)\n-0.924***\n(0.061)\n\n\nX2\n-0.174***\n(0.018)\n-0.174***\n(0.015)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\n-\nx\n\n\nstats\n\n\nObservations\n997\n997\n\n\nS.E. type\nby: f1\nby: f1\n\n\nR2\n0.489\n0.659\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe sw0() and csw0() operators are similar to sw() and csw(), but start with a model that excludes the variables specified in sw() and csw():\n\nfit = pf.feols(\"Y ~ X1 + X2 | sw0(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\nX1\n-0.993***\n(0.082)\n-0.950***\n(0.067)\n-0.979***\n(0.077)\n\n\nX2\n-0.176***\n(0.022)\n-0.174***\n(0.018)\n-0.175***\n(0.022)\n\n\nIntercept\n0.889***\n(0.108)\n\n\n\n\nfe\n\n\nf1\n-\nx\n-\n\n\nf2\n-\n-\nx\n\n\nstats\n\n\nObservations\n998\n997\n998\n\n\nS.E. type\niid\nby: f1\nby: f2\n\n\nR2\n0.177\n0.489\n0.354\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nThe feols() function also supports multiple dependent variables. The following example estimates two models, one with Y as the dependent variable and one with Y2 as the dependent variable.\n\nfit = pf.feols(\"Y + Y2 ~ X1 | f1 + f2\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-0.919***\n(0.065)\n-1.228***\n(0.195)\n\n\nfe\n\n\nf1\nx\nx\n\n\nf2\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n\n\nS.E. 
type\nby: f1\nby: f1\n\n\nR2\n0.609\n0.168\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIt is possible to combine different multiple estimation operators:\n\nfit = pf.feols(\"Y + Y2 ~ X1 | sw(f1, f2)\", data)\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\nY2\nY\nY2\n\n\n(1)\n(2)\n(3)\n(4)\n\n\n\n\ncoef\n\n\nX1\n-0.949***\n(0.069)\n-1.266***\n(0.176)\n-0.982***\n(0.081)\n-1.301***\n(0.205)\n\n\nfe\n\n\nf1\nx\nx\n-\n-\n\n\nf2\n-\n-\nx\nx\n\n\nstats\n\n\nObservations\n997\n998\n998\n999\n\n\nS.E. type\nby: f1\nby: f1\nby: f2\nby: f2\n\n\nR2\n0.437\n0.115\n0.302\n0.090\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nIn general, using multiple estimation syntax can reduce estimation time, as covariates that are demeaned in one model and used in another model do not need to be demeaned again: feols() implements a caching mechanism that stores the demeaned covariates.\nAdditionally, you can fit models on different samples via the split and fsplit arguments. The split argument splits the sample according to the variable specified in the argument, while the fsplit argument also includes the full sample in the estimation.\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data, split = \"f1\")\npf.etable(fit)\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/utils/utils.py:160: RuntimeWarning: divide by zero encountered in scalar divide\n cluster_adj_value = G / (G - 
1)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n(3)\n(4)\n(5)\n(6)\n(7)\n(8)\n(9)\n(10)\n(11)\n(12)\n(13)\n(14)\n(15)\n(16)\n(17)\n(18)\n(19)\n(20)\n(21)\n(22)\n(23)\n(24)\n(25)\n(26)\n(27)\n(28)\n(29)\n(30)\n\n\n\n\ncoef\n\n\nX1\n-1.357\n(INF)\n-1.137\n(INF)\n-0.455\n(INF)\n-1.138\n(INF)\n0.201\n(INF)\n-0.306\n(INF)\n-0.597\n(INF)\n-0.824\n(INF)\n-1.482\n(INF)\n-1.117\n(INF)\n-1.142\n(INF)\n-1.334\n(INF)\n-3.531\n(INF)\n-1.102\n(INF)\n-0.826\n(INF)\n-0.773\n(INF)\n-1.501\n(INF)\n-1.226\n(INF)\n-0.641\n(INF)\n-0.378\n(INF)\n-0.652\n(INF)\n-1.508\n(INF)\n-0.941\n(INF)\n-0.206\n(INF)\n-0.195\n(INF)\n-0.702\n(INF)\n-1.141\n(INF)\n-1.349\n(INF)\n-0.537\n(INF)\n-1.141\n(INF)\n\n\nX2\n-0.250\n(INF)\n0.198\n(INF)\n-0.145\n(INF)\n-0.330\n(INF)\n-0.177\n(INF)\n-0.187\n(INF)\n-0.118\n(INF)\n-0.292\n(INF)\n-0.029\n(INF)\n-0.264\n(INF)\n-0.148\n(INF)\n-0.313\n(INF)\n-0.152\n(INF)\n-0.296\n(INF)\n0.130\n(INF)\n-0.059\n(INF)\n-0.223\n(INF)\n-0.113\n(INF)\n-0.261\n(INF)\n0.089\n(INF)\n-0.148\n(INF)\n-0.267\n(INF)\n-0.125\n(INF)\n-0.282\n(INF)\n-0.153\n(INF)\n0.004\n(INF)\n0.083\n(INF)\n-0.226\n(INF)\n-0.158\n(INF)\n-0.160\n(INF)\n\n\nfe\n\n\nf1\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nf2\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\nx\n\n\nstats\n\n\nObservations\n30\n29\n44\n30\n31\n36\n36\n30\n36\n35\n32\n30\n23\n28\n34\n34\n48\n40\n36\n34\n35\n37\n27\n35\n29\n27\n43\n36\n24\n28\n\n\nS.E. type\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\nby: f1\n\n\nR2\n0.850\n0.691\n0.578\n0.745\n0.939\n0.644\n0.792\n0.776\n0.919\n0.797\n0.727\n0.822\n0.924\n0.865\n0.711\n0.808\n0.651\n0.819\n0.746\n0.731\n0.880\n0.868\n0.796\n0.648\n0.915\n0.820\n0.837\n0.789\n0.688\n0.883\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nBesides OLS, feols() also supports IV estimation via three part formulas:\n\nfit = pf.feols(\"Y ~ X2 | f1 + f2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.050097\n0.085493\n-12.282912\n5.133671e-13\n-1.224949\n-0.875245\n\n\nX2\n-0.174351\n0.014779\n-11.797039\n1.369793e-12\n-0.204578\n-0.144124\n\n\n\n\n\n\n\nHere, X1 is the endogenous variable and Z1 is the instrument. f1 and f2 are the fixed effects, as before. To estimate IV models without fixed effects, simply omit the fixed effects part of the formula:\n\nfit = pf.feols(\"Y ~ X2 | X1 ~ Z1\", data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.861939\n0.151187\n5.701137\n1.567858e-08\n0.565257\n1.158622\n\n\nX1\n-0.967238\n0.130078\n-7.435847\n2.238210e-13\n-1.222497\n-0.711980\n\n\nX2\n-0.176416\n0.021769\n-8.104001\n1.554312e-15\n-0.219134\n-0.133697\n\n\n\n\n\n\n\nLast, feols() supports interaction of variables via the i() syntax. 
Documentation on this is tba.\nAfter fitting a model via feols(), you can use the predict() method to get the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict()[0:5]\n\narray([ 3.0633663 , -0.69574133, -0.91240433, -0.46370257, -1.67331154])\n\n\nThe predict() method also supports a newdata argument to predict on new data, which returns a numpy array of the predicted values:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.predict(newdata=data)[0:5]\n\narray([ 2.14598761, nan, nan, 3.06336415, -0.69574276])\n\n\nLast, you can plot the results of a model via the coefplot() method:\n\nfit = pf.feols(\"Y ~ X1 + X2 | f1 + f2\", data)\nfit.coefplot()\n\n \n \n\n\nObjects of type Feols support a range of other methods to conduct inference. For example, you can run a wild (cluster) bootstrap via the wildboottest() method. The following call runs a wild bootstrap test for the coefficient of X1 with 1000 bootstrap repetitions:\n\nfit.wildboottest(param = \"X1\", reps=1000)\n\nparam X1\nt value -14.70814685400939\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(f1)\nimpose_null True\ndtype: object\n\n\nFor a wild cluster bootstrap, you can specify the cluster variable via the cluster argument:\n\nfit.wildboottest(param = \"X1\", reps=1000, cluster=\"group_id\")\n\nparam X1\nt value -13.658130940490494\nPr(>|t|) 0.0\nbootstrap_type 11\ninference CRV(group_id)\nimpose_null True\ndtype: object\n\n\nThe ritest() method can be used to conduct randomization inference:\n\nfit.ritest(resampvar = \"X1\", reps=1000)\n\nH0 X1=0\nri-type randomization-c\nEstimate -0.9240461507764967\nPr(>|t|) 0.0\nStd. Error (Pr(>|t|)) 0.0\n2.5% (Pr(>|t|)) 0.0\n97.5% (Pr(>|t|)) 0.0\ndtype: object\n\n\nFinally, you can compute the causal cluster variance estimator by Abadie et al. (QJE, 2023) by using the ccv() method:\n\nimport numpy as np\nrng = np.random.default_rng(1234)\ndata[\"D\"] = rng.choice([0, 1], size = data.shape[0])\nfit_D = pf.feols(\"Y ~ D\", data = data)\nfit_D.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n0.016087657906364183\n0.242455\n0.066353\n0.947828\n-0.493292\n0.525467\n\n\nCRV1\n0.016088\n0.13378\n0.120254\n0.905614\n-0.264974\n0.29715",
"crumbs": [
"Function Reference",
"Estimation Functions",
@@ -546,7 +546,7 @@
"href": "replicating-the-effect.html#chapter-4-describing-relationships",
"title": "Replicating Examples from “The Effect”",
"section": "Chapter 4: Describing Relationships",
- "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4055/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
+ "text": "Chapter 4: Describing Relationships\n\n# Read in data\ndt = Mroz.load_pandas().data\n# Keep just working women\ndt = dt.query(\"lfp\")\n# Create unlogged earnings\ndt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n# 5. Run multiple linear regression models by succesively adding controls\nfit = pf.feols(fml=\"lwg ~ csw(inc, wc, k5)\", data=dt, vcov=\"iid\")\npf.etable(fit)\n\n/tmp/ipykernel_4227/786816010.py:6: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n dt.loc[:, \"earn\"] = dt[\"lwg\"].apply(\"exp\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlwg\n\n\n(1)\n(2)\n(3)\n\n\n\n\ncoef\n\n\ninc\n0.010**\n(0.003)\n0.005\n(0.003)\n0.005\n(0.003)\n\n\nwc\n\n0.342***\n(0.075)\n0.349***\n(0.075)\n\n\nk5\n\n\n-0.072\n(0.087)\n\n\nIntercept\n1.007***\n(0.071)\n0.972***\n(0.070)\n0.982***\n(0.071)\n\n\nstats\n\n\nObservations\n428\n428\n428\n\n\nS.E. type\niid\niid\niid\n\n\nR2\n0.020\n0.066\n0.068\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"
},
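The SettingWithCopyWarning in the entry above is raised because the assignment targets the result of .query(), which may be a view of the original DataFrame. A small sketch of how the warning could be avoided; it assumes Mroz comes from the causaldata package used by the replication materials:

# hypothetical import; the original snippet does not show where Mroz comes from
from causaldata import Mroz

dt = Mroz.load_pandas().data
# Take an explicit copy of the filtered frame so the later assignment
# unambiguously writes to a new DataFrame rather than a view.
dt = dt.query("lfp").copy()
dt["earn"] = dt["lwg"].apply("exp")  # no SettingWithCopyWarning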
{
"objectID": "replicating-the-effect.html#chapter-13-regression",
@@ -623,7 +623,7 @@
"href": "difference-in-differences.html#setup",
"title": "Difference-in-Differences Estimation",
"section": "Setup",
- "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npyfixest: 0.25.3\npandas : 2.2.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
+ "text": "Setup\n\nfrom importlib import resources\n\nimport pandas as pd\n\nimport pyfixest as pf\nfrom pyfixest.report.utils import rename_event_study_coefs\nfrom pyfixest.utils.dgps import get_sharkfin\n\n%load_ext watermark\n%watermark --iversions\n%load_ext autoreload\n%autoreload 2\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\npyfixest: 0.25.3\n\n\n\n\n# one-shot adoption data - parallel trends is true\ndf_one_cohort = get_sharkfin()\ndf_one_cohort.head()\n\n\n\n\n\n\n\n\nunit\nyear\ntreat\nY\never_treated\n\n\n\n\n0\n0\n0\n0\n1.629307\n0\n\n\n1\n0\n1\n0\n0.825902\n0\n\n\n2\n0\n2\n0\n0.208988\n0\n\n\n3\n0\n3\n0\n-0.244739\n0\n\n\n4\n0\n4\n0\n0.804665\n0\n\n\n\n\n\n\n\n\n# multi-cohort adoption data\ndf_multi_cohort = pd.read_csv(\n resources.files(\"pyfixest.did.data\").joinpath(\"df_het.csv\")\n)\ndf_multi_cohort.head()\n\n\n\n\n\n\n\n\nunit\nstate\ngroup\nunit_fe\ng\nyear\nyear_fe\ntreat\nrel_year\nrel_year_binned\nerror\nte\nte_dynamic\ndep_var\n\n\n\n\n0\n1\n33\nGroup 2\n7.043016\n2010\n1990\n0.066159\nFalse\n-20.0\n-6\n-0.086466\n0\n0.0\n7.022709\n\n\n1\n1\n33\nGroup 2\n7.043016\n2010\n1991\n-0.030980\nFalse\n-19.0\n-6\n0.766593\n0\n0.0\n7.778628\n\n\n2\n1\n33\nGroup 2\n7.043016\n2010\n1992\n-0.119607\nFalse\n-18.0\n-6\n1.512968\n0\n0.0\n8.436377\n\n\n3\n1\n33\nGroup 2\n7.043016\n2010\n1993\n0.126321\nFalse\n-17.0\n-6\n0.021870\n0\n0.0\n7.191207\n\n\n4\n1\n33\nGroup 2\n7.043016\n2010\n1994\n-0.106921\nFalse\n-16.0\n-6\n-0.017603\n0\n0.0\n6.918492"
},
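With df_multi_cohort loaded, a natural next step is an event-study specification. The sketch below is our illustration rather than part of the original setup section; it assumes pyfixest's i() syntax with rel_year = -1.0 (the period before treatment) as the reference level, and clusters by state:

import pyfixest as pf

fit_es = pf.feols(
    "dep_var ~ i(rel_year, ref=-1.0) | unit + year",
    df_multi_cohort,
    vcov={"CRV1": "state"},
)
fit_es.iplot()  # plot the estimated event-study coefficients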
{
"objectID": "difference-in-differences.html#examining-treatment-timing",
@@ -665,7 +665,7 @@
"href": "quickstart.html",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see how the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Fixest class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied on instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and se() estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s have a quick d-tour on the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One possibility to do this is by adding a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis is does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do it is by a demeaning procedure. The idea is to subtract the average value of each level of group_id from the respective observations. This way, we control for the fixed effects without adding all these dummy variables. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y1 ~ X | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "A fixed effect model is a statistical model that includes fixed effects, which are parameters that are estimated to be constant across different groups.\nExample [Panel Data]: In the context of panel data, fixed effects are parameters that are constant across different individuals or time. The typical model example is given by the following equation:\n\\[\nY_{it} = \\beta X_{it} + \\alpha_i + \\psi_t + \\varepsilon_{it}\n\\]\nwhere \\(Y_{it}\\) is the dependent variable for individual \\(i\\) at time \\(t\\), \\(X_{it}\\) is the independent variable, \\(\\beta\\) is the coefficient of the independent variable, \\(\\alpha_i\\) is the individual fixed effect, \\(\\psi_t\\) is the time fixed effect, and \\(\\varepsilon_{it}\\) is the error term. The individual fixed effect \\(\\alpha_i\\) is a parameter that is constant across time for each individual, while the time fixed effect \\(\\psi_t\\) is a parameter that is constant across individuals for each time period.\nNote however that, despite the fact that fixed effects are commonly used in panel setting, one does not need a panel data set to work with fixed effects. For example, cluster randomized trials with cluster fixed effects, or wage regressions with worker and firm fixed effects.\nIn this “quick start” guide, we will show you how to estimate a fixed effect model using the PyFixest package. We do not go into the details of the theory behind fixed effect models, but we focus on how to estimate them using PyFixest.\n\n\n\nIn a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data.\n\n\n\nWe are interested in the relation between the dependent variable Y and the independent variables X1 using a fixed effect model for group_id. 
Let’s see what the data looks like:\n\nax = data.plot(kind=\"scatter\", x=\"X1\", y=\"Y\", c=\"group_id\", colormap=\"viridis\")\n\n\n\n\n\n\n\n\nWe can estimate a fixed effects regression via the feols() function. feols() has three arguments: a two-sided model formula, the data, and optionally, the type of inference.\n\nfit = pf.feols(fml=\"Y ~ X1 | group_id\", data=data, vcov=\"HC1\")\ntype(fit)\n\npyfixest.estimation.feols_.Feols\n\n\nThe first part of the formula contains the dependent variable and “regular” covariates, while the second part contains fixed effects.\nfeols() returns an instance of the Feols class.\n\n\n\nTo inspect the results, we can use a summary function or method:\n\nfit.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nOr display a formatted regression table:\n\npf.etable(fit)\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n\n\nfe\n\n\ngroup_id\nx\n\n\nstats\n\n\nObservations\n998\n\n\nS.E. type\nhetero\n\n\nR2\n0.137\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nAlternatively, the .summarize module contains a summary function, which can be applied to instances of regression model objects or lists of regression model objects. For details on how to customize etable(), please take a look at the dedicated vignette.\n\npf.summary(fit)\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: group_id\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| X1 | -1.019 | 0.082 | -12.352 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.137 R2 Within: 0.126 \n\n\nYou can access individual elements of the summary via dedicated methods: .tidy() returns a “tidy” pd.DataFrame, .coef() returns estimated parameters, and .se() estimated standard errors. Other methods include pvalue(), confint() and tstat().\n\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nX1\n-1.019009\n0.082498\n-12.351897\n0.0\n-1.180898\n-0.857119\n\n\n\n\n\n\n\n\nfit.coef()\n\nCoefficient\nX1 -1.019009\nName: Estimate, dtype: float64\n\n\n\nfit.se()\n\nCoefficient\nX1 0.082498\nName: Std. Error, dtype: float64\n\n\n\nfit.tstat()\n\nCoefficient\nX1 -12.351897\nName: t value, dtype: float64\n\n\n\nfit.confint()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nX1\n-1.180898\n-0.857119\n\n\n\n\n\n\n\nLast, model results can be visualized via dedicated methods for plotting:\n\nfit.coefplot()\n# or pf.coefplot([fit])\n\n \n \n\n\n\n\n\nLet’s take a quick detour into the intuition behind fixed effects models using the example above. To do so, let us begin by comparing it with a simple OLS model.\n\nfit_simple = pf.feols(\"Y ~ X1\", data=data, vcov=\"HC1\")\n\nfit_simple.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. 
Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 |\n| X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 |\n---\nRMSE: 2.158 R2: 0.123 \n\n\nWe can compare both models side by side in a regression table:\n\npf.etable([fit, fit_simple])\n\n\n\n\n\n\n\n\n\n\n\n\n\nY\n\n\n(1)\n(2)\n\n\n\n\ncoef\n\n\nX1\n-1.019***\n(0.082)\n-1.000***\n(0.082)\n\n\nIntercept\n\n0.919***\n(0.112)\n\n\nfe\n\n\ngroup_id\nx\n-\n\n\nstats\n\n\nObservations\n998\n998\n\n\nS.E. type\nhetero\nhetero\n\n\nR2\n0.137\n0.123\n\n\n\nSignificance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)\n\n\n\n\n\n\n\n \n\n\nWe see that the X1 coefficient is -1.019, which is less than the value from the OLS model in column (2). Where is the difference coming from? Well, in the fixed effect model we are interested in controlling for the feature group_id. One way to do this is to add a simple dummy variable for each level of group_id.\n\nfit_dummy = pf.feols(\"Y ~ X1 + C(group_id) \", data=data, vcov=\"HC1\")\n\nfit_dummy.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.760 | 0.288 | 2.640 | 0.008 | 0.195 | 1.326 |\n| X1 | -1.019 | 0.083 | -12.234 | 0.000 | -1.182 | -0.856 |\n| C(group_id)[T.1.0] | 0.380 | 0.451 | 0.844 | 0.399 | -0.504 | 1.264 |\n| C(group_id)[T.2.0] | 0.084 | 0.389 | 0.216 | 0.829 | -0.680 | 0.848 |\n| C(group_id)[T.3.0] | 0.790 | 0.415 | 1.904 | 0.057 | -0.024 | 1.604 |\n| C(group_id)[T.4.0] | -0.189 | 0.388 | -0.487 | 0.626 | -0.950 | 0.572 |\n| C(group_id)[T.5.0] | 0.537 | 0.388 | 1.385 | 0.166 | -0.224 | 1.297 |\n| C(group_id)[T.6.0] | 0.307 | 0.398 | 0.771 | 0.441 | -0.474 | 1.087 |\n| C(group_id)[T.7.0] | 0.015 | 0.422 | 0.035 | 0.972 | -0.814 | 0.844 |\n| C(group_id)[T.8.0] | 0.382 | 0.406 | 0.941 | 0.347 | -0.415 | 1.179 |\n| C(group_id)[T.9.0] | 0.219 | 0.417 | 0.526 | 0.599 | -0.599 | 1.037 |\n| C(group_id)[T.10.0] | -0.363 | 0.422 | -0.861 | 0.390 | -1.191 | 0.465 |\n| C(group_id)[T.11.0] | 0.201 | 0.387 | 0.520 | 0.603 | -0.559 | 0.961 |\n| C(group_id)[T.12.0] | -0.110 | 0.410 | -0.268 | 0.788 | -0.915 | 0.694 |\n| C(group_id)[T.13.0] | 0.126 | 0.440 | 0.287 | 0.774 | -0.736 | 0.989 |\n| C(group_id)[T.14.0] | 0.353 | 0.416 | 0.848 | 0.397 | -0.464 | 1.170 |\n| C(group_id)[T.15.0] | 0.469 | 0.398 | 1.179 | 0.239 | -0.312 | 1.249 |\n| C(group_id)[T.16.0] | -0.135 | 0.396 | -0.340 | 0.734 | -0.913 | 0.643 |\n| C(group_id)[T.17.0] | -0.005 | 0.401 | -0.013 | 0.989 | -0.792 | 0.781 |\n| C(group_id)[T.18.0] | 0.283 | 0.403 | 0.702 | 0.483 | -0.508 | 1.074 |\n---\nRMSE: 2.141 R2: 0.137 \n\n\nThis does not scale well! Imagine you have 1000 different levels of group_id. You would need to add 1000 dummy variables to your model. This is where fixed effect models come in handy. They allow you to control for these fixed effects without adding all these dummy variables. The way to do this is via a demeaning procedure: we subtract the average value of each level of group_id from the respective observations, which controls for the fixed effects without estimating a separate dummy coefficient for each group. 
Let’s try to do this manually:\n\ndef _demean_column(df: pd.DataFrame, column: str, by: str) -> pd.Series:\n return df[column] - df.groupby(by)[column].transform(\"mean\")\n\n\nfit_demeaned = pf.feols(\n fml=\"Y_demeaned ~ X1_demeaned\",\n data=data.assign(\n Y_demeaned=lambda df: _demean_column(df, \"Y\", \"group_id\"),\n X1_demeaned=lambda df: _demean_column(df, \"X1\", \"group_id\"),\n ),\n vcov=\"HC1\",\n)\n\nfit_demeaned.summary()\n\n###\n\nEstimation: OLS\nDep. var.: Y_demeaned, Fixed effects: 0\nInference: HC1\nObservations: 998\n\n| Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5% | 97.5% |\n|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|\n| Intercept | 0.003 | 0.068 | 0.041 | 0.968 | -0.130 | 0.136 |\n| X1_demeaned | -1.019 | 0.083 | -12.345 | 0.000 | -1.181 | -0.857 |\n---\nRMSE: 2.141 R2: 0.126 \n\n\nWe get the same results as the fixed effect model Y ~ X1 | group_id above. The PyFixest package uses a more efficient algorithm to estimate the fixed effect model, but the intuition is the same.\n\n\n\nYou can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1}' (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results as if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
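The update rule above can be verified with a few lines of numpy. The following self-contained sketch (illustrative data and names, not from the original page) confirms that the recursive update reproduces a full OLS refit on the appended data:

import numpy as np

rng = np.random.default_rng(42)
X = np.c_[np.ones(100), rng.normal(size=(100, 2))]
beta_true = np.array([1.0, -1.0, -0.2])
y = X @ beta_true + rng.normal(size=100)

# OLS fit on the first n observations
beta_n = np.linalg.solve(X.T @ X, X.T @ y)

# one new observation (x_new, y_new)
x_new = np.r_[1.0, rng.normal(size=2)]
y_new = x_new @ beta_true + rng.normal()

# recursive update: beta_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1}' (y_{n+1} - x_{n+1} beta_n)
X_app = np.vstack([X, x_new])
beta_upd = beta_n + np.linalg.solve(X_app.T @ X_app, x_new) * (y_new - x_new @ beta_n)

# full refit on the appended data agrees with the recursive update
beta_full = np.linalg.solve(X_app.T @ X_app, X_app.T @ np.r_[y, y_new])
print(np.allclose(beta_upd, beta_full))  # True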
{
"objectID": "quickstart.html#what-is-a-fixed-effect-model",
@@ -679,7 +679,7 @@
"href": "quickstart.html#read-sample-data",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\nnumpy : 1.26.4\npyfixest : 0.25.3\npandas : 2.2.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
+ "text": "In a first step, we load the module and some synthetic example data:\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nfrom lets_plot import LetsPlot\n\nimport pyfixest as pf\n\nLetsPlot.setup_html()\n\nplt.style.use(\"seaborn-v0_8\")\n\n%load_ext watermark\n%config InlineBackend.figure_format = \"retina\"\n%watermark --iversions\n\ndata = pf.get_data()\n\ndata.head()\n\n\n \n \n \n\n\n\n \n \n \n\n\n\n \n \n \n\n\npandas : 2.2.3\nnumpy : 1.26.4\npyfixest : 0.25.3\nmatplotlib: 3.9.2\n\n\n\n\n\n\n\n\n\n\nY\nY2\nX1\nX2\nf1\nf2\nf3\ngroup_id\nZ1\nZ2\nweights\n\n\n\n\n0\nNaN\n2.357103\n0.0\n0.457858\n15.0\n0.0\n7.0\n9.0\n-0.330607\n1.054826\n0.661478\n\n\n1\n-1.458643\n5.163147\nNaN\n-4.998406\n6.0\n21.0\n4.0\n8.0\nNaN\n-4.113690\n0.772732\n\n\n2\n0.169132\n0.751140\n2.0\n1.558480\nNaN\n1.0\n7.0\n16.0\n1.207778\n0.465282\n0.990929\n\n\n3\n3.319513\n-2.656368\n1.0\n1.560402\n1.0\n10.0\n11.0\n3.0\n2.869997\n0.467570\n0.021123\n\n\n4\n0.134420\n-1.866416\n2.0\n-3.472232\n19.0\n20.0\n6.0\n14.0\n0.835819\n-3.115669\n0.790815\n\n\n\n\n\n\n\n\ndata.info()\n\n<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 1000 entries, 0 to 999\nData columns (total 11 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Y 999 non-null float64\n 1 Y2 1000 non-null float64\n 2 X1 999 non-null float64\n 3 X2 1000 non-null float64\n 4 f1 999 non-null float64\n 5 f2 1000 non-null float64\n 6 f3 1000 non-null float64\n 7 group_id 1000 non-null float64\n 8 Z1 999 non-null float64\n 9 Z2 1000 non-null float64\n 10 weights 1000 non-null float64\ndtypes: float64(11)\nmemory usage: 86.1 KB\n\n\nWe see that some of our columns have missing data."
},
{
"objectID": "quickstart.html#ols-estimation",
@@ -707,7 +707,7 @@
"href": "quickstart.html#updating-regression-coefficients",
"title": "Getting Started with PyFixest",
"section": "",
- "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.76200339, -0.95890348, -0.19108466])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.78334343, -0.96579542, -0.19535336])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.78334343, -0.96579542, -0.19535336])"
+ "text": "You can update the coefficients of a model object via the update() method, which may be useful in an online learning setting where data arrives sequentially.\nTo see this in action, let us first fit a model on a subset of the data:\n\ndata_subsample = data.sample(frac=0.5)\nm = pf.feols(\"Y ~ X1 + X2\", data=data_subsample)\n# current coefficient vector\nm._beta_hat\n\narray([ 0.99581185, -1.0423337 , -0.18385767])\n\n\nThen sample 5 new observations and update the model with the new data. The update rule is\n\\[\n\\hat{\\beta}_{n+1} = \\hat{\\beta}_n + (X_{n+1}' X_{n+1})^{-1} x_{n+1} + (y_{n+1} - x_{n+1} \\hat{\\beta}_n)\n\\]\nfor a new observation \\((x_{n+1}, y_{n+1})\\).\n\nnew_points_id = np.random.choice(list(set(data.index) - set(data_subsample.index)), 5)\nX_new, y_new = (\n np.c_[np.ones(len(new_points_id)), data.loc[new_points_id][[\"X1\", \"X2\"]].values],\n data.loc[new_points_id][\"Y\"].values,\n)\nm.update(X_new, y_new)\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])\n\n\nWe verify that we get the same results if we had estimated the model on the appended data.\n\npf.feols(\n \"Y ~ X1 + X2\", data=data.loc[data_subsample.index.append(pd.Index(new_points_id))]\n).coef().values\n\narray([ 0.99469903, -1.04988716, -0.1869025 ])"
},
{
"objectID": "quickstart.html#inference-via-the-wild-bootstrap",
@@ -742,7 +742,7 @@
"href": "quickstart.html#joint-confidence-intervals",
"title": "Getting Started with PyFixest",
"section": "Joint Confidence Intervals",
- "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.428799\n1.406710\n\n\nX1\n-1.161391\n-0.737491\n\n\nC(f1)[T.1.0]\n1.380485\n3.784814\n\n\nC(f1)[T.2.0]\n-2.842798\n-0.321070\n\n\nC(f1)[T.3.0]\n-1.612387\n0.987719"
+ "text": "Joint Confidence Intervals\nSimultaneous confidence bands for a vector of parameters can be computed via the joint_confint() method. See Simultaneous confidence bands: Theory, implementation, and an application to SVARs for background.\n\nfit_ci = pf.feols(\"Y ~ X1+ C(f1)\", data=data)\nfit_ci.confint(joint=True).head()\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n-0.424794\n1.402705\n\n\nX1\n-1.160466\n-0.738416\n\n\nC(f1)[T.1.0]\n1.385731\n3.779568\n\n\nC(f1)[T.2.0]\n-2.837296\n-0.326572\n\n\nC(f1)[T.3.0]\n-1.606713\n0.982046"
},
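Because joint bands control simultaneous coverage across all coefficients, they are wider than the pointwise intervals. A short sketch comparing the two for the same model as above; the comparison itself is our addition, while both confint() calls appear in the documentation:

import pyfixest as pf

data = pf.get_data()
fit_ci = pf.feols("Y ~ X1 + C(f1)", data=data)

pointwise = fit_ci.confint()        # per-coefficient intervals
joint = fit_ci.confint(joint=True)  # simultaneous bands via multiplier bootstrap

# joint bands should be at least as wide as the pointwise intervals
wider = (joint["97.5%"] - joint["2.5%"]) >= (pointwise["97.5%"] - pointwise["2.5%"])
print(wider.all())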
{
"objectID": "pyfixest.html#features",
@@ -847,7 +847,7 @@
"href": "news.html#pyfixest-0.17.0",
"title": "News",
"section": "PyFixest 0.17.0",
- "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.380105\n1.177593\n\n\nD\n-1.759120\n-1.046114\n\n\nf1\n-0.014097\n0.023645\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.28043\n-5.001663\n0.000093\n-1.991779\n-0.813455\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
+ "text": "PyFixest 0.17.0\n\nRestructures the codebase and reorganizes how users can interact with the pyfixest API. It is now recommended to use pyfixest in the following way:\n\nimport numpy as np\nimport pyfixest as pf\ndata = pf.get_data()\ndata[\"D\"] = data[\"X1\"] > 0\nfit = pf.feols(\"Y ~ D + f1\", data = data)\nfit.tidy()\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\nCoefficient\n\n\n\n\n\n\n\n\n\n\nIntercept\n0.778849\n0.170261\n4.574437\n0.000005\n0.444737\n1.112961\n\n\nD\n-1.402617\n0.152224\n-9.214140\n0.000000\n-1.701335\n-1.103899\n\n\nf1\n0.004774\n0.008058\n0.592508\n0.553645\n-0.011038\n0.020587\n\n\n\n\n\n\n\nThe update should not inroduce any breaking changes. Thanks to @Wenzhi-Ding for the PR!\nAdds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to @apoorvalal for the contribution!\n\nfit.confint(joint = True)\n\n\n\n\n\n\n\n\n2.5%\n97.5%\n\n\n\n\nIntercept\n0.375929\n1.181769\n\n\nD\n-1.762853\n-1.042381\n\n\nf1\n-0.014294\n0.023843\n\n\n\n\n\n\n\nAdds support for the causal cluster variance estimator by Abadie et al. (QJE, 2023) for OLS via the .ccv() method.\n\nfit.ccv(treatment = \"D\", cluster = \"group_id\")\n\n/home/runner/work/pyfixest/pyfixest/pyfixest/estimation/feols_.py:1384: UserWarning: The initial model was not clustered. CRV1 inference is computed and stored in the model object.\n warnings.warn(\n\n\n\n\n\n\n\n\n\nEstimate\nStd. Error\nt value\nPr(>|t|)\n2.5%\n97.5%\n\n\n\n\nCCV\n-1.4026168622179929\n0.238985\n-5.869057\n0.000015\n-1.904706\n-0.900528\n\n\nCRV1\n-1.402617\n0.205132\n-6.837621\n0.000002\n-1.833584\n-0.97165"
},
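The UserWarning above suggests an alternative: cluster at estimation time, so that .ccv() starts from a CRV1-clustered model. A hedged sketch reusing the data from the release note; the vcov={"CRV1": ...} dictionary is pyfixest's standard cluster specification:

# cluster by group_id up front to avoid the "initial model was not clustered" warning
fit_cl = pf.feols("Y ~ D + f1", data=data, vcov={"CRV1": "group_id"})
fit_cl.ccv(treatment="D", cluster="group_id")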
{
"objectID": "news.html#pyfixest-0.16.0",
@@ -1085,14 +1085,14 @@
"href": "table-layout.html#basic-usage",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage",
- "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Basic Usage\nWe can compare all regression models via the pyfixest-internal pf.etable() function:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nYou can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:\n\npf.etable(pf.feols(\"Y+Y2~csw(X1,X2,X1:X2)\", data=data))\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -1.000*** (0.085)\n -0.993*** (0.082)\n -0.992*** (0.082)\n -1.322*** (0.215)\n -1.316*** (0.214)\n -1.316*** (0.215)\n \n \n X2\n \n -0.176*** (0.022)\n -0.197*** (0.036)\n \n -0.133* (0.057)\n -0.132 (0.095)\n \n \n X1:X2\n \n \n 0.020 (0.027)\n \n \n -0.001 (0.071)\n \n \n Intercept\n 0.919*** (0.112)\n 0.889*** (0.108)\n 0.888*** (0.108)\n 1.064*** (0.283)\n 1.042*** (0.283)\n 1.042*** (0.283)\n \n \n stats\n \n \n Observations\n 998\n 998\n 998\n 999\n 999\n 999\n \n \n S.E. type\n iid\n iid\n iid\n iid\n iid\n iid\n \n \n R2\n 0.123\n 0.177\n 0.177\n 0.037\n 0.042\n 0.042\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
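csw() adds regressors cumulatively. A sketch of the non-cumulative variant, assuming pyfixest mirrors fixest's sw() stepwise syntax (one model per listed regressor):

# two models per dependent variable: one with X1 only, one with X2 only
pf.etable(pf.feols("Y + Y2 ~ sw(X1, X2)", data=data))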
{
"objectID": "table-layout.html#keep-and-drop-variables",
"href": "table-layout.html#keep-and-drop-variables",
"title": "Regression Tables via pf.etable()",
"section": "Keep and drop variables",
- "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Keep and drop variables\netable allows us to do a few things out of the box. For example, we can only keep the variables that we’d like, which keeps all variables that fit the provided regex match.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=\"X1\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can use the exact_match argument to select a specific set of variables:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=[\"X1\", \"X2\"], exact_match=True)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nWe can also easily drop variables via the drop argument:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop=[\"X1\"])\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
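Because keep is interpreted as a regular expression, anchoring the pattern refines the selection. A sketch, assuming standard regex matching against coefficient names: keep=r"^X1$" should retain X1 but not the X1:X2 interaction, which the unanchored "X1" above also matched.

# anchored pattern: matches "X1" exactly, not "X1:X2"
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=r"^X1$")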
{
"objectID": "table-layout.html#hide-fixed-effects-or-se-type-rows",
@@ -1106,49 +1106,49 @@
"href": "table-layout.html#display-p-values-or-confidence-intervals",
"title": "Regression Tables via pf.etable()",
"section": "Display p-values or confidence intervals",
- "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
+ "text": "Display p-values or confidence intervals\nBy default, pf.etable() reports standard errors. But we can also ask to output p-values or confidence intervals via the coef_fmt function argument.\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt=\"b \\n (se) \\n [p]\")\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067) [0.000]\n -0.924*** (0.061) [0.000]\n -0.924*** (0.061) [0.000]\n -1.267*** (0.174) [0.000]\n -1.232*** (0.192) [0.000]\n -1.231*** (0.192) [0.000]\n \n \n X2\n -0.174*** (0.018) [0.000]\n -0.174*** (0.015) [0.000]\n -0.185*** (0.025) [0.000]\n -0.131** (0.042) [0.005]\n -0.118** (0.042) [0.008]\n -0.074 (0.104) [0.482]\n \n \n X1:X2\n \n \n 0.011 (0.018) [0.565]\n \n \n -0.041 (0.081) [0.618]\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error) \n [p-value]"
},
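coef_fmt is a free-form template. A sketch of a t-statistic variant, assuming a "t" placeholder is available alongside the b, se, and p placeholders used above:

# hypothetical "t" placeholder: coefficient, standard error, t-statistic
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt="b \n (se) \n [t]")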
{
"objectID": "table-layout.html#significance-levels-and-rounding",
"href": "table-layout.html#significance-levels-and-rounding",
"title": "Regression Tables via pf.etable()",
"section": "Significance levels and rounding",
- "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Significance levels and rounding\nAdditionally, we can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code and digits function arguments:\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n\n \n \n \n Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#other-output-formats",
"href": "table-layout.html#other-output-formats",
"title": "Regression Tables via pf.etable()",
"section": "Other output formats",
- "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf2 - x x - x x\nf1 x x x x x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
+ "text": "Other output formats\nBy default, pf.etable() returns a GT object (see the Great Tables package), but you can also opt to dataframe, markdown, or latex output via the type argument.\n\n# Pandas styler output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n coef_fmt=\"b (se)\",\n type=\"df\",\n)\n\n\n\n\n \n \n \n est1\n est2\n est3\n est4\n est5\n est6\n \n \n \n \n depvar\n Y\n Y\n Y\n Y2\n Y2\n Y2\n \n \n X1\n -0.94953*** (0.06652)\n -0.92405*** (0.06093)\n -0.92417*** (0.06094)\n -1.26655*** (0.17359)\n -1.23153*** (0.19228)\n -1.23100*** (0.19167)\n \n \n X2\n -0.17423*** (0.01840)\n -0.17411*** (0.01461)\n -0.18550*** (0.02516)\n -0.13056*** (0.04239)\n -0.11767*** (0.04152)\n -0.07369 (0.10356)\n \n \n X1:X2\n \n \n 0.01057 (0.01818)\n \n \n -0.04082 (0.08093)\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.48899\n 0.65904\n 0.65916\n 0.12017\n 0.17151\n 0.17180\n \n \n\n\n\n\n\n# Markdown output:\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=5,\n type=\"md\",\n)\n\nindex est1 est2 est3 est4 est5 est6\n------------ ------------ ------------ ------------ ------------ ------------ ------------\ndepvar Y Y Y Y2 Y2 Y2\n------------------------------------------------------------------------------------------------\nX1 -0.94953*** -0.92405*** -0.92417*** -1.26655*** -1.23153*** -1.23100***\n (0.06652) (0.06093) (0.06094) (0.17359) (0.19228) (0.19167)\nX2 -0.17423*** -0.17411*** -0.18550*** -0.13056*** -0.11767*** -0.07369\n (0.01840) (0.01461) (0.02516) (0.04239) (0.04152) (0.10356)\nX1:X2 0.01057 -0.04082\n (0.01818) (0.08093)\n------------------------------------------------------------------------------------------------\nf1 x x x x x x\nf2 - x x - x x\n------------------------------------------------------------------------------------------------\nObservations 997 997 997 998 998 998\nS.E. type by: f1 by: f1 by: f1 by: f1 by: f1 by: f1\nR2 0.48899 0.65904 0.65916 0.12017 0.17151 0.17180\n------------------------------------------------------------------------------------------------\n\n\n\nTo obtain latex output use format = \"tex\". If you want to save the table as a tex file, you can use the filename= argument to specify the respective path where it should be saved. If you want the latex code to be displayed in the notebook, you can use the print_tex=True argument. 
Etable will use latex packages booktabs, threeparttable and makecell for the table layout, so don’t forget to include these packages in your latex document.\n\n# LaTex output (include latex packages booktabs, threeparttable, and makecell in your document):\ntab = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n signif_code=[0.01, 0.05, 0.1],\n digits=2,\n type=\"tex\",\n print_tex=True,\n)\n\nThe following code generates a pdf including the regression table which you can display clicking on the link below the cell:\n\n## Use pylatex to create a tex file with the table\n\n\ndef make_pdf(tab, file):\n \"Create a PDF document with tex table.\"\n doc = pl.Document()\n doc.packages.append(pl.Package(\"booktabs\"))\n doc.packages.append(pl.Package(\"threeparttable\"))\n doc.packages.append(pl.Package(\"makecell\"))\n\n with (\n doc.create(pl.Section(\"A PyFixest LateX Table\")),\n doc.create(pl.Table(position=\"htbp\")) as table,\n ):\n table.append(pl.NoEscape(tab))\n\n doc.generate_pdf(file, clean_tex=False)\n\n\n# Compile latex to pdf & display a button with the hyperlink to the pdf\n# requires tex installation\nrun = False\nif run:\n make_pdf(tab, \"latexdocs/SampleTableDoc\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc.pdf\"))\n\nlatexdocs/SampleTableDoc.pdf"
},
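The filename= argument mentioned above writes the LaTeX table straight to disk. A sketch; the path is illustrative:

# save the tex source directly; combine with print_tex=True to also show it
pf.etable(
    [fit1, fit2, fit3, fit4, fit5, fit6],
    type="tex",
    filename="latexdocs/SampleTable.tex",
)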
{
"objectID": "table-layout.html#rename-variables",
"href": "table-layout.html#rename-variables",
"title": "Regression Tables via pf.etable()",
"section": "Rename variables",
- "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Rename variables\nYou can also rename variables if you want to have a more readable output. Just pass a dictionary to the labels argument. Note that interaction terms will also be relabeled using the specified labels for the interacted variables (if you want to manually relabel an interaction term differently, add it to the dictionary).\n\nlabels = {\n \"Y\": \"Wage\",\n \"Y2\": \"Wealth\",\n \"X1\": \"Age\",\n \"X2\": \"Years of Schooling\",\n \"f1\": \"Industry\",\n \"f2\": \"Year\",\n}\n\npf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nIf you want to label the rows indicating the inclusion of fixed effects not with the variable label but with a custom label, you can pass on a separate dictionary to the felabels argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
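As noted above, an interaction term can be relabeled manually by adding it to the labels dictionary, which overrides the automatic combination of its components. A sketch; the custom label is illustrative:

# override the automatic "Age × Years of Schooling" label for the interaction
labels_custom = {**labels, "X1:X2": "Age x Schooling (interaction)"}
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], labels=labels_custom)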
{
"objectID": "table-layout.html#custom-model-headlines",
"href": "table-layout.html#custom-model-headlines",
"title": "Regression Tables via pf.etable()",
"section": "Custom model headlines",
- "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Custom model headlines\nYou can also add custom headers for each model by passing a list of strings to the model_headers argument.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n model_heads=[\"US\", \"China\", \"EU\", \"US\", \"China\", \"EU\"],\n)\n\n\n\n\n\n\n\n \n \n \n \n Wage\n \n \n Wealth\n \n\n\n \n \n US\n \n \n China\n \n \n EU\n \n \n US\n \n \n China\n \n \n EU\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)\n \n\n\n\n\n\n\n \n\n\nOr change the ordering of headlines having headlines first and then dependent variables using the head_order argument. “hd” stands for headlines then dependent variables, “dh” for dependent variables then headlines. Assigning “d” or “h” can be used to only show dependent variables or only headlines. When head_order=“” only model numbers are shown.\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\nRemove the dependent variables from the headers:\n\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"\",\n)\n\n\n\n\n\n\n\n \n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
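head_order can also collapse the header to a single layer, as described above. A sketch showing only the custom headlines:

# "h": show only the model_heads labels, drop the dependent-variable row
pf.etable(
    [fit1, fit4, fit2, fit5, fit3, fit6],
    labels=labels,
    model_heads=["US", "US", "China", "China", "EU", "EU"],
    head_order="h",
)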
{
"objectID": "table-layout.html#further-custom-model-information",
"href": "table-layout.html#further-custom-model-information",
"title": "Regression Tables via pf.etable()",
"section": "Further custom model information",
- "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Further custom model information\nYou can add further custom model statistics/information to the bottom of the table by using the custom_stats argument to which you pass a dictionary with the name of the row and lists of values. The length of the lists must be equal to the number of models.\n\npf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n custom_model_stats={\n \"Number of Clusters\": [42, 42, 42, 37, 37, 37],\n \"Additional Info\": [\"A\", \"A\", \"B\", \"B\", \"C\", \"C\"],\n },\n)\n\n\n\n\n\n\n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Number of Clusters\n 42\n 42\n 42\n 37\n 37\n 37\n \n \n Additional Info\n A\n A\n B\n B\n C\n C\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#custom-table-notes",
"href": "table-layout.html#custom-table-notes",
"title": "Regression Tables via pf.etable()",
"section": "Custom table notes",
- "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
+ "text": "Custom table notes\nYou can replace the default table notes with your own notes using the notes argument.\n\nmynotes = \"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.\"\npf.etable(\n [fit1, fit4, fit2, fit5, fit3, fit6],\n labels=labels,\n model_heads=[\"US\", \"US\", \"China\", \"China\", \"EU\", \"EU\"],\n head_order=\"hd\",\n notes=mynotes,\n)\n\n\n\n\n\n\n\n \n \n \n \n US\n \n \n China\n \n \n EU\n \n\n\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -1.267*** (0.174)\n -0.924*** (0.061)\n -1.232*** (0.192)\n -0.924*** (0.061)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.131** (0.042)\n -0.174*** (0.015)\n -0.118** (0.042)\n -0.185*** (0.025)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n \n \n 0.011 (0.018)\n -0.041 (0.081)\n \n \n fe\n \n \n Industry\n x\n x\n x\n x\n x\n x\n \n \n Year\n -\n -\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 998\n 997\n 998\n 997\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.120\n 0.659\n 0.172\n 0.659\n 0.172\n \n\n \n \n \n Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."
},
{
"objectID": "table-layout.html#publication-ready-latex-tables",
@@ -1169,35 +1169,35 @@
"href": "table-layout.html#summarize-by-characteristics-in-columns-and-rows",
"title": "Regression Tables via pf.etable()",
"section": "Summarize by characteristics in columns and rows",
- "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n 0.12\n 2.38\n -0.28\n 2.40\n -0.24\n 2.24\n -0.13\n 2.19\n \n \n Wealth\n -0.33\n 5.61\n -0.25\n 5.36\n -0.27\n 5.69\n -0.41\n 5.71\n \n \n Age\n 1.02\n 0.79\n 1.08\n 0.80\n 1.00\n 0.81\n 1.07\n 0.83\n \n \n Years of Schooling\n -0.25\n 2.93\n -0.18\n 3.31\n -0.04\n 3.08\n -0.03\n 2.89\n \n \n nobs\n \n \n Number of observations\n 264\n \n 233\n \n 244\n \n 256\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n 0.12(2.38)\n -0.28(2.40)\n -0.24(2.24)\n -0.13(2.19)\n \n \n Wealth\n -0.33(5.61)\n -0.25(5.36)\n -0.27(5.69)\n -0.41(5.71)\n \n \n Age\n 1.02(0.79)\n 1.08(0.80)\n 1.00(0.81)\n 1.07(0.83)\n \n \n Years of Schooling\n -0.25(2.93)\n -0.18(3.31)\n -0.04(3.08)\n -0.03(2.89)\n \n \n nobs\n \n \n Number of observations\n 264\n 233\n 244\n 256\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 264\n 0.12\n 2.38\n 244\n -0.24\n 2.24\n \n \n Wealth\n 264\n -0.33\n 5.61\n 244\n -0.27\n 5.69\n \n \n Age\n 264\n 1.02\n 0.79\n 244\n 1.00\n 0.81\n \n \n Years of Schooling\n 264\n -0.25\n 2.93\n 244\n -0.04\n 3.08\n \n \n White collar\n \n \n Wage\n 233\n -0.28\n 2.40\n 256\n -0.13\n 2.19\n \n \n Wealth\n 233\n -0.25\n 5.36\n 256\n -0.41\n 5.71\n \n \n Age\n 233\n 1.08\n 0.80\n 256\n 1.07\n 0.83\n \n \n Years of Schooling\n 233\n -0.18\n 3.31\n 256\n -0.03\n 2.89\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
+ "text": "Summarize by characteristics in columns and rows\nYou can summarize by characteristics using the bycol argument when groups are to be displayed in columns. When the number of observations is the same for all variables in a group, you can also opt to display the number of observations only once for each group byin a separate line at the bottom of the table with counts_row_below==True.\n\n# Generate some categorial data\ndata[\"country\"] = np.random.choice([\"US\", \"EU\"], data.shape[0])\ndata[\"occupation\"] = np.random.choice([\"Blue collar\", \"White collar\"], data.shape[0])\n\n# Drop nan values to have balanced data\ndata.dropna(inplace=True)\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n \n \n EU\n \n \n US\n \n\n\n \n \n Blue collar\n \n \n White collar\n \n \n Blue collar\n \n \n White collar\n \n\n\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n Mean\n Std. Dev.\n\n\n\n \n stats\n \n \n Wage\n -0.12\n 2.30\n -0.13\n 2.32\n -0.09\n 2.32\n -0.17\n 2.30\n \n \n Wealth\n -0.09\n 5.66\n -0.50\n 5.48\n -0.47\n 5.70\n -0.22\n 5.53\n \n \n Age\n 1.07\n 0.81\n 0.98\n 0.79\n 1.04\n 0.79\n 1.08\n 0.83\n \n \n Years of Schooling\n 0.05\n 3.16\n -0.24\n 3.05\n -0.12\n 2.78\n -0.19\n 3.18\n \n \n nobs\n \n \n Number of observations\n 246\n \n 245\n \n 244\n \n 262\n \n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nYou can also use custom aggregation functions to compute further statistics or affect how statistics are presented. Pyfixest provides two such functions mean_std and mean_newline_std which compute the mean and standard deviation and display both the same cell (either with line break between them or not). This allows to have more compact tables when you want to show statistics for many characteristcs in the columns.\nYou can also hide the display of the statistics labels in the header with hide_stats_labels=True. In that case a table note will be added naming the statistics displayed using its label (if you have not provided a custom note).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\", \"occupation\"],\n stats=[\"mean_newline_std\", \"count\"],\n caption=\"Descriptive statistics\",\n stats_labels={\"count\": \"Number of observations\"},\n counts_row_below=True,\n hide_stats=True,\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n Blue collar\n White collar\n Blue collar\n White collar\n\n\n\n \n stats\n \n \n Wage\n -0.12(2.30)\n -0.13(2.32)\n -0.09(2.32)\n -0.17(2.30)\n \n \n Wealth\n -0.09(5.66)\n -0.50(5.48)\n -0.47(5.70)\n -0.22(5.53)\n \n \n Age\n 1.07(0.81)\n 0.98(0.79)\n 1.04(0.79)\n 1.08(0.83)\n \n \n Years of Schooling\n 0.05(3.16)\n -0.24(3.05)\n -0.12(2.78)\n -0.19(3.18)\n \n \n nobs\n \n \n Number of observations\n 246\n 245\n 244\n 262\n \n\n \n \n \n Note: Displayed statistics are Mean (Std. Dev.).\n \n\n\n\n\n\n\n \n\n\nYou can also split by characteristics in both columns and rows. 
Note that you can only use one grouping variable in rows, but several in columns (as shown above).\n\npf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n caption=\"Descriptive statistics\",\n)\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n \n EU\n \n \n US\n \n\n\n N\n Mean\n Std. Dev.\n N\n Mean\n Std. Dev.\n\n\n\n \n Blue collar\n \n \n Wage\n 246\n -0.12\n 2.30\n 244\n -0.09\n 2.32\n \n \n Wealth\n 246\n -0.09\n 5.66\n 244\n -0.47\n 5.70\n \n \n Age\n 246\n 1.07\n 0.81\n 244\n 1.04\n 0.79\n \n \n Years of Schooling\n 246\n 0.05\n 3.16\n 244\n -0.12\n 2.78\n \n \n White collar\n \n \n Wage\n 245\n -0.13\n 2.32\n 262\n -0.17\n 2.30\n \n \n Wealth\n 245\n -0.50\n 5.48\n 262\n -0.22\n 5.53\n \n \n Age\n 245\n 0.98\n 0.79\n 262\n 1.08\n 0.83\n \n \n Years of Schooling\n 245\n -0.24\n 3.05\n 262\n -0.19\n 3.18\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\nAnd you can again export descriptive statistics tables also to LaTex:\n\ndtab = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n labels=labels,\n bycol=[\"country\"],\n byrow=\"occupation\",\n stats=[\"count\", \"mean\", \"std\"],\n type=\"tex\",\n)\n\nrun = False\nif run:\n make_pdf(dtab, \"latexdocs/SampleTableDoc3\")\ndisplay(FileLink(\"latexdocs/SampleTableDoc3.pdf\"))\n\nlatexdocs/SampleTableDoc3.pdf"
},
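A sketch of the more compact mean_std aggregation named above, which places mean and standard deviation in one cell without a line break:

# mean_std: "mean(std)" in a single cell, no line break
pf.dtable(
    data,
    vars=["Y", "Y2", "X1", "X2"],
    labels=labels,
    bycol=["country", "occupation"],
    stats=["mean_std", "count"],
    counts_row_below=True,
    hide_stats=True,
)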
{
"objectID": "table-layout.html#basic-usage-of-make_table",
"href": "table-layout.html#basic-usage-of-make_table",
"title": "Regression Tables via pf.etable()",
"section": "Basic Usage of make_table",
- "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 1.26\n -0.82\n -1.28\n 0.29\n \n \n 1\n -0.42\n 0.24\n 0.32\n -0.58\n \n \n 2\n 0.19\n 0.72\n -1.27\n -0.07\n \n \n 3\n 0.5\n -1.17\n -0.42\n -0.74\n \n\n \n \n \n These are notes"
+ "text": "Basic Usage of make_table\n\ndf = pd.DataFrame(np.random.randn(4, 4).round(2), columns=[\"A\", \"B\", \"C\", \"D\"])\n\n# Make Booktabs style table\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n A\n B\n C\n D\n\n\n\n \n 0\n 0.65\n 0.25\n -0.62\n -1.26\n \n \n 1\n -0.2\n -1.18\n -2.6\n 0.2\n \n \n 2\n -0.61\n 0.11\n 1.86\n 0.82\n \n \n 3\n -0.9\n 0.39\n 0.67\n -1.32\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#mutiindex-dataframes",
"href": "table-layout.html#mutiindex-dataframes",
"title": "Regression Tables via pf.etable()",
"section": "Mutiindex DataFrames",
- "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n -0.352\n 0.87\n 1.692\n -0.914\n 0.159\n -0.826\n 0.094\n -0.717\n \n \n Variable 2\n -0.119\n 0.226\n -1.739\n -1.611\n -1.237\n -1.428\n 0.401\n 1.572\n \n \n Variable 3\n 0.931\n -1.441\n 1.2\n -0.273\n -0.845\n 0.24\n 0.73\n 0.896\n \n \n Group 2\n \n \n Variable 4\n 0.819\n 0.163\n 2.044\n -1.354\n -0.024\n 1.31\n 0.662\n 0.082\n \n \n Variable 5\n -1.72\n -0.324\n 0.904\n -0.307\n 0.813\n -0.262\n 0.087\n -0.837\n \n \n Group 3\n \n \n Variable 6\n 0.088\n -0.125\n -1.415\n 0.153\n -0.857\n -0.816\n -0.832\n -0.516\n \n\n \n \n \n These are notes"
+ "text": "Mutiindex DataFrames\nWhen the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.\n\n# Create a multiindex dataframe with random data\nrow_index = pd.MultiIndex.from_tuples(\n [\n (\"Group 1\", \"Variable 1\"),\n (\"Group 1\", \"Variable 2\"),\n (\"Group 1\", \"Variable 3\"),\n (\"Group 2\", \"Variable 4\"),\n (\"Group 2\", \"Variable 5\"),\n (\"Group 3\", \"Variable 6\"),\n ]\n)\n\ncol_index = pd.MultiIndex.from_product([[\"A\", \"B\"], [\"X\", \"Y\"], [\"High\", \"Low\"]])\ndf = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)\n\npf.make_table(df=df, caption=\"This is a caption\", notes=\"These are notes\")\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes\n \n\n\n\n\n\n\n \n\n\nYou can also hide column group names: This just creates a table where variables on the second level of the row index are displayed in groups based on the first level separated by horizontal lines.\n\npf.make_table(\n df=df, caption=\"This is a caption\", notes=\"These are notes\", rgroup_display=False\n).tab_style(style=style.text(style=\"italic\"), locations=loc.body(rows=[1, 5]))\n\n\n\n\n\n\n \n This is a caption\n \n\n \n \n \n \n A\n \n \n B\n \n\n\n \n \n X\n \n \n Y\n \n \n X\n \n \n Y\n \n\n\n High\n Low\n High\n Low\n High\n Low\n High\n Low\n\n\n\n \n Group 1\n \n \n Variable 1\n 0.055\n 0.8\n 0.061\n -0.918\n 0.299\n 1.144\n -0.072\n 1.675\n \n \n Variable 2\n 0.144\n 0.658\n 1.282\n -1.352\n -0.461\n 0.382\n 0.431\n -0.437\n \n \n Variable 3\n -0.109\n 1.582\n 0.21\n 0.173\n 0.618\n -0.203\n -0.019\n 0.721\n \n \n Group 2\n \n \n Variable 4\n 0.195\n 1.226\n -1.197\n 0.256\n -0.88\n -1.154\n 0.823\n 0.142\n \n \n Variable 5\n -0.638\n -0.225\n -0.959\n -0.113\n -1.416\n 0.495\n -0.404\n -0.287\n \n \n Group 3\n \n \n Variable 6\n 0.551\n 0.881\n 0.448\n 0.434\n -0.538\n -1.516\n 1.135\n -0.186\n \n\n \n \n \n These are notes"
},
{
"objectID": "table-layout.html#example-styling",
"href": "table-layout.html#example-styling",
"title": "Regression Tables via pf.etable()",
"section": "Example Styling",
- "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Example Styling\n\n(\n pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])\n .tab_options(\n column_labels_background_color=\"cornsilk\",\n stub_background_color=\"whitesmoke\",\n )\n .tab_style(\n style=style.fill(color=\"mistyrose\"),\n locations=loc.body(columns=\"(3)\", rows=[\"X2\"]),\n )\n)\n\n\n\n\n\n\n\n \n \n Y\n \n \n Y2\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n X1\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n X2\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n X1:X2\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n f1\n x\n x\n x\n x\n x\n x\n \n \n f2\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
{
"objectID": "table-layout.html#defining-table-styles-some-examples",
"href": "table-layout.html#defining-table-styles-some-examples",
"title": "Regression Tables via pf.etable()",
"section": "Defining Table Styles: Some Examples",
- "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
+ "text": "Defining Table Styles: Some Examples\nYou can easily define table styles that you can apply to all tables in your project. Just define a dictionary with the respective values for the tab options (see the Great Tables documentation) and use the style with .tab_options(**style_dict).\n\nstyle_print = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_body_border_bottom_width\": \"1px\",\n \"column_labels_border_top_width\": \"1px\",\n \"table_width\": \"14cm\",\n}\n\n\nstyle_presentation = {\n \"table_font_size\": \"16px\",\n \"table_font_color_light\": \"white\",\n \"table_body_border_top_style\": \"hidden\",\n \"table_body_border_bottom_style\": \"hidden\",\n \"heading_title_font_size\": \"18px\",\n \"source_notes_font_size\": \"12px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"6px\",\n \"column_labels_background_color\": \"midnightblue\",\n \"stub_background_color\": \"whitesmoke\",\n \"row_group_background_color\": \"whitesmoke\",\n \"table_background_color\": \"whitesmoke\",\n \"heading_background_color\": \"white\",\n \"source_notes_background_color\": \"white\",\n \"column_labels_border_bottom_color\": \"white\",\n \"column_labels_font_weight\": \"bold\",\n \"row_group_font_weight\": \"bold\",\n \"table_width\": \"18cm\",\n}\n\n\nt1 = pf.dtable(\n data,\n vars=[\"Y\", \"Y2\", \"X1\", \"X2\"],\n stats=[\"count\", \"mean\", \"std\", \"min\", \"max\"],\n labels=labels,\n caption=\"Descriptive statistics\",\n)\n\nt2 = pf.etable(\n [fit1, fit2, fit3, fit4, fit5, fit6],\n labels=labels,\n show_se=False,\n felabels={\"f1\": \"Industry Fixed Effects\", \"f2\": \"Year Fixed Effects\"},\n caption=\"Regression results\",\n)\n\n\ndisplay(t1.tab_options(**style_print))\ndisplay(t2.tab_options(**style_print))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. 
Error)\n \n\n\n\n\n\n\n \n\n\n\nstyle_printDouble = {\n \"table_font_size\": \"12px\",\n \"heading_title_font_size\": \"12px\",\n \"source_notes_font_size\": \"8px\",\n \"data_row_padding\": \"3px\",\n \"column_labels_padding\": \"3px\",\n \"table_body_border_bottom_style\": \"double\",\n \"column_labels_border_top_style\": \"double\",\n \"column_labels_border_bottom_width\": \"0.5px\",\n \"row_group_border_top_style\": \"hidden\",\n \"table_body_border_top_style\": \"None\",\n \"table_width\": \"14cm\",\n}\ndisplay(t1.tab_options(**style_printDouble))\ndisplay(t2.tab_options(**style_printDouble))\n\n\n\n\n\n\n \n Descriptive statistics\n \n\n \n N\n Mean\n Std. Dev.\n Min\n Max\n\n\n\n \n Wage\n 997\n -0.13\n 2.31\n -6.54\n 6.91\n \n \n Wealth\n 997\n -0.32\n 5.59\n -16.97\n 17.16\n \n \n Age\n 997\n 1.04\n 0.81\n 0.00\n 2.00\n \n \n Years of Schooling\n 997\n -0.13\n 3.05\n -9.67\n 10.99\n \n\n \n \n \n \n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n \n Regression results\n \n\n \n \n Wage\n \n \n Wealth\n \n\n\n (1)\n (2)\n (3)\n (4)\n (5)\n (6)\n\n\n\n \n coef\n \n \n Age\n -0.950*** (0.067)\n -0.924*** (0.061)\n -0.924*** (0.061)\n -1.267*** (0.174)\n -1.232*** (0.192)\n -1.231*** (0.192)\n \n \n Years of Schooling\n -0.174*** (0.018)\n -0.174*** (0.015)\n -0.185*** (0.025)\n -0.131** (0.042)\n -0.118** (0.042)\n -0.074 (0.104)\n \n \n Age × Years of Schooling\n \n \n 0.011 (0.018)\n \n \n -0.041 (0.081)\n \n \n fe\n \n \n Industry Fixed Effects\n x\n x\n x\n x\n x\n x\n \n \n Year Fixed Effects\n -\n x\n x\n -\n x\n x\n \n \n stats\n \n \n Observations\n 997\n 997\n 997\n 998\n 998\n 998\n \n \n S.E. type\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n by: f1\n \n \n R2\n 0.489\n 0.659\n 0.659\n 0.120\n 0.172\n 0.172\n \n\n \n \n \n Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell:\nCoefficient \n (Std. Error)"
},
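The entry above recommends defining one style dict and reusing it across a project. A tiny hypothetical helper that wraps this pattern (the function name and default are my own, not part of pyfixest; it only assumes the documented GT.tab_options API):

from great_tables import GT

def apply_house_style(tab: GT, style: dict | None = None) -> GT:
    # Apply a project-wide Great Tables style dict to any pyfixest table.
    return tab.tab_options(**(style or style_print))

apply_house_style(pf.etable([fit1, fit2]))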
{
"objectID": "reference/estimation.feols_compressed_.FeolsCompressed.html",
diff --git a/table-layout.html b/table-layout.html
index 55bd8032..82f6377d 100644
--- a/table-layout.html
+++ b/table-layout.html
@@ -245,7 +245,7 @@ Regression Tables via pf.etable()
Table Layout with PyFixest
Pyfixest comes with functions to generate publication-ready tables. Regression tables are generated with pf.etable()
, which can output different formats, for instance using the Great Tables package or generating formatted LaTeX tables using booktabs. There are also the functions pf.dtable()
to display descriptive statistics and pf.make_table()
to generate formatted tables from pandas dataframes in the same layout.
To begin, we load some libraries and fit a set of regression models.
-
+
import numpy as np
import pandas as pd
import pylatex as pl  # for the LaTeX table; note: not a dependency of pyfixest - needs manual installation
@@ -267,7 +267,7 @@ Table Layout wi
fit6 = pf.feols("Y2 ~ X1*X2 | f1 + f2", data=data)
-
+
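The diff retains only the last of the six model calls. For context, here is a minimal sketch of the full setup as it can be reconstructed from the regression tables further below; the exact pf.get_data() call and the labels mapping are assumptions, not taken verbatim from the source:

import pyfixest as pf  # numpy and pandas are already imported above

data = pf.get_data()  # assumed: pyfixest's bundled example data with Y, Y2, X1, X2, f1, f2

# Models (1)-(3) explain Y, models (4)-(6) explain Y2, as in the tables below.
fit1 = pf.feols("Y ~ X1 + X2 | f1", data=data)
fit2 = pf.feols("Y ~ X1 + X2 | f1 + f2", data=data)
fit3 = pf.feols("Y ~ X1*X2 | f1 + f2", data=data)
fit4 = pf.feols("Y2 ~ X1 + X2 | f1", data=data)
fit5 = pf.feols("Y2 ~ X1 + X2 | f1 + f2", data=data)
fit6 = pf.feols("Y2 ~ X1*X2 | f1 + f2", data=data)

# Assumed label mapping, inferred from the labelled tables further below:
labels = {"Y": "Wage", "Y2": "Wealth", "X1": "Age", "X2": "Years of Schooling"}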
@@ -301,7 +301,7 @@ Table Layout wi
-
+
@@ -338,55 +338,55 @@ Table Layout wi
Basic Usage
We can compare all regression models via the pyfixest-internal pf.etable()
function:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6])
-
+
@@ -445,20 +445,20 @@ Basic Usage
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -510,55 +510,55 @@ Basic Usage
You can also estimate and display multiple regressions with one line of code using the (py)fixest stepwise notation:
-
+
"Y+Y2~csw(X1,X2,X1:X2)", data=data)) pf.etable(pf.feols(
-
+
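csw() is fixest's cumulative stepwise operator, so the one-liner above expands into three nested specifications per dependent variable. A sketch of the equivalent manual expansion (illustrative only, not how pyfixest implements it internally):

# csw(X1, X2, X1:X2) adds regressors cumulatively; combined with the
# two dependent variables Y and Y2 this yields 2 x 3 = 6 models.
fits = [
    pf.feols(f"{y} ~ {rhs}", data=data)
    for y in ["Y", "Y2"]
    for rhs in ["X1", "X1 + X2", "X1 + X2 + X1:X2"]
]
pf.etable(fits)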
@@ -673,55 +673,55 @@ Basic Usage
Keep and drop variables
etable
allows us to do a few things out of the box. For example, we can keep only the variables that we’d like: the keep argument retains all variables that match the provided regex.
-
+
="X1") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -771,20 +771,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
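Note that keep matches as a regex, which is why the interaction X1:X2 survives the keep="X1" filter above. A minimal sketch of anchoring the pattern (assuming standard re search semantics) so that only the X1 coefficient itself is kept:

# "X1" matches every coefficient name containing X1, including "X1:X2";
# anchoring the regex keeps the plain X1 row only.
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep=r"^X1$")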
@@ -836,55 +836,55 @@ Keep and drop vari
We can use the exact_match
argument to select a specific set of variables:
-
+
=["X1", "X2"], exact_match=True) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], keep
-
+
@@ -934,20 +934,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -999,55 +999,55 @@ Keep and drop vari
We can also easily drop variables via the drop
argument:
-
+
=["X1"]) pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], drop
-
+
@@ -1088,20 +1088,20 @@ Keep and drop vari
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1156,55 +1156,55 @@ Keep and drop vari
Hide fixed effects or SE-type rows
We can hide the rows showing the relevant fixed effects and those showing the S.E. type by setting show_fe=False
and show_se_type=False
(for instance when the set of fixed effects or the estimation method for the standard errors is the same for all models and you want to describe this in the text or table notes rather than displaying it in the table).
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], show_fe=False, show_se_type=False)
-
+
@@ -1301,55 +1301,55 @@ Hide fi
Display p-values or confidence intervals
By default, pf.etable()
reports standard errors. But we can also output p-values or confidence intervals via the coef_fmt
function argument; in the format string below, b, se, and p stand for the coefficient, the standard error, and the p-value.
-
+
="b \n (se) \n [p]") pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], coef_fmt
-
+
@@ -1408,20 +1408,20 @@ D
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1477,55 +1477,55 @@ D
Significance levels and rounding
We can also overwrite the defaults for the reported significance levels and control the rounding of results via the signif_code
and digits
function arguments. With signif_code=[0.01, 0.05, 0.1], *** denotes p < 0.01, ** p < 0.05, and * p < 0.1, while digits=5 rounds the reported estimates to five decimal places:
-
+
pf.etable([fit1, fit2, fit3, fit4, fit5, fit6], signif_code=[0.01, 0.05, 0.1], digits=5)
-
+
@@ -1584,20 +1584,20 @@ Significa
fe
- f2
- -
+ f1
x
x
- -
x
x
-
-
- f1
x
x
+
+
+ f2
+ -
x
x
+ -
x
x
@@ -1652,7 +1652,7 @@ Significa
Other output formats
By default, pf.etable()
returns a GT object (see the Great Tables package), but you can also opt for dataframe, markdown, or LaTeX output via the type
argument.
-
+
# Pandas styler output:
pf.etable(
    [fit1, fit2, fit3, fit4, fit5, fit6],
@@ -1714,20 +1714,20 @@ Other output formats
-0.04082 (0.08093)
-
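A minimal sketch of the alternative output types; the accepted values ("gt", "df", "md", "tex") reflect current pyfixest behavior as I understand it and should be checked against the installed version:

tab_gt = pf.etable([fit1, fit2, fit3], type="gt")    # Great Tables object (default)
tab_df = pf.etable([fit1, fit2, fit3], type="df")    # pandas styler output
tab_md = pf.etable([fit1, fit2, fit3], type="md")    # markdown table
tab_tex = pf.etable([fit1, fit2, fit3], type="tex")  # booktabs LaTeX table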