From 77e7080d04e103aadaca2f2d765f409982ce7c5a Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Fri, 22 Nov 2024 22:56:56 +0100 Subject: [PATCH] Update Examples in Docs (#724) * make sure all examples are run in docs * adjust panelview plot size --- docs/changelog.quarto_ipynb | 650 -------------------- docs/difference-in-differences.qmd | 5 +- pyfixest/did/visualize.py | 5 +- pyfixest/estimation/demean_.py | 33 + pyfixest/estimation/feiv_.py | 4 +- pyfixest/estimation/feols_.py | 32 +- pyfixest/estimation/model_matrix_fixest_.py | 14 + pyfixest/estimation/multcomp.py | 30 +- pyfixest/report/summarize.py | 27 + 9 files changed, 110 insertions(+), 690 deletions(-) delete mode 100644 docs/changelog.quarto_ipynb diff --git a/docs/changelog.quarto_ipynb b/docs/changelog.quarto_ipynb deleted file mode 100644 index b4479432..00000000 --- a/docs/changelog.quarto_ipynb +++ /dev/null @@ -1,650 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Changelog\n", - "\n", - "## PyFixest 0.22.0\n", - "\n", - "### Changes\n", - "\n", - "- Fix bug in wildboottest method @s3alfisc (#506)\n", - "- docs: add sanskriti2005 as a contributor for infra @allcontributors (#503)\n", - "- Infra: added the release-drafter for automation of release notes @sanskriti2005 (#502)\n", - "- Fix broken link in contributing.md @s3alfisc (#499)\n", - "- docs: add leostimpfle as a contributor for bug @allcontributors (#495)\n", - "- Update justfile @leostimpfle (#494)\n", - "- docs: add baggiponte as a contributor for doc @allcontributors (#490)\n", - "- docs: improve installation section @baggiponte (#489)\n", - "- Bump tornado from 6.4 to 6.4.1 @dependabot (#487)\n", - "- docs: add leostimpfle as a contributor for code @allcontributors (#478)\n", - "- Feols: speed up the creation of interacted fixed effects via `fe1^fe2` syntax @leostimpfle (#475)\n", - "- rename resampling iterations to 'reps' in all methods @s3alfisc (#474)\n", - "- fix a lot of broken links throught the repo @s3alfisc (#472)\n", - "- Multiple readme fixes required after package was moved to py-econometrics project @s3alfisc (#450)\n", - "\n", - "### Infrastructure\n", - "\n", - "- infrastructure: fix minor release drafter bugs @s3alfisc (#504)\n", - "\n", - "## PyFixest 0.21.0\n", - "\n", - "- Add support for randomization inference via the `ritest()` method:\n" - ], - "id": "b4cebfcd" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#| eval: False\n", - "import pyfixest as pf\n", - "data = pf.get_data()\n", - "\n", - "fit = pf.feols(\"Y ~ X1\", data = data)\n", - "fit.ritest(resampvar=\"X1=0\", reps = 1000)" - ], - "id": "bddeec7f", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## PyFixest 0.20.0\n", - "\n", - "- This version introduces MyPy type checks to the entire pyfixest codebase. Thanks to @juanitorduz for nudging me to get started with this =). It also fixes a handful of smaller bugs.\n", - "\n", - "## PyFixest 0.19.0\n", - "\n", - "- Fixes multiple smaller and larger performance regressions. The NYC-Taxi example regression now takes approximately 22 seconds to run (... if my laptopt is connected to a power charger)!\n" - ], - "id": "9084e558" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#| eval: False\n", - "\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import duckdb\n", - "import time\n", - "import numpy as np\n", - "import pyfixest as pf\n", - "\n", - "# %%\n", - "nyc = duckdb.sql(\n", - " '''\n", - " FROM 'C:/Users/alexa/Documents/nyc-taxi/**/*.parquet'\n", - " SELECT\n", - " tip_amount, trip_distance, passenger_count,\n", - " vendor_id, payment_type, dropoff_at,\n", - " dayofweek(dropoff_at) AS dofw\n", - " WHERE year = 2012 AND month <= 3\n", - " '''\n", - " ).df()\n", - "\n", - "# convert dowf, vendor_id, payment_type to categorical\n", - "tic = time.time()\n", - "nyc[\"dofw\"] = nyc[\"dofw\"].astype(int)\n", - "nyc[\"vendor_id\"] = nyc[\"vendor_id\"].astype(\"category\")\n", - "nyc[\"payment_type\"] = nyc[\"payment_type\"].astype(\"category\")\n", - "print(f\"\"\"\n", - " I am convering columns of type 'objects' to 'categories' and 'int'data types outside\n", - " of the regression, hence I am cheating a bit. This saves {np.round(time.time() - tic)} seconds.\n", - " \"\"\"\n", - ")\n", - "# I am convering columns of type 'objects' to 'categories' and 'int'data types outside\n", - "# of the regression, hence I am cheating a bit. This saves 7.0 seconds.\n", - "\n", - "run = True\n", - "if run:\n", - "\n", - " # mock regression for JIT compilation\n", - " fit = pf.feols(\n", - " fml = \"tip_amount ~ trip_distance + passenger_count | vendor_id + payment_type + dofw\",\n", - " data = nyc.iloc[1:10_000],\n", - " copy_data = False,\n", - " store_data = False\n", - " )\n", - "\n", - " import time\n", - " tic = time.time()\n", - " fit = pf.feols(\n", - " fml = \"tip_amount ~ trip_distance + passenger_count | vendor_id + payment_type + dofw\",\n", - " data = nyc,\n", - " copy_data = False, # saves a few seconds\n", - " store_data = False # saves a few second\n", - " )\n", - " passed = time.time() - tic\n", - " print(f\"Passed time is {np.round(passed)}.\")\n", - " # Passed time is 22." - ], - "id": "c9628f73", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Adds three new function arguments to `feols()` and `fepois()`: `copy_data`, `store_data`, and `fixef_tol`.\n", - "- Adds support for frequency weights with the `weights_type` function argument.\n" - ], - "id": "c43233ad" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "import pyfixest as pf\n", - "\n", - "data = pf.get_data(N = 10000, model = \"Fepois\")\n", - "df_weighted = data[[\"Y\", \"X1\", \"f1\"]].groupby([\"Y\", \"X1\", \"f1\"]).size().reset_index().rename(columns={0: \"count\"})\n", - "df_weighted[\"id\"] = list(range(df_weighted.shape[0]))\n", - "\n", - "print(\"Dimension of the aggregated df:\", df_weighted.shape)\n", - "print(df_weighted.head())\n", - "\n", - "fit = pf.feols(\n", - " \"Y ~ X1 | f1\",\n", - " data = data\n", - ")\n", - "fit_weighted = pf.feols(\n", - " \"Y ~ X1 | f1\",\n", - " data = df_weighted,\n", - " weights = \"count\",\n", - " weights_type = \"fweights\"\n", - ")\n", - "pf.etable([fit, fit_weighted], coef_fmt = \"b(se) \\n (t) \\n (p)\")" - ], - "id": "a3bdc973", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Bugfix: Wild Cluster Bootstrap Inference with Weights would compute unweighted standard errors. Sorry about that! WLS is not supported for the WCB.\n", - "- Adds support for CRV3 inference with weights.\n", - "\n", - "\n", - "## PyFixest 0.18.0\n", - "\n", - "- Large Refactoring of Interal Processing of Model Formulas, in particular `FixestFormulaParser` and `model_matrix_fixest`. As a results, the code should be cleaner and more robust.\n", - "- Thanks to the refactoring, we can now bump the required `formulaic` version to the stable `1.0.0` release.\n", - "- The `fml` argument of `model_matrix_fixest` is deprecated. Instead, `model_matrix_fixest`\n", - " now asks for a `FixestFormula`, which is essentially a dictionary with information on model\n", - " specifications like a first stage formula (if applicable), dependent variables, fixed effects,\n", - " etc.\n", - "- Additionally, `model_matrix_fixest` now returns a dictionary instead of a tuple.\n", - "- Brings back fixed effects reference setting via `i(var1, var2, ref)` syntax. Deprecates the `i_ref1`, `i_ref2` function arguments. I.e. it is again possible to e.g. run\n" - ], - "id": "e2d8c355" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#| eval: False\n", - "\n", - "import pyfixest as pf\n", - "data = pf.get_data()\n", - "\n", - "fit1 = pf.feols(\"Y ~ i(f1, X2)\", data=data)\n", - "fit1.coef()[0:8]" - ], - "id": "e18eeb95", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Via the `ref` syntax, via can set the reference level:\n" - ], - "id": "91e5f2c9" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#| eval: False\n", - "fit2 = pf.feols(\"Y ~ i(f1, X2, ref = 1)\", data=data)\n", - "fit2.coef()[0:8]" - ], - "id": "2cf87afd", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## PyFixest 0.17.0\n", - "\n", - "- Restructures the codebase and reorganizes how users can interact with the `pyfixest` API. It is now recommended to use `pyfixest` in the following way:\n" - ], - "id": "6dd9dbc4" - }, - { - "cell_type": "code", - "metadata": { - "md-indent": " " - }, - "source": [ - "import numpy as np\n", - "import pyfixest as pf\n", - "data = pf.get_data()\n", - "data[\"D\"] = data[\"X1\"] > 0\n", - "fit = pf.feols(\"Y ~ D + f1\", data = data)\n", - "fit.tidy()" - ], - "id": "279cb62e", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " The update should not inroduce any breaking changes. Thanks to [@Wenzhi-Ding](https://github.com/Wenzhi-Ding) for the PR!\n", - "\n", - "- Adds support for simultaneous confidence intervals via a multiplier bootstrap. Thanks to [@apoorvalal](https://github.com/apoorvalal) for the contribution!\n" - ], - "id": "3dd76c33" - }, - { - "cell_type": "code", - "metadata": { - "md-indent": " " - }, - "source": [ - "fit.confint(joint = True)" - ], - "id": "11a17461", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Adds support for the causal cluster variance estimator by [Abadie et al. (QJE, 2023)](https://academic.oup.com/qje/article/138/1/1/6750017)\n", - " for OLS via the `.ccv()` method.\n" - ], - "id": "2827cd57" - }, - { - "cell_type": "code", - "metadata": { - "md-indent": " " - }, - "source": [ - "fit.ccv(treatment = \"D\", cluster = \"group_id\")" - ], - "id": "0c9a5fb2", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## PyFixest 0.16.0\n", - "\n", - "- Adds multiple quality of life improvements for developers, thanks to [NKeleher](https://github.com/NKeleher).\n", - "- Adds more options to customize `etable()` output thanks to [Wenzhi-Ding](https://github.com/Wenzhi-Ding).\n", - "- Implements Romano-Wolf and Bonferroni corrections for multiple testing in the `multcomp` module.\n", - "\n", - "## PyFixest 0.15.\n", - "\n", - "- Adds support for weighted least squares for `feols()`.\n", - "- Reduces testing time drastically by running tests on fewer random data samples. Qualitatively,\n", - " the set of test remains identical.\n", - "- Some updates for future `pandas` compatibility.\n", - "\n", - "## PyFixest 0.14.0\n", - "\n", - "- Moves the documentation to [quartodoc](https://github.com/machow/quartodoc).\n", - "- Changes all docstrings to `numpy` format.\n", - "- Difference-in-differences estimation functions now need to be imported via the `pyfixest.did.estimation` module:\n" - ], - "id": "4f79d719" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "from pyfixest.did.estimation import did2s, lpdid, event_study" - ], - "id": "97858cbe", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## PyFixest 0.13.5\n", - "\n", - "- Fixes a bug that lead to incorrect results when the dependent variable and **all covariates** (excluding the fixed effects) where integers.\n", - "\n", - "## PyFixest 0.13.4\n", - "\n", - "- Fixes a bug in `etable()` with IV's that occurred because `feols()` does not report R2 statistics for IVs.\n", - "\n", - "## PyFixest 0.13.2\n", - "\n", - "- Fixes a bug in `etable()` and a warning in `fixest_model_matrix` that arose with higher `pandas` versions. Thanks to @aeturrell for reporting!\n", - "\n", - "## PyFixest 0.13.0\n", - "\n", - "### New Features\n", - "\n", - "- Introduces a new `pyfixest.did` module which contains routines for Difference-in-Differences estimation.\n", - "- Introduces support for basic versions of the local projections DiD estimator following [Dube et al (2023)](https://www.nber.org/papers/w31184)\n", - "- Adds a new vignette for Difference-in-Differences estimation.\n", - "- Reports R2 values in `etable()`.\n", - "\n", - "\n", - "## PyFixest 0.12.0\n", - "\n", - "\n", - "### Enhancements:\n", - "\n", - "- Good performance improvements for singleton fixed effects detection. Thanks to [@styfenschaer](https://github.com/styfenschaer) for the PR! See [#229](https://github.com/py-econometrics/pyfixest/issues/229).\n", - "- Uses the [r2u project](https://github.com/eddelbuettel/r2u) for installing R and R packages on github actions, with great performance improvements.\n", - "- Allows to pass `polars` data frames to `feols()`, `fepois()` and `predict()`. [#232](https://github.com/py-econometrics/pyfixest/issues/232). Thanks to [@vincentarelbundock](https://github.com/py-econometrics/pyfixest/issues/232) for the suggestion!\n", - "\n", - "### Bug Fixes:\n", - "\n", - "- Missing variables in features were not always handled correctly in `predict()` with `newdata` not `None` in the presence of missing data, which would lead to an error. See [#246](https://github.com/py-econometrics/pyfixest/issues/246) for details.\n", - "- Categorical variables were not always handled correctly in `predict()` with `newdata` not `None`, because the number of fixed effects levels in `newdata` might be smaller than in `data`. In consequence, some levels were not found, which lead to an error. See [#245](https://github.com/py-econometrics/pyfixest/issues/245) for details. Thanks to [@jiafengkevinchen](https://github.com/jiafengkevinchen) for the pointer!\n", - "- Multicollinearity checks for over-identified IV was not implemented correctly, which lead to a dimension error. See [#236](https://github.com/py-econometrics/pyfixest/issues/236) for details. Thanks to [@jiafengkevinchen](https://github.com/jiafengkevinchen) for the pointer!\n", - "- The number of degrees of freedom `k` was computed incorrectly if columns were dropped from the design matrix `X` in the presence of multicollinearity. See [#235](https://github.com/py-econometrics/pyfixest/issues/235) for details. Thanks to [@jiafengkevinchen](https://github.com/jiafengkevinchen) for the pointer!\n", - "- If all variables were dropped due to multicollinearity, an unclear and imprecise error message was produced. See [#228](https://github.com/py-econometrics/pyfixest/issues/228) for details. Thanks to [@manferdinig](https://github.com/manferdinig) for the pointer!\n", - "- If selection `fixef_rm = 'singleton'`, `feols()` and `fepois()` would fail, which has been fixed. [#192](https://github.com/py-econometrics/pyfixest/issues/192)\n", - "\n", - "### Dependency Requirements\n", - "\n", - "- For now, sets `formulaic` versions to be `0.6.6` or lower as version `1.0.0` seems to have introduced a problem with the `i()` operator, See [#244](https://github.com/py-econometrics/pyfixest/issues/244) for details.\n", - "- Drops dependency on `pyhdfe`.\n", - "\n", - "## PyFixest 0.11.1\n", - "\n", - "- Fixes some bugs around the computation of R-squared values (see [issue #103](https://github.com/py-econometrics/pyfixest/issues/103)).\n", - "- Reports R-squared values again when calling `.summary()`.\n", - "\n", - "## PyFixest 0.11.0\n", - "\n", - "- Significant speedups for CRV1 inference.\n", - "\n", - "## PyFixest 0.10.12\n", - "\n", - "Fixes a small bug with the separation check for poisson regression #138.\n", - "\n", - "## PyFixest 0.10.11\n", - "\n", - "Fixes bugs with i(var1, var2) syntax introduced with PyFixest 0.10.10.\n", - "\n", - "## PyFixest 0.10.10\n", - "\n", - "Fixes a bug with variable interactions via `i(var)` syntax. See [issue #221](https://github.com/py-econometrics/pyfixest/issues/211).\n", - "\n", - "## PyFixest 0.10.9\n", - "\n", - "Makes `etable()` prettier and more informative.\n", - "\n", - "## PyFixest 0.10.8\n", - "\n", - "### Breaking changes\n", - "Reference levels for the `i()` formula syntax can no longer be set within the formula, but need to be added via the `i_ref1` function argument to either `feols()` and `fepois()`.\n", - "\n", - "### New feature\n", - "\n", - "A `dids2()` function is added, which implements the 2-stage difference-in-differences procedure à la Gardner and follows the syntax of @kylebutts [did2s](https://github.com/kylebutts/did2s) R package.\n", - "\n", - "```py\n", - "from pyfixest.did.did import did2s\n", - "from pyfixest.estimation import feols\n", - "from pyfixest.visualize import iplot\n", - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "df_het = pd.read_csv(\"https://raw.githubusercontent.com/py-econometrics/pyfixest/master/pyfixest/did/data/df_het.csv\")\n", - "\n", - "fit = did2s(\n", - " df_het,\n", - " yname = \"dep_var\",\n", - " first_stage = \"~ 0 | state + year\",\n", - " second_stage = \"~i(rel_year)\",\n", - " treatment = \"treat\",\n", - " cluster = \"state\",\n", - " i_ref1 = [-1.0, np.inf],\n", - ")\n", - "\n", - "fit_twfe = feols(\n", - " \"dep_var ~ i(rel_year) | state + year\",\n", - " df_het,\n", - " i_ref1 = [-1.0, np.inf]\n", - ")\n", - "\n", - "iplot([fit, fit_twfe], coord_flip=False, figsize = (900, 400), title = \"TWFE vs DID2S\")\n", - "```\n", - "![](figures/event_study.svg)\n", - "\n", - "\n", - "\n", - "## PyFixest 0.10.7\n", - "\n", - "- Adds basic support for event study estimation via two-way fixed effects and Gardner's two-stage \"Did2s\" approach.\n", - " This is a beta version and experimental. Further updates (i.e. proper event studies vs \"only\" ATTs) and a more flexible\n", - " did2s front end will follow in future releases.\n", - "\n", - "```python\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from pyfixest.did.did import event_study\n", - "import pyfixest as pf\n", - "import pandas as pd\n", - "df_het = pd.read_csv(\"pyfixest/did/data/df_het.csv\")\n", - "\n", - "fit_twfe = event_study(\n", - " data = df_het,\n", - " yname = \"dep_var\",\n", - " idname= \"state\",\n", - " tname = \"year\",\n", - " gname = \"g\",\n", - " estimator = \"twfe\"\n", - ")\n", - "\n", - "fit_did2s = event_study(\n", - " data = df_het,\n", - " yname = \"dep_var\",\n", - " idname= \"state\",\n", - " tname = \"year\",\n", - " gname = \"g\",\n", - " estimator = \"did2s\"\n", - ")\n", - "\n", - "pf.etable([fit_twfe, fit_did2s])\n", - "# | Coefficient | est1 | est2 |\n", - "# |:--------------|:-----------------|:-----------------|\n", - "# | ATT | 2.135*** (0.044) | 2.152*** (0.048) |\n", - "# Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001\n", - "```\n", - "\n", - "## PyFixest 0.10.6\n", - "\n", - "- Adds an `etable()` function that outputs markdown, latex or a pd.DataFrame.\n", - "\n", - "## PyFixest 0.10.5\n", - "\n", - "- Fixes a big in IV estimation that would trigger an error. See [here](https://github.com/py-econometrics/pyfixest/issues/197) for details. Thanks to @aeturrell for reporting!\n", - "\n", - "## PyFixest 0.10.4\n", - "\n", - "- Implements a custom function to drop singleton fixed effects.\n", - "- Additional small performance improvements.\n", - "\n", - "## PyFixest 0.10.3\n", - "\n", - "- Allows for white space in the multiway clustering formula.\n", - "- Adds documentation for multiway clustering.\n", - "\n", - "## PyFixest 0.10.2\n", - "\n", - "- Adds support for two-way clustering.\n", - "- Adds support for CRV3 inference for Poisson regression.\n", - "\n", - "## PyFixest 0.10.1\n", - "\n", - "- Adapts the internal fixed effects demeaning criteron to match `PyHDFE's default.\n", - "- Adds Styfen as coauthor.\n", - "\n", - "## PyFixest 0.10\n", - "\n", - "- Multiple performance improvements.\n", - "- Most importantly, implements a custom demeaning algorithm in `numba` - thanks to Styfen Schaer (@styfenschaer),\n", - " which leads to performance improvements of 5x or more:\n", - "\n", - "```python\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import numpy as np\n", - "import time\n", - "import pyhdfe\n", - "from pyfixest.demean import demean\n", - "\n", - "np.random.seed(1238)\n", - "N = 10_000_000\n", - "x = np.random.normal(0, 1, 10*N).reshape((N,10))\n", - "f1 = np.random.choice(list(range(1000)), N).reshape((N,1))\n", - "f2 = np.random.choice(list(range(1000)), N).reshape((N,1))\n", - "\n", - "flist = np.concatenate((f1, f2), axis = 1)\n", - "weights = np.ones(N)\n", - "\n", - "algorithm = pyhdfe.create(flist)\n", - "\n", - "start_time = time.time()\n", - "res_pyhdfe = algorithm.residualize(x)\n", - "end_time = time.time()\n", - "print(end_time - start_time)\n", - "# 26.04527711868286\n", - "\n", - "\n", - "start_time = time.time()\n", - "res_pyfixest, success = demean(x, flist, weights, tol = 1e-10)\n", - "# Calculate the execution time\n", - "end_time = time.time()\n", - "print(end_time - start_time)\n", - "#4.334428071975708\n", - "\n", - "np.allclose(res_pyhdfe , res_pyfixest)\n", - "# True\n", - "```\n", - "\n", - "\n", - "\n", - "## PyFixest 0.9.11\n", - "\n", - "- Bump required `formulaic` version to `0.6.5`.\n", - "- Stop copying the data frame in `fixef()`.\n", - "\n", - "## PyFixest 0.9.10\n", - "\n", - "- Fixes a big in the `wildboottest` method (see [#158](https://github.com/py-econometrics/pyfixest/issues/158)).\n", - "- Allows to run a wild bootstrap after fixed effect estimation.\n", - "\n", - "## PyFixest 0.9.9\n", - "\n", - "- Adds support for `wildboottest` for Python `3.11`.\n", - "\n", - "## PyFixest 0.9.8\n", - "\n", - "- Fixes a couple more bugs in the `predict()` and `fixef()` methods.\n", - "- The `predict()` argument `data` is renamed to `newdata`.\n", - "\n", - "## PyFixest 0.9.7\n", - "\n", - "Fixes a bug in `predict()` produced when multicollinear variables are dropped.\n", - "\n", - "## PyFixest 0.9.6\n", - "\n", - "Improved Collinearity handling. See [#145](https://github.com/py-econometrics/pyfixest/issues/145)\n", - "\n", - "## PyFixest 0.9.5\n", - "\n", - "\n", - "- Moves plotting from `matplotlib` to `lets-plot`.\n", - "- Fixes a few minor bugs in plotting and the `fixef()` method.\n", - "\n", - "\n", - "## PyFixest 0.9.1\n", - "\n", - "### Breaking API changes\n", - "\n", - "It is no longer required to initiate an object of type `Fixest` prior to running [Feols(/reference/Feols.qmd) or `fepois`. Instead,\n", - "you can now simply use `feols()` and `fepois()` as functions, just as in `fixest`. Both function can be found in an\n", - "`estimation` module and need to obtain a `pd.DataFrame` as a function argument:\n", - "\n", - "```py\n", - "from pyfixest.estimation import fixest, fepois\n", - "from pyfixest.utils import get_data\n", - "\n", - "data = get_data()\n", - "fit = feols(\"Y ~ X1 | f1\", data = data, vcov = \"iid\")\n", - "```\n", - "\n", - "Calling `feols()` will return an instance of class [Feols(/reference/Feols.qmd), while calling `fepois()` will return an instance of class `Fepois`.\n", - "Multiple estimation syntax will return an instance of class `FixestMulti`.\n", - "\n", - "Post processing works as before via `.summary()`, `.tidy()` and other methods.\n", - "\n", - "### New Features\n", - "\n", - "A summary function allows to compare multiple models:\n", - "\n", - "```py\n", - "from pyfixest.summarize import summary\n", - "fit2 = feols(\"Y ~ X1 + X2| f1\", data = data, vcov = \"iid\")\n", - "summary([fit, fit2])\n", - "```\n", - "\n", - "Visualization is possible via custom methods (`.iplot()` & `.coefplot()`), but a new module allows to visualize\n", - " a list of [Feols(/reference/Feols.qmd) and/or `Fepois` instances:\n", - "\n", - "```py\n", - "from pyfixest.visualize import coefplot, iplot\n", - "coefplot([fit, fit2])\n", - "```\n", - "\n", - "The documentation has been improved (though there is still room for progress), and the code has been cleaned up a\n", - "bit (also lots of room for improvements)." - ], - "id": "444e4b73" - } - ], - "metadata": { - "kernelspec": { - "name": "python3", - "language": "python", - "display_name": "Python 3 (ipykernel)", - "path": "C:\\Users\\alexa\\AppData\\Roaming\\Python\\share\\jupyter\\kernels\\python3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/difference-in-differences.qmd b/docs/difference-in-differences.qmd index 31c23e46..2d7b7ac4 100644 --- a/docs/difference-in-differences.qmd +++ b/docs/difference-in-differences.qmd @@ -119,7 +119,7 @@ pf.panelview( treat="treat", collapse_to_cohort=True, title = "Outcome Plot", - figsize=(1, 0.5), + figsize=(2, 0.75), ) ``` @@ -142,7 +142,8 @@ pf.panelview( time="year", treat="treat", subsamp=100, - title = "Outcome Plot" + title = "Outcome Plot", + figsize=(2, 0.75), ) ``` diff --git a/pyfixest/did/visualize.py b/pyfixest/did/visualize.py index cf68b1e5..9270abbc 100644 --- a/pyfixest/did/visualize.py +++ b/pyfixest/did/visualize.py @@ -74,12 +74,13 @@ def panelview( Examples -------- - ```python + ```{python} import pandas as pd import numpy as np import pyfixest as pf - df_het = pd.read_csv("pd.read_csv("pyfixest/did/data/df_het.csv") + url = "https://raw.githubusercontent.com/py-econometrics/pyfixest/master/pyfixest/did/data/df_het.csv" + df_het = pd.read_csv(url) # Inspect treatment assignment pf.panelview( diff --git a/pyfixest/estimation/demean_.py b/pyfixest/estimation/demean_.py index 490af17a..eade8d5b 100644 --- a/pyfixest/estimation/demean_.py +++ b/pyfixest/estimation/demean_.py @@ -217,6 +217,39 @@ def demean( tuple[numpy.ndarray, bool] A tuple containing the demeaned array of shape (n_samples, n_features) and a boolean indicating whether the algorithm converged successfully. + + Examples + -------- + ```{python} + import numpy as np + import pyfixest as pf + from pyfixest.utils.dgps import get_blw + from pyfixest.estimation.demean_ import demean + from formulaic import model_matrix + + fml = "y ~ treat | state + year" + + data = get_blw() + data.head() + + Y, rhs = model_matrix(fml, data) + X = rhs[0].drop(columns="Intercept") + fe = rhs[1].drop(columns="Intercept") + YX = np.concatenate([Y, X], axis=1) + + # to numpy + Y = Y.to_numpy() + X = X.to_numpy() + YX = np.concatenate([Y, X], axis=1) + fe = fe.to_numpy().astype(int) # demean requires fixed effects as ints! + + YX_demeaned, success = demean(YX, fe, weights = np.ones(YX.shape[0])) + Y_demeaned = YX_demeaned[:, 0] + X_demeaned = YX_demeaned[:, 1:] + + print(np.linalg.lstsq(X_demeaned, Y_demeaned, rcond=None)[0]) + print(pf.feols(fml, data).coef()) + ``` """ n_samples, n_features = x.shape n_factors = flist.shape[1] diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py index a56f6c86..33bb0387 100644 --- a/pyfixest/estimation/feiv_.py +++ b/pyfixest/estimation/feiv_.py @@ -374,9 +374,7 @@ def IV_Diag(self, statistics: Optional[list[str]] = None): print("(Unadjusted) F stat :", F_stat_pf) print("Effective F stat :", F_stat_eff_pf) - # The example above generates the following results - # (Unadjusted) F stat : 52.81535560457482 - # Effective F stat : 48.661542741328205 + ``` """ # Set default statistics iv_diag_stat = ["f_stat", "effective_f"] diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 7bfd04e2..9faa1ba0 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -984,20 +984,17 @@ def wald_test(self, R=None, q=None, distribution="F"): Examples -------- + ```{python} import numpy as np import pandas as pd + import pyfixest as pf - from pyfixest.estimation.estimation import feols - - data = pd.read_csv("pyfixest/did/data/df_het.csv") - data = data.iloc[1:3000] + data = pf.get_data() + fit = pf.feols("Y ~ X1 + X2| f1", data, vcov={"CRV1": "f1"}, ssc=pf.ssc(adj=False)) R = np.array([[1,-1]] ) q = np.array([0.0]) - fml = "dep_var ~ treat" - fit = feols(fml, data, vcov={"CRV1": "year"}, ssc=ssc(adj=False)) - # Wald test fit.wald_test(R=R, q=q, distribution = "chi2") f_stat = fit._f_statistic @@ -1005,10 +1002,7 @@ def wald_test(self, R=None, q=None, distribution="F"): print(f"Python f_stat: {f_stat}") print(f"Python p_stat: {p_stat}") - - # The code above produces the following results : - # Python f_stat: 256.55432910297003 - # Python p_stat: 9.67406627744023e-58 + ``` """ _vcov = self._vcov _N = self._N @@ -1332,15 +1326,15 @@ def ccv( Examples -------- - ```python - from pyfixest.estimation import feols - from pyfixest.utils import get_data + ```{python} + import pyfixest as pf + import numpy as np - data = get_data() - data["D1"] = np.random.choice([0, 1], size=data.shape[0]) + data = pf.get_data() + data["D"] = np.random.choice([0, 1], size=data.shape[0]) - fit = feols("Y ~ D", data=data, vcov={"CRV1": "group_id"}) - fit.ccv(treatment="D", pk=0.05, gk=0.5, n_splits=8, seed=123).head() + fit = pf.feols("Y ~ D", data=data, vcov={"CRV1": "group_id"}) + fit.ccv(treatment="D", pk=0.05, qk=0.5, n_splits=8, seed=123).head() ``` """ assert ( @@ -1873,7 +1867,7 @@ def confint( Examples -------- - ```python + ```{python} from pyfixest.utils import get_data from pyfixest.estimation import feols diff --git a/pyfixest/estimation/model_matrix_fixest_.py b/pyfixest/estimation/model_matrix_fixest_.py index ef59fbec..0942cbfe 100644 --- a/pyfixest/estimation/model_matrix_fixest_.py +++ b/pyfixest/estimation/model_matrix_fixest_.py @@ -68,6 +68,20 @@ def model_matrix_fixest( List of variables interacted with i() syntax, None if not applicable. - 'X_is_empty' : bool Flag indicating whether X is empty. + + Examples + -------- + ```{python} + import pyfixest as pf + from pyfixest.estimation.model_matrix_fixest_ import model_matrix_fixest + + data = pf.get_data() + fit = pf.feols("Y ~ X1 + f1 + f2", data=data) + FixestFormula = fit.FixestFormula + + mm = model_matrix_fixest(FixestFormula, data) + mm + ``` """ FixestFormula.check_syntax() diff --git a/pyfixest/estimation/multcomp.py b/pyfixest/estimation/multcomp.py index f089ed8f..09aa88e3 100644 --- a/pyfixest/estimation/multcomp.py +++ b/pyfixest/estimation/multcomp.py @@ -35,15 +35,14 @@ def bonferroni(models: ModelInputType, param: str) -> pd.DataFrame: Examples -------- - ```python - from pyfixest.estimation import feols + ```{python} + import pyfixest as pf from pyfixest.utils import get_data - from pyfixest.multcomp import bonferroni data = get_data().dropna() - fit1 = feols("Y ~ X1", data=data) - fit2 = feols("Y ~ X1 + X2", data=data) - bonf_df = bonferroni([fit1, fit2], param="X1") + fit1 = pf.feols("Y ~ X1", data=data) + fit2 = pf.feols("Y ~ X1 + X2", data=data) + bonf_df = pf.bonferroni([fit1, fit2], param="X1") bonf_df ``` """ @@ -106,18 +105,21 @@ def rwolf( Examples -------- - ```python - from pyfixest.estimation import feols + ```{python} + import pyfixest as pf from pyfixest.utils import get_data - from pyfixest.multcomp import rwolf data = get_data().dropna() - fit = feols("Y ~ Y2 + X1 + X2", data=data) - rwolf(fit.to_list(), "X1", reps=9999, seed=123) + fit = pf.feols("Y ~ Y2 + X1 + X2", data=data) + pf.rwolf(fit, "X1", reps=9999, seed=123) + + fit1 = pf.feols("Y ~ X1", data=data) + fit2 = pf.feols("Y ~ X1 + X2", data=data) + rwolf_df = pf.rwolf([fit1, fit2], "X1", reps=9999, seed=123) + + # use randomization inference + rwolf_df = pf.rwolf([fit1, fit2], "X1", reps=9999, seed=123, sampling_method = "ri") - fit1 = feols("Y ~ X1", data=data) - fit2 = feols("Y ~ X1 + X2", data=data) - rwolf_df = rwolf([fit1, fit2], "X1", reps=9999, seed=123) rwolf_df ``` """ diff --git a/pyfixest/report/summarize.py b/pyfixest/report/summarize.py index 188a4647..df7124c4 100644 --- a/pyfixest/report/summarize.py +++ b/pyfixest/report/summarize.py @@ -122,6 +122,21 @@ def etable( pandas.DataFrame A styled DataFrame with the coefficients and standard errors of the models. When output is "tex", the LaTeX code is returned as a string. + + Examples + -------- + For more examples, take a look at the [regression tables and summary statistics vignette](https://py-econometrics.github.io/pyfixest/table-layout.html). + + ```{python} + import pyfixest as pf + + # load data + df = pf.get_data() + fit1 = pf.feols("Y~X1 + X2 | f1", df) + fit2 = pf.feols("Y~X1 + X2 | f1 + f2", df) + + pf.etable([fit1, fit2]) + ``` """ if signif_code is None: signif_code = [0.001, 0.01, 0.05] @@ -1189,6 +1204,18 @@ def dtable( Returns ------- A table in the specified format. + + Examples + -------- + For more examples, take a look at the [regression tables and summary statistics vignette](https://py-econometrics.github.io/pyfixest/table-layout.html). + + ```{python} + import pyfixest as pf + + # load data + df = pf.get_data() + pf.dtable(df, vars = ["Y", "X1", "X2", "f1"]) + ``` """ if stats is None: stats = ["count", "mean", "std"]