Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Dec 19, 2024
1 parent fdf1e8b commit 1863d0b
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ tables:
efr_repr:
title: Reproductive Effective Fertility rate (scaled by sex ratio), using UN data
description_short: |-
The number of daughters that live long enough to reproduce, between ages 15 and 49. This focuses on daughters, not all children because only females reproduce. Because a child need not live until age 49 to reproduce, we approximate efr_r by taking the average of efr over all reproductive ages (15-49).
The number of children who live long enough to reproduce, per woman. This number is dependent on the survival of daughters to childbearing age (between 15 and 49 years old).
unit: "children per women"
description_processing: |-
For a given cohort year, we estimate the cumulative survival probability for a person to reach each age from 0 to 49. For example, the probability of a person born in 2000 reaching age 15, 16, 17, and so on up to 49.
Expand All @@ -37,7 +37,7 @@ tables:
efr_labor:
title: Labor Effective Fertility rate, using UN data
description_short: |-
The number of children born in a year who will live long enough to earn labor income. This is approximated by taking the average of Effective Fertility rate (EFR) over all working ages (15-65).
The number of children who live long enough to earn labor income, per woman. This number is dependent on the survival of children to working age (between 15 and 65 years old).
unit: "children per women"
description_processing: |-
For a given cohort year, we estimate the cumulative survival probability for a person to reach each age from 0 to 65. E.g. the probability of a person born in 2000 to reach age 15, 16, 17, ..., 65.
Expand Down
91 changes: 56 additions & 35 deletions etl/steps/data/garden/demography/2024-12-17/efr_malani_jacob.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Origin
from owid.catalog import processing as pr

from etl.helpers import PathFinder, create_dataset
Expand All @@ -16,6 +17,14 @@
AGE_REPR_END = 49
AGE_LAB_END = 65

# Extra origin describing the Malani & Jacob (2024) paper (title, full citation and
# publication date). run() prepends it to the origins of the `efr_repr` indicator so
# that the paper is credited alongside the origins the indicator already carries.
origin = Origin(
producer="Malani and Jacob",
title="A New Measure of Surviving Children that Sheds Light on Long-term Trends in Fertility",
citation_full="Malani, A., & Jacob, A. (2024). A New Measure of Surviving Children that Sheds Light on Long-term Trends in Fertility. https://doi.org/10.3386/w33175",
date_published="2024-11-01", # type: ignore
)


def run(dest_dir: str) -> None:
#
Expand All @@ -39,45 +48,16 @@ def run(dest_dir: str) -> None:
tb_proj=tb_un_proj,
)

# Filter TFR table
tb_tfr = tb_tfr.loc[
(tb_tfr["sex"] == "all") & (tb_tfr["age"] == "all") & (tb_tfr["variant"].isin(["estimates", "medium"])),
["country", "year", "fertility_rate"],
]

# Add TFR
tb_un = tb_un.merge(tb_tfr, on=["country", "year"], validate="m:1")

# Estimate EFR
tb_un["efr"] = tb_un["fertility_rate"] * tb_un["cumulative_survival"]

# Estimate metrics
## EFR-reproductive: Average number of daughters that make it to the reproductive age (15-49)
## EFR-labor: Average number of kids that make it to the labour age (15-65)
## Cum survival prob, reproductive: Probability of a girl to survive to the reproductive age (15-49)
## Cum survival prob, labor: Probability of a kid to survive to the labor age (15-65)
tb_un = tb_un.loc[(tb_un["age"] <= AGE_REPR_END) | (tb_un["sex"] == "total")]
tb_un = tb_un.groupby(["country", "year", "sex"], as_index=False)[["efr", "cumulative_survival"]].mean()

# Pivot
tb_un = tb_un.pivot(index=["country", "year"], columns=["sex"], values=["efr", "cumulative_survival"]).reset_index()

def rename_col(colname):
mapping = {
"female": "repr",
"total": "labor",
}

if colname[1] == "":
return colname[0]
else:
return f"{colname[0]}_{mapping.get(colname[1])}"

tb_un.columns = [rename_col(col) for col in tb_un.columns]
# Add EFR
tb_un = estimate_un_efr(tb_un, tb_tfr)

# Format
tb_un = tb_un.format(["country", "year"], short_name="un")

# Add extra origin
tb_un.efr_repr.metadata.origins = [origin] + tb_un.efr_repr.metadata.origins

# Build list of tables
tables = [
tb_un,
]
Expand Down Expand Up @@ -131,3 +111,44 @@ def estimate_un_cum_survival(tb, tb_proj):
# tb = tb.drop(columns=["year_born"])

return tb


def estimate_un_efr(tb_un, tb_tfr):
    """Combine cumulative survival with total fertility into effective fertility rates.

    Steps:
      1. Keep only the TFR rows with sex "all", age "all" and variant "estimates" or
         "medium", and only the country, year and fertility_rate columns.
      2. Attach the fertility rate to every row of `tb_un` (many-to-one on country and
         year) and compute `efr = fertility_rate * cumulative_survival`.
      3. Keep rows whose age is at most AGE_REPR_END, plus every row with sex "total"
         regardless of age, then average `efr` and `cumulative_survival` per country,
         year and sex.
      4. Pivot sex into columns and flatten the column names: values for sex "female"
         get the suffix "_repr" (reproductive metrics) and values for sex "total" get
         the suffix "_labor" (labor metrics).

    Args:
        tb_un: Table with (at least) the columns country, year, sex, age and
            cumulative_survival.
        tb_tfr: Table with (at least) the columns country, year, sex, age, variant and
            fertility_rate.

    Returns:
        Table with one row per country and year, and columns such as efr_repr,
        efr_labor, cumulative_survival_repr and cumulative_survival_labor.
    """
    key_cols = ["country", "year"]
    metric_cols = ["efr", "cumulative_survival"]
    suffix_by_sex = {
        "female": "repr",
        "total": "labor",
    }

    # Select the overall fertility rate (all sexes, all ages; estimates + medium projection).
    is_all_sexes = tb_tfr["sex"] == "all"
    is_all_ages = tb_tfr["age"] == "all"
    is_wanted_variant = tb_tfr["variant"].isin(["estimates", "medium"])
    tfr = tb_tfr.loc[
        is_all_sexes & is_all_ages & is_wanted_variant,
        key_cols + ["fertility_rate"],
    ]

    # validate="m:1" makes the merge fail if the TFR table has duplicated country-years.
    merged = tb_un.merge(tfr, on=key_cols, validate="m:1")

    # Effective fertility rate for each row.
    merged["efr"] = merged["fertility_rate"] * merged["cumulative_survival"]

    # The age cap applies to every sex except "total", whose rows are all kept.
    keep = (merged["age"] <= AGE_REPR_END) | (merged["sex"] == "total")
    averaged = merged.loc[keep].groupby(key_cols + ["sex"], as_index=False)[metric_cols].mean()

    # One column per (metric, sex) pair.
    wide = averaged.pivot(index=key_cols, columns=["sex"], values=metric_cols).reset_index()

    # Flatten the two-level column labels. Index columns carry an empty second level.
    # NOTE(review): a sex value other than "female"/"total" ends up with the suffix
    # "None", since the lookup has no fallback -- confirm such rows never reach this point.
    wide.columns = [
        metric if sex == "" else f"{metric}_{suffix_by_sex.get(sex)}"
        for metric, sex in wide.columns
    ]

    return wide

0 comments on commit 1863d0b

Please sign in to comment.