Andrew Gelman, Jennifer Hill, Aki Vehtari 2021-04-20
Tidyverse version by Bill Behrman.
Example where an informative prior makes a difference. See Chapter 9 in Regression and Other Stories.
# Packages
library(tidyverse)
library(rstanarm)
# Parameters
# Common code
file_common <- here::here("_common.R")
#===============================================================================
# Run common code
source(file_common)
Data
sex_ratio <-
tribble(
~x, ~y,
-2, 50,
-1, 44,
0, 50,
1, 47,
2, 56
)
Least-squares regression.
fit_lm <- lm(y ~ x, data = sex_ratio)
arm::display(fit_lm)
#> lm(formula = y ~ x, data = sex_ratio)
#> coef.est coef.se
#> (Intercept) 49.40 1.94
#> x 1.50 1.37
#> ---
#> n = 5, k = 2
#> residual sd = 4.35, R-Squared = 0.28
Data on beauty and sex ratio.
sex_ratio %>%
ggplot(aes(x, y)) +
geom_point() +
scale_y_continuous(labels = scales::label_percent(accuracy = 1, scale = 1)) +
labs(
title = "Data on beauty and sex ratio",
x = "Attractiveness of parent",
y = "Percentage of girl babies"
)
Data and least-squares regression line.
intercept <- coef(fit_lm)[["(Intercept)"]]
slope <- coef(fit_lm)[["x"]]
slope.se <- arm::se.coef(fit_lm)[["x"]]
eqn <-
str_glue(
"y = {format(intercept, digits = 1, nsmall = 1)} + ",
"{format(slope, digits = 1, nsmall = 1)} x\n",
"(Std err of slope is {format(slope.se, digits = 1, nsmall = 1)})"
)
sex_ratio %>%
ggplot(aes(x, y)) +
geom_point() +
geom_abline(slope = slope, intercept = intercept) +
annotate("text", x = 0.2, y = 53, label = eqn, hjust = 0) +
scale_y_continuous(labels = scales::label_percent(accuracy = 1, scale = 1)) +
labs(
title = "Data and least-squares regression line",
x = "Attractiveness of parent",
y = "Percentage of girl babies"
)
Fit linear regression with default weakly informative prior.
set.seed(264)
fit_bayes_default <- stan_glm(y ~ x, data = sex_ratio, refresh = 0)
fit_bayes_default
#> stan_glm
#> family: gaussian [identity]
#> formula: y ~ x
#> observations: 5
#> predictors: 2
#> ------
#> Median MAD_SD
#> (Intercept) 49.4 2.0
#> x 1.5 1.4
#>
#> Auxiliary parameter(s):
#> Median MAD_SD
#> sigma 4.6 1.7
#>
#> ------
#> * For help interpreting the printed output see ?print.stanreg
#> * For info on the priors used see ?prior_summary.stanreg
Fit prior summary.
prior_summary(fit_bayes_default)
#> Priors for model 'fit_bayes_default'
#> ------
#> Intercept (after predictors centered)
#> Specified prior:
#> ~ normal(location = 49, scale = 2.5)
#> Adjusted prior:
#> ~ normal(location = 49, scale = 11)
#>
#> Coefficients
#> Specified prior:
#> ~ normal(location = 0, scale = 2.5)
#> Adjusted prior:
#> ~ normal(location = 0, scale = 7)
#>
#> Auxiliary (sigma)
#> Specified prior:
#> ~ exponential(rate = 1)
#> Adjusted prior:
#> ~ exponential(rate = 0.22)
#> ------
#> See help('prior_summary.stanreg') for more details
Posterior simulations under default prior.
sims_default <- as_tibble(fit_bayes_default)
coefs_default <-
tibble(
`(Intercept)` = coef(fit_bayes_default)[["(Intercept)"]],
x = coef(fit_bayes_default)[["x"]]
)
sims_default %>%
ggplot(aes(`(Intercept)`, x)) +
geom_point(size = 0.1) +
geom_point(data = coefs_default, color = "red", size = 1.5) +
labs(
title = "Posterior simulations under default prior",
x = "Intercept",
y = "Slope"
)
Bayes regression line and posterior uncertainty given default prior.
set.seed(264)
n_lines <- 100
sex_ratio %>%
ggplot(aes(x, y)) +
geom_abline(
aes(slope = x, intercept = `(Intercept)`),
data = sims_default %>% slice_sample(n = n_lines),
alpha = 0.25
) +
geom_abline(
aes(slope = x, intercept = `(Intercept)`),
data = coefs_default,
color = "red"
) +
geom_point(color = "white", size = 2) +
geom_point() +
scale_y_continuous(labels = scales::label_percent(accuracy = 1, scale = 1)) +
labs(
title =
"Bayes regression line and posterior uncertainty given default prior",
x = "Attractiveness of parent",
y = "Percentage of girl babies"
)
Fit linear regression with informative prior.
set.seed(264)
fit_bayes_inform <-
stan_glm(
y ~ x,
data = sex_ratio,
refresh = 0,
prior = normal(location = 0, scale = 0.2),
prior_intercept = normal(location = 48.8, scale = 0.5)
)
fit_bayes_inform
#> stan_glm
#> family: gaussian [identity]
#> formula: y ~ x
#> observations: 5
#> predictors: 2
#> ------
#> Median MAD_SD
#> (Intercept) 48.8 0.5
#> x 0.0 0.2
#>
#> Auxiliary parameter(s):
#> Median MAD_SD
#> sigma 4.3 1.3
#>
#> ------
#> * For help interpreting the printed output see ?print.stanreg
#> * For info on the priors used see ?prior_summary.stanreg
Fit prior summary.
prior_summary(fit_bayes_inform)
#> Priors for model 'fit_bayes_inform'
#> ------
#> Intercept (after predictors centered)
#> ~ normal(location = 49, scale = 0.5)
#>
#> Coefficients
#> ~ normal(location = 0, scale = 0.2)
#>
#> Auxiliary (sigma)
#> Specified prior:
#> ~ exponential(rate = 1)
#> Adjusted prior:
#> ~ exponential(rate = 0.22)
#> ------
#> See help('prior_summary.stanreg') for more details
Posterior simulations under informative prior.
sims_inform <- as_tibble(fit_bayes_inform)
coefs_inform <-
tibble(
`(Intercept)` = coef(fit_bayes_inform)[["(Intercept)"]],
x = coef(fit_bayes_inform)[["x"]]
)
sims_inform %>%
ggplot(aes(`(Intercept)`, x)) +
geom_blank(data = sims_default) +
geom_point(size = 0.1) +
geom_point(data = coefs_inform, color = "red", size = 1.5) +
labs(
title = "Posterior simulations under informative prior",
x = "Intercept",
y = "Slope"
)
Bayes regression line and posterior uncertainty given informative prior.
set.seed(264)
n_lines <- 100
sex_ratio %>%
ggplot(aes(x, y)) +
geom_abline(
aes(slope = x, intercept = `(Intercept)`),
data = sims_inform %>% slice_sample(n = n_lines),
alpha = 0.25
) +
geom_abline(
aes(slope = x, intercept = `(Intercept)`),
data = coefs_inform,
color = "red"
) +
geom_point(color = "white", size = 2) +
geom_point() +
scale_y_continuous(labels = scales::label_percent(accuracy = 1, scale = 1)) +
labs(
title =
"Bayes regression line and posterior uncertainty given informative prior",
x = "Attractiveness of parent",
y = "Percentage of girl babies"
)