ch13_multilevels.qmd

# Multilevel Models {#MLM}

```{r}
#| include: false
library(dplyr)
library(tidyr)
library(tidybayes)
library(rethinking)
library(brms)
# library(qs)
library(loo)
library(modelr)
library(skimr)
# library(simpr)
library(posterior)
library(scales)
library(dagitty)
library(ggplot2)
library(ggdag)
# library(ggdist)
# library(ggmcmc)
library(bayesplot)
library(patchwork)
library(paletteer)
```

Some options to facilitate the computations

```{r}
# Avoid recompiling Stan programs that have not changed.
rstan::rstan_options(auto_write = TRUE)
# Run on a local multicore machine: use every core that is detected.
options(mc.cores = parallel::detectCores())
```

The default theme used by `ggplot2`

```{r}
# Make the solarized theme from `ggthemes` the default for all later plots.
theme_set(ggthemes::theme_solarized_2())
# Alternative kept for reference: light variant with dark grey facet strips.
# theme_set(
#   ggthemes::theme_solarized_2(light = TRUE) +
#   theme(strip.background = element_rect(fill = "darkgrey"))
#   )
```

## Example: Multilevel tadpoles

```{r}
# Load the reed frog data and give every row its own tank id (a factor),
# which is the cluster variable of the models below.
data(reedfrogs)
dataFrogs <- reedfrogs |>
  mutate(tank = factor(seq_len(n())))
rm(reedfrogs)
# Overview of the columns with numeric summaries rounded to 2 decimals.
# An anonymous function is used because passing extra arguments through
# `across(...)` is deprecated since dplyr 1.1.0.
dataFrogs |>
  skim() |>
  select(-n_missing, -complete_rate) |>
  mutate(across(.cols = where(is.numeric), .fns = \(x) round(x, digits = 2)))
```

with the plot of data

```{r ch13_plotFrogs}
# Observed survival proportion of every tank. The dashed horizontal line is
# drawn at 0.8 and the vertical lines separate the three tank sizes.
plotFrogs <- ggplot(dataFrogs, aes(x = as.integer(tank), y = propsurv)) +
  geom_point(color = "sienna") +
  geom_hline(yintercept = 0.8, color = "sienna1", linetype = "dashed") +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_vline(xintercept = c(16.5, 32.5), linewidth = 1/3, color = "sienna1") +
  scale_x_continuous(breaks = c(1, 16, 32, 48)) +
  scale_y_continuous(breaks = scales::breaks_width(width = 0.2),
                     labels = scales::label_percent(accuracy = 1)) +
  annotate(geom = "text",
           x = c(8, 16 + 6, 32 + 8), y = 0,
           label = c("small tanks", "medium tanks", "large tanks"),
           color = "midnightblue") +
  theme(axis.text.x = element_text(size = rel(1))) +
  labs(title = "Tadpole tanks",
       subtitle = sprintf("%d data points", nrow(dataFrogs)),
       x = "tank", y = "proportion survival")
plotFrogs
```

### Simple

and the model, without multilevel effect, is

$$
\begin{align*}
surv_i &\sim \mathcal{Binomial}(n_i, p_i) \\
logit(p_i) &= \alpha_{tank[i]} \\
\alpha_{tank} &\sim \mathcal{N}(0, 1.5)
\end{align*}
$$

This fit gives the closest waic to the one shown in R code 13.4 on p. 404.

```{r ch13_fit13_01}
# No-pooling model: `0 + tank` gives every tank its own intercept, each with
# a normal(0, 1.5) prior. The loo and waic criteria are added to the fit.
# The result goes through xfun::cache_rds() with the file prefix
# "ch13_fit13_01"; tictoc times the chunk and its message records how long
# an uncached run takes.
# NOTE(review): cache_rds presumably keys the cache on the code inside the
# braces, so editing that code would trigger a refit -- confirm.
# NOTE(review): `detectCores()` is unqualified here, whereas the setup chunk
# calls `parallel::detectCores()`; this assumes `parallel` is attached by one
# of the loaded packages -- confirm.
tictoc::tic(msg = sprintf("run time of %s, use the cache.", "70 secs."))
fit13_01 <- xfun::cache_rds({
  out <- brm(
    data = dataFrogs,
    family = binomial,
    formula = bf(surv | trials(density) ~ 0 + tank),
    prior = c(
      prior(normal(0, 1.5), class = b)),
    iter = 1000, warmup = 500, chains = 2,
    cores = detectCores(), seed = 1301)
  add_criterion(out, c("loo", "waic"))},
  file = "ch13_fit13_01", rerun = FALSE)
tictoc::toc()
```

```{r}
# Posterior summary of the `b_` coefficients: mean, sd, 89% quantile interval
# and the default convergence measures, rounded to 2 decimals with the
# `ess*` columns shown as integers.
# An anonymous function is used because passing extra arguments through
# `across(...)` is deprecated since dplyr 1.1.0.
fit13_01 |>
  spread_draws(`b_.+`, regex = TRUE) |>
  summarize_draws("mean", "sd", ~quantile(.x, probs = c(0.055, 0.945)),
                  default_convergence_measures()) |>
  mutate(across(.cols = where(is.numeric), .fns = \(x) round(x, digits = 2)),
         across(.cols = starts_with("ess"), .fns = as.integer))
```

and visualize the intercepts which correspond to the logit of the probabilities.

```{r ch13_plot13_01}
plot13_01 <- list()
plot13_01 <- within(plot13_01, {
  # Posterior mean of every tank intercept on the log-odds scale and, through
  # the inverse logit, on the probability scale. Only the `b_` coefficients
  # are kept: besides `lp__`, recent brms versions also return `lprior`,
  # which is not an intercept and would otherwise end up in the dot plot.
  data <- fit13_01 |>
    summarize_draws() |>
    filter(grepl("^b_", x = variable)) |>
    select(variable, a = mean) |>
    mutate(p = inv_logit_scaled(a)) |>
    rename(exp_surv_log_odds = a, exp_surv_prob = p) |>
    pivot_longer(cols = c(exp_surv_log_odds, exp_surv_prob))

  # One dot plot per scale; the x axes are free because the two scales differ.
  p <- ggplot(data, aes(x = value, fill = name, color = name)) +
    geom_dotplot() +
    scale_fill_manual(values = c("orange1", "orange4")) +
    scale_color_manual(values = c("orange1", "orange4")) +
    scale_y_continuous(breaks = NULL) +
    theme(legend.position = "none") +
    facet_wrap(. ~ name, scales = "free_x") +
    labs(title = "Tank-level intercepts from the no-pooling model",
         x = NULL, y = NULL)
  })
plot13_01$p
```

### Multilevel

and now the multilevel model

$$
\begin{align*}
surv_i &\sim \mathcal{Binomial}(n_i, p_i) \\
logit(p_i) &= \alpha_{tank[i]} \\
\alpha_j &\sim \mathcal{N}(\bar{\alpha}, \sigma) \\
\bar{\alpha} &\sim \mathcal{N}(0, 1.5) \\
\sigma &\sim \mathcal{Exponential}(1)
\end{align*}
$$

and the fit is as follows. Note the prior `prior(exponential(1), class = sd)` *which is parametrized in the standard deviation metric* (Kurtz). It is common for multilevel software to model the variance metric. This will be further explained in chapter 14.

```{r}
#| echo: false
#| output: false
# List the parameters of the varying-intercept tank model that accept a prior.
get_prior(
  formula = bf(surv | trials(density) ~ 1 + (1 | tank)),
  family = binomial,
  data = dataFrogs)
```

This fit gives the closest waic to the one shown in R code 13.4 on p. 404.

```{r ch13_fit13_02}
# Multilevel model: a population intercept with a normal(0, 1.5) prior plus a
# varying intercept per tank whose standard deviation has an exponential(1)
# prior. Prior draws are sampled as well (`sample_prior = TRUE`), and the loo
# and waic criteria are added to the fit.
# The result goes through xfun::cache_rds() with the file prefix
# "ch13_fit13_02"; tictoc times the chunk.
# NOTE(review): cache_rds presumably keys the cache on the code inside the
# braces, so editing that code would trigger a refit -- confirm.
tictoc::tic(msg = sprintf("run time of %s, use the cache.", "60 secs."))
fit13_02 <- xfun::cache_rds({
  out <- brm(
    data = dataFrogs,
    family = binomial,
    formula = bf(surv | trials(density) ~ 1 + (1 | tank)),
    prior = c(
      prior(normal(0, 1.5), class = Intercept),
      prior(exponential(1), class = sd)),
    sample_prior = TRUE,
    iter = 1000, warmup = 500, chains = 2,
    cores = detectCores(), seed = 1303)
  add_criterion(out, c("loo", "waic"))},
  file = "ch13_fit13_02", rerun = FALSE)
tictoc::toc()
```

```{r}
# Posterior summary of the `b_`, `sd_` and `r_` variables: mean, sd, 89%
# quantile interval and the default convergence measures, rounded to 2
# decimals with the `ess*` columns shown as integers.
# An anonymous function is used because passing extra arguments through
# `across(...)` is deprecated since dplyr 1.1.0.
fit13_02 |>
  spread_draws(`b_.+`, `sd_.+`, `r_.+`, regex = TRUE) |>
  summarize_draws("mean", "sd", ~quantile(.x, probs = c(0.055, 0.945)),
                  default_convergence_measures()) |>
  mutate(across(.cols = where(is.numeric), .fns = \(x) round(x, digits = 2)),
         across(.cols = starts_with("ess"), .fns = as.integer))
```

### Comparison

and compare the models

```{r}
# Full (non-simplified) WAIC comparison of the two tank models.
loo_compare(fit13_01, fit13_02, criterion = "waic") |>
  print(simplify = FALSE)
```

```{r}
# WAIC-based model weights, rounded to 2 decimals.
round(model_weights(fit13_01, fit13_02, weights = "waic"), digits = 2)
```

### Posterior distribution

```{r}
# Print the model summary of the multilevel fit.
fit13_02
```

This time we don't have a list of intercepts as in `fit13_01`. We have a description of their distribution $\alpha_j \sim \mathcal{N}(\bar{\alpha}, \sigma)$ where $Intercept = \bar{\alpha}$ and $sd(Intercept) = \sigma$.

The task of getting the posterior is easier with `tidybayes` and `posterior` than the way McElreath and Kurtz do it.

```{r ch13_epred13_02}
# Draw 500 expected-value predictions per tank and also express them as a
# proportion by dividing `.epred` by the tank's `density`.
# IMPORTANT: the draws are summarised with the median and an 89% quantile
#            interval (`median_qi`) rather than the mean, because the
#            binomial distribution is skewed. You don't always have to use
#            the mean!
epred13_02 <- dataFrogs |>
  add_epred_draws(fit13_02, ndraws = 500) |>
  mutate(propsurv.epred = .epred / density) |>
  median_qi(.width = 0.89)
epred13_02
```

```{r}
#| fig-cap: "Figure 13.1"
# Overlay the multilevel estimates (open orange circles) on the observed
# proportions already drawn in `plotFrogs`.
plotFrogs +
  geom_point(
    data = epred13_02,
    mapping = aes(x = as.integer(tank), y = propsurv.epred),
    shape = 1, size = 2, color = "darkorange",
    inherit.aes = FALSE)
```

First, we take a random sample of 100 of the 1000 post-warmup draws (2 chains × 500 iterations) from the posterior with `slice_sample(n = 100)`.

Second, to simulate the **distribution** of the log-odds values described by the `b_Intercept` and `sd_tank__Intercept` of **each draw**, we create a sequence of 100 log-odds values between -4 and 5 (based on the actual range of -2 to 3.5 shown just above). This is done by `expand(nesting(.draw, b_Intercept, sd_tank__Intercept), x = seq(from = -4, to = 5, length.out = 100))`, which results in 10000 lines.

Third, we compute the normal density for each of the 10000 lines using the `b_Intercept` and `sd_tank__Intercept` of each line. The normal density is used because the model is $\alpha_j \sim \mathcal{N}(\bar{\alpha}, \sigma)$, where `b_Intercept` is $\bar{\alpha}$ and `sd_tank__Intercept` is $\sigma$. This is done with `mutate(density = dnorm(x, mean = b_Intercept, sd = sd_tank__Intercept))`.

```{r}
#| fig-cap: "Figure 13.2"
samples <- list()
samples <- within(samples, {
  # 100 random posterior draws, each expanded over a grid of 100 log-odds
  # values on which the normal density implied by that draw is evaluated.
  data1 <- as_draws_df(fit13_02) |>
    slice_sample(n = 100) |>
    expand(nesting(.draw, b_Intercept, sd_tank__Intercept),
           x = seq(from = -4, to = 5, length.out = 100)) |>
    mutate(density = dnorm(x, mean = b_Intercept, sd = sd_tank__Intercept))
  # 1000 draws resampled with replacement; one simulated log-odds value per
  # draw, converted to a survival probability. `inv_logit_scaled()` is the
  # inverse-logit helper already used in the rest of this chapter.
  data2 <- as_draws_df(fit13_02) |>
    slice_sample(n = 1000, replace = TRUE) |>
    mutate(p_logit = rnorm(n(), mean = b_Intercept, sd = sd_tank__Intercept),
           p = inv_logit_scaled(p_logit))

  # One density curve per sampled draw, on the log-odds scale
  p1 <- ggplot(data1, aes(x = x, y = density, group = .draw)) +
    geom_line(alpha = .2, color = "sienna2") +
    scale_y_continuous(NULL, breaks = NULL) +
    coord_cartesian(xlim = c(-3, 4)) +
    labs(title = "Population survival distribution",
         subtitle = "log-odds scale", x = NULL, y = NULL)

  # Density of the simulated survival probabilities.
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  p2 <- ggplot(data2, aes(x = p)) +
    geom_density(linewidth = 0, fill = "sienna2", color = "sienna2",
                 adjust = 0.1) +
    scale_y_continuous(NULL, breaks = NULL) +
    labs(title = "Probability of survival",
         subtitle = "transformed by the inverse-logit function",
         x = NULL, y = NULL)
})
wrap_plots(samples$p1, samples$p2)
```

To improve the model, you could use a Half-Normal (or Half-Cauchy) prior for $\sigma$ instead of the exponential. The multilevel model then becomes

$$
\begin{align*}
surv_i &\sim \mathcal{Binomial}(n_i, p_i) \\
logit(p_i) &= \alpha_{tank[i]} \\
\alpha_j &\sim \mathcal{N}(\bar{\alpha}, \sigma) \\
\bar{\alpha} &\sim \mathcal{N}(0, 1.5) \\
\sigma &\sim \mathcal{Half-Normal}(0, 1)
\end{align*}
$$

## Varying effects

### The model

$$
\begin{align*}
surv_i &\sim \mathcal{Binomial}(n_i, p_i) \\
logit(p_i) &= \alpha_{pond[i]} \\
\alpha_{pond[i]} &\sim \mathcal{N}(\bar{\alpha}, \sigma) \\
\bar{\alpha} &\sim \mathcal{N}(0, 1.5) \\
\sigma &\sim \mathcal{Exponential}(1)
\end{align*}
$$

where we have

-   $\bar{\alpha}$: Average log-odd of the survival rate for entire population of ponds
-   $\sigma$: Standard deviation of log-odds of survival among ponds
-   $\alpha_{pond}$: vector of individual pond intercept (mean)

### Assign value to the parameters

```{r}
sim <- list()
sim <- within(sim, {
  # Population parameters used to generate the ponds
  a_bar <- 1.5
  sigma <- 1.5
  nponds <- 60
  set.seed(5005)  # same seed as McElreath
  # One row per pond: four pond sizes repeated 15 times each and a true
  # log-odds of survival drawn from Normal(a_bar, sigma)
  data <- data.frame(
    pond = seq_len(nponds),
    Ni = rep(c(5L, 10L, 25L, 35L), each = 15),
    true_a = rnorm(nponds, mean = a_bar, sd = sigma))
  # True survival probability of every pond
  data$true_p <- inv_logit_scaled(data$true_a)
})
# because of stan, Ni must be an integer
stopifnot(is.integer(sim$data[["Ni"]]))
# glimpse(sim$data)
```

and we plot the data to see the real distributions

```{r}
# Dot plot of the simulated true log-odds, one row per pond size `Ni`,
# with a 50% interval.
ggplot(sim$data, aes(x = true_a, y = as.factor(Ni))) +
  stat_dotsinterval(.width = 0.5, fill = "orange", fatten_point = 3) +
  labs(title = "Distribution of log odd of survival by pond",
       y = NULL)
```

### Simulate survivors

The model uses

$$
\begin{align*}
surv_i &\sim \mathcal{Binomial}(n_i, p_i) \\
logit(p_i) &= \alpha_{pond[i]} \\
\alpha_{pond[i]} &\sim \mathcal{N}(\bar{\alpha}, \sigma) \\
\bar{\alpha} &\sim \mathcal{N}(0, 1.5) \\
\sigma &\sim \mathcal{Exponential}(1)
\end{align*}
$$

therefore the simulation of $p_i$ must use the logistic function

```{r}
sim <- within(sim, {
  # Re-seed, then draw one binomial survivor count per pond from its size
  # `Ni` and its true survival probability `true_p`.
  set.seed(5005)
  data$Si <- rbinom(nponds, size = data$Ni, prob = data$true_p)
  # equivalent: data <- data |> mutate(Si = rbinom(n(), prob = true_p, size = Ni))
})
```

### Compute the no-pooling estimates

```{r}
# No-pooling estimate: the raw proportion of survivors in each pond.
sim$data$nopool_p <- with(sim$data, Si / Ni)
# glimpse(sim$data)
```

### Compute the partial pooling estimates

```{r ch13_fit13_03}
# Partial-pooling model on the simulated ponds: a population intercept with a
# normal(0, 1.5) prior plus a varying intercept per pond whose standard
# deviation has an exponential(1) prior. The loo and waic criteria are added.
# The result goes through xfun::cache_rds() with the file prefix
# "ch13_fit13_03"; tictoc times the chunk.
# NOTE(review): cache_rds presumably keys the cache on the code inside the
# braces, so editing that code would trigger a refit -- confirm.
tictoc::tic(msg = sprintf("run time of %s, use the cache.", "80 secs."))
fit13_03 <- xfun::cache_rds({
  out <- brm(
    data = sim$data,
    family = binomial,
    formula = bf(Si | trials(Ni) ~ 1 + (1 | pond)),
    prior = c(prior(normal(0, 1.5), class = Intercept),
            prior(exponential(1), class = sd)),
    iter = 1000, warmup = 500, chains = 2,
    cores = detectCores(), seed = 1307)
  out <- add_criterion(out, c("loo", "waic"))
  out},
  file = "ch13_fit13_03", rerun = FALSE)
tictoc::toc()
```

and we have our point summaries and interval as follows

```{r}
# Posterior summary of the `b_`, `sd_` and `r_` variables: mean, sd, 89%
# quantile interval and the default convergence measures, rounded to 2
# decimals with the `ess*` columns shown as integers.
# An anonymous function is used because passing extra arguments through
# `across(...)` is deprecated since dplyr 1.1.0.
fit13_03 |>
  spread_draws(`b_.+`, `sd_.+`, `r_.+`, regex = TRUE) |>
  summarize_draws("mean", "sd", ~quantile(.x, probs = c(0.055, 0.945)),
                  default_convergence_measures()) |>
  mutate(across(.cols = where(is.numeric), .fns = \(x) round(x, digits = 2)),
         across(.cols = starts_with("ess"), .fns = as.integer))
```

It is important to understand that `epred_draws` is not simply the result of `linpred_draws` converted with the inverse link function. It can be illustrated in this case as follows

```{r}
# Posterior summary of the linear (link-level) predictor of every pond
lp <- linpred_draws(fit13_03, newdata = sim$data[, c("Ni", "pond")]) |>
  select(-.chain, -.iteration) |>
  summarize_draws()
# Posterior summary of the expected value of the posterior predictive
# distribution of every pond
ep <- epred_draws(fit13_03, newdata = sim$data[, c("Ni", "pond")]) |>
  select(-.chain, -.iteration) |>
  summarize_draws()

# the link-level predictor
head(lp[, c("pond", "Ni", "mean")])
# the expected posterior predictive
head(ep[, c("pond", "Ni", "mean")])

# now the inverse of the link-level predictor
# is not the same as the expected posterior predictive.
# `inv_logit_scaled()` is the inverse-logit helper already used in the rest
# of this chapter; `epred` is divided by `Ni` to put it on the same scale.
head(data.frame(
  linpred = lp$mean,
  linpred_inv = inv_logit_scaled(lp$mean),
  epred = ep$mean / ep$Ni
))
```

and we get the information on level 1, the fixed effects, with `fixef(fit13_03)` and on level 2, the random effects, with `ranef(fit13_03)`

```{r}
# fixef(fit13_03)
# ranef(fit13_03)
```

and the whole thing is obtained with the `fit` which calls the stan data

```{r}
# fit13_03$fit
```

::: callout-note
In this project we favor using `tidybayes` and `posterior`. `fixef(fit13_03)`, `ranef(fit13_03)`, `fit13_03$fit` and `coef()[, , ]` are avoided.
:::

```{r}
sim <- within(sim, {
  # Posterior summary of the linear predictor (log-odds) of every pond
  linpred <- linpred_draws(fit13_03, newdata = data[, c("Ni", "pond")]) |>
    select(-.chain, -.iteration) |>
    summarize_draws()

  # Partial-pooling estimate on the probability scale, then the absolute
  # error of both estimates against the true survival probability
  data <- mutate(
    data,
    partpool_p = inv_logit_scaled(linpred$mean),
    nopool_error = abs(nopool_p - true_p),
    partpool_error = abs(partpool_p - true_p))
  })
# glimpse(sim$data)
```

```{r}
#| fig-cap: "Figure 13.3"
# Absolute error of the no-pooling estimates (filled points) and of the
# partial-pooling estimates (open circles) for every simulated pond.
ggplot(sim$data, aes(x = pond, y = nopool_error)) +
  geom_point(color = "sienna") +
  geom_point(mapping = aes(y = partpool_error),
             shape = 1, size = 2, color = "darkorange") +
  # The 60 ponds come in four size groups of 15, so the separators sit
  # between ponds 15|16, 30|31 and 45|46, in line with the group labels
  # below. `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  geom_vline(xintercept = c(15.5, 30.5, 45.5), linewidth = 1/3,
             color = "sienna1") +
  scale_x_continuous(breaks = c(1, 10, 20, 30, 40, 50, 60)) +
  scale_y_continuous(breaks = scales::breaks_width(width = 0.10),
                     labels = scales::label_percent(accuracy = 1)) +
  annotate(geom = "text",
           x = c(15 - 7.5, 30 - 7.5, 45 - 7.5, 60 - 7.5), y = .45,
           label = c("tiny (5)", "small (10)", "medium (25)", "large (35)")) +
  theme(axis.text.x = element_text(size = rel(1))) +
  labs(title = "Tadpole tanks",
       subtitle = "Same results as Kurtz, difference with McElreath caused by the seed",
       x = NULL, y = NULL)
```

## More than one type of cluster

### Multilevel chimpanzees

#### The model

$$
\begin{align*}
pull\_left_i &\sim \mathcal{Binomial}(1, p_i) \\
logit(p_i) &= \alpha_{actor[i]} + \gamma_{block[i]} + \beta_{treatment[i]} \\
\beta_j &\sim \mathcal{N}(0, 0.5), \, \text{for } j = 1 \ldots 4 \\
\alpha_j &\sim \mathcal{N}(\bar{\alpha}, \sigma_{\alpha}), \, \text{for } j = 1 \ldots 7 \\
\gamma_j &\sim \mathcal{N}(0, \sigma_{\gamma}), \, \text{for } j = 1 \ldots 6 \\
\bar{\alpha} &\sim \mathcal{N}(0, 1.5) \\
\sigma_{\alpha} &\sim \mathcal{Exponential}(1) \\
\sigma_{\gamma} &\sim \mathcal{Exponential}(1)
\end{align*}
$$

#### The fit

We load the data

```{r}
# Load the chimpanzees data; turn actor and block into factors and build the
# 4-level treatment factor from `prosoc_left` and `condition`.
data(chimpanzees)
dataChimp <- chimpanzees |>
  mutate(actor = factor(actor),
         block = factor(block),
         treatment = factor(1 + prosoc_left + 2 * condition,
                            levels = as.character(1:4),
                            labels = c("R/N", "L/N", "R/P", "L/P")))
rm(chimpanzees)
# Overview of the columns with numeric summaries rounded to 2 decimals.
# An anonymous function is used because passing extra arguments through
# `across(...)` is deprecated since dplyr 1.1.0.
dataChimp |>
  skim() |>
  mutate(across(.cols = where(is.numeric), .fns = \(x) round(x, digits = 2)))
```

```{r}
#| echo: false
#| output: false
# List the parameters of the non-linear actor/block/treatment model that
# accept a prior.
get_prior(
  formula = bf(pulled_left ~ 0 + a + g + b,
               a ~ 1 + (1 | actor),
               g ~ 0 + block,
               b ~ 0 + treatment,
               nl = TRUE),
  family = bernoulli,
  data = dataChimp)
```

::: callout-note
This fit does not give the exact result obtained by McElreath. This is caused by the fact that `brms` uses a non-centered parametrization. See Kurtz in his section 13.3.1 about that.
:::

```{r ch13_fit13_04}
# Non-linear formula with three parts: `a` (intercept plus a varying
# intercept per actor), `g` (one coefficient per block) and `b` (one
# coefficient per treatment).
# `sigma_g` is declared as a positive parameter through `stanvar`, receives an
# exponential(1) prior through a raw Stan `target +=` statement, and is used
# as the standard deviation of the normal prior on the `g` coefficients.
# The loo and waic criteria are added to the fit.
# The result goes through xfun::cache_rds() with the file prefix
# "ch13_fit13_04"; tictoc times the chunk.
# NOTE(review): cache_rds presumably keys the cache on the code inside the
# braces, so editing that code would trigger a refit -- confirm.
tictoc::tic(msg = sprintf("run time of %s, use the cache.", "80 secs."))
fit13_04 <- xfun::cache_rds({
  out <- brm(data = dataChimp,
      family = bernoulli,
      formula = bf(pulled_left ~ 0 + a + g + b,
                   a ~ 1 + (1 | actor),
                   g ~ 0 + block,
                   b ~ 0 + treatment,
                   nl = TRUE),
      prior = c(prior(normal(0, 1.5), class = b, coef = Intercept, nlpar = a),
                prior(exponential(1), class = sd, group = actor, nlpar = a),
                prior(normal(0, sigma_g), class = b, nlpar = g),
                prior("target += exponential_lpdf(sigma_g | 1)", check = FALSE),
                prior(normal(0, 0.5), class = b, nlpar = b)
                ),
      stanvars = c(stanvar(scode = "  real<lower=0> sigma_g;", 
                           block = "parameters")),
      iter = 1000, warmup = 500, chains = 2, 
      cores = detectCores(), seed = 1319)
  add_criterion(out, c("loo", "waic"))},
  file = "ch13_fit13_04", rerun = FALSE)
tictoc::toc()
```

```{r}
# Posterior summary of every variable except those starting with "lp": mean,
# sd, 89% quantile interval and the default convergence measures, rounded to
# 2 decimals with the `ess*` columns shown as integers.
# An anonymous function is used because passing extra arguments through
# `across(...)` is deprecated since dplyr 1.1.0.
summarize_draws(fit13_04, "mean", "sd", ~quantile(.x, probs = c(0.055, 0.945)),
                default_convergence_measures()) |>
  filter(!grepl("^lp", x = variable)) |>
  mutate(across(.cols = where(is.numeric), .fns = \(x) round(x, digits = 2)),
         across(.cols = starts_with("ess"), .fns = as.integer))
```

the posterior samples are

```{r ch13_post13_04}
post13_04 <- list()
post13_04 <- within(post13_04, {
  # Mean and 89% quantile interval of every variable except those whose name
  # starts with "lp"; the quantile columns are renamed to .lower / .upper.
  summ <- summarize_draws(fit13_04, mean, ~quantile(.x, probs = c(0.055, 0.945))) |>
    filter(!grepl(pattern = "^lp.+", x = variable)) |>
    rename(.lower = `5.5%`,
           .upper = `94.5%`)
  # Long-format draws of every variable whose name matches `sd_.*`.
  # NOTE(review): `sigma_g`, declared through `stanvar` in the fit13_04 chunk,
  # does not match this pattern -- confirm whether it should be included.
  sd <- gather_draws(fit13_04, `sd_.*`, regex = TRUE)
})
# glimpse(post13_04$summ)
```

and we look at the standard deviation of the random effect of actor

```{r ch13_plot13_04}
#| fig-cap: "Figure 13.4"
plot13_04 <- list()
plot13_04 <- within(plot13_04, {
  # Density of the standard-deviation draws gathered in `post13_04$sd`,
  # one colored curve per variable, with the legend placed inside the panel.
  dens <- post13_04$sd |> 
    ggplot(aes(x = .value, color = .variable)) +
    geom_density(adjust = 0.5, linewidth = 1) +
    scale_color_paletteer_d("ggthemes::Classic_10") +
    # Earlier variant kept for reference:
    # tidybayes::stat_halfeye(.width = 0.89, fill = "orange") +
    # tidybayes::stat_halfeye(aes(x = sd_block__a_Intercept), .width = 0.95, fill = "darkgreen") +
    coord_cartesian(xlim = c(0, 4)) +
    theme(legend.position = c(0.6, 0.8),
          legend.background = element_rect(fill = "transparent"),
          legend.title = element_blank()) +
    labs(title = expression(sigma[actor] *", "* sigma[block]),
         x = expression(sigma), y = NULL)
  
  # Point-and-interval plot of the summary in `post13_04$summ` (mean with the
  # .lower/.upper bounds), each point labelled with its rounded mean.
  coef <- post13_04$summ |>
    ggplot(aes(x = mean, xmin = .lower, xmax = .upper, y = variable)) + 
    geom_pointinterval(fatten_point = 3,color = "navyblue") +
    ggrepel::geom_text_repel(aes(label = round(mean, 2)), size = 3, color = "purple") +
    geom_vline(xintercept = 0) +
    theme(legend.position = "none") +
    labs(title = "Posterior distribution",
         subtitle = "With mean and 89% CI",
         x = NULL, y = NULL)
  
})
# Combine every plot stored in the list into one figure with a common title.
wrap_plots(plot13_04) +
  plot_annotation(
    title = "Posterior distributions"
  )
```

```{r}
#| echo: false
#| output: false
# List the parameters of the actor/treatment model (no block term) that
# accept a prior.
get_prior(
  formula = bf(pulled_left ~ 0 + a + b,
               a ~ 1 + (1 | actor),
               b ~ 0 + treatment,
               nl = TRUE),
  family = bernoulli,
  data = dataChimp)
```

::: callout-note
This fit gives the same waic as found on page 418 of section 13.3.1.
:::

```{r ch13_fit13_05}
# Same non-linear model as fit13_04 without the block term: `a` (intercept
# plus a varying intercept per actor) and `b` (one coefficient per treatment).
# The loo and waic criteria are added to the fit.
# The result goes through xfun::cache_rds() with the file prefix
# "ch13_fit13_05"; tictoc times the chunk.
# NOTE(review): cache_rds presumably keys the cache on the code inside the
# braces, so editing that code would trigger a refit -- confirm.
tictoc::tic(msg = sprintf("run time of %s, use the cache.", "70 secs."))
fit13_05 <- xfun::cache_rds({
  out <- brm(data = dataChimp,
      family = bernoulli,
      formula = bf(pulled_left ~ 0 + a + b,
                   a ~ 1 + (1 | actor),
                   b ~ 0 + treatment,
                   nl = TRUE),
      prior = c(prior(normal(0, 1.5), class = b, coef = Intercept, nlpar = a),
                prior(exponential(1), class = sd, group = actor, nlpar = a),
                prior(normal(0, 0.5), class = b, nlpar = b)
                ),
      iter = 1000, warmup = 500, chains = 2, 
      cores = detectCores(), seed = 1319)
  add_criterion(out, c("loo", "waic"))},
  file = "ch13_fit13_05", rerun = FALSE)
tictoc::toc()
```

and we obtain the same results as on p. 418 of section 13.3.1.

```{r}
# Full (non-simplified) WAIC comparison of the models with and without block.
loo_compare(fit13_04, fit13_05, criterion = "waic") |>
  print(simplify = FALSE)
```

### Even more clusters

```{r}
#| echo: false
#| output: false
# List the parameters of the model with varying intercepts for actor,
# treatment and block that accept a prior.
get_prior(
  formula = bf(pulled_left ~ 1 + (1 | actor) + (1 | treatment) + (1 | block)),
  family = bernoulli,
  data = dataChimp)
```

```{r ch13_fit13_06}
# Model with a population intercept (normal(0, 1.5) prior) and varying
# intercepts for actor, treatment and block; all standard deviations share an
# exponential(1) prior. The loo and waic criteria are added to the fit.
# The result goes through xfun::cache_rds() with the file prefix
# "ch13_fit13_06"; tictoc times the chunk.
# NOTE(review): cache_rds presumably keys the cache on the code inside the
# braces, so editing that code would trigger a refit -- confirm.
tictoc::tic(msg = sprintf("run time of %s, use the cache.", "70 secs."))
fit13_06 <- xfun::cache_rds({
  out <- brm(data = dataChimp,
      family = bernoulli,
      formula = bf(pulled_left ~ 1 + (1 | actor) + (1 | treatment) + (1 | block)),
      prior = c(prior(normal(0, 1.5), class = Intercept),
                prior(exponential(1), class = sd)),
      iter = 1000, warmup = 500, chains = 2, 
      cores = detectCores(), seed = 1319)
  add_criterion(out, c("loo", "waic"))},
  file = "ch13_fit13_06", rerun = FALSE)
tictoc::toc()
```

```{r}
# Full (non-simplified) WAIC comparison of the three chimpanzee models.
loo_compare(fit13_04, fit13_05, fit13_06, criterion = "waic") |>
  print(simplify = FALSE)
```

## Divergent transitions and non-centered priors

```{r }
#| echo: false
# Placeholder: this section is not worked through in these notes.
message("Not covered")
```

## Multilevel posterior predictions

It is strongly recommended to read @kurtz2020b, who is more elaborate in this section.

### Posterior prediction for same clusters

We can do a posterior fit for chimp #2. Remember this chimp was an outlier as it was always pulling the left lever no matter what.

```{r}
fitd <- list()
fitd <- within(fitd, {
  chimp <- 2L
  # One row per treatment for the selected actor, with block held at 1
  newdata <- data.frame(
    actor = factor(chimp),
    treatment = unique(dataChimp$treatment),
    block = 1
  )
  # Posterior summary of the expected value for every row of `newdata`
  data <- epred_draws(fit13_04, newdata = newdata) |>
    select(-.chain, -.iteration, -.draw) |>
    summarize_draws()
  # and the empirical frequencies are
  obs <- dataChimp |>
    filter(actor == chimp) |>
    group_by(treatment) |>
    summarize(prob = mean(pulled_left))
  # Ribbon: posterior mean with the q5-q95 band; points: observed frequencies
  p <- ggplot(obs, aes(x = treatment, y = prob, group = 1)) +
    geom_lineribbon(data = data,
                    mapping = aes(x = treatment, y = mean,
                                  ymin = q5, ymax = q95),
                    fill = "orange", color = "brown") +
    geom_point(color = "navyblue", size = 2) +
    coord_cartesian(ylim = c(0.75, 1)) +
    labs(title = sprintf("Fitted prediction for chimp # %d", chimp),
         x = NULL, y = NULL)
})
# glimpse(fitd$data)
fitd$p
```

and for chimp \# 5

```{r}
fitd <- list()
fitd <- within(fitd, {
  chimp <- 5L
  # One row per treatment for the selected actor, with block held at 1
  newdata <- data.frame(
    actor = factor(chimp),
    treatment = unique(dataChimp$treatment),
    block = 1
  )
  # Posterior summary of the expected value for every row of `newdata`
  data <- epred_draws(fit13_04, newdata = newdata) |>
    select(-.chain, -.iteration, -.draw) |>
    summarize_draws()
  # and the empirical frequencies are
  obs <- dataChimp |>
    filter(actor == chimp) |>
    group_by(treatment) |>
    summarize(prob = mean(pulled_left))
  # Ribbon: posterior mean with the q5-q95 band; points: observed frequencies
  p <- ggplot(obs, aes(x = treatment, y = prob, group = 1)) +
    geom_lineribbon(data = data,
                    mapping = aes(x = treatment, y = mean,
                                  ymin = q5, ymax = q95),
                    fill = "orange", color = "brown") +
    geom_point(color = "navyblue", size = 2) +
    coord_cartesian(ylim = c(0, 1)) +
    labs(title = sprintf("Fitted prediction for chimp # %d", chimp),
         x = NULL, y = NULL)
})
# glimpse(fitd$data)
fitd$p
```

### Posterior prediction for new clusters

### Post-stratification

## Summary