- {tidypivot} allows you to create tables by describing them (like ggplot2 plot description/declaration)
- declarative table creation with ggplot2
- examples/derivative
- filling cells with examples from data.
{tidypivot} allows you to create tables by describing them (like ggplot2 plot description/declaration)
Note: see the original discussion at https://evamaerey.github.io/mytidytuesday/2022-02-14-tables/tables.html, and the thoughtful contributions from @shannonpileggi and @brshallow in EvaMaeRey/mytidytuesday#3.
And, you know, I’d get a dataset. And, in my head I could very clearly kind of picture, I want to put this on the x-axis. Let’s put this on the y-axis, draw a line, put some points here, break it up by this variable. And then, like, getting that vision out of my head, and into reality, it’s just really, really hard. Just, like, felt harder than it should be. Like, there’s a lot of custom programming involved, where I just felt, like, to me, I just wanted to say, like, you know, this is what I’m thinking, this is how I’m picturing this plot. Like you’re the computer ‘Go and do it’. … and I’d also been reading about the Grammar of Graphics by Leland Wilkinson, I got to meet him a couple of times and … I was, like, this book has been, like, written for me. - https://www.trifacta.com/podcast/tidy-data-with-hadley-wickham/
library(ggplot2)
# Set StatSum's default aesthetics so the computed count `n` is used as the text label
StatSum$default_aes <- aes(label = after_stat(n))
# Declare the layout of the 'table': one variable per axis
tidytitanic::tidy_titanic |>
ggplot(
# I want to put this on the x-axis (cols)
aes(x = sex,
# I want to put this on the y-axis (rows)
y = survived)
)
# grouping and computation happen in one step, filling in 'table'
last_plot() +
stat_sum(geom = "text")
- grouping
- compute
- pivot
Under the hood:
- group by the row and column variables
- value: the variable in the data to aggregate (1 if not specified)
- wt: a weight that the value is multiplied by (1 if not specified)
- fun: the operation performed on value within each group (sum if not specified)
But the API:
- describe the layout of the table and the computation
#' Build a pivot table by describing its layout
#'
#' Groups `data` by the `cols` and `rows` variables, aggregates a `value`
#' column within each group using `fun`, optionally rescales the result to
#' proportions or percentages, and finally spreads `cols` into columns.
#'
#' @param data A data frame.
#' @param rows Unquoted variable(s) that define the rows, e.g. `sex` or
#'   `c(sex, age)`.
#' @param cols Unquoted variable(s) that define the columns.
#' @param value Unquoted expression to aggregate. When `NULL`, every
#'   observation contributes 1, so the default `fun` produces counts.
#' @param wt Optional unquoted weight; `value` is multiplied by it.
#' @param fun Function applied to `value` within each group. `NULL` means
#'   `sum`.
#' @param prop If `TRUE`, report proportions instead of raw values.
#' @param percent If `TRUE`, report percentages instead of raw values.
#' @param round Number of digits proportions/percentages are rounded to.
#'   When `NULL`, 3 digits are used for proportions and 1 for percentages.
#' @param within Optional unquoted variable(s). When supplied, proportions are
#'   computed within each of their categories instead of across the whole
#'   table.
#' @param withinfun Currently unused.
#' @param pivot If `FALSE`, the long (tidy) summary is returned. `NULL` is
#'   treated as `TRUE`.
#' @param wrap Currently unused.
#' @param totals_within Currently unused.
#'
#' @return An ungrouped tibble: wide (one column per category of `cols`) when
#'   pivoting, otherwise one row per group with the result in `value`.
#' @export
#'
#' @examples
#' df <- data.frame(
#'   sex = c("m", "m", "f"),
#'   survived = c("no", "yes", "yes")
#' )
#' pivotr(df, rows = sex, cols = survived)
pivotr <- function(data,
                   rows = NULL,
                   cols = NULL,
                   value = NULL,
                   wt = NULL,
                   fun = NULL,
                   prop = FALSE,
                   percent = FALSE,
                   round = NULL,
                   within = NULL,
                   withinfun = NULL,
                   pivot = NULL,
                   wrap = NULL,
                   totals_within = NULL) {
  cols_quo <- rlang::enquo(cols)
  value_quo <- rlang::enquo(value)
  wt_quo <- rlang::enquo(wt)
  within_quo <- rlang::enquo(within)

  # NULL is accepted as "use the default" for every switch, so wrappers can
  # forward their own NULL defaults untouched
  if (is.null(prop)) prop <- FALSE
  if (is.null(percent)) percent <- FALSE
  if (is.null(pivot)) pivot <- TRUE
  if (is.null(fun)) fun <- sum

  # every observation counts as 1 unless a value is supplied
  if (rlang::quo_is_null(value_quo)) {
    data <- dplyr::mutate(data, value = 1)
  } else {
    data <- dplyr::mutate(data, value = {{ value }})
  }

  #### weighting ####
  if (!rlang::quo_is_null(wt_quo)) {
    data <- dplyr::mutate(data, value = .data$value * {{ wt }})
  }

  #### grouping by the tabulation variables, then summarizing ####
  # .groups = "drop" returns an ungrouped result and silences the
  # "summarise() has grouped output by" message
  summarized <- data |>
    dplyr::group_by(
      dplyr::across(c({{ cols }}, {{ rows }})),
      .drop = FALSE
    ) |>
    dplyr::summarise(value = fun(.data$value), .groups = "drop")

  #### proportion or percent ####
  if (prop || percent) {
    mult <- if (percent) 100 else 1
    # keep the digits in their own variable: the `round` argument would
    # otherwise shadow base::round() inside this function
    digits <- if (!is.null(round)) round else if (percent) 1 else 3

    # proportions are across all data unless `within` narrows the denominator
    if (!rlang::quo_is_null(within_quo)) {
      summarized <- dplyr::group_by(
        summarized,
        dplyr::across(c({{ within }})),
        .drop = FALSE
      )
    }

    summarized <- summarized |>
      dplyr::mutate(
        value = base::round(
          .data$value * .env$mult / sum(.data$value),
          .env$digits
        )
      ) |>
      dplyr::ungroup()
  }

  # do not pivot if pivot is FALSE or if no columns were specified
  if (isFALSE(pivot) || rlang::quo_is_null(cols_quo)) {
    return(summarized)
  }

  tidyr::pivot_wider(
    summarized,
    names_from = {{ cols }},
    values_from = "value"
  )
}
# Step 1: add the `value` column that later steps aggregate.
#
# data:  a data frame
# value: unquoted expression to aggregate; when NULL each row is worth 1
# wt:    optional unquoted weight that `value` is multiplied by
#
# Returns `data` with a `value` column added (or overwritten).
data_define_value <- function(data, value = NULL, wt = NULL) {
  has_value <- !rlang::quo_is_null(rlang::enquo(value))
  has_weight <- !rlang::quo_is_null(rlang::enquo(wt))

  valued <- if (has_value) {
    dplyr::mutate(data, value = {{ value }})
  } else {
    # no value supplied: every observation counts as 1
    dplyr::mutate(data, value = 1)
  }

  if (!has_weight) {
    return(valued)
  }

  dplyr::mutate(valued, value = .data$value * {{ wt }})
}
# Step 2: group by the tabulation variables, columns first and rows second.
# `.drop = FALSE` is passed so that empty factor-level groups are not dropped.
data_to_grouped <- function(data, cols, rows) {
  dplyr::group_by(
    .data = data,
    dplyr::across(c({{ cols }}, {{ rows }})),
    .drop = FALSE
  )
}
# Step 3: collapse each group to a single `value` by applying `fun` to the
# group's `value` column. `fun` falls back to `sum` when NULL.
data_grouped_to_summarized <- function(data, fun = NULL) {
  aggregate_fun <- if (is.null(fun)) sum else fun
  dplyr::summarise(data, value = aggregate_fun(.data$value))
}
# Step 4: optionally rescale the summarised `value` column to proportions or
# percentages.
#
# data:    data frame with a numeric `value` column
# prop:    TRUE to report proportions (NULL is treated as FALSE)
# percent: TRUE to report percentages (NULL is treated as FALSE)
# within:  optional unquoted variable(s); the denominator is then the total
#          within each of their categories instead of the grand total
# round:   digits to round to; NULL means 1 for percentages, 3 for proportions
#
# Returns `data` unchanged when neither `prop` nor `percent` is TRUE. Otherwise
# returns the rescaled data, still grouped by `within` when it was supplied.
data_summarized_to_proportioned <- function(data, prop = FALSE, percent = FALSE, within = NULL, round = 2) {
  within_quo <- rlang::enquo(within)

  if (is.null(prop)) prop <- FALSE
  if (is.null(percent)) percent <- FALSE

  if (!(prop || percent)) {
    return(data)
  }

  mult <- if (percent) 100 else 1
  # keep the digits in their own variable: the `round` argument would
  # otherwise shadow base::round() inside this function
  digits <- if (!is.null(round)) round else if (percent) 1 else 3

  # proportions are across all data unless `within` narrows the denominator
  data <- dplyr::ungroup(data)
  if (!rlang::quo_is_null(within_quo)) {
    data <- dplyr::group_by(
      data,
      dplyr::across(c({{ within }})),
      .drop = FALSE
    )
  }

  dplyr::mutate(
    data,
    value = base::round(
      .data$value * .env$mult / sum(.data$value),
      .env$digits
    )
  )
}
# Step 5: ungroup and, unless told otherwise, spread `cols` into columns.
#
# data:  data frame holding the result in a `value` column
# pivot: FALSE returns the long (tidy) data; NULL is treated as TRUE
# cols:  unquoted variable(s) whose categories become column names; when
#        NULL nothing is pivoted
#
# Returns an ungrouped data frame, wide when pivoting and long otherwise.
data_proportioned_to_pivoted <- function(data, pivot = TRUE, cols = NULL) {
  cols_quo <- rlang::enquo(cols)
  if (is.null(pivot)) pivot <- TRUE

  tidy <- dplyr::ungroup(data)

  # do not pivot if pivot is FALSE or if no columns were specified
  if (isFALSE(pivot) || rlang::quo_is_null(cols_quo)) {
    return(tidy)
  }

  tidyr::pivot_wider(tidy, names_from = {{ cols }}, values_from = "value")
}
# Run the five steps one at a time: percentages by sex within each survival outcome
tidytitanic::flat_titanic |>
  data_define_value(value = freq) |>
  data_to_grouped(rows = survived, cols = sex) |>
  data_grouped_to_summarized() |>
  data_summarized_to_proportioned(percent = TRUE, within = survived) |>
  data_proportioned_to_pivoted(cols = sex)
#> # A tibble: 2 × 3
#> survived Male Female
#> <fct> <dbl> <dbl>
#> 1 No 91.5 8.46
#> 2 Yes 51.6 48.4
# Describe a table and compute it: a thin wrapper that runs the five steps
# (define value -> group -> summarize -> proportion -> pivot) in order.
#
# data:    a data frame
# rows:    unquoted variable(s) for the rows
# cols:    unquoted variable(s) for the columns
# value:   unquoted expression to aggregate (1 per row when NULL)
# wt:      optional unquoted weight multiplied into value
# fun:     aggregation function (sum when NULL)
# prop:    TRUE for proportions
# percent: TRUE for percentages
# round:   digits for proportions/percentages
# within:  unquoted variable(s) that define the denominator groups
# pivot:   FALSE to keep the long (tidy) result
pivotr <- function(data,
                   rows = NULL,
                   cols = NULL,
                   value = NULL,
                   wt = NULL,
                   fun = NULL,
                   prop = FALSE,
                   percent = FALSE,
                   round = NULL,
                   within = NULL,
                   pivot = NULL) {
  valued <- data_define_value(data, value = {{ value }}, wt = {{ wt }})
  grouped <- data_to_grouped(valued, rows = {{ rows }}, cols = {{ cols }})
  summarized <- data_grouped_to_summarized(grouped, fun = fun)
  proportioned <- data_summarized_to_proportioned(
    summarized,
    prop = prop,
    percent = percent,
    within = {{ within }},
    round = round
  )
  data_proportioned_to_pivoted(proportioned, pivot = pivot, cols = {{ cols }})
}
# Same table as the step-by-step pipeline, in a single pivotr() call
tidytitanic::flat_titanic |>
  pivotr(value = freq, rows = survived, cols = sex, percent = TRUE, within = survived)
#> # A tibble: 2 × 3
#> survived Male Female
#> <fct> <dbl> <dbl>
#> 1 No 91.5 8.5
#> 2 Yes 51.6 48.4
library(tidytitanic)
tidy_titanic |> pivotr()
#> # A tibble: 1 × 1
#> value
#> <dbl>
#> 1 2201
tidy_titanic |> pivotr(rows = sex, cols = survived)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 1364 367
#> 2 Female 126 344
tidy_titanic |> pivotr(rows = c(sex, age), cols = survived)
#> # A tibble: 4 × 4
#> sex age No Yes
#> <fct> <fct> <dbl> <dbl>
#> 1 Male Child 35 29
#> 2 Male Adult 1329 338
#> 3 Female Child 17 28
#> 4 Female Adult 109 316
# pivot = FALSE keeps the long (tidy) summary instead of spreading `survived` into columns
tidy_titanic |> pivotr(rows = sex, cols = survived, pivot = FALSE)
#> # A tibble: 4 × 3
#> survived sex value
#> <fct> <fct> <dbl>
#> 1 No Male 1364
#> 2 No Female 126
#> 3 Yes Male 367
#> 4 Yes Female 344
flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE)
#> # A tibble: 2 × 2
#> sex value
#> <fct> <dbl>
#> 1 Male 0.786
#> 2 Female 0.214
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 0.62 0.167
#> 2 Female 0.057 0.156
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 0.788 0.212
#> 2 Female 0.268 0.732
Here are some examples of derivative functions you might build on top of pivotr():
# Count the observations in each cell: pivotr() with `length` as the
# aggregation function. All arguments are forwarded to pivotr().
pivot_count <- function(...) {
  # maybe a wt version...
  pivotr(..., fun = length)
}
# Average `value` in each cell, ignoring missing values.
# All arguments are forwarded to pivotr().
pivot_average <- function(...) {
  # TRUE rather than the reassignable shorthand T
  mean_na_rm <- function(x) mean(x, na.rm = TRUE)
  pivotr(fun = mean_na_rm, ...)
}
# Total `value` in each cell: pivotr() with `sum` spelled out explicitly.
# All arguments are forwarded to pivotr().
pivot_sum <- function(...) {
  pivotr(..., fun = sum)
}
# Lay out the table but fill every cell with NA.
# All arguments are forwarded to pivotr().
pivot_empty <- function(...) {
  always_na <- function(x) {
    NA
  }
  pivotr(..., fun = always_na)
}
library(magrittr)
library(tidytitanic)
passengers <- readr::read_csv("https://raw.githubusercontent.com/clauswilke/dviz.supp/master/data-raw/titanic/Titanic.csv")
head(passengers)
#> # A tibble: 6 × 7
#> ...1 Name PClass Age Sex Survived SexCode
#> <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
#> 1 1 Allen, Miss Elisabeth Walton 1st 29 fema… 1 1
#> 2 2 Allison, Miss Helen Loraine 1st 2 fema… 0 1
#> 3 3 Allison, Mr Hudson Joshua Creighton 1st 30 male 0 0
#> 4 4 Allison, Mrs Hudson JC (Bessie Wald… 1st 25 fema… 0 1
#> 5 5 Allison, Master Hudson Trevor 1st 0.92 male 1 0
#> 6 6 Anderson, Mr Harry 1st 47 male 1 0
tidy_titanic |> pivot_count(rows = sex)
#> # A tibble: 2 × 2
#> sex value
#> <fct> <int>
#> 1 Male 1731
#> 2 Female 470
# spell out `cols` in full rather than relying on partial argument matching of `col`
tidy_titanic |> pivot_count(rows = sex, cols = survived)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <int> <int>
#> 1 Male 1364 367
#> 2 Female 126 344
flat_titanic |> pivot_sum(rows = survived, value = freq)
#> # A tibble: 2 × 2
#> survived value
#> <fct> <dbl>
#> 1 No 1490
#> 2 Yes 711
flat_titanic |> pivot_sum(rows = sex, cols = survived, value = freq)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 1364 367
#> 2 Female 126 344
flat_titanic |> pivot_average(rows = sex, cols = survived, value = freq)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 170. 45.9
#> 2 Female 15.8 43
flat_titanic |> pivot_empty(rows = survived, cols = age)
#> # A tibble: 2 × 3
#> survived Child Adult
#> <fct> <lgl> <lgl>
#> 1 No NA NA
#> 2 Yes NA NA
passengers |> pivot_average(rows = c(Sex, PClass), cols = Survived, value = Age)
#> # A tibble: 7 × 4
#> Sex PClass `0` `1`
#> <chr> <chr> <dbl> <dbl>
#> 1 female 1st 35.2 37.9
#> 2 female 2nd 31.4 26.9
#> 3 female 3rd 22.8 22.7
#> 4 male * NaN NA
#> 5 male 1st 44.8 34.3
#> 6 male 2nd 31.7 14.8
#> 7 male 3rd 27.1 22.1
# Fill each cell with one randomly chosen example of `value` from that group.
# All arguments are forwarded to pivotr().
pivot_example <- function(...) {
  sample1 <- function(x) {
    # an empty group has nothing to show: return a missing value of x's type
    if (length(x) == 0L) {
      return(x[NA_integer_])
    }
    # index with sample.int() instead of sample(x, 1): when a group holds a
    # single number k >= 1, sample(x, 1) would draw from 1:k rather than
    # return k itself
    x[sample.int(length(x), 1L)]
  }
  pivotr(fun = sample1, ...)
}
# Fill each cell with up to `n` randomly chosen examples of `value` from that
# group, drawn without replacement and pasted together with `sep`.
# Remaining arguments are forwarded to pivotr().
pivot_samplen <- function(..., n = 3, sep = "; ") {
  samplen <- function(x) {
    # groups with fewer than n values contribute everything they have
    # instead of raising "cannot take a sample larger than the population"
    size <- min(n, length(x))
    # index with sample.int(): sample(x, n) would draw from 1:x when a group
    # holds a single number
    picked <- x[sample.int(length(x), size)]
    paste(picked, collapse = sep)
  }
  pivotr(fun = samplen, ...)
}
# Fill each cell with every `value` in that group, pasted together with `sep`.
# Remaining arguments are forwarded to pivotr().
pivot_list <- function(..., sep = "; ") {
  collapse_values <- function(x) {
    paste(x, collapse = sep)
  }
  pivotr(..., fun = collapse_values)
}
flat_titanic |> pivot_example(rows = sex, value = freq)
#> # A tibble: 2 × 2
#> sex value
#> <fct> <dbl>
#> 1 Male 13
#> 2 Female 140
flat_titanic |> pivot_samplen(rows = sex, value = freq)
#> # A tibble: 2 × 2
#> sex value
#> <fct> <chr>
#> 1 Male 35; 0; 57
#> 2 Female 1; 0; 4
flat_titanic |> pivot_list(rows = sex, cols = survived, value = freq)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <chr> <chr>
#> 1 Male 0; 0; 35; 0; 118; 154; 387; 670 5; 11; 13; 0; 57; 14; 75; 192
#> 2 Female 0; 0; 17; 0; 4; 13; 89; 3 1; 13; 14; 0; 140; 80; 76; 20
set.seed(12345)
passengers |> pivot_example(rows = Survived, cols = Sex, value = Name)
#> # A tibble: 2 × 3
#> Survived female male
#> <dbl> <chr> <chr>
#> 1 0 Solvang, Mrs Lena Jacobsen Meyer, Mr August
#> 2 1 Gibson, Miss Dorothy Williams, Mr Richard Norris II
passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Name, n = 2, sep = "; ")
#> # A tibble: 2 × 3
#> Survived female male
#> <dbl> <chr> <chr>
#> 1 0 McGowan, Miss Katherine; Klasen, Miss Gertrud Emilia Smar…
#> 2 1 Ware, Mrs John James (Florence Louise Long); Dyker, Mrs Adolf … Mock…
passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Age, n = 7)
#> # A tibble: 2 × 3
#> Survived female male
#> <dbl> <chr> <chr>
#> 1 0 NA; 44; 20; NA; 18; 2; NA NA; NA; 28; NA; 19; NA; NA
#> 2 1 22; 5; 59; 12; 13; 26; NA 32; 9; 35; 60; NA; NA; NA
passengers |> dplyr::sample_n(20) |> pivot_list(rows = Sex, cols = Survived, value = Age)
#> # A tibble: 2 × 3
#> Sex `0` `1`
#> <chr> <chr> <chr>
#> 1 female NA; 30 NA; 45; 22
#> 2 male NA; 24; 26; 29; 21; 29; 19; 46; 54; NA; 21; 22; NA 19; 2
library(tidytitanic)
# pivot_prop
flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE) # pivot_prop
#> # A tibble: 2 × 2
#> sex value
#> <fct> <dbl>
#> 1 Male 0.786
#> 2 Female 0.214
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 0.62 0.167
#> 2 Female 0.057 0.156
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 0.788 0.212
#> 2 Female 0.268 0.732
# pivot_percent
flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, percent = TRUE, within = sex)
#> # A tibble: 2 × 3
#> sex No Yes
#> <fct> <dbl> <dbl>
#> 1 Male 78.8 21.2
#> 2 Female 26.8 73.2
knitr::knit_exit()