Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update UMAP faceting code #644

Merged
merged 19 commits into from
Jan 9, 2024
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
93eed97
Update legend position and add label wrapping to the lump function. T…
sjspielman Jan 5, 2024
d319c60
Fix factor bug - after lumping, they should still be in order of freq…
sjspielman Jan 5, 2024
22a1beb
Add conditional legend placement using modulo
sjspielman Jan 5, 2024
3fa66d0
add dplyr:: for when this function is called from the supp report, wh…
sjspielman Jan 5, 2024
1e22cd2
Merge branch 'development' into sjspielman/637-faceted-umap-legend
sjspielman Jan 5, 2024
2addaad
make sure we're using three columns
sjspielman Jan 5, 2024
5b1732c
also account for unknown cell type in the refactoring
sjspielman Jan 5, 2024
dc5af08
also if 1 cell type, legend goes on the bottom. This is almost certai…
sjspielman Jan 5, 2024
ad59190
restore aspect.ratio = 1 and update legend placement to fit
sjspielman Jan 5, 2024
b00c951
Set umap height dynamically, but fix width at 8.
sjspielman Jan 5, 2024
14d1d4b
Plot height and legend tweaks after viewing all different options
sjspielman Jan 5, 2024
cf16bfb
Add comments about legend placement choices
sjspielman Jan 8, 2024
61d22c2
Styling: Align UMAPs in the center and update sizing approach to dete…
sjspielman Jan 8, 2024
2e040da
use roxygen comments and put function in a function chunk higher in rmd
sjspielman Jan 8, 2024
13dd624
fix comment typo
sjspielman Jan 8, 2024
c40a52b
Update templates/qc_report/celltypes_qc.rmd
sjspielman Jan 9, 2024
900a8d2
Update templates/qc_report/celltypes_qc.rmd
sjspielman Jan 9, 2024
16ffe3e
Legend y-positioning based on number of facets
sjspielman Jan 9, 2024
0768afe
Merge branch 'development' into sjspielman/637-faceted-umap-legend
sjspielman Jan 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 105 additions & 15 deletions templates/qc_report/celltypes_qc.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ format_celltype_n_table <- function(df) {

#' Function to lump celltype columns in an existing data frame for all of the
#' following columns, if they exist: `<singler/cellassign/submitter>_celltype_annotation`.
#' The cell types will also be renamed via wrapping at the given `wrap` level.
#' The resulting lumped column will be named:
#' `<singler/cellassign/submitter>_celltype_annotation_lumped`.
#'
Expand All @@ -50,16 +51,28 @@ format_celltype_n_table <- function(df) {
#' @param n_celltypes Number of groups to lump into, with rest put into "Other" group. Default is 7.
#'
#' @return Updated df with new column of lumped celltypes for each present method
lump_celltypes <- function(df, n_celltypes = 7) {
lump_wrap_celltypes <- function(df, n_celltypes = 7, wrap = 35) {
df <- df |>
# First, wrap labels
dplyr::mutate(
across(
ends_with("_celltype_annotation"),
\(x) forcats::fct_lump_n(x, n_celltypes, other_level = "All remaining cell types", ties.method = "first"),
\(x) stringr::str_wrap(x, wrap)
)
) |>
# Next, apply factor lumping, but ensure final order is via frequency with the "others" at the end
dplyr::mutate(
across(
ends_with("_celltype_annotation"),
\(x) {
x |>
forcats::fct_lump_n(n_celltypes, other_level = "All remaining cell types", ties.method = "first") |>
forcats::fct_infreq() |>
forcats::fct_relevel("Unknown cell type", "All remaining cell types", after = Inf)
},
.names = "{.col}_lumped"
)
)

return(df)
}

Expand Down Expand Up @@ -114,20 +127,29 @@ plot_umap <- function(
#' Create a faceted UMAP panel where each panel has only one cell type colored
#'
#' @param umap_df Data frame with UMAP1 and UMAP2 columns
#' @param n_celltypes The number of cell types (facets) displayed in the plot
#' @param annotation_column Column containing cell type annotations
#'
#' @return ggplot object containing a faceted UMAP where each cell type is a facet.
#' In each panel, the cell type of interest is colored and all other cells are grey.
faceted_umap <- function(umap_df,
n_celltypes,
annotation_column) {
# Determine legend y-coordinate based on n_celltypes
if (n_celltypes %in% 7:8) {
legend_y <- 0.33
} else if (n_celltypes %in% 4:5) {
legend_y <- 0.52
}

# color by the annotation column but only color one cell type at a time
faceted_umap <- ggplot(
umap_df,
aes(x = UMAP1, y = UMAP2, color = {{ annotation_column }})
) +
# set points for all "other" points
geom_point(
data = select(
data = dplyr::select(
umap_df, -{{ annotation_column }}
),
color = "gray80",
Expand All @@ -136,7 +158,10 @@ faceted_umap <- function(umap_df,
) +
# set points for desired cell type
geom_point(size = 0.3, alpha = 0.5) +
facet_wrap(vars({{ annotation_column }})) +
facet_wrap(
vars({{ annotation_column }}),
ncol = 3
) +
scale_color_brewer(palette = "Dark2") +
# remove axis numbers and background grid
scale_x_continuous(labels = NULL, breaks = NULL) +
Expand All @@ -150,15 +175,62 @@ faceted_umap <- function(umap_df,
size = 1.5
)
)
) +
theme(
legend.position = c(.9, 0),
legend.justification = c("right", "bottom"),
legend.title.align = 0.5
)

# Determine legend placement based on total_celltypes
if (n_celltypes == 1) {
# If n=1, we do not need to a show a legend, and title should be smaller
faceted_umap <- faceted_umap +
theme(
legend.position = "none",
plot.title = element_text(size = rel(0.85)),
aspect.ratio = 1
)
} else if (n_celltypes %% 3 != 0 & n_celltypes > 2) {
# This condition places the legend in the bottom right corner. Can use this for
# any n that doesn't have a full 3 columns, with one exception: for n=2,
# there will be no third column in the faceting to slot the legend into,
# so its legend has to go on the bottom
faceted_umap <- faceted_umap +
theme(
legend.position = c(0.67, legend_y),
legend.justification = c("left", "top"),
legend.title.align = 0.5,
# use slightly smaller legend text, which helps legend fit and prevents
# long wrapped labels from bunching up
legend.text = element_text(size = rel(0.85)),
legend.key.height = unit(0.75, "cm"),
aspect.ratio = 1
)
} else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you just add a comment here that this is specifically for when n = 2 that because we have < 3 columns, we need the legend on the bottom?
Maybe another comment for each of the if conditions would be helpful to say why each decision was made here.

# For any other n, place legend on the bottom
faceted_umap <- faceted_umap +
theme(
legend.position = "bottom",
aspect.ratio = 1
)
}



return(faceted_umap)
}

#' Determine dimension for UMAP plot display based on number of cell types
#'
#' @param n_celltypes Number of cell types (here, facets) displayed in plot
#'
#' @return A vector of c(width, height) in inches
determine_umap_dimensions <- function(n_celltypes) {
dplyr::case_when(
n_celltypes == 1 ~ c(3.75, 3.5), # 1: no legend
n_celltypes == 2 ~ c(5.5, 5), # 2: bottom legend
n_celltypes == 3 ~ c(8, 5.5), # 3: bottom legend
n_celltypes <= 5 ~ c(9, 6), # 4 & 5: inset legend
n_celltypes <= 6 ~ c(8, 7.5), # 6: bottom legend
.default = c(9, 9) # 7 & 8: inset legend
)
}
```


Expand Down Expand Up @@ -278,8 +350,8 @@ Clusters were calculated using the graph-based {metadata(processed_sce)$cluster_


```{r, eval = has_umap}
# Create dataset for plotting UMAPs with lumped cell types
umap_df <- lump_celltypes(celltype_df)
# Create dataset for plotting UMAPs with lumped and label-wrapped cell types
umap_df <- lump_wrap_celltypes(celltype_df)
```


Expand Down Expand Up @@ -329,31 +401,49 @@ All other cell types are grouped together and labeled "All remaining cell types"

<!-- Now, UMAPs of cell types, where present -->

```{r, eval = has_submitter && has_umap, message=FALSE, warning=FALSE, fig.height = 9, fig.width=9}
```{r, eval = has_submitter && has_umap}
submitter_n_celltypes <- length(levels(umap_df$submitter_celltype_annotation_lumped))
submitter_dims <- determine_umap_dimensions(submitter_n_celltypes)
```

```{r, eval = has_submitter && has_umap, message=FALSE, warning=FALSE, fig.width = submitter_dims[1], fig.height = submitter_dims[2], fig.align = "center"}
# umap for cell assign annotations
faceted_umap(
umap_df,
submitter_n_celltypes,
submitter_celltype_annotation_lumped
) +
ggtitle("UMAP colored by submitter-provided annotations")
```


```{r, eval=has_singler && has_umap, message=FALSE, warning=FALSE, fig.height=9, fig.width=9}
```{r, eval = has_singler && has_umap}
singler_n_celltypes <- length(levels(umap_df$singler_celltype_annotation_lumped))
singler_dims <- determine_umap_dimensions(singler_n_celltypes)
```


```{r, eval=has_singler && has_umap, message=FALSE, warning=FALSE, fig.width = singler_dims[1], fig.height = singler_dims[2], fig.align = "center"}
# umap for cell assign annotations
faceted_umap(
umap_df,
singler_n_celltypes,
singler_celltype_annotation_lumped
) +
ggtitle("UMAP colored by SingleR annotations")
```


```{r, eval = has_cellassign && has_umap}
cellassign_n_celltypes <- length(levels(umap_df$cellassign_celltype_annotation_lumped))
cellassign_dims <- determine_umap_dimensions(cellassign_n_celltypes)
```

```{r, eval = has_cellassign && has_umap, message=FALSE, warning=FALSE, fig.height = 9, fig.width=9}
```{r, eval = has_cellassign && has_umap, message=FALSE, warning=FALSE, fig.width = cellassign_dims[1], fig.height = cellassign_dims[2], fig.align = "center"}
# umap for cell assign annotations
faceted_umap(
umap_df,
cellassign_n_celltypes,
cellassign_celltype_annotation_lumped
) +
ggtitle("UMAP colored by CellAssign annotations")
Expand Down