Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete duplicate layer in AnnData objects #759

Merged
merged 21 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
7b99606
add boolean for if clusters exist
allyhawkins Apr 24, 2024
1a7a282
account for missing predictions
allyhawkins Apr 24, 2024
8d5799f
move addition of has_clusters to correct report
allyhawkins Apr 25, 2024
1ff240c
put clusters definition into main report
allyhawkins Apr 25, 2024
2d3d30f
make sure has_celltypes is in supplemental
allyhawkins Apr 25, 2024
c74328a
only create supplemental if > 1 cell in object
allyhawkins Apr 25, 2024
ca62f47
Apply suggestions from code review
allyhawkins Apr 25, 2024
d5ff33b
Merge pull request #755 from AlexsLemonade/allyhawkins/no-clusters
allyhawkins Apr 25, 2024
40b3bca
Define umap point size when creating cell type umaps
allyhawkins Apr 29, 2024
0e3b01c
move umap point sizes to child reports and set default to 1
allyhawkins Apr 29, 2024
65738c9
Merge pull request #756 from AlexsLemonade/allyhawkins/define-umap-po…
allyhawkins Apr 29, 2024
6d1fc1d
unlist adt list
allyhawkins May 1, 2024
41738f0
make sure that processed file isn't empty
allyhawkins May 1, 2024
854bb93
add warning for libraries not included in merging
allyhawkins May 1, 2024
fad0236
specify branch
allyhawkins May 1, 2024
57630da
remove multiplexed
allyhawkins May 1, 2024
bb515c6
use map_lgl
allyhawkins May 1, 2024
6629141
drop file
allyhawkins May 2, 2024
a9dc826
Merge pull request #757 from AlexsLemonade/allyhawkins/merge-bug-fixes
allyhawkins May 2, 2024
49b5ecc
Merge branch 'main' into jashapiro/nextgen-containers
jashapiro May 8, 2024
17e3663
delete logcounts layer
jashapiro May 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bin/add_celltypes_to_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,13 @@ if (!is.null(opt$cellassign_predictions)) {
if (file.size(opt$cellassign_predictions) > 0) {
predictions <- readr::read_tsv(opt$cellassign_predictions)
} else {
# if it's empty, then sce could not be converted to anndata and cell assign was not run
sce$cellassign_celltype_annotation <- "Not run"
predictions <- NULL
}

# if the only column is the barcode column then CellAssign didn't complete successfully
# if the only column is the barcode column or if the predictions file was empty
# then CellAssign didn't complete successfully
# otherwise add in cell type annotations and metadata to SCE
if (all(colnames(predictions) == "barcode")) {
if (is.null(predictions) || all(colnames(predictions) == "barcode")) {
# if failed then note that in the cell type column
sce$cellassign_celltype_annotation <- "Not run"
} else {
Expand Down
2 changes: 1 addition & 1 deletion bin/merge_sces.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ adt_present_columns <- sce_list |>

# ensure that there are indeed no "adt" altExps if adt_present_columns is empty
adt_altexps <- sce_list |>
purrr::map(\(sce) "adt" %in% altExpNames(sce))
purrr::map_lgl(\(sce) "adt" %in% altExpNames(sce))
if (is.null(adt_present_columns) && sum(adt_altexps) > 0) {
stop("Error in determining which adt altExp columns should be retained.")
}
Expand Down
1 change: 1 addition & 0 deletions bin/move_counts_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
# move logcounts to X and rename
object.X = object.layers["logcounts"]
object.uns["X_name"] = "logcounts"
del object.layers["logcounts"]

# export object
object.write_h5ad(args.anndata_file, compression="gzip" if args.compress else None)
27 changes: 15 additions & 12 deletions bin/sce_qc_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -285,17 +285,20 @@ if (opt$celltype_report_file != "") {
stop("Supplemental cell types report template not found.")
}

# render report
rmarkdown::render(
input = opt$celltype_report_template,
output_file = basename(opt$celltype_report_file),
output_dir = dirname(opt$celltype_report_file),
intermediates_dir = tempdir(),
knit_root_dir = tempdir(),
envir = new.env(),
params = list(
library = metadata_list$library_id,
processed_sce = processed_sce
# only render supplemental report if there's more than one cell
if (ncol(processed_sce) > 1) {
# render report
rmarkdown::render(
input = opt$celltype_report_template,
output_file = basename(opt$celltype_report_file),
output_dir = dirname(opt$celltype_report_file),
intermediates_dir = tempdir(),
knit_root_dir = tempdir(),
envir = new.env(),
params = list(
library = metadata_list$library_id,
processed_sce = processed_sce
)
)
)
}
}
15 changes: 13 additions & 2 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,26 @@ workflow {
log.warn("Not merging ${it.project_id} because it contains multiplexed libraries.")
}

// print out warning message for any libraries not included in merging
filtered_libraries_ch.single_sample
.map{[
it.library_id,
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
]}
.filter{!(it[1].exists() && it[1].size() > 0)}
.subscribe{
log.warn("Processed files do not exist for ${it[0]}. This library will not be included in the merged object.")
}

grouped_libraries_ch = filtered_libraries_ch.single_sample
// create tuple of [project id, library_id, processed_sce_file]
.map{[
it.project_id,
it.library_id,
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
]}
// only include libraries that have been processed through scpca-nf
.filter{file(it[2]).exists()}
// only include libraries that have been processed through scpca-nf and aren't empty
.filter{it[2].exists() && it[2].size() > 0}
// only one row per library ID, this removes all the duplicates that may be present due to CITE/hashing
.unique()
// group tuple by project id: [project_id, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]]
Expand Down
38 changes: 26 additions & 12 deletions templates/qc_report/celltypes_qc.rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Cell type Annotation Summary

<!--
This file is meant to be run as a child report within either `main_qc_report.rmd` or `celltypes_supplemental_report.rmd`.
-->


```{r}
## function definitions ##

Expand Down Expand Up @@ -86,14 +91,14 @@ lump_wrap_celltypes <- function(df, n_celltypes = 7, wrap = 35) {
#' @param color_variable Column in data frame to color by, not a string.
#' @param legend_title Title for legend.
#' @param legend_nrow Number of rows in legend. Default is 2.
#' @param point_size Point size
#' @param point_size Point size. Default is 1
#'
#' @return UMAP plot as a ggplot2 object
plot_umap <- function(
umap_df,
color_variable,
legend_title,
point_size = umap_point_size,
point_size = 1,
legend_nrow = 2) {
ggplot(umap_df) +
aes(
Expand Down Expand Up @@ -131,14 +136,14 @@ plot_umap <- function(
#' @param umap_df Data frame with UMAP1 and UMAP2 columns
#' @param n_celltypes The number of cell types (facets) displayed in the plot
#' @param annotation_column Column containing cell type annotations
#' @param point_size Point size
#' @param point_size Point size. Default is 1
#'
#' @return ggplot object containing a faceted UMAP where each cell type is a facet.
#' In each panel, the cell type of interest is colored and all other cells are grey.
faceted_umap <- function(umap_df,
n_celltypes,
annotation_column,
point_size = umap_facet_point_size) {
point_size = 1) {
# Determine legend y-coordinate based on n_celltypes
if (n_celltypes %in% 7:8) {
legend_y <- 0.33
Expand Down Expand Up @@ -283,6 +288,11 @@ glue::glue("
```{r, warning = FALSE}
# Create data frame of cell types
celltype_df <- create_celltype_df(processed_sce)

# determine UMAP point sizing
umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
umap_point_size <- umap_points_sizes[1]
umap_facet_point_size <- umap_points_sizes[2]
```


Expand Down Expand Up @@ -370,7 +380,7 @@ create_celltype_n_table(celltype_df, cellassign_celltype_annotation) |>
```


```{r, eval = has_umap}
```{r, eval = has_umap && has_clusters}
knitr::asis_output(glue::glue("
## UMAPs

Expand All @@ -388,7 +398,7 @@ umap_df <- lump_wrap_celltypes(celltype_df)

<!-- First UMAP: clusters -->

```{r, eval = has_umap && has_multiplex, results='asis'}
```{r, eval = has_umap && has_multiplex && has_clusters, results='asis'}
glue::glue("
<div class=\"alert alert-info\">
This library contains multiple samples that have not been batch-corrected, which may confound clustering assignments.
Expand All @@ -398,11 +408,12 @@ glue::glue("
```


```{r eval = has_umap, message=FALSE, warning=FALSE}
```{r eval = has_umap && has_clusters, message=FALSE, warning=FALSE}
clusters_plot <- plot_umap(
umap_df,
cluster,
"Cluster"
"Cluster",
point_size = umap_point_size
) +
ggtitle("UMAP colored by cluster identity")

Expand All @@ -418,7 +429,7 @@ if (length(levels(umap_df$cluster)) <= 8) {
```


```{r, eval = has_umap}
```{r, eval = has_umap && has_celltypes}
knitr::asis_output(
'Next, we show UMAPs colored by cell types.
For each cell typing method, we show a separate faceted UMAP.
Expand Down Expand Up @@ -447,7 +458,8 @@ if (has_submitter & has_umap) {
faceted_umap(
umap_df,
submitter_n_celltypes,
submitter_celltype_annotation_lumped
submitter_celltype_annotation_lumped,
point_size = umap_facet_point_size
) +
ggtitle("UMAP colored by submitter-provided annotations")
```
Expand All @@ -469,7 +481,8 @@ if (has_singler & has_umap) {
faceted_umap(
umap_df,
singler_n_celltypes,
singler_celltype_annotation_lumped
singler_celltype_annotation_lumped,
point_size = umap_facet_point_size
) +
ggtitle("UMAP colored by SingleR annotations")
```
Expand All @@ -490,7 +503,8 @@ if (has_cellassign & has_umap) {
faceted_umap(
umap_df,
cellassign_n_celltypes,
cellassign_celltype_annotation_lumped
cellassign_celltype_annotation_lumped,
point_size = umap_facet_point_size
) +
ggtitle("UMAP colored by CellAssign annotations")
```
15 changes: 7 additions & 8 deletions templates/qc_report/celltypes_supplemental_report.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,12 @@ has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
has_submitter <- "submitter" %in% metadata(processed_sce)$celltype_methods &&
!all(is.na(processed_sce$submitter_celltype_annotation)) # make sure they aren't all NA

# check for umap
# If at least 1 is present, we have cell type annotations.
has_celltypes <- any(has_singler, has_cellassign, has_submitter)

# check for umap and clusters
has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
has_clusters <- "cluster" %in% names(colData(processed_sce))

# what celltypes are available?
available_celltypes <- c(
Expand Down Expand Up @@ -296,11 +300,6 @@ plot_height <- 1
# sample_id should be defined with length > 1
sample_id <- metadata(processed_sce)$sample_id
has_multiplex <- length(sample_id) > 1

# determine UMAP point sizing
umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
umap_point_size <- umap_points_sizes[1]
umap_facet_point_size <- umap_points_sizes[2]
```

<!-- If multiplexed, open with warning -->
Expand Down Expand Up @@ -416,7 +415,7 @@ glue::glue("
```

<!-- If not multiplexed, show the header, text, and heatmap -->
```{r, eval = !has_multiplex, results='asis'}
```{r, eval = !has_multiplex && has_clusters, results='asis'}
glue::glue("
## Unsupervised clustering

Expand Down Expand Up @@ -448,7 +447,7 @@ plot_height <- calculate_plot_height(
```


```{r, eval = !has_multiplex, fig.height = plot_height, fig.width = 8.5, warning = FALSE}
```{r, eval = !has_multiplex && has_clusters, fig.height = plot_height, fig.width = 8.5, warning = FALSE}
jaccard_cluster_matrices |>
create_heatmap_list(
column_title = "Clusters",
Expand Down
8 changes: 2 additions & 6 deletions templates/qc_report/main_qc_report.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ if (has_cellhash) {
# check for umap and celltypes, but need to be sure that processed_sce exists first
if (has_processed) {
has_umap <- "UMAP" %in% reducedDimNames(processed_sce)

has_clusters <- "cluster" %in% names(colData(processed_sce))
has_singler <- "singler" %in% metadata(processed_sce)$celltype_methods
has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
has_submitter <- "submitter" %in% metadata(processed_sce)$celltype_methods &&
Expand All @@ -129,6 +129,7 @@ if (has_processed) {
is_supplemental <- FALSE # this is not the celltype supp report
} else {
has_umap <- FALSE
has_clusters <- FALSE
has_singler <- FALSE
has_cellassign <- FALSE
has_submitter <- FALSE
Expand All @@ -143,11 +144,6 @@ if ((has_singler | has_cellassign) & is.null(params$celltype_report)) {
# check if we have multiplex
has_multiplex <- length(sample_id) > 1
sample_types <- metadata(unfiltered_sce)$sample_type

# determine UMAP point sizing
umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
umap_point_size <- umap_points_sizes[1]
umap_facet_point_size <- umap_points_sizes[2]
```


Expand Down
8 changes: 8 additions & 0 deletions templates/qc_report/umap_qc.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

The below plot shows the UMAP (Uniform Manifold Approximation and Projection) embeddings for each cell, coloring each cell by the total number of genes detected per cell.

```{r}
# determine UMAP point sizing
umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
umap_point_size <- umap_points_sizes[1]
umap_facet_point_size <- umap_points_sizes[2]
```


```{r message=FALSE}
# create UMAP colored by number of genes detected
scater::plotUMAP(
Expand Down
Loading