From 7b99606d406168a75f7f8cbacf82fed1fed8d6bd Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 24 Apr 2024 15:44:04 -0500
Subject: [PATCH 01/17] add boolean for if clusters exist

---
 templates/qc_report/celltypes_qc.rmd                  | 10 +++++-----
 templates/qc_report/celltypes_supplemental_report.rmd |  4 ++--
 templates/qc_report/main_qc_report.rmd                |  2 ++
 3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/templates/qc_report/celltypes_qc.rmd b/templates/qc_report/celltypes_qc.rmd
index 5da08030..93288aa4 100644
--- a/templates/qc_report/celltypes_qc.rmd
+++ b/templates/qc_report/celltypes_qc.rmd
@@ -93,7 +93,7 @@ plot_umap <- function(
     umap_df,
     color_variable,
     legend_title,
-    point_size = umap_point_size,
+    point_size = point_size,
     legend_nrow = 2) {
   ggplot(umap_df) +
     aes(
@@ -370,7 +370,7 @@ create_celltype_n_table(celltype_df, cellassign_celltype_annotation) |>
 ```
 
 
-```{r, eval = has_umap}
+```{r, eval = has_umap & has_clusters}
 knitr::asis_output(glue::glue("
 ## UMAPs
 
@@ -388,7 +388,7 @@ umap_df <- lump_wrap_celltypes(celltype_df)
 
 <!-- First UMAP: clusters -->
 
-```{r, eval = has_umap && has_multiplex, results='asis'}
+```{r, eval = has_umap && has_multiplex && has_clusters, results='asis'}
 glue::glue("
   <div class=\"alert alert-info\">
     This library contains multiple samples that have not been batch-corrected, which may confound clustering assignments.
@@ -398,7 +398,7 @@ glue::glue("
 ```
 
 
-```{r eval = has_umap, message=FALSE, warning=FALSE}
+```{r eval = has_umap & has_clusters, message=FALSE, warning=FALSE}
 clusters_plot <- plot_umap(
   umap_df,
   cluster,
@@ -418,7 +418,7 @@ if (length(levels(umap_df$cluster)) <= 8) {
 ```
 
 
-```{r, eval = has_umap}
+```{r, eval = has_umap & has_celltypes}
 knitr::asis_output(
   'Next, we show UMAPs colored by cell types.
 For each cell typing method, we show a separate faceted UMAP.
diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 652d6748..022ad473 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -416,7 +416,7 @@ glue::glue("
 ```
 
 <!-- If not multiplexed, show the header, text, and heatmap --> 
-```{r, eval = !has_multiplex, results='asis'}
+```{r, eval = !has_multiplex & has_clusters, results='asis'}
 glue::glue("
   ## Unsupervised clustering
 
@@ -448,7 +448,7 @@ plot_height <- calculate_plot_height(
 ```
 
 
-```{r, eval = !has_multiplex, fig.height = plot_height, fig.width = 8.5, warning = FALSE}
+```{r, eval = !has_multiplex & has_clusters, fig.height = plot_height, fig.width = 8.5, warning = FALSE}
 jaccard_cluster_matrices |>
   create_heatmap_list(
     column_title = "Clusters",
diff --git a/templates/qc_report/main_qc_report.rmd b/templates/qc_report/main_qc_report.rmd
index f64c3536..039de0f7 100644
--- a/templates/qc_report/main_qc_report.rmd
+++ b/templates/qc_report/main_qc_report.rmd
@@ -117,6 +117,7 @@ if (has_cellhash) {
 # check for umap and celltypes, but need to be sure that processed_sce exists first
 if (has_processed) {
   has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
+  has_clusters <- "cluster" %in% names(colData(processed_sce))
 
   has_singler <- "singler" %in% metadata(processed_sce)$celltype_methods
   has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
@@ -129,6 +130,7 @@ if (has_processed) {
   is_supplemental <- FALSE # this is not the celltype supp report
 } else {
   has_umap <- FALSE
+  has_clusters <- FALSE
   has_singler <- FALSE
   has_cellassign <- FALSE
   has_submitter <- FALSE

From 1a7a28261574e7a9e66271ebeae1a713f2d8c209 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:31:09 -0500
Subject: [PATCH 02/17] account for missing predictions

---
 bin/add_celltypes_to_sce.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bin/add_celltypes_to_sce.R b/bin/add_celltypes_to_sce.R
index 53d4a667..ac32d200 100755
--- a/bin/add_celltypes_to_sce.R
+++ b/bin/add_celltypes_to_sce.R
@@ -191,13 +191,13 @@ if (!is.null(opt$cellassign_predictions)) {
   if (file.size(opt$cellassign_predictions) > 0) {
     predictions <- readr::read_tsv(opt$cellassign_predictions)
   } else {
-    # if it's empty, then sce could not be converted to anndata and cell assign was not run
-    sce$cellassign_celltype_annotation <- "Not run"
+    predictions <- NULL
   }
 
-  # if the only column is the barcode column then CellAssign didn't complete successfully
+  # if the only column is the barcode column or if the predictions file was empty
+  # then CellAssign didn't complete successfully
   # otherwise add in cell type annotations and metadata to SCE
-  if (all(colnames(predictions) == "barcode")) {
+  if (is.null(predictions) | all(colnames(predictions) == "barcode")) {
     # if failed then note that in the cell type column
     sce$cellassign_celltype_annotation <- "Not run"
   } else {

From 8d5799f5ec286954c7dcb63d04035d73db0e4ff0 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 25 Apr 2024 08:38:36 -0500
Subject: [PATCH 03/17] move addition of has_clusters to correct report

---
 templates/qc_report/celltypes_supplemental_report.rmd | 1 +
 templates/qc_report/main_qc_report.rmd                | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 022ad473..f0962c66 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -266,6 +266,7 @@ has_submitter <- "submitter" %in% metadata(processed_sce)$celltype_methods &&
 
 # check for umap
 has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
+has_clusters <- "cluster" %in% names(colData(processed_sce))
 
 # what celltypes are available?
 available_celltypes <- c(
diff --git a/templates/qc_report/main_qc_report.rmd b/templates/qc_report/main_qc_report.rmd
index 039de0f7..f64c3536 100644
--- a/templates/qc_report/main_qc_report.rmd
+++ b/templates/qc_report/main_qc_report.rmd
@@ -117,7 +117,6 @@ if (has_cellhash) {
 # check for umap and celltypes, but need to be sure that processed_sce exists first
 if (has_processed) {
   has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
-  has_clusters <- "cluster" %in% names(colData(processed_sce))
 
   has_singler <- "singler" %in% metadata(processed_sce)$celltype_methods
   has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
@@ -130,7 +129,6 @@ if (has_processed) {
   is_supplemental <- FALSE # this is not the celltype supp report
 } else {
   has_umap <- FALSE
-  has_clusters <- FALSE
   has_singler <- FALSE
   has_cellassign <- FALSE
   has_submitter <- FALSE

From 1ff240c7f1192256247f1fb69a94b6c6bf55df01 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 25 Apr 2024 09:28:40 -0500
Subject: [PATCH 04/17] put clusters definition into main report

---
 templates/qc_report/celltypes_supplemental_report.rmd | 2 +-
 templates/qc_report/main_qc_report.rmd                | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index f0962c66..4b1dc159 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -264,7 +264,7 @@ has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
 has_submitter <- "submitter" %in% metadata(processed_sce)$celltype_methods &&
   !all(is.na(processed_sce$submitter_celltype_annotation)) # make sure they aren't all NA
 
-# check for umap
+# check for umap and clusters
 has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
 has_clusters <- "cluster" %in% names(colData(processed_sce))
 
diff --git a/templates/qc_report/main_qc_report.rmd b/templates/qc_report/main_qc_report.rmd
index f64c3536..aac47d8a 100644
--- a/templates/qc_report/main_qc_report.rmd
+++ b/templates/qc_report/main_qc_report.rmd
@@ -117,7 +117,7 @@ if (has_cellhash) {
 # check for umap and celltypes, but need to be sure that processed_sce exists first
 if (has_processed) {
   has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
-
+  has_clusters <- "cluster" %in% names(colData(processed_sce))
   has_singler <- "singler" %in% metadata(processed_sce)$celltype_methods
   has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
   has_submitter <- "submitter" %in% metadata(processed_sce)$celltype_methods &&
@@ -129,6 +129,7 @@ if (has_processed) {
   is_supplemental <- FALSE # this is not the celltype supp report
 } else {
   has_umap <- FALSE
+  has_clusters <- FALSE
   has_singler <- FALSE
   has_cellassign <- FALSE
   has_submitter <- FALSE

From 2d3d30fda0752d2aa998dbfa9e8365e97ab065e9 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 25 Apr 2024 10:03:35 -0500
Subject: [PATCH 05/17] make sure has_celltypes is in supplemental

---
 templates/qc_report/celltypes_supplemental_report.rmd | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 4b1dc159..03e3b3c0 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -264,6 +264,9 @@ has_cellassign <- "cellassign" %in% metadata(processed_sce)$celltype_methods
 has_submitter <- "submitter" %in% metadata(processed_sce)$celltype_methods &&
   !all(is.na(processed_sce$submitter_celltype_annotation)) # make sure they aren't all NA
 
+# If at least 1 is present, we have cell type annotations.
+has_celltypes <- any(has_singler, has_cellassign, has_submitter)
+
 # check for umap and clusters
 has_umap <- "UMAP" %in% reducedDimNames(processed_sce)
 has_clusters <- "cluster" %in% names(colData(processed_sce))

From c74328a41eba2b19fdb3992f374ba42424d48acf Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 25 Apr 2024 10:38:15 -0500
Subject: [PATCH 06/17] only create supplemental if > 1 cell in object

---
 bin/sce_qc_report.R | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/bin/sce_qc_report.R b/bin/sce_qc_report.R
index 3833c6fe..d7f4728c 100755
--- a/bin/sce_qc_report.R
+++ b/bin/sce_qc_report.R
@@ -285,17 +285,20 @@ if (opt$celltype_report_file != "") {
     stop("Supplemental cell types report template not found.")
   }
 
-  # render report
-  rmarkdown::render(
-    input = opt$celltype_report_template,
-    output_file = basename(opt$celltype_report_file),
-    output_dir = dirname(opt$celltype_report_file),
-    intermediates_dir = tempdir(),
-    knit_root_dir = tempdir(),
-    envir = new.env(),
-    params = list(
-      library = metadata_list$library_id,
-      processed_sce = processed_sce
+  # only render supplemental report if there's more than one cell
+  if (ncol(processed_sce) > 1) {
+    # render report
+    rmarkdown::render(
+      input = opt$celltype_report_template,
+      output_file = basename(opt$celltype_report_file),
+      output_dir = dirname(opt$celltype_report_file),
+      intermediates_dir = tempdir(),
+      knit_root_dir = tempdir(),
+      envir = new.env(),
+      params = list(
+        library = metadata_list$library_id,
+        processed_sce = processed_sce
+      )
     )
-  )
+  }
 }

From ca62f473e98c59d090c155979b624d9b6d01e1e9 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 25 Apr 2024 12:15:53 -0500
Subject: [PATCH 07/17] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 bin/add_celltypes_to_sce.R                            | 2 +-
 templates/qc_report/celltypes_qc.rmd                  | 6 +++---
 templates/qc_report/celltypes_supplemental_report.rmd | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/bin/add_celltypes_to_sce.R b/bin/add_celltypes_to_sce.R
index ac32d200..b837255c 100755
--- a/bin/add_celltypes_to_sce.R
+++ b/bin/add_celltypes_to_sce.R
@@ -197,7 +197,7 @@ if (!is.null(opt$cellassign_predictions)) {
   # if the only column is the barcode column or if the predictions file was empty
   # then CellAssign didn't complete successfully
   # otherwise add in cell type annotations and metadata to SCE
-  if (is.null(predictions) | all(colnames(predictions) == "barcode")) {
+  if (is.null(predictions) || all(colnames(predictions) == "barcode")) {
     # if failed then note that in the cell type column
     sce$cellassign_celltype_annotation <- "Not run"
   } else {
diff --git a/templates/qc_report/celltypes_qc.rmd b/templates/qc_report/celltypes_qc.rmd
index 93288aa4..07a4e1da 100644
--- a/templates/qc_report/celltypes_qc.rmd
+++ b/templates/qc_report/celltypes_qc.rmd
@@ -370,7 +370,7 @@ create_celltype_n_table(celltype_df, cellassign_celltype_annotation) |>
 ```
 
 
-```{r, eval = has_umap & has_clusters}
+```{r, eval = has_umap && has_clusters}
 knitr::asis_output(glue::glue("
 ## UMAPs
 
@@ -398,7 +398,7 @@ glue::glue("
 ```
 
 
-```{r eval = has_umap & has_clusters, message=FALSE, warning=FALSE}
+```{r eval = has_umap && has_clusters, message=FALSE, warning=FALSE}
 clusters_plot <- plot_umap(
   umap_df,
   cluster,
@@ -418,7 +418,7 @@ if (length(levels(umap_df$cluster)) <= 8) {
 ```
 
 
-```{r, eval = has_umap & has_celltypes}
+```{r, eval = has_umap && has_celltypes}
 knitr::asis_output(
   'Next, we show UMAPs colored by cell types.
 For each cell typing method, we show a separate faceted UMAP.
diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 03e3b3c0..c5b5ea9d 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -420,7 +420,7 @@ glue::glue("
 ```
 
 <!-- If not multiplexed, show the header, text, and heatmap --> 
-```{r, eval = !has_multiplex & has_clusters, results='asis'}
+```{r, eval = !has_multiplex && has_clusters, results='asis'}
 glue::glue("
   ## Unsupervised clustering
 
@@ -452,7 +452,7 @@ plot_height <- calculate_plot_height(
 ```
 
 
-```{r, eval = !has_multiplex & has_clusters, fig.height = plot_height, fig.width = 8.5, warning = FALSE}
+```{r, eval = !has_multiplex && has_clusters, fig.height = plot_height, fig.width = 8.5, warning = FALSE}
 jaccard_cluster_matrices |>
   create_heatmap_list(
     column_title = "Clusters",

From 40b3bca769a503330e355cb83900207fc799cc8a Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Mon, 29 Apr 2024 10:09:52 -0500
Subject: [PATCH 08/17] Define umap point size when creating cell type umaps

---
 templates/qc_report/celltypes_qc.rmd | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/templates/qc_report/celltypes_qc.rmd b/templates/qc_report/celltypes_qc.rmd
index 07a4e1da..aae41cd4 100644
--- a/templates/qc_report/celltypes_qc.rmd
+++ b/templates/qc_report/celltypes_qc.rmd
@@ -402,7 +402,8 @@ glue::glue("
 clusters_plot <- plot_umap(
   umap_df,
   cluster,
-  "Cluster"
+  "Cluster",
+  point_size = umap_point_size
 ) +
   ggtitle("UMAP colored by cluster identity")
 
@@ -447,7 +448,8 @@ if (has_submitter & has_umap) {
 faceted_umap(
   umap_df,
   submitter_n_celltypes,
-  submitter_celltype_annotation_lumped
+  submitter_celltype_annotation_lumped,
+  point_size = umap_facet_point_size
 ) +
   ggtitle("UMAP colored by submitter-provided annotations")
 ```
@@ -469,7 +471,8 @@ if (has_singler & has_umap) {
 faceted_umap(
   umap_df,
   singler_n_celltypes,
-  singler_celltype_annotation_lumped
+  singler_celltype_annotation_lumped,
+  point_size = umap_facet_point_size
 ) +
   ggtitle("UMAP colored by SingleR annotations")
 ```
@@ -490,7 +493,8 @@ if (has_cellassign & has_umap) {
 faceted_umap(
   umap_df,
   cellassign_n_celltypes,
-  cellassign_celltype_annotation_lumped
+  cellassign_celltype_annotation_lumped,
+  point_size = umap_facet_point_size
 ) +
   ggtitle("UMAP colored by CellAssign annotations")
 ```

From 0e3b01cab23d9c030e3d85f11f74f17229984710 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <ally.hawkins@ccdatalab.org>
Date: Mon, 29 Apr 2024 11:33:27 -0500
Subject: [PATCH 09/17] move umap point sizes to child reports and set default
 to 1

---
 templates/qc_report/celltypes_qc.rmd           | 18 ++++++++++++++----
 .../celltypes_supplemental_report.rmd          |  5 -----
 templates/qc_report/main_qc_report.rmd         |  5 -----
 templates/qc_report/umap_qc.rmd                |  8 ++++++++
 4 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/templates/qc_report/celltypes_qc.rmd b/templates/qc_report/celltypes_qc.rmd
index aae41cd4..2b69cccf 100644
--- a/templates/qc_report/celltypes_qc.rmd
+++ b/templates/qc_report/celltypes_qc.rmd
@@ -1,5 +1,10 @@
 # Cell type Annotation Summary
 
+<!--
+This file is meant to be run as a child report within either `main_qc_report.rmd` or `celltypes_supplemental_report.rmd`. 
+-->
+
+
 ```{r}
 ## function definitions ##
 
@@ -86,14 +91,14 @@ lump_wrap_celltypes <- function(df, n_celltypes = 7, wrap = 35) {
 #' @param color_variable Column in data frame to color by, not a string.
 #' @param legend_title Title for legend.
 #' @param legend_nrow Number of rows in legend. Default is 2.
-#' @param point_size Point size
+#' @param point_size Point size. Default is 1
 #'
 #' @return UMAP plot as a ggplot2 object
 plot_umap <- function(
     umap_df,
     color_variable,
     legend_title,
-    point_size = point_size,
+    point_size = 1,
     legend_nrow = 2) {
   ggplot(umap_df) +
     aes(
@@ -131,14 +136,14 @@ plot_umap <- function(
 #' @param umap_df Data frame with UMAP1 and UMAP2 columns
 #' @param n_celltypes The number of cell types (facets) displayed in the plot
 #' @param annotation_column Column containing cell type annotations
-#' @param point_size Point size
+#' @param point_size Point size. Default is 1
 #'
 #' @return ggplot object containing a faceted UMAP where each cell type is a facet.
 #'   In each panel, the cell type of interest is colored and all other cells are grey.
 faceted_umap <- function(umap_df,
                          n_celltypes,
                          annotation_column,
-                         point_size = umap_facet_point_size) {
+                         point_size = 1) {
   # Determine legend y-coordinate based on n_celltypes
   if (n_celltypes %in% 7:8) {
     legend_y <- 0.33
@@ -283,6 +288,11 @@ glue::glue("
 ```{r, warning = FALSE}
 # Create data frame of cell types
 celltype_df <- create_celltype_df(processed_sce)
+
+# determine UMAP point sizing
+umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
+umap_point_size <- umap_points_sizes[1]
+umap_facet_point_size <- umap_points_sizes[2]
 ```
 
 
diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index c5b5ea9d..f57e474c 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -300,11 +300,6 @@ plot_height <- 1
 #  sample_id should be defined with length > 1
 sample_id <- metadata(processed_sce)$sample_id
 has_multiplex <- length(sample_id) > 1
-
-# determine UMAP point sizing
-umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
-umap_point_size <- umap_points_sizes[1]
-umap_facet_point_size <- umap_points_sizes[2]
 ```
 
 <!-- If multiplexed, open with warning  --> 
diff --git a/templates/qc_report/main_qc_report.rmd b/templates/qc_report/main_qc_report.rmd
index aac47d8a..8738adcf 100644
--- a/templates/qc_report/main_qc_report.rmd
+++ b/templates/qc_report/main_qc_report.rmd
@@ -144,11 +144,6 @@ if ((has_singler | has_cellassign) & is.null(params$celltype_report)) {
 # check if we have multiplex
 has_multiplex <- length(sample_id) > 1
 sample_types <- metadata(unfiltered_sce)$sample_type
-
-# determine UMAP point sizing
-umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
-umap_point_size <- umap_points_sizes[1]
-umap_facet_point_size <- umap_points_sizes[2]
 ```
 
 
diff --git a/templates/qc_report/umap_qc.rmd b/templates/qc_report/umap_qc.rmd
index 1a8fac5d..dd63bfa4 100644
--- a/templates/qc_report/umap_qc.rmd
+++ b/templates/qc_report/umap_qc.rmd
@@ -2,6 +2,14 @@
 
 The below plot shows the UMAP (Uniform Manifold Approximation and Projection) embeddings for each cell, coloring each cell by the total number of genes detected per cell.
 
+```{r}
+# determine UMAP point sizing
+umap_points_sizes <- determine_umap_point_size(ncol(processed_sce))
+umap_point_size <- umap_points_sizes[1]
+umap_facet_point_size <- umap_points_sizes[2]
+```
+
+
 ```{r message=FALSE}
 # create UMAP colored by number of genes detected
 scater::plotUMAP(

From 6d1fc1d6e67d6db657deea17ac21b71d1ff88abe Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 1 May 2024 10:27:00 -0500
Subject: [PATCH 10/17] unlist adt list

---
 bin/merge_sces.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/merge_sces.R b/bin/merge_sces.R
index a9d160e2..0a3f234b 100755
--- a/bin/merge_sces.R
+++ b/bin/merge_sces.R
@@ -247,7 +247,8 @@ adt_present_columns <- sce_list |>
 
 # ensure that there are indeed no "adt" altExps if adt_present_columns is empty
 adt_altexps <- sce_list |>
-  purrr::map(\(sce) "adt" %in% altExpNames(sce))
+  purrr::map(\(sce) "adt" %in% altExpNames(sce)) |>
+  unlist()
 if (is.null(adt_present_columns) && sum(adt_altexps) > 0) {
   stop("Error in determining which adt altExp columns should be retained.")
 }

From 41738f0615536a719d8525403e4936a6fcdd2a67 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 1 May 2024 10:27:21 -0500
Subject: [PATCH 11/17] make sure that processed file isn't empty

---
 merge.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/merge.nf b/merge.nf
index 0a4c42da..f416fa07 100644
--- a/merge.nf
+++ b/merge.nf
@@ -185,8 +185,8 @@ workflow {
         it.library_id,
         file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
       ]}
-      // only include libraries that have been processed through scpca-nf
-      .filter{file(it[2]).exists()}
+      // only include libraries that have been processed through scpca-nf and aren't empty
+      .filter{file(it[2]).exists() && file(it[2]).size() > 0}
       // only one row per library ID, this removes all the duplicates that may be present due to CITE/hashing
       .unique()
       // group tuple by project id: [project_id, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]]

From 854bb936ba8ee31f6e39ab0075d74e0dc3a55aa2 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 1 May 2024 13:00:10 -0500
Subject: [PATCH 12/17] add warning for libraries not included in merging

---
 merge.nf | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/merge.nf b/merge.nf
index f416fa07..d5a94e12 100644
--- a/merge.nf
+++ b/merge.nf
@@ -178,6 +178,15 @@ workflow {
         log.warn("Not merging ${it.project_id} because it contains multiplexed libraries.")
       }
 
+    // print out warning message for any libraries not included in merging
+    merge_libaries = filtered_libraries_ch
+      .collect{it.library_id}
+    libraries_ch
+    .filter{!(it.library_id in merge_libaries.getVal())}
+    .subscribe{
+      log.warn("Processed files do not exist for ${it.library_id}. This library will not be included in the merged object.")
+    }
+
     grouped_libraries_ch = filtered_libraries_ch.single_sample
       // create tuple of [project id, library_id, processed_sce_file]
       .map{[

From fad0236e43ee9dfb6a6bd14f649d5ca220af4ffc Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 1 May 2024 13:04:37 -0500
Subject: [PATCH 13/17] specify branch

---
 merge.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/merge.nf b/merge.nf
index d5a94e12..3b22eac3 100644
--- a/merge.nf
+++ b/merge.nf
@@ -179,7 +179,7 @@ workflow {
       }
 
     // print out warning message for any libraries not included in merging
-    merge_libaries = filtered_libraries_ch
+    merge_libaries = filtered_libraries_ch.single_sample
       .collect{it.library_id}
     libraries_ch
     .filter{!(it.library_id in merge_libaries.getVal())}

From 57630daecd1cbcb8fd628857a6b330c8ffa32868 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 1 May 2024 16:40:16 -0500
Subject: [PATCH 14/17] remove multiplexed

---
 merge.nf | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/merge.nf b/merge.nf
index 3b22eac3..27923731 100644
--- a/merge.nf
+++ b/merge.nf
@@ -179,12 +179,14 @@ workflow {
       }
 
     // print out warning message for any libraries not included in merging
-    merge_libaries = filtered_libraries_ch.single_sample
-      .collect{it.library_id}
-    libraries_ch
-    .filter{!(it.library_id in merge_libaries.getVal())}
+    filtered_libraries_ch.single_sample
+      .map{[
+        it.library_id,
+        file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
+      ]}
+    .filter{!(it[1].exists() && it[1].size() > 0)}
     .subscribe{
-      log.warn("Processed files do not exist for ${it.library_id}. This library will not be included in the merged object.")
+      log.warn("Processed files do not exist or are empty for ${it[0]}. This library will not be included in the merged object.")
     }
 
     grouped_libraries_ch = filtered_libraries_ch.single_sample

From bb515c6e92425eb583b65a1dcc7c951124bcf964 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 1 May 2024 16:41:01 -0500
Subject: [PATCH 15/17] use map_lgl

---
 bin/merge_sces.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bin/merge_sces.R b/bin/merge_sces.R
index 0a3f234b..8d0a4b8f 100755
--- a/bin/merge_sces.R
+++ b/bin/merge_sces.R
@@ -247,8 +247,7 @@ adt_present_columns <- sce_list |>
 
 # ensure that there are indeed no "adt" altExps if adt_present_columns is empty
 adt_altexps <- sce_list |>
-  purrr::map(\(sce) "adt" %in% altExpNames(sce)) |>
-  unlist()
+  purrr::map_lgl(\(sce) "adt" %in% altExpNames(sce))
 if (is.null(adt_present_columns) && sum(adt_altexps) > 0) {
   stop("Error in determining which adt altExp columns should be retained.")
 }

From 6629141ce29d17ff0cd117019052ac40f53cf70a Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 2 May 2024 08:54:36 -0500
Subject: [PATCH 16/17] drop file

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 merge.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/merge.nf b/merge.nf
index 27923731..557c52a6 100644
--- a/merge.nf
+++ b/merge.nf
@@ -197,7 +197,7 @@ workflow {
         file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
       ]}
       // only include libraries that have been processed through scpca-nf and aren't empty
-      .filter{file(it[2]).exists() && file(it[2]).size() > 0}
+      .filter{it[2].exists() && it[2].size() > 0}
       // only one row per library ID, this removes all the duplicates that may be present due to CITE/hashing
       .unique()
       // group tuple by project id: [project_id, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]]

From 17e36639a090bce07b4fb7c7b2cef00a74dfedd1 Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Thu, 9 May 2024 16:31:09 -0400
Subject: [PATCH 17/17] delete logcounts layer

---
 bin/move_counts_anndata.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/move_counts_anndata.py b/bin/move_counts_anndata.py
index 480ee9a9..75e0412d 100755
--- a/bin/move_counts_anndata.py
+++ b/bin/move_counts_anndata.py
@@ -49,6 +49,7 @@
     # move logcounts to X and rename
     object.X = object.layers["logcounts"]
     object.uns["X_name"] = "logcounts"
+    del object.layers["logcounts"]
 
     # export object
     object.write_h5ad(args.anndata_file, compression="gzip" if args.compress else None)