Skip processing if no cells remain after empty droplet filtering #738

Merged: 9 commits, Apr 15, 2024
23 changes: 7 additions & 16 deletions bin/filter_sce.R
@@ -95,14 +95,19 @@ filtered_sce <- scpcaTools::filter_counts(unfiltered_sce)
# remove unfiltered for memory saving
rm(unfiltered_sce)

# if no cells are left, write out an empty file and quit; otherwise continue processing
if (ncol(filtered_sce) == 0) {
# make an empty filtered file
file.create(opt$filtered_file)
quit(save = "no")
}
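Design note: writing a zero-byte placeholder instead of producing no file at all presumably keeps the Nextflow process's declared output path present, and lets downstream steps detect the "no cells" case by file size rather than trying to deserialize an empty RDS (see the sce_qc_report.R changes below). A minimal sketch of the convention, using a placeholder path:

filtered_file <- "library_filtered.rds"  # placeholder path for illustration

# upstream (this script): create a zero-byte sentinel when no cells survive filtering
file.create(filtered_file)

# downstream (mirrors the check added in sce_qc_report.R): zero bytes means "no object"
filtered_sce <- if (file.size(filtered_file) > 0) {
  readr::read_rds(filtered_file)
} else {
  NULL
}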

# need to remove old gene-level rowData statistics first
drop_cols <- colnames(rowData(filtered_sce)) %in% c("mean", "detected")
rowData(filtered_sce) <- rowData(filtered_sce)[!drop_cols]

# recalculate rowData and add to filtered sce
filtered_sce <- filtered_sce |>
scuttle::addPerFeatureQCMetrics()
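For context, scuttle::addPerFeatureQCMetrics() appends per-gene `mean` and `detected` columns to rowData, which is why the stale copies carried over from the unfiltered object are dropped just above before recalculating. A toy example with made-up counts (not part of this diff):

suppressPackageStartupMessages(library(SingleCellExperiment))
set.seed(2024)
counts <- matrix(
  rpois(200, lambda = 2),
  nrow = 20,
  dimnames = list(paste0("gene", 1:20), paste0("cell", 1:10))
)
toy_sce <- SingleCellExperiment(assays = list(counts = counts))
toy_sce <- scuttle::addPerFeatureQCMetrics(toy_sce)
colnames(rowData(toy_sce))
#> [1] "mean"     "detected"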

# add prob_compromised to colData and miQC model to metadata
# since this can fail, we will check for success
miQC_worked <- FALSE
@@ -115,28 +120,22 @@ try({
metadata(filtered_sce)$prob_compromised_cutoff <- opt$prob_compromised_cutoff
miQC_worked <- TRUE
})

# set prob_compromised to NA if miQC failed
if (!miQC_worked) {
warning("miQC failed. Setting `prob_compromised` to NA.")
filtered_sce$prob_compromised <- NA_real_
filtered_sce$miQC_pass <- NA
metadata(filtered_sce)$prob_compromised_cutoff <- NA
}
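The flag-plus-try() idiom above is what lets the workflow degrade gracefully: try() swallows any error from the miQC step, the flag stays FALSE, and the columns are filled with NA instead of the script dying. A generic sketch of the pattern (the failing step is a stand-in, not the real miQC call):

step_worked <- FALSE
try({
  fit <- stop("model failed to converge")  # stand-in for a step that can error
  step_worked <- TRUE
})
if (!step_worked) {
  warning("Model fit failed. Filling results with NA.")
  fit <- NA_real_
}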

# grab names of altExp, if any
alt_names <- altExpNames(filtered_sce)

for (alt in alt_names) {
# remove old rowData statistics carried over from the unfiltered object
drop_cols <- colnames(rowData(altExp(filtered_sce, alt))) %in% c("mean", "detected")
rowData(altExp(filtered_sce, alt)) <- rowData(altExp(filtered_sce, alt))[!drop_cols]

# add alt experiment features stats for filtered data
altExp(filtered_sce, alt) <- scuttle::addPerFeatureQCMetrics(altExp(filtered_sce, alt))
}


# calculate filtering QC from ADTs, if present
if (!is.null(ambient_profile)) {
# Create data frame of ADTs and their target types
@@ -146,7 +145,6 @@ if (!is.null(ambient_profile)) {
# if only 2 columns exist, only the first two col_names will be used
col_names = c("name", "barcode", "target_type")
)

# Assign default of `target` if no third column provided
if (!"target_type" %in% names(adt_barcode_df)) {
adt_barcode_df$target_type <- "target"
@@ -164,17 +162,14 @@
)
)
}

# Check that barcode file ADTs match SCE ADTs
if (!isTRUE(all.equal(sort(adt_barcode_df$name), sort(rownames(altExp(filtered_sce, adt_exp)))))) {
stop("Mismatch between provided ADT barcode file and ADTs in SCE.")
}
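One subtlety in this check: all.equal() returns TRUE when its arguments match, but a character description of the differences (not FALSE) when they don't, so it needs the isTRUE() wrapper to act as a condition. A quick base R illustration:

all.equal(c("adtA", "adtB"), c("adtA", "adtC"))
#> [1] "1 string mismatch"
isTRUE(all.equal(c("adtA", "adtB"), c("adtA", "adtC")))
#> [1] FALSE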

# Find any negative controls
neg_controls <- adt_barcode_df |>
dplyr::filter(target_type == "neg_control") |>
dplyr::pull(name)

# Calculate QC stats, providing negative controls if present
# note: function fails if controls is length 0 or null, so keep the `if`
if (length(neg_controls) == 0) {
@@ -189,17 +184,13 @@
controls = neg_controls
)
}

# Save QC stats to the altexp
colData(altExp(filtered_sce, adt_exp)) <- cbind(
colData(altExp(filtered_sce, adt_exp)),
adt_qc_df
)

# Add `target_type` to rowData
rowData(altExp(filtered_sce, adt_exp))$target_type <- adt_barcode_df$target_type
}


# write filtered sce to output
readr::write_rds(filtered_sce, opt$filtered_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/generate_unfiltered_sce.R
Expand Up @@ -161,7 +161,7 @@ if (opt$feature_dir != "") {


# read in sample metadata
sample_metadata_df <- readr::read_tsv(opt$sample_metadata_file) |>
sample_metadata_df <- readr::read_tsv(opt$sample_metadata_file, col_types = "c") |>
# rename sample id column
dplyr::rename("sample_id" = "scpca_sample_id") |>
# add library ID as column in sample metadata
24 changes: 14 additions & 10 deletions bin/sce_qc_report.R
@@ -129,9 +129,7 @@ if (!is.null(opt$report_template) && !file.exists(opt$report_template)) {
if (is.null(opt$unfiltered_sce) || !file.exists(opt$unfiltered_sce)) {
stop("Unfiltered .rds file missing or `unfiltered_sce` not specified.")
}
if (is.null(opt$filtered_sce) || !file.exists(opt$filtered_sce)) {
stop("Filtered .rds file missing or `filtered_sce` not specified.")
}

demux_methods <- c("vireo", "HTODemux", "HashedDrops")
if (!opt$demux_method %in% demux_methods) {
stop("Unknown `demux_method` value. Must be one of `vireo`, `HTOdemux`, or `HashedDrops`")
@@ -147,22 +145,28 @@ if (opt$workflow_commit == "null") {
opt$workflow_commit <- NA
}

# read sce files
# read sce files and compile metadata for output files
unfiltered_sce <- readr::read_rds(opt$unfiltered_sce)
filtered_sce <- readr::read_rds(opt$filtered_sce)
sce_meta <- metadata(unfiltered_sce)

# only read in the filtered sce if the file is non-empty; otherwise set it to NULL
if (file.size(opt$filtered_sce) > 0) {
filtered_sce <- readr::read_rds(opt$filtered_sce)
filtered_sce_meta <- metadata(filtered_sce)
} else {
filtered_sce <- NULL
filtered_sce_meta <- NULL
}

# only read in the processed sce if the file is non-empty; otherwise set it to NULL
if (file.size(opt$processed_sce) > 0) {
processed_sce <- readr::read_rds(opt$processed_sce)
processed_sce_meta <- metadata(processed_sce)
} else {
processed_sce <- NULL
processed_sce_meta <- NULL
}

# Compile metadata for output files
sce_meta <- metadata(unfiltered_sce)
filtered_sce_meta <- metadata(filtered_sce)
processed_sce_meta <- metadata(processed_sce)

# Parse sample ids
sample_ids <- unlist(stringr::str_split(opt$sample_id, ",|;")) |> sort()
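The ",|;" regex lets `opt$sample_id` arrive either comma- or semicolon-delimited, and sorting presumably keeps the ID order deterministic in the output metadata. A quick example with made-up sample IDs:

unlist(stringr::str_split("SCPCS000002;SCPCS000001", ",|;")) |> sort()
#> [1] "SCPCS000001" "SCPCS000002"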

45 changes: 40 additions & 5 deletions main.nf
@@ -209,7 +209,17 @@ workflow {
.filter{it[0]["library_id"] in rna_only_libs.getVal()}
// make rds for rna only
rna_sce_ch = generate_sce(rna_quant_ch, sample_metafile)
// only continue processing samples with > 0 cells left after filtering
.branch{
continue_processing: it[2].size() > 0 || it[2].name.startsWith("STUB")
skip_processing: true
}

// send library ids in rna_sce_ch.skip_processing to log
rna_sce_ch.skip_processing
.subscribe{
log.error("There are no cells found in the filtered object for ${it[0].library_id}.")
}

// **** Process feature data ****
map_quant_feature(runs_ch.feature)
@@ -223,17 +233,31 @@
.map{it.drop(1)} // remove library_id index

// make rds for merged RNA and feature quants
feature_sce_ch = generate_merged_sce(feature_rna_quant_ch, sample_metafile)
all_feature_ch = generate_merged_sce(feature_rna_quant_ch, sample_metafile)
.branch{
continue_processing: it[2].size() > 0 || it[2].name.startsWith("STUB")
skip_processing: true
}

// send library ids in all_feature_ch.skip_processing to log
all_feature_ch.skip_processing
.subscribe{
log.error("There are no cells found in the filtered object for ${it[0].library_id}.")
}

// pull out cell hash libraries for demuxing
feature_sce_ch = all_feature_ch.continue_processing
.branch{ // branch cellhash libs
cellhash: it[0]["feature_meta"]["technology"] in cellhash_techs
single: true
}

// apply cellhash demultiplexing
cellhash_demux_ch = cellhash_demux_sce(feature_sce_ch.cellhash, file(params.cellhash_pool_file))
merged_sce_ch = cellhash_demux_ch.mix(feature_sce_ch.single)

// join SCE outputs and branch by genetic multiplexing
sce_ch = rna_sce_ch.mix(merged_sce_ch)
sce_ch = rna_sce_ch.continue_processing.mix(merged_sce_ch)
.branch{
genetic_multiplex: it[0]["library_id"] in genetic_multiplex_libs.getVal()
no_genetic: true
@@ -265,7 +289,7 @@ workflow {
post_process_ch = post_process_sce.out
// only continue processing samples with > 0 cells left after processing
.branch{
continue_processing: it[3].size() > 0
continue_processing: it[3].size() > 0 || it[3].name.startsWith("STUB")
skip_processing: true
}

@@ -285,12 +309,23 @@
annotated_celltype_ch = cluster_sce.out
}

// combine back with libraries that skipped post processing
// first mix any skipped libraries from both rna and feature libs
no_filtered_ch = rna_sce_ch.skip_processing.mix(all_feature_ch.skip_processing)
// add a fake processed file
.map{meta, unfiltered, filtered -> tuple(
meta,
unfiltered,
filtered,
"${projectDir}/assets/NO_FILE"
)}

// combine back with libraries that skipped filtering and post processing
sce_output_ch = annotated_celltype_ch.mix(post_process_ch.skip_processing)
.mix(no_filtered_ch)

// generate QC reports
sce_qc_report(
annotated_celltype_ch,
sce_output_ch,
report_template_tuple
)
