diff --git a/bin/filter_sce.R b/bin/filter_sce.R index e507186e..72da4eac 100755 --- a/bin/filter_sce.R +++ b/bin/filter_sce.R @@ -95,14 +95,19 @@ filtered_sce <- scpcaTools::filter_counts(unfiltered_sce) # remove unfiltered for memory saving rm(unfiltered_sce) +# if no cells left, write out empty file and quit, otherwise continue processing +if (ncol(filtered_sce) == 0) { + # make an empty filtered file + file.create(opt$filtered_file) + quit(save = "no") +} + # need to remove old gene-level rowData statistics first drop_cols <- colnames(rowData(filtered_sce)) %in% c("mean", "detected") rowData(filtered_sce) <- rowData(filtered_sce)[!drop_cols] - # recalculate rowData and add to filtered sce filtered_sce <- filtered_sce |> scuttle::addPerFeatureQCMetrics() - # add prob_compromised to colData and miQC model to metadata # since this can fail, we will check for success miQC_worked <- FALSE @@ -115,7 +120,6 @@ try({ metadata(filtered_sce)$prob_compromised_cutoff <- opt$prob_compromised_cutoff miQC_worked <- TRUE }) - # set prob_compromised to NA if miQC failed if (!miQC_worked) { warning("miQC failed. Setting `prob_compromised` to NA.") @@ -123,20 +127,15 @@ if (!miQC_worked) { filtered_sce$miQC_pass <- NA metadata(filtered_sce)$prob_compromised_cutoff <- NA } - # grab names of altExp, if any alt_names <- altExpNames(filtered_sce) - for (alt in alt_names) { # remove old row data from unfiltered drop_cols <- colnames(rowData(altExp(filtered_sce, alt))) %in% c("mean", "detected") rowData(altExp(filtered_sce, alt)) <- rowData(altExp(filtered_sce, alt))[!drop_cols] - # add alt experiment features stats for filtered data altExp(filtered_sce, alt) <- scuttle::addPerFeatureQCMetrics(altExp(filtered_sce, alt)) } - - # calculate filtering QC from ADTs, if present if (!is.null(ambient_profile)) { # Create data frame of ADTs and their target types @@ -146,7 +145,6 @@ if (!is.null(ambient_profile)) { # if only 2 columns exist, only the first two col_names will be used col_names = c("name", "barcode", "target_type") ) - # Assign default of `target` if no third column provided if (!"target_type" %in% names(adt_barcode_df)) { adt_barcode_df$target_type <- "target" @@ -164,17 +162,14 @@ if (!is.null(ambient_profile)) { ) ) } - # Check that barcode file ADTs match SCE ADTs if (!all.equal(sort(adt_barcode_df$name), sort(rownames(altExp(filtered_sce, adt_exp))))) { stop("Mismatch between provided ADT barcode file and ADTs in SCE.") } - # Find any negative controls neg_controls <- adt_barcode_df |> dplyr::filter(target_type == "neg_control") |> dplyr::pull(name) - # Calculate QC stats, providing negative controls if present # note: function fails if controls is length 0 or null, so keep the `if` if (length(neg_controls) == 0) { @@ -189,17 +184,13 @@ if (!is.null(ambient_profile)) { controls = neg_controls ) } - # Save QC stats to the altexp colData(altExp(filtered_sce, adt_exp)) <- cbind( colData(altExp(filtered_sce, adt_exp)), adt_qc_df ) - # Add `target_type` to rowData rowData(altExp(filtered_sce, adt_exp))$target_type <- adt_barcode_df$target_type } - - # write filtered sce to output readr::write_rds(filtered_sce, opt$filtered_file, compress = "bz2") diff --git a/bin/generate_unfiltered_sce.R b/bin/generate_unfiltered_sce.R index 1e01a902..5aaccf11 100755 --- a/bin/generate_unfiltered_sce.R +++ b/bin/generate_unfiltered_sce.R @@ -161,7 +161,7 @@ if (opt$feature_dir != "") { # read in sample metadata -sample_metadata_df <- readr::read_tsv(opt$sample_metadata_file) |> +sample_metadata_df <- readr::read_tsv(opt$sample_metadata_file, col_types = "c") |> # rename sample id column dplyr::rename("sample_id" = "scpca_sample_id") |> # add library ID as column in sample metadata diff --git a/bin/sce_qc_report.R b/bin/sce_qc_report.R index f544c98f..681f2cc3 100755 --- a/bin/sce_qc_report.R +++ b/bin/sce_qc_report.R @@ -129,9 +129,7 @@ if (!is.null(opt$report_template) && !file.exists(opt$report_template)) { if (is.null(opt$unfiltered_sce) || !file.exists(opt$unfiltered_sce)) { stop("Unfiltered .rds file missing or `unfiltered_sce` not specified.") } -if (is.null(opt$filtered_sce) || !file.exists(opt$filtered_sce)) { - stop("Filtered .rds file missing or `filtered_sce` not specified.") -} + demux_methods <- c("vireo", "HTODemux", "HashedDrops") if (!opt$demux_method %in% demux_methods) { stop("Unknown `demux_method` value. Must be one of `vireo`, `HTOdemux`, or `HashedDrops`") @@ -147,22 +145,28 @@ if (opt$workflow_commit == "null") { opt$workflow_commit <- NA } -# read sce files +# read sce files and compile metadata for output files unfiltered_sce <- readr::read_rds(opt$unfiltered_sce) -filtered_sce <- readr::read_rds(opt$filtered_sce) +sce_meta <- metadata(unfiltered_sce) + +# make sure filtered sce has an object, otherwise set to NULL +if (file.size(opt$filtered_sce) > 0) { + filtered_sce <- readr::read_rds(opt$filtered_sce) + filtered_sce_meta <- metadata(filtered_sce) +} else { + filtered_sce <- NULL + filtered_sce_meta <- NULL +} # make sure processed sce has an object, otherwise set to NULL if (file.size(opt$processed_sce) > 0) { processed_sce <- readr::read_rds(opt$processed_sce) + processed_sce_meta <- metadata(processed_sce) } else { processed_sce <- NULL + processed_sce_meta <- NULL } -# Compile metadata for output files -sce_meta <- metadata(unfiltered_sce) -filtered_sce_meta <- metadata(filtered_sce) -processed_sce_meta <- metadata(processed_sce) - # Parse sample ids sample_ids <- unlist(stringr::str_split(opt$sample_id, ",|;")) |> sort() diff --git a/main.nf b/main.nf index 7f02b384..a53f5148 100644 --- a/main.nf +++ b/main.nf @@ -209,7 +209,17 @@ workflow { .filter{it[0]["library_id"] in rna_only_libs.getVal()} // make rds for rna only rna_sce_ch = generate_sce(rna_quant_ch, sample_metafile) + // only continue processing any samples with > 0 cells left after filtering + .branch{ + continue_processing: it[2].size() > 0 || it[2].name.startsWith("STUBL") + skip_processing: true + } + // send library ids in rna_sce_ch.skip_processing to log + rna_sce_ch.skip_processing + .subscribe{ + log.error("There are no cells found in the filtered object for ${it[0].library_id}.") + } // **** Process feature data **** map_quant_feature(runs_ch.feature) @@ -223,17 +233,31 @@ workflow { .map{it.drop(1)} // remove library_id index // make rds for merged RNA and feature quants - feature_sce_ch = generate_merged_sce(feature_rna_quant_ch, sample_metafile) + all_feature_ch = generate_merged_sce(feature_rna_quant_ch, sample_metafile) + .branch{ + continue_processing: it[2].size() > 0 || it[2].name.startsWith("STUB") + skip_processing: true + } + + // send library ids in all_feature_ch.skip_processing to log + all_feature_ch.skip_processing + .subscribe{ + log.error("There are no cells found in the filtered object for ${it[0].library_id}.") + } + + // pull out cell hash libraries for demuxing + feature_sce_ch = all_feature_ch.continue_processing .branch{ // branch cellhash libs cellhash: it[0]["feature_meta"]["technology"] in cellhash_techs single: true } + // apply cellhash demultiplexing cellhash_demux_ch = cellhash_demux_sce(feature_sce_ch.cellhash, file(params.cellhash_pool_file)) merged_sce_ch = cellhash_demux_ch.mix(feature_sce_ch.single) // join SCE outputs and branch by genetic multiplexing - sce_ch = rna_sce_ch.mix(merged_sce_ch) + sce_ch = rna_sce_ch.continue_processing.mix(merged_sce_ch) .branch{ genetic_multiplex: it[0]["library_id"] in genetic_multiplex_libs.getVal() no_genetic: true @@ -265,7 +289,7 @@ workflow { post_process_ch = post_process_sce.out // only continue processing any samples with > 0 cells left after processing .branch{ - continue_processing: it[3].size() > 0 + continue_processing: it[3].size() > 0 || it[3].name.startsWith("STUB") skip_processing: true } @@ -285,12 +309,23 @@ workflow { annotated_celltype_ch = cluster_sce.out } - // combine back with libraries that skipped post processing + // first mix any skipped libraries from both rna and feature libs + no_filtered_ch = rna_sce_ch.skip_processing.mix(all_feature_ch.skip_processing) + // add a fake processed file + .map{meta, unfiltered, filtered -> tuple( + meta, + unfiltered, + filtered, + "${projectDir}/assets/NO_FILE" + )} + + // combine back with libraries that skipped filtering and post processing sce_output_ch = annotated_celltype_ch.mix(post_process_ch.skip_processing) + .mix(no_filtered_ch) // generate QC reports sce_qc_report( - annotated_celltype_ch, + sce_output_ch, report_template_tuple )