Skip processing if no cells remain after empty droplet filtering #738

Merged: 9 commits, Apr 15, 2024
23 changes: 7 additions & 16 deletions bin/filter_sce.R
@@ -95,14 +95,19 @@ filtered_sce <- scpcaTools::filter_counts(unfiltered_sce)
# remove unfiltered for memory saving
rm(unfiltered_sce)

# if no cells are left, write out an empty file and quit; otherwise continue processing
if (ncol(filtered_sce) == 0) {
# make an empty filtered file
file.create(opt$filtered_file)
quit(save = "no")
}
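Design note: writing a zero-byte placeholder instead of producing no file at all presumably keeps the Nextflow process's declared output path present, and lets downstream steps detect the "no cells" case by file size rather than trying to deserialize an empty RDS (see the sce_qc_report.R changes below). A minimal sketch of the convention, using a placeholder path:

filtered_file <- "library_filtered.rds"  # placeholder path for illustration

# upstream (this script): create a zero-byte sentinel when no cells survive filtering
file.create(filtered_file)

# downstream (mirrors the check added in sce_qc_report.R): zero bytes means "no object"
filtered_sce <- if (file.size(filtered_file) > 0) {
  readr::read_rds(filtered_file)
} else {
  NULL
}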

# need to remove old gene-level rowData statistics first
drop_cols <- colnames(rowData(filtered_sce)) %in% c("mean", "detected")
rowData(filtered_sce) <- rowData(filtered_sce)[!drop_cols]

# recalculate rowData and add to filtered sce
filtered_sce <- filtered_sce |>
scuttle::addPerFeatureQCMetrics()
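For context, scuttle::addPerFeatureQCMetrics() appends per-gene `mean` and `detected` columns to rowData, which is why the stale copies carried over from the unfiltered object are dropped just above before recalculating. A toy example with made-up counts (not part of this diff):

suppressPackageStartupMessages(library(SingleCellExperiment))
set.seed(2024)
counts <- matrix(
  rpois(200, lambda = 2),
  nrow = 20,
  dimnames = list(paste0("gene", 1:20), paste0("cell", 1:10))
)
toy_sce <- SingleCellExperiment(assays = list(counts = counts))
toy_sce <- scuttle::addPerFeatureQCMetrics(toy_sce)
colnames(rowData(toy_sce))
#> [1] "mean"     "detected"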

# add prob_compromised to colData and miQC model to metadata
# since this can fail, we will check for success
miQC_worked <- FALSE
@@ -115,28 +120,22 @@ try({
metadata(filtered_sce)$prob_compromised_cutoff <- opt$prob_compromised_cutoff
miQC_worked <- TRUE
})

# set prob_compromised to NA if miQC failed
if (!miQC_worked) {
warning("miQC failed. Setting `prob_compromised` to NA.")
filtered_sce$prob_compromised <- NA_real_
filtered_sce$miQC_pass <- NA
metadata(filtered_sce)$prob_compromised_cutoff <- NA
}
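The flag-plus-try() idiom above is what lets the workflow degrade gracefully: try() swallows any error from the miQC step, the flag stays FALSE, and the columns are filled with NA instead of the script dying. A generic sketch of the pattern (the failing step is a stand-in, not the real miQC call):

step_worked <- FALSE
try({
  fit <- stop("model failed to converge")  # stand-in for a step that can error
  step_worked <- TRUE
})
if (!step_worked) {
  warning("Model fit failed. Filling results with NA.")
  fit <- NA_real_
}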

# grab names of altExp, if any
alt_names <- altExpNames(filtered_sce)

for (alt in alt_names) {
# remove old rowData statistics carried over from the unfiltered object
drop_cols <- colnames(rowData(altExp(filtered_sce, alt))) %in% c("mean", "detected")
rowData(altExp(filtered_sce, alt)) <- rowData(altExp(filtered_sce, alt))[!drop_cols]

# add alt experiment features stats for filtered data
altExp(filtered_sce, alt) <- scuttle::addPerFeatureQCMetrics(altExp(filtered_sce, alt))
}


# calculate filtering QC from ADTs, if present
if (!is.null(ambient_profile)) {
# Create data frame of ADTs and their target types
@@ -146,7 +145,6 @@ if (!is.null(ambient_profile)) {
# if only 2 columns exist, only the first two col_names will be used
col_names = c("name", "barcode", "target_type")
)

# Assign default of `target` if no third column provided
if (!"target_type" %in% names(adt_barcode_df)) {
adt_barcode_df$target_type <- "target"
@@ -164,17 +162,14 @@
)
)
}

# Check that barcode file ADTs match SCE ADTs
if (!isTRUE(all.equal(sort(adt_barcode_df$name), sort(rownames(altExp(filtered_sce, adt_exp)))))) {
stop("Mismatch between provided ADT barcode file and ADTs in SCE.")
}
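One subtlety in this check: all.equal() returns TRUE when its arguments match, but a character description of the differences (not FALSE) when they don't, so it needs the isTRUE() wrapper to act as a condition. A quick base R illustration:

all.equal(c("adtA", "adtB"), c("adtA", "adtC"))
#> [1] "1 string mismatch"
isTRUE(all.equal(c("adtA", "adtB"), c("adtA", "adtC")))
#> [1] FALSE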

# Find any negative controls
neg_controls <- adt_barcode_df |>
dplyr::filter(target_type == "neg_control") |>
dplyr::pull(name)

# Calculate QC stats, providing negative controls if present
# note: function fails if controls is length 0 or null, so keep the `if`
if (length(neg_controls) == 0) {
@@ -189,17 +184,13 @@
controls = neg_controls
)
}

# Save QC stats to the altexp
colData(altExp(filtered_sce, adt_exp)) <- cbind(
colData(altExp(filtered_sce, adt_exp)),
adt_qc_df
)

# Add `target_type` to rowData
rowData(altExp(filtered_sce, adt_exp))$target_type <- adt_barcode_df$target_type
}


# write filtered sce to output
readr::write_rds(filtered_sce, opt$filtered_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/generate_unfiltered_sce.R
Expand Up @@ -161,7 +161,7 @@ if (opt$feature_dir != "") {


# read in sample metadata
sample_metadata_df <- readr::read_tsv(opt$sample_metadata_file) |>
sample_metadata_df <- readr::read_tsv(opt$sample_metadata_file, col_types = "c") |>
# rename sample id column
dplyr::rename("sample_id" = "scpca_sample_id") |>
# add library ID as column in sample metadata
24 changes: 14 additions & 10 deletions bin/sce_qc_report.R
@@ -129,9 +129,7 @@ if (!is.null(opt$report_template) && !file.exists(opt$report_template)) {
if (is.null(opt$unfiltered_sce) || !file.exists(opt$unfiltered_sce)) {
stop("Unfiltered .rds file missing or `unfiltered_sce` not specified.")
}
if (is.null(opt$filtered_sce) || !file.exists(opt$filtered_sce)) {
stop("Filtered .rds file missing or `filtered_sce` not specified.")
}

demux_methods <- c("vireo", "HTODemux", "HashedDrops")
if (!opt$demux_method %in% demux_methods) {
stop("Unknown `demux_method` value. Must be one of `vireo`, `HTOdemux`, or `HashedDrops`")
@@ -147,22 +145,28 @@ if (opt$workflow_commit == "null") {
opt$workflow_commit <- NA
}

# read sce files
# read sce files and compile metadata for output files
unfiltered_sce <- readr::read_rds(opt$unfiltered_sce)
filtered_sce <- readr::read_rds(opt$filtered_sce)
sce_meta <- metadata(unfiltered_sce)

# only read in the filtered sce if the file is non-empty; otherwise set it to NULL
if (file.size(opt$filtered_sce) > 0) {
filtered_sce <- readr::read_rds(opt$filtered_sce)
filtered_sce_meta <- metadata(filtered_sce)
} else {
filtered_sce <- NULL
filtered_sce_meta <- NULL
}

# only read in the processed sce if the file is non-empty; otherwise set it to NULL
if (file.size(opt$processed_sce) > 0) {
processed_sce <- readr::read_rds(opt$processed_sce)
processed_sce_meta <- metadata(processed_sce)
} else {
processed_sce <- NULL
processed_sce_meta <- NULL
}

# Compile metadata for output files
sce_meta <- metadata(unfiltered_sce)
filtered_sce_meta <- metadata(filtered_sce)
processed_sce_meta <- metadata(processed_sce)

# Parse sample ids
sample_ids <- unlist(stringr::str_split(opt$sample_id, ",|;")) |> sort()
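The ",|;" regex lets `opt$sample_id` arrive either comma- or semicolon-delimited, and sorting presumably keeps the ID order deterministic in the output metadata. A quick example with made-up sample IDs:

unlist(stringr::str_split("SCPCS000002;SCPCS000001", ",|;")) |> sort()
#> [1] "SCPCS000001" "SCPCS000002"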

45 changes: 40 additions & 5 deletions main.nf
@@ -209,7 +209,17 @@ workflow {
.filter{it[0]["library_id"] in rna_only_libs.getVal()}
// make rds for rna only
rna_sce_ch = generate_sce(rna_quant_ch, sample_metafile)
// only continue processing samples with > 0 cells left after filtering
.branch{
continue_processing: it[2].size() > 0 || it[2].name.startsWith("STUB")
skip_processing: true
}

// send library ids in rna_sce_ch.skip_processing to log
rna_sce_ch.skip_processing
.subscribe{
log.error("There are no cells found in the filtered object for ${it[0].library_id}.")
}

// **** Process feature data ****
map_quant_feature(runs_ch.feature)
@@ -223,17 +233,31 @@
.map{it.drop(1)} // remove library_id index

// make rds for merged RNA and feature quants
feature_sce_ch = generate_merged_sce(feature_rna_quant_ch, sample_metafile)
all_feature_ch = generate_merged_sce(feature_rna_quant_ch, sample_metafile)
.branch{
continue_processing: it[2].size() > 0 || it[2].name.startsWith("STUB")
skip_processing: true
}

// send library ids in all_feature_ch.skip_processing to log
all_feature_ch.skip_processing
.subscribe{
log.error("There are no cells found in the filtered object for ${it[0].library_id}.")
}

// pull out cell hash libraries for demuxing
feature_sce_ch = all_feature_ch.continue_processing
.branch{ // branch cellhash libs
cellhash: it[0]["feature_meta"]["technology"] in cellhash_techs
single: true
}

// apply cellhash demultiplexing
cellhash_demux_ch = cellhash_demux_sce(feature_sce_ch.cellhash, file(params.cellhash_pool_file))
merged_sce_ch = cellhash_demux_ch.mix(feature_sce_ch.single)

// join SCE outputs and branch by genetic multiplexing
sce_ch = rna_sce_ch.mix(merged_sce_ch)
sce_ch = rna_sce_ch.continue_processing.mix(merged_sce_ch)
.branch{
genetic_multiplex: it[0]["library_id"] in genetic_multiplex_libs.getVal()
no_genetic: true
@@ -265,7 +289,7 @@ workflow {
post_process_ch = post_process_sce.out
// only continue processing samples with > 0 cells left after processing
.branch{
continue_processing: it[3].size() > 0
continue_processing: it[3].size() > 0 || it[3].name.startsWith("STUB")
skip_processing: true
}

@@ -285,12 +309,23 @@
annotated_celltype_ch = cluster_sce.out
}

// combine back with libraries that skipped post processing
// first mix any skipped libraries from both rna and feature libs
no_filtered_ch = rna_sce_ch.skip_processing.mix(all_feature_ch.skip_processing)
// add a fake processed file
.map{meta, unfiltered, filtered -> tuple(
meta,
unfiltered,
filtered,
"${projectDir}/assets/NO_FILE"
)}

// combine back with libraries that skipped filtering and post processing
sce_output_ch = annotated_celltype_ch.mix(post_process_ch.skip_processing)
.mix(no_filtered_ch)

// generate QC reports
sce_qc_report(
annotated_celltype_ch,
sce_output_ch,
report_template_tuple
)
