Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ADT filtering #379

Merged
merged 10 commits into from
Jul 18, 2023
90 changes: 56 additions & 34 deletions bin/post_process_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,37 @@ metadata(sce)$min_gene_cutoff <- opt$gene_cutoff
# create adt_scpca_filter column, if CITEseq ----------
alt_exp <- opt$adt_name
if (alt_exp %in% altExpNames(sce)) {
sce$adt_scpca_filter <- ifelse(
altExp(sce, alt_exp)$discard,
"Remove",
"Keep"
)

# assign `adt_scpca_filter_method` metadata based on colData contents
if ("sum.controls" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts with isotype controls"
} else if ("ambient.scale" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts without isotype controls"
# set up filter with all Keep, and then override to Remove where necessary
sce$adt_scpca_filter <- "Keep"
sce$adt_scpca_filter[which(altExp(sce, alt_exp)$discard)] <- "Remove"

# Warnings for different types of failure:
fail_all_removed <- sum(sce$adt_scpca_filter == "Remove") == length(sce$adt_scpca_filter)
fail_all_na <- sum(is.na(altExp(sce, alt_exp)$discard)) == length(sce$adt_scpca_filter)
sjspielman marked this conversation as resolved.
Show resolved Hide resolved

# handle failures - warnings and assign method as "No filter"
if (fail_all_removed | fail_all_na) {
metadata(sce)$adt_scpca_filter_method <- "No filter"
if (fail_all_removed) {
sce$adt_scpca_filter <- "Keep"
warning("Filtering on ADTs attempted to remove all cells. No cells will be removed.")
} else {
warning("ADT filtering failed. No cells will be removed.")
}
} else {
# Handle successes - assign `adt_scpca_filter_method` metadata based on colData contents
if ("sum.controls" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts with isotype controls"
} else if ("ambient.scale" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts without isotype controls"
} else {
stop("Error in ADT filtering.")
}
}

# make extra sure there are no NAs in `adt_scpca_filter`
if (sum(is.na(sce$adt_scpca_filter)) != 0) {
sjspielman marked this conversation as resolved.
Show resolved Hide resolved
stop("Error in ADT filtering.")
}
}
Expand Down Expand Up @@ -161,6 +180,7 @@ processed_sce <- scuttle::logNormCounts(processed_sce)
# Try to normalize ADT counts, if present
if (alt_exp %in% altExpNames(processed_sce)) {


# need `all()` since,if present, this is an array
if( !all(is.null(metadata(altExp(processed_sce, alt_exp))$ambient_profile))){
# Calculate median size factors from the ambient profile
Expand All @@ -173,38 +193,40 @@ if (alt_exp %in% altExpNames(processed_sce)) {
altExp(processed_sce, alt_exp)$sizeFactor <- 0
}

# Perform filtering specifically to allow for normalization
adt_sce <- altExp(processed_sce, alt_exp)
adt_sce <- adt_sce[,adt_sce$discard == FALSE]

# If any size factors are not positive, simply use log1p
if ( any( adt_sce$sizeFactor <= 0 ) ) {
# Perform filtering specifically to allow for normalization
if (!is.null(processed_sce$adt_scpca_filter)) {
adt_sce <- adt_sce[,processed_sce$adt_scpca_filter == "Keep"]
}

# If any size factors are not positive or there was no filtering, simply use log1p
if ( any( adt_sce$sizeFactor <= 0 ) | metadata(processed_sce)$adt_scpca_filter_method == "No filter") {
metadata(processed_sce)$adt_normalization <- "log-normalization"
logcounts(adt_sce) <- log1p(counts(adt_sce))
} else {
# Apply normalization using size factors
metadata(processed_sce)$adt_normalization <- "median-based"
adt_sce <- scuttle::logNormCounts(adt_sce)

# Add this logcounts matrix with NA values added for cells not included in normalization

# first, get the counts matrix and make it NA
result_matrix <- counts(altExp(processed_sce, alt_exp))
result_matrix[,] <- NA

# now get the computed logcounts & fill them in
result_matrix[, colnames(adt_sce)] <- logcounts(adt_sce)

# Check correct number of NAs:
observed_na_count <- sum(is.na(result_matrix))
expected_na_count <- nrow(adt_sce) * (ncol(altExp(processed_sce, alt_exp)) - ncol(adt_sce))
if (observed_na_count < expected_na_count) {
stop("Incorrect number of NAs recovered during ADT normalization.")
}

# Add result_matrix back into correct SCE as logcounts assay
logcounts(altExp(processed_sce, alt_exp)) <- result_matrix
}
# Now that we have logcounts, add back to `processed_sce` but
# with NA values for cells not included in normalization

# first, get the counts matrix and make it NA
result_matrix <- counts(altExp(processed_sce, alt_exp))
result_matrix[,] <- NA

# now get the computed logcounts & fill them in
result_matrix[, colnames(adt_sce)] <- logcounts(adt_sce)

# Check correct number of NAs:
observed_na_count <- sum(is.na(result_matrix))
expected_na_count <- nrow(adt_sce) * (ncol(altExp(processed_sce, alt_exp)) - ncol(adt_sce))
if (observed_na_count < expected_na_count) {
stop("Incorrect number of NAs recovered during ADT normalization.")
}

# Add result_matrix back into correct SCE as logcounts assay
logcounts(altExp(processed_sce, alt_exp)) <- result_matrix
}


Expand Down
39 changes: 27 additions & 12 deletions templates/qc_report/cite_qc.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,25 @@ knitr::kable(antibody_tags, digits = 2) |>

## ADT Post-processing Statistics

Note that low-quality cells as identified by ADT counts are not actually filtered from the SCE object.
Instead, cells that passed the filter threshold are labeled as `"Keep"` within the SCE object, and conversely cells that failed to pass the filtered are labeled as `"Remove"`.

```{r}

if (has_processed) {
filtered_cell_count <- sum(processed_sce$adt_scpca_filter == "Keep")

basic_statistics <- tibble::tibble(
# Note that the adt_scpca_filter_method column is only present in the processed_sce object
"Method used to identify cells to filter" = format(processed_meta$adt_scpca_filter_method),
"Number of cells that pass filtering threshold" = format(filtered_cell_count),
"Percent of cells that pass filtering threshold" =
paste0(round( filtered_cell_count/ncol(processed_sce) * 100, digits = 2), "%"),
"Normalization method" = format(processed_meta$adt_normalization)
) |>
"Method used to identify cells to filter" = format(processed_meta$adt_scpca_filter_method)
)

if (!(processed_meta$adt_scpca_filter_method == "No filter")) {
filtered_cell_count <- sum(processed_sce$adt_scpca_filter == "Keep")

basic_statistics <- basic_statistics |>
# Note that the adt_scpca_filter_method column is only present in the processed_sce object
mutate("Number of cells that pass filtering threshold" = format(filtered_cell_count),
"Percent of cells that pass filtering threshold" = paste0(round( filtered_cell_count/ncol(processed_sce) * 100, digits = 2), "%")
)
}
basic_statistics <- basic_statistics |>
mutate("Normalization method" = format(processed_meta$adt_normalization)) |>
reformat_nulls() |>
t()

Expand All @@ -96,9 +99,13 @@ if (has_processed) {


```{r fig.alt="Cell filtering based on both ADT and RNA counts", results='asis'}
if (has_processed) {
if (has_processed & !(processed_meta$adt_scpca_filter_method == "No filter")) {

glue::glue('

Note that low-quality cells as identified by ADT counts are not actually filtered from the SCE object.
Instead, cells that passed the filter threshold are labeled as `"Keep"` within the SCE object, and conversely cells that failed to pass the filtered are labeled as `"Remove"`.

The plot below displays an overall view of cell filtering based on both ADT and RNA counts. Cells are labeled as one of the following:

- "Keep": This cell is retained based on both RNA and ADT counts.
Expand Down Expand Up @@ -144,6 +151,14 @@ if (has_processed) {
facet_wrap(vars(filter_summary)) +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none")
} else {
glue::glue("
<div class=\"alert alert-warning\">

No ADT filtering was performed on this library.

</div>
")
}
```

Expand Down