Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CellAssign assignments to SCE object #402

Merged
merged 9 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bin/classify_SingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ if(!file.exists(opt$input_sce_file)){
stop("Missing input SCE file")
}

# check that output file ends in rds
if (!(stringr::str_ends(opt$output_sce_file, ".rds"))){
stop("output sce file name must end in .rds")
}

# check that references all exist
singler_model_file <- opt$singler_model_file
if(!file.exists(singler_model_file)) {
Expand Down
80 changes: 80 additions & 0 deletions bin/classify_cellassign.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env Rscript

# This script is used to read in the predictions from CellAssign and assign cell types in the annotated RDS file

# import libraries
suppressPackageStartupMessages({
library(optparse)
library(SingleCellExperiment)
})

# set up arguments
option_list <- list(
make_option(
opt_str = c("-i", "--input_sce_file"),
type = "character",
help = "path to rds file with input sce object"
),
make_option(
opt_str = c("-o", "--output_sce_file"),
type = "character",
help = "path to output rds file to store annotated sce object. Must end in .rds"
),
make_option(
opt_str = c("--cellassign_predictions"),
type = "character",
help = "path to tsv file containing the prediction matrix returned by running CellAssign"
),
make_option(
opt_str = c("--reference_name"),
type = "character",
help = "name referring to the marker gene reference used for CellAssign"
)
)

opt <- parse_args(OptionParser(option_list = option_list))

# check that input file exists
if (!file.exists(opt$input_sce_file)){
stop("Missing input SCE file")
}

# check that cellassign predictions file was provided
if (!file.exists(opt$cellassign_predictions)){
stop("Missing CellAssign predictions file")
}
# check that reference_name was provided
if (is.null(opt$reference_name)) {
stop("Missing reference name")
}

allyhawkins marked this conversation as resolved.
Show resolved Hide resolved
# check that output file ends in rds
if(!(stringr::str_ends(opt$output_sce_file, ".rds"))){
stop("output sce file name must end in .rds")
}
# read in input files
sce <- readr::read_rds(opt$input_sce_file)
predictions <- readr::read_tsv(opt$cellassign_predictions)

celltype_assignments <- predictions |>
tidyr::pivot_longer(!barcode,
names_to = "celltype",
values_to = "prediction") |>
dplyr::group_by(barcode) |>
dplyr::slice_max(prediction, n = 1) |>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh this is neat! I was only aware of its less exciting relative dplyr::slice()

dplyr::ungroup()

# join by barcode to make sure assignments are in the right order
celltype_assignments <- data.frame(barcode = sce$barcodes) |>
dplyr::left_join(celltype_assignments, by = "barcode")

# add cell type and prediction to colData
sce$cellassign_celltype_annotation <- celltype_assignments
sce$cellassign_max_prediction <- celltype_assignments$prediction

# add entire predictions matrix and ref name to metadata
metadata(sce)$cellassign_predictions <- predictions
metadata(sce)$cellassign_reference <- opt$reference_name
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing lines to actually save to RDS below this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yikes... good catch!


# export annotated object with cellassign assignments
readr::write_rds(sce, opt$output_sce_file)
File renamed without changes.
70 changes: 52 additions & 18 deletions modules/classify-celltypes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ process classify_singleR {
container params.SCPCATOOLS_CONTAINER
label 'mem_8'
label 'cpus_4'
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
sjspielman marked this conversation as resolved.
Show resolved Hide resolved
input:
tuple val(meta), path(processed_rds), path(singler_model_file)
output:
Expand All @@ -28,16 +27,17 @@ process classify_singleR {

process predict_cellassign {
container params.SCPCATOOLS_CONTAINER
publishDir "${params.checkpoints_dir}/celltype/${meta.library_id}", mode: 'copy'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 agreed to send to checkpoint!

label 'mem_32'
label 'cpus_12'
input:
tuple val(meta), path(processed_hdf5), path(cellassign_reference_mtx)
tuple val(meta), path(processed_hdf5), path(cellassign_reference_mtx), val(ref_name)
output:
tuple val(meta), path(cellassign_predictions)
tuple val(meta), path(cellassign_predictions), val(ref_name)
script:
cellassign_predictions = "${meta.library_id}_predictions.tsv"
"""
classify_cellassign.py \
predict_cellassign.py \
--input_hdf5_file ${processed_hdf5} \
--output_predictions ${cellassign_predictions} \
--reference ${cellassign_reference_mtx} \
Expand All @@ -51,6 +51,27 @@ process predict_cellassign {
"""
}

process classify_cellassign {
container params.SCPCATOOLS_CONTAINER
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
label 'mem_4'
label 'cpus_2'
input:
tuple val(meta), path(input_rds), path(cellassign_predictions), val(ref_name)
output:
tuple val(meta), path(annotated_rds)
script:
annotated_rds = "${meta.library_id}_annotated.rds"
"""
classify_cellassign.R \
--input_sce_file ${input_rds} \
--output_sce_file ${annotated_rds} \
--cellassign_predictions ${cellassign_predictions} \
--reference_name ${ref_name}

"""
}

workflow annotate_celltypes {
take: processed_sce_channel
main:
Expand All @@ -60,7 +81,10 @@ workflow annotate_celltypes {
.map{[
project_id = it.scpca_project_id,
singler_model_file = "${params.singler_models_dir}/${it.singler_ref_file}",
cellassign_ref_file = "${params.cellassign_ref_dir}/${it.cellassign_ref_file}"
cellassign_ref_file = "${params.cellassign_ref_dir}/${it.cellassign_ref_file}",
// add ref name for cellassign since we cannot store it in the cellassign output
// singler ref name does not need to be added because it is stored in the singler model
cellassign_ref_name = it.cellassign_ref_name
]}

// create channel grouped_celltype_ch as: [meta, processed sce object, SingleR reference model]
Expand All @@ -70,26 +94,36 @@ workflow annotate_celltypes {
.combine(celltype_ch, by: 0)
.map{it.drop(1)} // remove extra project ID

// creates [meta, processed, SingleR reference model]
// creates input for singleR [meta, processed, SingleR reference model]
singler_input_ch = grouped_celltype_ch
.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file -> tuple(meta,
processed_rds,
singler_model_file
)}
.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file, cellassign_ref_name -> tuple(meta,
processed_rds,
singler_model_file
)}

// creates input for cellassign [meta, processed hdf5, cellassign ref file, cell assign ref name]
cellassign_input_ch = grouped_celltype_ch
.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file -> tuple(meta,
processed_hdf5,
cellassign_ref_file
)}

.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file, cellassign_ref_name -> tuple(meta,
processed_hdf5,
cellassign_ref_file,
cellassign_ref_name
)}

// get SingleR cell type assignments and add them to SCE
classify_singleR(singler_input_ch)

// get cellassign predictions file
predict_cellassign(cellassign_input_ch)

emit:
classify_singleR.out
predict_cellassign.out
// add cellassign annotations to the object with singleR results
all_celltype_assignments_ch = classify_singleR.out
// combines using meta from both singleR and cellassign, they should be the same
// resulting tuple should be [meta, singleR annotated rds, cellassign predictions]
.combine(predict_cellassign.out, by: 0)

// get CellAssign cell type predictions and add them to SCE
classify_cellassign(all_celltype_assignments_ch)
allyhawkins marked this conversation as resolved.
Show resolved Hide resolved

emit: classify_cellassign.out

}