Skip to content

Commit

Permalink
Merge pull request #402 from AlexsLemonade/allyhawkins/cellassign-ass…
Browse files Browse the repository at this point in the history
…ignments

Add CellAssign assignments to SCE object
  • Loading branch information
allyhawkins authored Aug 14, 2023
2 parents 2a78a01 + 9c97aee commit 9efd773
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 18 deletions.
5 changes: 5 additions & 0 deletions bin/classify_SingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ if(!file.exists(opt$input_sce_file)){
stop("Missing input SCE file")
}

# check that output file ends in rds
if (!(stringr::str_ends(opt$output_sce_file, ".rds"))){
stop("output sce file name must end in .rds")
}

# check that references all exist
singler_model_file <- opt$singler_model_file
if(!file.exists(singler_model_file)) {
Expand Down
80 changes: 80 additions & 0 deletions bin/classify_cellassign.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env Rscript

# This script is used to read in the predictions from CellAssign and assign cell types in the annotated RDS file

# import libraries
suppressPackageStartupMessages({
library(optparse)
library(SingleCellExperiment)
})

# set up arguments
option_list <- list(
make_option(
opt_str = c("-i", "--input_sce_file"),
type = "character",
help = "path to rds file with input sce object"
),
make_option(
opt_str = c("-o", "--output_sce_file"),
type = "character",
help = "path to output rds file to store annotated sce object. Must end in .rds"
),
make_option(
opt_str = c("--cellassign_predictions"),
type = "character",
help = "path to tsv file containing the prediction matrix returned by running CellAssign"
),
make_option(
opt_str = c("--reference_name"),
type = "character",
help = "name referring to the marker gene reference used for CellAssign"
)
)

opt <- parse_args(OptionParser(option_list = option_list))

# check that input file exists
if (!file.exists(opt$input_sce_file)){
stop("Missing input SCE file")
}

# check that cellassign predictions file was provided
if (!file.exists(opt$cellassign_predictions)){
stop("Missing CellAssign predictions file")
}
# check that reference_name was provided
if (is.null(opt$reference_name)) {
stop("Missing reference name")
}

# check that output file ends in rds
if(!(stringr::str_ends(opt$output_sce_file, ".rds"))){
stop("output sce file name must end in .rds")
}
# read in input files
sce <- readr::read_rds(opt$input_sce_file)
predictions <- readr::read_tsv(opt$cellassign_predictions)

celltype_assignments <- predictions |>
tidyr::pivot_longer(!barcode,
names_to = "celltype",
values_to = "prediction") |>
dplyr::group_by(barcode) |>
dplyr::slice_max(prediction, n = 1) |>
dplyr::ungroup()

# join by barcode to make sure assignments are in the right order
celltype_assignments <- data.frame(barcode = sce$barcodes) |>
dplyr::left_join(celltype_assignments, by = "barcode")

# add cell type and prediction to colData
sce$cellassign_celltype_annotation <- celltype_assignments
sce$cellassign_max_prediction <- celltype_assignments$prediction

# add entire predictions matrix and ref name to metadata
metadata(sce)$cellassign_predictions <- predictions
metadata(sce)$cellassign_reference <- opt$reference_name

# export annotated object with cellassign assignments
readr::write_rds(sce, opt$output_sce_file)
File renamed without changes.
70 changes: 52 additions & 18 deletions modules/classify-celltypes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ process classify_singleR {
container params.SCPCATOOLS_CONTAINER
label 'mem_8'
label 'cpus_4'
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
input:
tuple val(meta), path(processed_rds), path(singler_model_file)
output:
Expand All @@ -28,16 +27,17 @@ process classify_singleR {

process predict_cellassign {
container params.SCPCATOOLS_CONTAINER
publishDir "${params.checkpoints_dir}/celltype/${meta.library_id}", mode: 'copy'
label 'mem_32'
label 'cpus_12'
input:
tuple val(meta), path(processed_hdf5), path(cellassign_reference_mtx)
tuple val(meta), path(processed_hdf5), path(cellassign_reference_mtx), val(ref_name)
output:
tuple val(meta), path(cellassign_predictions)
tuple val(meta), path(cellassign_predictions), val(ref_name)
script:
cellassign_predictions = "${meta.library_id}_predictions.tsv"
"""
classify_cellassign.py \
predict_cellassign.py \
--input_hdf5_file ${processed_hdf5} \
--output_predictions ${cellassign_predictions} \
--reference ${cellassign_reference_mtx} \
Expand All @@ -51,6 +51,27 @@ process predict_cellassign {
"""
}

process classify_cellassign {
container params.SCPCATOOLS_CONTAINER
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
label 'mem_4'
label 'cpus_2'
input:
tuple val(meta), path(input_rds), path(cellassign_predictions), val(ref_name)
output:
tuple val(meta), path(annotated_rds)
script:
annotated_rds = "${meta.library_id}_annotated.rds"
"""
classify_cellassign.R \
--input_sce_file ${input_rds} \
--output_sce_file ${annotated_rds} \
--cellassign_predictions ${cellassign_predictions} \
--reference_name ${ref_name}
"""
}

workflow annotate_celltypes {
take: processed_sce_channel
main:
Expand All @@ -60,7 +81,10 @@ workflow annotate_celltypes {
.map{[
project_id = it.scpca_project_id,
singler_model_file = "${params.singler_models_dir}/${it.singler_ref_file}",
cellassign_ref_file = "${params.cellassign_ref_dir}/${it.cellassign_ref_file}"
cellassign_ref_file = "${params.cellassign_ref_dir}/${it.cellassign_ref_file}",
// add ref name for cellassign since we cannot store it in the cellassign output
// singler ref name does not need to be added because it is stored in the singler model
cellassign_ref_name = it.cellassign_ref_name
]}

// create channel grouped_celltype_ch as: [meta, processed sce object, SingleR reference model]
Expand All @@ -70,26 +94,36 @@ workflow annotate_celltypes {
.combine(celltype_ch, by: 0)
.map{it.drop(1)} // remove extra project ID

// creates [meta, processed, SingleR reference model]
// creates input for singleR [meta, processed, SingleR reference model]
singler_input_ch = grouped_celltype_ch
.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file -> tuple(meta,
processed_rds,
singler_model_file
)}
.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file, cellassign_ref_name -> tuple(meta,
processed_rds,
singler_model_file
)}

// creates input for cellassign [meta, processed hdf5, cellassign ref file, cell assign ref name]
cellassign_input_ch = grouped_celltype_ch
.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file -> tuple(meta,
processed_hdf5,
cellassign_ref_file
)}

.map{meta, processed_rds, processed_hdf5, singler_model_file, cellassign_ref_file, cellassign_ref_name -> tuple(meta,
processed_hdf5,
cellassign_ref_file,
cellassign_ref_name
)}

// get SingleR cell type assignments and add them to SCE
classify_singleR(singler_input_ch)

// get cellassign predictions file
predict_cellassign(cellassign_input_ch)

emit:
classify_singleR.out
predict_cellassign.out
// add cellassign annotations to the object with singleR results
all_celltype_assignments_ch = classify_singleR.out
// combines using meta from both singleR and cellassign, they should be the same
// resulting tuple should be [meta, singleR annotated rds, cellassign predictions]
.combine(predict_cellassign.out, by: 0)

// get CellAssign cell type predictions and add them to SCE
classify_cellassign(all_celltype_assignments_ch)

emit: classify_cellassign.out

}

0 comments on commit 9efd773

Please sign in to comment.