Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SingleR and CellAssign metadata #388

Merged
merged 7 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions add-celltypes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ if (!file(params.run_metafile).exists()) {
param_error = true
}

if (!file(params.celltype_refs_metafile).exists()) {
log.error("The 'celltype_refs_metafile' file '${params.celltype_refs_metafile}' can not be found.")
if (!file(params.celltype_project_metafile).exists()) {
log.error("The 'celltype_project_metafile' file '${params.celltype_project_metafile}' can not be found.")
param_error = true
}

Expand Down
33 changes: 33 additions & 0 deletions bin/save_singler_refs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env Rscript

# this script is used to download celldex references and save them as RDS files

library(optparse)
library(celldex)

# Command line options:
#   --ref_name  name of the celldex function that returns the reference,
#               e.g., HumanPrimaryCellAtlasData
#   --ref_file  path where the reference will be written as an RDS file
option_list <- list(
  make_option(
    opt_str = c("--ref_name"),
    type = "character",
    default = NULL,
    help = "name associated with celldex reference, e.g., HumanPrimaryCellAtlasData"
  ),
  make_option(
    opt_str = c("--ref_file"),
    type = "character",
    default = NULL,
    help = "path to save reference file"
  )
)

# Parse options
opt <- parse_args(OptionParser(option_list = option_list))

# Both options are required. Without these checks a missing `--ref_name`
# makes the `%in%` test below return `logical(0)`, and `if` then fails with
# the unhelpful "argument is of length zero" error.
if (is.null(opt$ref_name) || !nzchar(opt$ref_name)) {
  stop("A celldex reference name must be provided with `--ref_name`.", call. = FALSE)
}
if (is.null(opt$ref_file) || !nzchar(opt$ref_file)) {
  stop("A path to save the reference must be provided with `--ref_file`.", call. = FALSE)
}

# check that provided ref name is in celldex package
if (!opt$ref_name %in% ls("package:celldex")) {
  stop(
    paste0("Provided `ref_name` `", opt$ref_name, "` does not match a celldex dataset."),
    call. = FALSE
  )
}

# look up the celldex export by name; equivalent to `celldex::<ref_name>`
ref_function <- getExportedValue("celldex", opt$ref_name)
if (!is.function(ref_function)) {
  stop(
    paste0("celldex export `", opt$ref_name, "` is not a function and cannot be used to retrieve a reference."),
    call. = FALSE
  )
}

# get a reference library from celldex:
# `ensembl = TRUE` is passed through to the celldex function
# NOTE(review): presumably this requests Ensembl gene ids as row names - confirm
# against the celldex documentation
ref <- ref_function(ensembl = TRUE)

# export ref data as a gzip-compressed RDS file
readr::write_rds(ref, opt$ref_file, compress = "gz")
62 changes: 54 additions & 8 deletions build-celltype-ref.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,88 @@ nextflow.enable.dsl=2

params.t2g_3col_path = "s3://scpca-references/homo_sapiens/ensembl-104/annotation/Homo_sapiens.GRCh38.104.spliced_intron.tx2gene_3col.tsv"

// Run `save_singler_refs.R` for a single reference and publish the resulting RDS file.
//
// Input:  tuple of `ref_name` (passed to the script as `--ref_name`) and `ref_source`
//         (used only to build the output file name).
// Output: tuple of `ref_name` and the path to the saved reference file,
//         named `<ref_source>-<ref_name>.rds`.
process save_singler_refs {
// container image is taken from the pipeline parameters
container params.SCPCATOOLS_CONTAINER
// output files are published to the directory given by `params.singler_references_dir`
publishDir "${params.singler_references_dir}"
// NOTE(review): resources for the 'mem_8' label are defined outside this file - confirm the allocation
label 'mem_8'
input:
tuple val(ref_name), val(ref_source)
output:
tuple val(ref_name), path(ref_file)
script:
// output file name combines the reference source and the reference name
ref_file = "${ref_source}-${ref_name}.rds"
"""
save_singler_refs.R \
--ref_name ${ref_name} \
--ref_file ${ref_file}
"""
stub:
// the stub creates an empty file with the same name as the real output
ref_file = "${ref_source}-${ref_name}.rds"
"""
touch ${ref_file}
"""

}

process train_singler_models {
container params.SCPCATOOLS_CONTAINER
publishDir "${params.celltype_model_dir}"
publishDir "${params.singler_models_dir}"
label 'cpus_4'
label 'mem_16'
input:
tuple path(celltype_ref), val(ref_name)
tuple val(ref_name), path(ref_file)
path tx2gene
output:
path celltype_model
script:
celltype_model = "${ref_name}_model.rds"
"""
train_SingleR.R \
--ref_file ${celltype_ref} \
--ref_file ${ref_file} \
--output_file ${celltype_model} \
--fry_tx2gene ${tx2gene} \
--label_name ${params.label_name} \
--seed ${params.seed} \
--threads ${task.cpus}
"""
stub:
celltype_model = "${ref_name}_model.rds"
"""
touch ${celltype_model}
"""
}

workflow build_celltype_ref {

// create channel of cell type ref files and names
celltype_refs_ch = Channel.fromPath(params.celltype_refs_metafile)
celltype_refs_ch = Channel.fromPath(params.celltype_ref_metadata)
.splitCsv(header: true, sep: '\t')
.branch{
singler: it.celltype_method == "SingleR"
cellassign: it.celltype_method == "CellAssign"
}

// singler refs to download and train
singler_refs_ch = celltype_refs_ch.singler
.map{[
celltype_ref_file = "${params.celltype_ref_dir}/${it.celltype_singler_file}",
ref_name = it.celltype_ref_name
ref_name: it.celltype_ref_name,
ref_source: it.celltype_ref_source
]}
.unique() // remove any duplicates

// download and save reference files
save_singler_refs(singler_refs_ch)

// train cell type references using SingleR
train_singler_models(celltype_refs_ch, params.t2g_3col_path)
train_singler_models(save_singler_refs.out, params.t2g_3col_path)

// cellassign refs
cellassign_refs_ch = celltype_refs_ch.cellassign
// create a channel with ref_name, source, organs
.map{[
ref_name: it.celltype_ref_name,
ref_source: it.celltype_ref_source,
organs: it.organs
]}
}

workflow {
Expand Down
2 changes: 1 addition & 1 deletion config/profile_ccdl.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params{
run_ids = "SCPCR000001,SCPCS000101"

// cell type references and SingleR label of interest
celltype_refs_metafile = "s3://ccdl-scpca-data/sample_info/celltype_annotation/scpca-project-celltype-metadata.tsv"
celltype_project_metafile = "s3://ccdl-scpca-data/sample_info/celltype_annotation/scpca-project-celltype-metadata.tsv"
label_name = "label.ont"


Expand Down
13 changes: 10 additions & 3 deletions config/reference_paths.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ ref_rootdir = 's3://scpca-references'
// barcode files
barcode_dir = "${params.ref_rootdir}/barcodes/10X"

// cell type references
celltype_ref_dir = "${params.ref_rootdir}/celltype/references" // original (generally celldex) files themselves
celltype_model_dir = "${params.ref_rootdir}/celltype/singler_models" // models built from original (celldex) files
// cell type references directories
celltype_ref_dir = "${params.ref_rootdir}/celltype"
// output from save_singler_refs() process
singler_references_dir = "${params.celltype_ref_dir}/singler_references"
// output from train_singler_models() process
singler_models_dir = "${params.celltype_ref_dir}/singler_models"

// cell type metadata
celltype_ref_metadata = "${projectDir}/references/celltype-reference-metadata.tsv"
panglao_marker_genes_file = "${projectDir}/references/PanglaoDB_markers_27_Mar_2020.tsv"
2 changes: 1 addition & 1 deletion modules/classify-celltypes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ workflow annotate_celltypes {
take: processed_sce_channel
main:
// channel with celltype model and project ids
celltype_ch = Channel.fromPath(params.celltype_refs_metafile)
celltype_ch = Channel.fromPath(params.celltype_project_metafile)
.splitCsv(header: true, sep: '\t')
.map{[
project_id = it.scpca_project_id,
Expand Down
Loading