Skip to content

Commit

Permalink
Merge pull request #487 from AlexsLemonade/jashapiro/try-no-null2
Browse files Browse the repository at this point in the history
Try to solve intermittent workflow failures
  • Loading branch information
jashapiro authored Oct 5, 2023
2 parents 1e3ab54 + e5ee352 commit 3c21045
Show file tree
Hide file tree
Showing 20 changed files with 76 additions and 41 deletions.
Empty file modified bin/add_submitter_annotations.R
100644 → 100755
Empty file.
Empty file modified bin/classify_SingleR.R
100644 → 100755
Empty file.
Empty file modified bin/classify_cellassign.R
100644 → 100755
Empty file.
Empty file modified bin/cluster_sce.R
100644 → 100755
Empty file.
File renamed without changes.
Empty file modified bin/generate_cellassign_refs.R
100644 → 100755
Empty file.
Empty file modified bin/integrate_sce.R
100644 → 100755
Empty file.
Empty file modified bin/merge_sces.R
100644 → 100755
Empty file.
Empty file modified bin/predict_cellassign.py
100644 → 100755
Empty file.
Empty file modified bin/train_SingleR.R
100644 → 100755
Empty file.
10 changes: 6 additions & 4 deletions lib/Utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,23 @@ class Utils {
*/
static def getMetaVal(file, key){
def obj = new JsonSlurper().parse(file)

return(obj[key])
}


/**
 * Replace an NA-like string with the empty string "".
 *
 * The empty string evaluates as false in boolean contexts, so callers can
 * keep using Groovy truth (`?:` or a ternary) to detect a missing value.
 *
 * @param str A string; may be null or empty
 * @return The input unchanged, unless it is null, empty, or a case-insensitive
 *   match for "na", "n/a" or "nan" (ignoring surrounding whitespace),
 *   in which case "" is returned
 */
static def parseNA(str) {
  // null and "" are both falsy: there is nothing to parse
  if (!str) {
    return ''
  }
  // compare case-insensitively and tolerate stray whitespace around the value
  def normalized = str.toString().trim().toLowerCase()
  return normalized in ['na', 'n/a', 'nan'] ? '' : str
}
}
2 changes: 1 addition & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ workflow {
library_id: it.scpca_library_id,
sample_id: it.scpca_sample_id.split(";").sort().join(","),
project_id: Utils.parseNA(it.scpca_project_id)?: "no_project",
submitter: it.submitter,
submitter: Utils.parseNA(it.submitter),
technology: it.technology,
assay_ontology_term_id: Utils.parseNA(it.assay_ontology_term_id),
seq_unit: it.seq_unit,
Expand Down
1 change: 1 addition & 0 deletions modules/af-features.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//index a feature barcode file
process index_feature{
container params.SALMON_CONTAINER
tag "${id}"

input:
tuple val(id), path(feature_file)
Expand Down
1 change: 1 addition & 0 deletions modules/bulk-salmon.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ process merge_bulk_quants {
container params.SCPCATOOLS_CONTAINER
label 'mem_8'
publishDir "${params.results_dir}/${meta.project_id}", mode: 'copy'
tag "${meta.project_id}"
input:
tuple val(meta), path(salmon_directories), path(t2g_bulk)
path(library_metadata)
Expand Down
46 changes: 25 additions & 21 deletions modules/classify-celltypes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ process classify_singler {
)
label 'mem_8'
label 'cpus_4'
tag "${meta.library_id}"
input:
tuple val(meta), path(processed_rds), path(singler_model_file)
output:
Expand All @@ -27,13 +28,13 @@ process classify_singler {
--threads ${task.cpus}
# write out meta file
echo "${Utils.makeJson(meta)}" > "${singler_dir}/scpca-meta.json"
echo '${Utils.makeJson(meta)}' > "${singler_dir}/scpca-meta.json"
"""
stub:
singler_dir = file(meta.singler_dir).name
"""
mkdir "${singler_dir}"
echo "${Utils.makeJson(meta)}" > "${singler_dir}/scpca-meta.json"
echo '${Utils.makeJson(meta)}' > "${singler_dir}/scpca-meta.json"
"""
}

Expand All @@ -47,6 +48,7 @@ process classify_cellassign {
)
label 'mem_32'
label 'cpus_12'
tag "${meta.library_id}"
input:
tuple val(meta), path(processed_rds), path(cellassign_reference_file)
output:
Expand All @@ -57,28 +59,28 @@ process classify_cellassign {
"""
# create output directory
mkdir "${cellassign_dir}"
# Convert SCE to AnnData
sce_to_anndata.R \
--input_sce_file "${processed_rds}" \
--output_rna_h5 processed.hdf5
--output_rna_h5 processed.hdf5
# Run CellAssign
# (every argument line except the last ends in a backslash so the shell reads this as one command)
predict_cellassign.py \
  --input_hdf5_file processed.hdf5 \
  --output_predictions "${cellassign_dir}/cellassign_predictions.tsv" \
  --reference "${cellassign_reference_file}" \
  --seed ${params.seed} \
  --threads ${task.cpus}
# write out meta file
echo "${Utils.makeJson(meta)}" > "${cellassign_dir}/scpca-meta.json"
echo '${Utils.makeJson(meta)}' > "${cellassign_dir}/scpca-meta.json"
"""
stub:
cellassign_dir = file(meta.cellassign_dir).name
"""
mkdir "${cellassign_dir}"
echo "${Utils.makeJson(meta)}" > "${cellassign_dir}/scpca-meta.json"
echo '${Utils.makeJson(meta)}' > "${cellassign_dir}/scpca-meta.json"
"""
}

Expand All @@ -88,6 +90,7 @@ process add_celltypes_to_sce {
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
label 'mem_4'
label 'cpus_2'
tag "${meta.library_id}"
input:
tuple val(meta), path(input_rds), path(cellassign_predictions), val(ref_name)
output:
Expand Down Expand Up @@ -118,9 +121,9 @@ workflow annotate_celltypes {
// project id
it.scpca_project_id,
// singler model file
Utils.parseNA(it.singler_ref_file) ? "${params.singler_models_dir}/${it.singler_ref_file}" : null,
Utils.parseNA(it.singler_ref_file) ? "${params.singler_models_dir}/${it.singler_ref_file}" : '',
// cellassign reference file
Utils.parseNA(it.cellassign_ref_file) ? "${params.cellassign_ref_dir}/${it.cellassign_ref_file}" : null
Utils.parseNA(it.cellassign_ref_file) ? "${params.cellassign_ref_dir}/${it.cellassign_ref_file}" : ''
]}

// create input for typing: [augmented meta, processed_sce]
Expand All @@ -129,7 +132,8 @@ workflow annotate_celltypes {
.combine(celltype_ch, by: 0)
// current contents: [project_id, meta, processed_sce, singler_model_file, cellassign_reference_file]
// add values to meta for later use
.map{ project_id, meta, processed_sce, singler_model_file, cellassign_reference_file ->
.map{ project_id, meta_in, processed_sce, singler_model_file, cellassign_reference_file ->
def meta = meta_in.clone(); // local copy for safe modification
meta.celltype_publish_dir = "${params.checkpoints_dir}/celltype/${meta.library_id}";
meta.singler_dir = "${meta.celltype_publish_dir}/${meta.library_id}_singler";
meta.cellassign_dir = "${meta.celltype_publish_dir}/${meta.library_id}_cellassign";
Expand All @@ -139,7 +143,7 @@ workflow annotate_celltypes {
[meta, processed_sce]
}


// creates [meta, processed sce, singler model file]
singler_input_ch = celltype_input_ch
// add in singler model or empty file
Expand All @@ -149,7 +153,7 @@ workflow annotate_celltypes {
missing_ref: it[2].name == "NO_FILE"
do_singler: true
}


// perform singleR celltyping and export results
classify_singler(singler_input_ch.do_singler)
Expand All @@ -158,7 +162,7 @@ workflow annotate_celltypes {
.map{[it[0]["library_id"], file(empty_file)]}
// add in channel outputs
.mix(classify_singler.out)

// create cellassign input channel: [meta, processed sce, cellassign reference file]
cellassign_input_ch = celltype_input_ch
// add in cellassign reference
Expand All @@ -167,18 +171,18 @@ workflow annotate_celltypes {
.branch{
missing_ref: it[2].name == "NO_FILE"
do_cellassign: true
}
}



// perform CellAssign celltyping and export results
classify_cellassign(cellassign_input_ch.do_cellassign)

// cellassign output channel: [library_id, cellassign_dir]
cellassign_output_ch = cellassign_input_ch.missing_ref
.map{[it[0]["library_id"], file(empty_file)]}
// add in channel outputs
.mix(classify_cellassign.out)
.mix(classify_cellassign.out)

// prepare input for process to add celltypes to the processed SCE
assignment_input_ch = processed_sce_channel
.map{[it[0]["library_id"]] + it}
Expand All @@ -191,7 +195,7 @@ workflow annotate_celltypes {

// Next PR:
//add_celltypes_to_sce(assignment_input_ch)

// add back in the unchanged sce files
// TODO update below with output channel results:
// export_channel = processed_sce_channel
Expand Down
5 changes: 4 additions & 1 deletion modules/export-anndata.nf
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ workflow sce_to_anndata{
]}
// remove any sce files that don't have enough cells in the sce object
// number of cells are stored in each metadata.json file
.filter{ Utils.getMetaVal(file(it[3]), "${it[2]}_cells") > 1 }
.filter{
  // `def` keeps the count local to this closure invocation instead of
  // assigning to an undeclared (script-level) variable
  def cells = Utils.getMetaVal(file(it[3]), "${it[2]}_cells")
  // Only a missing count keeps the file (for testing). A count of 0 is a real
  // value and must be dropped, so test for null rather than relying on
  // Groovy truth, where 0 is false.
  cells == null ? true : cells > 1
}
// remove metadata.json file from tuple
.map{it.dropRight(1)}

Expand Down
32 changes: 27 additions & 5 deletions modules/qc-report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,38 @@ process sce_qc_report{
tuple val(meta), path(unfiltered_rds), path(filtered_rds), path(processed_rds)
tuple path(template_dir), val(template_file)
output:
tuple val(meta), path(unfiltered_rds), path(filtered_rds), path(processed_rds), path(metadata_json), emit: data
tuple val(meta), path(unfiltered_out), path(filtered_out), path(processed_out), path(metadata_json), emit: data
path qc_report, emit: report
script:
qc_report = "${meta.library_id}_qc.html"
template_path = "${template_dir}/${template_file}"
metadata_json = "${meta.library_id}_metadata.json"
workflow_url = workflow.repository ?: workflow.manifest.homePage
workflow_version = workflow.revision ?: workflow.manifest.version
// names for final output files
unfiltered_out = "${meta.library_id}_unfiltered.rds"
filtered_out = "${meta.library_id}_filtered.rds"
processed_out = "${meta.library_id}_processed.rds"
"""
# move files for output
if [ "${unfiltered_rds}" != "${unfiltered_out}" ]; then
mv "${unfiltered_rds}" "${unfiltered_out}"
fi
if [ "${filtered_rds}" != "${filtered_out}" ]; then
mv "${filtered_rds}" "${filtered_out}"
fi
if [ "${processed_rds}" != "${processed_out}" ]; then
mv "${processed_rds}" "${processed_out}"
fi
# generate report
sce_qc_report.R \
--report_template "${template_path}" \
--library_id "${meta.library_id}" \
--sample_id "${meta.sample_id}" \
--unfiltered_sce ${unfiltered_rds} \
--filtered_sce ${filtered_rds} \
--processed_sce ${processed_rds} \
--unfiltered_sce ${unfiltered_out} \
--filtered_sce ${filtered_out} \
--processed_sce ${processed_out} \
--qc_report_file ${qc_report} \
--metadata_json ${metadata_json} \
--technology "${meta.technology}" \
Expand All @@ -37,10 +53,16 @@ process sce_qc_report{
--seed "${params.seed}"
"""
stub:
unfiltered_out = "${meta.library_id}_unfiltered.rds"
filtered_out = "${meta.library_id}_filtered.rds"
processed_out = "${meta.library_id}_processed.rds"
qc_report = "${meta.library_id}_qc.html"
metadata_json = "${meta.library_id}_metadata.json"
"""
touch ${unfiltered_out}
touch ${filtered_out}
touch ${processed_out}
touch ${qc_report}
echo '{}' > ${metadata_json}
echo '{"unfiltered_cells": 10, "filtered_cells": 10, "processed_cells": 10}' > ${metadata_json}
"""
}
1 change: 1 addition & 0 deletions modules/samtools.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

process index_bam{
container params.SAMTOOLS_CONTAINER
tag "${meta.run_id}"
input:
tuple val(meta), path(bamfile)
output:
Expand Down
18 changes: 9 additions & 9 deletions modules/sce-processing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ process make_unfiltered_sce{
tuple val(meta), path(unfiltered_rds)
script:
unfiltered_rds = "${meta.library_id}_unfiltered.rds"

"""
generate_unfiltered_sce.R \
--alevin_dir ${alevin_dir} \
Expand Down Expand Up @@ -48,18 +47,18 @@ process make_unfiltered_sce{
// channels with RNA and feature data
process make_merged_unfiltered_sce{
label 'mem_8'
tag "${meta.library_id}"
tag "${rna_meta.library_id}"
container params.SCPCATOOLS_CONTAINER
input:
tuple val(feature_meta), path(feature_alevin_dir),
val (meta), path(alevin_dir),
val(rna_meta), path(alevin_dir),
path(mito_file), path(ref_gtf), path(submitter_cell_types_file)
path sample_metafile
output:
tuple val(meta), path(unfiltered_rds)
script:
unfiltered_rds = "${meta.library_id}_unfiltered.rds"
// add feature metadata as an element of the main meta object
// add feature metadata as elements of the main meta object
meta = rna_meta.clone()
meta['feature_type'] = feature_meta.technology.split('_')[0]
meta['feature_meta'] = feature_meta

Expand All @@ -68,6 +67,7 @@ process make_merged_unfiltered_sce{
meta['feature_type'] = "adt"
}

unfiltered_rds = "${meta.library_id}_unfiltered.rds"
"""
generate_unfiltered_sce.R \
--alevin_dir ${alevin_dir} \
Expand All @@ -92,12 +92,13 @@ process make_merged_unfiltered_sce{
--library_id "${meta.library_id}" \
--submitter_cell_types_file "${submitter_cell_types_file}"
fi
"""
stub:
unfiltered_rds = "${meta.library_id}_unfiltered.rds"
meta = rna_meta.clone()
meta['feature_type'] = feature_meta.technology.split('_')[0]
meta['feature_meta'] = feature_meta

unfiltered_rds = "${meta.library_id}_unfiltered.rds"
"""
touch "${meta.library_id}_unfiltered.rds"
"""
Expand All @@ -121,7 +122,7 @@ process filter_sce{
feature_barcode_file.name != "NO_FILE"

"""
filter_sce_rds.R \
filter_sce.R \
--unfiltered_file ${unfiltered_rds} \
--filtered_file ${filtered_rds} \
${adt_present ? "--adt_name ${meta.feature_type}":""} \
Expand Down Expand Up @@ -200,7 +201,6 @@ process post_process_sce{
tuple val(meta), path(unfiltered_rds), path(filtered_rds), path(processed_rds)
script:
processed_rds = "${meta.library_id}_processed.rds"

"""
post_process_sce.R \
--filtered_sce_file ${filtered_rds} \
Expand Down
1 change: 1 addition & 0 deletions modules/spaceranger.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ process spaceranger{

process spaceranger_publish{
container params.SCPCATOOLS_CONTAINER
tag "${meta.library_id}"
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
input:
tuple val(meta), path(spatial_out)
Expand Down

0 comments on commit 3c21045

Please sign in to comment.