diff --git a/bin/add_submitter_annotations.R b/bin/add_submitter_annotations.R old mode 100644 new mode 100755 diff --git a/bin/classify_SingleR.R b/bin/classify_SingleR.R old mode 100644 new mode 100755 diff --git a/bin/classify_cellassign.R b/bin/classify_cellassign.R old mode 100644 new mode 100755 diff --git a/bin/cluster_sce.R b/bin/cluster_sce.R old mode 100644 new mode 100755 diff --git a/bin/filter_sce_rds.R b/bin/filter_sce.R similarity index 100% rename from bin/filter_sce_rds.R rename to bin/filter_sce.R diff --git a/bin/generate_cellassign_refs.R b/bin/generate_cellassign_refs.R old mode 100644 new mode 100755 diff --git a/bin/integrate_sce.R b/bin/integrate_sce.R old mode 100644 new mode 100755 diff --git a/bin/merge_sces.R b/bin/merge_sces.R old mode 100644 new mode 100755 diff --git a/bin/predict_cellassign.py b/bin/predict_cellassign.py old mode 100644 new mode 100755 diff --git a/bin/train_SingleR.R b/bin/train_SingleR.R old mode 100644 new mode 100755 diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 944a1db0..fa61a6af 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -38,21 +38,23 @@ class Utils { */ static def getMetaVal(file, key){ def obj = new JsonSlurper().parse(file) + return(obj[key]) } /** - * Replace a string with an NA value with null + * Replace a string with an NA value with "" + * (which evaluates as false in boolean contexts) * * @param str A string - * @return The input string unless it was NA or a variant thereof, in which case returns null + * @return The input string unless it was NA or a variant thereof, in which case returns "" */ static def parseNA(str) { if (str){ - str.toLowerCase() in ["na","n/a","nan"]? null : str + str.toLowerCase() in ['na','n/a','nan']? '' : str } else { - null + '' } } } diff --git a/main.nf b/main.nf index 47681b0b..3d21e686 100644 --- a/main.nf +++ b/main.nf @@ -93,7 +93,7 @@ workflow { library_id: it.scpca_library_id, sample_id: it.scpca_sample_id.split(";").sort().join(","), project_id: Utils.parseNA(it.scpca_project_id)?: "no_project", - submitter: it.submitter, + submitter: Utils.parseNA(it.submitter), technology: it.technology, assay_ontology_term_id: Utils.parseNA(it.assay_ontology_term_id), seq_unit: it.seq_unit, diff --git a/modules/af-features.nf b/modules/af-features.nf index 8f371fe0..56c33517 100644 --- a/modules/af-features.nf +++ b/modules/af-features.nf @@ -2,6 +2,7 @@ //index a feature barcode file process index_feature{ container params.SALMON_CONTAINER + tag "${id}" input: tuple val(id), path(feature_file) diff --git a/modules/bulk-salmon.nf b/modules/bulk-salmon.nf index c8956b19..c7d6d438 100644 --- a/modules/bulk-salmon.nf +++ b/modules/bulk-salmon.nf @@ -70,6 +70,7 @@ process merge_bulk_quants { container params.SCPCATOOLS_CONTAINER label 'mem_8' publishDir "${params.results_dir}/${meta.project_id}", mode: 'copy' + tag "${meta.project_id}" input: tuple val(meta), path(salmon_directories), path(t2g_bulk) path(library_metadata) diff --git a/modules/classify-celltypes.nf b/modules/classify-celltypes.nf index 2fbd16e6..63b5ff97 100644 --- a/modules/classify-celltypes.nf +++ b/modules/classify-celltypes.nf @@ -8,6 +8,7 @@ process classify_singler { ) label 'mem_8' label 'cpus_4' + tag "${meta.library_id}" input: tuple val(meta), path(processed_rds), path(singler_model_file) output: @@ -27,13 +28,13 @@ process classify_singler { --threads ${task.cpus} # write out meta file - echo "${Utils.makeJson(meta)}" > "${singler_dir}/scpca-meta.json" + echo '${Utils.makeJson(meta)}' > "${singler_dir}/scpca-meta.json" """ stub: singler_dir = file(meta.singler_dir).name """ mkdir "${singler_dir}" - echo "${Utils.makeJson(meta)}" > "${singler_dir}/scpca-meta.json" + echo '${Utils.makeJson(meta)}' > "${singler_dir}/scpca-meta.json" """ } @@ -47,6 +48,7 @@ process classify_cellassign { ) label 'mem_32' label 'cpus_12' + tag "${meta.library_id}" input: tuple val(meta), path(processed_rds), path(cellassign_reference_file) output: @@ -57,28 +59,28 @@ process classify_cellassign { """ # create output directory mkdir "${cellassign_dir}" - + # Convert SCE to AnnData sce_to_anndata.R \ --input_sce_file "${processed_rds}" \ - --output_rna_h5 processed.hdf5 - + --output_rna_h5 processed.hdf5 + # Run CellAssign predict_cellassign.py \ - --input_hdf5_file processed.hdf5 + --input_hdf5_file processed.hdf5 --output_predictions "${cellassign_dir}/cellassign_predictions.tsv" \ --reference "${cellassign_reference_file}" \ --seed ${params.seed} \ --threads ${task.cpus} - + # write out meta file - echo "${Utils.makeJson(meta)}" > "${cellassign_dir}/scpca-meta.json" + echo '${Utils.makeJson(meta)}' > "${cellassign_dir}/scpca-meta.json" """ stub: cellassign_dir = file(meta.cellassign_dir).name """ mkdir "${cellassign_dir}" - echo "${Utils.makeJson(meta)}" > "${cellassign_dir}/scpca-meta.json" + echo '${Utils.makeJson(meta)}' > "${cellassign_dir}/scpca-meta.json" """ } @@ -88,6 +90,7 @@ process add_celltypes_to_sce { publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy' label 'mem_4' label 'cpus_2' + tag "${meta.library_id}" input: tuple val(meta), path(input_rds), path(cellassign_predictions), val(ref_name) output: @@ -118,9 +121,9 @@ workflow annotate_celltypes { // project id it.scpca_project_id, // singler model file - Utils.parseNA(it.singler_ref_file) ? "${params.singler_models_dir}/${it.singler_ref_file}" : null, + Utils.parseNA(it.singler_ref_file) ? "${params.singler_models_dir}/${it.singler_ref_file}" : '', // cellassign reference file - Utils.parseNA(it.cellassign_ref_file) ? "${params.cellassign_ref_dir}/${it.cellassign_ref_file}" : null + Utils.parseNA(it.cellassign_ref_file) ? "${params.cellassign_ref_dir}/${it.cellassign_ref_file}" : '' ]} // create input for typing: [augmented meta, processed_sce] @@ -129,7 +132,8 @@ workflow annotate_celltypes { .combine(celltype_ch, by: 0) // current contents: [project_id, meta, processed_sce, singler_model_file, cellassign_reference_file] // add values to meta for later use - .map{ project_id, meta, processed_sce, singler_model_file, cellassign_reference_file -> + .map{ project_id, meta_in, processed_sce, singler_model_file, cellassign_reference_file -> + def meta = meta_in.clone(); // local copy for safe modification meta.celltype_publish_dir = "${params.checkpoints_dir}/celltype/${meta.library_id}"; meta.singler_dir = "${meta.celltype_publish_dir}/${meta.library_id}_singler"; meta.cellassign_dir = "${meta.celltype_publish_dir}/${meta.library_id}_cellassign"; @@ -139,7 +143,7 @@ workflow annotate_celltypes { [meta, processed_sce] } - + // creates [meta, processed sce, singler model file] singler_input_ch = celltype_input_ch // add in singler model or empty file @@ -149,7 +153,7 @@ workflow annotate_celltypes { missing_ref: it[2].name == "NO_FILE" do_singler: true } - + // perform singleR celltyping and export results classify_singler(singler_input_ch.do_singler) @@ -158,7 +162,7 @@ workflow annotate_celltypes { .map{[it[0]["library_id"], file(empty_file)]} // add in channel outputs .mix(classify_singler.out) - + // create cellassign input channel: [meta, processed sce, cellassign reference file] cellassign_input_ch = celltype_input_ch // add in cellassign reference @@ -167,18 +171,18 @@ workflow annotate_celltypes { .branch{ missing_ref: it[2].name == "NO_FILE" do_cellassign: true - } + } + - // perform CellAssign celltyping and export results classify_cellassign(cellassign_input_ch.do_cellassign) - + // cellassign output channel: [library_id, cellassign_dir] cellassign_output_ch = cellassign_input_ch.missing_ref .map{[it[0]["library_id"], file(empty_file)]} // add in channel outputs - .mix(classify_cellassign.out) - + .mix(classify_cellassign.out) + // prepare input for process to add celltypes to the processed SCE assignment_input_ch = processed_sce_channel .map{[it[0]["library_id"]] + it} @@ -191,7 +195,7 @@ workflow annotate_celltypes { // Next PR: //add_celltypes_to_sce(assignment_input_ch) - + // add back in the unchanged sce files // TODO update below with output channel results: // export_channel = processed_sce_channel diff --git a/modules/export-anndata.nf b/modules/export-anndata.nf index e6e85e6a..d0fa1460 100644 --- a/modules/export-anndata.nf +++ b/modules/export-anndata.nf @@ -66,7 +66,10 @@ workflow sce_to_anndata{ ]} // remove any sce files that don't have enough cells in the sce object // number of cells are stored in each metadata.json file - .filter{ Utils.getMetaVal(file(it[3]), "${it[2]}_cells") > 1 } + .filter{ + cells = Utils.getMetaVal(file(it[3]), "${it[2]}_cells"); + cells ? cells > 1 : true // if no cell count, keep file (for testing) + } // remove metadata.json file from tuple .map{it.dropRight(1)} diff --git a/modules/qc-report.nf b/modules/qc-report.nf index de9aa8a6..bd9e6f5f 100644 --- a/modules/qc-report.nf +++ b/modules/qc-report.nf @@ -10,7 +10,7 @@ process sce_qc_report{ tuple val(meta), path(unfiltered_rds), path(filtered_rds), path(processed_rds) tuple path(template_dir), val(template_file) output: - tuple val(meta), path(unfiltered_rds), path(filtered_rds), path(processed_rds), path(metadata_json), emit: data + tuple val(meta), path(unfiltered_out), path(filtered_out), path(processed_out), path(metadata_json), emit: data path qc_report, emit: report script: qc_report = "${meta.library_id}_qc.html" @@ -18,14 +18,30 @@ process sce_qc_report{ metadata_json = "${meta.library_id}_metadata.json" workflow_url = workflow.repository ?: workflow.manifest.homePage workflow_version = workflow.revision ?: workflow.manifest.version + // names for final output files + unfiltered_out = "${meta.library_id}_unfiltered.rds" + filtered_out = "${meta.library_id}_filtered.rds" + processed_out = "${meta.library_id}_processed.rds" """ + # move files for output + if [ "${unfiltered_rds}" != "${unfiltered_out}" ]; then + mv "${unfiltered_rds}" "${unfiltered_out}" + fi + if [ "${filtered_rds}" != "${filtered_out}" ]; then + mv "${filtered_rds}" "${filtered_out}" + fi + if [ "${processed_rds}" != "${processed_out}" ]; then + mv "${processed_rds}" "${processed_out}" + fi + + # generate report sce_qc_report.R \ --report_template "${template_path}" \ --library_id "${meta.library_id}" \ --sample_id "${meta.sample_id}" \ - --unfiltered_sce ${unfiltered_rds} \ - --filtered_sce ${filtered_rds} \ - --processed_sce ${processed_rds} \ + --unfiltered_sce ${unfiltered_out} \ + --filtered_sce ${filtered_out} \ + --processed_sce ${processed_out} \ --qc_report_file ${qc_report} \ --metadata_json ${metadata_json} \ --technology "${meta.technology}" \ @@ -37,10 +53,16 @@ process sce_qc_report{ --seed "${params.seed}" """ stub: + unfiltered_out = "${meta.library_id}_unfiltered.rds" + filtered_out = "${meta.library_id}_filtered.rds" + processed_out = "${meta.library_id}_processed.rds" qc_report = "${meta.library_id}_qc.html" metadata_json = "${meta.library_id}_metadata.json" """ + touch ${unfiltered_out} + touch ${filtered_out} + touch ${processed_out} touch ${qc_report} - echo '{}' > ${metadata_json} + echo '{"unfiltered_cells": 10, "filtered_cells": 10, "processed_cells": 10}' > ${metadata_json} """ } diff --git a/modules/samtools.nf b/modules/samtools.nf index a58d75a1..02c3e93c 100644 --- a/modules/samtools.nf +++ b/modules/samtools.nf @@ -1,6 +1,7 @@ process index_bam{ container params.SAMTOOLS_CONTAINER + tag "${meta.run_id}" input: tuple val(meta), path(bamfile) output: diff --git a/modules/sce-processing.nf b/modules/sce-processing.nf index 7ec9d71b..d93f7863 100644 --- a/modules/sce-processing.nf +++ b/modules/sce-processing.nf @@ -12,7 +12,6 @@ process make_unfiltered_sce{ tuple val(meta), path(unfiltered_rds) script: unfiltered_rds = "${meta.library_id}_unfiltered.rds" - """ generate_unfiltered_sce.R \ --alevin_dir ${alevin_dir} \ @@ -48,18 +47,18 @@ process make_unfiltered_sce{ // channels with RNA and feature data process make_merged_unfiltered_sce{ label 'mem_8' - tag "${meta.library_id}" + tag "${rna_meta.library_id}" container params.SCPCATOOLS_CONTAINER input: tuple val(feature_meta), path(feature_alevin_dir), - val (meta), path(alevin_dir), + val(rna_meta), path(alevin_dir), path(mito_file), path(ref_gtf), path(submitter_cell_types_file) path sample_metafile output: tuple val(meta), path(unfiltered_rds) script: - unfiltered_rds = "${meta.library_id}_unfiltered.rds" - // add feature metadata as an element of the main meta object + // add feature metadata as elements of the main meta object + meta = rna_meta.clone() meta['feature_type'] = feature_meta.technology.split('_')[0] meta['feature_meta'] = feature_meta @@ -68,6 +67,7 @@ process make_merged_unfiltered_sce{ meta['feature_type'] = "adt" } + unfiltered_rds = "${meta.library_id}_unfiltered.rds" """ generate_unfiltered_sce.R \ --alevin_dir ${alevin_dir} \ @@ -92,12 +92,13 @@ process make_merged_unfiltered_sce{ --library_id "${meta.library_id}" \ --submitter_cell_types_file "${submitter_cell_types_file}" fi - """ stub: - unfiltered_rds = "${meta.library_id}_unfiltered.rds" + meta = rna_meta.clone() meta['feature_type'] = feature_meta.technology.split('_')[0] meta['feature_meta'] = feature_meta + + unfiltered_rds = "${meta.library_id}_unfiltered.rds" """ touch "${meta.library_id}_unfiltered.rds" """ @@ -121,7 +122,7 @@ process filter_sce{ feature_barcode_file.name != "NO_FILE" """ - filter_sce_rds.R \ + filter_sce.R \ --unfiltered_file ${unfiltered_rds} \ --filtered_file ${filtered_rds} \ ${adt_present ? "--adt_name ${meta.feature_type}":""} \ @@ -200,7 +201,6 @@ process post_process_sce{ tuple val(meta), path(unfiltered_rds), path(filtered_rds), path(processed_rds) script: processed_rds = "${meta.library_id}_processed.rds" - """ post_process_sce.R \ --filtered_sce_file ${filtered_rds} \ diff --git a/modules/spaceranger.nf b/modules/spaceranger.nf index 54318a09..70ef72a2 100644 --- a/modules/spaceranger.nf +++ b/modules/spaceranger.nf @@ -44,6 +44,7 @@ process spaceranger{ process spaceranger_publish{ container params.SCPCATOOLS_CONTAINER + tag "${meta.library_id}" publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy' input: tuple val(meta), path(spatial_out)