diff --git a/CHANGELOG.md b/CHANGELOG.md index a461cdd..fc8ae89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,32 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[1.3.0](https://github.com/nf-core/pixelator/releases/tag/1.3.0)] - 2024-07-17 + +### Enhancements & fixes + +- [[PR #97](https://github.com/nf-core/pixelator/pull/97)] - Update citations +- [[PR #96](https://github.com/nf-core/pixelator/pull/96)] - Make all ext.args assignments closures +- [[PR #98](https://github.com/nf-core/pixelator/pull/98)] - Update metromap to include layout step +- [[PR #99](https://github.com/nf-core/pixelator/pull/99)] - Update README to include layout step +- [[PR #100](https://github.com/nf-core/pixelator/pull/100)] - Use R1/R2 suffixes in amplicon input fastq file renaming +- [[PR #101](https://github.com/nf-core/pixelator/pull/101)] - Fix validation issue when using panel_file instead of panel +- [[PR #102](https://github.com/nf-core/pixelator/pull/102)] - Restructure output directory +- [[PR #103](https://github.com/nf-core/pixelator/pull/103)] - Make rate-diff the default transformation method when computing colocalization +- [[PR #104](https://github.com/nf-core/pixelator/pull/104)] - Update to pixelator 0.18.1 +- [[PR #106](https://github.com/nf-core/pixelator/pull/106)] - Update to pixelator 0.18.2 + +### Software dependencies + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `pixelator` | 0.17.1 | 0.18.2 | + +> [!NOTE] +> Dependency has been **updated** if both old and new version information is present. +> Dependency has been **added** if just the new version information is present. +> Dependency has been **removed** if new version information isn't present. 
+ ## [[1.2.0](https://github.com/nf-core/pixelator/releases/tag/1.2.0)] - 2024-05-28 ### Enhancements & fixes diff --git a/CITATIONS.md b/CITATIONS.md index 49b37b6..4d7355e 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,9 +10,9 @@ ## Pipeline tools -- [pixelator](https://doi.org/10.1101/2023.06.05.543770) +- [pixelator](https://doi.org/10.1038/s41592-024-02268-9) - > Karlsson, Filip, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, et al. “Molecular Pixelation: Single Cell Spatial Proteomics by Sequencing.” bioRxiv, June 8, 2023. https://doi.org/10.1101/2023.06.05.543770. + > Karlsson, F., Kallas, T., Thiagarajan, D. et al. “Molecular pixelation: spatial proteomics of single cells by sequencing.” Nat Methods 21, 1044–1052 (2024). https://doi.org/10.1038/s41592-024-02268-9 - [cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) diff --git a/README.md b/README.md index 1c8d8fe..ea09557 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ It takes a samplesheet as input and will process your data using `pixelator` to 5. Compute the components of the graph from the edge list in order to create putative cells ([`pixelator graph`](https://github.com/PixelgenTechnologies/pixelator)) 6. Call and annotate cells ([`pixelator annotate`](https://github.com/PixelgenTechnologies/pixelator)) 7. Analyze the cells for polarization and colocalization ([`pixelator analysis`](https://github.com/PixelgenTechnologies/pixelator)) -8. Report generation ([`pixelator report`](https://github.com/PixelgenTechnologies/pixelator)) +8. Generate 3D graph layouts for visualization of cells ([`pixelator layout`](https://github.com/PixelgenTechnologies/pixelator)) +9. Report generation ([`pixelator report`](https://github.com/PixelgenTechnologies/pixelator)) > [!WARNING] > Since Nextflow 23.07.0-edge, Nextflow no longer mounts the host's home directory when using Apptainer or Singularity. 
@@ -101,3 +102,11 @@ You can cite the `nf-core` publication as follows: > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. > > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). + +You can cite the molecular pixelation technology as follows: + +> **Molecular pixelation: spatial proteomics of single cells by sequencing.** +> +> Filip Karlsson, Tomasz Kallas, Divya Thiagarajan, Max Karlsson, Maud Schweitzer, Jose Fernandez Navarro, Louise Leijonancker, Sylvain Geny, Erik Pettersson, Jan Rhomberg-Kauert, Ludvig Larsson, Hanna van Ooijen, Stefan Petkov, Marcela González-Granillo, Jessica Bunz, Johan Dahlberg, Michele Simonetti, Prajakta Sathe, Petter Brodin, Alvaro Martinez Barrio & Simon Fredriksson +> +> _Nat Methods._ 2024 May 08. doi: [10.1038/s41592-024-02268-9](https://doi.org/10.1038/s41592-024-02268-9) diff --git a/conf/modules.config b/conf/modules.config index 9491290..c8350e7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -14,7 +14,7 @@ process { publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + path: { "${params.outdir}/pixelator" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -22,14 +22,14 @@ process { withName: "PIXELATOR.*" { publishDir = [ [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + path: { "${params.outdir}/pixelator" }, mode: params.publish_dir_mode, saveAs: { filename -> (filename.endsWith('.log') || filename.equals('versions.yml')) ? 
null : filename } ], [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}/logs" }, + path: { "${params.outdir}/pixelator/logs" }, mode: params.publish_dir_mode, - pattern: "*.log" + pattern: '*.log' ] ] @@ -47,15 +47,50 @@ process { } + withName: PIXELATOR_COLLECT_METADATA { + publishDir = [ + [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: 'metadata.json', + saveAs: { filename -> + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + "metadata_${timestamp}.json" + } + ], + [ + path: { "${params.outdir}/pixelator/logs" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + ] + } + // use explicit (params.my_option instanceof Integer) checks to avoid issues with 0 evaluating false // since most pixelator flags do accept zero as a value withName: PIXELATOR_AMPLICON { - ext.args = { + ext.args = { ["--design ${meta.design}"].join(' ').trim() } + + publishDir = [ [ - "--design ${meta.design}", - ].join(' ').trim() - } + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'amplicon/*.merged.{fq,fastq}.gz', + saveAs: { (params.save_amplicon_reads || params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { params.save_all ? it : null } + ] + ] } withName: PIXELATOR_QC { @@ -80,6 +115,38 @@ process { params.adapterqc_mismatches ? "--mismatches ${params.adapterqc_mismatches}": '', ].join(' ').trim() } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '{preqc,adapterqc}/*.processed.{fq,fastq}.gz', + saveAs: { (params.save_qc_passed_reads || params.save_all) ? 
it : null } + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '{preqc,adapterqc}/*.failed.{fq,fastq}.gz', + saveAs: { (params.save_qc_failed_reads || params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.qc-report.html', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_DEMUX { @@ -90,61 +157,253 @@ process { - (params.demux_min_length instanceof Integer) ? "--mismatches ${params.demux_min_length}": '', + (params.demux_min_length instanceof Integer) ? "--min-length ${params.demux_min_length}": '', ].join(' ').trim() } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'demux/*.processed-*.{fq,fastq}.gz', + saveAs: { (params.save_demux_processed_reads || params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'demux/*failed.{fq,fastq}.gz', + saveAs: { (params.save_demux_failed_reads || params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_COLLAPSE { - ext.args = [ - params.markers_ignore ? "--markers_ignore ${params.markers_ignore}": - params.algorithm ? "--algorithm ${params.algorithm}": '', - params.max_neighbours ? "--max-neighbours ${params.max_neighbours}": '', - params.collapse_mismatches ? "--mismatches ${params.collapse_mismatches}": '', - params.collapse_min_count ? 
"--min-count ${params.collapse_min_count}": '', - params.collapse_use_counts ? "--use-counts": '', - ].join(' ').trim() + ext.args = { + [ + params.markers_ignore ? "--markers_ignore ${params.markers_ignore}": '', + params.algorithm ? "--algorithm ${params.algorithm}": '', + params.max_neighbours ? "--max-neighbours ${params.max_neighbours}": '', + params.collapse_mismatches ? "--mismatches ${params.collapse_mismatches}": '', + params.collapse_min_count ? "--min-count ${params.collapse_min_count}": '', + params.collapse_use_counts ? "--use-counts": '', + ].join(' ').trim() + } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'collapse/*.collapsed.parquet', + saveAs: { (params.save_collapsed_reads || params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_GRAPH { - ext.args = [ - params.multiplet_recovery ? "--multiplet-recovery" : '', - params.leiden_iterations ? "--leiden-iterations ${params.leiden_iterations}" : '', - params.graph_min_count ? "--min-count ${params.graph_min_count}" : '', - ].join(' ').trim() + ext.args = { + [ + params.multiplet_recovery ? "--multiplet-recovery" : '', + params.leiden_iterations ? "--leiden-iterations ${params.leiden_iterations}" : '', + params.graph_min_count ? "--min-count ${params.graph_min_count}" : '', + ].join(' ').trim() + } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'graph/*.components_recovered.csv', + saveAs: { (params.save_recovered_components || params.save_all) ? 
it : null } + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'graph/*edgelist.parquet', + saveAs: { (params.save_edgelist || params.save_all) ? it : null } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_ANNOTATE { - ext.args = [ - (params.min_size instanceof Integer) ? "--min-size ${params.min_size}" : '', - (params.max_size instanceof Integer) ? "--max-size ${params.max_size}" : '', - params.dynamic_filter ? "--dynamic-filter ${params.dynamic_filter}" : '', - params.aggregate_calling ? "--aggregate-calling" : '', - ].join(' ').trim() + ext.args = { + [ + (params.min_size instanceof Integer) ? "--min-size ${params.min_size}" : '', + (params.max_size instanceof Integer) ? "--max-size ${params.max_size}" : '', + params.dynamic_filter ? "--dynamic-filter ${params.dynamic_filter}" : '', + params.aggregate_calling ? "--aggregate-calling" : '', + ].join(' ').trim() + } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'annotate/*.dataset.pxl', + saveAs: { + if (params.skip_layout && params.skip_analysis) { + // Trim the annotate directory prefix from the output name + return new File(it).name + } + else if (params.save_annotate_dataset || params.save_all) { + return it + } + return null + } + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'annotate/*.raw_components_metrics.csv.gz', + saveAs: { (params.save_raw_component_metrics || params.save_all) ? 
it : null } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_ANALYSIS { ext.when = { !params.skip_analysis } - ext.args = [ - params.compute_polarization ? "--compute-polarization" : '', - params.compute_colocalization ? "--compute-colocalization" : '', - params.use_full_bipartite ? "--use-full-bipartite " : '', - params.polarization_min_marker_count ? "--polarization-min-marker-count ${params.polarization_min_marker_count}" : '', - params.polarization_transformation ? "--polarization-transformation ${params.polarization_transformation}" : '', - params.colocalization_transformation ? "--colocalization-transformation ${params.colocalization_transformation}" : '', - params.polarization_n_permutations ? "--polarization-n-permutations ${params.polarization_n_permutations}" : '', - (params.colocalization_neighbourhood_size instanceof Integer) ? "--colocalization-neighbourhood-size ${params.colocalization_neighbourhood_size}" : '', - (params.colocalization_n_permutations instanceof Integer) ? "--colocalization-n-permutations ${params.colocalization_n_permutations}" : '', - (params.colocalization_min_region_count instanceof Integer) ? "--colocalization-min-region-count ${params.colocalization_min_region_count}" : '', - ].join(' ').trim() + ext.args = { + [ + params.compute_polarization ? "--compute-polarization" : '', + params.compute_colocalization ? "--compute-colocalization" : '', + params.use_full_bipartite ? "--use-full-bipartite " : '', + params.polarization_min_marker_count ? "--polarization-min-marker-count ${params.polarization_min_marker_count}" : '', + params.polarization_transformation ? 
"--polarization-transformation ${params.polarization_transformation}" : '', + params.colocalization_transformation ? "--colocalization-transformation ${params.colocalization_transformation}" : '', + params.polarization_n_permutations ? "--polarization-n-permutations ${params.polarization_n_permutations}" : '', + (params.colocalization_neighbourhood_size instanceof Integer) ? "--colocalization-neighbourhood-size ${params.colocalization_neighbourhood_size}" : '', + (params.colocalization_n_permutations instanceof Integer) ? "--colocalization-n-permutations ${params.colocalization_n_permutations}" : '', + (params.colocalization_min_region_count instanceof Integer) ? "--colocalization-min-region-count ${params.colocalization_min_region_count}" : '', + (params.colocalization_min_marker_count instanceof Integer) ? "--colocalization-min-marker-count ${params.colocalization_min_marker_count}" : '' + ].join(' ').trim() + } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'analysis/*.dataset.pxl', + saveAs: { + if (params.skip_layout) { + // Trim the analysis directory prefix from the output name + return new File(it).name + } + else if (params.save_analysis_dataset || params.save_all) { + return it + } + return null + } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_LAYOUT { ext.when = { !params.skip_layout } - ext.args = [ - params.no_node_marker_counts ? "--no-node-marker-counts" : '', - params.layout_algorithm ? "--layout-algorithm ${params.layout_algorithm} " : '', - ].join(' ').trim() + ext.args = { + [ + params.no_node_marker_counts ? "--no-node-marker-counts" : '', + params.layout_algorithm ? 
"--layout-algorithm ${params.layout_algorithm} " : '', + ].join(' ').trim() + } + + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'layout/*.dataset.pxl', + saveAs: { + // Trim the layout directory prefix from the output name + new File(it).name + } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? it : null } + ] + ] } withName: PIXELATOR_REPORT { ext.when = { !params.skip_report } + publishDir = [ + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: 'report/*.html', + saveAs: { + // Trim the report directory prefix from the output name + new File(it).name + } + ], + [ + path: { "${params.outdir}/pixelator/logs/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/pixelator" }, + mode: params.publish_dir_mode, + pattern: '**/*.{report,meta}.json', + saveAs: { (params.save_all) ? 
it : null } + ] + ] } } diff --git a/conf/test.config b/conf/test.config index 12b8cb8..90071e0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,21 +24,21 @@ params { max_time = '6.h' // Input data - input = params.pipelines_testdata_base_path + 'pixelator/samplesheet/samplesheet.csv' - input_basedir = params.pipelines_testdata_base_path + 'pixelator/testdata' - - multiplet_recovery = true - min_size = 2 - max_size = 100000 - compute_polarization = true - use_full_bipartite = true - colocalization_min_region_count = 0 - colocalization_n_permutations = 10 + input = params.pipelines_testdata_base_path + 'pixelator/samplesheet/samplesheet.csv' + input_basedir = params.pipelines_testdata_base_path + 'pixelator/testdata' + + multiplet_recovery = true + min_size = 2 + max_size = 100000 + compute_polarization = true + use_full_bipartite = true + colocalization_min_region_count = 0 + colocalization_n_permutations = 10 colocalization_neighbourhood_size = 1 // For now skip the layout step since it is very slow on these // small test datasets - skip_layout = true + skip_layout = true // using this since the default pmds_3d does not work on very small graphs - layout_algorithm = "fruchterman_reingold_3d" + layout_algorithm = "fruchterman_reingold_3d" } diff --git a/conf/test_panel_v2.config b/conf/test_panel_v2.config new file mode 100644 index 0000000..19d8484 --- /dev/null +++ b/conf/test_panel_v2.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test + using the v2 panel. 
+ + Use as follows: + nextflow run nf-core/pixelator -profile test_panel_v2,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + + +aws.client.downloadParallel = true + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + 'pixelator/samplesheet/samplesheet_v2.csv' + input_basedir = params.pipelines_testdata_base_path + 'pixelator/testdata/' + + multiplet_recovery = true + min_size = 2 + max_size = 100000 + compute_polarization = true + use_full_bipartite = true + colocalization_min_region_count = 0 + colocalization_n_permutations = 10 + colocalization_neighbourhood_size = 1 + + // Unlike conf/test.config, the layout step is run here even though it is slow on these + // small test datasets + skip_layout = false + // using this since the default pmds_3d does not work on very small graphs + layout_algorithm = "fruchterman_reingold_3d" +} diff --git a/docs/images/nf-core-pixelator-metromap.svg b/docs/images/nf-core-pixelator-metromap.svg index a6c3e37..9a1b418 100644 --- a/docs/images/nf-core-pixelator-metromap.svg +++ b/docs/images/nf-core-pixelator-metromap.svg @@ -1,246 +1,277 @@ - - - - - - - - - - - + + + + + + + + + + + - + - - - - - + + + + + - - - - - - - + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - - + + + + + + + + - + + + + + + + - - - - - - - - + - - - - - - - - + + + + + + + + + + - - - - - + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + - - + + - + - - + + - + - - + 
+ - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + diff --git a/docs/output.md b/docs/output.md index cda70ef..7d3f5d0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -19,10 +19,18 @@ The pipeline consists of the following steps: - [Compute connected components](#compute-connected-components) - [Filtering, annotation, cell-calling](#cell-calling-filtering-and-annotation) - [Downstream analysis](#downstream-analysis) +- [Generate layouts for visualization](#compute-layouts-for-visualization) - [Generate reports](#generate-reports) ### Preprocessing +The preprocessing step uses `pixelator single-cell amplicon` to create full-length amplicon sequences from both single-end and paired-end data. +It returns a single FASTQ file per sample containing fixed length amplicons. +This step will also calculate Q30 quality scores for different regions of the library. + +These amplicon FASTQ files are intermediate and by default not placed in the output folder with the final files delivered to users. +Set `--save_amplicon_reads` or `--save_all` to enable publishing of these files to: +
Output files @@ -40,22 +48,35 @@ The pipeline consists of the following steps:
-The preprocessing step uses `pixelator single-cell amplicon` to create full-length amplicon sequences from both single-end and paired-end data. -It returns a single fastq file per sample containing fixed length amplicons. -This step will also calculate Q30 quality scores for different regions of the library. - ### Quality control +Quality control is performed using `pixelator single-cell preqc` and `pixelator single-cell adapterqc`. + +The preqc step performs QC and quality filtering of the raw sequencing data using [Fastp](https://github.com/OpenGene/fastp) internally. +It generates a QC report in HTML and JSON formats. It saves processed reads as well as reads that were +discarded (i.e. were too short, had too many Ns, or too low quality, etc.). + +The `adapterqc` stage checks for the presence and correctness of the pixel binding sequences, +using [Cutadapt](https://cutadapt.readthedocs.io/en/stable/) internally. +It also generates a QC report in JSON format. It saves processed reads as well as discarded reads (i.e. reads that did not have a match for both pixel binding sequences). + +These processed and discarded FASTQ reads are intermediate and by default not placed in the output folder with the final files delivered to users. +Set `--save_qc_passed_reads` and/or `--save_qc_failed_reads` to enable publishing of these files. +Alternatively, set `--save_all` to keep all intermediary outputs of all steps. + 
Output files - `pixelator` - `preqc` + - `.processed.fastq.gz`: Processed reads. - `.failed.fastq.gz`: Discarded reads. - `.report.json`: Fastp json report. + - `.qc-report.html`: Fastp html report. - `.meta.json`: Command invocation metadata. + - `adapterqc` - `.processed.fastq.gz`: Processed reads. @@ -68,17 +89,15 @@ This step will also calculate Q30 quality scores for different regions of the li
-Quality control is performed using `pixelator single-cell preqc` and `pixelator single-cell adapterqc`. - -The preqc stage performs QC and quality filtering of the raw sequencing data. -It also generates a QC report in HTML and JSON formats. It saves processed reads as well as reads that were -discarded (i.e. were too short, had too many Ns, or too low quality, etc.). Internally `preqc` -uses [Fastp](https://github.com/OpenGene/fastp), and `adapterqc` -uses [Cutadapt](https://cutadapt.readthedocs.io/en/stable/). +### Demultiplexing -The `adapterqc` stage checks for the presence and correctness of the pixel binding sequences. It also generates a QC report in JSON format. It saves processed reads as well as discarded reads (i.e. reads that did not have a match for both pixel binding sequences). +The `pixelator single-cell demux` command assigns each read to a marker (with a certain barcode) file. It also generates QC report in +JSON format. It saves processed reads (one file per antibody) as well as discarded reads (in a different file) with no match to the +given barcodes/antibodies. -### Demultiplexing +These processed and discarded FASTQ reads are intermediate and by default not placed in the output folder with the final files delivered to users. +Set `--save_demux_failed_reads` and/or `--save_demux_processed_reads` to enable publishing of these files. +Alternatively, set `--save_all` to keep all intermediary outputs of all steps.
Output files @@ -97,12 +116,21 @@ The `adapterqc` stage checks for the presence and correctness of the pixel bindi
-The `pixelator single-cell demux` command assigns a marker (barcode) to each read. It also generates QC report in -JSON format. It saves processed reads (one per antibody) as well as discarded reads with no match to the -given barcodes/antibodies. - ### Duplicate removal and error correction +This step uses the `pixelator single-cell collapse` command. + +The `collapse` command quantifies molecules by performing error correction and detecting PCR duplicates. +This is achieved using the unique pixel identifier and unique molecular identifier sequences to check for uniqueness, collapse and compute a read count. +The command generates a QC report in JSON format. +Errors are allowed when collapsing reads if `--algorithm` is set to `adjacency` (this is the default option). + +The output format of this command is a parquet file containing deduplicated and error-corrected molecules. + +The collapsed reads are intermediate and by default not placed in the output folder with the final files delivered to users. +Set `--save_collapsed_reads` to enable publishing of these files. +Alternatively, set `--save_all` to keep all intermediary outputs of all steps. +
Output files @@ -110,7 +138,7 @@ given barcodes/antibodies. - `collapse` - - `.collapsed.parquet`: Edgelist of the graph. + - `.collapsed.parquet`: Edge list of the graph. - `.report.json`: Statistics for the collapse step. - `.meta.json`: Command invocation metadata. @@ -119,16 +147,23 @@ given barcodes/antibodies.
-This step uses the `pixelator single-cell collapse` command. +### Compute connected components -The `collapse` command removes duplicate reads and performs error correction. -This is achieved using the unique pixel identifier and unique molecular identifier sequences to check for -uniqueness, collapse and compute a read count. The command generates a QC report in JSON format. -Errors are allowed when collapsing reads if `--algorithm` is set to `adjacency` (this is the default option). +This step uses the `pixelator single-cell graph` command. +The input is the edge list parquet file generated in the collapse step. +The molecules from the edge list are filtered by count (`--graph_min_count`) to form the edges of the connected components of the graph. +When graphs are computed and identified, their ID names are added back to the edge list in a column called "component". + +The graph command has the option to recover components (technical multiplets) into smaller +components using community detection to find and remove problematic edges +(see `--multiplet_recovery`). These new component IDs are then stored in the "component" column. The information to keep track of the original and +newly recovered components is stored in a file (components_recovered.csv). +This file is not included in the output folder by default, but can be included by passing `--save_recovered_components`. -The output format of this command is an edge list in CSV format. +The edge list is intermediate and by default not placed in the output folder with the final files delivered to users. +Set `--save_edgelist` to enable publishing of this file. -### Compute connected components +Alternatively, set `--save_all` to keep all intermediary outputs of all steps. 
Output files @@ -150,17 +185,19 @@ The output format of this command is an edge list in CSV format.
-This step uses the `pixelator single-cell graph` command. -The input is the edge list dataframe (CSV) generated in the collapse step and after filtering it -by count (`--graph_min_count`), the connected components of the graph (graphs) are computed and -added to the edge list in a column called "component". +### Cell-calling, filtering, and annotation -The graph command has the option to recover components (technical multiplets) into smaller -components using community detection to find and remove problematic edges. -(See `--multiplet_recovery`). The information to keep track of the original and -newly recovered components are stored in a file (components_recovered.csv). +This step uses the `pixelator single-cell annotate` command. -### Cell-calling, filtering, and annotation +The annotate command takes as input the molecule list file generated in the graph command. It parses and filters the +molecules grouped by "component" ID to find putative cells, and it will generate a PXL file containing the edges of the graphs in an edge list, and an +[AnnData object](https://anndata.readthedocs.io/en/latest/) as well as some useful metadata. + +Some summary statistics before filtering are stored in `raw_components_metrics.csv.gz`. +This file is not included in the output folder by default, but can be included by passing `--save_raw_component_metrics`. + +By default, the PXL file after annotate will not be saved to the results directory unless `--skip_analysis` and `--skip_layout` are passed. +Set `--save_annotate_dataset` to include these files. 
Output files @@ -168,7 +205,7 @@ newly recovered components are stored in a file (components_recovered.csv). - `pixelator` - `annotate` - - `.annotate.dataset.pxl` + - `.annotate.dataset.pxl`: The annotated PXL dataset, - `.meta.json`: Command invocation metadata. - `.raw_components_metrics.csv.gz` - `.report.json`: Statistics for the analysis step. @@ -176,13 +213,24 @@ newly recovered components are stored in a file (components_recovered.csv). - `.pixelator-annotate.log`: pixelator log output.
-This step uses the `pixelator single-cell annotate` command. +### Downstream analysis -The annotate command takes as input the edge list (CSV) file generated in the graph command. It parses, and filters the -edgelist to find putative cells, and it will generate a pxl file containing the edgelist, and an -(AnnData object)[https://anndata.readthedocs.io/en/latest/] as well as some useful metadata. +This step uses the `pixelator single-cell analysis` command. +Downstream analyses are performed on the PXL file generated by the previous stage. +The results of the analysis are added to the PXL file produced in this stage. -### Downstream analysis +Currently, the following analyses are performed: + +- polarization scores (enable with `--compute_polarization`) +- co-localization scores (enable with `--compute_colocalization`) + +Each analysis can be disabled by using respectively `--compute_polarization false` or `--compute_colocalization false`. +This entire step can also be skipped using the `--skip_analysis` option. + +By default, the PXL file after analysis will not be saved to the results directory unless `--skip_layout` is passed. +Set `--save_analysis_dataset` to include these files. + +Alternatively, set `--save_all` to keep all intermediary outputs of all steps.
Output files @@ -200,19 +248,15 @@ edgelist to find putative cells, and it will generate a pxl file containing the
-This step uses the `pixelator single-cell analysis` command. -Downstream analysis is performed on the `pxl` file generated by the previous stage. -The results of the analysis is added to the pxl file. - -Currently, the following analysis are performed: +### Compute layouts for visualization -- polarization scores (enable with `--compute_polarization`) -- co-localization scores (enable with `--compute_colocalization`) +This step uses the `pixelator single-cell layout` command. +It will generate precomputed layouts that can be used to visualize cells +as part of the downstream analysis. This data will be appended to a PXL file. -Each analysis can be disabled by using respectively `--compute_polarization false` or `--compute_colocalization false`. -This entire step can also be skipped using the `--skip_analysis` option. +This entire step can also be skipped using the `--skip_layout` option. -### Compute layouts for visualization +Set `--save_all` to keep all intermediary outputs of all steps.
Output files @@ -230,13 +274,15 @@ This entire step can also be skipped using the `--skip_analysis` option.
-This step uses the `pixelator single-cell layout` command. -It will generate precomputed layouts that can be used to visualize cells -as part of the downstream analysis. +### Generate reports -This entire step can also be skipped using the `--skip_layout` option. +This step uses the `pixelator single-cell report` command. +This step will collect metrics and outputs generated by previous stages +and generate a report in HTML format for each sample. -### Generate reports +This step can be skipped using the `--skip_report` option. + +More information on the report can be found in the [pixelator documentation](https://software.pixelgen.com/pixelator/outputs/qc-report/)
Output files @@ -249,14 +295,6 @@ This entire step can also be skipped using the `--skip_layout` option.
-This step uses the `pixelator single-cell report` command. -This step will collect metrics and outputs generated by previous stages -and generate a report in HTML format for each sample. - -This step can be skipped using the `--skip_report` option. - -More information on the report can be found in the [pixelator documentation](https://software.pixelgen.com/pixelator/outputs/web-report/) - ### Pipeline information
@@ -265,10 +303,36 @@ More information on the report can be found in the [pixelator documentation](htt - `pipeline_info/` - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - Metadata file with software versions, environment information and pipeline configuration for debugging: `metadata.json` - Parameters used by the pipeline run: `params.json`.
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +## Output directory structure + +With default parameters, the pixelator pipeline output directory will only include the latest PXL file +generated by the pipeline (with the most "complete" information) and an interactive HTML report per sample. +The PXL dataset files can be from either the `annotate`, `analysis` or `layout` step. + +With default parameters, the `.layout.dataset.pxl` will be copied to the output directory. +If the `layout` stage is skipped (using `--skip_layout`) the `.analysis.dataset.pxl` files will be included and +if the `analysis` stage is also skipped (using `--skip_analysis`) the `.annotate.dataset.pxl` will be copied. + +Various flags are available to store intermediate files and are described in the input parameter documentation. Alternatively, you can keep all intermediate files using `--save_all`. + +Below is an example output structure for a pipeline run using the default settings. 
+ +- `pipeline_info/` +- `pixelator/` + + - `logs/` + + - `/`: + - `*.log` + + - `pbmcs_unstimulated.layout.dataset.pxl` + - `pbmcs_unstimulated.qc-report.html` + - `uropod_control.layout.dataset.pxl` + - `uropod_control.qc-report.html` diff --git a/docs/usage.md b/docs/usage.md index f6337e5..778114a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -127,9 +127,12 @@ A list of available panels can be listed by running following command: pixelator single-cell --list-panels ``` -Currently, a single built-in panel is available: +Currently, two built-in panels are available: -- `human-sc-immunology-spatial-proteomics` +- `human-sc-immunology-spatial-proteomics-1` +- `human-sc-immunology-spatial-proteomics-2` + +`human-sc-immunology-spatial-proteomics` is also an allowed value and is an alias to `human-sc-immunology-spatial-proteomics-1`. ## Running the pipeline diff --git a/modules/local/pixelator/collect_metadata.nf b/modules/local/pixelator/collect_metadata.nf index 1867abc..a6b5872 100644 --- a/modules/local/pixelator/collect_metadata.nf +++ b/modules/local/pixelator/collect_metadata.nf @@ -8,8 +8,8 @@ process PIXELATOR_COLLECT_METADATA { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: diff --git a/modules/local/pixelator/list_options.nf b/modules/local/pixelator/list_options.nf index 7085463..86a0340 100644 --- a/modules/local/pixelator/list_options.nf +++ b/modules/local/pixelator/list_options.nf @@ -4,8 +4,8 @@ process PIXELATOR_LIST_OPTIONS { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" output: path "design_options.txt" , emit: designs diff --git a/modules/local/pixelator/single-cell/amplicon/main.nf b/modules/local/pixelator/single-cell/amplicon/main.nf index b73f696..d7b5bcf 100644 --- a/modules/local/pixelator/single-cell/amplicon/main.nf +++ b/modules/local/pixelator/single-cell/amplicon/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_AMPLICON { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(reads) @@ -26,10 +26,11 @@ process PIXELATOR_AMPLICON { def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' - // Make list of old name and new name pairs to use for renaming in the bash while loop + // Make list of old name and new name pairs to use for renaming + // Use R1/R2 style suffixes for limited backward compatibility with pixelator<0.17 def old_new_pairs = (reads instanceof Path || reads.size() == 1) ? 
[[ reads, "${prefix}${getFileSuffix(reads)}" ]] - : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}${getFileSuffix(entry)}" ] } + : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_R${index + 1}${getFileSuffix(entry)}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_reads = old_new_pairs.collect { old_name, new_name -> new_name }.join(' ') diff --git a/modules/local/pixelator/single-cell/analysis/main.nf b/modules/local/pixelator/single-cell/analysis/main.nf index 4cab8af..ede72a1 100644 --- a/modules/local/pixelator/single-cell/analysis/main.nf +++ b/modules/local/pixelator/single-cell/analysis/main.nf @@ -4,8 +4,8 @@ process PIXELATOR_ANALYSIS { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(data) diff --git a/modules/local/pixelator/single-cell/annotate/main.nf b/modules/local/pixelator/single-cell/annotate/main.nf index 0d3dac4..b66cf87 100644 --- a/modules/local/pixelator/single-cell/annotate/main.nf +++ b/modules/local/pixelator/single-cell/annotate/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_ANNOTATE { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(dataset), path(panel_file), val(panel) diff --git a/modules/local/pixelator/single-cell/collapse/main.nf b/modules/local/pixelator/single-cell/collapse/main.nf index 22ad72a..a34ecd1 100644 --- a/modules/local/pixelator/single-cell/collapse/main.nf +++ b/modules/local/pixelator/single-cell/collapse/main.nf @@ -4,8 +4,8 @@ process PIXELATOR_COLLAPSE { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(reads), path(panel_file), val(panel) diff --git a/modules/local/pixelator/single-cell/demux/main.nf b/modules/local/pixelator/single-cell/demux/main.nf index 41d6d99..d1bda81 100644 --- a/modules/local/pixelator/single-cell/demux/main.nf +++ b/modules/local/pixelator/single-cell/demux/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_DEMUX { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(reads), path(panel_file), val(panel) diff --git a/modules/local/pixelator/single-cell/graph/main.nf b/modules/local/pixelator/single-cell/graph/main.nf index 35d9fb2..c4e4bb6 100644 --- a/modules/local/pixelator/single-cell/graph/main.nf +++ b/modules/local/pixelator/single-cell/graph/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_GRAPH { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(edge_list) diff --git a/modules/local/pixelator/single-cell/layout/main.nf b/modules/local/pixelator/single-cell/layout/main.nf index 9b534c1..8173aab 100644 --- a/modules/local/pixelator/single-cell/layout/main.nf +++ b/modules/local/pixelator/single-cell/layout/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_LAYOUT { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(data) diff --git a/modules/local/pixelator/single-cell/qc/main.nf b/modules/local/pixelator/single-cell/qc/main.nf index fe9dbce..f2082c8 100644 --- a/modules/local/pixelator/single-cell/qc/main.nf +++ b/modules/local/pixelator/single-cell/qc/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_QC { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(reads) @@ -25,6 +25,8 @@ process PIXELATOR_QC { tuple val(meta), path("preqc/*.report.json") , emit: preqc_report_json tuple val(meta), path("{adapterqc,preqc}/*.report.json") , emit: report_json + tuple val(meta), path("preqc/*.qc-report.html") , emit: preqc_report_html + tuple val(meta), path("adapterqc/*.meta.json") , emit: adapterqc_metadata tuple val(meta), path("preqc/*.meta.json") , emit: preqc_metadata tuple val(meta), path("{adapterqc,preqc}/*.meta.json") , emit: metadata diff --git a/modules/local/pixelator/single-cell/report/main.nf b/modules/local/pixelator/single-cell/report/main.nf index 6d891a6..1a6bbba 100644 --- a/modules/local/pixelator/single-cell/report/main.nf +++ b/modules/local/pixelator/single-cell/report/main.nf @@ -5,8 +5,8 @@ process PIXELATOR_REPORT { conda "bioconda::pixelator=0.17.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pixelator:0.17.1--pyhdfd78af_0' : - 'biocontainers/pixelator:0.17.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pixelator:0.18.2--pyhdfd78af_0' : + 'biocontainers/pixelator:0.18.2--pyhdfd78af_0' }" input: tuple val(meta), path(panel_file), val(panel) diff --git a/nextflow.config b/nextflow.config index 48220fc..63de05a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,101 +11,116 @@ params { // Input options - input = null - input_basedir = null + input = null + input_basedir = null // Preqc options - trim_front = 0 - trim_tail = 0 - max_length = null - min_length = null - max_n_bases = 0 - avg_qual = 20 - dedup = false - remove_polyg = false + trim_front = 0 + trim_tail = 0 + max_length = null + min_length = null + max_n_bases = 0 + avg_qual = 20 + dedup = false + remove_polyg = false // adapterqc options - adapterqc_mismatches = 0.1 + adapterqc_mismatches = 0.1 // demux options - demux_mismatches = 0.1 - demux_min_length = null + demux_mismatches = 0.1 + demux_min_length = null // collapse options - markers_ignore = null - algorithm = 'adjacency' - max_neighbours = 60 - collapse_mismatches = 2 - collapse_min_count = 2 - collapse_use_counts = false + markers_ignore = null + algorithm = 'adjacency' + max_neighbours = 60 + collapse_mismatches = 2 + collapse_min_count = 2 + collapse_use_counts = false // graph options - multiplet_recovery = true - leiden_iterations = 10 - graph_min_count = 2 + multiplet_recovery = true + leiden_iterations = 10 + graph_min_count = 2 // annotate options - min_size = null - max_size = null - dynamic_filter = 'min' - aggregate_calling = true + min_size = null + max_size = null + dynamic_filter = 'min' + aggregate_calling = true // analysis options - compute_polarization = true - compute_colocalization = true - use_full_bipartite = false - polarization_transformation = "log1p" - polarization_min_marker_count = 5 - polarization_n_permutations = 50 - 
colocalization_transformation = "log1p" - colocalization_neighbourhood_size = 1 - colocalization_n_permutations = 50 - colocalization_min_region_count = 5 + compute_polarization = true + compute_colocalization = true + use_full_bipartite = false + polarization_transformation = "log1p" + polarization_min_marker_count = 5 + polarization_n_permutations = 50 + colocalization_transformation = "rate-diff" + colocalization_neighbourhood_size = 1 + colocalization_n_permutations = 50 + colocalization_min_region_count = 5 + colocalization_min_marker_count = 5 + + // Output options + save_amplicon_reads = false + save_qc_passed_reads = false + save_qc_failed_reads = false + save_demux_processed_reads = false + save_demux_failed_reads = false + save_collapsed_reads = false + save_recovered_components = false + save_edgelist = false + save_annotate_dataset = false + save_raw_component_metrics = false + save_analysis_dataset = false + save_all = false // layout options - no_node_marker_counts = false - layout_algorithm = "pmds_3d" + no_node_marker_counts = false + layout_algorithm = "wpmds_3d" // skip options - skip_report = false - skip_analysis = false - skip_layout = false + skip_report = false + skip_analysis = false + skip_layout = false // Main pixelator container override - pixelator_container = null + pixelator_container = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options - config_profile_name = null - config_profile_description = 
null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' - validationShowHiddenParams = false - validate_params = true + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true } // Load base.config by default for all pipelines @@ -238,8 +253,9 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_panel_v2 { includeConfig 'conf/test_panel_v2.config' } + test_full { includeConfig 'conf/test_full.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -297,7 +313,7 @@ manifest { description = """Pipeline for analysis of Molecular Pixelation assays""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.2.0' + version = '1.3.0' doi = '10.1101/2023.06.05.543770' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 7faa19b..cf0673c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -45,7 +45,21 @@ } } }, - "preqc_options": { + 
"amplicon_options": { + "title": "Amplicon generation options", + "type": "object", + "fa_icon": "fas fa-circle", + "properties": { + "save_amplicon_reads": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save intermediate amplicon reads generated from the raw input reads.", + "help": "By default, generated amplicon FastQ files will not be saved to the results directory. Specify this flag (or set it to `true` in your config file) to copy these files to the results directory when complete." + } + } + }, + "qc_options": { "title": "QC/Filtering/Trimming options", "type": "object", "fa_icon": "fas fa-terminal", @@ -96,13 +110,7 @@ "fa_icon": "fas g", "description": "Remove PolyG sequences (length of 10 or more)", "type": "boolean" - } - } - }, - "adapterqc_options": { - "title": "Adapter QC Options", - "type": "object", - "properties": { + }, "adapterqc_mismatches": { "fa_icon": "fas not-equal", "description": "The number of mismatches allowed (in percentage) [default: 0.1; 0.0<=x<=0.9]", @@ -110,6 +118,20 @@ "default": 0.1, "minimum": 0.0, "maximum": 0.9 + }, + "save_qc_passed_reads": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save intermediate QC read files containing all reads that passed the filters.", + "help": "By default, filtered read FastQ files after QC will not be saved to the results directory. Specify this flag (or set it to `true` in your config file) to copy these files to the results directory when complete." + }, + "save_qc_failed_reads": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save intermediate QC read files containing all reads that failed the filters.", + "help": "By default, FastQ files with reads that failed QC will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." 
} } }, @@ -130,6 +152,20 @@ "description": "The minimum length of the barcode that must overlap when matching", "help_text": "If you set this argument it will overrule the value from the chosen design", "type": "integer" + }, + "save_demux_processed_reads": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save intermediate QC read files containing all reads that contain valid antibody barcodes.", + "help": "By default, FastQ files containing reads with valid antibody barcodes will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." + }, + "save_demux_failed_reads": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save intermediate QC read files containing all reads without valid antibody barcodes.", + "help": "By default, FastQ files containing reads without valid antibody barcodes will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." } } }, @@ -177,6 +213,13 @@ "collapse_use_counts": { "description": "Use counts when collapsing (the difference in counts between two molecules must be more than double in order to be collapsed)", "type": "boolean" + }, + "save_collapsed_reads": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save an intermediate parquet file containing collapsed read information.", + "help": "By default, intermediate collapsed reads will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." 
} } }, @@ -206,6 +249,20 @@ "minimum": 1, "maximum": 50, "hidden": true + }, + "save_edgelist": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save an intermediate CSV file containing the unfiltered graph edge list.", + "help": "By default, the unfiltered edge list will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." + }, + "save_recovered_components": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save an intermediate CSV file containing the recovered components after multiplet recovery.", + "help": "By default, the recovered component will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." } } }, @@ -231,6 +288,20 @@ "description": "Enable aggregate calling, information on potential aggregates will be added to the output data", "type": "boolean", "default": true + }, + "save_raw_component_metrics": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save the raw_component_metrics.csv file from the annotate stage.", + "help": "By default, the raw_component_metrics CSV file after annotate will not be saved to the results directory. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." + }, + "save_annotate_dataset": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save the PXL dataset after the annotate stage.", + "help": "By default, the PXL file after annotate will not be saved to the results directory unless `--skip_analysis` and `--skip_layout` is passed. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." 
} } }, @@ -278,7 +349,7 @@ "colocalization_transformation": { "type": "string", "enum": ["raw", "log1p", "rate-diff"], - "default": "log1p", + "default": "rate-diff", "description": "Select the type of transformation to use on the node by antibody counts matrix when computing colocalization" }, "colocalization_neighbourhood_size": { @@ -298,6 +369,19 @@ "description": "The minimum number of counts in a region for it to be considered valid for computing colocalization", "default": 5, "minimum": 0 + }, + "colocalization_min_marker_count": { + "type": "integer", + "description": "The minimum number of counts in a component for it to be considered valid for computing colocalization", + "default": 5, + "minimum": 0 + }, + "save_analysis_dataset": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save the PXL dataset after the analysis stage.", + "help": "By default, the PXL dataset after the analysis stage will only be saved when `--skip_layout` is passed. Specify this flag (or set to `true` in your config file) to copy these files to the results directory when complete." } } }, @@ -318,7 +402,7 @@ "description": "Select a layout algorithm to use. This can be specified as a comma separated list to compute multiple layouts. Possible values are: fruchterman_reingold, fruchterman_reingold_3d, kamada_kawai, kamada_kawai_3d, pmds, pmds_3d", "type": "string", "pattern": "(\\S+)?(,\\S+)*", - "default": "pmds_3d" + "default": "wpmds_3d" } } }, @@ -341,6 +425,13 @@ "type": "string", "description": "Override the container image reference to use for all steps using the `pixelator` command.", "help_text": "Use this to force the pipeline to use a different image version in all steps that use the pixelator command.\nThe pipeline is not guaranteed to work when using different pixelator versions." 
+ }, + "save_all": { + "fa_icon": "fas fa-save", + "type": "boolean", + "default": false, + "description": "Save all intermediate results.", + "help": "This option is equivalent to passing:\n`--save_amplicon_reads --save_qc_passed_reads --save_qc_failed_reads --save_demux_processed_reads --save_demux_failed_reads --save_collapsed_reads --save_edgelist --save_recovered_components --save_annotate_dataset --save_analysis_dataset`" } } }, @@ -525,10 +616,10 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/preqc_options" + "$ref": "#/definitions/amplicon_options" }, { - "$ref": "#/definitions/adapterqc_options" + "$ref": "#/definitions/qc_options" }, { "$ref": "#/definitions/demux_options" diff --git a/subworkflows/local/utils_nfcore_pixelator_pipeline/main.nf b/subworkflows/local/utils_nfcore_pixelator_pipeline/main.nf index 3a61820..7cfab17 100644 --- a/subworkflows/local/utils_nfcore_pixelator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_pixelator_pipeline/main.nf @@ -339,7 +339,7 @@ def resolve_relative_path(relative_path, URI samplesheet_path) { // def validate_panel(LinkedHashMap meta, HashSet options) { if (meta.panel == null) { - return + return meta } if (!options.contains(meta.panel)) { @@ -356,7 +356,7 @@ def validate_panel(LinkedHashMap meta, HashSet options) { // def validate_design(LinkedHashMap meta, HashSet options) { if (meta.design == null) { - return + return meta } if (!options.contains(meta.design)) { diff --git a/workflows/pixelator.nf b/workflows/pixelator.nf index f7ee340..870f828 100644 --- a/workflows/pixelator.nf +++ b/workflows/pixelator.nf @@ -192,7 +192,8 @@ workflow PIXELATOR { // // MODULE: Run pixelator single-cell layout // - PIXELATOR_LAYOUT ( ch_analysed ) + ch_layout_input = params.skip_analysis ? ch_annotated : ch_analysed + PIXELATOR_LAYOUT ( ch_layout_input ) ch_layout = PIXELATOR_LAYOUT.out.dataset ch_versions = ch_versions.mix(PIXELATOR_LAYOUT.out.versions.first())