Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow optional assembler thread count option #93

Merged
merged 7 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,12 @@ The pipeline is compatible with [Launchpad](https://help.tower.nf/23.2/launch/la
| `--depth` | Any integer or float value<br />(Default: `20.00`) | Minimum sequencing depth to pass Assembly QC. |

## Assembly
> ℹ️ The output of a SPAdes-based assembler is deterministic for a given thread count. Hence, setting `--assembler_thread` to a specific value guarantees that the generated assemblies are reproducible by others using the same value.
<!-- -->
| Option | Values | Description |
| --- | ---| --- |
| `--assembler` | `"shovill"` or `"unicycler"`<br />(Default: `"shovill"`)| Which SPAdes-based assembler to use for assembling the reads. |
| `--assembler_thread` | Any integer value<br />(Default: `0`) | Number of threads used by the assembler. `0` means all available. |
| `--min_contig_length` | Any integer value<br />(Default: `500`) | Minimum length of contig to be included in the assembly. |

## Mapping
Expand Down
34 changes: 26 additions & 8 deletions modules/assembly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,25 @@ process ASSEMBLY_UNICYCLER {
input:
tuple val(sample_id), path(read1), path(read2), path(unpaired)
val min_contig_length
val assembler_thread

output:
tuple val(sample_id), path(fasta)

script:
fasta="${sample_id}.contigs.fasta"
"""
unicycler -1 "$read1" -2 "$read2" -s "$unpaired" -o results -t "`nproc`" --min_fasta_length "$min_contig_length"
mv results/assembly.fasta "${fasta}"
"""
thread="$assembler_thread"

if ( thread.toInteger() == 0 )
"""
unicycler -1 "$read1" -2 "$read2" -s "$unpaired" -o results -t "`nproc`" --min_fasta_length "$min_contig_length"
mv results/assembly.fasta "${fasta}"
"""
else
"""
unicycler -1 "$read1" -2 "$read2" -s "$unpaired" -o results -t "$thread" --min_fasta_length "$min_contig_length"
mv results/assembly.fasta "${fasta}"
"""
}

// Run Shovill to get assembly
Expand All @@ -40,16 +49,25 @@ process ASSEMBLY_SHOVILL {
input:
tuple val(sample_id), path(read1), path(read2), path(unpaired)
val min_contig_length
val assembler_thread

output:
tuple val(sample_id), path(fasta)

script:
fasta="${sample_id}.contigs.fasta"
"""
shovill --R1 "$read1" --R2 "$read2" --outdir results --cpus "`nproc`" --minlen "$min_contig_length" --force
mv results/contigs.fa "${fasta}"
"""
thread="$assembler_thread"

if ( thread.toInteger() == 0 )
"""
shovill --R1 "$read1" --R2 "$read2" --outdir results --cpus "`nproc`" --minlen "$min_contig_length" --force
mv results/contigs.fa "${fasta}"
"""
else
"""
shovill --R1 "$read1" --R2 "$read2" --outdir results --cpus "$thread" --minlen "$min_contig_length" --force
mv results/contigs.fa "${fasta}"
"""
}

// Run quast to assess assembly quality
Expand Down
1 change: 1 addition & 0 deletions modules/info.nf
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ process SAVE {
|${assemblerTextRow('Option', 'Value')}
|╠═══════════════════════════╪═════════════════════════════════════════════════════════════════════╣
|${assemblerTextRow('Assembler', params.assembler.capitalize())}
|${assemblerTextRow('Assembler Thread', params.assembler_thread == 0 ? "0 (All Available)" : params.assembler_thread)}
|${assemblerTextRow('Minimum contig length', params.min_contig_length)}
|╚═══════════════════════════╧═════════════════════════════════════════════════════════════════════╝
|""".stripMargin()
Expand Down
3 changes: 2 additions & 1 deletion modules/validate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ validParams = [
db: 'path',
assembler: 'assembler',
min_contig_length: 'int',
assembler_thread: 'int',
assembly_publish: 'publish_mode',
seroba_db_remote: 'url_targz',
seroba_kmer: 'int',
Expand Down Expand Up @@ -39,7 +40,7 @@ void validate(Map params) {
|(Only one of --init, --version, --help should be used at one time)
'''.stripMargin())
System.exit(1)
}
}

// Skip validation when help option is used
if (params.help) {
Expand Down
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ params {

// Default assembler
assembler = "shovill"
// Default assembler thread count (0 means all)
assembler_thread = 0
// Default minimum contig length
min_contig_length = 500
// Default assembly publish mode
Expand Down
8 changes: 7 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@
"unicycler"
]
},
"assembler_thread": {
"type": "integer",
"description": "Number of threads used by the assembler. 0 means all available.",
"hidden": true
},
"min_contig_length": {
"type": "integer",
"description": "Minimum legnth of contig to be included in the assembly.",
Expand All @@ -142,7 +147,8 @@
},
"required": [
"assembler",
"min_contig_length"
"min_contig_length",
"assembler_thread"
]
},
"mapping": {
Expand Down
4 changes: 2 additions & 2 deletions workflows/pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ workflow PIPELINE {
// Output into Channel ASSEMBLY_ch, and hardlink (default) the assemblies to $params.output directory
switch (params.assembler) {
case 'shovill':
ASSEMBLY_ch = ASSEMBLY_SHOVILL(READ_QC_PASSED_READS_ch, params.min_contig_length)
ASSEMBLY_ch = ASSEMBLY_SHOVILL(READ_QC_PASSED_READS_ch, params.min_contig_length, params.assembler_thread)
break

case 'unicycler':
ASSEMBLY_ch = ASSEMBLY_UNICYCLER(READ_QC_PASSED_READS_ch, params.min_contig_length)
ASSEMBLY_ch = ASSEMBLY_UNICYCLER(READ_QC_PASSED_READS_ch, params.min_contig_length, params.assembler_thread)
break
}

Expand Down