refactor: move filtering to workflow level CDCgov#147
slsevilla committed Apr 3, 2024
1 parent 3059e03 commit 7720ec4
Showing 5 changed files with 51 additions and 73 deletions.
4 changes: 0 additions & 4 deletions modules/local/gamma.nf
@@ -15,10 +15,6 @@ process GAMMA {
     tuple val(meta), path("*.fasta"), optional:true , emit: fasta
     path "versions.yml"             , emit: versions
 
-    when:
-    //if there are scaffolds left after filtering
-    "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
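The same four-line `when:` guard is deleted from mash_distance.nf, prokka.nf, and quast.nf below. Without the guard, each process runs whenever it receives input, so the scaffold check has to gate the channel before it ever reaches the process. A minimal sketch of the two styles (channel names here are illustrative, not taken from this repo):

    // Old style: every module repeats the same module-level guard
    // when:
    //     "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."

    // New style: gate once at the workflow level, reuse the filtered channel
    passing_ch = scaffolds_ch
        .join(outcome_ch, by: [0])                      // [meta, scaffolds, fairy_outcome]
        .filter { meta, scaffolds, fairy_outcome ->
            fairy_outcome.any { it.contains('PASSED: More than 0 scaffolds') }
        }

    GAMMA ( passing_ch, params.hvgamdb )                // sees only samples that passed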
4 changes: 0 additions & 4 deletions modules/local/mash_distance.nf
@@ -11,10 +11,6 @@ process MASH_DIST {
     tuple val(meta), path("*.txt"), emit: dist
     path("versions.yml")          , emit: versions
 
-    when:
-    //if there are scaffolds left after filtering
-    "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
16 changes: 2 additions & 14 deletions modules/local/prokka.nf
@@ -24,27 +24,15 @@ process PROKKA {
     tuple val(meta), path("*.tsv"), emit: tsv
     path "versions.yml"           , emit: versions
 
-    when:
-    //if there are scaffolds left after filtering
-    "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
-    //set up for terra
-    if (params.terra==false) {
-        terra = ""
-        terra_exit = ""
-    } else if (params.terra==true) {
-        terra = "PATH=/opt/conda/envs/prokka/bin:\$PATH"
-        terra_exit = """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """
-    } else {
-        error "Please set params.terra to either \"true\" or \"false\""
-    }
     //define variables
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
     def proteins_opt = proteins ? "--proteins ${proteins[0]}" : ""
     def prodigal_opt = prodigal_tf ? "--prodigaltf ${prodigal_tf[0]}" : ""
     def container = task.container.toString() - "staphb/prokka@"
+    def terra = params.terra ? "PATH=/opt/conda/envs/prokka/bin:\$PATH" : ""
+    def terra_exit = params.terra ? """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """ : ""
     """
     #adding python path for running busco on terra
     $terra
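The Terra setup in prokka.nf collapses from a three-branch if/else into two ternaries. One behavioral nuance worth flagging: the deleted `else` branch rejected non-boolean values of `params.terra` with an error, while the ternary treats any truthy value as Terra mode. A side-by-side Groovy sketch of the change (both forms taken from the diff above):

    // Old: explicit branches, with validation of params.terra
    if (params.terra == false) {
        terra = ""
    } else if (params.terra == true) {
        terra = "PATH=/opt/conda/envs/prokka/bin:\$PATH"
    } else {
        error "Please set params.terra to either \"true\" or \"false\""
    }

    // New: one expression per variable; Groovy truthiness replaces the comparisons
    def terra = params.terra ? "PATH=/opt/conda/envs/prokka/bin:\$PATH" : ""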
4 changes: 0 additions & 4 deletions modules/local/quast.nf
@@ -11,10 +11,6 @@ process QUAST {
     tuple val(meta), path('*.tsv') , emit: report_tsv
     path "versions.yml"            , emit: versions
 
-    when:
-    //if the files are not corrupt and there are equal number of reads in each file then run bbduk
-    "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
96 changes: 49 additions & 47 deletions workflows/phoenix.nf
@@ -232,56 +232,64 @@ workflow PHOENIX_EXTERNAL {
     )
     ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions)
 
-    // //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file
-    // filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}
-    //     .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0])
+    //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file
+    filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}
+        .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5)
+        .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0])
+        .filter { it[2].findAll {it.contains('PASSED: More than 0 scaffolds')}}
 
+    // Running gamma to identify hypervirulence genes in scaffolds
+    GAMMA_HV (
+        filtered_scaffolds_ch, params.hvgamdb
+    )
+    ch_versions = ch_versions.mix(GAMMA_HV.out.versions)
 
-    // // Running gamma to identify hypervirulence genes in scaffolds
-    // GAMMA_HV (
-    //     filtered_scaffolds_ch, params.hvgamdb
-    // )
-    // ch_versions = ch_versions.mix(GAMMA_HV.out.versions)
+    // Running gamma to identify AR genes in scaffolds
+    GAMMA_AR (
+        filtered_scaffolds_ch, params.ardb
+    )
+    ch_versions = ch_versions.mix(GAMMA_AR.out.versions)
 
-    // // Running gamma to identify AR genes in scaffolds
-    // GAMMA_AR (
-    //     filtered_scaffolds_ch, params.ardb
-    // )
-    // ch_versions = ch_versions.mix(GAMMA_AR.out.versions)
+    GAMMA_PF (
+        filtered_scaffolds_ch, params.gamdbpf
+    )
+    ch_versions = ch_versions.mix(GAMMA_PF.out.versions)
 
-    // GAMMA_PF (
-    //     filtered_scaffolds_ch, params.gamdbpf
-    // )
-    // ch_versions = ch_versions.mix(GAMMA_PF.out.versions)
+    // Getting Assembly Stats
+    QUAST (
+        filtered_scaffolds_ch
+    )
+    ch_versions = ch_versions.mix(QUAST.out.versions)
 
-    // // Getting Assembly Stats
-    // QUAST (
-    //     filtered_scaffolds_ch
-    // )
-    // ch_versions = ch_versions.mix(QUAST.out.versions)
+    // get gff and protein files for amrfinder+
+    PROKKA (
+        filtered_scaffolds_ch, [], []
+    )
+    ch_versions = ch_versions.mix(PROKKA.out.versions)
+
-    // // Creating krona plots and best hit files for weighted assembly
-    // KRAKEN2_WTASMBLD (
-    //     BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads"
-    // )
-    // ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions)
+    // Creating krona plots and best hit files for weighted assembly
+    KRAKEN2_WTASMBLD (
+        BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads"
+    )
+    ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions)
 
-    // // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file
-    // mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch)
+    // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file
+    mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch)
 
-    // // Running Mash distance to get top 20 matches for fastANI to speed things up
-    // MASH_DIST (
-    //     mash_dist_ch
-    // )
-    // ch_versions = ch_versions.mix(MASH_DIST.out.versions)
+    // Running Mash distance to get top 20 matches for fastANI to speed things up
+    MASH_DIST (
+        mash_dist_ch
+    )
+    ch_versions = ch_versions.mix(MASH_DIST.out.versions)
 
-    // // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id
-    // top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0])
+    // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id
+    top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0])
 
-    // // Generate file with list of paths of top taxa for fastANI
-    // DETERMINE_TOP_MASH_HITS (
-    //     top_mash_hits_ch
-    // )
-    // ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions)
+    // Generate file with list of paths of top taxa for fastANI
+    DETERMINE_TOP_MASH_HITS (
+        top_mash_hits_ch
+    )
+    ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions)
 
     // // Combining filtered scaffolds with the top taxa list based on meta.id
     // top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\
@@ -322,12 +330,6 @@ workflow PHOENIX_EXTERNAL {
     // )
     // ch_versions = ch_versions.mix(DO_MLST.out.versions)
 
-    // // get gff and protein files for amrfinder+
-    // PROKKA (
-    //     filtered_scaffolds_ch, [], []
-    // )
-    // ch_versions = ch_versions.mix(PROKKA.out.versions)
-
     // /*// Fetch AMRFinder Database
     // AMRFINDERPLUS_UPDATE( )
     // ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/
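The heart of the refactor is the new `filtered_scaffolds_ch`. Step by step: `splitCsv(strip:true, by:5)` emits the fairy outcome file five rows at a time as a list of one-column rows; the `map` flattens those five `[value]` rows into a single five-string list; `join(..., by: [0])` pairs each sample's scaffolds with its outcomes on `meta.id`; and `filter` keeps a tuple only when at least one outcome line matches. Note that the predicate returns the result of `findAll` — a list, not a boolean — and Groovy treats an empty list as false, so samples without a matching PASSED line are silently dropped. A sketch of the data shape at each step (sample values illustrative; `collect` is a compact stand-in for the commit's explicit `fairy_outcome[0][0]`…`[4][0]` indexing):

    filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds
        .map { meta, scaffolds -> [[id: meta.id], scaffolds] }          // [ [id:'sample1'], scaffolds.fasta ]
        .join(
            SCAFFOLD_COUNT_CHECK.out.outcome
                .splitCsv(strip: true, by: 5)                           // [ [id:'sample1'], [[r1],[r2],[r3],[r4],[r5]] ]
                .map { meta, rows -> [meta, rows.collect { it[0] }] },  // [ [id:'sample1'], [five outcome strings] ]
            by: [0]
        )                                                               // [ [id:'sample1'], scaffolds.fasta, [five outcome strings] ]
        .filter { it[2].findAll { it.contains('PASSED: More than 0 scaffolds') } }

Downstream, every process fed from filtered_scaffolds_ch (GAMMA_HV, GAMMA_AR, GAMMA_PF, QUAST, PROKKA, plus MASH_DIST and DETERMINE_TOP_MASH_HITS via the channels derived from it) inherits the gating from this single filter instead of carrying its own `when:` block.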
