From 7720ec4004eb67bac062db298c4cd4e4538cf329 Mon Sep 17 00:00:00 2001
From: slsevilla
Date: Wed, 3 Apr 2024 13:42:50 +0000
Subject: [PATCH] refactor: move filtering to workflow level #147

---
 modules/local/gamma.nf         |  4 --
 modules/local/mash_distance.nf |  4 --
 modules/local/prokka.nf        | 16 +-----
 modules/local/quast.nf         |  4 --
 workflows/phoenix.nf           | 96 +++++++++++++++++-----------------
 5 files changed, 51 insertions(+), 73 deletions(-)

diff --git a/modules/local/gamma.nf b/modules/local/gamma.nf
index 93c8a1db..1cc433d7 100755
--- a/modules/local/gamma.nf
+++ b/modules/local/gamma.nf
@@ -15,10 +15,6 @@ process GAMMA {
     tuple val(meta), path("*.fasta"), optional:true , emit: fasta
     path "versions.yml"                             , emit: versions
 
-    when:
-        //if there are scaffolds left after filtering
-        "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/mash_distance.nf b/modules/local/mash_distance.nf
index af7d7ac9..f969b95b 100755
--- a/modules/local/mash_distance.nf
+++ b/modules/local/mash_distance.nf
@@ -11,10 +11,6 @@ process MASH_DIST {
     tuple val(meta), path("*.txt"), emit: dist
     path("versions.yml")          , emit: versions
 
-    when:
-        //if there are scaffolds left after filtering
-        "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
     def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/prokka.nf b/modules/local/prokka.nf
index ceee5c6e..c3902891 100755
--- a/modules/local/prokka.nf
+++ b/modules/local/prokka.nf
@@ -24,27 +24,15 @@ process PROKKA {
     tuple val(meta), path("*.tsv"), emit: tsv
     path "versions.yml"           , emit: versions
 
-    when:
-        //if there are scaffolds left after filtering
-        "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
-    //set up for terra
-    if (params.terra==false) {
-        terra = ""
-        terra_exit = ""
-    } else if (params.terra==true) {
-        terra = "PATH=/opt/conda/envs/prokka/bin:\$PATH"
-        terra_exit = """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """
-    } else {
-        error "Please set params.terra to either \"true\" or \"false\""
-    }
     //define variables
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
     def proteins_opt = proteins ? "--proteins ${proteins[0]}" : ""
     def prodigal_opt = prodigal_tf ? "--prodigaltf ${prodigal_tf[0]}" : ""
     def container = task.container.toString() - "staphb/prokka@"
+    def terra = params.terra ? "PATH=/opt/conda/envs/prokka/bin:\$PATH" : ""
+    def terra_exit = params.terra ? """PATH="\$(printf '%s\\n' "\$PATH" | sed 's|/opt/conda/envs/prokka/bin:||')" """ : ""
     """
     #adding python path for running busco on terra
     $terra
diff --git a/modules/local/quast.nf b/modules/local/quast.nf
index 59bf2726..92e3df1b 100755
--- a/modules/local/quast.nf
+++ b/modules/local/quast.nf
@@ -11,10 +11,6 @@ process QUAST {
     tuple val(meta), path('*.tsv') , emit: report_tsv
     path "versions.yml"            , emit: versions
 
-    when:
-        //if the files are not corrupt and there are equal number of reads in each file then run bbduk
-        "${fairy_outcome[4]}" == "PASSED: More than 0 scaffolds in ${meta.id} after filtering."
-
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/workflows/phoenix.nf b/workflows/phoenix.nf
index f1a4e128..0153ab07 100644
--- a/workflows/phoenix.nf
+++ b/workflows/phoenix.nf
@@ -232,56 +232,64 @@ workflow PHOENIX_EXTERNAL {
         )
         ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions)
 
-        // //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file
-        // filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}
-        //     .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5).map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0])
+        //combining scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file
+        filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}
+            .join(SCAFFOLD_COUNT_CHECK.out.outcome.splitCsv(strip:true, by:5)
+            .map{meta, fairy_outcome -> [meta, [fairy_outcome[0][0], fairy_outcome[1][0], fairy_outcome[2][0], fairy_outcome[3][0], fairy_outcome[4][0]]]}, by: [0])
+            .filter { it[2].findAll {it.contains('PASSED: More than 0 scaffolds')}}
+
+        // Running gamma to identify hypervirulence genes in scaffolds
+        GAMMA_HV (
+            filtered_scaffolds_ch, params.hvgamdb
+        )
+        ch_versions = ch_versions.mix(GAMMA_HV.out.versions)
 
-        // // Running gamma to identify hypervirulence genes in scaffolds
-        // GAMMA_HV (
-        //     filtered_scaffolds_ch, params.hvgamdb
-        // )
-        // ch_versions = ch_versions.mix(GAMMA_HV.out.versions)
+        // Running gamma to identify AR genes in scaffolds
+        GAMMA_AR (
+            filtered_scaffolds_ch, params.ardb
+        )
+        ch_versions = ch_versions.mix(GAMMA_AR.out.versions)
 
-        // // Running gamma to identify AR genes in scaffolds
-        // GAMMA_AR (
-        //     filtered_scaffolds_ch, params.ardb
-        // )
-        // ch_versions = ch_versions.mix(GAMMA_AR.out.versions)
+        GAMMA_PF (
+            filtered_scaffolds_ch, params.gamdbpf
+        )
+        ch_versions = ch_versions.mix(GAMMA_PF.out.versions)
 
-        // GAMMA_PF (
-        //     filtered_scaffolds_ch, params.gamdbpf
-        // )
-        // ch_versions = ch_versions.mix(GAMMA_PF.out.versions)
+        // Getting Assembly Stats
+        QUAST (
+            filtered_scaffolds_ch
+        )
+        ch_versions = ch_versions.mix(QUAST.out.versions)
 
-        // // Getting Assembly Stats
-        // QUAST (
-        //     filtered_scaffolds_ch
-        // )
-        // ch_versions = ch_versions.mix(QUAST.out.versions)
+        // get gff and protein files for amrfinder+
+        PROKKA (
+            filtered_scaffolds_ch, [], []
+        )
+        ch_versions = ch_versions.mix(PROKKA.out.versions)
 
-        // // Creating krona plots and best hit files for weighted assembly
-        // KRAKEN2_WTASMBLD (
-        //     BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads"
-        // )
-        // ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions)
+        // Creating krona plots and best hit files for weighted assembly
+        KRAKEN2_WTASMBLD (
+            BBMAP_REFORMAT.out.filtered_scaffolds, SCAFFOLD_COUNT_CHECK.out.outcome, "wtasmbld", [], QUAST.out.report_tsv, ASSET_CHECK.out.kraken_db, "reads"
+        )
+        ch_versions = ch_versions.mix(KRAKEN2_WTASMBLD.out.versions)
 
-        // // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file
-        // mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch)
+        // combine filtered scaffolds and mash_sketch so mash_sketch goes with each filtered_scaffolds file
+        mash_dist_ch = filtered_scaffolds_ch.combine(ASSET_CHECK.out.mash_sketch)
 
-        // // Running Mash distance to get top 20 matches for fastANI to speed things up
-        // MASH_DIST (
-        //     mash_dist_ch
-        // )
-        // ch_versions = ch_versions.mix(MASH_DIST.out.versions)
+        // Running Mash distance to get top 20 matches for fastANI to speed things up
+        MASH_DIST (
+            mash_dist_ch
+        )
+        ch_versions = ch_versions.mix(MASH_DIST.out.versions)
 
-        // // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id
-        // top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0])
+        // Combining mash dist with filtered scaffolds and the outcome of the scaffolds count check based on meta.id
+        top_mash_hits_ch = MASH_DIST.out.dist.join(filtered_scaffolds_ch, by: [0])
 
-        // // Generate file with list of paths of top taxa for fastANI
-        // DETERMINE_TOP_MASH_HITS (
-        //     top_mash_hits_ch
-        // )
-        // ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions)
+        // Generate file with list of paths of top taxa for fastANI
+        DETERMINE_TOP_MASH_HITS (
+            top_mash_hits_ch
+        )
+        ch_versions = ch_versions.mix(DETERMINE_TOP_MASH_HITS.out.versions)
 
         // // Combining filtered scaffolds with the top taxa list based on meta.id
         // top_taxa_list_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}\
@@ -322,12 +330,6 @@ workflow PHOENIX_EXTERNAL {
         // )
         // ch_versions = ch_versions.mix(DO_MLST.out.versions)
 
-        // // get gff and protein files for amrfinder+
-        // PROKKA (
-        //     filtered_scaffolds_ch, [], []
-        // )
-        // ch_versions = ch_versions.mix(PROKKA.out.versions)
-
         // /*// Fetch AMRFinder Database
         // AMRFINDERPLUS_UPDATE( )
         // ch_versions = ch_versions.mix(AMRFINDERPLUS_UPDATE.out.versions)*/
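
Reviewer note, not part of the patch: the sketch below isolates the join-then-filter gate that this change moves into the workflow, so the behaviour can be tried outside PHOENIX. The sample IDs, file names, the pre-parsed outcome lists, and the FAILED message wording are assumptions for illustration; only the filter condition and the shape of filtered_scaffolds_ch are taken from the diff above.

// Minimal DSL2 sketch of the workflow-level gate (stand-in data, not PHOENIX channels).
nextflow.enable.dsl = 2

workflow {
    // Stand-in for BBMAP_REFORMAT.out.filtered_scaffolds: tuples of [meta, scaffolds file].
    scaffolds_ch = Channel.of(
        [[id: 'sampleA'], file('sampleA.filtered.scaffolds.fa.gz')],
        [[id: 'sampleB'], file('sampleB.filtered.scaffolds.fa.gz')])

    // Stand-in for the five fairy outcome lines per sample; in the workflow these come from
    // SCAFFOLD_COUNT_CHECK.out.outcome via splitCsv(strip:true, by:5). The FAILED wording is illustrative.
    outcome_ch = Channel.of(
        [[id: 'sampleA'], ['PASSED: check 1', 'PASSED: check 2', 'PASSED: check 3', 'PASSED: check 4',
                           'PASSED: More than 0 scaffolds in sampleA after filtering.']],
        [[id: 'sampleB'], ['PASSED: check 1', 'PASSED: check 2', 'PASSED: check 3', 'PASSED: check 4',
                           'FAILED: No scaffolds in sampleB after filtering.']])

    // Join scaffolds to their outcome lines on meta, then keep only samples whose outcomes
    // report scaffolds remaining, i.e. the same filter used for filtered_scaffolds_ch in the patch.
    gated_scaffolds_ch = scaffolds_ch
        .join(outcome_ch, by: [0])
        .filter { it[2].findAll { it.contains('PASSED: More than 0 scaffolds') } }

    // Downstream steps (GAMMA_*, QUAST, PROKKA, MASH_DIST, ...) would consume gated_scaffolds_ch.
    gated_scaffolds_ch.view()   // emits only the sampleA tuple
}

Running this with nextflow run should print only the sampleA tuple, mirroring how GAMMA, QUAST, PROKKA, and MASH_DIST now receive just the samples that still have scaffolds after filtering.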