From 4fe20afe592f13299ca7b0d73abeb0bf97e429d0 Mon Sep 17 00:00:00 2001 From: Katrin Sameith Date: Thu, 25 May 2023 13:41:51 +0200 Subject: [PATCH 1/8] Update bedtools_genomecov.nf Remove paired-end option, and use reads as single-end; otherwise, the coverage graph is misleading as it will often span over nucleosomes --- modules/local/bedtools_genomecov.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 58ac1cae..1766654f 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -21,7 +21,6 @@ process BEDTOOLS_GENOMECOV { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def pe = meta.single_end ? '' : '-pc' """ SCALE_FACTOR=\$(grep '[0-9] mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -31,7 +30,6 @@ process BEDTOOLS_GENOMECOV { -ibam $bam \\ -bg \\ -scale \$SCALE_FACTOR \\ - $pe \\ $args \\ | sort -T '.' 
-k1,1 -k2,2n > ${prefix}.bedGraph From 782658b97f7ecd1cd019b0c4c92d4dcc5689cb52 Mon Sep 17 00:00:00 2001 From: Katrin Date: Fri, 9 Jun 2023 11:06:50 +0200 Subject: [PATCH 2/8] Installed bedtools/bamtobed module --- modules.json | 183 ++++++++++++++++----- modules/nf-core/bedtools/bamtobed/main.nf | 35 ++++ modules/nf-core/bedtools/bamtobed/meta.yml | 38 +++++ 3 files changed, 215 insertions(+), 41 deletions(-) create mode 100644 modules/nf-core/bedtools/bamtobed/main.nf create mode 100644 modules/nf-core/bedtools/bamtobed/meta.yml diff --git a/modules.json b/modules.json index 4149c653..bab8f05f 100644 --- a/modules.json +++ b/modules.json @@ -8,172 +8,259 @@ "ataqv/ataqv": { "branch": "master", "git_sha": "56421e1a812bc2f9e77dbe9f297e9d9c580cb8a5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ataqv/mkarv": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "bedtools/bamtobed": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] }, "bowtie2/align": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_bowtie2"] + "installed_by": [ + "modules", + "fastq_align_bowtie2" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "e797efb47b0d3b2124753beb55dc83ab9512bceb", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", "git_sha": "9518fa4f65f3fb8cde24fde7d40333b39ec8fd65", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/mem": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_bwa"] + "installed_by": [ + "modules", + "fastq_align_bwa" + ] }, "chromap/chromap": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", 
"fastq_align_chromap"] + "installed_by": [ + "modules", + "fastq_align_chromap" + ] }, "chromap/index": { "branch": "master", "git_sha": "3a8e3ca607132a468c07c69aaa3bccd55eb983b8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/computematrix": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotfingerprint": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotheatmap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotprofile": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "810e8f2603ec38401d49a4aaed06f6d058745552", - "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "modules", + "fastq_fastqc_umitools_trimgalore" + ] }, "gffread": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "homer/annotatepeaks": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"khmer/uniquekmers": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "macs2/callpeak": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/markduplicates": { "branch": "master", "git_sha": "eca65aa4a5e2e192ac44d6962c8f9260f314ffb8", - "installed_by": ["modules", "bam_markduplicates_picard"] + "installed_by": [ + "modules", + "bam_markduplicates_picard" + ] }, "picard/mergesamfiles": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "preseq/lcextrap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/index": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_markduplicates_picard", "bam_sort_stats_samtools"] + "installed_by": [ + "modules", + "bam_markduplicates_picard", + "bam_sort_stats_samtools" + ] }, "samtools/sort": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "installed_by": [ + "modules", + "bam_sort_stats_samtools" + ] 
}, "samtools/stats": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "subread/featurecounts": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "trimgalore": { "branch": "master", "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", - "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "modules", + "fastq_fastqc_umitools_trimgalore" + ] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "umitools/extract": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "fastq_fastqc_umitools_trimgalore" + ] }, "untar": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -182,7 +269,9 @@ "bam_markduplicates_picard": { "branch": "master", "git_sha": "6daac2bc63f4847e0c7cc661f4f5b043ac13faaf", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_sort_stats_samtools": { "branch": "master", @@ -197,30 +286,42 @@ "bam_stats_samtools": { "branch": "master", "git_sha": "92eb5091ae5368a60cda58b3a0ced8b36d715b0f", - "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "subworkflows"] + "installed_by": [ + "bam_markduplicates_picard", + "bam_sort_stats_samtools", + "subworkflows" + ] }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_bwa": { "branch": "master", "git_sha": 
"ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_chromap": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf new file mode 100644 index 00000000..29f5a62f --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -0,0 +1,35 @@ +process BEDTOOLS_BAMTOBED { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bedtools=2.30.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bedtools \\ + bamtobed \\ + $args \\ + -i $bam \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml new file mode 100644 index 00000000..5a4ff73a --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/meta.yml @@ -0,0 +1,38 @@ +name: bedtools_bamtobed +description: Converts a bam file to a bed12 file. 
+keywords: + - bam + - bed +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Bed file containing genomic intervals. + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" + - "@drpatelh" From 4c8cda7f0e55945c8a931cc405558a581f702674 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Fri, 9 Jun 2023 15:02:09 +0200 Subject: [PATCH 3/8] Run MACS2 on bed input rather than bampe --- conf/modules.config | 2 ++ .../local/bam_peaks_call_qc_annotate_macs2_homer.nf | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index a856f1b0..b2ee1e10 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -584,6 +584,7 @@ if (!params.skip_plot_fingerprint) { process { withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { ext.args = [ + '--format BED', '--keep-dup all', '--nomodel', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", @@ -814,6 +815,7 @@ if (!params.skip_merge_replicates) { process { withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { ext.args = [ + '--format BED', '--keep-dup all', '--nomodel', params.narrow_peak ? 
'' : "--broad --broad-cutoff ${params.broad_cutoff}", diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index 4c2a8710..a17e20a0 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -2,6 +2,7 @@ // Call peaks with MACS2, annotate with HOMER and perform downstream QC // +include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main' include { MACS2_CALLPEAK } from '../../modules/nf-core/macs2/callpeak/main' include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' @@ -28,11 +29,19 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ch_versions = Channel.empty() + // + // Convert bam to bed + // + BEDTOOLS_BAMTOBED ( + ch_bam + ) + ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions.first()) + // // Call peaks with MACS2 // MACS2_CALLPEAK ( - ch_bam, + BEDTOOLS_BAMTOBED.out.bed, macs_gsize ) ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) From 91c0a1687df7ba77f3c6c256bd9b1553e7fbd340 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Fri, 9 Jun 2023 15:06:25 +0200 Subject: [PATCH 4/8] Move reads such that cut-sites are in the middle --- conf/modules.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index b2ee1e10..7b90fe7c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -587,6 +587,8 @@ process { '--format BED', '--keep-dup all', '--nomodel', + '--shift -75', + '--extsize 150', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? 
"--pvalue ${params.macs_pvalue}" : '', From db8f3f817f55088e3c0481883a045fc1cc55bc44 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Wed, 14 Jun 2023 10:58:12 +0200 Subject: [PATCH 5/8] Call peak summits per library that can be used for TFBS detection. --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index 7b90fe7c..d8bc2548 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -589,6 +589,7 @@ process { '--nomodel', '--shift -75', '--extsize 150', + '--call-summits', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', From 83b2e251c27b0e6b9ca27e0d901f0745b4cab5a6 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Wed, 14 Jun 2023 11:01:12 +0200 Subject: [PATCH 6/8] Convert bam to bed, and call MACS2 on single-end bed file. --- .../bam_peaks_call_qc_annotate_macs2_homer.nf | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index a17e20a0..b28b255c 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -33,15 +33,25 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { // Convert bam to bed // BEDTOOLS_BAMTOBED ( - ch_bam + ch_bam.map { meta, ip_bam, control_bam -> [ meta, ip_bam ] } ) ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions.first()) + // Create channels: [meta, ip_bed, []] + BEDTOOLS_BAMTOBED + .out + .bed + .map { + meta, ip_bed -> + [ meta, ip_bed, [] ] + } + .set { ch_bed } + // // Call peaks with MACS2 // MACS2_CALLPEAK ( - BEDTOOLS_BAMTOBED.out.bed, + ch_bed, macs_gsize ) ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) From 
dac43672794fbe74c57604d9a9244685552a43c8 Mon Sep 17 00:00:00 2001 From: Katrin Date: Tue, 27 Jun 2023 10:25:40 +0200 Subject: [PATCH 7/8] Fixed #164 - Introduced 4/5bp shift as it is common for ATAC-seq data. Fixed #168 - Always write out genome fa and fai so IGV session file can be opened. --- conf/modules.config | 65 ++++++++++++++++- modules/local/deeptools_alignmentsieve.nf | 36 ++++++++++ modules/local/igv.nf | 2 + nextflow.config | 1 + nextflow_schema.json | 7 ++ subworkflows/local/bam_shift_reads.nf | 40 +++++++++++ workflows/atacseq.nf | 85 ++++++++++++++++++----- 7 files changed, 216 insertions(+), 20 deletions(-) create mode 100644 modules/local/deeptools_alignmentsieve.nf create mode 100644 subworkflows/local/bam_shift_reads.nf diff --git a/conf/modules.config b/conf/modules.config index d8bc2548..285e890a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -453,6 +453,37 @@ process { ] } + withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' { + ext.args = '--ATACshift' + ext.prefix = { "${meta.id}.mLb.clN.shifted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.shift_reads + ] + } + + withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + enabled: params.shift_reads + ] + } + withName: '.*:MERGED_LIBRARY_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { ext.args = { [ @@ -782,6 +813,37 @@ if 
(!params.skip_merge_replicates) { ] } + withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' { + ext.args = '--ATACshift' + ext.prefix = { "${meta.id}.mRp.clN.shifted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.shift_reads + ] + } + + withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + enabled: params.shift_reads + ] + } + withName: '.*:MERGED_REPLICATE_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { ext.args = { [ @@ -956,8 +1018,7 @@ if (!params.skip_igv) { [ path: { "${params.outdir}/genome" }, mode: params.publish_dir_mode, - pattern: '*.{fa,fasta}', - enabled: params.save_reference + pattern: '*.{fa,fasta,fai}' ] ] } diff --git a/modules/local/deeptools_alignmentsieve.nf b/modules/local/deeptools_alignmentsieve.nf new file mode 100644 index 00000000..5148030a --- /dev/null +++ b/modules/local/deeptools_alignmentsieve.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_ALIGNMENTSIEVE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::deeptools=3.5.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + alignmentSieve \\ + $args \\ + -b $bam \\ + -o ${prefix}.bam \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(alignmentSieve --version | sed -e "s/alignmentSieve //g") + END_VERSIONS + """ +} diff --git a/modules/local/igv.nf b/modules/local/igv.nf index 542d6cc2..64f4266b 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -7,6 +7,7 @@ process IGV { input: path fasta + path fai path ("${bigwig_library_publish_dir}/*") path ("${peak_library_publish_dir}/*") path ("${consensus_library_publish_dir}/*") @@ -25,6 +26,7 @@ process IGV { path "*files.txt" , emit: txt path "*.xml" , emit: xml path fasta , emit: fasta + path fai , emit: fai path "versions.yml", emit: versions when: diff --git a/nextflow.config b/nextflow.config index 8495b492..5214e7e8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,6 +39,7 @@ params { skip_merge_replicates = false save_align_intermeds = false save_unaligned = false + shift_reads = true // Options: Peaks narrow_peak = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 41d5c76e..d8d7baa9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -289,6 +289,13 @@ "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", "fa_icon": "fas fa-cog" + }, + "shift_reads": { + "type": "boolean", + "fa_icon": "fas fa-chart-area", + "default": true, + "help_text": "Shift aligned reads as commonly done for ATACseq, +4bp for reads on the + strand, -5 bp for reads on the - strand. 
This can only be applied if all samples are paired-end, and shifting is skipped when the chromap aligner is used.", + "description": "Shift aligned reads (+4bp and -5bp)." } } }, diff --git a/subworkflows/local/bam_shift_reads.nf b/subworkflows/local/bam_shift_reads.nf new file mode 100644 index 00000000..d3fcabc1 --- /dev/null +++ b/subworkflows/local/bam_shift_reads.nf @@ -0,0 +1,40 @@ +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { DEEPTOOLS_ALIGNMENTSIEVE } from '../../modules/local/deeptools_alignmentsieve' + +workflow BAM_SHIFT_READS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + + main: + ch_versions = Channel.empty() + + // + // Shift reads + // + DEEPTOOLS_ALIGNMENTSIEVE ( + ch_bam_bai + ) + ch_versions = ch_versions.mix(DEEPTOOLS_ALIGNMENTSIEVE.out.versions) + + // + // Sort reads + // + SAMTOOLS_SORT ( + DEEPTOOLS_ALIGNMENTSIEVE.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + // + // Index reads + // + SAMTOOLS_INDEX ( + SAMTOOLS_SORT.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index c370ff29..8e5dc907 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -68,9 +68,11 @@ include { MULTIQC } from '../modules/local/multiqc' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { ALIGN_STAR } from '../subworkflows/local/align_star' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { ALIGN_STAR } from 
'../subworkflows/local/align_star' +include { BAM_SHIFT_READS as MERGED_LIBRARY_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads' +include { BAM_SHIFT_READS as MERGED_REPLICATE_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads' include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools' include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' @@ -96,6 +98,7 @@ include { PRESEQ_LCEXTRAP as MERGED_LIBRARY_PRESEQ_LCEXTRAP include { DEEPTOOLS_PLOTFINGERPRINT as MERGED_LIBRARY_DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/deeptools/plotfingerprint/main' include { ATAQV_ATAQV as MERGED_LIBRARY_ATAQV_ATAQV } from '../modules/nf-core/ataqv/ataqv/main' include { ATAQV_MKARV as MERGED_LIBRARY_ATAQV_MKARV } from '../modules/nf-core/ataqv/mkarv/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' include { PICARD_MERGESAMFILES as PICARD_MERGESAMFILES_LIBRARY } from '../modules/nf-core/picard/mergesamfiles/main' include { PICARD_MERGESAMFILES as PICARD_MERGESAMFILES_REPLICATE } from '../modules/nf-core/picard/mergesamfiles/main' @@ -141,6 +144,24 @@ workflow ATACSEQ { ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // + // Check if reads are all paired-end if 'shift_reads' parameter is set + // + if (params.shift_reads) { + INPUT_CHECK + .out + .reads + .filter { meta, reads -> meta.single_end } + .collect() + .map { + it -> + def count = it.size() + if (count > 0) { + exit 1, 'The parameter --shift_reads can only be applied if all samples are paired-end.' 
+ } + } + } + // // SUBWORKFLOW: Read QC and trim adapters // @@ -242,8 +263,8 @@ workflow ATACSEQ { [], [] ) - ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai + ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats @@ -342,11 +363,27 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) } + // + // SUBWORKFLOW: Shift paired-end reads + // + ch_merged_library_filter_bam = MERGED_LIBRARY_FILTER_BAM.out.bam + ch_merged_library_filter_bai = MERGED_LIBRARY_FILTER_BAM.out.bai + + if (params.shift_reads && params.aligner != 'chromap' ) { + MERGED_LIBRARY_BAM_SHIFT_READS ( + ch_merged_library_filter_bam.join(ch_merged_library_filter_bai, by: [0]), + ) + ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_SHIFT_READS.out.versions) + + ch_merged_library_filter_bam = MERGED_LIBRARY_BAM_SHIFT_READS.out.bam + ch_merged_library_filter_bai = MERGED_LIBRARY_BAM_SHIFT_READS.out.bai + } + // // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_LIBRARY_BAM_TO_BIGWIG ( - MERGED_LIBRARY_FILTER_BAM.out.bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]), + ch_merged_library_filter_bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_TO_BIGWIG.out.versions) @@ -366,10 +403,8 @@ workflow ATACSEQ { } // Create channels: [ meta, [bam], [bai] ] - MERGED_LIBRARY_FILTER_BAM - .out - .bam - .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0]) + ch_merged_library_filter_bam + .join(ch_merged_library_filter_bai, by: [0]) .set { ch_bam_bai } // @@ -523,24 +558,37 @@ workflow ATACSEQ { ch_markduplicates_replicate_metrics = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.metrics ch_versions = 
ch_versions.mix(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.versions) + // + // SUBWORKFLOW: Shift paired-end reads + // Shift again, as ch_merged_library_replicate_bam is generated out of unshifted reads + // + ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam + ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bai + + if (params.shift_reads && params.aligner != 'chromap' ) { + MERGED_REPLICATE_BAM_SHIFT_READS ( + ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_bai, by: [0]), + ) + ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_SHIFT_READS.out.versions) + + ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_BAM_SHIFT_READS.out.bam + ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_BAM_SHIFT_READS.out.bai + } + // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_REPLICATE_BAM_TO_BIGWIG ( - MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]), + ch_merged_replicate_markduplicate_bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_ucsc_bedgraphtobigwig_replicate_bigwig = MERGED_REPLICATE_BAM_TO_BIGWIG.out.bigwig ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_TO_BIGWIG.out.versions) // Create channels: [ meta, bam, ([] for control_bam) ] - MERGED_REPLICATE_MARKDUPLICATES_PICARD - .out - .bam - .map { - meta, bam -> - [ meta , bam, [] ] - } - .set { ch_bam_replicate } + // Create channels: [ meta, [bam], [bai] ] + ch_merged_replicate_markduplicate_bam + .join(ch_merged_replicate_markduplicate_bai, by: [0]) + .set { ch_bam_replicate } // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC @@ -593,6 +641,7 @@ workflow ATACSEQ { if (!params.skip_igv) { IGV ( PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, MERGED_LIBRARY_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), 
MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_library_bed.collect{it[1]}.ifEmpty([]), From df64c3aa3f27f31cdd03770d36c33a8b9e5f55ef Mon Sep 17 00:00:00 2001 From: Katrin Date: Thu, 29 Jun 2023 11:03:54 +0200 Subject: [PATCH 8/8] Use --call-summits only when calling narrow peaks. These summits can then be used for TFBS enrichment. --- conf/modules.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 285e890a..5e0f703b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -620,8 +620,7 @@ process { '--nomodel', '--shift -75', '--extsize 150', - '--call-summits', - params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", + params.narrow_peak ? '--call-summits' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', params.macs_fdr ? "--qvalue ${params.macs_fdr}" : ''