Skip to content

Commit

Permalink
Merge pull request #375 from nf-core/PR_301
Browse files Browse the repository at this point in the history
Review of PR 301 to Discuss fixes of #164
  • Loading branch information
JoseEspinosa committed Sep 26, 2024
2 parents fe950ac + 347d98e commit ea927b5
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Enhancements & fixes

- [[#164]](https://github.com/nf-core/atacseq/issues/164) and partly [[#91]](https://github.com/nf-core/atacseq/issues/91) with code from [[#301]](https://github.com/nf-core/atacseq/pull/301) to address shifting of reads as an option that is turned off by default.
- [[#327](https://github.com/nf-core/atacseq/issues/327)] - Consistently support `.csi` indices as alternative to `.bai` to allow SAMTOOLS_INDEX to be used with the `-c` flag.
- [[#356](https://github.com/nf-core/atacseq/issues/356)] - Get rid of the `lib` folder and rearrange the pipeline accordingly.
- [[#379](https://github.com/nf-core/atacseq/pull/356)] - Use macs3 instead of macs2.
Expand Down
62 changes: 62 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,37 @@ process {
]
}

withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' {
ext.args = '--ATACshift'
ext.prefix = { "${meta.id}.mLb.clN.shifted" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" },
mode: params.publish_dir_mode,
pattern: '*.bam',
enabled: params.save_align_intermeds
]
}

withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" },
mode: params.publish_dir_mode,
pattern: '*.bam',
enabled: params.shift_reads
]
}

withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_INDEX' {
ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" },
mode: params.publish_dir_mode,
pattern: '*.bai',
enabled: params.shift_reads
]
}

withName: '.*:MERGED_LIBRARY_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' {
ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' }
ext.prefix = { "${meta.id}.mLb.clN" }
Expand Down Expand Up @@ -778,6 +809,37 @@ if (!params.skip_merge_replicates) {
]
}

withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' {
ext.args = '--ATACshift'
ext.prefix = { "${meta.id}.mRp.clN.shifted" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_replicate" },
mode: params.publish_dir_mode,
pattern: '*.bam',
enabled: params.save_align_intermeds
]
}

withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" },
mode: params.publish_dir_mode,
pattern: '*.bam',
enabled: params.shift_reads
]
}

withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_INDEX' {
ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" },
mode: params.publish_dir_mode,
pattern: '*.bai',
enabled: params.shift_reads
]
}

withName: '.*:MERGED_REPLICATE_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' {
ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' }
ext.prefix = { "${meta.id}.mRp.clN" }
Expand Down
36 changes: 36 additions & 0 deletions modules/local/deeptools_alignmentsieve.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
process DEEPTOOLS_ALIGNMENTSIEVE {
tag "$meta.id"
label 'process_medium'

conda "bioconda::deeptools=3.5.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' :
'biocontainers/deeptools:3.5.1--py_0' }"

input:
tuple val(meta), path(bam), path(bai)

output:
tuple val(meta), path("*.bam"), emit: bam
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

"""
alignmentSieve \\
$args \\
-b $bam \\
-o ${prefix}.bam \\
--numberOfProcessors $task.cpus
cat <<-END_VERSIONS > versions.yml
"${task.process}":
deeptools: \$(alignmentSieve --version | sed -e "s/alignmentSieve //g")
END_VERSIONS
"""
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ params {
skip_merge_replicates = false
save_align_intermeds = false
save_unaligned = false
shift_reads = false

// Options: Peaks
narrow_peak = false
Expand Down
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,13 @@
"hidden": true,
"description": "BAMTools JSON file with custom filters for single-end data.",
"fa_icon": "fas fa-cog"
},
"shift_reads": {
"type": "boolean",
"fa_icon": "fas fa-chart-area",
"default": false,
"help_text": "Shift aligned reads as commonly done for ATACseq footprinting analysis, +4 bp for reads on the + strand, -5 bp for reads on the - strand. This can only be applied if all samples are paired-end.",
"description": "Shift aligned reads (+4 bp and -5 bp)."
}
}
},
Expand Down
53 changes: 53 additions & 0 deletions subworkflows/local/bam_shift_reads.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main'
include { DEEPTOOLS_ALIGNMENTSIEVE } from '../../modules/local/deeptools_alignmentsieve'

workflow BAM_SHIFT_READS {
take:
ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ]
ch_fasta // channel: [ fasta ]

main:
ch_versions = Channel.empty()

//
// Shift reads
//
DEEPTOOLS_ALIGNMENTSIEVE (
ch_bam_bai
)
ch_versions = ch_versions.mix(DEEPTOOLS_ALIGNMENTSIEVE.out.versions)

//
// Sort reads
//
SAMTOOLS_SORT (
DEEPTOOLS_ALIGNMENTSIEVE.out.bam,
ch_fasta
)
ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions)

//
// Index reads
//
SAMTOOLS_INDEX (
SAMTOOLS_SORT.out.bam
)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)

//
// Run samtools flagstat
//
SAMTOOLS_FLAGSTAT (
SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])
)
ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions)

emit:
bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ]
bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]
flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ]
versions = ch_versions // channel: [ versions.yml ]
}
98 changes: 81 additions & 17 deletions workflows/atacseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pi
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_atacseq_pipeline'
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { ALIGN_STAR } from '../subworkflows/local/align_star'
include { BAM_SHIFT_READS as MERGED_LIBRARY_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads'
include { BAM_SHIFT_READS as MERGED_REPLICATE_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads'
include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools'
include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools'
include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc'
Expand Down Expand Up @@ -123,6 +125,24 @@ workflow ATACSEQ {
// See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
// ! There is currently no tooling to help you write a sample sheet schema

//
// Check if reads are all paired-end if 'shift_reads' parameter is set
//
if (params.shift_reads) {
INPUT_CHECK
.out
.reads
.filter { meta, reads -> meta.single_end }
.collect()
.map {
it ->
def count = it.size()
if (count > 0) {
exit 1, 'The parameter --shift_reads can only be applied if all samples are paired-end.'
}
}
}

//
// SUBWORKFLOW: Read QC and trim adapters
//
Expand Down Expand Up @@ -199,6 +219,7 @@ workflow ATACSEQ {
[]
)
ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam
ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai
ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats
ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat
ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats
Expand Down Expand Up @@ -329,11 +350,38 @@ workflow ATACSEQ {
ch_versions = ch_versions.mix(MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS.out.versions.first())
}

//
// SUBWORKFLOW: Shift paired-end reads
//
ch_merged_library_filter_bam = MERGED_LIBRARY_FILTER_BAM.out.bam
ch_merged_library_filter_bai = MERGED_LIBRARY_FILTER_BAM.out.bai
ch_merged_library_filter_flagstat = MERGED_LIBRARY_FILTER_BAM.out.flagstat
ch_merged_library_filter_csi = MERGED_LIBRARY_FILTER_BAM.out.csi

if (params.shift_reads && params.aligner != 'chromap' ) {
MERGED_LIBRARY_BAM_SHIFT_READS (
ch_merged_library_filter_bam.join(ch_merged_library_filter_bai, by: [0]),
ch_fasta
.map {
[ [:], it ]
}
)
ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_SHIFT_READS.out.versions)

ch_merged_library_filter_bam = MERGED_LIBRARY_BAM_SHIFT_READS.out.bam
ch_merged_library_filter_bai = MERGED_LIBRARY_BAM_SHIFT_READS.out.bai
ch_merged_library_filter_flagstat = MERGED_LIBRARY_BAM_SHIFT_READS.out.flagstat
ch_merged_library_filter_csi = MERGED_LIBRARY_BAM_SHIFT_READS.out.csi

}

//
// SUBWORKFLOW: Normalised bigWig coverage tracks
//
MERGED_LIBRARY_BAM_TO_BIGWIG (
MERGED_LIBRARY_FILTER_BAM.out.bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]),
// MERGED_LIBRARY_FILTER_BAM.out.bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]),
// ch_chrom_sizes
ch_merged_library_filter_bam.join(ch_merged_library_filter_flagstat, by: [0]),
ch_chrom_sizes
)
ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_TO_BIGWIG.out.versions)
Expand All @@ -353,11 +401,9 @@ workflow ATACSEQ {
}

// Create channels: [ meta, [bam], [bai] ] or [ meta, [ bam, control_bam ] [ bai, control_bai ] ]
MERGED_LIBRARY_FILTER_BAM
.out
.bam
.join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0], remainder: true)
.join(MERGED_LIBRARY_FILTER_BAM.out.csi, by: [0], remainder: true)
ch_merged_library_filter_bam
.join(ch_merged_library_filter_bai, by: [0], remainder: true)
.join(ch_merged_library_filter_csi, by: [0], remainder: true)
.map {
meta, bam, bai, csi ->
if (bai) {
Expand Down Expand Up @@ -562,32 +608,51 @@ workflow ATACSEQ {
ch_markduplicates_replicate_metrics = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.metrics
ch_versions = ch_versions.mix(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.versions)

//
// SUBWORKFLOW: Shift paired-end reads
// Shift again, as ch_merged_library_replicate_bam is generated out of unshifted reads
//
ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam
ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bai
ch_merged_replicate_markduplicate_flagstat = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat
ch_merged_replicate_markduplicate_csi = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.csi

if (params.shift_reads && params.aligner != 'chromap' ) {
MERGED_REPLICATE_BAM_SHIFT_READS (
ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_bai, by: [0]),
ch_fasta
.map {
[ [:], it ]
}
)
ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_SHIFT_READS.out.versions)

ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_BAM_SHIFT_READS.out.bam
ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_BAM_SHIFT_READS.out.bai
ch_merged_replicate_markduplicate_flagstat = MERGED_REPLICATE_BAM_SHIFT_READS.out.flagstat
ch_merged_replicate_markduplicate_csi = MERGED_REPLICATE_BAM_SHIFT_READS.out.csi
}
if (!params.skip_merged_replicate_bigwig) {
//
// SUBWORKFLOW: Normalised bigWig coverage tracks
//
MERGED_REPLICATE_BAM_TO_BIGWIG (
MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]),
ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_flagstat, by: [0]),
ch_chrom_sizes
)
ch_ucsc_bedgraphtobigwig_replicate_bigwig = MERGED_REPLICATE_BAM_TO_BIGWIG.out.bigwig
ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_TO_BIGWIG.out.versions)
}

// Create channels: [ meta, bam, ([] for control_bam) ]
if (params.with_control) {
MERGED_REPLICATE_MARKDUPLICATES_PICARD
.out
.bam
ch_merged_replicate_markduplicate_bam
.map {
meta, bam ->
meta.control ? null : [ meta.id, bam ]
}
.set { ch_bam_merged_control }

MERGED_REPLICATE_MARKDUPLICATES_PICARD
.out
.bam
ch_merged_replicate_markduplicate_bam
.map {
meta, bam ->
meta.control ? [ meta.control, meta, bam ] : null
Expand All @@ -596,15 +661,14 @@ workflow ATACSEQ {
.map { it -> [ it[1] , it[2], it[3] ] }
.set { ch_bam_replicate }
} else {
MERGED_REPLICATE_MARKDUPLICATES_PICARD
.out
.bam
ch_merged_replicate_markduplicate_bam
.map {
meta, bam ->
[ meta , bam, [] ]
}
.set { ch_bam_replicate }
}

//
// SUBWORKFLOW: Call peaks with MACS3, annotate with HOMER and perform downstream QC
//
Expand Down

0 comments on commit ea927b5

Please sign in to comment.