Skip to content

Commit

Permalink
Merge branch 'CW-4886' into 'dev'
Browse files Browse the repository at this point in the history
Tidy de analysis

Closes CW-4886

See merge request epi2melabs/workflows/wf-transcriptomes!185
  • Loading branch information
sarahjeeeze committed Sep 16, 2024
2 parents beb094e + d83807f commit d2ddad2
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 36 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- If required for IGV, reference indexes are output in to a `igv_reference` directory
### Changed
- BAMS are output in to a BAMS directory.
- Reconcile with template 5.2.6.

## [v1.3.0]
### Removed
Expand Down
3 changes: 2 additions & 1 deletion bin/de_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ min_samps_gene_expr <- as.numeric(args[2])
min_samps_feature_expr <- as.numeric(args[3])
min_gene_expr <- as.numeric(args[4])
min_feature_expr <- as.numeric(args[5])
sample_sheet <- args[6]

cat("Loading counts, conditions and parameters.\n")
cts <- as.matrix(read.csv("all_counts.tsv", sep="\t", row.names="Reference", stringsAsFactors=FALSE))

# Set up sample data frame:
#changed this to sample_id
coldata <- read.csv("sample_sheet.csv", row.names="alias", sep=",", stringsAsFactors=TRUE)
coldata <- read.csv(sample_sheet, row.names="alias", sep=",", stringsAsFactors=TRUE)

coldata$sample_id <- rownames(coldata)
# check if control condition exists, sets as reference
Expand Down
4 changes: 1 addition & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -717,9 +717,7 @@ workflow pipeline {
de = differential_expression(transcriptome, input_reads, sample_sheet, gtf)
de_report = de.all_de
count_transcripts_file = de.count_transcripts
dtu_plots = de.dtu_plots
de_outputs = de.de_outputs
counts = de.counts
} else{
de_report = OPTIONAL_FILE
count_transcripts_file = OPTIONAL_FILE
Expand Down Expand Up @@ -763,7 +761,7 @@ workflow pipeline {

if (params.de_analysis){
de_results = report.concat(
transcriptome, de_outputs.flatten(), counts.flatten(),
transcriptome, de_outputs.flatten(),
makeReport.out.results_dge, makeReport.out.tpm,
makeReport.out.filtered, makeReport.out.unfiltered,
makeReport.out.gene_counts)
Expand Down
13 changes: 6 additions & 7 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ params {
threads = 4
// Thresholds for viewing isoforms in report table
isoform_table_nrows = 5000


out_dir = "output"
sample = null
Expand All @@ -42,7 +41,7 @@ params {
pychopper_opts = null
pychopper_backend = "edlib"
cdna_kit = "SQK-PCS109"

// Extra option passed to minimap2 when generating index
minimap2_index_opts = "-k14"

Expand Down Expand Up @@ -79,9 +78,9 @@ params {
de_analysis = false
ref_transcriptome = null
min_samps_gene_expr = 3
min_samps_feature_expr = 1
min_samps_feature_expr = 1
min_gene_expr = 10
min_feature_expr = 3
min_feature_expr = 3


wf {
Expand All @@ -95,8 +94,8 @@ params {
"--sample_sheet 'wf-transcriptomes-demo/sample_sheet.csv'",
]
agent = null
container_sha = "shafb1e2372e1535f0b42891ed2c68ffdac2ca1d658"
common_sha = "shae58638742cf84dbeeec683ba24bcdee67f64b986"
container_sha = "shad8671ea3a8ed52f2c0f40355e8eb5c6f00d2cbda"
common_sha = "shad28e55140f75a68f59bbecc74e880aeab16ab158"
}
}

Expand Down Expand Up @@ -165,7 +164,7 @@ profiles {
}
withLabel:wf_common {
container = "${params.aws_image_prefix}-wf-common:${params.wf.common_sha}"
}
}
shell = ['/bin/bash', '-euo', 'pipefail']
}
}
Expand Down
40 changes: 15 additions & 25 deletions subworkflows/differential_expression.nf
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,21 @@ process deAnalysis {
output:
path "de_analysis/results_dtu_stageR.tsv", emit: stageR
path "merged/filtered_transcript_counts_with_genes.tsv", emit: flt_counts
path "de_analysis/unfiltered_transcript_counts_with_genes.tsv", emit: unflt_counts
path "merged/all_gene_counts.tsv", emit: gene_counts
path "de_analysis/unfiltered_transcript_counts_with_genes.tsv", emit: unflt_counts
path "de_analysis/results_dge.tsv", emit: dge
path "de_analysis/results_dexseq.tsv", emit: dexseq
path "de_analysis", emit: de_analysis
path "de_analysis/results_dge.pdf", emit: dge_pdf
path "de_analysis/results_dge.tsv", emit: dge_tsv
path "de_analysis/results_dtu_gene.tsv", emit: dtu_gene
path "de_analysis/results_dtu_transcript.tsv", emit: dtu_transcript
path "de_analysis/results_dtu_stageR.tsv", emit: dtu_stageR
path "de_analysis/results_dtu.pdf", emit: dtu_pdf
path "de_analysis/cpm_gene_counts.tsv", emit: cpm
"""
mkdir merged
mkdir de_analysis
de_analysis.R annotation.gtf $params.min_samps_gene_expr $params.min_samps_feature_expr $params.min_gene_expr $params.min_feature_expr
de_analysis.R annotation.gtf $params.min_samps_gene_expr $params.min_samps_feature_expr $params.min_gene_expr $params.min_feature_expr "sample_sheet.csv"
"""
}

Expand All @@ -91,23 +96,10 @@ process plotResults {
path "filtered_transcript_counts_with_genes.tsv"
path "results_dtu_stageR.tsv"
path "sample_sheet.tsv"
path de_analysis
output:
path "de_analysis/dtu_plots.pdf", emit: dtu_plots
path "sample_sheet.tsv", emit: sample_sheet_csv
// Output all DE files for use in report process
path "de_analysis/*", emit: stageR
// Output selected DE files to be output in out_dir
path "de_analysis/results_dge.pdf", emit: dge_pdf
path "de_analysis/results_dge.tsv", emit: dge_tsv
path "de_analysis/results_dtu_gene.tsv", emit: dtu_gene
path "de_analysis/results_dtu_transcript.tsv", emit: dtu_transcript
path "de_analysis/results_dtu_stageR.tsv", emit: dtu_stageR
path "de_analysis/results_dtu.pdf", emit: dtu_pdf
path "dtu_plots.pdf", emit: dtu_plots
"""
plot_dtu_results.R
# output plots to common analysis output directory
cp dtu_plots.pdf de_analysis/dtu_plots.pdf
"""
}

Expand Down Expand Up @@ -159,6 +151,7 @@ workflow differential_expression {
sample_sheet
ref_annotation
main:
sample_sheet = Channel.fromPath(sample_sheet)
checkSampleSheetCondition(sample_sheet)
t_index = build_minimap_index_transcriptome(ref_transcriptome)
mapped = map_transcriptome(full_len_reads.combine(t_index)
Expand All @@ -167,19 +160,16 @@ workflow differential_expression {
merged = mergeCounts(count_transcripts.out.counts.collect())
merged_TPM = mergeTPM(count_transcripts.out.counts.collect())
analysis = deAnalysis(sample_sheet, merged, ref_annotation)
plotResults(analysis.flt_counts, analysis.stageR, sample_sheet, analysis.de_analysis)
plotResults(analysis.flt_counts, analysis.stageR, sample_sheet)
// Concat files required for making the report
de_report = analysis.flt_counts.concat(analysis.gene_counts, analysis.dge, analysis.dexseq,
analysis.stageR, plotResults.out.sample_sheet_csv, merged, ref_annotation, merged_TPM, analysis.unflt_counts).collect()
// Concat files required to be output to user
de_outputs_concat = analysis.cpm.concat(plotResults.out.dtu_plots, plotResults.out.dge_pdf, plotResults.out.dge_tsv,
plotResults.out.dtu_gene, plotResults.out.dtu_transcript, plotResults.out.dtu_stageR, plotResults.out.dtu_pdf).collect()
analysis.stageR, sample_sheet, merged, ref_annotation, merged_TPM, analysis.unflt_counts).collect()
// Concat files required to be output to user without any changes
de_outputs_concat = analysis.cpm.concat(plotResults.out.dtu_plots, analysis.dge_pdf, analysis.dge_tsv,
analysis.dtu_gene, analysis.dtu_transcript, analysis.dtu_stageR, analysis.dtu_pdf, analysis.flt_counts, analysis.gene_counts, merged_TPM).collect()
count_transcripts_file = count_transcripts.out.seqkit_stats.collect()
all_counts = merged_TPM.concat(analysis.flt_counts, analysis.gene_counts)
emit:
all_de = de_report
count_transcripts = count_transcripts_file
dtu_plots = plotResults.out.dtu_plots
de_outputs = de_outputs_concat
counts = all_counts
}

0 comments on commit d2ddad2

Please sign in to comment.