Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Allow bam input files #94

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .test/config/units.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample unit fragment_len_mean fragment_len_sd fq1 fq2
sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired
A 1 ngs-test-data/reads/a.chr21.1.fq ngs-test-data/reads/a.chr21.2.fq
B 1 ngs-test-data/reads/b.chr21.1.fq ngs-test-data/reads/b.chr21.2.fq
B 2 300 14 ngs-test-data/reads/b.chr21.1.fq
Expand Down
2 changes: 1 addition & 1 deletion .test/three_prime/config/units.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample unit fragment_len_mean fragment_len_sd fq1 fq2
sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired
SRR8309096 u1 430 43 quant_seq_test_data/SRR8309096.fastq.gz
SRR8309094 u1 430 43 quant_seq_test_data/SRR8309094.fastq.gz
SRR8309095 u1 430 43 quant_seq_test_data/SRR8309095.fastq.gz
Expand Down
2 changes: 1 addition & 1 deletion config/units.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample unit fragment_len_mean fragment_len_sd fq1 fq2
sample unit fragment_len_mean fragment_len_sd fq1 fq2 bam_single bam_paired
A 1 raw/a.chr21.1.fq raw/a.chr21.2.fq
B 1 raw/b.chr21.1.fq raw/b.chr21.2.fq
B 2 300 14 raw/b.chr21.1.fq
Expand Down
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ include: "rules/diffexp.smk"
include: "rules/diffsplice.smk"
include: "rules/enrichment.smk"
include: "rules/datavzrd.smk"
include: "rules/bam.smk"


rule all:
Expand Down
33 changes: 33 additions & 0 deletions workflow/rules/bam.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Convert a unit's paired-end BAM file into two separate FASTQ files.
#
# The input path is read from the `bam_paired` column of the `units` table,
# using the row whose `sample` and `unit` values equal the rule's wildcards.
# The two outputs are the files that downstream code expects for mate 1 and
# mate 2 (`.1.fq.gz` / `.2.fq.gz`).
rule bam_paired_to_fastq:
    input:
        lookup(
            query="sample == '{sample}' & unit == '{unit}'",
            within=units,
            cols="bam_paired",
        ),
    output:
        "results/fastq/{sample}-{unit}.1.fq.gz",
        "results/fastq/{sample}-{unit}.2.fq.gz",
    threads: 3
    params:
        # Handed to the wrapper as its `fastq` parameter; presumably forwarded
        # verbatim to `samtools fastq` -- confirm against the wrapper docs.
        fastq="-n",
    log:
        "logs/fastq/{sample}-{unit}.separate.log",
    wrapper:
        "v3.10.2/bio/samtools/fastq/separate"


# Convert a unit's single-end BAM file into one FASTQ file.
#
# The input path is read from the `bam_single` column of the `units` table,
# using the row whose `sample` and `unit` values equal the rule's wildcards.
#
# NOTE(review): the output pattern `{sample}-{unit}.fq.gz` can also match a
# path such as `A-1.1.fq.gz` (with unit = "1.1"), which is the shape of the
# first output of `bam_paired_to_fastq`. Unless wildcard constraints or a
# rule order defined elsewhere rule this out, the two rules may be ambiguous
# -- confirm.
rule bam_single_to_fastq:
    input:
        lookup(
            query="sample == '{sample}' & unit == '{unit}'",
            within=units,
            cols="bam_single",
        ),
    output:
        "results/fastq/{sample}-{unit}.fq.gz",
    threads: 3
    log:
        "logs/fastq/{sample}-{unit}.interleaved.log",
    wrapper:
        "v3.10.2/bio/samtools/fastq/interleaved"
11 changes: 9 additions & 2 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,20 @@ def get_model(wildcards):

def is_single_end(sample, unit):
    """Determine whether unit is single-end.

    A unit is treated as single-end only when the unit sheet provides
    neither a second FASTQ file (``fq2``) nor a paired-end BAM file
    (``bam_paired``) for it.

    Args:
        sample: Sample identifier, first level of the ``units`` index.
        unit: Unit identifier, second level of the ``units`` index.

    Returns:
        Truthy if both ``fq2`` and ``bam_paired`` are null for the unit,
        falsy otherwise.
    """
    # Both columns must be consulted: returning on `fq2` alone would classify
    # a unit that only supplies a paired-end BAM as single-end.
    bam_paired_not_present = pd.isnull(units.loc[(sample, unit), "bam_paired"])
    fq2_not_present = pd.isnull(units.loc[(sample, unit), "fq2"])
    return fq2_not_present and bam_paired_not_present


def get_fastqs(wildcards):
"""Get raw FASTQ files from unit sheet."""
if is_single_end(wildcards.sample, wildcards.unit):
if not pd.isnull(units.loc[(wildcards.sample, wildcards.unit), "bam_single"]):
return f"results/fastq/{wildcards.sample}-{wildcards.unit}.fq.gz"
elif not pd.isnull(units.loc[(wildcards.sample, wildcards.unit), "bam_paired"]):
fqfrombam1 = f"results/fastq/{wildcards.sample}-{wildcards.unit}.1.fq.gz"
fqfrombam2 = f"results/fastq/{wildcards.sample}-{wildcards.unit}.2.fq.gz"
return [fqfrombam1, fqfrombam2]
elif is_single_end(wildcards.sample, wildcards.unit):
return units.loc[(wildcards.sample, wildcards.unit), "fq1"]
else:
u = units.loc[(wildcards.sample, wildcards.unit), ["fq1", "fq2"]].dropna()
Expand Down
8 changes: 5 additions & 3 deletions workflow/rules/quant.smk
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
rule kallisto_index:
input:
fasta="resources/transcriptome.cdna.without_poly_a.fasta"
if is_3prime_experiment
else "resources/transcriptome.cdna.fasta",
fasta=(
"resources/transcriptome.cdna.without_poly_a.fasta"
if is_3prime_experiment
else "resources/transcriptome.cdna.fasta"
),
output:
index="results/kallisto_cdna/transcripts.cdna.idx",
log:
Expand Down
11 changes: 8 additions & 3 deletions workflow/schemas/units.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ properties:
description: unit id
fq1:
type: string
description: path to FASTQ file
description: path to FASTQ file (leave empty when using bam_single or bam_paired)
fq2:
type: string
description: path to second FASTQ file (leave empty in case of single-end)
description: path to second FASTQ file (leave empty for single-end data or when using bam_single or bam_paired)
bam_single:
type: string
description: path to single-end BAM file (leave empty when using FASTQ files)
bam_paired:
type: string
description: path to paired-end BAM file (leave empty when using FASTQ files)
required:
- sample
- unit
- fq1
Loading