From 95935cfe69956ca50307a9c6a774c4b96dff860f Mon Sep 17 00:00:00 2001 From: Max Schubach Date: Wed, 2 Nov 2022 14:50:18 +0100 Subject: [PATCH] fix: using multiple fastq inputs in counts --- workflow/rules/common.smk | 26 +++++++++++++------------- workflow/rules/counts.smk | 7 ++----- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 7ca7661..6543ec5 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -127,15 +127,15 @@ def getFW(project, condition, replicate, rnaDna_type): exp = getExperiments(project) exp = exp[exp.Condition == condition] exp = exp[exp.Replicate.astype(str) == replicate] - return "%s/%s" % ( - config["experiments"][project]["data_folder"], - exp["%s_BC_F" % rnaDna_type].iloc[0], - ) + return [ + "%s/%s" % (config["experiments"][project]["data_folder"], f) + for f in exp["%s_BC_F" % rnaDna_type].iloc[0].split(";") + ] def getFWWithIndex(project): return [ - "%s%s" % (config["experiments"][project]["data_folder"], f) + "%s/%s" % (config["experiments"][project]["data_folder"], f) for f in getExperiments(project).BC_F.iloc[0].split(";") ] @@ -144,10 +144,10 @@ def getRev(project, condition, replicate, rnaDna_type): exp = getExperiments(project) exp = exp[exp.Condition == condition] exp = exp[exp.Replicate.astype(str) == replicate] - return "%s/%s" % ( - config["experiments"][project]["data_folder"], - exp["%s_BC_R" % rnaDna_type].iloc[0], - ) + return [ + "%s/%s" % (config["experiments"][project]["data_folder"], f) + for f in exp["%s_BC_R" % rnaDna_type].iloc[0].split(";") + ] def getRevWithIndex(project): @@ -161,10 +161,10 @@ def getUMI(project, condition, replicate, rnaDna_type): exp = getExperiments(project) exp = exp[exp.Condition == condition] exp = exp[exp.Replicate.astype(str) == replicate] - return "%s/%s" % ( - config["experiments"][project]["data_folder"], - exp["%s_UMI" % rnaDna_type].iloc[0], - ) + return [ + "%s/%s" % (config["experiments"][project]["data_folder"], f) + for f in exp["%s_UMI" % rnaDna_type].iloc[0].split(";") + ] def getUMIWithIndex(project): diff --git a/workflow/rules/counts.smk b/workflow/rules/counts.smk index 6f55f51..07e3b79 100644 --- a/workflow/rules/counts.smk +++ b/workflow/rules/counts.smk @@ -112,6 +112,7 @@ rule counts_create_BAM_umi: "results/experiments/{project}/counts/{condition}_{replicate}_{type}.bam", params: bc_length=lambda wc: config["experiments"][wc.project]["bc_length"], + umi_length=lambda wc: config["experiments"][wc.project]["umi_length"], datasetID="{condition}_{replicate}_{type}", conda: "../envs/python27.yaml" @@ -123,9 +124,6 @@ rule counts_create_BAM_umi: """ set +o pipefail; - umi_length=`zcat {input.umi_fastq} | head -2 | tail -1 | wc -c`; - umi_length=$(expr $(($umi_length-1))); - fwd_length=`zcat {input.fw_fastq} | head -2 | tail -1 | wc -c`; fwd_length=$(expr $(($fwd_length-1))); @@ -134,12 +132,11 @@ rule counts_create_BAM_umi: minoverlap=`echo ${{fwd_length}} ${{fwd_length}} {params.bc_length} | awk '{{print ($1+$2-$3-1 < 11) ? $1+$2-$3-1 : 11}}'`; echo $rev_start - echo $umi_length echo $minoverlap paste <( zcat {input.fw_fastq} ) <( zcat {input.rev_fastq} ) <( zcat {input.umi_fastq} ) | \ awk '{{if (NR % 4 == 2 || NR % 4 == 0) {{print $1$2$3}} else {{print $1}}}}' | \ - python {input.script_FastQ2doubleIndexBAM} -p -s $rev_start -l 0 -m $umi_length --RG {params.datasetID} | \ + python {input.script_FastQ2doubleIndexBAM} -p -s $rev_start -l 0 -m {params.umi_length} --RG {params.datasetID} | \ python {input.script_MergeTrimReadsBAM} --FirstReadChimeraFilter '' --adapterFirstRead '' --adapterSecondRead '' -p --mergeoverlap --minoverlap $minoverlap > {output} 2> {log} """