Skip to content

Commit

Permalink
feat: configurable min mapping quality
Browse files Browse the repository at this point in the history
  • Loading branch information
Max Schubach committed Mar 2, 2023
1 parent 88cd7bb commit 28045ae
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 7 deletions.
1 change: 1 addition & 0 deletions config/example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ assignments:
alignment_start: # start of an alignment in the reference. Here using 15 bp adapters. Can be different when using adapter free approaches
min: 15 # integer
max: 17 # integer
min_mapping_quality: 1 # integer >=0 Please use 0 when you have oligos that differ by 1 base in your reference/design file
FW:
- resources/Assignment_BasiC/R1.fastq.gz
BC:
Expand Down
3 changes: 2 additions & 1 deletion config/sbatch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ assignment_getBCs:
queue: medium
assignment_collectBCs:
time: "2-04:00"
threads: 1
threads: 20
mem: 10G
queue: medium
assignment_statistic_totalCounts:
time: "0-01:00"
Expand Down
2 changes: 2 additions & 0 deletions docs/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Each asignment you want to process you have to giv him a name like :code:`exampl
Defines the :code:`min` and :code:`max` of a :code:`sequence_length` specify . :code:`sequence_length` is basically the length of a sequence alignment to an oligo in the reference file. Because there can be insertion and deletions we recommend to vary it a bit around the exact length (e.g. +-5). In theory this option enables designs with multiple sequence lengths.
:alignment_start:
Defines the :code:`min` and :code:`max` of the start of the alignment in an oligo. When using adapters you have to set basically the length of the adapter. Otherwise 1 will be the choice for most cases. We also recommend to vary this value a bit because the start might not be exact after the adapter. E.g. by +-1.
:min_mapping_quality:
(Optinal) Defines the minimum mapping quality (MAPQ) of the alinment to an oligo. When using oligos with only 1bp difference it is recommended to set it to 0. Otherwise the default value of 1 is recommended.
:bc_length:
Length of the barcode. Must match with the length of :code:`R2`.
:BC_rev_comp:
Expand Down
17 changes: 12 additions & 5 deletions workflow/rules/assignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ rule assignment_merge:
-2 {input.REV} \
-m {params.min_overlap} -p {params.frac_mismatches_allowed} \
-d \
-e {params.min_dovetailed_overlap} \
-z \
-o {output.join} \
-i -f {output.un} \
Expand Down Expand Up @@ -247,7 +248,7 @@ rule assignment_getBCs:
input:
"results/assignment/{assignment}/bam/merge_split{split}.mapped.bam",
output:
temp("results/assignment/{assignment}/BCs/barcodes_incl_other.{split}.tsv.gz"),
temp("results/assignment/{assignment}/BCs/barcodes_incl_other.{split}.tsv"),
conda:
"../envs/bwa_samtools_picard_htslib.yaml"
params:
Expand All @@ -263,6 +264,9 @@ rule assignment_getBCs:
sequence_length_max=lambda wc: config["assignments"][wc.assignment][
"sequence_length"
]["max"],
mapping_quality_min=lambda wc: config["assignments"][wc.assignment][
"min_mapping_quality"
],
log:
temp("results/logs/assignment/getBCs.{assignment}.{split}.log"),
shell:
Expand All @@ -272,13 +276,13 @@ rule assignment_getBCs:
split($(NF),a,":");
split(a[3],a,",");
if (a[1] !~ /N/) {{
if (($5 > 0) && ($4 >= {params.alignment_start_min}) && ($4 <= {params.alignment_start_max}) && (length($10) >= {params.sequence_length_min}) && (length($10) <= {params.sequence_length_max})) {{
if (($5 >= {params.mapping_quality_min}) && ($4 >= {params.alignment_start_min}) && ($4 <= {params.alignment_start_max}) && (length($10) >= {params.sequence_length_min}) && (length($10) <= {params.sequence_length_max})) {{
print a[1],$3,$4";"$6";"$12";"$13";"$5
}} else {{
print a[1],"other","NA"
}}
}}
}}' | gzip -c > {output} 2> {log}
}}' | sort -k1,1 -k2,2 -k3,3 > {output} 2> {log}
"""


Expand Down Expand Up @@ -347,18 +351,21 @@ rule assignment_collectBCs:
"""
input:
expand(
"results/assignment/{{assignment}}/BCs/barcodes_incl_other.{split}.tsv.gz",
"results/assignment/{{assignment}}/BCs/barcodes_incl_other.{split}.tsv",
split=range(0, getSplitNumber()),
),
output:
"results/assignment/{assignment}/barcodes_incl_other.sorted.tsv.gz",
params:
batch_size=getSplitNumber(),
threads: 20
conda:
"../envs/default.yaml"
log:
temp("results/logs/assignment/collectBCs.{assignment}.log"),
shell:
"""
zcat {input} | sort -k1,1 -k2,2 -k3,3 | gzip -c > {output} 2> {log}
sort --batch-size={params.batch_size} --parallel={threads} -k1,1 -k2,2 -k3,3 -m {input} | gzip -c > {output} 2> {log}
"""


Expand Down
7 changes: 6 additions & 1 deletion workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ properties:
type: string
minItems: 1
uniqueItems: true
min_mapping_quality:
type: integer
default: 1
minimum: 0
NGmerge:
type: object
properties:
Expand All @@ -87,7 +91,7 @@ properties:
default: 0.1
min_dovetailed_overlap:
type: integer
default: 20
default: 50
required:
- min_overlap
- frac_mismatches_allowed
Expand Down Expand Up @@ -138,6 +142,7 @@ properties:
- configs
- alignment_start
- sequence_length
- min_mapping_quality
- NGmerge
additionalProperties: false
additionalProperties: false
Expand Down

0 comments on commit 28045ae

Please sign in to comment.