Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add capability for inputting full ForenSeq sequences (not run through UAS) #16

Merged
merged 18 commits into from
Apr 28, 2020
Merged
86 changes: 63 additions & 23 deletions lusSTR/annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ def rev_complement_anno(sequence):
'''
Function creates reverse complement of sequence

Sequences in which the UAS software output contains the sequence on the reverse strand require
translation of the sequence to the forward strand. This allows for consistency between both
loci and any outside analyses in which comparisons may be made.
Sequences in which the UAS software output contains the sequence on the reverse strand
require translation of the sequence to the forward strand. This allows for consistency
between both loci and any outside analyses in which comparisons may be made.
'''
complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
bases = list(sequence)
Expand All @@ -132,9 +132,9 @@ def rev_comp_forward_strand_bracket(rev_sequence, n, repeat_list, locusid, canno
'''
Function creates bracketed annotation for reverse complement sequences

Function is used to create the bracketed annotation for reverse complement sequences (i.e. the
forward strand). It calls additional functions depending on the locus and/or if the sequence
is a microvariant or not.
Function is used to create the bracketed annotation for reverse complement sequences (i.e
the forward strand). It calls additional functions depending on the locus and/or if the
sequence is a microvariant or not.
'''
if locusid in cannot_split_list:
if locusid == "D19S433":
Expand Down Expand Up @@ -591,6 +591,23 @@ def PentaD_annotation(sequence, no_of_repeat_bases, repeat_list):
return re.sub(" ", " ", final_string)


def full_seq_to_uas(full_seq, front, back):
'''
Function to trim full sequences to the UAS region.

It identifies the number of bases to remove from the 5' and 3' ends of the sequence to
trim to the UAS region. The downstream annotation, including length-based allele
designations, LUS, LUS+ and bracketed annotation is based on this region in the sequence.
'''
if front == 0:
seq_uas = full_seq[:-back]
elif back == 0:
seq_uas = full_seq[front:]
else:
seq_uas = full_seq[front:-back]
return seq_uas


def main(args):
cannot_split = [
"D19S433", "D6S1043", "TH01", "D21S11", "D1S1656", "D7S820", "D5S818", "D12S391",
Expand Down Expand Up @@ -619,32 +636,53 @@ def main(args):
lus = str_dict[locus]['LUS']
sec = str_dict[locus]['Sec']
tert = str_dict[locus]['Tert']
str_allele = traditional_str_allele(sequence, no_of_repeat_bases, no_of_sub_bases)
foren_5 = str_dict[locus]['Foren_5']
foren_3 = str_dict[locus]['Foren_3']
power_5 = str_dict[locus]['Power_5']
power_3 = str_dict[locus]['Power_3']
standage marked this conversation as resolved.
Show resolved Hide resolved
if args.uas:
uas_sequence = sequence
else:
if args.kit == "forenseq":
if str_dict[locus]['ReverseCompNeeded'] == "No":
uas_sequence = full_seq_to_uas(sequence, foren_5, foren_3)
else:
uas_from_full = full_seq_to_uas(sequence, foren_5, foren_3)
uas_sequence = rev_complement_anno(uas_from_full)
elif args.kit == "powerseq":
if str_dict[locus]['ReverseCompNeeded'] == "No":
uas_sequence = full_seq_to_uas(sequence, power_5, power_3)
else:
uas_from_full = full_seq_to_uas(sequence, power_5, power_3)
uas_sequence = rev_complement_anno(uas_from_full)
str_allele = traditional_str_allele(uas_sequence, no_of_repeat_bases, no_of_sub_bases)
if (
locus in cannot_split or
((len(sequence) % no_of_repeat_bases != 0) and locus not in must_split)
((len(uas_sequence) % no_of_repeat_bases != 0) and locus not in must_split)
):
standage marked this conversation as resolved.
Show resolved Hide resolved
if str_dict[locus]['ReverseCompNeeded'] == "Yes":
reverse_comp_sequence = rev_complement_anno(sequence)
reverse_comp_sequence = rev_complement_anno(uas_sequence)
print(reverse_comp_sequence)
standage marked this conversation as resolved.
Show resolved Hide resolved
forward_strand_bracketed_form = rev_comp_forward_strand_bracket(
reverse_comp_sequence, no_of_repeat_bases, repeats, locus, cannot_split
)
print(forward_strand_bracketed_form)
standage marked this conversation as resolved.
Show resolved Hide resolved
reverse_strand_bracketed_form = rev_comp_uas_output_bracket(
forward_strand_bracketed_form, no_of_repeat_bases
)
elif locus == "D21S11":
forward_strand_bracketed_form = D21_bracket(
sequence, no_of_split_bases, repeats
uas_sequence, no_of_split_bases, repeats
)
elif locus == "TH01" and (len(sequence) % no_of_repeat_bases != 0):
forward_strand_bracketed_form = TH01_annotation(sequence, repeats)
elif locus == "TH01" and (len(uas_sequence) % no_of_repeat_bases != 0):
forward_strand_bracketed_form = TH01_annotation(uas_sequence, repeats)
elif locus == "PentaD":
forward_strand_bracketed_form = PentaD_annotation(
sequence, no_of_repeat_bases, repeats
uas_sequence, no_of_repeat_bases, repeats
)
else:
forward_strand_bracketed_form = split_string(
sequence, no_of_repeat_bases, repeats
uas_sequence, no_of_repeat_bases, repeats
)
lus_final, sec_final, tert_final = lus_anno(
forward_strand_bracketed_form, lus, sec, tert, locus, str_allele
Expand All @@ -653,14 +691,14 @@ def main(args):
if locus == "D18S51":
if type(str_allele) == str:
forward_strand_bracketed_form = split_string(
sequence, no_of_repeat_bases, repeats
uas_sequence, no_of_repeat_bases, repeats
)
else:
forward_strand_bracketed_form = loci_need_split_anno(
sequence, no_of_repeat_bases
uas_sequence, no_of_repeat_bases
)
elif str_dict[locus]['ReverseCompNeeded'] == "Yes":
reverse_comp_sequence = rev_complement_anno(sequence)
reverse_comp_sequence = rev_complement_anno(uas_sequence)
forward_strand_bracketed_form = rev_comp_forward_strand_bracket(
reverse_comp_sequence, no_of_repeat_bases, repeats, locus, cannot_split
)
Expand All @@ -669,10 +707,12 @@ def main(args):
)
elif locus == "PentaD":
forward_strand_bracketed_form = PentaD_annotation(
sequence, no_of_repeat_bases, repeats
uas_sequence, no_of_repeat_bases, repeats
)
else:
forward_strand_bracketed_form = loci_need_split_anno(sequence, no_of_repeat_bases)
forward_strand_bracketed_form = loci_need_split_anno(
uas_sequence, no_of_repeat_bases
)
lus_final, sec_final, tert_final = lus_anno(
forward_strand_bracketed_form, lus, sec, tert, locus, str_allele
)
Expand All @@ -692,14 +732,14 @@ def main(args):
lus_plus = f"{str_allele}_{lus_final}_{sec_final}_{tert_final}"
if str_dict[locus]['ReverseCompNeeded'] == "Yes":
summary = [
sampleid, project, analysis, locus, sequence, reverse_comp_sequence, str_allele,
forward_strand_bracketed_form, reverse_strand_bracketed_form, lus_final_output,
lus_plus, reads
sampleid, project, analysis, locus, uas_sequence, reverse_comp_sequence,
str_allele, forward_strand_bracketed_form, reverse_strand_bracketed_form,
lus_final_output, lus_plus, reads
]
summary = '\t'.join(str(i) for i in summary)
else:
summary = [
sampleid, project, analysis, locus, sequence, sequence, str_allele,
sampleid, project, analysis, locus, uas_sequence, uas_sequence, str_allele,
forward_strand_bracketed_form, forward_strand_bracketed_form, lus_final_output,
lus_plus, reads
]
Expand Down
9 changes: 9 additions & 0 deletions lusSTR/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ def annot_subparser(subparsers):
'input', help='sample(s) in CSV format; first four columns must be Locus, NumReads, '
'Sequence, SampleID; Optional last two columns can be Project and Analysis.'
)
cli.add_argument(
'--kit', choices=['forenseq', 'powerseq'], default='forenseq',
help='Kit used to develop sequences; only forenseq or powerseq accepted;'
'default = forenseq'
)
cli.add_argument(
'--uas', action='store_true',
help='Use if sequences have been run through the ForenSeq UAS.'
)


mains = {
Expand Down
Loading