Skip to content

Commit

Permalink
Accept transcript accession number as transcript variant identifier.
Browse files Browse the repository at this point in the history
* Internally the accession number is translated to the transcript name
  (i.e. v-number) and subsequent processing is untouched.

Added failing test with accession number as transcript variant identifier.

Fixed comments by @martijnvermaat in PR #405
  • Loading branch information
mkroon1 committed Jun 15, 2016
1 parent a77a3c0 commit 55bc10f
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 3 deletions.
15 changes: 15 additions & 0 deletions mutalyzer/GenRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,21 @@ def findGene(self, name) :
return None
#findGene

def get_transcript_selector(self, accession):
    """
    Returns a tuple with gene name and transcript name (i.e. its
    `v-number') for a given transcript accession number.

    The genes in self.geneList and, per gene, the transcripts in its
    transcriptList are scanned in order; the first transcript whose
    transcriptID equals the given accession is used.

    @param accession: unicode, transcript accession number that is
        compared for equality with each transcript's transcriptID
        (e.g. `NM_012459.2').
    @return: tuple(unicode, unicode) with (gene name, transcript name),
        or None if no transcript in this record has that transcriptID.
    """
    for gene in self.geneList:
        for transcript in gene.transcriptList:
            if transcript.transcriptID == accession:
                return gene.name, transcript.name

    # Nothing matched: return None explicitly instead of falling off the
    # end, so the `not found' result is visible in the code.
    return None
#get_transcript_selector

def listGenes(self) :
"""
List the names of all genes found in this record.
Expand Down
16 changes: 13 additions & 3 deletions mutalyzer/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,19 @@ class Grammar():
# BNF: GeneName -> ([a-Z] | [0-9] | `-')+
GeneName = Word(unicode(alphanums) + '-', min=1)

# BNF: GeneSymbol -> `(' Name (TransVar | ProtIso)? `)'
GeneSymbol = Suppress('(') + Group(GeneName('GeneSymbol') + \
Optional(TransVar ^ ProtIso))('Gene') + Suppress(')')
# BNF: GeneProductID -> GeneName (TransVar | ProtIso)?
GeneProductID = Group(GeneName('GeneSymbol') + \
Optional(TransVar ^ ProtIso))('Gene')

# BNF: AccNoStem -> ([a-Z] | `_')+ Number   (must not start with `LRG_')
AccNoStem = NotAny('LRG_') + Combine(Word(unicode(alphas) + '_') + Number)

# BNF: AccNoFull -> AccNoStem `.' Number
AccNoFull = AccNoStem + Suppress('.') + Number

# BNF: GeneSymbol -> `(' (GeneProductID | AccNoFull) `)'
GeneSymbol = Suppress('(') + (GeneProductID ^ AccNoFull('AccNoTransVar')) \
+ Suppress(')')

# BNF: GI -> (`GI' | `GI:')? Number
GI = Suppress(Optional('GI') ^ Optional('GI:') ^ Optional('gi') ^
Expand Down
13 changes: 13 additions & 0 deletions mutalyzer/variantchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1751,6 +1751,19 @@ def check_variant(description, output):
if not retrieved_record:
return

if gene_symbol == '' and transcript_id == '' and \
parsed_description.AccNoTransVar:
# Get gene symbol and transcript name by transcript accession number.
transcript_info = retrieved_record.get_transcript_selector(
'.'.join(parsed_description.AccNoTransVar))
if transcript_info is not None:
gene_symbol, transcript_id = transcript_info
else:
output.addMessage(__file__, 4, 'EINVALIDTRANSVAR',
'Invalid name for transcript variant identifier.')
return


# Add recordType to output for output formatting.
output.addOutput('recordType', filetype)
output.addOutput('organism', retrieved_record.organism)
Expand Down
20 changes: 20 additions & 0 deletions tests/test_variantchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2214,3 +2214,23 @@ def test_dup_range_arg_seq_reverse(output, checker):
assert len(e_nodna) == 0
e_ref = output.getMessagesWithErrorCode('EREF')
assert len(e_ref) == 0


@with_references('NG_012337.1')
def test_accno_as_transcript_variant(output, checker):
    """
    A transcript accession number is accepted as transcript variant
    identifier, and an unknown accession number is reported.
    """
    # Should be equivalent to the already accepted description
    # 'NG_012337.1(TIMM8B_v001):c.12_13insGATC'.
    known_accno = 'NG_012337.1(NM_012459.2):c.12_13insGATC'
    checker(known_accno)

    # Only the error count is inspected; the other two summary fields are
    # unpacked but unused.
    n_errors, _n_warnings, _summary = output.Summary()
    assert n_errors == 0
    g_descriptions = output.getOutput('gDescription')
    assert g_descriptions[0] == u'g.4911_4912insATCG'

    # An accession number that is not a transcript in the reference must
    # yield exactly one EINVALIDTRANSVAR message.
    unknown_accno = 'NG_012337.1(DUMMYACCNO_9999.9):c.12_13insGATC'
    checker(unknown_accno)

    assert len(output.getMessagesWithErrorCode('EINVALIDTRANSVAR')) == 1

0 comments on commit 55bc10f

Please sign in to comment.