Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Ter as a valid AA in the HGVS grammar #90

Merged
merged 4 commits into from
Oct 13, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 34 additions & 19 deletions mutalyzer/Retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ def _name_to_file(self, name):

:arg unicode name: The accession number.

:returns unicode: A filename.
:returns: A filename.
:rtype: unicode
"""
return os.path.join(
settings.CACHE_DIR, '{}.{}.bz2'.format(name, self.file_type))
Expand All @@ -73,7 +74,8 @@ def _write(self, raw_data, filename):
:arg str raw_data: The raw_data to be compressed and written.
:arg unicode filename: The intended name of the output filename.

:returns unicode: The full path and name of the file written.
:returns: The full path and name of the file written.
:rtype: unicode
"""
result = chardet.detect(raw_data)
if result['confidence'] > 0.5:
Expand Down Expand Up @@ -108,7 +110,8 @@ def _calculate_hash(self, content):

:arg unicode content: Arbitrary text.

:returns unicode: The md5sum of 'content'.
:returns: The md5sum of 'content'.
:rtype: unicode
"""
hash_func = hashlib.md5()
hash_func.update(content)
Expand All @@ -120,7 +123,8 @@ def _new_ud(self):
"""
Make a new UD number based on the current time (seconds since 1970).

:returns unicode: A new UD number.
:returns: A new UD number.
:rtype: unicode
"""
ud = util.generate_id()
return 'UD_' + unicode(ud)
Expand All @@ -131,7 +135,8 @@ def _update_db_md5(self, raw_data, name, gi):
:arg unicode name:
:arg unicode gi:

:returns unicode : filename
:returns: filename
:rtype: unicode
"""
# TODO: Documentation.
try:
Expand Down Expand Up @@ -164,7 +169,8 @@ def snpConvert(self, rs_id):

:arg unicode rs_id: The rsId of the SNP (example: 'rs9919552').

:returns list(unicode): A list of HGVS notations.
:returns: A list of HGVS notations.
:rtype: list(unicode)
"""
# A simple input check.
id = rs_id[2:]
Expand Down Expand Up @@ -275,9 +281,10 @@ def write(self, raw_data, filename, extract):
- 0 ; Do not extract, use 'filename'
- 1 ; Extract

:returns tuple(unicode, unicode): Depending on the value of 'extract':
:returns: Depending on the value of 'extract':
- 0 ; ('filename', None)
- 1 ; (id, gi)
:rtype: tuple(unicode, unicode)
"""
if raw_data.strip() == b'Nothing has been found':
self._output.addMessage(
Expand Down Expand Up @@ -428,7 +435,8 @@ def retrieveslice(self, accno, start, stop, orientation):
- 1 ; Forward.
- 2 ; Reverse complement.

:returns unicode: An UD number.
:returns: A UD number.
:rtype: unicode
"""
# Not a valid slice.
if start > stop:
Expand Down Expand Up @@ -515,7 +523,8 @@ def retrievegene(self, gene, organism, upstream=0, downstream=0):
:arg int upstream: Number of upstream nucleotides for the slice.
:arg int downstream: Number of downstream nucleotides for the slice.

:returns object: GenBank record.
:returns: GenBank record.
:rtype: object
"""
# Search the NCBI for a specific gene in an organism.
query = '{}[Gene] AND {}[Orgn]'.format(gene, organism)
Expand Down Expand Up @@ -647,7 +656,8 @@ def downloadrecord(self, url):

:arg unicode url: Location of a GenBank record.

:returns unicode: UD or None.
:returns: UD or None.
:rtype: unicode
"""
if not (url.startswith('http://') or url.startswith('https://') or
url.startswith('ftp://')):
Expand Down Expand Up @@ -703,7 +713,8 @@ def uploadrecord(self, raw_data):

:arg str raw_data: A GenBank record.

:returns unicode: Accession number for the uploaded file.
:returns: Accession number for the uploaded file.
:rtype: unicode
"""
md5sum = self._calculate_hash(raw_data)

Expand Down Expand Up @@ -738,8 +749,9 @@ def loadrecord(self, identifier):
:arg unicode identifier: A RefSeq accession number or geninfo
identifier (GI).

:returns object: A parsed RefSeq record or `None` if no record could be
found for the given identifier.
:returns: A parsed RefSeq record or `None` if no record could be found
for the given identifier.
:rtype: object
"""
if identifier[0].isdigit():
# This is a GI number (geninfo identifier).
Expand Down Expand Up @@ -833,8 +845,8 @@ def loadrecord(self, identifier):

:arg unicode identifier: The name of the LRG file to read.

:returns object: GenRecord.Record of LRG file or None in case of
failure.
:returns: GenRecord.Record of LRG file or None in case of failure.
:rtype: object
"""
# Make a filename based upon the identifier.
filename = self._name_to_file(identifier)
Expand Down Expand Up @@ -871,7 +883,8 @@ def fetch(self, name):

:arg unicode name: The name of the LRG file to fetch.

:returns unicode: the full path to the file; None in case of an error.
:returns: the full path to the file; None in case of an error.
:rtype: unicode
"""
prefix = settings.LRG_PREFIX_URL
url = prefix + '{}.xml'.format(name)
Expand Down Expand Up @@ -902,7 +915,8 @@ def downloadrecord(self, url, name=None):

:arg unicode url: Location of the LRG record.

:returns unicode: The full path to the file or Nonein case of failure.
:returns: The full path to the file or None in case of failure.
:rtype: unicode
"""
lrg_id = name or os.path.splitext(os.path.split(url)[1])[0]
# if not lrg_id.startswith('LRG'):
Expand Down Expand Up @@ -973,8 +987,9 @@ def write(self, raw_data, filename):
:arg str raw_data: The data.
:arg unicode filename: The intended name of the file.

:returns unicode: The full path and name of the file written, None in
case of an error.
:returns: The full path and name of the file written, None in case of
an error.
:rtype: unicode
"""
# Dirty way to test if a file is valid,
# Parse the file to see if it's a real LRG file.
Expand Down
11 changes: 10 additions & 1 deletion mutalyzer/db/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def get_transcript_protein_link(accession, reverse=False):
according to the configuration settings `PROTEIN_LINK_EXPIRATION` and
`NEGATIVE_PROTEIN_LINK_EXPIRATION`.

:arg str accession: Accession number to lookup link for.
:arg str accession: Accession number (without version number) to lookup
link for.
:arg bool reverse: If `True`, `accession` is assumed to be a protein
accession number, otherwise `accession` is assumed to be a transcript
accession number.
Expand Down Expand Up @@ -101,6 +102,14 @@ def update_transcript_protein_link(transcript_accession=None,
"""
Update cached link between a transcript and a protein, or create it if it
doesn't exist yet.

:arg str transcript_accession: Transcript accession number (without
version number).
:arg str protein_accession: Protein accession number (without version
number).

At least one of `transcript_accession` or `protein_accession` must not be
`None`.
"""
if transcript_accession is None and protein_accession is None:
raise ValueError('Link must have a transcript or protein')
Expand Down
10 changes: 6 additions & 4 deletions mutalyzer/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,12 +342,13 @@ class Grammar():

# BNF: AA3 -> `Ala' | `Arg' | `Asn' | `Asp' | `Cys' | `Gln' | `Glu' |
# `Gly' | `His' | `Ile' | `Leu' | `Lys' | `Met' | `Phe' |
# `Pro' | `Ser' | `Thr' | `Trp' | `Tyr' | `Val'
# `Pro' | `Ser' | `Thr' | `Trp' | `Tyr' | `Val' | `Ter'
AA3 = Literal('Ala') ^ Literal('Arg') ^ Literal('Asn') ^ Literal('Asp') ^ \
Literal('Cys') ^ Literal('Gln') ^ Literal('Glu') ^ Literal('Gly') ^ \
Literal('His') ^ Literal('Ile') ^ Literal('Leu') ^ Literal('Lys') ^ \
Literal('Met') ^ Literal('Phe') ^ Literal('Pro') ^ Literal('Ser') ^ \
Literal('Thr') ^ Literal('Trp') ^ Literal('Tyr') ^ Literal('Val')
Literal('Thr') ^ Literal('Trp') ^ Literal('Tyr') ^ Literal('Val') ^ \
Literal('Ter')

# BNF: AA1 -> `A' | `R' | `N' | `D' | `C' | `Q' | `E' | `G' | `H' | `I' |
# `L' | `K' | `M' | `F' | `P' | `S' | `T' | `W' | `Y' | `V'
Expand Down Expand Up @@ -377,8 +378,9 @@ class Grammar():
# BNF: Subst -> AAPtLoc AA (`extX' `*'? Number)? | (`Met1' | `M1') (`?' | `ext' `-' Number)
# Todo: 'extX' -> 'ext*' (and lose the optional '*'?)
# Todo: Optional AA before 'ext' and 'fMet' after 'ext'?
PSubst = (AAPtLoc + AA.setResultsName('Args') + Optional(Literal('extX') + Optional('*') + Number)) ^ \
((Literal('Met1') ^ Literal('M1')) + (Literal('?') ^ (Literal('ext') + Literal('-') + Number)))
PSubst = ((AAPtLoc + AA.setResultsName('Args') + Optional(Literal('extX') + Optional('*') + Number)) ^
((Literal('Met1') ^ Literal('M1')) + (Literal('?') ^ (Literal('ext') + Literal('-') + Number)))) \
+ Empty().setParseAction(replaceWith('subst'))('MutationType')

# BNF: Del -> AALoc `del'
PDel = AALoc + Literal('del')('MutationType')
Expand Down
6 changes: 4 additions & 2 deletions mutalyzer/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,8 @@ def guess_file_type(handle):

:arg file handle: Open readable handle to an NGS data file.

:returns unicode: Either 'fasta', 'fastq' or 'text'.
:returns: Either 'fasta', 'fastq' or 'text'.
:rtype: unicode
"""
try:
extension = getattr(handle, 'name').split('.')[-1]
Expand Down Expand Up @@ -397,7 +398,8 @@ def read_dna(handle):

:arg stream handle: Open readable handle to an NGS data file.

:returns unicode: Content of the first record in the file.
:returns: Content of the first record in the file.
:rtype: unicode
"""
file_format = guess_file_type(handle)

Expand Down