Skip to content

Commit

Permalink
Merge pull request #126 from mutalyzer/genbank-no-version
Browse files Browse the repository at this point in the history
Parse genbank file without VERSION field
  • Loading branch information
martijnvermaat committed Nov 11, 2015
2 parents 4e47d2c + d18b539 commit 757ec7e
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
8 changes: 6 additions & 2 deletions mutalyzer/parsers/genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,12 @@ def create_record(self, filename):
# the genbank file) are from the original NC reference. We try to
# set the .id field to the working value in the caller.
record.source_id = biorecord.id
record.source_accession, record.source_version = biorecord.id.split('.')[:2]
record.source_gi = biorecord.annotations['gi']
try:
record.source_accession, record.source_version = biorecord.id.split('.')[:2]
except ValueError:
record.source_accession = biorecord.id
record.source_version = '1'
record.source_gi = biorecord.annotations.get('gi')
record.organism = biorecord.annotations['organism']

# Todo: This will change once we support protein references
Expand Down
Binary file added tests/data/UD_143772172095.gb.bz2
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/data/references.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ MARK1:
- null
- - XM_005273136
- null
ADAC:
accession: UD_143772172095
checksum: 0b7f7991c1fb50bdfd04d3b0e405ecf3
filename: UD_143772172095.gb.bz2
NG_008939.1:
checksum: 114a03e16ad2f63531d796c2fb0d7039
filename: NG_008939.1.gb.bz2
Expand Down
14 changes: 14 additions & 0 deletions tests/test_parsers_genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,17 @@ def test_only_complete_genes_included(settings, references, parser):
filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession)
record = parser.create_record(filename)
assert [g.name for g in record.geneList] == ['A1BG']

@with_references('ADAC')
def test_no_version(settings, references, parser):
    """
    Parse a genbank file that lacks a 'version' field.

    Without that field, the BioPython record.id is just the accession
    number (no version part), which our parser used to crash on. The
    test makes no assertions on the result; it passes as long as
    create_record returns without raising.

    This genbank file was contributed by Gerard Schaafsma (original
    source unknown).
    """
    reference = references[0]
    # Cached reference files are stored as '<accession>.gb.bz2'.
    basename = '%s.gb.bz2' % reference.accession
    parser.create_record(os.path.join(settings.CACHE_DIR, basename))

0 comments on commit 757ec7e

Please sign in to comment.