mutalyzer · martijnvermaat · Jul 21, 2015 · Jul 20, 2015
diff --git a/migrations/versions/4bafcc5086dd_fix_zero_exon_transcript_mappings.py b/migrations/versions/4bafcc5086dd_fix_zero_exon_transcript_mappings.py
@@ -0,0 +1,36 @@
+"""Fix zero-exon transcript mappings
+
+Revision ID: 4bafcc5086dd
+Revises: 2e062969eb54
+Create Date: 2015-07-20 16:16:01.602964
+
+"""
+
+from __future__ import unicode_literals
+
+# Revision identifiers, used by Alembic: `revision` is the ID of this
+# migration and `down_revision` is the ID of the migration it revises (both
+# match the "Revision ID" / "Revises" lines in the module docstring).
+revision = '4bafcc5086dd'
+down_revision = u'2e062969eb54'
+
+from alembic import op
+from sqlalchemy import sql
+import sqlalchemy as sa
+
+
+def upgrade():
+    """Give every exon-less transcript mapping a single exon.
+
+    Each row of ``transcript_mappings`` whose ``exon_starts`` equals the
+    empty string gets ``exon_starts`` set to its ``start`` value and
+    ``exon_stops`` set to its ``stop`` value, in one UPDATE statement.
+    """
+    # Ad-hoc table description listing only the columns this migration
+    # reads or writes.
+    mappings = sql.table(
+        'transcript_mappings',
+        sql.column('start', sa.Integer()),
+        sql.column('stop', sa.Integer()),
+        sql.column('exon_starts', sa.Text()),
+        sql.column('exon_stops', sa.Text()))
+
+    # The empty string is passed as an inline literal; see the op.execute()
+    # documentation:
+    # https://alembic.readthedocs.org/en/latest/ops.html#alembic.operations.Operations.execute
+    has_no_exons = mappings.c.exon_starts == op.inline_literal('')
+    single_exon = {'exon_starts': mappings.c.start,
+                   'exon_stops': mappings.c.stop}
+
+    op.execute(mappings.update().where(has_no_exons).values(single_exon))
+
+
+def downgrade():
+    """Do nothing: this migration cannot be reliably downgraded.
+
+    After ``upgrade()`` has run, a row that originally had empty exon lists
+    looks the same as a row whose single exon already equalled its
+    ``start``/``stop`` values, so the old state is not restored.
+    """
+    return None
diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py
@@ -920,6 +920,9 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
 
     Our strategy is to sort by gene and chromosome and process the file
     grouped by these two fields.
+
+    For transcripts without any UTR and CDS entries (seems to happen for
+    predicted genes), we generate one exon spanning the entire transcript.
     """
     columns = ['taxonomy', 'chromosome', 'start', 'stop', 'orientation',
                'contig', 'ctg_start', 'ctg_stop', 'ctg_orientation',
@@ -999,6 +1002,12 @@ def build_mappings(records):
             else:
                 cds = None
 
+            # If no exons are annotated, we create one spanning the entire
+            # transcript.
+            if not exon_starts:
+                exon_starts = [start]
+                exon_stops = [stop]
+
             yield TranscriptMapping.create_or_update(
                 chromosome, 'refseq', accession, gene, orientation, start,
                 stop, exon_starts, exon_stops, 'ncbi', cds=cds,