Skip to content

Commit

Permalink
Add test - need to test with spliced UTRs
Browse files Browse the repository at this point in the history
  • Loading branch information
dariober committed Dec 10, 2024
1 parent 3d18e6c commit 6cf24fd
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,10 @@ describe('CDS without exons', () => {
it('Convert mRNA with CDS but without exon', () => {
const [gffFeature] = readFeatureFile('test_data/cds_without_exon.gff')
const actual = gff3ToAnnotationFeature(gffFeature)
assert.deepEqual(JSON.stringify(actual), '')
const expected = readAnnotationFeatureSnapshot(
'test_data/cds_without_exon.json',
)
compareFeatures(actual, expected)
})
})

Expand Down
47 changes: 26 additions & 21 deletions packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,21 +183,23 @@ function convertChildren(
convertedChildren[child._id] = child
}
}
const processedCDS =
cdsFeatures.length > 0 ? processCDS(cdsFeatures, refSeq, featureIds) : []

for (const cds of processedCDS) {
convertedChildren[cds._id] = cds
}
if (cdsFeatures.length > 0) {
const processedCDS = processCDS(cdsFeatures, refSeq, featureIds)

const missingExons = inferMissingExons(
cdsFeatures,
exonFeatures,
utrFeatures,
refSeq,
)
for (const exon of missingExons) {
convertedChildren[exon._id] = exon
for (const cds of processedCDS) {
convertedChildren[cds._id] = cds
}

const missingExons = inferMissingExons(
cdsFeatures,
exonFeatures,
utrFeatures,
processedCDS[0].refSeq,
)
for (const exon of missingExons) {
convertedChildren[exon._id] = exon
}
}

if (Object.keys(convertedChildren).length > 0) {
Expand All @@ -210,15 +212,12 @@ function inferMissingExons(
cdsFeatures: GFF3Feature[],
existingExons: GFF3Feature[],
utrFeatures: GFF3Feature[],
refSeq?: string,
refSeq: string,
): AnnotationFeatureSnapshot[] {
if (!refSeq) {
return []
// throw new Error('refSeq is missing')
}
const missingExons: AnnotationFeatureSnapshot[] = []
for (const protein of cdsFeatures) {
for (const cds of protein) {
// For each CDS, check whether an existing exon contains it. If not, create an exon with the same coordinates as the CDS.
let exonFound = false
for (const x of existingExons) {
if (x.length != 1) {
Expand All @@ -241,19 +240,25 @@ function inferMissingExons(
if (!cds.start || !cds.end) {
throw new Error('Invalid CDS feature')
}
let strand: 1 | -1 | undefined = undefined
if (cds.strand === '+') {
strand = 1
} else if (cds.strand === '-') {
strand = -1
}
const newExon: AnnotationFeatureSnapshot = {
_id: new ObjectID().toHexString(),
refSeq,
type: 'exon',
min: cds.start - 1,
max: cds.end,
strand: cds.strand === '+' ? 1 : cds.strand === '-' ? -1 : undefined,
strand,
}
for (const utr of utrFeatures) {
// If the new exon is adjacent to a UTR, merge the UTR
if (utr.length != 1 || !utr[0].start || !utr[0].end) {
throw new Error('Too many UTRs')
throw new Error('Too many UTRs or invalid UTR')
}
// If the new exon is adjacent to a UTR, merge the UTR
if (utr[0].end === newExon.min) {
newExon.min = utr[0].start - 1
break
Expand Down
1 change: 0 additions & 1 deletion packages/apollo-shared/test_data/cds_without_exon.gff
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
ctgA example gene 1050 9000 . + . ID=eden
ctgA example mRNA 1050 9000 . + . ID=eden.1;Parent=eden
ctgA example five_prime_UTR 1050 1210 . + 0 ID=five1;Parent=eden.1
ctgA example exon 1211 1510 . + 0 ID=exon2;Parent=eden.1
ctgA example CDS 1211 1510 . + 0 ID=cds2;Parent=eden.1
ctgA example CDS 1611 1710 . + 0 ID=cds2;Parent=eden.1
ctgA example three_prime_UTR 1711 1800 . + 0 ID=three1;Parent=eden.1
Expand Down
74 changes: 74 additions & 0 deletions packages/apollo-shared/test_data/cds_without_exon.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"_id": "67581b7d5890a8eb1bedab6e",
"refSeq": "ctgA",
"type": "gene",
"min": 1049,
"max": 9000,
"strand": 1,
"children": {
"67581b7d5890a8eb1bedab6c": {
"_id": "67581b7d5890a8eb1bedab6c",
"refSeq": "ctgA",
"type": "mRNA",
"min": 1049,
"max": 9000,
"strand": 1,
"children": {
"67581b7d5890a8eb1bedab66": {
"_id": "67581b7d5890a8eb1bedab66",
"refSeq": "ctgA",
"type": "exon",
"min": 1200,
"max": 1500,
"strand": 1,
"attributes": { "gff_source": ["example"], "gff_id": ["exon1"] }
},
"67581b7d5890a8eb1bedab67": {
"_id": "67581b7d5890a8eb1bedab67",
"refSeq": "ctgA",
"type": "CDS",
"min": 1210,
"max": 1710,
"strand": 1,
"attributes": { "gff_source": ["example"], "gff_id": ["cds2"] }
},
"67581b7d5890a8eb1bedab68": {
"_id": "67581b7d5890a8eb1bedab68",
"refSeq": "ctgA",
"type": "CDS",
"min": 1200,
"max": 1700,
"strand": 1,
"attributes": { "gff_source": ["example"], "gff_id": ["cds1"] }
},
"67581b7d5890a8eb1bedab69": {
"_id": "67581b7d5890a8eb1bedab69",
"refSeq": "ctgA",
"type": "exon",
"min": 1049,
"max": 1510,
"strand": 1
},
"67581b7d5890a8eb1bedab6b": {
"_id": "67581b7d5890a8eb1bedab6b",
"refSeq": "ctgA",
"type": "exon",
"min": 1600,
"max": 1800,
"strand": 1
}
},
"attributes": { "gff_source": ["example"], "gff_id": ["eden.1"] }
},
"67581b7d5890a8eb1bedab6d": {
"_id": "67581b7d5890a8eb1bedab6d",
"refSeq": "ctgA",
"type": "TF_binding_site",
"min": 1049,
"max": 1100,
"strand": 1,
"attributes": { "gff_source": ["example"] }
}
},
"attributes": { "gff_source": ["example"], "gff_id": ["eden"] }
}

0 comments on commit 6cf24fd

Please sign in to comment.