Skip to content

Commit

Permalink
Temp commit: Need to fix handling of missing refSeq and proper testing
Browse files Browse the repository at this point in the history
  • Loading branch information
dariober committed Dec 9, 2024
1 parent c887747 commit 3d18e6c
Show file tree
Hide file tree
Showing 4 changed files with 1,036 additions and 2 deletions.
4 changes: 2 additions & 2 deletions packages/apollo-collaboration-server/.development.env
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ SESSION_SECRET=g9fGaRuw06T7hs960Tm7KYyfcFaYEIaG9jfFnVEQ4QyFXmq7
##############################################################################

# Google client id and secret.
GOOGLE_CLIENT_ID=1054515969695-3hpfg1gd0ld3sgj135kfgikolu86vv30.apps.googleusercontent.com
GOOGLE_CLIENT_ID=1000521104117-bhd8r4v11cc053g0b80ui00ss9s5fitv.apps.googleusercontent.com
# Alternatively, can be a path to a file with the client ID
# GOOGLE_CLIENT_ID_FILE=/run/secrets/google-client-id
GOOGLE_CLIENT_SECRET=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
GOOGLE_CLIENT_SECRET=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
# Alternatively, can be a path to a file with the client secret
# GOOGLE_CLIENT_SECRET_FILE=/run/secrets/google-client-secret

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ describe('gff3ToAnnotationFeature examples', () => {
})
})

// Converts the first feature of a fixture holding an mRNA that has CDS
// children but no exon children, and compares the JSON-serialized result
// against the expected string.
describe('CDS without exons', () => {
  it('Convert mRNA with CDS but without exon', () => {
    const parsedFeatures = readFeatureFile('test_data/cds_without_exon.gff')
    const [firstFeature] = parsedFeatures
    const serialized = JSON.stringify(gff3ToAnnotationFeature(firstFeature))
    // NOTE(review): JSON.stringify never yields an empty string, so the
    // expected value below looks like a placeholder -- confirm and replace it
    // with the real serialized snapshot.
    assert.deepEqual(serialized, '')
  })
})

describe('gff3ToAnnotationFeature', () => {
for (const testCase of testCases) {
const [description, featureLine, convertedFeature] = testCase
Expand Down
112 changes: 112 additions & 0 deletions packages/apollo-shared/src/GFF3/gff3ToAnnotationFeature.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,19 @@ function convertChildren(
const { child_features: childFeatures } = firstFeature

const cdsFeatures: GFF3Feature[] = []
const exonFeatures: GFF3Feature[] = []
const utrFeatures: GFF3Feature[] = []
for (const childFeature of childFeatures) {
const [firstChildFeatureLocation] = childFeature
if (firstChildFeatureLocation.type === 'exon') {
exonFeatures.push(childFeature)
}
if (
firstChildFeatureLocation.type === 'three_prime_UTR' ||
firstChildFeatureLocation.type === 'five_prime_UTR'
) {
utrFeatures.push(childFeature)
}
if (
firstChildFeatureLocation.type === 'three_prime_UTR' ||
firstChildFeatureLocation.type === 'five_prime_UTR' ||
Expand All @@ -174,16 +185,117 @@ function convertChildren(
}
const processedCDS =
cdsFeatures.length > 0 ? processCDS(cdsFeatures, refSeq, featureIds) : []

for (const cds of processedCDS) {
convertedChildren[cds._id] = cds
}

const missingExons = inferMissingExons(
cdsFeatures,
exonFeatures,
utrFeatures,
refSeq,
)
for (const exon of missingExons) {
convertedChildren[exon._id] = exon
}

if (Object.keys(convertedChildren).length > 0) {
return convertedChildren
}
return
}

/**
 * Infer exon features for CDS segments that no annotated exon covers.
 *
 * Every CDS location is compared against the existing exons. When none of
 * them fully contains the CDS, a new exon snapshot spanning the CDS is
 * created. The new exon is then extended over every UTR that directly abuts
 * it (on either side), and finally all inferred exons are passed through
 * `mergeAnnotationFeatures` so overlapping ones are combined.
 *
 * @param cdsFeatures - CDS features; each feature is a list of locations
 * @param existingExons - exon features already present on the parent; each
 * is expected to have exactly one location
 * @param utrFeatures - UTR features of the parent; each is expected to have
 * exactly one location with both coordinates set
 * @param refSeq - reference sequence assigned to the inferred exons
 * @returns the inferred exons, or an empty array when `refSeq` is missing
 * @throws Error when an exon or UTR feature does not have exactly one
 * location, or when a CDS or UTR location lacks a start or end coordinate
 */
function inferMissingExons(
  cdsFeatures: GFF3Feature[],
  existingExons: GFF3Feature[],
  utrFeatures: GFF3Feature[],
  refSeq?: string,
): AnnotationFeatureSnapshot[] {
  if (!refSeq) {
    // A snapshot cannot be built without a reference sequence.
    // TODO: decide whether a missing refSeq should throw instead of silently
    // inferring nothing.
    return []
  }
  const missingExons: AnnotationFeatureSnapshot[] = []
  for (const protein of cdsFeatures) {
    for (const cds of protein) {
      let exonFound = false
      for (const exonFeature of existingExons) {
        if (exonFeature.length !== 1) {
          throw new Error('Unexpected number of exons')
        }
        const [exon] = exonFeature
        // The CDS is covered when one exon contains it entirely.
        if (
          exon.start &&
          exon.end &&
          cds.start &&
          cds.end &&
          exon.start <= cds.start &&
          exon.end >= cds.end
        ) {
          exonFound = true
          break
        }
      }
      if (exonFound) {
        continue
      }
      if (!cds.start || !cds.end) {
        throw new Error('Invalid CDS feature')
      }
      // Spelled out as statements rather than a nested ternary, which the
      // project's lint check rejects.
      let strand: 1 | -1 | undefined
      if (cds.strand === '+') {
        strand = 1
      } else if (cds.strand === '-') {
        strand = -1
      }
      const newExon: AnnotationFeatureSnapshot = {
        _id: new ObjectID().toHexString(),
        refSeq,
        type: 'exon',
        // The snapshot's min is one less than the GFF3 start; max equals the
        // GFF3 end.
        min: cds.start - 1,
        max: cds.end,
        strand,
      }
      for (const utr of utrFeatures) {
        if (utr.length !== 1) {
          throw new Error('Too many UTRs')
        }
        const [utrLocation] = utr
        if (!utrLocation.start || !utrLocation.end) {
          throw new Error('Invalid UTR feature')
        }
        // Absorb any UTR that directly abuts the exon. The loop keeps going
        // after a match so that a CDS flanked by UTRs on both sides ends up
        // in an exon covering both of them.
        if (utrLocation.end === newExon.min) {
          newExon.min = utrLocation.start - 1
        } else if (newExon.max + 1 === utrLocation.start) {
          newExon.max = utrLocation.end
        }
      }
      missingExons.push(newExon)
    }
  }
  return mergeAnnotationFeatures(missingExons)
}

/**
 * Merge features whose `[min, max]` ranges overlap or touch.
 *
 * Features are processed in ascending `min` order. A feature whose `min` is
 * not greater than the `max` of the previously emitted feature is folded into
 * it by extending that feature's `max`; otherwise it starts a new entry. Only
 * `min`/`max` are considered -- every other property of a merged entry comes
 * from the first feature of its group.
 *
 * Neither the input array nor the feature objects in it are modified: the
 * array is copied before sorting and each emitted feature is a shallow copy.
 *
 * @param features - features to merge, in any order
 * @returns a new array of merged features sorted by `min`
 */
function mergeAnnotationFeatures(
  features: AnnotationFeatureSnapshot[],
): AnnotationFeatureSnapshot[] {
  // Sort a copy so the caller's array keeps its original order.
  const sorted = [...features].sort((a, b) => a.min - b.min)

  const merged: AnnotationFeatureSnapshot[] = []
  let last: AnnotationFeatureSnapshot | undefined
  for (const feature of sorted) {
    if (last && feature.min <= last.max) {
      // Overlaps (or touches) the previous merged interval: extend it.
      last.max = Math.max(last.max, feature.max)
    } else {
      // Copy before storing so later extensions never touch the input object.
      last = { ...feature }
      merged.push(last)
    }
  }
  return merged
}

/**
* If a GFF3 file has CDS features that either (1) don't have an ID or (2) have
* different IDs for each CDS, we have to do a bit of guessing about how they
Expand Down
Loading

0 comments on commit 3d18e6c

Please sign in to comment.