Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split citations column to multiple based on data fields #134

Merged
merged 1 commit into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions AnnotatorCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,31 +367,34 @@ def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerTy
outf.write("\t" + VARIANT_IN_ONCOKB_HEADER)

outf.write("\tMUTATION_EFFECT")
outf.write("\tMUTATION_EFFECT_CITATIONS")
outf.write("\tONCOGENIC")

newncols += 4
newncols += 5

for l in levels:
outf.write('\t' + l)
newncols += len(levels)

outf.write("\tHIGHEST_LEVEL")
outf.write("\tCITATIONS")
outf.write("\tTX_CITATIONS")
newncols += 2

for l in dxLevels:
outf.write('\t' + l)
newncols += len(dxLevels)

outf.write("\tHIGHEST_DX_LEVEL")
newncols += 1
outf.write("\tDX_CITATIONS")
newncols += 2

for l in pxLevels:
outf.write('\t' + l)
newncols += len(pxLevels)

outf.write("\tHIGHEST_PX_LEVEL")
newncols += 1
outf.write("\tPX_CITATIONS")
newncols += 2

outf.write("\n")

Expand Down Expand Up @@ -1417,8 +1420,11 @@ def gettumortypename(tumortype):
return tumortype['mainType']['name']


def getimplications(oncokbdata, levels, implications):
def getimplications(oncokbdata, implication_type, levels, implications):
citation_column_key = implication_type + '_citations'
for implication in implications:
oncokbdata[citation_column_key] = appendoncokbcitations(oncokbdata[citation_column_key], implication['pmids'],
implication['abstracts'])
level = implication['levelOfEvidence']

if level is not None:
Expand Down Expand Up @@ -1629,8 +1635,12 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
oncokbdata[GENE_IN_ONCOKB_HEADER] = GENE_IN_ONCOKB_DEFAULT
oncokbdata[VARIANT_IN_ONCOKB_HEADER] = VARIANT_IN_ONCOKB_DEFAULT
oncokbdata['mutation_effect'] = ""
oncokbdata['mutation_effect_citations'] = []
oncokbdata['citations'] = []
oncokbdata['oncogenic'] = ""
oncokbdata['tx_citations'] = []
oncokbdata['dx_citations'] = []
oncokbdata['px_citations'] = []

try:
# oncogenic
Expand All @@ -1646,7 +1656,7 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
# mutation effect
if (annotation['mutationEffect'] is not None):
oncokbdata['mutation_effect'] = annotation['mutationEffect']['knownEffect']
oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'],
oncokbdata['mutation_effect_citations'] = appendoncokbcitations(oncokbdata['mutation_effect_citations'],
annotation['mutationEffect']['citations']['pmids'],
annotation['mutationEffect']['citations']['abstracts'])

Expand All @@ -1663,7 +1673,7 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
else:
drugs = treatment['drugs']

oncokbdata['citations'] = appendoncokbcitations(oncokbdata['citations'], treatment['pmids'],
oncokbdata['tx_citations'] = appendoncokbcitations(oncokbdata['tx_citations'], treatment['pmids'],
treatment['abstracts'])

if len(drugs) == 0:
Expand All @@ -1676,10 +1686,10 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
if treatmentname not in oncokbdata[level]:
oncokbdata[level].append('+'.join(drugnames))
if annotation['diagnosticImplications'] is not None:
getimplications(oncokbdata, dxLevels, annotation['diagnosticImplications'])
getimplications(oncokbdata, 'dx', dxLevels, annotation['diagnosticImplications'])

if annotation['prognosticImplications'] is not None:
getimplications(oncokbdata, pxLevels, annotation['prognosticImplications'])
getimplications(oncokbdata, 'px', pxLevels, annotation['prognosticImplications'])

oncokbdata['highestDiagnosticImplicationLevel'] = annotation['highestDiagnosticImplicationLevel']
oncokbdata['highestPrognosticImplicationLevel'] = annotation['highestPrognosticImplicationLevel']
Expand All @@ -1701,18 +1711,22 @@ def process_oncokb_annotation(annotation, annotate_hotspot):
ret.append(oncokbdata[GENE_IN_ONCOKB_HEADER])
ret.append(oncokbdata[VARIANT_IN_ONCOKB_HEADER])
ret.append(oncokbdata['mutation_effect'])
ret.append(';'.join(oncokbdata['mutation_effect_citations']))
ret.append(oncokbdata['oncogenic'])
for l in levels:
ret.append(','.join(oncokbdata[l]))
ret.append(gethighestsensitivitylevel(oncokbdata))
ret.append(';'.join(oncokbdata['citations']))
ret.append(';'.join(oncokbdata['tx_citations']))

for l in dxLevels:
ret.append(','.join(oncokbdata[l]))
ret.append(gethighestDxPxlevel(dxLevels, [oncokbdata['highestDiagnosticImplicationLevel']]))
ret.append(';'.join(oncokbdata['dx_citations']))

for l in pxLevels:
ret.append(','.join(oncokbdata[l]))
ret.append(gethighestDxPxlevel(pxLevels, [oncokbdata['highestPrognosticImplicationLevel']]))
ret.append(';'.join(oncokbdata['px_citations']))

return ret

Expand Down
29 changes: 16 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,22 @@ python ${FILE_NAME.py} -i ${INPUT_FILE} -o ${OUTPUT_FILE} -b ${ONCOKB_API_TOKEN}


## Columns added in the annotation files
| Column | Possible Values | Description |
|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| GENE_IN_ONCOKB | TRUE, FALSE | Whether the gene has been curated by the OncoKB Team |
| VARIANT_IN_ONCOKB | TRUE, FALSE | Whether the variant has been curated by the OncoKB Team. Note: when a variant does not exist, it may still have annotations. |
| MUTATION_EFFECT | Gain-of-function, Likely Gain-of-function, Loss-of-function, Likely Loss-of-function, Switch-of-function, Likely Switch-of-function, Neutral, Likely Neutral, Inconclusive, Unknown | The biological effect of a mutation/alteration on the protein function that gives rise to changes in the biological properties of cells expressing the mutant/altered protein compared to cells expressing the wildtype protein. |
| ONCOGENIC | Oncogenic, Likely Oncogenic, Likely Neutral, Inconclusive, Unknown, Resistance | In OncoKB, “oncogenic” is defined as “referring to the ability to induce or cause cancer” as described in the second edition of The Biology of Cancer by Robert Weinberg (2014). |
| LEVEL_* | Therapeutic implications | The leveled therapeutic implications |
| HIGHEST_LEVEL | LEVEL_1, LEVEL_2, LEVEL_3A, LEVEL_3B, LEVEL_4, LEVEL_R1, LEVEL_R2 | The highest level of evidence for therapeutic implications |
| CITATIONS | PMID, Abstract, Website Link | All citations related to a mutation/alteration |
| LEVEL_Dx* | Tumor type the level of evidence is assigned to | The leveled diagnostic implications |
| HIGHEST_DX_LEVEL | LEVEL_Dx1, LEVEL_Dx2, LEVEL_Dx3 | The highest level of evidence for diagnostic implications |
| LEVEL_Px* | Tumor type the level of evidence is assigned to | The leveled prognostic implications |
| HIGHEST_PX_LEVEL | LEVEL_Px1, LEVEL_Px2, LEVEL_Px3 | The highest level of evidence for prognostic implications |
| Column | Possible Values | Description |
|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| GENE_IN_ONCOKB | TRUE, FALSE | Whether the gene has been curated by the OncoKB Team |
| VARIANT_IN_ONCOKB | TRUE, FALSE | Whether the variant has been curated by the OncoKB Team. Note: a variant that does not exist in OncoKB may still have annotations. |
| MUTATION_EFFECT | Gain-of-function, Likely Gain-of-function, Loss-of-function, Likely Loss-of-function, Switch-of-function, Likely Switch-of-function, Neutral, Likely Neutral, Inconclusive, Unknown | The biological effect of a mutation/alteration on the protein function that gives rise to changes in the biological properties of cells expressing the mutant/altered protein compared to cells expressing the wildtype protein. |
| MUTATION_EFFECT_CITATIONS | PMID, Abstract, Website Link | All citations related to the biological effect |
| ONCOGENIC | Oncogenic, Likely Oncogenic, Likely Neutral, Inconclusive, Unknown, Resistance | In OncoKB, “oncogenic” is defined as “referring to the ability to induce or cause cancer” as described in the second edition of The Biology of Cancer by Robert Weinberg (2014). |
| LEVEL_* | Therapeutic implications | The leveled therapeutic implications |
| HIGHEST_LEVEL | LEVEL_1, LEVEL_2, LEVEL_3A, LEVEL_3B, LEVEL_4, LEVEL_R1, LEVEL_R2 | The highest level of evidence for therapeutic implications |
| TX_CITATIONS | PMID, Abstract, Website Link | All citations related to therapeutic implications |
| LEVEL_Dx* | Tumor type the level of evidence is assigned to | The leveled diagnostic implications |
| HIGHEST_DX_LEVEL | LEVEL_Dx1, LEVEL_Dx2, LEVEL_Dx3 | The highest level of evidence for diagnostic implications |
| DX_CITATIONS | PMID, Abstract, Website Link | All citations related to diagnostic implications |
| LEVEL_Px* | Tumor type the level of evidence is assigned to | The leveled prognostic implications |
| HIGHEST_PX_LEVEL | LEVEL_Px1, LEVEL_Px2, LEVEL_Px3 | The highest level of evidence for prognostic implications |
| PX_CITATIONS | PMID, Abstract, Website Link | All citations related to prognostic implications |

## Questions?
The best way to reach us is to email contact@oncokb.org so that all our team members can help.
Loading