Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix hotspot annotation #105

Merged
merged 1 commit into from
Oct 30, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 43 additions & 58 deletions AnnotatorCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,22 +219,12 @@ def getcuratedgenes():
log.error("error when processing %s \n" % url +
"reason: %s" % response.reason)

missensesinglehotspots = None
indelsinglehotspots = None
_3dhotspots = None
curatedgenes = []

def inithotspots():
global missensesinglehotspots
global indelsinglehotspots
def init_3d_hotspots():
global _3dhotspots
global curatedgenes
missensesinglehotspots = gethotspots(cancerhotspotsbaseurl+"/api/hotspots/single", "single residue")
indelsinglehotspots = gethotspots(cancerhotspotsbaseurl+"/api/hotspots/single", "in-frame indel")
_3dhotspots = gethotspots(_3dhotspotsbaseurl+"/api/hotspots/3d", None)
curatedgenes |= set(missensesinglehotspots.keys())
curatedgenes |= set(indelsinglehotspots.keys())
curatedgenes |= set(_3dhotspots.keys())


conversiondict = {'Ala': 'A',
Expand Down Expand Up @@ -356,7 +346,7 @@ def get_reference_genome_from_row(row_reference_genome, default_reference_genome
def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerType, cancerTypeMap,
retainonlycuratedgenes, annotatehotspots, user_input_query_type, default_reference_genome):
if annotatehotspots:
inithotspots()
init_3d_hotspots()
if os.path.isfile(previousoutfile):
cacheannotated(previousoutfile, defaultCancerType, cancerTypeMap)
outf = open(outfile, 'w+', 1000)
Expand All @@ -371,8 +361,8 @@ def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerTy
outf.write(headers['^-$'])

if annotatehotspots:
outf.write("\tis-a-hotspot")
outf.write("\tis-a-3d-hotspot")
outf.write("\tIS-A-HOTSPOT")
outf.write("\tIS-A-3D-HOTSPOT")
newncols += 2

outf.write("\t" + GENE_IN_ONCOKB_HEADER)
Expand Down Expand Up @@ -407,10 +397,10 @@ def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerTy

if (query_type == QueryType.HGVSG):
process_hvsg(reader, outf, headers, [HGVSG_HEADER, ALTERATION_HEADER], ncols, newncols, defaultCancerType,
cancerTypeMap, default_reference_genome)
cancerTypeMap, annotatehotspots, default_reference_genome)

if (query_type == QueryType.GENOMIC_CHANGE):
process_genomic_change(reader, outf, headers, ncols, newncols, defaultCancerType, cancerTypeMap, default_reference_genome)
process_genomic_change(reader, outf, headers, ncols, newncols, defaultCancerType, cancerTypeMap, annotatehotspots, default_reference_genome)

outf.close()

Expand Down Expand Up @@ -489,30 +479,27 @@ def process_alteration(maffilereader, outf, maf_headers, alteration_column_names
if start is not None and end is None:
end = start

if annotatehotspots:
hotspot = pullsinglehotspots(hugo, hgvs, None, consequence, start, end, cancertype)
row.append(hotspot)

_3dhotspot = pull3dhotspots(hugo, hgvs, None, consequence, start, end, cancertype)
row.append(_3dhotspot)

if not retainonlycuratedgenes or hugo in curatedgenes:
query = ProteinChangeQuery(hugo, hgvs, cancertype, reference_genome, consequence, start, end)
queries.append(query)
rows.append(row)
else:
# Include Gene in OncoKB and Variant in OncoKB
if annotatehotspots:
default_cols = [['', '', GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]]
else:
default_cols = [[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]]
append_annotation_to_file(outf, ncols + nannotationcols, [row],
[[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]])
default_cols)

if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_protein_change_info(queries)
annotations = pull_protein_change_info(queries,annotatehotspots)
append_annotation_to_file(outf, ncols + nannotationcols, rows, annotations)
queries = []
rows = []

if len(queries) > 0:
annotations = pull_protein_change_info(queries)
annotations = pull_protein_change_info(queries,annotatehotspots)
append_annotation_to_file(outf, ncols + nannotationcols, rows, annotations)

# this method is from genome-nexus annotation-tools
Expand All @@ -529,7 +516,7 @@ def get_var_allele(ref_allele, tumor_seq_allele1, tumor_seq_allele2):

return tumor_seq_allele

def process_genomic_change(maffilereader, outf, maf_headers, ncols, nannotationcols, defaultCancerType, cancerTypeMap, default_reference_genome):
def process_genomic_change(maffilereader, outf, maf_headers, ncols, nannotationcols, defaultCancerType, cancerTypeMap, annotatehotspots, default_reference_genome):
ichromosome = geIndexOfHeader(maf_headers, [GC_CHROMOSOME_HEADER])
istart = geIndexOfHeader(maf_headers, [GC_START_POSITION_HEADER])
iend = geIndexOfHeader(maf_headers, [GC_END_POSITION_HEADER])
Expand Down Expand Up @@ -574,16 +561,16 @@ def process_genomic_change(maffilereader, outf, maf_headers, ncols, nannotationc
rows.append(row)

if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_genomic_change_info(queries)
annotations = pull_genomic_change_info(queries,annotatehotspots)
append_annotation_to_file(outf, ncols+nannotationcols, rows, annotations)
queries = []
rows = []

if len(queries) > 0:
annotations = pull_genomic_change_info(queries)
annotations = pull_genomic_change_info(queries,annotatehotspots)
append_annotation_to_file(outf, ncols+nannotationcols, rows, annotations)

def process_hvsg(maffilereader, outf, maf_headers, alteration_column_names, ncols, nannotationcols, defaultCancerType, cancerTypeMap, default_reference_genome):
def process_hvsg(maffilereader, outf, maf_headers, alteration_column_names, ncols, nannotationcols, defaultCancerType, cancerTypeMap, annotatehotspots, default_reference_genome):
ihgvsg = geIndexOfHeader(maf_headers, alteration_column_names)
isample = geIndexOfHeader(maf_headers, SAMPLE_HEADERS)
icancertype = geIndexOfHeader(maf_headers, CANCER_TYPE_HEADERS)
Expand All @@ -610,21 +597,25 @@ def process_hvsg(maffilereader, outf, maf_headers, alteration_column_names, ncol
reference_genome = get_reference_genome_from_row(get_cell_content(row, ireferencegenome), default_reference_genome)

if hgvsg is None:
if annotatehotspots:
default_cols = [['', '', GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]]
else:
default_cols = [[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]]
append_annotation_to_file(outf, ncols + nannotationcols, [row],
[[GENE_IN_ONCOKB_DEFAULT, VARIANT_IN_ONCOKB_DEFAULT]])
default_cols)
else:
query = HGVSgQuery(hgvsg, cancertype, reference_genome)
queries.append(query)
rows.append(row)

if len(queries) == POST_QUERIES_THRESHOLD:
annotations = pull_hgvsg_info(queries)
annotations = pull_hgvsg_info(queries, annotatehotspots)
append_annotation_to_file(outf, ncols+nannotationcols, rows, annotations)
queries = []
rows = []

if len(queries) > 0:
annotations = pull_hgvsg_info(queries)
annotations = pull_hgvsg_info(queries,annotatehotspots)
append_annotation_to_file(outf, ncols+nannotationcols, rows, annotations)


Expand Down Expand Up @@ -1175,22 +1166,7 @@ def geIndexOfHeader(headers, keywords):
return -1


def pullsinglehotspots(hugo, proteinchange, alterationtype, consequence, start, end, cancertype):
try:
if hugo in missensesinglehotspots and consequence == "missense_variant":
for i in range(int(start), int(end) + 1):
if i in missensesinglehotspots[hugo]:
return "Y"
if hugo in indelsinglehotspots and (consequence == "inframe_insertion" or consequence == "inframe_deletion"):
for i in range(int(start), int(end) + 1):
if i in indelsinglehotspots[hugo]:
return "Y"
except TypeError:
log.error("%s: %s-%s" % (hugo, str(start), str(end)))
return ""


def pull3dhotspots(hugo, proteinchange, alterationtype, consequence, start, end, cancertype):
def pull3dhotspots(hugo, consequence, start, end):
try:
if hugo in _3dhotspots and consequence == "missense_variant":
for i in range(int(start), int(end) + 1):
Expand Down Expand Up @@ -1275,7 +1251,7 @@ def __init__(self, hugoA, hugoB, structural_variant_type, cancertype):
self.tumorType = cancertype


def pull_protein_change_info(queries):
def pull_protein_change_info(queries, annotate_hotspot):
url = oncokbapiurl + '/annotate/mutations/byProteinChange'
response = makeoncokbpostrequest(url, queries)
annotation = []
Expand Down Expand Up @@ -1303,11 +1279,11 @@ def pull_protein_change_info(queries):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation))
processed_annotation.append(process_oncokb_annotation(query_annotation, annotate_hotspot))
return processed_annotation


def pull_hgvsg_info(queries):
def pull_hgvsg_info(queries, annotate_hotspot):
url = oncokbapiurl + '/annotate/mutations/byHGVSg'
response = makeoncokbpostrequest(url, queries)
annotation = []
Expand All @@ -1328,10 +1304,10 @@ def pull_hgvsg_info(queries):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation))
processed_annotation.append(process_oncokb_annotation(query_annotation, annotate_hotspot))
return processed_annotation

def pull_genomic_change_info(queries):
def pull_genomic_change_info(queries, annotate_hotspot):
url = oncokbapiurl + '/annotate/mutations/byGenomicChange'
response = makeoncokbpostrequest(url, queries)
annotation = []
Expand All @@ -1352,7 +1328,7 @@ def pull_genomic_change_info(queries):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation))
processed_annotation.append(process_oncokb_annotation(query_annotation, annotate_hotspot))
return processed_annotation


Expand All @@ -1379,7 +1355,7 @@ def pull_cna_info(queries):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation))
processed_annotation.append(process_oncokb_annotation(query_annotation, annotate_hotspot=False))
return processed_annotation


Expand Down Expand Up @@ -1410,12 +1386,12 @@ def pull_structural_variant_info(queries):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation))
processed_annotation.append(process_oncokb_annotation(query_annotation, annotate_hotspot=False))
return processed_annotation



def process_oncokb_annotation(annotation):
def process_oncokb_annotation(annotation, annotate_hotspot):
if annotation is None:
return None

Expand Down Expand Up @@ -1476,6 +1452,15 @@ def process_oncokb_annotation(annotation):


ret = []
if annotate_hotspot:
if annotation['hotspot']:
ret.append('Y')
else:
ret.append('')

_3dhotspot = pull3dhotspots(annotation['query']['hugoSymbol'], annotation['query']['consequence'], annotation['query']['proteinStart'], annotation['query']['proteinEnd'])
ret.append(_3dhotspot)

ret.append(oncokbdata[GENE_IN_ONCOKB_HEADER])
ret.append(oncokbdata[VARIANT_IN_ONCOKB_HEADER])
ret.append(oncokbdata['mutation_effect'])
Expand Down