Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #4442 - do not bring in ORPHA inheritance model to OMIM specific … #4443

Merged
merged 18 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/)
### Fixed
- Removed log info showing hgnc IDs used in variantS search
- Maintain Matchmaker Exchange and Beacon submission status when a case is re-uploaded
- Inheritance models from ORPHA terms are no longer confused with the OMIM inheritance models of a gene

### Added
- New SO terms: `sequence_variant` and `coding_transcript_variant`
Expand Down
6 changes: 3 additions & 3 deletions scout/server/blueprints/variant/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def variant(
# Update some case panels info from db and populate it on variant to avoid showing removed panels
update_case_panels(store, case_obj)
# The hierarchical call order is relevant: cases are used to populate variants
update_variant_case_panels(store, case_obj, variant_obj)
update_variant_case_panels(case_obj, variant_obj)

associate_variant_genes_with_case_panels(store, variant_obj)

Expand Down Expand Up @@ -310,8 +310,8 @@ def variant(

if variant_obj.get("genetic_models"):
variant_models = set(model.split("_", 1)[0] for model in variant_obj["genetic_models"])
all_models = variant_obj.get("all_models", set())
variant_obj["is_matching_inheritance"] = set.intersection(variant_models, all_models)
omim_models = variant_obj.get("omim_models", set())
variant_obj["is_matching_inheritance"] = set.intersection(variant_models, omim_models)
dnil marked this conversation as resolved.
Show resolved Hide resolved

# Prepare classification information for visualisation
classification = variant_obj.get("acmg_classification")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
{% if gene.common and gene.disease_terms %}
{% for disease_term in gene.disease_terms %}
{% if disease_term.source == 'ORPHA' %}
<tr>
<tr data-bs-toggle="tooltip" title="Some ORPHA disorders are phenotypic umbrella terms for multiple genetic entities. The inheritance models are in this case a set derived from all those entities, not necessarily the inheritance mode known for this gene.">
<td>
<a href="http://omim.org/entry/{{ gene.common.omim_id }}" rel="noopener" target="_blank">
{{ gene.common.hgnc_symbol }}
Expand Down
147 changes: 76 additions & 71 deletions scout/server/blueprints/variant/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
LOG = logging.getLogger(__name__)


def add_panel_specific_gene_info(panel_info):
def add_panel_specific_gene_info(panel_info: List[dict]) -> dict:
"""Adds manually curated information from a gene panel to a gene

The panel info is a list of dictionaries since there can be multiple infos about a panel.
Expand Down Expand Up @@ -61,13 +61,9 @@ def add_panel_specific_gene_info(panel_info):
return panel_specific


def update_representative_gene(variant_obj, variant_genes):
def update_representative_gene(variant_obj: dict, variant_genes: List[dict]):
"""Set the gene with most severe consequence as being representative
Used for display purposes

Args:
variant_obj(Variant): a variant object
variant_genes(list(Genes): a list of genes
"""

if variant_genes:
Expand All @@ -93,7 +89,9 @@ def update_representative_gene(variant_obj, variant_genes):
variant_obj["first_rep_gene"] = None


def update_transcripts_information(variant_gene, hgnc_gene, variant_obj, genome_build=None):
def update_transcripts_information(
variant_gene: dict, hgnc_gene: dict, variant_obj: dict, genome_build: Optional[str] = None
):
"""Collect tx info from the hgnc gene and panels and update variant transcripts

Since the hgnc information are continuously being updated we need to run this each time a
Expand All @@ -102,14 +100,7 @@ def update_transcripts_information(variant_gene, hgnc_gene, variant_obj, genome_
This function will:
- Add a dictionary with tx_id -> tx_info to the hgnc variant
- Add information from the panel
- Adds a list of refseq transcripts

Args:
variant_gene(dict): the gene information from the variant
hgnc_gene(dict): the hgnc gene information
varaiant_obj(scout.models.Variant)
genome_build(str): genome build

- Adds a list of RefSeq transcripts
"""
genome_build = genome_build or "37"
disease_associated_no_version = variant_gene.get("disease_associated_no_version", set())
Expand Down Expand Up @@ -152,23 +143,18 @@ def update_transcripts_information(variant_gene, hgnc_gene, variant_obj, genome_
if refseq_id in disease_associated_no_version:
transcript["is_disease_associated"] = True

# Since a ensemble transcript can have multiple refseq identifiers we add all of
# Since an Ensembl transcript can have multiple RefSeq identifiers we add all of
dnil marked this conversation as resolved.
Show resolved Hide resolved
# those
transcript["refseq_identifiers"] = hgnc_transcript.get("refseq_identifiers", [])
transcript["change_str"] = transcript_str(transcript, hgnc_symbol)


def update_variant_case_panels(store, case_obj, variant_obj):
def update_variant_case_panels(case_obj: dict, variant_obj: dict):
dnil marked this conversation as resolved.
Show resolved Hide resolved
"""Populate variant with case gene panels with info on e.g. if a panel was removed on variant_obj.
Variant objects panels are only a list of matching panel names.

The case_obj should be up to date first. Call update_case_panels() as needed in context:
The case_obj should be up-to-date first. Call update_case_panels() as needed in context:
to save some resources we do not call it here for each variant.

Args:
store(adapter.MongoAdapter)
case_obj(dict): case_obj with updated panels - update_case_panels() first
variant_obj(dict)
"""

variant_panel_names = variant_obj.get("panels") or []
Expand All @@ -181,55 +167,62 @@ def update_variant_case_panels(store, case_obj, variant_obj):
variant_obj["case_panels"] = case_panel_objs


def add_gene_info(store, variant_obj, gene_panels=None, genome_build=None):
def get_extra_info(gene_panels: list) -> Dict[int, List[dict]]:
    """Group the gene entries of the given gene panels by HGNC id.

    Each panel is expected to hold a "genes" list whose items carry an
    "hgnc_id" key. A gene present in several panels (or several times in one
    panel) gets one list entry per occurrence, in the order encountered.

    Args:
        gene_panels: panel dictionaries, each with a "genes" list.

    Returns:
        A plain dict mapping hgnc_id -> list of the gene info dictionaries
        found for that id. Empty when no panels (or no genes) are given.
    """
    extra_info: Dict[int, List[dict]] = {}

    for panel_obj in gene_panels:
        for gene_info in panel_obj["genes"]:
            # setdefault keeps the result a plain dict (no defaultdict side
            # effects for callers doing lookups on missing ids).
            extra_info.setdefault(gene_info["hgnc_id"], []).append(gene_info)

    return extra_info


def seed_genes_with_only_hgnc_id(variant_obj: dict):
    """Create a minimal "genes" list for variants (e.g. STR) that only carry hgnc_ids.

    Nothing is done when the variant already has a non-empty "genes" value or
    when it has no "hgnc_ids". Otherwise variant_obj["genes"] is set, in place,
    to one {"hgnc_id": <id>} dictionary per id.
    """
    already_has_genes = bool(variant_obj.get("genes"))
    has_ids = bool(variant_obj.get("hgnc_ids"))
    if already_has_genes or not has_ids:
        return

    variant_obj["genes"] = [{"hgnc_id": gene_id} for gene_id in variant_obj.get("hgnc_ids")]


def add_gene_info(
store: MongoAdapter,
variant_obj: dict,
gene_panels: Optional[List[dict]] = None,
genome_build: Optional[str] = None,
):
"""Adds information to variant genes from hgnc genes and selected gene panels.

Variants are annotated with gene and transcript information from VEP. In Scout the database
keeps updated and extended information about genes and transcript. This function will compliment
the VEP information with the updated database information.
Also there is sometimes additional information that are manually curated in the gene panels.
Only the selected panels passed to this function (typically default) are used.
keeps updated and extended information about genes and transcript. This function will complement
the VEP information with the updated database information.
Also there is sometimes additional information that is manually curated in the gene panels.
Only the selected panels passed to this function (typically the case default panels) are used.
This information needs to be added to the variant before sending it to the template.

This function will loop over all genes and add that extra information.

Args:
store(scout.adapter.MongoAdapter)
variant_obj(dict): A variant from the database
gene_panels(list(dict)): List of panels from database
genome_build(str)

Returns:
variant_obj
"""
gene_panels = gene_panels or []
genome_build = genome_build or "37"

# Add a variable that checks if there are any refseq transcripts
institute = store.institute(variant_obj["institute"])
dnil marked this conversation as resolved.
Show resolved Hide resolved

# extra_info will hold information from gene panels
extra_info = {}
for panel_obj in gene_panels:
for gene_info in panel_obj["genes"]:
hgnc_id = gene_info["hgnc_id"]
if hgnc_id not in extra_info:
extra_info[hgnc_id] = []
extra_info = get_extra_info(gene_panels)

extra_info[hgnc_id].append(gene_info)
seed_genes_with_only_hgnc_id(variant_obj)

# Loop over the genes in the variant object to add information
# from hgnc_genes and panel genes to the variant object
# Add a variable that checks if there are any refseq transcripts
variant_obj["has_refseq"] = False
variant_obj["disease_associated_transcripts"] = []
all_models = set()

# seed genes structure for (STR) variants that have only hgnc_ids
if not variant_obj.get("genes") and variant_obj.get("hgnc_ids"):
variant_obj["genes"] = []
for hgnc_id in variant_obj.get("hgnc_ids"):
variant_gene = {"hgnc_id": hgnc_id}
variant_obj["genes"].append(variant_gene)

if variant_obj.get("genes"):
for variant_gene in variant_obj["genes"]:
hgnc_id = variant_gene["hgnc_id"]
Expand All @@ -251,8 +244,6 @@ def add_gene_info(store, variant_obj, gene_panels=None, genome_build=None):
update_transcripts_information(variant_gene, hgnc_gene, variant_obj, genome_build)

variant_gene["common"] = hgnc_gene

institute = store.institute(variant_obj["institute"])
dnil marked this conversation as resolved.
Show resolved Hide resolved
add_gene_links(variant_gene, genome_build, institute=institute)

# Add disease associated transcripts from panel to variant
Expand All @@ -261,19 +252,39 @@ def add_gene_info(store, variant_obj, gene_panels=None, genome_build=None):
variant_obj["disease_associated_transcripts"].append(transcript_str)

# Add the associated disease terms
disease_terms = store.disease_terms_by_gene(hgnc_id, filter_project={"inheritance": 1})
disease_terms = store.disease_terms_by_gene(
hgnc_id, filter_project={"inheritance": 1, "source": 1}
)

all_models = all_models.union(set(variant_gene["manual_inheritance"]))
omim_models = set()
for disease_term in disease_terms:
omim_models.update(disease_term.get("inheritance", []))
variant_gene["omim_inheritance"] = list(omim_models)
all_models.update(set(variant_gene["manual_inheritance"]))

all_models = all_models.union(omim_models)
update_inheritance_model(variant_gene, all_models, disease_terms)

variant_obj["all_models"] = all_models


def update_inheritance_model(variant_gene: dict, all_models: set, disease_terms: list):
    """Set the OMIM inheritance models on a variant gene and extend all_models.

    ORPHA disorders can be more of the umbrella kind, where many genes and
    inheritance models are implied. Those models are still added to all_models
    for the variant, but must not end up in the list of OMIM inheritance models
    for the particular gene.

    Args:
        variant_gene: gene dictionary; its "omim_inheritance" key is overwritten
            with a list of the models found on OMIM-sourced terms only.
        all_models: set updated in place with the models of every disease term,
            whatever its source.
        disease_terms: disease term dictionaries, read for their "inheritance"
            and "source" keys.
    """
    inheritance_models = set()
    omim_inheritance_models = set()

    for disease_term in disease_terms:
        # A missing or empty "inheritance" value contributes nothing.
        term_models = disease_term.get("inheritance") or []
        inheritance_models.update(term_models)

        # Use this term's own models, not the running union: otherwise models
        # from an ORPHA term seen earlier in the loop leak into the OMIM set.
        if disease_term.get("source") == "OMIM":
            omim_inheritance_models.update(term_models)

    variant_gene["omim_inheritance"] = list(omim_inheritance_models)

    all_models.update(inheritance_models)


def predictions(genes):
"""Adds information from variant specific genes to display.

Expand Down Expand Up @@ -542,18 +553,12 @@ def end_position(variant_obj):
return variant_obj["position"] + (num_bases - 1)


def default_panels(store, case_obj):
def default_panels(store: MongoAdapter, case_obj: dict) -> List[dict]:
"""Return the panels that are decided to be default for a case.

Check what panels that are default, fetch those and add them to a list.

Args:
store(scout.adapter.MongoAdapter)
case_obj(scout.models.Case)

Returns:
default_panels(list(dict))
Check what panels that are default, fetch those and return them in a list.

case_obj is a dict after scout.models.Case.
"""
default_panels = []
# Add default panel information to variant
Expand Down
3 changes: 1 addition & 2 deletions scout/server/blueprints/variants/controllers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import decimal
import logging
import re
from datetime import date
from typing import Any, Dict

import bson
Expand Down Expand Up @@ -149,7 +148,7 @@ def variants(
if case_obj.get("group"):
variant_obj["group_assessments"] = _get_group_assessments(store, case_obj, variant_obj)

update_variant_case_panels(store, case_obj, variant_obj)
update_variant_case_panels(case_obj, variant_obj)

variants.append(
parse_variant(
Expand Down
Loading