Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix triple clinvar filter #5006

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/)
- Empty custom_images dicts in case load config do not crash
- Tracks missing alignment files are now properly skipped on generating IGV views
- ClinVar form to accept MedGen phenotypes
- Filtering variants by ClinVar significance, CLINSIG Confident and ClinVar hits at the same time

## [4.90.1]
### Fixed
Expand Down
59 changes: 23 additions & 36 deletions scout/adapter/mongo/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,18 @@ def build_query(
secondary_terms = True

if primary_terms is True:
clinsign_filter = self.clinsig_query(query, mongo_query)
clinsign_filter = self.clinsig_query(query)

# Secondary, excluding filter criteria will hide variants in general,
# but can be overridden by an including, major filter criterion
# such as a Pathogenic ClinSig.

"""
if criterion == "clinvar_tag":
mongo_secondary_query.append({"clnsig": {"$exists": True}})
mongo_secondary_query.append({"clnsig": {"$ne": None}})
"""

if secondary_terms is True:
secondary_filter = self.secondary_query(query, mongo_query)
# If there are no primary criteria given, all secondary criteria are added as a
Expand Down Expand Up @@ -404,60 +411,44 @@ def affected_inds_query(self, mongo_query, case_id, gt_query):
]: # Consider situation where all individuals are unaffected
mongo_query["samples"] = affected_query

def clinsig_query(self, query, mongo_query):
"""Add clinsig filter values to the mongo query object

Args:
query(dict): a dictionary of query filters specified by the users
mongo_query(dict): the query that is going to be submitted to the database

Returns:
clinsig_query(dict): a dictionary with clinsig key-values

"""
def clinsig_query(self, query: dict):
"""Add clinsig filter values to the mongo query object"""
LOG.debug("clinsig is a query parameter")
trusted_revision_level = TRUSTED_REVSTAT_LEVEL
rank = []
str_rank = []
clnsig_query = {}

for item in query["clinsig"]:
rank.append(int(item))
# search for human readable clinsig values in newer cases
rank.append(CLINSIG_MAP[int(item)])
str_rank.append(CLINSIG_MAP[int(item)])

if query.get("clinsig_confident_always_returned") is True:
LOG.debug("add CLINSIG filter with trusted_revision_level")
elem_match_or = {
"$or": [
{"value": {"$in": rank}},
{"value": re.compile("|".join(str_rank))},
]
}

if query.get("clinsig_confident_always_returned") is True:
clnsig_query = {
"clnsig": {
"$elemMatch": {
"$and": [
{
"$or": [
{"value": {"$in": rank}},
{"value": re.compile("|".join(str_rank))},
]
},
elem_match_or,
{"revstat": re.compile("|".join(trusted_revision_level))},
]
}
}
}
else:
LOG.debug("add CLINSIG filter for rank: %s" % ", ".join(str(query["clinsig"])))
clnsig_query = {"clnsig": {"$elemMatch": elem_match_or}}

if query.get("clinvar_tag"):
Copy link
Member Author

@northwestwitch northwestwitch Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the fix: the `clinvar_tag` filter is removed from the secondary query and is now set together with the other two ClinVar-based filters, in the function dedicated to building the ClinVar filters.

clnsig_query["clnsig"]["$exists"] = True
clnsig_query["clnsig"]["$ne"] = None

clnsig_query = {
"clnsig": {
"$elemMatch": {
"$or": [
{"value": {"$in": rank}},
{"value": re.compile("|".join(str_rank))},
]
}
}
}
return clnsig_query

def coordinate_filter(self, query, mongo_query):
Expand Down Expand Up @@ -777,10 +768,6 @@ def secondary_query(self, query, mongo_query, secondary_filter=None):
if criterion == "mvl_tag":
mongo_secondary_query.append({"mvl_tag": {"$exists": True}})

if criterion == "clinvar_tag":
mongo_secondary_query.append({"clnsig": {"$exists": True}})
mongo_secondary_query.append({"clnsig": {"$ne": None}})

if criterion == "cosmic_tag":
mongo_secondary_query.append({"cosmic_ids": {"$exists": True}})
mongo_secondary_query.append({"cosmic_ids": {"$ne": None}})
Expand Down
1 change: 0 additions & 1 deletion scout/constants/query_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
"somatic_score",
"control_frequency",
"mvl_tag",
"clinvar_tag",
"cosmic_tag",
"tumor_frequency",
"fusion_score",
Expand Down
Loading