Skip to content

Commit

Permalink
Merge pull request #634 from broadinstitute/jg/add_mane_select_to_sum…
Browse files Browse the repository at this point in the history
…mary_stats

Add Mane select filtering option to `get_summary_counts`
  • Loading branch information
jkgoodrich authored Oct 23, 2023
2 parents eeb95fc + ad310e2 commit f6b9b1e
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
25 changes: 20 additions & 5 deletions gnomad/assessment/summary_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
LOF_CSQ_SET,
add_most_severe_consequence_to_consequence,
filter_vep_to_canonical_transcripts,
filter_vep_to_mane_select_transcripts,
get_most_severe_consequence_for_summary,
process_consequences,
)
Expand Down Expand Up @@ -167,6 +168,8 @@ def get_summary_counts(
freq_field: str = "freq",
filter_field: str = "filters",
filter_decoy: bool = False,
canonical_only: bool = True,
mane_select_only: bool = False,
index: int = 0,
) -> hl.Table:
"""
Expand All @@ -186,8 +189,9 @@ def get_summary_counts(
Before calculating summary counts, function:
- Filters out low confidence regions
- Filters to canonical transcripts
- Uses the most severe consequence
- Filters to canonical transcripts (if `canonical_only` is True) or MANE Select
transcripts (if `mane_select_only` is True)
Assumes that:
- Input HT is annotated with VEP.
Expand All @@ -198,10 +202,17 @@ def get_summary_counts(
:param ht: Input Table.
:param freq_field: Name of field in HT containing frequency annotation (array of structs). Default is "freq".
:param filter_field: Name of field in HT containing variant filter information. Default is "filters".
:param canonical_only: Whether to filter to canonical transcripts. Default is True.
:param mane_select_only: Whether to filter to MANE Select transcripts. Default is False.
:param filter_decoy: Whether to filter decoy regions. Default is False.
:param index: Which index of freq_expr to use for annotation. Default is 0.
:return: Table grouped by frequency bin and aggregated across summary count categories.
"""
if canonical_only and mane_select_only:
raise ValueError(
"Only one of `canonical_only` and `mane_select_only` can be True."
)

logger.info("Checking if multi-allelic variants have been split...")
max_alleles = ht.aggregate(hl.agg.max(hl.len(ht.alleles)))
if max_alleles > 2:
Expand All @@ -212,10 +223,14 @@ def get_summary_counts(
ht = ht.filter((hl.len(ht[filter_field]) == 0))
ht = filter_low_conf_regions(ht, filter_decoy=filter_decoy)

logger.info(
"Filtering to canonical transcripts and getting VEP summary annotations..."
)
ht = filter_vep_to_canonical_transcripts(ht)
if canonical_only:
logger.info("Filtering to canonical transcripts...")
ht = filter_vep_to_canonical_transcripts(ht)
elif mane_select_only:
logger.info("Filtering to mane select transcripts...")
ht = filter_vep_to_mane_select_transcripts(ht)

logger.info("Getting VEP summary annotations...")
ht = get_most_severe_consequence_for_summary(ht)

logger.info("Annotating with frequency bin information...")
Expand Down
23 changes: 23 additions & 0 deletions gnomad/utils/vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,29 @@ def filter_vep_to_canonical_transcripts(
)


def filter_vep_to_mane_select_transcripts(
mt: Union[hl.MatrixTable, hl.Table],
vep_root: str = "vep",
filter_empty_csq: bool = False,
) -> Union[hl.MatrixTable, hl.Table]:
"""
Filter VEP transcript consequences to those in the MANE Select transcript.
:param mt: Input Table or MatrixTable.
:param vep_root: Name used for VEP annotation. Default is 'vep'.
:param filter_empty_csq: Whether to filter out rows where 'transcript_consequences' is empty. Default is False.
:return: Table or MatrixTable with VEP transcript consequences filtered.
"""
return filter_vep_transcript_csqs(
mt,
vep_root,
synonymous=False,
canonical=False,
mane_select=True,
filter_empty_csq=filter_empty_csq,
)


def filter_vep_to_synonymous_variants(
mt: Union[hl.MatrixTable, hl.Table],
vep_root: str = "vep",
Expand Down

0 comments on commit f6b9b1e

Please sign in to comment.