diff --git a/CHANGELOG.md b/CHANGELOG.md index c11e4cc48..d9729362a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ * Changed default DP threshold to 5 for hemi genotype calls in `annotate_adj` and `get_adj_expr` [(#252)](https://github.com/broadinstitute/gnomad_methods/pull/252) * Updated coverage resources to version 3.0.1 [[#242]] (https://github.com/broadinstitute/gnomad_methods/pull/242) * Fixed handling of missing entries (not within a ref block / alt site) when computing `coverage_stats` in `sparse_mt.py` [[#242]] (https://github.com/broadinstitute/gnomad_methods/pull/242) +* Fixed error in `compute_stratified_sample_qc` caused by the `gt_expr` parameter [(#259)](https://github.com/broadinstitute/gnomad_methods/pull/259) +* Added the target reference genome to the `has_liftover` call in `get_liftover_genome` [(#259)](https://github.com/broadinstitute/gnomad_methods/pull/259) ## Version 0.4.0 - July 9th, 2020 diff --git a/gnomad/sample_qc/filtering.py b/gnomad/sample_qc/filtering.py index 43f5b0498..17af60a6f 100644 --- a/gnomad/sample_qc/filtering.py +++ b/gnomad/sample_qc/filtering.py @@ -185,22 +185,22 @@ def compute_stratified_sample_qc( mt: hl.MatrixTable, strata: Dict[str, hl.expr.BooleanExpression], tmp_ht_prefix: Optional[str], - gt_expr: Optional[hl.expr.CallExpression], + gt_col: Optional[str] = None, ) -> hl.Table: """ Runs hl.sample_qc on different strata and then also merge the results into a single expression. - Note that strata should be non-overlapping, e.g. SNV vs indels or bi-allelic vs multi-allelic + Note that strata should be non-overlapping, e.g. 
SNV vs indels or bi-allelic vs multi-allelic :param mt: Input MT :param strata: Strata names and filtering expressions :param tmp_ht_prefix: Optional path prefix to write the intermediate strata results to (recommended for larger datasets) - :param gt_expr: Optional entry field storing the genotype (if not specified, then it is assumed that it is stored in mt.GT) + :param gt_col: Optional name of the entry field storing the genotype (if not specified, then it is assumed that it is stored in mt.GT) :return: Sample QC table, including strat-specific numbers """ mt = mt.select_rows(**strata) - if gt_expr is not None: - mt = mt.select_entries(GT=gt_expr) + if gt_col is not None: + mt = mt.select_entries(GT=mt[gt_col]) else: mt = mt.select_entries("GT") @@ -258,6 +258,7 @@ def merge_sample_qc_expr( additive_metrics = [ "n_called", "n_not_called", + "n_filtered", "n_hom_ref", "n_het", "n_hom_var", @@ -314,6 +315,7 @@ def merge_sample_qc_expr( ] ).drop("n") for metric in stats_metrics + if metric in sample_qc_fields } ) diff --git a/gnomad/utils/liftover.py b/gnomad/utils/liftover.py index 43826490a..97110053c 100644 --- a/gnomad/utils/liftover.py +++ b/gnomad/utils/liftover.py @@ -52,7 +52,7 @@ def get_liftover_genome( chain = GRCH37_to_GRCH38_CHAIN logger.info("Adding liftover chain to input build...") - if source.has_liftover(): + if source.has_liftover(target): logger.warning( f"Source reference build {source.name} already has a chain file: {source._liftovers}!\ Using whichever chain has already been added."