Skip to content

Commit

Permalink
Rebase and update
Browse files: browse the repository at this point in the history
  • Loading branch information
tomwhite committed Aug 31, 2020
1 parent 3524b8f commit cc4f4cd
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ Methods
gwas_linear_regression
hardy_weinberg_test
regenie
variant_stats

Utilities
=========

.. autosummary::
:toctree: generated/
Expand Down
3 changes: 2 additions & 1 deletion sgkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from .display import display_genotypes
from .io.vcfzarr_reader import read_vcfzarr
from .stats.aggregation import count_call_alleles, count_variant_alleles
from .stats.aggregation import count_call_alleles, count_variant_alleles, variant_stats
from .stats.association import gwas_linear_regression
from .stats.hwe import hardy_weinberg_test
from .stats.regenie import regenie
Expand All @@ -27,4 +27,5 @@
"read_vcfzarr",
"regenie",
"hardy_weinberg_test",
"variant_stats",
]
7 changes: 4 additions & 3 deletions sgkit/stats/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,11 @@ def call_rate(ds: Dataset, dim: Dimension) -> Dataset:
def genotype_count(ds: Dataset, dim: Dimension) -> Dataset:
odim = _swap(dim)[:-1]
M, G = ds["call_genotype_mask"].any(dim="ploidy"), ds["call_genotype"]
n_het = (G > 0).any(dim="ploidy") & (G == 0).any(dim="ploidy")
n_hom_ref = (G == 0).all(dim="ploidy")
n_hom_alt = (G > 0).all(dim="ploidy")
n_hom_alt = ((G > 0) & (G[..., 0] == G)).all(dim="ploidy")
n_non_ref = (G > 0).any(dim="ploidy")
n_het = ~(n_hom_alt | n_hom_ref)
# This would 0 out the `het` case with any missing calls
agg = lambda x: xr.where(M, False, x).sum(dim=dim) # type: ignore[no-untyped-call]
return xr.Dataset(
{
Expand All @@ -191,7 +192,7 @@ def genotype_count(ds: Dataset, dim: Dimension) -> Dataset:


def allele_frequency(ds: Dataset) -> Dataset:
AC = count_alleles(ds)
AC = count_variant_alleles(ds)

M = ds["call_genotype_mask"].stack(calls=("samples", "ploidy"))
AN = (~M).sum(dim="calls") # type: ignore
Expand Down

0 comments on commit cc4f4cd

Please sign in to comment.