Rebase and update

sgkit-dev · Aug 31, 2020 · cc4f4cd · cc4f4cd
1 parent 3524b8f
commit cc4f4cd
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 5 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -27,9 +27,9 @@ Methods
    gwas_linear_regression
    hardy_weinberg_test
    regenie
+   variant_stats
 
 Utilities
-=========
 
 .. autosummary::
    :toctree: generated/

diff --git a/sgkit/__init__.py b/sgkit/__init__.py
@@ -8,7 +8,7 @@
 )
 from .display import display_genotypes
 from .io.vcfzarr_reader import read_vcfzarr
-from .stats.aggregation import count_call_alleles, count_variant_alleles
+from .stats.aggregation import count_call_alleles, count_variant_alleles, variant_stats
 from .stats.association import gwas_linear_regression
 from .stats.hwe import hardy_weinberg_test
 from .stats.regenie import regenie
@@ -27,4 +27,5 @@
     "read_vcfzarr",
     "regenie",
     "hardy_weinberg_test",
+    "variant_stats",
 ]
diff --git a/sgkit/stats/aggregation.py b/sgkit/stats/aggregation.py
@@ -175,10 +175,11 @@ def call_rate(ds: Dataset, dim: Dimension) -> Dataset:
 def genotype_count(ds: Dataset, dim: Dimension) -> Dataset:
     odim = _swap(dim)[:-1]
     M, G = ds["call_genotype_mask"].any(dim="ploidy"), ds["call_genotype"]
-    n_het = (G > 0).any(dim="ploidy") & (G == 0).any(dim="ploidy")
     n_hom_ref = (G == 0).all(dim="ploidy")
-    n_hom_alt = (G > 0).all(dim="ploidy")
+    n_hom_alt = ((G > 0) & (G[..., 0] == G)).all(dim="ploidy")
     n_non_ref = (G > 0).any(dim="ploidy")
+    n_het = ~(n_hom_alt | n_hom_ref)
+    # This would 0 out the `het` case with any missing calls
     agg = lambda x: xr.where(M, False, x).sum(dim=dim)  # type: ignore[no-untyped-call]
     return xr.Dataset(
         {
@@ -191,7 +192,7 @@ def genotype_count(ds: Dataset, dim: Dimension) -> Dataset:
 
 
 def allele_frequency(ds: Dataset) -> Dataset:
-    AC = count_alleles(ds)
+    AC = count_variant_alleles(ds)
 
     M = ds["call_genotype_mask"].stack(calls=("samples", "ploidy"))
     AN = (~M).sum(dim="calls")  # type: ignore