Closes #3278: Add ziggurat method to standard normal (#3596)

* Closes #3278: Add ziggurat method to standard normal This PR (closes #3278) adds `ziggurat` method for calculating standard normal based on numpys implemenation. I also realized that even though the zig method for both `standard_normal` and `standard_exponential` can't handle multi dimensional arrays yet, their other methods can. I created two versions of each proc with `where array_nd > 1` and `where array_nd == 1`, so now we can generate multi-dim exponentially and normally arrays if we use the compatible method. I added parameterization for the methods in the tests * double defined idx --------- Co-authored-by: Tess Hayes <stress-tess@users.noreply.github.com>
Bears-R-Us · Aug 1, 2024 · ed20f95 · ed20f95
1 parent 07fc7b5
commit ed20f95
Show file tree

Hide file tree

Showing 4 changed files with 254 additions and 57 deletions.
diff --git a/PROTO_tests/tests/random_test.py b/PROTO_tests/tests/random_test.py
@@ -268,15 +268,16 @@ def test_choice_hypothesis_testing(self):
         # if pval <= 0.05, the difference from the expected distribution is significant
         assert pval > 0.05
 
-    def test_normal_hypothesis_testing(self):
+    @pytest.mark.parametrize("method", ["zig", "box"])
+    def test_normal_hypothesis_testing(self, method):
         # I tested this many times without a set seed, but with no seed
         # it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
-        rng = ak.random.default_rng(43)
+        rng = ak.random.default_rng(73)
         num_samples = 10**4
 
         mean = rng.uniform(-10, 10)
         deviation = rng.uniform(0, 10)
-        sample = rng.normal(loc=mean, scale=deviation, size=num_samples)
+        sample = rng.normal(loc=mean, scale=deviation, size=num_samples, method=method)
         sample_list = sample.to_list()
 
         # first test if samples are normal at all
@@ -330,24 +331,24 @@ def test_poisson_hypothesis_testing(self):
         _, pval = sp_stats.chisquare(f_obs=obs_counts, f_exp=exp_counts)
         assert pval > 0.05
 
-    def test_exponential_hypothesis_testing(self):
+    @pytest.mark.parametrize("method", ["zig", "inv"])
+    def test_exponential_hypothesis_testing(self, method):
         # I tested this many times without a set seed, but with no seed
         # it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
         rng = ak.random.default_rng(43)
         num_samples = 10**4
 
         scale = rng.uniform(0, 10)
-        for method in "zig", "inv":
-            sample = rng.exponential(scale=scale, size=num_samples, method=method)
-            sample_list = sample.to_list()
-
-            # do the Kolmogorov-Smirnov test for goodness of fit
-            ks_res = sp_stats.kstest(
-                rvs=sample_list,
-                cdf=sp_stats.expon.cdf,
-                args=(0, scale),
-            )
-            assert ks_res.pvalue > 0.05
+        sample = rng.exponential(scale=scale, size=num_samples, method=method)
+        sample_list = sample.to_list()
+
+        # do the Kolmogorov-Smirnov test for goodness of fit
+        ks_res = sp_stats.kstest(
+            rvs=sample_list,
+            cdf=sp_stats.expon.cdf,
+            args=(0, scale),
+        )
+        assert ks_res.pvalue > 0.05
 
     def test_legacy_randint(self):
         testArray = ak.random.randint(0, 10, 5)

diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py
@@ -196,21 +196,27 @@ def standard_exponential(self, size=None, method="zig"):
         pdarray
             Drawn samples from the standard exponential distribution.
         """
+        from arkouda.util import _calc_shape
+
         if size is None:
             # delegate to numpy when return size is 1
             return self._np_generator.standard_exponential(method=method)
 
+        shape, full_size, ndim = _calc_shape(size)
+        if full_size < 0:
+            raise ValueError("The size parameter must be > 0")
+
         rep_msg = generic_msg(
-            cmd="standardExponential",
+            cmd=f"standardExponential<{ndim}>",
             args={
                 "name": self._name_dict[akdtype("float64")],
-                "size": size,
+                "size": shape,
                 "method": method.upper(),
                 "has_seed": self._seed is not None,
                 "state": self._state,
             },
         )
-        self._state += size if method.upper() == "INV" else 1
+        self._state += full_size if method.upper() == "INV" else 1
         return create_pdarray(rep_msg)
 
     def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
@@ -290,7 +296,7 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
         self._state += full_size
         return create_pdarray(rep_msg)
 
-    def normal(self, loc=0.0, scale=1.0, size=None):
+    def normal(self, loc=0.0, scale=1.0, size=None, method="zig"):
         r"""
         Draw samples from a normal (Gaussian) distribution
 
@@ -305,6 +311,10 @@ def normal(self, loc=0.0, scale=1.0, size=None):
         size: numeric_scalars, optional
             Output shape. Default is None, in which case a single value is returned.
 
+        method : str, optional
+            Either 'box' or 'zig'. 'box' uses the Box–Muller transform
+            'zig' uses the Ziggurat method.
+
         Notes
         -----
         The probability density for the Gaussian distribution is:
@@ -337,7 +347,7 @@ def normal(self, loc=0.0, scale=1.0, size=None):
         if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0:
             raise TypeError("scale must be non-negative.")
 
-        return loc + scale * self.standard_normal(size=size)
+        return loc + scale * self.standard_normal(size=size, method=method)
 
     def random(self, size=None):
         """
@@ -378,7 +388,7 @@ def random(self, size=None):
             return self._np_generator.random()
         return self.uniform(size=size)
 
-    def standard_normal(self, size=None):
+    def standard_normal(self, size=None, method="zig"):
         r"""
         Draw samples from a standard Normal distribution (mean=0, stdev=1).
 
@@ -387,6 +397,10 @@ def standard_normal(self, size=None):
         size: numeric_scalars, optional
             Output shape. Default is None, in which case a single value is returned.
 
+        method : str, optional
+            Either 'box' or 'zig'. 'box' uses the Box–Muller transform
+            'zig' uses the Ziggurat method.
+
         Returns
         -------
         pdarray
@@ -426,6 +440,8 @@ def standard_normal(self, size=None):
             args={
                 "name": self._name_dict[akdtype("float64")],
                 "shape": shape,
+                "method": method.upper(),
+                "has_seed": self._seed is not None,
                 "state": self._state,
             },
         )