diff --git a/PROTO_tests/tests/random_test.py b/PROTO_tests/tests/random_test.py index 0cb1245eb2..3eb8ff291a 100644 --- a/PROTO_tests/tests/random_test.py +++ b/PROTO_tests/tests/random_test.py @@ -227,6 +227,20 @@ def test_poissson(self): assert rng.poisson(lam=scal_lam, size=num_samples).to_list() == scal_sample assert rng.poisson(lam=arr_lam, size=num_samples).to_list() == arr_sample + def test_poisson_seed_reproducibility(self): + # test resolution of issue #3322, same seed gives same result across machines / num locales + seed = 11 + # test with many orders of magnitude to ensure we test different remainders and case where + # all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256) + saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"] + + # directory of this file + file_dir = os.path.dirname(os.path.realpath(__file__)) + for i, f_name in zip(range(2, 5), saved_seeded_file_patterns): + generated = ak.random.default_rng(seed=seed).poisson(size=10**i) + saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"] + assert (generated == saved).all() + def test_exponential(self): rng = ak.random.default_rng(17) num_samples = 5 @@ -268,6 +282,25 @@ def test_choice_hypothesis_testing(self): # if pval <= 0.05, the difference from the expected distribution is significant assert pval > 0.05 + @pytest.mark.parametrize("method", ["zig", "box"]) + def test_exponential_hypothesis_testing(self, method): + # I tested this many times without a set seed, but with no seed + # it's expected to fail one out of every ~20 runs given a pval limit of 0.05. + rng = ak.random.default_rng(43) + num_samples = 10**4 + + scale = rng.uniform(0, 10) + sample = rng.exponential(scale=scale, size=num_samples, method=method) + sample_list = sample.to_list() + + # do the Kolmogorov-Smirnov test for goodness of fit + ks_res = sp_stats.kstest( + rvs=sample_list, + cdf=sp_stats.expon.cdf, + args=(0, scale), + ) + assert ks_res.pvalue > 0.05 + @pytest.mark.parametrize("method", ["zig", "box"]) def test_normal_hypothesis_testing(self, method): # I tested this many times without a set seed, but with no seed @@ -292,19 +325,30 @@ def test_normal_hypothesis_testing(self, method): ) assert good_fit_res.pvalue > 0.05 - def test_poisson_seed_reproducibility(self): - # test resolution of issue #3322, same seed gives same result across machines / num locales - seed = 11 - # test with many orders of magnitude to ensure we test different remainders and case where - # all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256) - saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"] + @pytest.mark.parametrize("method", ["zig", "box"]) + def test_lognormal_hypothesis_testing(self, method): + # I tested this many times without a set seed, but with no seed + # it's expected to fail one out of every ~20 runs given a pval limit of 0.05. + rng = ak.random.default_rng(43) + num_samples = 10**4 - # directory of this file - file_dir = os.path.dirname(os.path.realpath(__file__)) - for i, f_name in zip(range(2, 5), saved_seeded_file_patterns): - generated = ak.random.default_rng(seed=seed).poisson(size=10**i) - saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"] - assert (generated == saved).all() + mean = rng.uniform(-10, 10) + deviation = rng.uniform(0, 10) + sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples, method=method) + + log_sample_list = np.log(sample.to_ndarray()).tolist() + + # first test if samples are normal at all + _, pval = sp_stats.normaltest(log_sample_list) + + # if pval <= 0.05, the difference from the expected distribution is significant + assert pval > 0.05 + + # second goodness of fit test against the distribution with proper mean and std + good_fit_res = sp_stats.goodness_of_fit( + sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation} + ) + assert good_fit_res.pvalue > 0.05 def test_poisson_hypothesis_testing(self): # I tested this many times without a set seed, but with no seed diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py index f35b88c54a..2315eaf642 100644 --- a/arkouda/random/_generator.py +++ b/arkouda/random/_generator.py @@ -1,3 +1,4 @@ +import numpy as np import numpy.random as np_random from arkouda.client import generic_msg @@ -296,6 +297,63 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False): self._state += full_size return create_pdarray(rep_msg) + def lognormal(self, mean=0.0, sigma=1.0, size=None, method="zig"): + r""" + Draw samples from a log-normal distribution with specified mean, + standard deviation, and array shape. + + Note that the mean and standard deviation are not the values for the distribution itself, + but of the underlying normal distribution it is derived from. + + Parameters + ---------- + mean: float or pdarray of floats, optional + Mean of the distribution. Default of 0. + + sigma: float or pdarray of floats, optional + Standard deviation of the distribution. Must be non-negative. Default of 1. + + size: numeric_scalars, optional + Output shape. Default is None, in which case a single value is returned. + + method : str, optional + Either 'box' or 'zig'. 'box' uses the Box–Muller transform + 'zig' uses the Ziggurat method. + + Notes + ----- + A variable `x` has a log-normal distribution if `log(x)` is normally distributed. + The probability density for the log-normal distribution is: + + .. math:: + p(x) = \frac{1}{\sigma x \sqrt{2\pi}} e^{-\frac{(\ln(x)-\mu)^2}{2\sigma^2}} + + where :math:`\mu` is the mean and :math:`\sigma` the standard deviation of the normally + distributed logarithm of the variable. + A log-normal distribution results if a random variable is the product of a + large number of independent, identically-distributed variables in the same + way that a normal distribution results if the variable is + the sum of a large number of independent, identically-distributed variables. + + Returns + ------- + pdarray + Pdarray of floats (unless size=None, in which case a single float is returned). + + See Also + -------- + normal + + Examples + -------- + >>> ak.random.default_rng(17).lognormal(3, 2.5, 3) + array([7.3866978126031091 106.20159494048757 4.5424399190667666]) + """ + from arkouda.numeric import exp + + norm_arr = self.normal(loc=mean, scale=sigma, size=size, method=method) + return exp(norm_arr) if size is not None else np.exp(norm_arr) + def normal(self, loc=0.0, scale=1.0, size=None, method="zig"): r""" Draw samples from a normal (Gaussian) distribution @@ -337,7 +395,7 @@ def normal(self, loc=0.0, scale=1.0, size=None, method="zig"): Examples -------- - >>> ak.random.default_rng(17).normal(3, 2.5, 10) + >>> ak.random.default_rng(17).normal(3, 2.5, 3) array([2.3673425816523577 4.0532529435624589 2.0598322696795694]) """ if size is None: diff --git a/pydoc/usage/random.rst b/pydoc/usage/random.rst index b1ae754922..a43b6f27a6 100644 --- a/pydoc/usage/random.rst +++ b/pydoc/usage/random.rst @@ -29,6 +29,10 @@ integers --------- .. autofunction:: arkouda.random.Generator.integers +lognormal +--------- +.. autofunction:: arkouda.random.Generator.lognormal + normal --------- .. autofunction:: arkouda.random.Generator.normal diff --git a/src/registry/Commands.chpl b/src/registry/Commands.chpl index c897ef5aa0..15f98df535 100644 --- a/src/registry/Commands.chpl +++ b/src/registry/Commands.chpl @@ -1283,79 +1283,83 @@ registerFunction('uniformGenerator', ark_uniformGenerator_bigint_1, 'R proc ark_standardNormalGenerator_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.standardNormalGenerator(cmd, msgArgs, st, array_nd=1); -registerFunction('standardNormalGenerator<1>', ark_standardNormalGenerator_1, 'RandMsg', 161); +registerFunction('standardNormalGenerator<1>', ark_standardNormalGenerator_1, 'RandMsg', 254); + +proc ark_standardExponential_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do + return RandMsg.standardExponential(cmd, msgArgs, st, array_nd=1); +registerFunction('standardExponential<1>', ark_standardExponential_1, 'RandMsg', 389); proc ark_choice_int(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.choice(cmd, msgArgs, st, array_dtype=int); -registerFunction('choice', ark_choice_int, 'RandMsg', 332); +registerFunction('choice', ark_choice_int, 'RandMsg', 503); proc ark_choice_uint(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.choice(cmd, msgArgs, st, array_dtype=uint); -registerFunction('choice', ark_choice_uint, 'RandMsg', 332); +registerFunction('choice', ark_choice_uint, 'RandMsg', 503); proc ark_choice_uint8(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.choice(cmd, msgArgs, st, array_dtype=uint(8)); -registerFunction('choice', ark_choice_uint8, 'RandMsg', 332); +registerFunction('choice', ark_choice_uint8, 'RandMsg', 503); proc ark_choice_real(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.choice(cmd, msgArgs, st, array_dtype=real); -registerFunction('choice', ark_choice_real, 'RandMsg', 332); +registerFunction('choice', ark_choice_real, 'RandMsg', 503); proc ark_choice_bool(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.choice(cmd, msgArgs, st, array_dtype=bool); -registerFunction('choice', ark_choice_bool, 'RandMsg', 332); +registerFunction('choice', ark_choice_bool, 'RandMsg', 503); proc ark_choice_bigint(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.choice(cmd, msgArgs, st, array_dtype=bigint); -registerFunction('choice', ark_choice_bigint, 'RandMsg', 332); +registerFunction('choice', ark_choice_bigint, 'RandMsg', 503); proc ark_permutation_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=int, array_nd=1); -registerFunction('permutation', ark_permutation_int_1, 'RandMsg', 406); +registerFunction('permutation', ark_permutation_int_1, 'RandMsg', 577); proc ark_permutation_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=uint, array_nd=1); -registerFunction('permutation', ark_permutation_uint_1, 'RandMsg', 406); +registerFunction('permutation', ark_permutation_uint_1, 'RandMsg', 577); proc ark_permutation_uint8_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=uint(8), array_nd=1); -registerFunction('permutation', ark_permutation_uint8_1, 'RandMsg', 406); +registerFunction('permutation', ark_permutation_uint8_1, 'RandMsg', 577); proc ark_permutation_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=real, array_nd=1); -registerFunction('permutation', ark_permutation_real_1, 'RandMsg', 406); +registerFunction('permutation', ark_permutation_real_1, 'RandMsg', 577); proc ark_permutation_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=bool, array_nd=1); -registerFunction('permutation', ark_permutation_bool_1, 'RandMsg', 406); +registerFunction('permutation', ark_permutation_bool_1, 'RandMsg', 577); proc ark_permutation_bigint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.permutation(cmd, msgArgs, st, array_dtype=bigint, array_nd=1); -registerFunction('permutation', ark_permutation_bigint_1, 'RandMsg', 406); +registerFunction('permutation', ark_permutation_bigint_1, 'RandMsg', 577); proc ark_shuffle_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=int, array_nd=1); -registerFunction('shuffle', ark_shuffle_int_1, 'RandMsg', 493); +registerFunction('shuffle', ark_shuffle_int_1, 'RandMsg', 664); proc ark_shuffle_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=uint, array_nd=1); -registerFunction('shuffle', ark_shuffle_uint_1, 'RandMsg', 493); +registerFunction('shuffle', ark_shuffle_uint_1, 'RandMsg', 664); proc ark_shuffle_uint8_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=uint(8), array_nd=1); -registerFunction('shuffle', ark_shuffle_uint8_1, 'RandMsg', 493); +registerFunction('shuffle', ark_shuffle_uint8_1, 'RandMsg', 664); proc ark_shuffle_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=real, array_nd=1); -registerFunction('shuffle', ark_shuffle_real_1, 'RandMsg', 493); +registerFunction('shuffle', ark_shuffle_real_1, 'RandMsg', 664); proc ark_shuffle_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=bool, array_nd=1); -registerFunction('shuffle', ark_shuffle_bool_1, 'RandMsg', 493); +registerFunction('shuffle', ark_shuffle_bool_1, 'RandMsg', 664); proc ark_shuffle_bigint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=bigint, array_nd=1); -registerFunction('shuffle', ark_shuffle_bigint_1, 'RandMsg', 493); +registerFunction('shuffle', ark_shuffle_bigint_1, 'RandMsg', 664); import StatsMsg;