Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #3373: Add lognormal to random number generators #3598

Merged
merged 3 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 56 additions & 12 deletions PROTO_tests/tests/random_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,20 @@ def test_poissson(self):
assert rng.poisson(lam=scal_lam, size=num_samples).to_list() == scal_sample
assert rng.poisson(lam=arr_lam, size=num_samples).to_list() == arr_sample

def test_poisson_seed_reproducibility(self):
# test resolution of issue #3322, same seed gives same result across machines / num locales
seed = 11
# test with many orders of magnitude to ensure we test different remainders and case where
# all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256)
saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"]

# directory of this file
file_dir = os.path.dirname(os.path.realpath(__file__))
for i, f_name in zip(range(2, 5), saved_seeded_file_patterns):
generated = ak.random.default_rng(seed=seed).poisson(size=10**i)
saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"]
assert (generated == saved).all()

def test_exponential(self):
rng = ak.random.default_rng(17)
num_samples = 5
Expand Down Expand Up @@ -268,6 +282,25 @@ def test_choice_hypothesis_testing(self):
# if pval <= 0.05, the difference from the expected distribution is significant
assert pval > 0.05

@pytest.mark.parametrize("method", ["zig", "box"])
def test_exponential_hypothesis_testing(self, method):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

scale = rng.uniform(0, 10)
sample = rng.exponential(scale=scale, size=num_samples, method=method)
sample_list = sample.to_list()

# do the Kolmogorov-Smirnov test for goodness of fit
ks_res = sp_stats.kstest(
rvs=sample_list,
cdf=sp_stats.expon.cdf,
args=(0, scale),
)
assert ks_res.pvalue > 0.05

@pytest.mark.parametrize("method", ["zig", "box"])
def test_normal_hypothesis_testing(self, method):
# I tested this many times without a set seed, but with no seed
Expand All @@ -292,19 +325,30 @@ def test_normal_hypothesis_testing(self, method):
)
assert good_fit_res.pvalue > 0.05

def test_poisson_seed_reproducibility(self):
# test resolution of issue #3322, same seed gives same result across machines / num locales
seed = 11
# test with many orders of magnitude to ensure we test different remainders and case where
# all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256)
saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"]
@pytest.mark.parametrize("method", ["zig", "box"])
def test_lognormal_hypothesis_testing(self, method):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

# directory of this file
file_dir = os.path.dirname(os.path.realpath(__file__))
for i, f_name in zip(range(2, 5), saved_seeded_file_patterns):
generated = ak.random.default_rng(seed=seed).poisson(size=10**i)
saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"]
assert (generated == saved).all()
mean = rng.uniform(-10, 10)
deviation = rng.uniform(0, 10)
sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples, method=method)

log_sample_list = np.log(sample.to_ndarray()).tolist()

# first test if samples are normal at all
_, pval = sp_stats.normaltest(log_sample_list)

# if pval <= 0.05, the difference from the expected distribution is significant
assert pval > 0.05

# second goodness of fit test against the distribution with proper mean and std
good_fit_res = sp_stats.goodness_of_fit(
sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation}
)
assert good_fit_res.pvalue > 0.05

def test_poisson_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
Expand Down
60 changes: 59 additions & 1 deletion arkouda/random/_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import numpy.random as np_random

from arkouda.client import generic_msg
Expand Down Expand Up @@ -296,6 +297,63 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
self._state += full_size
return create_pdarray(rep_msg)

def lognormal(self, mean=0.0, sigma=1.0, size=None, method="zig"):
r"""
Draw samples from a log-normal distribution with specified mean,
standard deviation, and array shape.

Note that the mean and standard deviation are not the values for the distribution itself,
but of the underlying normal distribution it is derived from.

Parameters
----------
mean: float or pdarray of floats, optional
Mean of the distribution. Default of 0.

sigma: float or pdarray of floats, optional
Standard deviation of the distribution. Must be non-negative. Default of 1.

size: numeric_scalars, optional
Output shape. Default is None, in which case a single value is returned.

method : str, optional
Either 'box' or 'zig'. 'box' uses the Box–Muller transform
'zig' uses the Ziggurat method.

Notes
-----
A variable `x` has a log-normal distribution if `log(x)` is normally distributed.
The probability density for the log-normal distribution is:

.. math::
p(x) = \frac{1}{\sigma x \sqrt{2\pi}} e^{-\frac{(\ln(x)-\mu)^2}{2\sigma^2}}

where :math:`\mu` is the mean and :math:`\sigma` the standard deviation of the normally
distributed logarithm of the variable.
A log-normal distribution results if a random variable is the product of a
large number of independent, identically-distributed variables in the same
way that a normal distribution results if the variable is
the sum of a large number of independent, identically-distributed variables.

Returns
-------
pdarray
Pdarray of floats (unless size=None, in which case a single float is returned).

See Also
--------
normal

Examples
--------
>>> ak.random.default_rng(17).lognormal(3, 2.5, 3)
array([7.3866978126031091 106.20159494048757 4.5424399190667666])
"""
from arkouda.numeric import exp

norm_arr = self.normal(loc=mean, scale=sigma, size=size, method=method)
return exp(norm_arr) if size is not None else np.exp(norm_arr)

def normal(self, loc=0.0, scale=1.0, size=None, method="zig"):
r"""
Draw samples from a normal (Gaussian) distribution
Expand Down Expand Up @@ -337,7 +395,7 @@ def normal(self, loc=0.0, scale=1.0, size=None, method="zig"):

Examples
--------
>>> ak.random.default_rng(17).normal(3, 2.5, 10)
>>> ak.random.default_rng(17).normal(3, 2.5, 3)
array([2.3673425816523577 4.0532529435624589 2.0598322696795694])
"""
if size is None:
Expand Down
4 changes: 4 additions & 0 deletions pydoc/usage/random.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ integers
---------
.. autofunction:: arkouda.random.Generator.integers

lognormal
---------
.. autofunction:: arkouda.random.Generator.lognormal

normal
---------
.. autofunction:: arkouda.random.Generator.normal
Expand Down
42 changes: 23 additions & 19 deletions src/registry/Commands.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -1283,79 +1283,83 @@ registerFunction('uniformGenerator<bigint,1>', ark_uniformGenerator_bigint_1, 'R

proc ark_standardNormalGenerator_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.standardNormalGenerator(cmd, msgArgs, st, array_nd=1);
registerFunction('standardNormalGenerator<1>', ark_standardNormalGenerator_1, 'RandMsg', 161);
registerFunction('standardNormalGenerator<1>', ark_standardNormalGenerator_1, 'RandMsg', 254);

proc ark_standardExponential_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.standardExponential(cmd, msgArgs, st, array_nd=1);
registerFunction('standardExponential<1>', ark_standardExponential_1, 'RandMsg', 389);

proc ark_choice_int(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.choice(cmd, msgArgs, st, array_dtype=int);
registerFunction('choice<int64>', ark_choice_int, 'RandMsg', 332);
registerFunction('choice<int64>', ark_choice_int, 'RandMsg', 503);

proc ark_choice_uint(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.choice(cmd, msgArgs, st, array_dtype=uint);
registerFunction('choice<uint64>', ark_choice_uint, 'RandMsg', 332);
registerFunction('choice<uint64>', ark_choice_uint, 'RandMsg', 503);

proc ark_choice_uint8(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.choice(cmd, msgArgs, st, array_dtype=uint(8));
registerFunction('choice<uint8>', ark_choice_uint8, 'RandMsg', 332);
registerFunction('choice<uint8>', ark_choice_uint8, 'RandMsg', 503);

proc ark_choice_real(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.choice(cmd, msgArgs, st, array_dtype=real);
registerFunction('choice<float64>', ark_choice_real, 'RandMsg', 332);
registerFunction('choice<float64>', ark_choice_real, 'RandMsg', 503);

proc ark_choice_bool(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.choice(cmd, msgArgs, st, array_dtype=bool);
registerFunction('choice<bool>', ark_choice_bool, 'RandMsg', 332);
registerFunction('choice<bool>', ark_choice_bool, 'RandMsg', 503);

proc ark_choice_bigint(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.choice(cmd, msgArgs, st, array_dtype=bigint);
registerFunction('choice<bigint>', ark_choice_bigint, 'RandMsg', 332);
registerFunction('choice<bigint>', ark_choice_bigint, 'RandMsg', 503);

proc ark_permutation_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.permutation(cmd, msgArgs, st, array_dtype=int, array_nd=1);
registerFunction('permutation<int64,1>', ark_permutation_int_1, 'RandMsg', 406);
registerFunction('permutation<int64,1>', ark_permutation_int_1, 'RandMsg', 577);

proc ark_permutation_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.permutation(cmd, msgArgs, st, array_dtype=uint, array_nd=1);
registerFunction('permutation<uint64,1>', ark_permutation_uint_1, 'RandMsg', 406);
registerFunction('permutation<uint64,1>', ark_permutation_uint_1, 'RandMsg', 577);

proc ark_permutation_uint8_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.permutation(cmd, msgArgs, st, array_dtype=uint(8), array_nd=1);
registerFunction('permutation<uint8,1>', ark_permutation_uint8_1, 'RandMsg', 406);
registerFunction('permutation<uint8,1>', ark_permutation_uint8_1, 'RandMsg', 577);

proc ark_permutation_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.permutation(cmd, msgArgs, st, array_dtype=real, array_nd=1);
registerFunction('permutation<float64,1>', ark_permutation_real_1, 'RandMsg', 406);
registerFunction('permutation<float64,1>', ark_permutation_real_1, 'RandMsg', 577);

proc ark_permutation_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.permutation(cmd, msgArgs, st, array_dtype=bool, array_nd=1);
registerFunction('permutation<bool,1>', ark_permutation_bool_1, 'RandMsg', 406);
registerFunction('permutation<bool,1>', ark_permutation_bool_1, 'RandMsg', 577);

proc ark_permutation_bigint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.permutation(cmd, msgArgs, st, array_dtype=bigint, array_nd=1);
registerFunction('permutation<bigint,1>', ark_permutation_bigint_1, 'RandMsg', 406);
registerFunction('permutation<bigint,1>', ark_permutation_bigint_1, 'RandMsg', 577);

proc ark_shuffle_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=int, array_nd=1);
registerFunction('shuffle<int64,1>', ark_shuffle_int_1, 'RandMsg', 493);
registerFunction('shuffle<int64,1>', ark_shuffle_int_1, 'RandMsg', 664);

proc ark_shuffle_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=uint, array_nd=1);
registerFunction('shuffle<uint64,1>', ark_shuffle_uint_1, 'RandMsg', 493);
registerFunction('shuffle<uint64,1>', ark_shuffle_uint_1, 'RandMsg', 664);

proc ark_shuffle_uint8_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=uint(8), array_nd=1);
registerFunction('shuffle<uint8,1>', ark_shuffle_uint8_1, 'RandMsg', 493);
registerFunction('shuffle<uint8,1>', ark_shuffle_uint8_1, 'RandMsg', 664);

proc ark_shuffle_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=real, array_nd=1);
registerFunction('shuffle<float64,1>', ark_shuffle_real_1, 'RandMsg', 493);
registerFunction('shuffle<float64,1>', ark_shuffle_real_1, 'RandMsg', 664);

proc ark_shuffle_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=bool, array_nd=1);
registerFunction('shuffle<bool,1>', ark_shuffle_bool_1, 'RandMsg', 493);
registerFunction('shuffle<bool,1>', ark_shuffle_bool_1, 'RandMsg', 664);

proc ark_shuffle_bigint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do
return RandMsg.shuffle(cmd, msgArgs, st, array_dtype=bigint, array_nd=1);
registerFunction('shuffle<bigint,1>', ark_shuffle_bigint_1, 'RandMsg', 493);
registerFunction('shuffle<bigint,1>', ark_shuffle_bigint_1, 'RandMsg', 664);

import StatsMsg;

Expand Down
Loading