Skip to content

Commit

Permalink
Closes #3373: Add lognormal to random number generators
Browse files Browse the repository at this point in the history
This PR (closes #3373) adds `lognormal` to generators. This PR also adds the function signature in `Commands.chpl` that should've been included in
  • Loading branch information
stress-tess committed Aug 1, 2024
1 parent 4281dd7 commit 39ba3f9
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 32 deletions.
86 changes: 55 additions & 31 deletions PROTO_tests/tests/random_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,20 @@ def test_poissson(self):
assert rng.poisson(lam=scal_lam, size=num_samples).to_list() == scal_sample
assert rng.poisson(lam=arr_lam, size=num_samples).to_list() == arr_sample

def test_poisson_seed_reproducibility(self):
# test resolution of issue #3322, same seed gives same result across machines / num locales
seed = 11
# test with many orders of magnitude to ensure we test different remainders and case where
# all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256)
saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"]

# directory of this file
file_dir = os.path.dirname(os.path.realpath(__file__))
for i, f_name in zip(range(2, 5), saved_seeded_file_patterns):
generated = ak.random.default_rng(seed=seed).poisson(size=10**i)
saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"]
assert (generated == saved).all()

def test_exponential(self):
rng = ak.random.default_rng(17)
num_samples = 5
Expand Down Expand Up @@ -268,6 +282,25 @@ def test_choice_hypothesis_testing(self):
# if pval <= 0.05, the difference from the expected distribution is significant
assert pval > 0.05

def test_exponential_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

scale = rng.uniform(0, 10)
for method in "zig", "inv":
sample = rng.exponential(scale=scale, size=num_samples, method=method)
sample_list = sample.to_list()

# do the Kolmogorov-Smirnov test for goodness of fit
ks_res = sp_stats.kstest(
rvs=sample_list,
cdf=sp_stats.expon.cdf,
args=(0, scale),
)
assert ks_res.pvalue > 0.05

def test_normal_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
Expand All @@ -291,19 +324,29 @@ def test_normal_hypothesis_testing(self):
)
assert good_fit_res.pvalue > 0.05

def test_poisson_seed_reproducibility(self):
# test resolution of issue #3322, same seed gives same result across machines / num locales
seed = 11
# test with many orders of magnitude to ensure we test different remainders and case where
# all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256)
saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"]
def test_lognormal_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

# directory of this file
file_dir = os.path.dirname(os.path.realpath(__file__))
for i, f_name in zip(range(2, 5), saved_seeded_file_patterns):
generated = ak.random.default_rng(seed=seed).poisson(size=10**i)
saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"]
assert (generated == saved).all()
mean = rng.uniform(-10, 10)
deviation = rng.uniform(0, 10)
sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples)

log_sample_list = np.log(sample.to_ndarray()).tolist()

# first test if samples are normal at all
_, pval = sp_stats.normaltest(log_sample_list)

# if pval <= 0.05, the difference from the expected distribution is significant
assert pval > 0.05

# second goodness of fit test against the distribution with proper mean and std
good_fit_res = sp_stats.goodness_of_fit(
sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation}
)
assert good_fit_res.pvalue > 0.05

def test_poisson_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
Expand All @@ -330,25 +373,6 @@ def test_poisson_hypothesis_testing(self):
_, pval = sp_stats.chisquare(f_obs=obs_counts, f_exp=exp_counts)
assert pval > 0.05

def test_exponential_hypothesis_testing(self):
# I tested this many times without a set seed, but with no seed
# it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
rng = ak.random.default_rng(43)
num_samples = 10**4

scale = rng.uniform(0, 10)
for method in "zig", "inv":
sample = rng.exponential(scale=scale, size=num_samples, method=method)
sample_list = sample.to_list()

# do the Kolmogorov-Smirnov test for goodness of fit
ks_res = sp_stats.kstest(
rvs=sample_list,
cdf=sp_stats.expon.cdf,
args=(0, scale),
)
assert ks_res.pvalue > 0.05

def test_legacy_randint(self):
testArray = ak.random.randint(0, 10, 5)
assert isinstance(testArray, ak.pdarray)
Expand Down
56 changes: 55 additions & 1 deletion arkouda/random/_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import numpy.random as np_random

from arkouda.client import generic_msg
Expand Down Expand Up @@ -290,6 +291,59 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
self._state += full_size
return create_pdarray(rep_msg)

def lognormal(self, mean=0.0, sigma=1.0, size=None):
r"""
Draw samples from a log-normal distribution with specified mean,
standard deviation, and array shape.
Note that the mean and standard deviation are not the values for the distribution itself,
but of the underlying normal distribution it is derived from.
Parameters
----------
mean: float or pdarray of floats, optional
Mean of the distribution. Default of 0.
sigma: float or pdarray of floats, optional
Standard deviation of the distribution. Must be non-negative. Default of 1.
size: numeric_scalars, optional
Output shape. Default is None, in which case a single value is returned.
Notes
-----
A variable `x` has a log-normal distribution if `log(x)` is normally distributed.
The probability density for the log-normal distribution is:
.. math::
p(x) = \frac{1}{\sigma x \sqrt{2\pi}} e^{-\frac{(\ln(x)-\mu)^2}{2\sigma^2}}
where :math:`\mu` is the mean and :math:`\sigma` the standard deviation of the normally
distributed logarithm of the variable.
A log-normal distribution results if a random variable is the product of a
large number of independent, identically-distributed variables in the same
way that a normal distribution results if the variable is
the sum of a large number of independent, identically-distributed variables.
Returns
-------
pdarray
Pdarray of floats (unless size=None, in which case a single float is returned).
See Also
--------
normal
Examples
--------
>>> ak.random.default_rng(17).lognormal(3, 2.5, 3)
array([7.3866978126031091 106.20159494048757 4.5424399190667666])
"""
from arkouda.numeric import exp

norm_arr = self.normal(loc=mean, scale=sigma, size=size)
return exp(norm_arr) if size is not None else np.exp(norm_arr)

def normal(self, loc=0.0, scale=1.0, size=None):
r"""
Draw samples from a normal (Gaussian) distribution
Expand Down Expand Up @@ -327,7 +381,7 @@ def normal(self, loc=0.0, scale=1.0, size=None):
Examples
--------
>>> ak.random.default_rng(17).normal(3, 2.5, 10)
>>> ak.random.default_rng(17).normal(3, 2.5, 3)
array([2.3673425816523577 4.0532529435624589 2.0598322696795694])
"""
if size is None:
Expand Down
4 changes: 4 additions & 0 deletions pydoc/usage/random.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ integers
---------
.. autofunction:: arkouda.random.Generator.integers

lognormal
---------
.. autofunction:: arkouda.random.Generator.lognormal

normal
---------
.. autofunction:: arkouda.random.Generator.normal
Expand Down

0 comments on commit 39ba3f9

Please sign in to comment.