Closes #3373: Add lognormal to random number generators

This PR (closes #3373) adds `lognormal` to generators. This PR also adds the function signature in `Commands.chpl` that should've been included in
Bears-R-Us · Aug 1, 2024 · 39ba3f9 · 39ba3f9
1 parent 4281dd7
commit 39ba3f9
Show file tree

Hide file tree

Showing 3 changed files with 114 additions and 32 deletions.
diff --git a/PROTO_tests/tests/random_test.py b/PROTO_tests/tests/random_test.py
@@ -227,6 +227,20 @@ def test_poissson(self):
         assert rng.poisson(lam=scal_lam, size=num_samples).to_list() == scal_sample
         assert rng.poisson(lam=arr_lam, size=num_samples).to_list() == arr_sample
 
+    def test_poisson_seed_reproducibility(self):
+        # test resolution of issue #3322, same seed gives same result across machines / num locales
+        seed = 11
+        # test with many orders of magnitude to ensure we test different remainders and case where
+        # all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256)
+        saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"]
+
+        # directory of this file
+        file_dir = os.path.dirname(os.path.realpath(__file__))
+        for i, f_name in zip(range(2, 5), saved_seeded_file_patterns):
+            generated = ak.random.default_rng(seed=seed).poisson(size=10**i)
+            saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"]
+            assert (generated == saved).all()
+
     def test_exponential(self):
         rng = ak.random.default_rng(17)
         num_samples = 5
@@ -268,6 +282,25 @@ def test_choice_hypothesis_testing(self):
         # if pval <= 0.05, the difference from the expected distribution is significant
         assert pval > 0.05
 
+    def test_exponential_hypothesis_testing(self):
+        # I tested this many times without a set seed, but with no seed
+        # it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
+        rng = ak.random.default_rng(43)
+        num_samples = 10**4
+
+        scale = rng.uniform(0, 10)
+        for method in "zig", "inv":
+            sample = rng.exponential(scale=scale, size=num_samples, method=method)
+            sample_list = sample.to_list()
+
+            # do the Kolmogorov-Smirnov test for goodness of fit
+            ks_res = sp_stats.kstest(
+                rvs=sample_list,
+                cdf=sp_stats.expon.cdf,
+                args=(0, scale),
+            )
+            assert ks_res.pvalue > 0.05
+
     def test_normal_hypothesis_testing(self):
         # I tested this many times without a set seed, but with no seed
         # it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
@@ -291,19 +324,29 @@ def test_normal_hypothesis_testing(self):
         )
         assert good_fit_res.pvalue > 0.05
 
-    def test_poisson_seed_reproducibility(self):
-        # test resolution of issue #3322, same seed gives same result across machines / num locales
-        seed = 11
-        # test with many orders of magnitude to ensure we test different remainders and case where
-        # all elements are pulled to first locale i.e. total_size < (minimum elemsPerStream = 256)
-        saved_seeded_file_patterns = ["second_order*", "third_order*", "fourth_order*"]
+    def test_lognormal_hypothesis_testing(self):
+        # I tested this many times without a set seed, but with no seed
+        # it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
+        rng = ak.random.default_rng(43)
+        num_samples = 10**4
 
-        # directory of this file
-        file_dir = os.path.dirname(os.path.realpath(__file__))
-        for i, f_name in zip(range(2, 5), saved_seeded_file_patterns):
-            generated = ak.random.default_rng(seed=seed).poisson(size=10**i)
-            saved = ak.read_parquet(f"{file_dir}/saved_seeded_random/{f_name}")["array"]
-            assert (generated == saved).all()
+        mean = rng.uniform(-10, 10)
+        deviation = rng.uniform(0, 10)
+        sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples)
+
+        log_sample_list = np.log(sample.to_ndarray()).tolist()
+
+        # first test if samples are normal at all
+        _, pval = sp_stats.normaltest(log_sample_list)
+
+        # if pval <= 0.05, the difference from the expected distribution is significant
+        assert pval > 0.05
+
+        # second goodness of fit test against the distribution with proper mean and std
+        good_fit_res = sp_stats.goodness_of_fit(
+            sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation}
+        )
+        assert good_fit_res.pvalue > 0.05
 
     def test_poisson_hypothesis_testing(self):
         # I tested this many times without a set seed, but with no seed
@@ -330,25 +373,6 @@ def test_poisson_hypothesis_testing(self):
         _, pval = sp_stats.chisquare(f_obs=obs_counts, f_exp=exp_counts)
         assert pval > 0.05
 
-    def test_exponential_hypothesis_testing(self):
-        # I tested this many times without a set seed, but with no seed
-        # it's expected to fail one out of every ~20 runs given a pval limit of 0.05.
-        rng = ak.random.default_rng(43)
-        num_samples = 10**4
-
-        scale = rng.uniform(0, 10)
-        for method in "zig", "inv":
-            sample = rng.exponential(scale=scale, size=num_samples, method=method)
-            sample_list = sample.to_list()
-
-            # do the Kolmogorov-Smirnov test for goodness of fit
-            ks_res = sp_stats.kstest(
-                rvs=sample_list,
-                cdf=sp_stats.expon.cdf,
-                args=(0, scale),
-            )
-            assert ks_res.pvalue > 0.05
-
     def test_legacy_randint(self):
         testArray = ak.random.randint(0, 10, 5)
         assert isinstance(testArray, ak.pdarray)

diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py
@@ -1,3 +1,4 @@
+import numpy as np
 import numpy.random as np_random
 
 from arkouda.client import generic_msg
@@ -290,6 +291,59 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
         self._state += full_size
         return create_pdarray(rep_msg)
 
+    def lognormal(self, mean=0.0, sigma=1.0, size=None):
+        r"""
+        Draw samples from a log-normal distribution with specified mean,
+        standard deviation, and array shape.
+
+        Note that the mean and standard deviation are not the values for the distribution itself,
+        but of the underlying normal distribution it is derived from.
+
+        Parameters
+        ----------
+        mean: float or pdarray of floats, optional
+            Mean of the distribution. Default of 0.
+
+        sigma: float or pdarray of floats, optional
+            Standard deviation of the distribution. Must be non-negative. Default of 1.
+
+        size: numeric_scalars, optional
+            Output shape. Default is None, in which case a single value is returned.
+
+        Notes
+        -----
+        A variable `x` has a log-normal distribution if `log(x)` is normally distributed.
+        The probability density for the log-normal distribution is:
+
+        .. math::
+           p(x) = \frac{1}{\sigma x \sqrt{2\pi}} e^{-\frac{(\ln(x)-\mu)^2}{2\sigma^2}}
+
+        where :math:`\mu` is the mean and :math:`\sigma` the standard deviation of the normally
+        distributed logarithm of the variable.
+        A log-normal distribution results if a random variable is the product of a
+        large number of independent, identically-distributed variables in the same
+        way that a normal distribution results if the variable is
+        the sum of a large number of independent, identically-distributed variables.
+
+        Returns
+        -------
+        pdarray
+            Pdarray of floats (unless size=None, in which case a single float is returned).
+
+        See Also
+        --------
+        normal
+
+        Examples
+        --------
+        >>> ak.random.default_rng(17).lognormal(3, 2.5, 3)
+        array([7.3866978126031091 106.20159494048757 4.5424399190667666])
+        """
+        from arkouda.numeric import exp
+
+        norm_arr = self.normal(loc=mean, scale=sigma, size=size)
+        return exp(norm_arr) if size is not None else np.exp(norm_arr)
+
     def normal(self, loc=0.0, scale=1.0, size=None):
         r"""
         Draw samples from a normal (Gaussian) distribution
@@ -327,7 +381,7 @@ def normal(self, loc=0.0, scale=1.0, size=None):
 
         Examples
         --------
-        >>> ak.random.default_rng(17).normal(3, 2.5, 10)
+        >>> ak.random.default_rng(17).normal(3, 2.5, 3)
         array([2.3673425816523577 4.0532529435624589 2.0598322696795694])
         """
         if size is None:

diff --git a/pydoc/usage/random.rst b/pydoc/usage/random.rst
@@ -29,6 +29,10 @@ integers
 ---------
 .. autofunction:: arkouda.random.Generator.integers
 
+lognormal
+---------
+.. autofunction:: arkouda.random.Generator.lognormal
+
 normal
 ---------
 .. autofunction:: arkouda.random.Generator.normal