Merge branch 'master' into 3978_add_retry_on_installation_steps_in_CI

Bears-R-Us · Jan 27, 2025 · 470da66 · 470da66
2 parents bb3ebd2 + 4fa2e44
commit 470da66
Show file tree

Hide file tree

Showing 3 changed files with 138 additions and 42 deletions.
diff --git a/arkouda/numpy/random/_generator.py b/arkouda/numpy/random/_generator.py
@@ -101,7 +101,9 @@ def choice(self, a, size=None, replace=True, p=None):
             raise TypeError("choice only accepts a pdarray or int scalar.")
 
         if not replace and size > pop_size:
-            raise ValueError("Cannot take a larger sample than population when replace is False")
+            raise ValueError(
+                "Cannot take a larger sample than population when replace is False"
+            )
 
         has_weights = p is not None
         if has_weights:
@@ -267,7 +269,9 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
 
         if size is None:
             # delegate to numpy when return size is 1
-            return self._np_generator.integers(low=low, high=high, dtype=dtype, endpoint=endpoint)
+            return self._np_generator.integers(
+                low=low, high=high, dtype=dtype, endpoint=endpoint
+            )
 
         if high is None:
             high = low
@@ -343,7 +347,9 @@ def logistic(self, loc=0.0, scale=1.0, size=None):
             return self._np_generator.logistic(loc=loc, scale=scale, size=size)
 
         is_single_mu, mu = float_array_or_scalar_helper("logistic", "loc", loc, size)
-        is_single_scale, scale = float_array_or_scalar_helper("logistic", "scale", scale, size)
+        is_single_scale, scale = float_array_or_scalar_helper(
+            "logistic", "scale", scale, size
+        )
         if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0:
             raise TypeError("scale must be non-negative.")
 
@@ -603,7 +609,7 @@ def shuffle(self, x):
         )
         self._state += x.size
 
-    def permutation(self, x):
+    def permutation(self, x, method="Argsort"):
         """
         Randomly permute a sequence, or return a permuted range.
 
@@ -612,11 +618,23 @@ def permutation(self, x):
         x: int or pdarray
             If x is an integer, randomly permute ak.arange(x). If x is an array,
             make a copy and shuffle the elements randomly.
+        method: str = 'Argsort'
+            The method for generating the permutation.
+            Allowed values: 'FisherYates', 'Argsort'
 
+            'Argsort' builds the permutation by argsorting randomly generated floats.
+
         Returns
         -------
         pdarray
             pdarray of permuted elements
+
+        Raises
+        ------
+        ValueError
+            Raised if method is not an allowed value.
+        TypeError
+            Raised if x is not of type int or pdarray.
         """
         if _val_isinstance_of_union(x, int_scalars):
             is_domain_perm = True
@@ -633,21 +651,32 @@ def permutation(self, x):
         else:
             raise TypeError("permutation only accepts a pdarray or int scalar.")
 
-        # we have to use the int version since we permute the domain
-        name = self._name_dict[to_numpy_dtype(akint64)]
-        rep_msg = generic_msg(
-            cmd=f"permutation<{dtype.name},{ndim}>",
-            args={
-                "name": name,
-                "x": x,
-                "shape": shape,
-                "size": size,
-                "isDomPerm": is_domain_perm,
-                "state": self._state,
-            },
-        )
-        self._state += size
-        return create_pdarray(rep_msg)
+        if method.lower() == "fisheryates":
+            # we have to use the int version since we permute the domain
+            name = self._name_dict[to_numpy_dtype(akint64)]
+            rep_msg = generic_msg(
+                cmd=f"permutation<{dtype.name},{ndim}>",
+                args={
+                    "name": name,
+                    "x": x,
+                    "shape": shape,
+                    "size": size,
+                    "isDomPerm": is_domain_perm,
+                    "state": self._state,
+                },
+            )
+            self._state += size
+            return create_pdarray(rep_msg)
+        elif method.lower() == "argsort":
+            from arkouda.sorting import argsort
+
+            perm = argsort(self.random(size))
+            if is_domain_perm:
+                return perm
+            else:
+                return x[perm]
+        else:
+            raise ValueError("method did not match allowed values: FisherYates, Argsort")
 
     def poisson(self, lam=1.0, size=None):
         r"""
@@ -689,7 +718,9 @@ def poisson(self, lam=1.0, size=None):
             # delegate to numpy when return size is 1
             return self._np_generator.poisson(lam, size)
 
-        is_single_lambda, lam = float_array_or_scalar_helper("poisson", "lam", lam, size)
+        is_single_lambda, lam = float_array_or_scalar_helper(
+            "poisson", "lam", lam, size
+        )
         if (lam < 0).any() if isinstance(lam, pdarray) else lam < 0:
             raise TypeError("lam must be non-negative.")
 
@@ -826,5 +857,7 @@ def float_array_or_scalar_helper(func_name, var_name, var, size):
 
             var = akcast(var, akfloat64)
     else:
-        raise TypeError(f"{func_name} only accepts a pdarray or float scalar for {var_name}")
+        raise TypeError(
+            f"{func_name} only accepts a pdarray or float scalar for {var_name}"
+        )
     return is_scalar, var
diff --git a/arkouda/sorting.py b/arkouda/sorting.py
@@ -38,6 +38,10 @@ def argsort(
     ----------
     pda : pdarray or Strings or Categorical
         The array to sort (int64, uint64, or float64)
+    algorithm : SortingAlgorithm
+        The algorithm to be used for sorting the array.
+    axis : int_scalars
+        The axis to sort over.
 
     Returns
     -------
@@ -64,6 +68,12 @@ def argsort(
     >>> perm = ak.argsort(a)
     >>> a[perm]
     array([0, 1, 1, 3, 4, 5, 7, 8, 8, 9])
+
+    >>> ak.argsort(a, ak.sorting.SortingAlgorithm["RadixSortLSD"])
+    array([0 2 9 6 8 1 3 5 7 4])
+
+    >>> ak.argsort(a, ak.sorting.SortingAlgorithm["TwoArrayRadixSort"])
+    array([0 2 9 6 8 1 3 5 7 4])
     """
     from arkouda.categorical import Categorical
 

diff --git a/tests/numpy/random_test.py b/tests/numpy/random_test.py
@@ -87,7 +87,8 @@ def test_shuffle(self, data_type):
         assert check(pda, pda_prime, data_type)
 
     @pytest.mark.parametrize("data_type", INT_FLOAT)
-    def test_permutation(self, data_type):
+    @pytest.mark.parametrize("method", ["FisherYates", "Argsort"])
+    def test_permutation(self, data_type, method):
 
         # ints are checked for equality; floats are checked for closeness
 
@@ -98,29 +99,29 @@ def test_permutation(self, data_type):
         # verify all the same elements are in the permutation as in the original
 
         rng = ak.random.default_rng(18)
-        range_permute = rng.permutation(20)
+        range_permute = rng.permutation(20, method=method)
         assert (ak.arange(20) == ak.sort(range_permute)).all()  # range is always int
 
         # verify same seed gives reproducible arrays
 
         rng = ak.random.default_rng(18)
         rnfunc = rng.integers if data_type is ak.int64 else rng.uniform
         pda = rnfunc(-(2**32), 2**32, 10)
-        permuted = rng.permutation(pda)
+        permuted = rng.permutation(pda, method=method)
         assert check(ak.sort(pda), ak.sort(permuted), data_type)
 
         # verify same seed gives reproducible permutations
 
         rng = ak.random.default_rng(18)
-        same_seed_range_permute = rng.permutation(20)
+        same_seed_range_permute = rng.permutation(20, method=method)
         assert check(range_permute, same_seed_range_permute, data_type)
 
         # verify all the same elements are in permutation as in the original
 
         rng = ak.random.default_rng(18)
         rnfunc = rng.integers if data_type is ak.int64 else rng.uniform
         pda_p = rnfunc(-(2**32), 2**32, 10)
-        permuted_p = rng.permutation(pda_p)
+        permuted_p = rng.permutation(pda_p, method=method)
         assert check(ak.sort(pda_p), ak.sort(permuted_p), data_type)
 
     def test_uniform(self):
@@ -205,7 +206,10 @@ def test_logistic(self):
             log_sample = rng.logistic(loc=loc, scale=scale, size=num_samples).to_list()
 
             rng = ak.random.default_rng(17)
-            assert rng.logistic(loc=loc, scale=scale, size=num_samples).to_list() == log_sample
+            assert (
+                rng.logistic(loc=loc, scale=scale, size=num_samples).to_list()
+                == log_sample
+            )
 
     def test_lognormal(self):
         scal = 2
@@ -214,25 +218,40 @@ def test_lognormal(self):
         for mean, sigma in product([scal, arr], [scal, arr]):
             rng = ak.random.default_rng(17)
             num_samples = 5
-            log_sample = rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list()
+            log_sample = rng.lognormal(
+                mean=mean, sigma=sigma, size=num_samples
+            ).to_list()
 
             rng = ak.random.default_rng(17)
-            assert rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list() == log_sample
+            assert (
+                rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list()
+                == log_sample
+            )
 
     def test_normal(self):
         rng = ak.random.default_rng(17)
         both_scalar = rng.normal(loc=10, scale=2, size=10).to_list()
         scale_scalar = rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
         loc_scalar = rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
-        both_array = rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
+        both_array = rng.normal(
+            loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3
+        ).to_list()
 
         # redeclare rng with same seed to test reproducibility
         rng = ak.random.default_rng(17)
         assert rng.normal(loc=10, scale=2, size=10).to_list() == both_scalar
-        assert rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list() == scale_scalar
-        assert rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list() == loc_scalar
         assert (
-            rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
+            rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
+            == scale_scalar
+        )
+        assert (
+            rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
+            == loc_scalar
+        )
+        assert (
+            rng.normal(
+                loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3
+            ).to_list()
             == both_array
         )
 
@@ -279,8 +298,12 @@ def test_exponential(self):
 
         # reset rng with same seed and ensure we get same results
         rng = ak.random.default_rng(17)
-        assert rng.exponential(scale=scal_scale, size=num_samples).to_list() == scal_sample
-        assert rng.exponential(scale=arr_scale, size=num_samples).to_list() == arr_sample
+        assert (
+            rng.exponential(scale=scal_scale, size=num_samples).to_list() == scal_sample
+        )
+        assert (
+            rng.exponential(scale=arr_scale, size=num_samples).to_list() == arr_sample
+        )
 
     def test_choice_hypothesis_testing(self):
         # perform a weighted sample and use chisquare to test
@@ -359,7 +382,9 @@ def test_lognormal_hypothesis_testing(self, method):
 
         mean = rng.uniform(-10, 10)
         deviation = rng.uniform(0, 10)
-        sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples, method=method)
+        sample = rng.lognormal(
+            mean=mean, sigma=deviation, size=num_samples, method=method
+        )
 
         log_sample_list = np.log(sample.to_ndarray()).tolist()
 
@@ -371,7 +396,9 @@ def test_lognormal_hypothesis_testing(self, method):
 
         # second goodness of fit test against the distribution with proper mean and std
         good_fit_res = sp_stats.goodness_of_fit(
-            sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation}
+            sp_stats.norm,
+            log_sample_list,
+            known_params={"loc": mean, "scale": deviation},
         )
         assert good_fit_res.pvalue > 0.05
 
@@ -521,10 +548,32 @@ def test_legacy_randint_with_seed(self):
         ] == values.to_list()
 
         values = ak.random.randint(1, 5, 10, dtype=ak.bool_, seed=2)
-        assert [False, True, True, True, True, False, True, True, True, True] == values.to_list()
+        assert [
+            False,
+            True,
+            True,
+            True,
+            True,
+            False,
+            True,
+            True,
+            True,
+            True,
+        ] == values.to_list()
 
         values = ak.random.randint(1, 5, 10, dtype=bool, seed=2)
-        assert [False, True, True, True, True, False, True, True, True, True] == values.to_list()
+        assert [
+            False,
+            True,
+            True,
+            True,
+            True,
+            False,
+            True,
+            True,
+            True,
+            True,
+        ] == values.to_list()
 
         # Test that int_scalars covers uint8, uint16, uint32
         ak.random.randint(np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2))
@@ -542,12 +591,16 @@ def test_legacy_uniform(self):
 
         uArray = ak.random.uniform(size=3, low=0, high=5, seed=0)
         assert np.allclose(
-            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098], uArray.to_list()
+            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098],
+            uArray.to_list(),
         )
 
-        uArray = ak.random.uniform(size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0))
+        uArray = ak.random.uniform(
+            size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0)
+        )
         assert np.allclose(
-            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098], uArray.to_list()
+            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098],
+            uArray.to_list(),
         )
 
         with pytest.raises(TypeError):