diff --git a/arkouda/numpy/random/_generator.py b/arkouda/numpy/random/_generator.py
index 654c8bc509..52ab31b3b9 100644
--- a/arkouda/numpy/random/_generator.py
+++ b/arkouda/numpy/random/_generator.py
@@ -101,7 +101,9 @@ def choice(self, a, size=None, replace=True, p=None):
             raise TypeError("choice only accepts a pdarray or int scalar.")
 
         if not replace and size > pop_size:
-            raise ValueError("Cannot take a larger sample than population when replace is False")
+            raise ValueError(
+                "Cannot take a larger sample than population when replace is False"
+            )
 
         has_weights = p is not None
         if has_weights:
@@ -267,7 +269,9 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
 
         if size is None:
             # delegate to numpy when return size is 1
-            return self._np_generator.integers(low=low, high=high, dtype=dtype, endpoint=endpoint)
+            return self._np_generator.integers(
+                low=low, high=high, dtype=dtype, endpoint=endpoint
+            )
 
         if high is None:
             high = low
@@ -343,7 +347,9 @@ def logistic(self, loc=0.0, scale=1.0, size=None):
             return self._np_generator.logistic(loc=loc, scale=scale, size=size)
 
         is_single_mu, mu = float_array_or_scalar_helper("logistic", "loc", loc, size)
-        is_single_scale, scale = float_array_or_scalar_helper("logistic", "scale", scale, size)
+        is_single_scale, scale = float_array_or_scalar_helper(
+            "logistic", "scale", scale, size
+        )
 
         if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0:
             raise TypeError("scale must be non-negative.")
@@ -603,7 +609,7 @@ def shuffle(self, x):
         )
         self._state += x.size
 
-    def permutation(self, x):
+    def permutation(self, x, method="Argsort"):
         """
         Randomly permute a sequence, or return a permuted range.
 
@@ -612,11 +618,23 @@ def permutation(self, x):
         x: int or pdarray
             If x is an integer, randomly permute ak.arange(x).
             If x is an array, make a copy and shuffle the elements randomly.
+        method: str = 'Argsort'
+            The method for generating the permutation.
+            Allowed values: 'FisherYates', 'Argsort'
+            If 'Argsort' is selected, the permutation will be generated by
+            an argsort performed on randomly generated floats.
 
         Returns
        -------
         pdarray
             pdarray of permuted elements
+
+        Raises
+        ------
+        ValueError
+            Raised if method is not an allowed value.
+        TypeError
+            Raised if x is not of type int or pdarray.
         """
         if _val_isinstance_of_union(x, int_scalars):
             is_domain_perm = True
@@ -633,21 +651,32 @@ def permutation(self, x):
         else:
             raise TypeError("permutation only accepts a pdarray or int scalar.")
 
-        # we have to use the int version since we permute the domain
-        name = self._name_dict[to_numpy_dtype(akint64)]
-        rep_msg = generic_msg(
-            cmd=f"permutation<{dtype.name},{ndim}>",
-            args={
-                "name": name,
-                "x": x,
-                "shape": shape,
-                "size": size,
-                "isDomPerm": is_domain_perm,
-                "state": self._state,
-            },
-        )
-        self._state += size
-        return create_pdarray(rep_msg)
+        if method.lower() == "fisheryates":
+            # we have to use the int version since we permute the domain
+            name = self._name_dict[to_numpy_dtype(akint64)]
+            rep_msg = generic_msg(
+                cmd=f"permutation<{dtype.name},{ndim}>",
+                args={
+                    "name": name,
+                    "x": x,
+                    "shape": shape,
+                    "size": size,
+                    "isDomPerm": is_domain_perm,
+                    "state": self._state,
+                },
+            )
+            self._state += size
+            return create_pdarray(rep_msg)
+        elif method.lower() == "argsort":
+            from arkouda.sorting import argsort
+
+            perm = argsort(self.random(size))
+            if is_domain_perm:
+                return perm
+            else:
+                return x[perm]
+        else:
+            raise ValueError("method did not match allowed values: FisherYates, Argsort")
 
     def poisson(self, lam=1.0, size=None):
         r"""
@@ -689,7 +718,9 @@ def poisson(self, lam=1.0, size=None):
             # delegate to numpy when return size is 1
             return self._np_generator.poisson(lam, size)
 
-        is_single_lambda, lam = float_array_or_scalar_helper("poisson", "lam", lam, size)
+        is_single_lambda, lam = float_array_or_scalar_helper(
+            "poisson", "lam", lam, size
+        )
 
         if (lam < 0).any() if isinstance(lam, pdarray) else lam < 0:
             raise TypeError("lam must be non-negative.")
@@ -826,5 +857,7 @@ def float_array_or_scalar_helper(func_name, var_name, var, size):
         var = akcast(var, akfloat64)
     else:
-        raise TypeError(f"{func_name} only accepts a pdarray or float scalar for {var_name}")
+        raise TypeError(
+            f"{func_name} only accepts a pdarray or float scalar for {var_name}"
+        )
 
     return is_scalar, var
diff --git a/arkouda/sorting.py b/arkouda/sorting.py
index 3904d89b80..7e721c7c1f 100644
--- a/arkouda/sorting.py
+++ b/arkouda/sorting.py
@@ -38,6 +38,10 @@ def argsort(
     ----------
     pda : pdarray or Strings or Categorical
         The array to sort (int64, uint64, or float64)
+    algorithm : SortingAlgorithm
+        The algorithm to be used for sorting the array.
+    axis : int_scalars
+        The axis to sort over.
 
     Returns
     -------
@@ -64,6 +68,12 @@ def argsort(
     >>> perm = ak.argsort(a)
     >>> a[perm]
     array([0, 1, 1, 3, 4, 5, 7, 8, 8, 9])
+
+    >>> ak.argsort(a, ak.sorting.SortingAlgorithm["RadixSortLSD"])
+    array([0 2 9 6 8 1 3 5 7 4])
+
+    >>> ak.argsort(a, ak.sorting.SortingAlgorithm["TwoArrayRadixSort"])
+    array([0 2 9 6 8 1 3 5 7 4])
     """
     from arkouda.categorical import Categorical
 
diff --git a/tests/numpy/random_test.py b/tests/numpy/random_test.py
index 7d6f8ac13e..a535368877 100644
--- a/tests/numpy/random_test.py
+++ b/tests/numpy/random_test.py
@@ -87,7 +87,8 @@ def test_shuffle(self, data_type):
         assert check(pda, pda_prime, data_type)
 
     @pytest.mark.parametrize("data_type", INT_FLOAT)
-    def test_permutation(self, data_type):
+    @pytest.mark.parametrize("method", ["FisherYates", "Argsort"])
+    def test_permutation(self, data_type, method):
         # ints are checked for equality; floats are checked for closeness
 
@@ -98,7 +99,7 @@ def test_permutation(self, data_type):
 
         # verify all the same elements are in the permutation as in the original
         rng = ak.random.default_rng(18)
-        range_permute = rng.permutation(20)
+        range_permute = rng.permutation(20, method=method)
         assert (ak.arange(20) == ak.sort(range_permute)).all()  # range is always int
 
         # verify same seed gives reproducible arrays
@@ -106,13 +107,13 @@ def test_permutation(self, data_type):
         rng = ak.random.default_rng(18)
         rnfunc = rng.integers if data_type is ak.int64 else rng.uniform
         pda = rnfunc(-(2**32), 2**32, 10)
-        permuted = rng.permutation(pda)
+        permuted = rng.permutation(pda, method=method)
         assert check(ak.sort(pda), ak.sort(permuted), data_type)
 
         # verify same seed gives reproducible permutations
         rng = ak.random.default_rng(18)
-        same_seed_range_permute = rng.permutation(20)
+        same_seed_range_permute = rng.permutation(20, method=method)
         assert check(range_permute, same_seed_range_permute, data_type)
 
         # verify all the same elements are in permutation as in the original
@@ -120,7 +121,7 @@ def test_permutation(self, data_type):
         rng = ak.random.default_rng(18)
         rnfunc = rng.integers if data_type is ak.int64 else rng.uniform
         pda_p = rnfunc(-(2**32), 2**32, 10)
-        permuted_p = rng.permutation(pda_p)
+        permuted_p = rng.permutation(pda_p, method=method)
         assert check(ak.sort(pda_p), ak.sort(permuted_p), data_type)
 
     def test_uniform(self):
@@ -205,7 +206,10 @@ def test_logistic(self):
             log_sample = rng.logistic(loc=loc, scale=scale, size=num_samples).to_list()
 
             rng = ak.random.default_rng(17)
-            assert rng.logistic(loc=loc, scale=scale, size=num_samples).to_list() == log_sample
+            assert (
+                rng.logistic(loc=loc, scale=scale, size=num_samples).to_list()
+                == log_sample
+            )
 
     def test_lognormal(self):
         scal = 2
@@ -214,25 +218,40 @@ def test_lognormal(self):
         for mean, sigma in product([scal, arr], [scal, arr]):
             rng = ak.random.default_rng(17)
             num_samples = 5
-            log_sample = rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list()
+            log_sample = rng.lognormal(
+                mean=mean, sigma=sigma, size=num_samples
+            ).to_list()
 
             rng = ak.random.default_rng(17)
-            assert rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list() == log_sample
+            assert (
+                rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list()
+                == log_sample
+            )
 
     def test_normal(self):
         rng = ak.random.default_rng(17)
         both_scalar = rng.normal(loc=10, scale=2, size=10).to_list()
         scale_scalar = rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
         loc_scalar = rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
-        both_array = rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
+        both_array = rng.normal(
+            loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3
+        ).to_list()
 
         # redeclare rng with same seed to test reproducibility
         rng = ak.random.default_rng(17)
         assert rng.normal(loc=10, scale=2, size=10).to_list() == both_scalar
-        assert rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list() == scale_scalar
-        assert rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list() == loc_scalar
         assert (
-            rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
+            rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
+            == scale_scalar
+        )
+        assert (
+            rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
+            == loc_scalar
+        )
+        assert (
+            rng.normal(
+                loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3
+            ).to_list()
             == both_array
         )
 
@@ -279,8 +298,12 @@ def test_exponential(self):
 
         # reset rng with same seed and ensure we get same results
         rng = ak.random.default_rng(17)
-        assert rng.exponential(scale=scal_scale, size=num_samples).to_list() == scal_sample
-        assert rng.exponential(scale=arr_scale, size=num_samples).to_list() == arr_sample
+        assert (
+            rng.exponential(scale=scal_scale, size=num_samples).to_list() == scal_sample
+        )
+        assert (
+            rng.exponential(scale=arr_scale, size=num_samples).to_list() == arr_sample
+        )
 
     def test_choice_hypothesis_testing(self):
         # perform a weighted sample and use chisquare to test
@@ -359,7 +382,9 @@ def test_lognormal_hypothesis_testing(self, method):
         mean = rng.uniform(-10, 10)
         deviation = rng.uniform(0, 10)
 
-        sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples, method=method)
+        sample = rng.lognormal(
+            mean=mean, sigma=deviation, size=num_samples, method=method
+        )
 
         log_sample_list = np.log(sample.to_ndarray()).tolist()
 
@@ -371,7 +396,9 @@ def test_lognormal_hypothesis_testing(self, method):
 
         # second goodness of fit test against the distribution with proper mean and std
         good_fit_res = sp_stats.goodness_of_fit(
-            sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation}
+            sp_stats.norm,
+            log_sample_list,
+            known_params={"loc": mean, "scale": deviation},
         )
         assert good_fit_res.pvalue > 0.05
 
@@ -521,10 +548,32 @@ def test_legacy_randint_with_seed(self):
         ] == values.to_list()
 
         values = ak.random.randint(1, 5, 10, dtype=ak.bool_, seed=2)
-        assert [False, True, True, True, True, False, True, True, True, True] == values.to_list()
+        assert [
+            False,
+            True,
+            True,
+            True,
+            True,
+            False,
+            True,
+            True,
+            True,
+            True,
+        ] == values.to_list()
 
         values = ak.random.randint(1, 5, 10, dtype=bool, seed=2)
-        assert [False, True, True, True, True, False, True, True, True, True] == values.to_list()
+        assert [
+            False,
+            True,
+            True,
+            True,
+            True,
+            False,
+            True,
+            True,
+            True,
+            True,
+        ] == values.to_list()
 
         # Test that int_scalars covers uint8, uint16, uint32
         ak.random.randint(np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2))
@@ -542,12 +591,16 @@ def test_legacy_uniform(self):
 
         uArray = ak.random.uniform(size=3, low=0, high=5, seed=0)
         assert np.allclose(
-            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098], uArray.to_list()
+            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098],
+            uArray.to_list(),
         )
 
-        uArray = ak.random.uniform(size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0))
+        uArray = ak.random.uniform(
+            size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0)
+        )
         assert np.allclose(
-            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098], uArray.to_list()
+            [0.30013431967121934, 0.47383036230759112, 1.0441791878997098],
+            uArray.to_list(),
         )
 
         with pytest.raises(TypeError):
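
Reviewer note, not part of the patch: a minimal sketch of how the new `method` keyword on `Generator.permutation` is expected to be exercised, mirroring the parametrized tests above. It assumes a locally running arkouda server that the client can connect to.

import arkouda as ak

ak.connect()  # assumes an arkouda server is reachable on the default host/port

rng = ak.random.default_rng(18)
# 'FisherYates' performs the shuffle server-side via the permutation command
perm_fy = rng.permutation(20, method="FisherYates")
# 'Argsort' builds the permutation from an argsort of randomly generated floats
perm_as = rng.permutation(20, method="Argsort")

# both methods return some permutation of ak.arange(20)
assert (ak.sort(perm_fy) == ak.arange(20)).all()
assert (ak.sort(perm_as) == ak.arange(20)).all()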