Skip to content

Commit

Permalink
Merge branch 'master' into 3978_add_retry_on_installation_steps_in_CI
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts authored Jan 27, 2025
2 parents bb3ebd2 + 4fa2e44 commit 470da66
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 42 deletions.
75 changes: 54 additions & 21 deletions arkouda/numpy/random/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ def choice(self, a, size=None, replace=True, p=None):
raise TypeError("choice only accepts a pdarray or int scalar.")

if not replace and size > pop_size:
raise ValueError("Cannot take a larger sample than population when replace is False")
raise ValueError(
"Cannot take a larger sample than population when replace is False"
)

has_weights = p is not None
if has_weights:
Expand Down Expand Up @@ -267,7 +269,9 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):

if size is None:
# delegate to numpy when return size is 1
return self._np_generator.integers(low=low, high=high, dtype=dtype, endpoint=endpoint)
return self._np_generator.integers(
low=low, high=high, dtype=dtype, endpoint=endpoint
)

if high is None:
high = low
Expand Down Expand Up @@ -343,7 +347,9 @@ def logistic(self, loc=0.0, scale=1.0, size=None):
return self._np_generator.logistic(loc=loc, scale=scale, size=size)

is_single_mu, mu = float_array_or_scalar_helper("logistic", "loc", loc, size)
is_single_scale, scale = float_array_or_scalar_helper("logistic", "scale", scale, size)
is_single_scale, scale = float_array_or_scalar_helper(
"logistic", "scale", scale, size
)
if (scale < 0).any() if isinstance(scale, pdarray) else scale < 0:
raise TypeError("scale must be non-negative.")

Expand Down Expand Up @@ -603,7 +609,7 @@ def shuffle(self, x):
)
self._state += x.size

def permutation(self, x):
def permutation(self, x, method="Argsort"):
"""
Randomly permute a sequence, or return a permuted range.
Expand All @@ -612,11 +618,23 @@ def permutation(self, x):
x: int or pdarray
If x is an integer, randomly permute ak.arange(x). If x is an array,
make a copy and shuffle the elements randomly.
method: str = 'Argsort'
The method for generating the permutation.
Allowed values: 'FisherYates', 'Argsort' (matched case-insensitively).
If 'Argsort' is selected, the permutation will be generated by
an argsort performed on randomly generated floats.
Returns
-------
pdarray
pdarray of permuted elements
Raises
------
ValueError
Raised if method is not an allowed value.
TypeError
Raised if x is not of type int or pdarray.
"""
if _val_isinstance_of_union(x, int_scalars):
is_domain_perm = True
Expand All @@ -633,21 +651,32 @@ def permutation(self, x):
else:
raise TypeError("permutation only accepts a pdarray or int scalar.")

# we have to use the int version since we permute the domain
name = self._name_dict[to_numpy_dtype(akint64)]
rep_msg = generic_msg(
cmd=f"permutation<{dtype.name},{ndim}>",
args={
"name": name,
"x": x,
"shape": shape,
"size": size,
"isDomPerm": is_domain_perm,
"state": self._state,
},
)
self._state += size
return create_pdarray(rep_msg)
if method.lower() == "fisheryates":
# we have to use the int version since we permute the domain
name = self._name_dict[to_numpy_dtype(akint64)]
rep_msg = generic_msg(
cmd=f"permutation<{dtype.name},{ndim}>",
args={
"name": name,
"x": x,
"shape": shape,
"size": size,
"isDomPerm": is_domain_perm,
"state": self._state,
},
)
self._state += size
return create_pdarray(rep_msg)
elif method.lower() == "argsort":
from arkouda.sorting import argsort

perm = argsort(self.random(size))
if is_domain_perm:
return perm
else:
return x[perm]
else:
raise ValueError("method did not match allowed values: FisherYates, Argsort")

def poisson(self, lam=1.0, size=None):
r"""
Expand Down Expand Up @@ -689,7 +718,9 @@ def poisson(self, lam=1.0, size=None):
# delegate to numpy when return size is 1
return self._np_generator.poisson(lam, size)

is_single_lambda, lam = float_array_or_scalar_helper("poisson", "lam", lam, size)
is_single_lambda, lam = float_array_or_scalar_helper(
"poisson", "lam", lam, size
)
if (lam < 0).any() if isinstance(lam, pdarray) else lam < 0:
raise TypeError("lam must be non-negative.")

Expand Down Expand Up @@ -826,5 +857,7 @@ def float_array_or_scalar_helper(func_name, var_name, var, size):

var = akcast(var, akfloat64)
else:
raise TypeError(f"{func_name} only accepts a pdarray or float scalar for {var_name}")
raise TypeError(
f"{func_name} only accepts a pdarray or float scalar for {var_name}"
)
return is_scalar, var
10 changes: 10 additions & 0 deletions arkouda/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ def argsort(
----------
pda : pdarray or Strings or Categorical
The array to sort (int64, uint64, or float64)
algorithm : SortingAlgorithm
The algorithm to be used for sorting the array.
axis : int_scalars
The axis to sort over.
Returns
-------
Expand All @@ -64,6 +68,12 @@ def argsort(
>>> perm = ak.argsort(a)
>>> a[perm]
array([0, 1, 1, 3, 4, 5, 7, 8, 8, 9])
>>> ak.argsort(a, ak.sorting.SortingAlgorithm["RadixSortLSD"])
array([0 2 9 6 8 1 3 5 7 4])
>>> ak.argsort(a, ak.sorting.SortingAlgorithm["TwoArrayRadixSort"])
array([0 2 9 6 8 1 3 5 7 4])
"""
from arkouda.categorical import Categorical

Expand Down
95 changes: 74 additions & 21 deletions tests/numpy/random_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def test_shuffle(self, data_type):
assert check(pda, pda_prime, data_type)

@pytest.mark.parametrize("data_type", INT_FLOAT)
def test_permutation(self, data_type):
@pytest.mark.parametrize("method", ["FisherYates", "Argsort"])
def test_permutation(self, data_type, method):

# ints are checked for equality; floats are checked for closeness

Expand All @@ -98,29 +99,29 @@ def test_permutation(self, data_type):
# verify all the same elements are in the permutation as in the original

rng = ak.random.default_rng(18)
range_permute = rng.permutation(20)
range_permute = rng.permutation(20, method=method)
assert (ak.arange(20) == ak.sort(range_permute)).all() # range is always int

# verify a permuted array contains the same elements as the original array

rng = ak.random.default_rng(18)
rnfunc = rng.integers if data_type is ak.int64 else rng.uniform
pda = rnfunc(-(2**32), 2**32, 10)
permuted = rng.permutation(pda)
permuted = rng.permutation(pda, method=method)
assert check(ak.sort(pda), ak.sort(permuted), data_type)

# verify same seed gives reproducible permutations

rng = ak.random.default_rng(18)
same_seed_range_permute = rng.permutation(20)
same_seed_range_permute = rng.permutation(20, method=method)
assert check(range_permute, same_seed_range_permute, data_type)

# verify all the same elements are in permutation as in the original

rng = ak.random.default_rng(18)
rnfunc = rng.integers if data_type is ak.int64 else rng.uniform
pda_p = rnfunc(-(2**32), 2**32, 10)
permuted_p = rng.permutation(pda_p)
permuted_p = rng.permutation(pda_p, method=method)
assert check(ak.sort(pda_p), ak.sort(permuted_p), data_type)

def test_uniform(self):
Expand Down Expand Up @@ -205,7 +206,10 @@ def test_logistic(self):
log_sample = rng.logistic(loc=loc, scale=scale, size=num_samples).to_list()

rng = ak.random.default_rng(17)
assert rng.logistic(loc=loc, scale=scale, size=num_samples).to_list() == log_sample
assert (
rng.logistic(loc=loc, scale=scale, size=num_samples).to_list()
== log_sample
)

def test_lognormal(self):
scal = 2
Expand All @@ -214,25 +218,40 @@ def test_lognormal(self):
for mean, sigma in product([scal, arr], [scal, arr]):
rng = ak.random.default_rng(17)
num_samples = 5
log_sample = rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list()
log_sample = rng.lognormal(
mean=mean, sigma=sigma, size=num_samples
).to_list()

rng = ak.random.default_rng(17)
assert rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list() == log_sample
assert (
rng.lognormal(mean=mean, sigma=sigma, size=num_samples).to_list()
== log_sample
)

def test_normal(self):
rng = ak.random.default_rng(17)
both_scalar = rng.normal(loc=10, scale=2, size=10).to_list()
scale_scalar = rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
loc_scalar = rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
both_array = rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
both_array = rng.normal(
loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3
).to_list()

# redeclare rng with same seed to test reproducibility
rng = ak.random.default_rng(17)
assert rng.normal(loc=10, scale=2, size=10).to_list() == both_scalar
assert rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list() == scale_scalar
assert rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list() == loc_scalar
assert (
rng.normal(loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3).to_list()
rng.normal(loc=ak.array([0, 10, 20]), scale=1, size=3).to_list()
== scale_scalar
)
assert (
rng.normal(loc=10, scale=ak.array([1, 2, 3]), size=3).to_list()
== loc_scalar
)
assert (
rng.normal(
loc=ak.array([0, 10, 20]), scale=ak.array([1, 2, 3]), size=3
).to_list()
== both_array
)

Expand Down Expand Up @@ -279,8 +298,12 @@ def test_exponential(self):

# reset rng with same seed and ensure we get same results
rng = ak.random.default_rng(17)
assert rng.exponential(scale=scal_scale, size=num_samples).to_list() == scal_sample
assert rng.exponential(scale=arr_scale, size=num_samples).to_list() == arr_sample
assert (
rng.exponential(scale=scal_scale, size=num_samples).to_list() == scal_sample
)
assert (
rng.exponential(scale=arr_scale, size=num_samples).to_list() == arr_sample
)

def test_choice_hypothesis_testing(self):
# perform a weighted sample and use chisquare to test
Expand Down Expand Up @@ -359,7 +382,9 @@ def test_lognormal_hypothesis_testing(self, method):

mean = rng.uniform(-10, 10)
deviation = rng.uniform(0, 10)
sample = rng.lognormal(mean=mean, sigma=deviation, size=num_samples, method=method)
sample = rng.lognormal(
mean=mean, sigma=deviation, size=num_samples, method=method
)

log_sample_list = np.log(sample.to_ndarray()).tolist()

Expand All @@ -371,7 +396,9 @@ def test_lognormal_hypothesis_testing(self, method):

# second goodness of fit test against the distribution with proper mean and std
good_fit_res = sp_stats.goodness_of_fit(
sp_stats.norm, log_sample_list, known_params={"loc": mean, "scale": deviation}
sp_stats.norm,
log_sample_list,
known_params={"loc": mean, "scale": deviation},
)
assert good_fit_res.pvalue > 0.05

Expand Down Expand Up @@ -521,10 +548,32 @@ def test_legacy_randint_with_seed(self):
] == values.to_list()

values = ak.random.randint(1, 5, 10, dtype=ak.bool_, seed=2)
assert [False, True, True, True, True, False, True, True, True, True] == values.to_list()
assert [
False,
True,
True,
True,
True,
False,
True,
True,
True,
True,
] == values.to_list()

values = ak.random.randint(1, 5, 10, dtype=bool, seed=2)
assert [False, True, True, True, True, False, True, True, True, True] == values.to_list()
assert [
False,
True,
True,
True,
True,
False,
True,
True,
True,
True,
] == values.to_list()

# Test that int_scalars covers uint8, uint16, uint32
ak.random.randint(np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2))
Expand All @@ -542,12 +591,16 @@ def test_legacy_uniform(self):

uArray = ak.random.uniform(size=3, low=0, high=5, seed=0)
assert np.allclose(
[0.30013431967121934, 0.47383036230759112, 1.0441791878997098], uArray.to_list()
[0.30013431967121934, 0.47383036230759112, 1.0441791878997098],
uArray.to_list(),
)

uArray = ak.random.uniform(size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0))
uArray = ak.random.uniform(
size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0)
)
assert np.allclose(
[0.30013431967121934, 0.47383036230759112, 1.0441791878997098], uArray.to_list()
[0.30013431967121934, 0.47383036230759112, 1.0441791878997098],
uArray.to_list(),
)

with pytest.raises(TypeError):
Expand Down

0 comments on commit 470da66

Please sign in to comment.