From 647161b30c8d3773f0210620b77e117f07233fe2 Mon Sep 17 00:00:00 2001 From: Pierce Hayes Date: Fri, 23 Feb 2024 17:10:28 -0500 Subject: [PATCH 1/2] Closes #2993: Create random subfolder and foundation for generator This PR (closes #2993) adds a random subfolder, moves the random generation methods from pdarraycreation under it, and adds the foundations for a generator class. I left the function declarations in pdarraycreation and just have them call the ones in `random` to avoid breaking existing code since `randint` is very widely used. I'm fine with leaving them indefinitely or deprecating them, I don't have strong feelings either way. Future work: Right now a generator method called multiple times always gives the same answer, which doesn't match what numpy does. I think if we add a generator type to the symbol table we can solve that issue --- arkouda/__init__.py | 1 + arkouda/pdarraycreation.py | 41 +----- arkouda/random/__init__.py | 9 ++ arkouda/random/_generator.py | 234 +++++++++++++++++++++++++++++++++++ arkouda/random/_legacy.py | 216 ++++++++++++++++++++++++++++++++ 5 files changed, 465 insertions(+), 36 deletions(-) create mode 100644 arkouda/random/__init__.py create mode 100644 arkouda/random/_generator.py create mode 100644 arkouda/random/_legacy.py diff --git a/arkouda/__init__.py b/arkouda/__init__.py index 5661ed292d..f64e53aebf 100644 --- a/arkouda/__init__.py +++ b/arkouda/__init__.py @@ -41,3 +41,4 @@ ) from arkouda.akscipy.special import * from arkouda.akscipy import * +from arkouda.random import * diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index 9d7138b809..db32cf1da3 100755 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -954,36 +954,9 @@ def randint( >>> ak.randint(1, 5, 10, dtype=ak.bool, seed=2) array([False, True, True, True, True, False, True, True, True, True]) """ - shape: Union[int_scalars, Tuple[int_scalars, ...]] = 1 - if isinstance(size, tuple): - shape = cast(Tuple, size) - full_size = 1 - for s in cast(Tuple, shape): - full_size *= s - ndim = len(shape) - else: - full_size = cast(int, size) - shape = full_size - ndim = 1 - - if full_size < 0 or ndim < 1 or high < low: - raise ValueError("size must be >= 0, ndim >= 1, and high >= low") - dtype = akdtype(dtype) # normalize dtype - # check dtype for error - if dtype.name not in DTypes: - raise TypeError(f"unsupported dtype {dtype}") + from arkouda.random import randint - repMsg = generic_msg( - cmd=f"randint{ndim}D", - args={ - "shape": shape, - "dtype": dtype.name, - "low": NUMBER_FORMAT_STRINGS[dtype.name].format(low), - "high": NUMBER_FORMAT_STRINGS[dtype.name].format(high), - "seed": seed, - }, - ) - return create_pdarray(repMsg) + return randint(low=low, high=high, size=size, dtype=dtype, seed=seed) @typechecked @@ -1076,13 +1049,9 @@ def standard_normal(size: int_scalars, seed: Union[None, int_scalars] = None) -> >>> ak.standard_normal(3,1) array([-0.68586185091150265, 1.1723810583573375, 0.567584107142031]) """ - if size < 0: - raise ValueError("The size parameter must be > 0") - return create_pdarray( - generic_msg( - cmd="randomNormal", args={"size": NUMBER_FORMAT_STRINGS["int64"].format(size), "seed": seed} - ) - ) + from arkouda.random import standard_normal + + return standard_normal(size=size, seed=seed) @typechecked diff --git a/arkouda/random/__init__.py b/arkouda/random/__init__.py new file mode 100644 index 0000000000..ceaf5da6eb --- /dev/null +++ b/arkouda/random/__init__.py @@ -0,0 +1,9 @@ +from ._generator import Generator, default_rng +from ._legacy import * + +__all__ = [ + 'Generator', + 'randint', + 'standard_normal', + 'uniform', +] diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py new file mode 100644 index 0000000000..96a4e6d3d7 --- /dev/null +++ b/arkouda/random/_generator.py @@ -0,0 +1,234 @@ +import numpy.random as np_random +from arkouda.dtypes import int64 as akint64 +from arkouda.dtypes import dtype as to_numpy_dtype + + +class Generator: + """ + ``Generator`` exposes a number of methods for generating random + numbers drawn from a variety of probability distributions. In addition to + the distribution-specific arguments, each method takes a keyword argument + `size` that defaults to ``None``. If `size` is ``None``, then a single + value is generated and returned. If `size` is an integer, then a 1-D + array filled with generated values is returned. + + Parameters + ---------- + seed : int + Seed to allow for reproducible random number generation. + + See Also + -------- + default_rng : Recommended constructor for `Generator`. + """ + + def __init__(self, seed=None): + self._seed = seed + self._np_generator = np_random.default_rng(seed) + + def __repr__(self): + return self.__str__() + + def __str__(self): + _str = self.__class__.__name__ + # be sure to update if we add support for non-pcg generators + _str += "(PCG64)" + return _str + + def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False): + """ + Return random integers from low (inclusive) to high (exclusive), + or if endpoint=True, low (inclusive) to high (inclusive). + + Return random integers from the “discrete uniform” distribution of the specified dtype. + If high is None (the default), then results are from 0 to low. + + Parameters + ---------- + low: numeric_scalars + Lowest (signed) integers to be drawn from the distribution (unless high=None, + in which case this parameter is 0 and this value is used for high). + + high: numeric_scalars + If provided, one above the largest (signed) integer to be drawn from the distribution + (see above for behavior if high=None) + + size: numeric_scalars + Output shape. Default is None, in which case a single value is returned. + + dtype: dtype, optional + Desired dtype of the result. The default value is ak.int64. + + endpoint: bool, optional + If true, sample from the interval [low, high] instead of the default [low, high). Defaults to False + + Returns + ------- + pdarray, numeric_scalar + Values drawn uniformly from the specified range having the desired dtype, + or a single such random int if size not provided. + + Examples + -------- + >>> rng = ak.random.default_rng() + >>> rng.integers(5, 20, 10) + array([15, 13, 10, 8, 5, 18, 16, 14, 7, 13]) # random + >>> rng.integers(5, size=10) + array([2, 4, 0, 0, 0, 3, 1, 5, 5, 3]) # random + """ + from arkouda.random._legacy import randint + + if size is None: + # delegate to numpy when return size is 1 + return self._np_generator.integers( + low=low, high=high, dtype=to_numpy_dtype(dtype), endpoint=endpoint + ) + if high is None: + high = low + 1 + low = 0 + elif endpoint: + high = high + 1 + return randint(low=low, high=high, size=size, dtype=dtype, seed=self._seed) + + def random(self, size=None): + """ + Return random floats in the half-open interval [0.0, 1.0). + + Results are from the uniform distribution over the stated interval. + + Parameters + ---------- + size: numeric_scalars, optional + Output shape. Default is None, in which case a single value is returned. + + Returns + ------- + pdarray + Pdarray of random floats (unless size=None, in which case a single float is returned). + + Notes + ----- + To sample over `[a,b)`, use uniform or multiply the output of random by `(b - a)` and add `a`: + + ``(b - a) * random() + a`` + + See Also + -------- + uniform + + Examples + -------- + >>> rng = ak.random.default_rng() + >>> rng.random() + 0.47108547995356098 # random + >>> rng.random(3) + array([0.055256829926011691, 0.62511314008006458, 0.16400145561571539]) # random + """ + if size is None: + # delegate to numpy when return size is 1 + return self._np_generator.random() + return self.uniform(low=0.0, high=1.0, size=size) + + def standard_normal(self, size=None): + """ + Draw samples from a standard Normal distribution (mean=0, stdev=1). + + Parameters + ---------- + size: numeric_scalars, optional + Output shape. Default is None, in which case a single value is returned. + + Returns + ------- + pdarray + Pdarray of floats (unless size=None, in which case a single float is returned). + + Notes + ----- + For random samples from :math:`N(\\mu, \\sigma^2)`, use: + + ``(sigma * standard_normal(size)) + mu`` + + + Examples + -------- + >>> rng = ak.random.default_rng() + >>> rng.standard_normal() + 2.1923875335537315 # random + >>> rng.standard_normal(3) + array([0.8797352989638163, -0.7085325853376141, 0.021728052940979934]) # random + """ + from arkouda.random._legacy import standard_normal + + if size is None: + # delegate to numpy when return size is 1 + return self._np_generator.standard_normal() + return standard_normal(size=size, seed=self._seed) + + def uniform(self, low=0.0, high=1.0, size=None): + """ + Draw samples from a uniform distribution. + + Samples are uniformly distributed over the half-open interval [low, high). + In other words, any value within the given interval is equally likely to be drawn by uniform. + + Parameters + ---------- + low: float, optional + Lower boundary of the output interval. All values generated will be greater than or equal to low. + The default value is 0. + + high: float, optional + Upper boundary of the output interval. All values generated will be less than high. + high must be greater than or equal to low. The default value is 1.0. + + size: numeric_scalars, optional + Output shape. Default is None, in which case a single value is returned. + + Returns + ------- + pdarray + Pdarray of floats (unless size=None, in which case a single float is returned). + + See Also + -------- + integers + random + + Examples + -------- + >>> rng = ak.random.default_rng() + >>> rng.uniform(-1, 1, 3) + array([0.030785499755523249, 0.08505865366367038, -0.38552048588998722]) # random + """ + from arkouda.random._legacy import uniform + + if size is None: + # delegate to numpy when return size is 1 + return self._np_generator.uniform(low=low, high=high) + return uniform(low=low, high=high, size=size, seed=self._seed) + + +def default_rng(seed=None): + """ + Construct a new Generator. + + Right now we only support PCG64, since this is what is available in chapel. + + Parameters + ---------- + seed: {None, int, Generator}, optional + A seed to initialize the `Generator`. If None, then the seed will + be generated by chapel in an implementation specific manner based on the current time. + This behavior is currently unstable and may change in the future. If an int, + then the value must be non-negative. If passed a `Generator`, it will be returned unaltered. + + Returns + ------- + Generator + The initialized generator object. + """ + if isinstance(seed, Generator): + # Pass through a Generator. + return seed + return Generator(seed) diff --git a/arkouda/random/_legacy.py b/arkouda/random/_legacy.py new file mode 100644 index 0000000000..d3e223d0f8 --- /dev/null +++ b/arkouda/random/_legacy.py @@ -0,0 +1,216 @@ +from typing import Union, Tuple, cast + +import numpy as np # type: ignore +import pandas as pd # type: ignore +from typeguard import typechecked + +from arkouda.client import generic_msg +from arkouda.dtypes import ( + NUMBER_FORMAT_STRINGS, + DTypes, +) +from arkouda.dtypes import dtype as akdtype +from arkouda.dtypes import int64 as akint64 +from arkouda.dtypes import ( + int_scalars, + numeric_scalars, +) +from arkouda.pdarrayclass import create_pdarray, pdarray + + +@typechecked +def randint( + low: numeric_scalars, + high: numeric_scalars, + size: Union[int_scalars, Tuple[int_scalars, ...]] = 1, + dtype=akint64, + seed: int_scalars = None, +) -> pdarray: + """ + Generate a pdarray of randomized int, float, or bool values in a + specified range bounded by the low and high parameters. + + Parameters + ---------- + low : numeric_scalars + The low value (inclusive) of the range + high : numeric_scalars + The high value (exclusive for int, inclusive for float) of the range + size : int_scalars + The length of the returned array + dtype : Union[int64, float64, bool] + The dtype of the array + seed : int_scalars + Seed to allow for reproducible random number generation + + + Returns + ------- + pdarray + Values drawn uniformly from the specified range having the desired dtype + + Raises + ------ + TypeError + Raised if dtype.name not in DTypes, size is not an int, low or high is + not an int or float, or seed is not an int + ValueError + Raised if size < 0 or if high < low + + Notes + ----- + Calling randint with dtype=float64 will result in uniform non-integral + floating point values. + + Ranges >= 2**64 in size is undefined behavior because + it exceeds the maximum value that can be stored on the server (uint64) + + Examples + -------- + >>> ak.randint(0, 10, 5) + array([5, 7, 4, 8, 3]) + + >>> ak.randint(0, 1, 3, dtype=ak.float64) + array([0.92176432277231968, 0.083130710959903542, 0.68894208386667544]) + + >>> ak.randint(0, 1, 5, dtype=ak.bool) + array([True, False, True, True, True]) + + >>> ak.randint(1, 5, 10, seed=2) + array([4, 3, 1, 3, 4, 4, 2, 4, 3, 2]) + + >>> ak.randint(1, 5, 3, dtype=ak.float64, seed=2) + array([2.9160772326374946, 4.353429832157099, 4.5392023718621486]) + + >>> ak.randint(1, 5, 10, dtype=ak.bool, seed=2) + array([False, True, True, True, True, False, True, True, True, True]) + """ + shape: Union[int_scalars, Tuple[int_scalars, ...]] = 1 + if isinstance(size, tuple): + shape = cast(Tuple, size) + full_size = 1 + for s in cast(Tuple, shape): + full_size *= s + ndim = len(shape) + else: + full_size = cast(int, size) + shape = full_size + ndim = 1 + + if full_size < 0 or ndim < 1 or high < low: + raise ValueError("size must be >= 0, ndim >= 1, and high >= low") + dtype = akdtype(dtype) # normalize dtype + # check dtype for error + if dtype.name not in DTypes: + raise TypeError(f"unsupported dtype {dtype}") + + repMsg = generic_msg( + cmd=f"randint{ndim}D", + args={ + "shape": shape, + "dtype": dtype.name, + "low": NUMBER_FORMAT_STRINGS[dtype.name].format(low), + "high": NUMBER_FORMAT_STRINGS[dtype.name].format(high), + "seed": seed, + }, + ) + return create_pdarray(repMsg) + + +@typechecked +def uniform( + size: int_scalars, + low: numeric_scalars = float(0.0), + high: numeric_scalars = 1.0, + seed: Union[None, int_scalars] = None, +) -> pdarray: + """ + Generate a pdarray with uniformly distributed random float values + in a specified range. + + Parameters + ---------- + low : float_scalars + The low value (inclusive) of the range, defaults to 0.0 + high : float_scalars + The high value (inclusive) of the range, defaults to 1.0 + size : int_scalars + The length of the returned array + seed : int_scalars, optional + Value used to initialize the random number generator + + Returns + ------- + pdarray, float64 + Values drawn uniformly from the specified range + + Raises + ------ + TypeError + Raised if dtype.name not in DTypes, size is not an int, or if + either low or high is not an int or float + ValueError + Raised if size < 0 or if high < low + + Notes + ----- + The logic for uniform is delegated to the ak.randint method which + is invoked with a dtype of float64 + + Examples + -------- + >>> ak.uniform(3) + array([0.92176432277231968, 0.083130710959903542, 0.68894208386667544]) + + >>> ak.uniform(size=3,low=0,high=5,seed=0) + array([0.30013431967121934, 0.47383036230759112, 1.0441791878997098]) + """ + return randint(low=low, high=high, size=size, dtype="float64", seed=seed) + + +@typechecked +def standard_normal(size: int_scalars, seed: Union[None, int_scalars] = None) -> pdarray: + """ + Draw real numbers from the standard normal distribution. + + Parameters + ---------- + size : int_scalars + The number of samples to draw (size of the returned array) + seed : int_scalars + Value used to initialize the random number generator + + Returns + ------- + pdarray, float64 + The array of random numbers + + Raises + ------ + TypeError + Raised if size is not an int + ValueError + Raised if size < 0 + + See Also + -------- + randint + + Notes + ----- + For random samples from :math:`N(\\mu, \\sigma^2)`, use: + + ``(sigma * standard_normal(size)) + mu`` + + Examples + -------- + >>> ak.standard_normal(3,1) + array([-0.68586185091150265, 1.1723810583573375, 0.567584107142031]) + """ + if size < 0: + raise ValueError("The size parameter must be > 0") + return create_pdarray( + generic_msg( + cmd="randomNormal", args={"size": NUMBER_FORMAT_STRINGS["int64"].format(size), "seed": seed} + ) + ) \ No newline at end of file From e276d37578c024100c06dff32801077b32a77618 Mon Sep 17 00:00:00 2001 From: Pierce Hayes Date: Tue, 27 Feb 2024 13:41:03 -0500 Subject: [PATCH 2/2] small changes to make flake8 happy --- arkouda/random/__init__.py | 3 +- arkouda/random/_generator.py | 10 ++-- arkouda/random/_legacy.py | 110 ++++++++++++++++------------------- 3 files changed, 59 insertions(+), 64 deletions(-) diff --git a/arkouda/random/__init__.py b/arkouda/random/__init__.py index ceaf5da6eb..58b5136cc1 100644 --- a/arkouda/random/__init__.py +++ b/arkouda/random/__init__.py @@ -1,8 +1,9 @@ from ._generator import Generator, default_rng -from ._legacy import * +from ._legacy import randint, standard_normal, uniform __all__ = [ 'Generator', + 'default_rng', 'randint', 'standard_normal', 'uniform', diff --git a/arkouda/random/_generator.py b/arkouda/random/_generator.py index 96a4e6d3d7..51c50fb5fc 100644 --- a/arkouda/random/_generator.py +++ b/arkouda/random/_generator.py @@ -1,6 +1,7 @@ import numpy.random as np_random -from arkouda.dtypes import int64 as akint64 + from arkouda.dtypes import dtype as to_numpy_dtype +from arkouda.dtypes import int64 as akint64 class Generator: @@ -60,7 +61,8 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False): Desired dtype of the result. The default value is ak.int64. endpoint: bool, optional - If true, sample from the interval [low, high] instead of the default [low, high). Defaults to False + If true, sample from the interval [low, high] instead of the default [low, high). + Defaults to False Returns ------- @@ -175,8 +177,8 @@ def uniform(self, low=0.0, high=1.0, size=None): Parameters ---------- low: float, optional - Lower boundary of the output interval. All values generated will be greater than or equal to low. - The default value is 0. + Lower boundary of the output interval. All values generated will be greater than or + equal to low. The default value is 0. high: float, optional Upper boundary of the output interval. All values generated will be less than high. diff --git a/arkouda/random/_legacy.py b/arkouda/random/_legacy.py index d3e223d0f8..5d9f60bfd5 100644 --- a/arkouda/random/_legacy.py +++ b/arkouda/random/_legacy.py @@ -1,20 +1,12 @@ -from typing import Union, Tuple, cast +from typing import Tuple, Union, cast -import numpy as np # type: ignore -import pandas as pd # type: ignore from typeguard import typechecked from arkouda.client import generic_msg -from arkouda.dtypes import ( - NUMBER_FORMAT_STRINGS, - DTypes, -) +from arkouda.dtypes import NUMBER_FORMAT_STRINGS, DTypes from arkouda.dtypes import dtype as akdtype from arkouda.dtypes import int64 as akint64 -from arkouda.dtypes import ( - int_scalars, - numeric_scalars, -) +from arkouda.dtypes import int_scalars, numeric_scalars from arkouda.pdarrayclass import create_pdarray, pdarray @@ -117,6 +109,54 @@ def randint( return create_pdarray(repMsg) +@typechecked +def standard_normal(size: int_scalars, seed: Union[None, int_scalars] = None) -> pdarray: + """ + Draw real numbers from the standard normal distribution. + + Parameters + ---------- + size : int_scalars + The number of samples to draw (size of the returned array) + seed : int_scalars + Value used to initialize the random number generator + + Returns + ------- + pdarray, float64 + The array of random numbers + + Raises + ------ + TypeError + Raised if size is not an int + ValueError + Raised if size < 0 + + See Also + -------- + randint + + Notes + ----- + For random samples from :math:`N(\\mu, \\sigma^2)`, use: + + ``(sigma * standard_normal(size)) + mu`` + + Examples + -------- + >>> ak.standard_normal(3,1) + array([-0.68586185091150265, 1.1723810583573375, 0.567584107142031]) + """ + if size < 0: + raise ValueError("The size parameter must be > 0") + return create_pdarray( + generic_msg( + cmd="randomNormal", args={"size": NUMBER_FORMAT_STRINGS["int64"].format(size), "seed": seed} + ) + ) + + @typechecked def uniform( size: int_scalars, @@ -166,51 +206,3 @@ def uniform( array([0.30013431967121934, 0.47383036230759112, 1.0441791878997098]) """ return randint(low=low, high=high, size=size, dtype="float64", seed=seed) - - -@typechecked -def standard_normal(size: int_scalars, seed: Union[None, int_scalars] = None) -> pdarray: - """ - Draw real numbers from the standard normal distribution. - - Parameters - ---------- - size : int_scalars - The number of samples to draw (size of the returned array) - seed : int_scalars - Value used to initialize the random number generator - - Returns - ------- - pdarray, float64 - The array of random numbers - - Raises - ------ - TypeError - Raised if size is not an int - ValueError - Raised if size < 0 - - See Also - -------- - randint - - Notes - ----- - For random samples from :math:`N(\\mu, \\sigma^2)`, use: - - ``(sigma * standard_normal(size)) + mu`` - - Examples - -------- - >>> ak.standard_normal(3,1) - array([-0.68586185091150265, 1.1723810583573375, 0.567584107142031]) - """ - if size < 0: - raise ValueError("The size parameter must be > 0") - return create_pdarray( - generic_msg( - cmd="randomNormal", args={"size": NUMBER_FORMAT_STRINGS["int64"].format(size), "seed": seed} - ) - ) \ No newline at end of file