Skip to content

Commit

Permalink
Closes #3699 zeros, ones, full to return Array (#3701)
Browse files Browse the repository at this point in the history
* Closes #3699 zeros, ones, full to return Array

Co-authored-by: drculhane <[email protected]>

* remove reference to array_api

* combine duplicate functions

* fix circular import

---------

Co-authored-by: Amanda Potts <[email protected]>
Co-authored-by: drculhane <[email protected]>
  • Loading branch information
3 people authored Aug 28, 2024
1 parent 50541d0 commit 78633dd
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 111 deletions.
86 changes: 50 additions & 36 deletions arkouda/pdarraycreation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
from typeguard import typechecked

from arkouda.client import generic_msg
from arkouda.client import generic_msg, get_max_array_rank
from arkouda.numpy.dtypes import (
NUMBER_FORMAT_STRINGS,
DTypes,
Expand All @@ -27,6 +27,7 @@
from arkouda.pdarrayclass import create_pdarray, pdarray
from arkouda.strings import Strings


__all__ = [
"array",
"zeros",
Expand Down Expand Up @@ -204,7 +205,6 @@ def array(
>>> type(strings)
<class 'arkouda.strings.Strings'>
"""
# Removed @typechecked to prevent cyclic dependencies with ArrayView
from arkouda.numeric import cast as akcast

# If a is already a pdarray, do nothing
Expand Down Expand Up @@ -232,16 +232,14 @@ def array(
except (RuntimeError, TypeError, ValueError):
raise TypeError("a must be a pdarray, np.ndarray, or convertible to a numpy array")

# Return multi-dimensional arrayview
if a.ndim != 1:
# TODO add order
if a.dtype.name in NumericDTypes:
flat_a = array(a.flatten(), dtype=dtype)
if isinstance(flat_a, pdarray):
# break into parts so mypy doesn't think we're calling reshape on a Strings
return flat_a.reshape(a.shape)
else:
raise TypeError("Must be an iterable or have a numeric DType")
# Return multi-dimensional pdarray if a.ndim <= get_max_array_rank()
# otherwise raise an error

if a.ndim != 1 and a.dtype.name not in NumericDTypes:
raise TypeError("Must be an iterable or have a numeric DType")

if a.ndim > get_max_array_rank():
raise ValueError(f"array rank {a.ndim} exceeds maximum of {get_max_array_rank()}")

# Check if array of strings
# if a.dtype == numpy.object_ need to check first element
Expand Down Expand Up @@ -285,9 +283,11 @@ def array(
except TypeError:
raise RuntimeError(f"Unhandled dtype {a.dtype}")
else:
from arkouda.util import _infer_shape_from_size
shape, ndim, full_size = _infer_shape_from_size(a.shape)

# Do not allow arrays that are too large
size = a.size
if (size * a.itemsize) > maxTransferBytes:
if (full_size * a.itemsize) > maxTransferBytes:
raise RuntimeError(
"Array exceeds allowed transfer size. Increase ak.client.maxTransferBytes to allow"
)
Expand All @@ -297,8 +297,8 @@ def array(
# native endian bytes
aview = _array_memview(a)
rep_msg = generic_msg(
cmd=f"array<{a.dtype.name},1>",
args={"shape": size},
cmd=f"array<{a.dtype.name},{ndim}>",
args={"dtype": a.dtype.name, "shape": tuple(a.shape), "seg_string": False},
payload=aview,
send_binary=True,
)
Expand Down Expand Up @@ -430,7 +430,7 @@ def bigint_from_uint_arrays(arrays, max_bits=-1):

@typechecked
def zeros(
size: Union[int_scalars, str],
size: Union[int_scalars, Tuple[int_scalars, ...], str],
dtype: Union[np.dtype, type, str, bigint] = float64,
max_bits: Optional[int] = None,
) -> pdarray:
Expand Down Expand Up @@ -472,20 +472,25 @@ def zeros(
>>> ak.zeros(5, dtype=ak.bool_)
array([False, False, False, False, False])
"""
if not np.isscalar(size):
raise TypeError(f"size must be a scalar, not {size.__class__.__name__}")
dtype_name = akdtype(dtype).name
dtype = akdtype(dtype) # normalize dtype
dtype_name = dtype.name if isinstance(dtype, bigint) else cast(np.dtype, dtype).name
# check dtype for error
if dtype_name not in NumericDTypes:
raise TypeError(f"unsupported dtype {akdtype(dtype)}")
repMsg = generic_msg(cmd=f"create<{dtype_name},1>", args={"shape": size})
raise TypeError(f"unsupported dtype {dtype}")
from arkouda.util import _infer_shape_from_size
shape, ndim, full_size = _infer_shape_from_size(size)

if ndim > get_max_array_rank():
raise ValueError(f"array rank {ndim} exceeds maximum of {get_max_array_rank()}")

repMsg = generic_msg(cmd=f"create<{dtype_name},{ndim}>", args={"shape": shape})

return create_pdarray(repMsg, max_bits=max_bits)


@typechecked
def ones(
size: Union[int_scalars, str],
size: Union[int_scalars, Tuple[int_scalars, ...], str],
dtype: Union[np.dtype, type, str, bigint] = float64,
max_bits: Optional[int] = None,
) -> pdarray:
Expand Down Expand Up @@ -527,14 +532,18 @@ def ones(
>>> ak.ones(5, dtype=ak.bool_)
array([True, True, True, True, True])
"""
if not np.isscalar(size):
raise TypeError(f"size must be a scalar, not {size.__class__.__name__}")
dtype = akdtype(dtype) # normalize dtype
dtype_name = dtype.name if isinstance(dtype, bigint) else cast(np.dtype, dtype).name
# check dtype for error
if dtype_name not in NumericDTypes:
raise TypeError(f"unsupported dtype {dtype}")
repMsg = generic_msg(cmd=f"create<{dtype_name},1>", args={"shape": size})
from arkouda.util import _infer_shape_from_size
shape, ndim, full_size = _infer_shape_from_size(size)

if ndim > get_max_array_rank():
raise ValueError(f"array rank {ndim} exceeds maximum of {get_max_array_rank()}")

repMsg = generic_msg(cmd=f"create<{dtype_name},{ndim}>", args={"shape": shape})
a = create_pdarray(repMsg)
a.fill(1)
if max_bits:
Expand All @@ -544,7 +553,7 @@ def ones(

@typechecked
def full(
size: Union[int_scalars, str],
size: Union[int_scalars, Tuple[int_scalars, ...], str],
fill_value: Union[numeric_scalars, str],
dtype: Union[np.dtype, type, str, bigint] = float64,
max_bits: Optional[int] = None,
Expand Down Expand Up @@ -589,8 +598,6 @@ def full(
>>> ak.full(5, 5, dtype=ak.bool_)
array([True, True, True, True, True])
"""
if not np.isscalar(size):
raise TypeError(f"size must be a scalar, not {size.__class__.__name__}")
if isinstance(fill_value, str):
return _full_string(size, fill_value)

Expand All @@ -599,18 +606,25 @@ def full(
# check dtype for error
if dtype_name not in NumericDTypes:
raise TypeError(f"unsupported dtype {dtype}")
repMsg = generic_msg(cmd=f"create<{dtype_name},1>", args={"shape": size})
from arkouda.util import _infer_shape_from_size
shape, ndim, full_size = _infer_shape_from_size(size)

if ndim > get_max_array_rank():
raise ValueError(f"array rank {ndim} exceeds maximum of {get_max_array_rank()}")

repMsg = generic_msg(cmd=f"create<{dtype_name},{ndim}>", args={"shape": shape})

a = create_pdarray(repMsg)
a.fill(fill_value)

if max_bits:
a.max_bits = max_bits
return a


@typechecked
def scalar_array(
value: numeric_scalars,
dtype: Optional[Union[np.dtype, type, str, bigint]] = None
value: numeric_scalars, dtype: Optional[Union[np.dtype, type, str, bigint]] = None
) -> pdarray:
"""
Create a pdarray from a single scalar value.
Expand Down Expand Up @@ -702,7 +716,7 @@ def zeros_like(pda: pdarray) -> pdarray:
>>> ak.zeros_like(zeros)
array([False, False, False, False, False])
"""
return zeros(pda.size, pda.dtype, pda.max_bits)
return zeros(tuple(pda.shape), pda.dtype, pda.max_bits)


@typechecked
Expand Down Expand Up @@ -749,11 +763,11 @@ def ones_like(pda: pdarray) -> pdarray:
>>> ak.ones_like(ones)
array([True, True, True, True, True])
"""
return ones(pda.size, pda.dtype, pda.max_bits)
return ones(tuple(pda.shape), pda.dtype, pda.max_bits)


@typechecked
def full_like(pda: pdarray, fill_value: numeric_scalars) -> pdarray:
def full_like(pda: pdarray, fill_value: numeric_scalars) -> Union[pdarray, Strings]:
"""
Create a pdarray filled with fill_value of the same size and dtype as an existing
pdarray.
Expand Down Expand Up @@ -798,7 +812,7 @@ def full_like(pda: pdarray, fill_value: numeric_scalars) -> pdarray:
>>> ak.full_like(full)
array([True, True, True, True, True])
"""
return full(pda.size, fill_value, pda.dtype, pda.max_bits)
return full(tuple(pda.shape), fill_value, pda.dtype, pda.max_bits)


def arange(*args, **kwargs) -> pdarray:
Expand Down
16 changes: 8 additions & 8 deletions arkouda/random/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,13 @@ def standard_exponential(self, size=None, method="zig"):
pdarray
Drawn samples from the standard exponential distribution.
"""
from arkouda.util import _calc_shape
from arkouda.util import _infer_shape_from_size

if size is None:
# delegate to numpy when return size is 1
return self._np_generator.standard_exponential(method=method)

shape, full_size, ndim = _calc_shape(size)
shape, ndim, full_size = _infer_shape_from_size(size)
if full_size < 0:
raise ValueError("The size parameter must be > 0")

Expand Down Expand Up @@ -257,7 +257,7 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
>>> rng.integers(5, size=10)
array([2, 4, 0, 0, 0, 3, 1, 5, 5, 3]) # random
"""
from arkouda.util import _calc_shape
from arkouda.util import _infer_shape_from_size

# normalize dtype so things like "int" will work
dtype = to_numpy_dtype(dtype)
Expand All @@ -275,7 +275,7 @@ def integers(self, low, high=None, size=None, dtype=akint64, endpoint=False):
elif not endpoint:
high = high - 1

shape, full_size, ndim = _calc_shape(size)
shape, ndim, full_size = _infer_shape_from_size(size)
if full_size < 0:
raise ValueError("The size parameter must be > 0")

Expand Down Expand Up @@ -550,13 +550,13 @@ def standard_normal(self, size=None, method="zig"):
>>> rng.standard_normal(3)
array([0.8797352989638163, -0.7085325853376141, 0.021728052940979934]) # random
"""
from arkouda.util import _calc_shape
from arkouda.util import _infer_shape_from_size

if size is None:
# delegate to numpy when return size is 1
return self._np_generator.standard_normal()

shape, full_size, ndim = _calc_shape(size)
shape, ndim, full_size = _infer_shape_from_size(size)
if full_size < 0:
raise ValueError("The size parameter must be > 0")

Expand Down Expand Up @@ -744,13 +744,13 @@ def uniform(self, low=0.0, high=1.0, size=None):
>>> rng.uniform(-1, 1, 3)
array([0.030785499755523249, 0.08505865366367038, -0.38552048588998722]) # random
"""
from arkouda.util import _calc_shape
from arkouda.util import _infer_shape_from_size

if size is None:
# delegate to numpy when return size is 1
return self._np_generator.uniform(low=low, high=high)

shape, full_size, ndim = _calc_shape(size)
shape, ndim, full_size = _infer_shape_from_size(size)
if full_size < 0:
raise ValueError("The size parameter must be > 0")

Expand Down
8 changes: 4 additions & 4 deletions arkouda/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
from arkouda.categorical import Categorical
from arkouda.client import generic_msg, get_config, get_mem_used
from arkouda.client_dtypes import BitVector, BitVectorizer, IPv4
from arkouda.groupbyclass import GroupBy, broadcast
from arkouda.infoclass import list_registry
from arkouda.numpy.dtypes import (
_is_dtype_in_union,
dtype,
float_scalars,
int_scalars,
numeric_scalars,
)
from arkouda.groupbyclass import GroupBy, broadcast
from arkouda.infoclass import list_registry
from arkouda.pdarrayclass import create_pdarray, pdarray
from arkouda.pdarraycreation import arange
from arkouda.pdarraysetops import unique
Expand Down Expand Up @@ -610,7 +610,7 @@ def map(
raise TypeError("Map must be dict or arkouda.Series.")


def _calc_shape(size):
def _infer_shape_from_size(size):
shape: Union[int_scalars, Tuple[int_scalars, ...]] = 1
if isinstance(size, tuple):
shape = cast(Tuple, size)
Expand All @@ -622,4 +622,4 @@ def _calc_shape(size):
full_size = cast(int, size)
shape = full_size
ndim = 1
return shape, full_size, ndim
return shape, ndim, full_size
60 changes: 0 additions & 60 deletions tests/array_view_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,6 @@


class TestArrayView:
@pytest.mark.parametrize("size", SIZE)
@pytest.mark.parametrize("num_type", NO_BOOL)
def test_mulitdimensional_array_creation(self, size, num_type):
n = np.array([[0, 0], [0, 1], [1, 1]])
a = ak.array([[0, 0], [0, 1], [1, 1]])
assert np.array_equal(n.tolist(), a.to_list())
n = np.arange(size).reshape(SHAPE[size])
a = ak.arange(size).reshape(SHAPE[size])
assert np.array_equal(n.tolist(), a.to_list())
n = np.arange(size, dtype=num_type).reshape(SHAPE[size])
a = ak.arange(size, dtype=num_type).reshape(SHAPE[size])
assert np.array_equal(n.tolist(), a.to_list())

def test_arrayview_int_indexing(self):
nd = np.arange(9).reshape(3, 3)
pd_reshape = ak.arange(9).reshape(3, 3)
pd_array = ak.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

nd_ind = [nd[i, j] for (i, j) in product(range(3), repeat=2)]
reshape_ind = [pd_reshape[i, j] for (i, j) in product(range(3), repeat=2)]
array_ind = [pd_array[i, j] for (i, j) in product(range(3), repeat=2)]
assert nd_ind == reshape_ind
assert nd_ind == array_ind

with pytest.raises(IndexError):
# index out of bounds (>= dimension)
# index 3 is out of bounds for axis 0 with size 3
pd_reshape[3, 1]
with pytest.raises(IndexError):
# index -4 is out of bounds for axis 1 with size 3
pd_reshape[2, -4]
with pytest.raises(IndexError):
# too many indices for array: array is 2-dimensional, but 3 were indexed
pd_reshape[0, 1, 1]
with pytest.raises(ValueError):

# cannot reshape array of size 9 into shape (4,3)
ak.arange(9).reshape(4, 3)

@pytest.mark.parametrize("size", SIZE)
def test_int_list_indexing(self, size):
Expand Down Expand Up @@ -158,28 +120,6 @@ def test_basic_indexing(self):
assert n[0].tolist() == a[0].to_list()
assert n[0][2] == a[0][2]

n = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
a = ak.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
# n[0, 1:7:2].tolist() = [1, 3, 5]
assert n[0, 1:7:2].tolist() == a[0, 1:7:2].to_list()
# n[0, -2:10].tolist() = [8, 9]
assert n[0, -2:10].tolist() == a[0, -2:10].to_list()
# n[0, -3:3:-1].tolist() = [7, 6, 5, 4]
assert n[0, -3:3:-1].tolist() == a[0, -3:3:-1].to_list()
# n[0, 5:].tolist() = [5, 6, 7, 8, 9]
assert n[0, 5:].tolist() == a[0, 5:].to_list()

n = np.array([[[1], [2], [3]], [[4], [5], [6]]])
a = ak.array([[[1], [2], [3]], [[4], [5], [6]]])
# list(n.shape) = [2, 3, 1]
assert list(n.shape) == a.shape.to_list()
# n.tolist() = [[[1], [2], [3]], [[4], [5], [6]]]
assert n.tolist() == a.to_list()
assert n.__str__() == a.__str__()

# n[1:2].tolist() = [[[4], [5], [6]]]
assert n[1:2].tolist() == a[1:2].to_list()

def test_slicing(self):
a = ak.arange(30).reshape(2, 3, 5)
n = np.arange(30).reshape(2, 3, 5)
Expand Down
Loading

0 comments on commit 78633dd

Please sign in to comment.