Skip to content

Commit

Permalink
update io_test and remove redundant gen_ranges
Browse files Browse the repository at this point in the history
  • Loading branch information
Pierce Hayes committed Aug 7, 2023
1 parent 401e0b2 commit f5864a1
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 65 deletions.
13 changes: 1 addition & 12 deletions PROTO_tests/tests/io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,18 +1197,7 @@ def test_overwrite_single_dset(self):
assert f1_size == f2_size

def test_snapshot(self):
# Once issue #2617 is resolved, update to df = ak.DataFrame(make_multi_dtype_dict())
df = ak.DataFrame(
{
"int_col": ak.arange(10),
"uint_col": ak.array([i + 2**63 for i in range(10)], dtype=ak.uint64),
"float_col": ak.linspace(-3.5, 3.5, 10),
"bool_col": ak.randint(0, 2, 10, dtype=ak.bool),
"bigint_col": ak.array([i + 2**200 for i in range(10)], dtype=ak.bigint),
"segarr_col": ak.SegArray(ak.arange(0, 20, 2), ak.randint(0, 3, 20)),
"str_col": ak.random_strings_uniform(0, 3, 10),
}
)
df = ak.DataFrame(make_multi_dtype_dict())
df_str_idx = df.copy()
df_str_idx._set_index([f"A{i}" for i in range(len(df))])
col_order = df.columns
Expand Down
37 changes: 28 additions & 9 deletions arkouda/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
from arkouda.groupbyclass import GroupBy, broadcast
from arkouda.numeric import cumsum
from arkouda.pdarrayclass import create_pdarray, pdarray
from arkouda.pdarraycreation import arange, array, zeros
from arkouda.pdarraycreation import arange, array, ones, zeros
from arkouda.pdarraysetops import concatenate, in1d
from arkouda.segarray import gen_ranges as seg_gen_ranges
from arkouda.strings import Strings

__all__ = ["join_on_eq_with_dt", "gen_ranges", "compute_join_size"]
Expand Down Expand Up @@ -115,21 +114,22 @@ def join_on_eq_with_dt(
resIAttr, resJAttr = cast(str, repMsg).split("+")
resI = create_pdarray(resIAttr)
resJ = create_pdarray(resJAttr)
return (resI, resJ)
return resI, resJ


@typechecked
def gen_ranges(starts: pdarray, ends: pdarray) -> Tuple[pdarray, pdarray]:
def gen_ranges(starts, ends, stride=1):
"""
Generate a segmented array of variable-length, contiguous
ranges between pairs of start- and end-points.
Generate a segmented array of variable-length, contiguous ranges between pairs of
start- and end-points.
Parameters
----------
starts : pdarray, int64
The start value of each range
ends : pdarray, int64
The end value (exclusive) of each range
stride: int
Difference between successive elements of each range
Returns
-------
Expand All @@ -138,8 +138,27 @@ def gen_ranges(starts: pdarray, ends: pdarray) -> Tuple[pdarray, pdarray]:
ranges : pdarray, int64
The actual ranges, flattened into a single array
"""
# only maintain one version of gen_ranges
return seg_gen_ranges(starts, ends)
if starts.size != ends.size:
raise ValueError("starts and ends must be same length")
if starts.size == 0:
return zeros(0, dtype=akint64), zeros(0, dtype=akint64)
lengths = (ends - starts) // stride
if not (lengths >= 0).all():
raise ValueError("all ends must be greater than or equal to starts")
non_empty = lengths != 0
segs = cumsum(lengths) - lengths
totlen = lengths.sum()
slices = ones(totlen, dtype=akint64)
non_empty_starts = starts[non_empty]
non_empty_lengths = lengths[non_empty]
diffs = concatenate(
(
array([non_empty_starts[0]]),
non_empty_starts[1:] - non_empty_starts[:-1] - (non_empty_lengths[:-1] - 1) * stride,
)
)
slices[segs[non_empty]] = diffs
return segs, cumsum(slices)


@typechecked
Expand Down
45 changes: 1 addition & 44 deletions arkouda/segarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from arkouda.dtypes import isSupportedInt, str_
from arkouda.dtypes import uint64 as akuint64
from arkouda.groupbyclass import GroupBy, broadcast
from arkouda.join import gen_ranges
from arkouda.logger import getArkoudaLogger
from arkouda.numeric import cumsum
from arkouda.pdarrayclass import (
Expand All @@ -31,50 +32,6 @@
LEN_SUFFIX = "_lengths"


def gen_ranges(starts, ends, stride=1):
    """
    Generate a segmented array of variable-length, contiguous ranges between pairs of
    start- and end-points.

    Parameters
    ----------
    starts : pdarray, int64
        The start value of each range
    ends : pdarray, int64
        The end value (exclusive) of each range
    stride: int
        Difference between successive elements of each range

    Returns
    -------
    segments : pdarray, int64
        The starting index of each range in the resulting array
    ranges : pdarray, int64
        The actual ranges, flattened into a single array

    Raises
    ------
    ValueError
        If ``starts`` and ``ends`` differ in size, or if any computed range
        length is negative.
    """
    if starts.size != ends.size:
        raise ValueError("starts and ends must be same length")
    if starts.size == 0:
        return zeros(0, dtype=akint64), zeros(0, dtype=akint64)
    # NOTE(review): floor division drops a trailing partial step (0..5 with
    # stride 2 yields 0, 2); confirm whether ceiling division is intended.
    lengths = (ends - starts) // stride
    if not (lengths >= 0).all():
        raise ValueError("all ends must be greater than or equal to starts")
    non_empty = lengths != 0
    # Exclusive prefix sum: offset of each range within the flattened output.
    segs = cumsum(lengths) - lengths
    totlen = lengths.sum()
    if totlen == 0:
        # Every range is empty; there is no first non-empty start to seed the
        # cumulative sum below, so return the (all-zero) offsets and no values.
        return segs, zeros(0, dtype=akint64)
    # Every position advances by `stride` from its predecessor, except the
    # first position of each non-empty range, which is overwritten below.
    slices = ones(totlen, dtype=akint64) * stride
    non_empty_starts = starts[non_empty]
    non_empty_lengths = lengths[non_empty]
    # Jump needed at each range start so the running sum lands exactly on that
    # range's start value: the start itself for the first range, and for later
    # ranges the start minus the last value emitted by the previous range.
    diffs = concatenate(
        (
            array([non_empty_starts[0]]),
            non_empty_starts[1:] - non_empty_starts[:-1] - (non_empty_lengths[:-1] - 1) * stride,
        )
    )
    slices[segs[non_empty]] = diffs
    return segs, cumsum(slices)


def _aggregator(func):
aggdoc = """
Aggregate values over each sub-array.
Expand Down

0 comments on commit f5864a1

Please sign in to comment.