From 32c1171e864b4d33bfee8bed8a0de1641ce5ea32 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Fri, 10 Jan 2025 08:24:19 -0600 Subject: [PATCH] fix: always check '.length' for 'unknown_length' (#3332) Co-authored-by: Ianna Osborne --- src/awkward/contents/bitmaskedarray.py | 9 ++++++--- src/awkward/contents/bytemaskedarray.py | 6 +++--- src/awkward/contents/indexedarray.py | 10 +++++----- src/awkward/contents/numpyarray.py | 9 ++++++--- src/awkward/contents/unionarray.py | 6 +++--- src/awkward/contents/unmaskedarray.py | 8 ++++---- src/awkward/operations/ak_firsts.py | 3 ++- src/awkward/record.py | 2 +- 8 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/awkward/contents/bitmaskedarray.py b/src/awkward/contents/bitmaskedarray.py index 24f552fde3..15a6ff3a78 100644 --- a/src/awkward/contents/bitmaskedarray.py +++ b/src/awkward/contents/bitmaskedarray.py @@ -614,14 +614,14 @@ def _numbers_to_type(self, name, including_unknown): return self.to_ByteMaskedArray()._numbers_to_type(name, including_unknown) def _is_unique(self, negaxis, starts, parents, outlength): - if self._mask.length == 0: + if self._mask.length is not unknown_length and self._mask.length == 0: return True return self.to_IndexedOptionArray64()._is_unique( negaxis, starts, parents, outlength ) def _unique(self, negaxis, starts, parents, outlength): - if self._mask.length == 0: + if self._mask.length is not unknown_length and self._mask.length == 0: return self out = self.to_IndexedOptionArray64()._unique( negaxis, starts, parents, outlength @@ -810,7 +810,10 @@ def to_packed(self, recursive: bool = True) -> Self: else: excess_length = int(math.ceil(self._length / 8.0)) - if self._mask.length == excess_length: + if ( + self._mask.length is not unknown_length + and self._mask.length == excess_length + ): mask = self._mask else: mask = self._mask[:excess_length] diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py index 07165be525..8580ac4b9f 100644 --- a/src/awkward/contents/bytemaskedarray.py +++ b/src/awkward/contents/bytemaskedarray.py @@ -684,7 +684,7 @@ def _offsets_and_flattened(self, axis: int, depth: int) -> tuple[Index, Content] offsets, flattened = next._offsets_and_flattened(axis, depth) - if offsets.length == 0: + if offsets.length is not unknown_length and offsets.length == 0: return ( offsets, ak.contents.IndexedOptionArray( @@ -790,14 +790,14 @@ def _numbers_to_type(self, name, including_unknown): ) def _is_unique(self, negaxis, starts, parents, outlength): - if self._mask.length == 0: + if self._mask.length is not unknown_length and self._mask.length == 0: return True return self.to_IndexedOptionArray64()._is_unique( negaxis, starts, parents, outlength ) def _unique(self, negaxis, starts, parents, outlength): - if self._mask.length == 0: + if self._mask.length is not unknown_length and self._mask.length == 0: return self return self.to_IndexedOptionArray64()._unique( negaxis, starts, parents, outlength diff --git a/src/awkward/contents/indexedarray.py b/src/awkward/contents/indexedarray.py index 4edc24ac07..6ddcf3417c 100644 --- a/src/awkward/contents/indexedarray.py +++ b/src/awkward/contents/indexedarray.py @@ -13,7 +13,7 @@ from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import IndexType, NumpyMetadata from awkward._nplikes.placeholder import PlaceholderArray -from awkward._nplikes.shape import ShapeItem +from awkward._nplikes.shape import ShapeItem, unknown_length from awkward._nplikes.typetracer import TypeTracer from awkward._parameters import ( parameters_intersect, @@ -781,7 +781,7 @@ def _numbers_to_type(self, name, including_unknown): ) def _is_unique(self, negaxis, starts, parents, outlength): - if self._index.length == 0: + if self._index.length is not unknown_length and self._index.length == 0: return True nextindex = self._unique_index(self._index) @@ -793,7 +793,7 @@ def _is_unique(self, negaxis, starts, parents, outlength): return next._is_unique(negaxis, starts, parents, outlength) def _unique(self, negaxis, starts, parents, outlength): - if self._index.length == 0: + if self._index.length is not unknown_length and self._index.length == 0: return self branch, depth = self.branch_depth @@ -1044,7 +1044,7 @@ def _to_arrow( return out else: - if self._content.length == 0: + if self._content.length is not unknown_length and self._content.length == 0: # IndexedOptionArray._to_arrow replaces -1 in the index with 0. So behind # every masked value is self._content[0], unless self._content.length == 0. # In that case, don't call self._content[index]; it's empty anyway. @@ -1058,7 +1058,7 @@ def _to_arrow( return next2._to_arrow(pyarrow, mask_node, validbytes, length, options) def _to_cudf(self, cudf: Any, mask: Content | None, length: int): - if self._content.length == 0: + if self._content.length is not unknown_length and self._content.length == 0: # IndexedOptionArray._to_arrow replaces -1 in the index with 0. So behind # every masked value is self._content[0], unless self._content.length == 0. # In that case, don't call self._content[index]; it's empty anyway. diff --git a/src/awkward/contents/numpyarray.py b/src/awkward/contents/numpyarray.py index ac870e883e..bb74ff63b0 100644 --- a/src/awkward/contents/numpyarray.py +++ b/src/awkward/contents/numpyarray.py @@ -692,7 +692,7 @@ def _numbers_to_type(self, name, including_unknown): ) def _is_unique(self, negaxis, starts, parents, outlength): - if self.length == 0: + if self.length is not unknown_length and self.length == 0: return True elif len(self.shape) != 1: return self.to_RegularArray()._is_unique( @@ -711,9 +711,12 @@ def _is_unique(self, negaxis, starts, parents, outlength): else: out = self._unique(negaxis, starts, parents, outlength) if isinstance(out, ak.contents.ListOffsetArray): - return out.content.length == self.length + return ( + out.content.length is not unknown_length + and out.content.length == self.length + ) else: - return out.length == self.length + return out.length is not unknown_length and out.length == self.length def _unique(self, negaxis, starts, parents, outlength): if self.shape[0] == 0: diff --git a/src/awkward/contents/unionarray.py b/src/awkward/contents/unionarray.py index d92b107e47..9e452eee8f 100644 --- a/src/awkward/contents/unionarray.py +++ b/src/awkward/contents/unionarray.py @@ -1359,7 +1359,7 @@ def _argsort_next( parameters=self._parameters, mergebool=True, ) - if simplified.length == 0: + if simplified.length is not unknown_length and simplified.length == 0: return ak.contents.NumpyArray( self._backend.nplike.empty(0, dtype=np.int64), parameters=None, @@ -1374,7 +1374,7 @@ def _argsort_next( ) def _sort_next(self, negaxis, starts, parents, outlength, ascending, stable): - if self.length == 0: + if self.length is not unknown_length and self.length == 0: return self simplified = type(self).simplified( @@ -1384,7 +1384,7 @@ def _sort_next(self, negaxis, starts, parents, outlength, ascending, stable): parameters=self._parameters, mergebool=True, ) - if simplified.length == 0: + if simplified.length is not unknown_length and simplified.length == 0: return simplified if isinstance(simplified, ak.contents.UnionArray): diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py index 2eb82d64a0..2cbe40c0c0 100644 --- a/src/awkward/contents/unmaskedarray.py +++ b/src/awkward/contents/unmaskedarray.py @@ -13,7 +13,7 @@ from awkward._nplikes.array_like import ArrayLike from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import IndexType, NumpyMetadata -from awkward._nplikes.shape import ShapeItem +from awkward._nplikes.shape import ShapeItem, unknown_length from awkward._nplikes.typetracer import MaybeNone from awkward._parameters import ( parameters_intersect, @@ -340,7 +340,7 @@ def _offsets_and_flattened(self, axis: int, depth: int) -> tuple[Index, Content] raise AxisError("axis=0 not allowed for flatten") else: offsets, flattened = self._content._offsets_and_flattened(axis, depth) - if offsets.length == 0: + if offsets.length is not unknown_length and offsets.length == 0: return ( offsets, UnmaskedArray(flattened, parameters=self._parameters), @@ -399,12 +399,12 @@ def _numbers_to_type(self, name, including_unknown): ) def _is_unique(self, negaxis, starts, parents, outlength): - if self._content.length == 0: + if self._content.length is not unknown_length and self._content.length == 0: return True return self._content._is_unique(negaxis, starts, parents, outlength) def _unique(self, negaxis, starts, parents, outlength): - if self._content.length == 0: + if self._content.length is not unknown_length and self._content.length == 0: return self return self._content._unique(negaxis, starts, parents, outlength) diff --git a/src/awkward/operations/ak_firsts.py b/src/awkward/operations/ak_firsts.py index 79fba6eb51..1d8c9f0de6 100644 --- a/src/awkward/operations/ak_firsts.py +++ b/src/awkward/operations/ak_firsts.py @@ -11,6 +11,7 @@ _remove_named_axis, ) from awkward._nplikes.numpy_like import NumpyMetadata +from awkward._nplikes.shape import unknown_length from awkward._regularize import regularize_axis from awkward.errors import AxisError @@ -85,7 +86,7 @@ def _impl(array, axis, highlevel, behavior, attrs): # and length > 0 cases. backend = ak.backend(array) slicer = ak.to_backend(ak.from_iter([None, 0]), backend) - if layout.length == 0: + if layout.length is not unknown_length and layout.length == 0: out = layout[slicer[[0]]][0] else: out = layout[slicer[[1]]][0] diff --git a/src/awkward/record.py b/src/awkward/record.py index 73ea265d83..0d1e3890c8 100644 --- a/src/awkward/record.py +++ b/src/awkward/record.py @@ -194,7 +194,7 @@ def _getitem_fields(self, where, only_fields: tuple[str, ...] = ()): return self._array._getitem_fields(where)._getitem_at(self._at) def to_packed(self, recursive: bool = True) -> Self: - if self._array.length == 1: + if self._array.length is not unknown_length and self._array.length == 1: return Record(self._array.to_packed(recursive), self._at) else: return Record(self._array[self._at : self._at + 1].to_packed(recursive), 0)