diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 1c1c2ef2bf6..99f0d749154 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2289,7 +2289,9 @@ def arange( if step is None: step = 1 - size = int(np.ceil((stop - start) / step)) + size = len(range(int(start), int(stop), int(step))) + if size == 0: + return as_column([], dtype=dtype) return libcudf.filling.sequence( size, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 2596f90c59b..1c672aacd86 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -53,7 +53,7 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.core.frame import Frame, _drop_rows_by_labels from cudf.core.groupby.groupby import DataFrameGroupBy -from cudf.core.index import BaseIndex, RangeIndex, as_index +from cudf.core.index import BaseIndex, Index, RangeIndex, as_index from cudf.core.indexed_frame import ( IndexedFrame, _FrameIndexer, @@ -1219,13 +1219,54 @@ def _slice(self: T, arg: slice) -> T: return self start, stop, stride = arg.indices(num_rows) + # early stop for empty cases + if len(range(start, stop, stride)) == 0: + columns = ColumnAccessor( + { + colname: column.column_empty_like(col, newsize=0) + for colname, col in self._data.items() + }, + multiindex=self._data.multiindex, + level_names=self._data.level_names, + ) + + if isinstance(self.index, MultiIndex): + mi_columns = ColumnAccessor( + { + colname: column.column_empty_like(col, newsize=0) + for colname, col in self.index._data.items() + } + ) + return DataFrame._from_data( + columns, + index=MultiIndex._from_data( + mi_columns, name=self.index.name + ), + ) + else: + return DataFrame._from_data( + columns, + index=( + RangeIndex( + start=start, + stop=stop, + step=stride, + name=self.index.name, + ) + if isinstance(self.index, RangeIndex) + else Index( + [], dtype=self.index.dtype, name=self.index.name + ) + ), + ) + # This is just to handle RangeIndex type, stop # it from materializing unnecessarily keep_index = True if self.index is not None and isinstance(self.index, RangeIndex): if self._num_columns == 0: result = self._empty_like(keep_index) - result._index = self.index[start:stop] + result._index = self.index[start:stop:stride] return result keep_index = False diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 19d7c8a10ab..c3b414c2d4a 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1294,8 +1294,8 @@ def test_loc_datetime_index(sli, is_dataframe): @pytest.mark.parametrize( "gdf_kwargs", [ - {"data": {"a": range(100000)}}, - {"data": {"a": range(100000), "b": range(100000)}}, + {"data": {"a": range(1000)}}, + {"data": {"a": range(1000), "b": range(1000)}}, { "data": { "a": range(20), @@ -1304,26 +1304,33 @@ def test_loc_datetime_index(sli, is_dataframe): } }, {"index": [1, 2, 3]}, - {"index": range(100000)}, + {"index": range(1000)}, {"columns": ["a", "b", "c", "d"]}, - {"columns": ["a"], "index": range(100000)}, - {"columns": ["a", "col2", "...col n"], "index": range(100000)}, - {"index": cudf.Series(range(100000)).astype("str")}, + {"columns": ["a"], "index": range(1000)}, + {"columns": ["a", "col2", "...col n"], "index": range(1000)}, + {"index": cudf.Series(range(1000)).astype("str")}, { "columns": ["a", "b", "c", "d"], - "index": cudf.Series(range(100000)).astype("str"), + "index": cudf.Series(range(1000)).astype("str"), }, ], ) @pytest.mark.parametrize( "slice", [ - slice(25000, 50000), - slice(25000, 25001), - slice(50000), + slice(6, None), # start but no stop, [6:] + slice(None, None, 3), # only step, [::3] + slice(1, 10, 2), # start, stop, step + slice(3, -5, 2), # negative stop + slice(-2, -4), # slice is empty + slice(-10, -20, -1), # reversed slice + slice(None), # slices everything, same as [:] + slice(250, 500), + slice(250, 251), + slice(50), slice(1, 10), slice(10, 20), - slice(15, 24000), + slice(15, 24), slice(6), ], )