diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 97ee0948209..e9ab3d5797c 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -4,7 +4,7 @@ import pickle from numbers import Number -from typing import Any, Dict, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union import cupy import numpy as np @@ -588,13 +588,18 @@ def sum(self): @classmethod def _concat(cls, objs): - data = concat_columns([o._values for o in objs]) + if all(isinstance(obj, RangeIndex) for obj in objs): + result = _concat_range_index(objs) + else: + data = concat_columns([o._values for o in objs]) + result = as_index(data) + names = {obj.name for obj in objs} if len(names) == 1: [name] = names else: name = None - result = as_index(data) + result.name = name return result @@ -3032,3 +3037,43 @@ def __new__( ) return as_index(data, copy=copy, dtype=dtype, name=name, **kwargs) + + +def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: + """ + An internal Utility function to concat RangeIndex objects. + """ + start = step = next_ = None + + # Filter the empty indexes + non_empty_indexes = [obj for obj in indexes if len(obj)] + + if not non_empty_indexes: + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return RangeIndex(0, 0) + + for obj in non_empty_indexes: + if start is None: + # This is set by the first non-empty index + start = obj.start + if step is None and len(obj) > 1: + step = obj.step + elif step is None: + # First non-empty index had only one element + if obj.start == start: + result = as_index(concat_columns([x._values for x in indexes])) + return result + step = obj.start - start + + non_consecutive = (step != obj.step and len(obj) > 1) or ( + next_ is not None and obj.start != next_ + ) + if non_consecutive: + result = as_index(concat_columns([x._values for x in indexes])) + return result + if step is not None: + next_ = obj[-1] + step + + stop = non_empty_indexes[-1].stop if next_ is None else next_ + return RangeIndex(start, stop, step) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index f03454c479a..3f58eb3d6e7 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2316,3 +2316,22 @@ def test_get_loc_multi_string(idx, key, method): got = gi.get_loc(key, method=method) assert_eq(expected, got) + + +@pytest.mark.parametrize( + "objs", + [ + [pd.RangeIndex(0, 10), pd.RangeIndex(10, 20)], + [pd.RangeIndex(10, 20), pd.RangeIndex(22, 40), pd.RangeIndex(50, 60)], + [pd.RangeIndex(10, 20, 2), pd.RangeIndex(20, 40, 2)], + ], +) +def test_range_index_concat(objs): + cudf_objs = [cudf.from_pandas(obj) for obj in objs] + + actual = cudf.concat(cudf_objs) + + expected = objs[0] + for obj in objs[1:]: + expected = expected.append(obj) + assert_eq(expected, actual) diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index cf5203a22e5..ace9701b677 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -59,7 +59,7 @@ def test_from_cudf_with_generic_idx(): ddf = dgd.from_cudf(cdf, npartitions=2) - assert isinstance(ddf.index.compute(), cudf.core.index.GenericIndex) + assert isinstance(ddf.index.compute(), cudf.RangeIndex) dd.assert_eq(ddf.loc[1:2, ["a"]], cdf.loc[1:2, ["a"]])