rapidsai · er-eis · May 4, 2024 · May 4, 2024 · May 7, 2024 · May 7, 2024
@@ -280,15 +280,30 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
 
                     for rg in row_groups[i]:
                         filtered_idx.append(
-                            cudf.RangeIndex(
-                                start=row_groups_i[rg][0],
-                                stop=row_groups_i[rg][1],
-                                step=range_index_meta['step']
+                            (
+                                row_groups_i[rg][0],
+                                row_groups_i[rg][1]
                             )
                         )
 
-                if len(filtered_idx) > 0:
-                    idx = cudf.concat(filtered_idx)
+                step = range_index_meta['step']
+                if len(filtered_idx) == 1:
+                    start, stop = filtered_idx[0]
+                    idx = cudf.RangeIndex(
+                        start=start, stop=stop, step=step
+                    )
+                elif len(filtered_idx) > 1:
+                    idx = cudf.Index(
+                        data=[
+                            n
+                            for start, stop in filtered_idx
+                            for n in range(
+                                start,
+                                stop,
+                                step
+                            )
+                        ]
+                    )
                 else:
                     idx = cudf.Index(cudf.core.column.column_empty(0))
             else:

@@ -121,6 +121,8 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None):
     Parameters
     ----------
     objs : list or dictionary of DataFrame, Series, or Index
+        .. deprecated:: 24.06
+            Concatenating indices is deprecated and will be removed in a future version of cudf.
     axis : {0/'index', 1/'columns'}, default 0
         The axis to concatenate along.
         `axis=1` must be passed if a dictionary is passed.
@@ -285,10 +287,16 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None):
         )
 
     if any(isinstance(o, cudf.BaseIndex) for o in objs):
+        warnings.warn(
+            "index concatenation will be deprecated in a future release",
+            FutureWarning,
+        )
         if not all(isinstance(o, cudf.BaseIndex) for o in objs):
             raise TypeError(
                 "when concatenating indices you must provide ONLY indices"
             )
+        if axis == 1:
+            raise ValueError("cannot concatenate indices across axis 1")
 
     only_series = all(isinstance(o, cudf.Series) for o in objs)
 

@@ -1,7 +1,7 @@
 # Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 import warnings
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from decimal import Decimal
 
 import numpy as np
@@ -104,7 +104,11 @@ def test_concat_dataframe(index, nulls, axis):
         )
 
     # Index
-    res = cudf.concat([gdf.index, gdf2.index], axis=axis).to_pandas()
+    with pytest.warns(
+        FutureWarning,
+        match="index concatenation will be deprecated in a future release",
+    ):
+        res = cudf.concat([gdf.index, gdf2.index], axis=axis).to_pandas()
     sol = df.index.append(df2.index)
     assert_eq(res, sol, check_names=False, check_categorical=False)
 
@@ -151,12 +155,16 @@ def test_concat_errors():
     )
 
     # Mismatched types
-    assert_exceptions_equal(
-        lfunc=pd.concat,
-        rfunc=cudf.concat,
-        lfunc_args_and_kwargs=([], {"objs": [df, df.index, df.x]}),
-        rfunc_args_and_kwargs=([], {"objs": [gdf, gdf.index, gdf.x]}),
-    )
+    with pytest.warns(
+        FutureWarning,
+        match="index concatenation will be deprecated in a future release",
+    ):
+        assert_exceptions_equal(
+            lfunc=pd.concat,
+            rfunc=cudf.concat,
+            lfunc_args_and_kwargs=([], {"objs": [df, df.index, df.x]}),
+            rfunc_args_and_kwargs=([], {"objs": [gdf, gdf.index, gdf.x]}),
+        )
 
     # Unknown type
     assert_exceptions_equal(
@@ -1997,3 +2005,52 @@ def test_concat_dict_incorrect_type_index(d):
         match="cannot concatenate a dictionary containing indices",
     ):
         cudf.concat(d, axis=1)
+
+
+@pytest.mark.parametrize(
+    "axis,exception",
+    [
+        (0, nullcontext()),
+        (
+            1,
+            pytest.raises(
+                ValueError, match="cannot concatenate indices across axis 1"
+            ),
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "idx",
+    [
+        [(cudf.Index, {"data": [1, 2, 3]})],
+        [(cudf.Index, {"data": [1, 2, 3]}), (cudf.Index, {"data": [4, 5, 6]})],
+        [
+            (
+                cudf.MultiIndex,
+                {
+                    "levels": [[1, 2], ["blue", "red"]],
+                    "codes": [[0, 0, 1, 1], [1, 0, 1, 0]],
+                },
+            )
+        ],
+        [(cudf.CategoricalIndex, {"data": [1, 2, 3]})],
+        [
+            (cudf.RangeIndex, {"start": 2, "stop": 4, "step": 1}),
+            (cudf.RangeIndex, {"start": 2, "stop": 9, "step": 3}),
+        ],
+    ],
+)
+def test_concat_index(idx, axis, exception):
+    idx = [c(**d) for c, d in idx]  # build indices from (class, kwargs)
+    with pytest.warns(  # warning is expected on both axis cases
+        FutureWarning,
+        match="index concatenation will be deprecated in a future release",
+    ):
+        with exception:  # nullcontext for axis=0, pytest.raises for axis=1
+            result = cudf.concat(idx, axis=axis)
+    if axis == 0:  # axis=1 raises above, so `result` is never bound
+        assert isinstance(result, cudf.Index)
+    with pytest.raises(  # pandas is expected to reject Index inputs
+        TypeError, match="only Series and DataFrame objs are valid"
+    ):
+        pd.concat([i.to_pandas() for i in idx], axis=axis)
@@ -2283,7 +2283,11 @@ def test_get_indexer_invalid(idx1, idx2):
 def test_range_index_concat(objs):
     cudf_objs = [cudf.from_pandas(obj) for obj in objs]
 
-    actual = cudf.concat(cudf_objs)
+    with pytest.warns(
+        FutureWarning,
+        match="index concatenation will be deprecated in a future release",
+    ):
+        actual = cudf.concat(cudf_objs)
 
     expected = objs[0]
     for obj in objs[1:]:

@@ -276,7 +276,11 @@ def test_categorical_categories():
 def test_categorical_as_known():
     df = dask_cudf.from_cudf(DataFrame({"col_1": [0, 1, 2, 3]}), npartitions=2)
     df["col_1"] = df["col_1"].astype("category")
-    actual = df["col_1"].cat.as_known()
+    with pytest.warns(
+        FutureWarning,
+        match="index concatenation will be deprecated in a future release",
+    ):
+        actual = df["col_1"].cat.as_known()
 
     pdf = dd.from_pandas(pd.DataFrame({"col_1": [0, 1, 2, 3]}), npartitions=2)
     pdf["col_1"] = pdf["col_1"].astype("category")

@@ -13,6 +13,7 @@
 from dask.utils import M
 
 import cudf
+from cudf import BaseIndex
 
 import dask_cudf
 from dask_cudf.tests.utils import skip_dask_expr, xfail_dask_expr
@@ -148,7 +149,11 @@ def test_from_pandas_with_generic_idx():
 
     ddf = dask_cudf.from_cudf(cdf, npartitions=2)
 
-    assert isinstance(ddf.index.compute(), cudf.RangeIndex)
+    with pytest.warns(
+        FutureWarning,
+        match="index concatenation will be deprecated in a future release",
+    ):
+        assert isinstance(ddf.index.compute(), cudf.RangeIndex)
     dd.assert_eq(ddf.loc[1:2, ["a"]], cdf.loc[1:2, ["a"]])
 
 
@@ -610,7 +615,14 @@ def test_unary_ops(func, gdf, gddf):
     p = func(gdf)
     g = func(gddf)
 
-    dd.assert_eq(p, g, check_names=False)
+    if isinstance(p, BaseIndex):
+        with pytest.warns(
+            FutureWarning,
+            match="index concatenation will be deprecated in a future release",
+        ):
+            dd.assert_eq(p, g, check_names=False)
+    else:
+        dd.assert_eq(p, g, check_names=False)
 
 
 @pytest.mark.parametrize("series", [True, False])