From a2840c646be2e2d1baf7e8f53da0ae2f7f320ada Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 5 Aug 2021 15:51:03 -0500
Subject: [PATCH] Fix concatenation of `cudf.RangeIndex` (#8970)

Fixes: #6872

In cudf, we have been concatenating a collection of `RangeIndex`'s by materializing each one of them, but instead we should rather be introspecting each RangeIndex to decide whether to materialize of not. This PR fixes it.

<!--

Thank you for contributing to cuDF :)

Here are some guidelines to help the review process go smoothly.

1. Please write a description in this text box of the changes that are being
   made.

2. Please ensure that you have written units tests for the changes made/features
   added.

3. There are CI checks in place to enforce that committed code follows our style
   and syntax standards. Please see our contribution guide in `CONTRIBUTING.MD`
   in the project root for more information about the checks we perform and how
   you can run them locally.

4. If you are closing an issue please use one of the automatic closing words as
   noted here: https://help.github.com/articles/closing-issues-using-keywords/

5. If your pull request is not ready for review but you want to make use of the
   continuous integration testing facilities please mark your pull request as Draft.
   https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request#converting-a-pull-request-to-a-draft

6. If your pull request is ready to be reviewed without requiring additional
   work on top of it, then remove it from "Draft" and make it "Ready for Review".
   https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request#marking-a-pull-request-as-ready-for-review

   If assistance is required to complete the functionality, for example when the
   C/C++ code of a feature is complete but Python bindings are still required,
   then add the label `help wanted` so that others can triage and assist.
   The additional changes then can be implemented on top of the same PR.
   If the assistance is done by members of the rapidsAI team, then no
   additional actions are required by the creator of the original PR for this,
   otherwise the original author of the PR needs to give permission to the
   person(s) assisting to commit to their personal fork of the project. If that
   doesn't happen then a new PR based on the code of the original PR can be
   opened by the person assisting, which then will be the PR that will be
   merged.

7. Once all work has been done and review has taken place please do not add
   features or make changes out of the scope of those requested by the reviewer
   (doing this just add delays as already reviewed code ends up having to be
   re-reviewed/it is hard to tell what is new etc!). Further, please do not
   rebase your branch on the target branch, force push, or rewrite history.
   Doing any of these causes the context of any comments made by reviewers to be lost.
   If conflicts occur against the target branch they should be resolved by
   merging the target branch into the branch used for making the pull request.

Many thanks in advance for your cooperation!

-->

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Benjamin Zaitlen (https://github.com/quasiben)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/8970
---
 python/cudf/cudf/core/index.py                | 51 +++++++++++++++++--
 python/cudf/cudf/tests/test_index.py          | 19 +++++++
 python/dask_cudf/dask_cudf/tests/test_core.py |  2 +-
 3 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 691b6ab2e29..c94aa940ec5 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -4,7 +4,7 @@
 
 import pickle
 from numbers import Number
-from typing import Any, Dict, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 import cupy
 import numpy as np
@@ -588,13 +588,18 @@ def sum(self):
 
     @classmethod
     def _concat(cls, objs):
-        data = concat_columns([o._values for o in objs])
+        if all(isinstance(obj, RangeIndex) for obj in objs):
+            result = _concat_range_index(objs)
+        else:
+            data = concat_columns([o._values for o in objs])
+            result = as_index(data)
+
         names = {obj.name for obj in objs}
         if len(names) == 1:
             [name] = names
         else:
             name = None
-        result = as_index(data)
+
         result.name = name
         return result
 
@@ -3043,3 +3048,43 @@ def __new__(
             )
 
         return as_index(data, copy=copy, dtype=dtype, name=name, **kwargs)
+
+
+def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex:
+    """
+    An internal Utility function to concat RangeIndex objects.
+    """
+    start = step = next_ = None
+
+    # Filter the empty indexes
+    non_empty_indexes = [obj for obj in indexes if len(obj)]
+
+    if not non_empty_indexes:
+        # Here all "indexes" had 0 length, i.e. were empty.
+        # In this case return an empty range index.
+        return RangeIndex(0, 0)
+
+    for obj in non_empty_indexes:
+        if start is None:
+            # This is set by the first non-empty index
+            start = obj.start
+            if step is None and len(obj) > 1:
+                step = obj.step
+        elif step is None:
+            # First non-empty index had only one element
+            if obj.start == start:
+                result = as_index(concat_columns([x._values for x in indexes]))
+                return result
+            step = obj.start - start
+
+        non_consecutive = (step != obj.step and len(obj) > 1) or (
+            next_ is not None and obj.start != next_
+        )
+        if non_consecutive:
+            result = as_index(concat_columns([x._values for x in indexes]))
+            return result
+        if step is not None:
+            next_ = obj[-1] + step
+
+    stop = non_empty_indexes[-1].stop if next_ is None else next_
+    return RangeIndex(start, stop, step)
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index f03454c479a..3f58eb3d6e7 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -2316,3 +2316,22 @@ def test_get_loc_multi_string(idx, key, method):
         got = gi.get_loc(key, method=method)
 
         assert_eq(expected, got)
+
+
+@pytest.mark.parametrize(
+    "objs",
+    [
+        [pd.RangeIndex(0, 10), pd.RangeIndex(10, 20)],
+        [pd.RangeIndex(10, 20), pd.RangeIndex(22, 40), pd.RangeIndex(50, 60)],
+        [pd.RangeIndex(10, 20, 2), pd.RangeIndex(20, 40, 2)],
+    ],
+)
+def test_range_index_concat(objs):
+    cudf_objs = [cudf.from_pandas(obj) for obj in objs]
+
+    actual = cudf.concat(cudf_objs)
+
+    expected = objs[0]
+    for obj in objs[1:]:
+        expected = expected.append(obj)
+    assert_eq(expected, actual)
diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py
index cf5203a22e5..ace9701b677 100644
--- a/python/dask_cudf/dask_cudf/tests/test_core.py
+++ b/python/dask_cudf/dask_cudf/tests/test_core.py
@@ -59,7 +59,7 @@ def test_from_cudf_with_generic_idx():
 
     ddf = dgd.from_cudf(cdf, npartitions=2)
 
-    assert isinstance(ddf.index.compute(), cudf.core.index.GenericIndex)
+    assert isinstance(ddf.index.compute(), cudf.RangeIndex)
     dd.assert_eq(ddf.loc[1:2, ["a"]], cdf.loc[1:2, ["a"]])