Index class deprecation enforcements (#13204)

This PR: - [x] Enforces `Index` related deprecations by removing `Float32Index`, `Float64Index`, `GenericIndex`, `Int8Index`, `Int16Index`, `Int32Index`, `Int64Index`, `StringIndex`, `UInt8Index`, `UInt16Index`, `UInt32Index`, `UInt64Index`. - [x] Cleans up the repr logic to more closely align with pandas for `<NA>` value representation in case of `string` dtype. - [x] Fixes docstring and pytests to support the removals of the above classes. This PR also fixes 202 pytests: ```bash = 267 failed, 95670 passed, 2044 skipped, 763 xfailed, 300 xpassed in 442.18s (0:07:22) = ``` On `pandas_2.0_feature_branch`: ```bash = 469 failed, 95464 passed, 2044 skipped, 763 xfailed, 300 xpassed in 469.26s (0:07:49) = ```
rapidsai · May 30, 2023 · 258bf3d · 258bf3d
1 parent 16c987e
commit 258bf3d
Show file tree

Hide file tree

Showing 33 changed files with 284 additions and 846 deletions.
diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
@@ -149,9 +149,6 @@ Numeric Index
    :template: autosummary/class_without_autosummary.rst
 
    RangeIndex
-   Int64Index
-   UInt64Index
-   Float64Index
 
 .. _api.categoricalindex:
 

diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
@@ -261,7 +261,7 @@ def process_class_docstrings(app, what, name, obj, options, lines):
     from the processed docstring.
     """
     if what == "class":
-        if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}:
+        if name in {"cudf.RangeIndex", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}:
 
             cut_index = lines.index('.. rubric:: Attributes')
             lines[:] = lines[:cut_index]

diff --git a/docs/cudf/source/developer_guide/library_design.md b/docs/cudf/source/developer_guide/library_design.md
@@ -22,7 +22,7 @@ Finally we tie these pieces together to provide a more holistic view of the proj
 % class IndexedFrame
 % class SingleColumnFrame
 % class BaseIndex
-% class GenericIndex
+% class Index
 % class MultiIndex
 % class RangeIndex
 % class DataFrame
@@ -42,8 +42,8 @@ Finally we tie these pieces together to provide a more holistic view of the proj
 % BaseIndex <|-- MultiIndex
 % Frame <|-- MultiIndex
 %
-% BaseIndex <|-- GenericIndex
-% SingleColumnFrame <|-- GenericIndex
+% BaseIndex <|-- Index
+% SingleColumnFrame <|-- Index
 %
 % @enduml
 
@@ -89,31 +89,26 @@ While we've highlighted some exceptional cases of Indexes before, let's start wi
 In practice, `BaseIndex` does have concrete implementations of a small set of methods.
 However, currently many of these implementations are not applicable to all subclasses and will eventually be removed.
 
-Almost all indexes are subclasses of `GenericIndex`, a single-columned index with the class hierarchy:
+Almost all indexes are subclasses of `Index`, a single-columned index with the class hierarchy:
 ```python
-class GenericIndex(SingleColumnFrame, BaseIndex)
+class Index(SingleColumnFrame, BaseIndex)
 ```
 Integer, float, or string indexes are all composed of a single column of data.
-Most `GenericIndex` methods are inherited from `Frame`, saving us the trouble of rewriting them.
+Most `Index` methods are inherited from `Frame`, saving us the trouble of rewriting them.
 
 We now consider the three main exceptions to this model:
 
 - A `RangeIndex` is not backed by a column of data, so it inherits directly from `BaseIndex` alone.
   Wherever possible, its methods have special implementations designed to avoid materializing columns.
-  Where such an implementation is infeasible, we fall back to converting it to an `Int64Index` first instead.
+  Where such an implementation is infeasible, we fall back to converting it to an `Index` of `int64`
+  dtype first instead.
 - A `MultiIndex` is backed by _multiple_ columns of data.
   Therefore, its inheritance hierarchy looks like `class MultiIndex(Frame, BaseIndex)`.
   Some of its more `Frame`-like methods may be inherited,
   but many others must be reimplemented since in many cases a `MultiIndex` is not expected to behave like a `Frame`.
-- Just like in pandas, `Index` itself can never be instantiated.
-  `pandas.Index` is the parent class for indexes,
-  but its constructor returns an appropriate subclass depending on the input data type and shape.
-  Unfortunately, mimicking this behavior requires overriding `__new__`,
-  which in turn makes shared initialization across inheritance trees much more cumbersome to manage.
-  To enable sharing constructor logic across different index classes,
-  we instead define `BaseIndex` as the parent class of all indexes.
+- To enable sharing constructor logic across different index classes,
+  we define `BaseIndex` as the parent class of all indexes.
   `Index` inherits from `BaseIndex`, but it masquerades as a `BaseIndex` to match pandas.
-  This class should contain no implementations since it is simply a factory for other indexes.
 
 
 ## The Column layer

diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
 """Defines pytest fixtures for all benchmarks.
 
@@ -40,8 +40,8 @@
 In addition to the above fixtures, we also provide the following more
 specialized fixtures:
     - rangeindex: Since RangeIndex always holds int64 data we cannot conflate
-      it with index_dtype_int64 (a true Int64Index), and it cannot hold nulls.
-      As a result, it is provided as a separate fixture.
+      it with index_dtype_int64 (a true Index with int64 dtype), and it
+      cannot hold nulls. As a result, it is provided as a separate fixture.
 """
 
 import os

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
@@ -40,22 +40,10 @@
     BaseIndex,
     CategoricalIndex,
     DatetimeIndex,
-    Float32Index,
-    Float64Index,
-    GenericIndex,
     Index,
-    Int8Index,
-    Int16Index,
-    Int32Index,
-    Int64Index,
     IntervalIndex,
     RangeIndex,
-    StringIndex,
     TimedeltaIndex,
-    UInt8Index,
-    UInt16Index,
-    UInt32Index,
-    UInt64Index,
     interval_range,
 )
 from cudf.core.missing import NA
@@ -106,15 +94,8 @@
     "DatetimeIndex",
     "Decimal32Dtype",
     "Decimal64Dtype",
-    "Float32Index",
-    "Float64Index",
-    "GenericIndex",
     "Grouper",
     "Index",
-    "Int16Index",
-    "Int32Index",
-    "Int64Index",
-    "Int8Index",
     "IntervalDtype",
     "IntervalIndex",
     "ListDtype",
@@ -123,13 +104,8 @@
     "RangeIndex",
     "Scalar",
     "Series",
-    "StringIndex",
     "StructDtype",
     "TimedeltaIndex",
-    "UInt16Index",
-    "UInt32Index",
-    "UInt64Index",
-    "UInt8Index",
     "api",
     "concat",
     "crosstab",

diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
 
 import sys
 from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, TypeVar, Union
@@ -37,9 +37,7 @@
 
 DataFrameOrSeries = Union["cudf.Series", "cudf.DataFrame"]
 SeriesOrIndex = Union["cudf.Series", "cudf.core.index.BaseIndex"]
-SeriesOrSingleColumnIndex = Union[
-    "cudf.Series", "cudf.core.index.GenericIndex"
-]
+SeriesOrSingleColumnIndex = Union["cudf.Series", "cudf.core.index.Index"]
 
 # Groupby aggregation
 AggType = Union[str, Callable]