From 22ef0634f07f7b40d718e80bed176e88ac734ebe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 4 Jun 2024 14:58:11 -1000 Subject: [PATCH] Remove internal usage of core.index.as_index in favor of cudf.Index (#15851) `cudf.Index.__init__` essentially calls `as_index` immediately internally. To avoid both from potentially diverging, the public `cudf.Index` should be preferred to ensure the public behaviors are used internally Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/15851 --- python/cudf/cudf/core/algorithms.py | 4 +- python/cudf/cudf/core/column/methods.py | 4 +- python/cudf/cudf/core/column/string.py | 4 +- python/cudf/cudf/core/cut.py | 4 +- python/cudf/cudf/core/dataframe.py | 36 +++++++-------- python/cudf/cudf/core/dtypes.py | 4 +- python/cudf/cudf/core/groupby/groupby.py | 6 +-- python/cudf/cudf/core/index.py | 30 +++++-------- python/cudf/cudf/core/indexed_frame.py | 4 +- python/cudf/cudf/core/multiindex.py | 7 +-- python/cudf/cudf/core/series.py | 8 ++-- python/cudf/cudf/core/tools/datetimes.py | 5 +-- python/cudf/cudf/tests/test_array_function.py | 4 +- python/cudf/cudf/tests/test_binops.py | 31 +++++++------ python/cudf/cudf/tests/test_contains.py | 6 +-- python/cudf/cudf/tests/test_dlpack.py | 2 +- python/cudf/cudf/tests/test_index.py | 44 ++++++++----------- python/cudf/cudf/tests/test_multiindex.py | 7 +-- python/cudf/cudf/tests/test_string.py | 38 ++++++++-------- .../cudf/cudf/tests/text/test_text_methods.py | 8 ++-- 20 files changed, 116 insertions(+), 140 deletions(-) diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 272abdece9e..51a32e29886 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -6,7 +6,7 @@ from cudf.core.column import as_column from cudf.core.copy_types import BooleanMask -from cudf.core.index import RangeIndex, as_index +from cudf.core.index import Index, RangeIndex from cudf.core.indexed_frame import IndexedFrame from cudf.core.scalar import Scalar from cudf.options import get_option @@ -107,7 +107,7 @@ def factorize(values, sort=False, use_na_sentinel=True, size_hint=None): dtype="int64" if get_option("mode.pandas_compatible") else None, ).values - return labels, cats.values if return_cupy_array else as_index(cats) + return labels, cats.values if return_cupy_array else Index(cats) def _linear_interpolation(column, index=None): diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index e827c7a3dd3..7f7355c571a 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -93,8 +93,6 @@ def _return_or_inplace( else: return cudf.Series(new_col, name=self._parent.name) elif isinstance(self._parent, cudf.BaseIndex): - return cudf.core.index.as_index( - new_col, name=self._parent.name - ) + return cudf.Index(new_col, name=self._parent.name) else: return self._parent._mimic_inplace(new_col, inplace=False) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index fd98d0dc163..d12aa80e9a3 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -4391,7 +4391,7 @@ def code_points(self) -> SeriesOrIndex: if isinstance(self._parent, cudf.Series): return cudf.Series(new_col, name=self._parent.name) elif isinstance(self._parent, cudf.BaseIndex): - return cudf.core.index.as_index(new_col, name=self._parent.name) + return cudf.Index(new_col, name=self._parent.name) else: return new_col @@ -4706,7 +4706,7 @@ def character_tokenize(self) -> SeriesOrIndex: index = self._parent.index.repeat(lengths) return cudf.Series(result_col, name=self._parent.name, index=index) elif isinstance(self._parent, cudf.BaseIndex): - return cudf.core.index.as_index(result_col, name=self._parent.name) + return cudf.Index(result_col, name=self._parent.name) else: return result_col diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index ccf730c91fb..54c5e829e8a 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. from collections import abc @@ -292,7 +292,7 @@ def cut( ) # we return a categorical index, as we don't have a Categorical method - categorical_index = cudf.core.index.as_index(col) + categorical_index = cudf.Index(col) if isinstance(orig_x, (pd.Series, cudf.Series)): # if we have a series input we return a series output diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 4c55b5427de..c8f1e872300 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -712,7 +712,7 @@ def __init__( data = data.reindex(index) index = data.index else: - index = as_index(index) + index = cudf.Index(index) else: index = data.index @@ -761,7 +761,7 @@ def __init__( if index is None: self._index = RangeIndex(0) else: - self._index = as_index(index) + self._index = cudf.Index(index) if columns is not None: rangeindex = isinstance( columns, (range, pd.RangeIndex, cudf.RangeIndex) @@ -875,7 +875,7 @@ def _init_from_series_list(self, data, columns, index): # When `index` is `None`, the final index of # resulting dataframe will be union of # all Series's names. - final_index = as_index(_get_union_of_series_names(data)) + final_index = cudf.Index(_get_union_of_series_names(data)) else: # When an `index` is passed, the final index of # resulting dataframe will be whatever @@ -919,7 +919,7 @@ def _init_from_series_list(self, data, columns, index): f"not match length of index ({index_length})" ) - final_index = as_index(index) + final_index = cudf.Index(index) series_lengths = list(map(len, data)) data = numeric_normalize_types(*data) @@ -943,7 +943,7 @@ def _init_from_series_list(self, data, columns, index): # Setting `final_columns` to self._index so # that the resulting `transpose` will be have # columns set to `final_columns` - self._index = as_index(final_columns) + self._index = cudf.Index(final_columns) transpose = self.T else: @@ -987,9 +987,9 @@ def _init_from_list_like(self, data, index=None, columns=None): if index is None: index = RangeIndex(start=0, stop=len(data)) else: - index = as_index(index) + index = cudf.Index(index) - self._index = as_index(index) + self._index = cudf.Index(index) # list-of-dicts case if len(data) > 0 and isinstance(data[0], dict): data = DataFrame.from_pandas(pd.DataFrame(data)) @@ -1095,7 +1095,7 @@ def _init_from_dict_like( self._index = RangeIndex(0, num_rows) else: - self._index = as_index(index) + self._index = cudf.Index(index) if len(data): self._data.multiindex = True @@ -1410,7 +1410,7 @@ def __setitem__(self, arg, value): new_columns, verify=False ) if isinstance(value, (pd.Series, Series)): - self._index = as_index(value.index) + self._index = cudf.Index(value.index) elif len(value) > 0: self._index = RangeIndex(length) return @@ -1728,7 +1728,7 @@ def _concat( for cols in columns: table_index = None if 1 == first_data_column_position: - table_index = cudf.core.index.as_index(cols[0]) + table_index = cudf.Index(cols[0]) elif first_data_column_position > 1: table_index = DataFrame._from_data( data=dict( @@ -1780,9 +1780,7 @@ def _concat( if not isinstance(out.index, MultiIndex) and isinstance( out.index.dtype, cudf.CategoricalDtype ): - out = out.set_index( - cudf.core.index.as_index(out.index._values) - ) + out = out.set_index(cudf.Index(out.index._values)) for name, col in out._data.items(): out._data[name] = col._with_type_metadata( tables[0]._data[name].dtype @@ -2828,7 +2826,7 @@ def reindex( if columns is None: df = self else: - columns = as_index(columns) + columns = cudf.Index(columns) intersection = self._data.to_pandas_index().intersection( columns.to_pandas() ) @@ -3245,7 +3243,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): if len(self) == 0: if isinstance(value, (pd.Series, Series)): if not ignore_index: - self.index = as_index(value.index) + self.index = cudf.Index(value.index) elif (length := len(value)) > 0: if num_cols != 0: ca = self._data._from_columns_like_self( @@ -5654,7 +5652,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False): } if not is_scalar(index): - new_index = as_index(index) + new_index = cudf.Index(index) else: new_index = None @@ -5738,7 +5736,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False): } if index is not None: - index = as_index(index) + index = cudf.Index(index) if isinstance(columns, (pd.Index, cudf.Index)): level_names = tuple(columns.names) @@ -6171,7 +6169,7 @@ def count(self, axis=0, numeric_only=False): for col in self._data.names ] }, - as_index(self._data.names), + cudf.Index(self._data.names), ) _SUPPORT_AXIS_LOOKUP = { @@ -6298,7 +6296,7 @@ def _reduce( source._data.names, names=source._data.level_names ) else: - idx = as_index(source._data.names) + idx = cudf.Index(source._data.names) return Series._from_data({None: as_column(result)}, idx) elif axis == 1: return source._apply_cupy_method_axis_1(op, **kwargs) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 9bb1995b836..4729233ee6e 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -186,10 +186,10 @@ def categories(self) -> "cudf.core.index.Index": Index(['b', 'a'], dtype='object') """ if self._categories is None: - return cudf.core.index.as_index( + return cudf.Index( cudf.core.column.column_empty(0, dtype="object", masked=False) ) - return cudf.core.index.as_index(self._categories, copy=False) + return cudf.Index(self._categories, copy=False) @property def type(self): diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 3e7a1ee6026..ac8b381cbec 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -2800,15 +2800,13 @@ def keys(self): nkeys = len(self._key_columns) if nkeys == 0: - return cudf.core.index.as_index([], name=None) + return cudf.Index([], name=None) elif nkeys > 1: return cudf.MultiIndex._from_data( dict(zip(range(nkeys), self._key_columns)) )._set_names(self.names) else: - return cudf.core.index.as_index( - self._key_columns[0], name=self.names[0] - ) + return cudf.Index(self._key_columns[0], name=self.names[0]) @property def values(self) -> cudf.core.frame.Frame: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 4b09765fa46..7297ac4e929 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1169,7 +1169,7 @@ def _concat(cls, objs): result = _concat_range_index(non_empties) else: data = concat_columns([o._values for o in non_empties]) - result = as_index(data) + result = Index(data) names = {obj.name for obj in objs} if len(names) == 1: @@ -1437,7 +1437,7 @@ def __repr__(self): def __getitem__(self, index): res = self._get_elements_from_column(index) if isinstance(res, ColumnBase): - res = as_index(res, name=self.name) + res = Index(res, name=self.name) return res @property # type: ignore @@ -1958,7 +1958,7 @@ def microsecond(self): >>> datetime_index.microsecond Index([0, 1, 2], dtype='int32') """ # noqa: E501 - return as_index( + return Index( ( # Need to manually promote column to int32 because # pandas-matching binop behaviour requires that this @@ -2209,7 +2209,7 @@ def _get_dt_field(self, field): mask=out_column.base_mask, offset=out_column.offset, ) - return as_index(out_column, name=self.name) + return Index(out_column, name=self.name) def _is_boolean(self): return False @@ -2522,9 +2522,7 @@ def days(self): Number of days for each element. """ # Need to specifically return `int64` to avoid overflow. - return as_index( - arbitrary=self._values.days, name=self.name, dtype="int64" - ) + return Index(self._values.days, name=self.name, dtype="int64") @property # type: ignore @_cudf_nvtx_annotate @@ -2532,9 +2530,7 @@ def seconds(self): """ Number of seconds (>= 0 and less than 1 day) for each element. """ - return as_index( - arbitrary=self._values.seconds, name=self.name, dtype="int32" - ) + return Index(self._values.seconds, name=self.name, dtype="int32") @property # type: ignore @_cudf_nvtx_annotate @@ -2542,9 +2538,7 @@ def microseconds(self): """ Number of microseconds (>= 0 and less than 1 second) for each element. """ - return as_index( - arbitrary=self._values.microseconds, name=self.name, dtype="int32" - ) + return Index(self._values.microseconds, name=self.name, dtype="int32") @property # type: ignore @_cudf_nvtx_annotate @@ -2553,9 +2547,7 @@ def nanoseconds(self): Number of nanoseconds (>= 0 and less than 1 microsecond) for each element. """ - return as_index( - arbitrary=self._values.nanoseconds, name=self.name, dtype="int32" - ) + return Index(self._values.nanoseconds, name=self.name, dtype="int32") @property # type: ignore @_cudf_nvtx_annotate @@ -2693,7 +2685,7 @@ def codes(self): """ The category codes of this categorical. """ - return as_index(self._values.codes) + return Index(self._values.codes) @property # type: ignore @_cudf_nvtx_annotate @@ -3137,7 +3129,7 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: elif step is None: # First non-empty index had only one element if obj.start == start: - result = as_index(concat_columns([x._values for x in indexes])) + result = Index(concat_columns([x._values for x in indexes])) return result step = obj.start - start @@ -3145,7 +3137,7 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: next_ is not None and obj.start != next_ ) if non_consecutive: - result = as_index(concat_columns([x._values for x in indexes])) + result = Index(concat_columns([x._values for x in indexes])) return result if step is not None: next_ = obj[-1] + step diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 5a466f20f8c..688b268d478 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3638,7 +3638,7 @@ def _align_to_index( sort: bool = True, allow_non_unique: bool = False, ) -> Self: - index = cudf.core.index.as_index(index) + index = cudf.Index(index) if self.index.equals(index): return self @@ -3713,7 +3713,7 @@ def _reindex( raise ValueError( "cannot reindex on an axis with duplicate labels" ) - index = cudf.core.index.as_index( + index = cudf.Index( index, name=getattr(index, "name", self.index.name) ) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 049fac45ba8..11b4b9154a2 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -30,7 +30,6 @@ BaseIndex, _get_indexer_basic, _lexsorted_equal_range, - as_index, ) from cudf.core.join._join_helpers import _match_join_keys from cudf.utils.dtypes import is_column_like @@ -824,7 +823,7 @@ def _index_and_downcast(self, result, index, index_key): # it into an Index and name the final index values according # to that column's name. *_, last_column = index._data.columns - out_index = as_index(last_column) + out_index = cudf.Index(last_column) out_index.name = index.names[-1] index = out_index elif out_index._num_columns > 1: @@ -1082,7 +1081,9 @@ def get_level_values(self, level): raise KeyError(f"Level not found: '{level}'") else: level_idx = colnames.index(level) - level_values = as_index(self._data[level], name=self.names[level_idx]) + level_values = cudf.Index( + self._data[level], name=self.names[level_idx] + ) return level_values def _is_numeric(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 169f7c11cf9..a52b583d3b4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -596,7 +596,7 @@ def __init__( name_from_data = data.name column = as_column(data, nan_as_null=nan_as_null, dtype=dtype) if isinstance(data, pd.Series): - index_from_data = as_index(data.index) + index_from_data = cudf.Index(data.index) elif isinstance(data, Series): index_from_data = data.index elif isinstance(data, ColumnAccessor): @@ -612,7 +612,7 @@ def __init__( column = as_column( list(data.values()), nan_as_null=nan_as_null, dtype=dtype ) - index_from_data = as_index(list(data.keys())) + index_from_data = cudf.Index(list(data.keys())) else: # Using `getattr_static` to check if # `data` is on device memory and perform @@ -649,7 +649,7 @@ def __init__( name = name_from_data if index is not None: - index = as_index(index) + index = cudf.Index(index) if index_from_data is not None: first_index = index_from_data @@ -5241,7 +5241,7 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): if isinstance(a, cudf.Series) and isinstance(b, cudf.Series): b = b.reindex(a.index) - index = as_index(a.index) + index = cudf.Index(a.index) a_col = as_column(a) a_array = cupy.asarray(a_col.data_array_view(mode="read")) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 12a1ecc68e0..f002a838fa9 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -18,7 +18,6 @@ ) from cudf.api.types import is_integer, is_scalar from cudf.core import column -from cudf.core.index import as_index # https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/core/tools/datetimes.py#L1112 _unit_map = { @@ -287,13 +286,13 @@ def to_datetime( utc=utc, ) if isinstance(arg, (cudf.BaseIndex, pd.Index)): - return as_index(col, name=arg.name) + return cudf.Index(col, name=arg.name) elif isinstance(arg, (cudf.Series, pd.Series)): return cudf.Series(col, index=arg.index, name=arg.name) elif is_scalar(arg): return col.element_indexing(0) else: - return as_index(col) + return cudf.Index(col) except Exception as e: if errors == "raise": raise e diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py index 58939f0ddd9..e6b89e2c5fa 100644 --- a/python/cudf/cudf/tests/test_array_function.py +++ b/python/cudf/cudf/tests/test_array_function.py @@ -108,7 +108,7 @@ def test_array_func_missing_cudf_dataframe(pd_df, func): ], ) def test_array_func_cudf_index(np_ar, func): - cudf_index = cudf.core.index.as_index(cudf.Series(np_ar)) + cudf_index = cudf.Index(cudf.Series(np_ar)) expect = func(np_ar) got = func(cudf_index) if np.isscalar(expect): @@ -128,7 +128,7 @@ def test_array_func_cudf_index(np_ar, func): ], ) def test_array_func_missing_cudf_index(np_ar, func): - cudf_index = cudf.core.index.as_index(cudf.Series(np_ar)) + cudf_index = cudf.Index(cudf.Series(np_ar)) with pytest.raises(TypeError): func(cudf_index) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 5d0c403daa2..fa371914c3e 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -12,10 +12,9 @@ import pytest import cudf -from cudf import Series +from cudf import Index, Series from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.buffer.spill_manager import get_global_manager -from cudf.core.index import as_index from cudf.testing import _utils as utils from cudf.utils.dtypes import ( BOOL_TYPES, @@ -186,8 +185,8 @@ def test_series_binop(binop, obj_class): sr2 = Series(arr2) if obj_class == "Index": - sr1 = as_index(sr1) - sr2 = as_index(sr2) + sr1 = Index(sr1) + sr2 = Index(sr2) result = binop(sr1, sr2) expect = binop(pd.Series(arr1), pd.Series(arr2)) @@ -225,7 +224,7 @@ def test_series_binop_scalar(nelem, binop, obj_class, use_cudf_scalar): sr = Series(arr) if obj_class == "Index": - sr = as_index(sr) + sr = Index(sr) if use_cudf_scalar: result = binop(sr, rhs) @@ -251,8 +250,8 @@ def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype): sr2 = Series(arr2) if obj_class == "Index": - sr1 = as_index(sr1) - sr2 = as_index(sr2) + sr1 = Index(sr1) + sr2 = Index(sr2) result = binop(sr1, sr2) @@ -274,8 +273,8 @@ def test_series_compare(cmpop, obj_class, dtype): sr2 = Series(arr2) if obj_class == "Index": - sr1 = as_index(sr1) - sr2 = as_index(sr2) + sr1 = Index(sr1) + sr2 = Index(sr2) result1 = cmpop(sr1, sr1) result2 = cmpop(sr2, sr2) @@ -402,7 +401,7 @@ def test_series_compare_scalar( rhs = cudf.Scalar(rhs) if obj_class == "Index": - sr1 = as_index(sr1) + sr1 = Index(sr1) result1 = cmpop(sr1, rhs) result2 = cmpop(rhs, sr1) @@ -488,8 +487,8 @@ def test_series_binop_mixed_dtype(binop, lhs_dtype, rhs_dtype, obj_class): sr2 = Series(rhs) if obj_class == "Index": - sr1 = as_index(sr1) - sr2 = as_index(sr2) + sr1 = Index(sr1) + sr2 = Index(sr2) result = binop(Series(sr1), Series(sr2)) @@ -513,8 +512,8 @@ def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class): sr2 = Series(rhs) if obj_class == "Index": - sr1 = as_index(sr1) - sr2 = as_index(sr2) + sr1 = Index(sr1) + sr2 = Index(sr2) result = cmpop(Series(sr1), Series(sr2)) @@ -538,7 +537,7 @@ def test_series_reflected_ops_scalar(func, dtype, obj_class): # class typing if obj_class == "Index": - gs = as_index(gs) + gs = Index(gs) gs_result = func(gs) @@ -588,7 +587,7 @@ def test_series_reflected_ops_cudf_scalar(funcs, dtype, obj_class): # class typing if obj_class == "Index": - gs = as_index(gs) + gs = Index(gs) gs_result = gpu_func(gs) diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index 15dfa111860..a65ab1780b6 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import datetime @@ -8,7 +8,7 @@ import cudf from cudf import Series -from cudf.core.index import RangeIndex, as_index +from cudf.core.index import Index, RangeIndex from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -74,7 +74,7 @@ def test_series_contains(values, item, expected): @pytest.mark.parametrize("values, item, expected", testdata_all) def test_index_contains(values, item, expected): - index = as_index(values) + index = Index(values) assert_eq(expected, item in index) diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index aafe920d3a1..7ea3979b0f1 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -101,7 +101,7 @@ def test_to_dlpack_index(data_1d): with expectation: if np.isnan(data_1d).any(): pytest.skip("Nulls not allowed in Index") - gi = cudf.core.index.as_index(data_1d) + gi = cudf.Index(data_1d) dlt = gi.to_dlpack() # PyCapsules are a C-API thing so couldn't come up with a better way diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index b92ae1b3364..3d6c71ebc1b 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -17,13 +17,7 @@ import cudf from cudf.api.extensions import no_default from cudf.api.types import is_bool_dtype -from cudf.core.index import ( - CategoricalIndex, - DatetimeIndex, - Index, - RangeIndex, - as_index, -) +from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex from cudf.testing._utils import ( ALL_TYPES, FLOAT_TYPES, @@ -200,11 +194,11 @@ def test_pandas_as_index(): pdf_category_index = pd.CategoricalIndex(["a", "b", "c", "b", "a"]) # Define cudf Indexes - gdf_int_index = as_index(pdf_int_index) - gdf_uint_index = as_index(pdf_uint_index) - gdf_float_index = as_index(pdf_float_index) - gdf_datetime_index = as_index(pdf_datetime_index) - gdf_category_index = as_index(pdf_category_index) + gdf_int_index = Index(pdf_int_index) + gdf_uint_index = Index(pdf_uint_index) + gdf_float_index = Index(pdf_float_index) + gdf_datetime_index = Index(pdf_datetime_index) + gdf_category_index = Index(pdf_category_index) # Check instance types assert isinstance(gdf_int_index, Index) @@ -232,7 +226,7 @@ def test_pandas_as_index(): @pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES) def test_index_rename(initial_name, name): pds = pd.Index([1, 2, 3], name=initial_name) - gds = as_index(pds) + gds = Index(pds) assert_eq(pds, gds) @@ -245,18 +239,18 @@ def test_index_rename(initial_name, name): and if name is being handles in recursive creation. """ pds = pd.Index(expect) - gds = as_index(got) + gds = Index(got) assert_eq(pds, gds) pds = pd.Index(pds, name="abc") - gds = as_index(gds, name="abc") + gds = Index(gds, name="abc") assert_eq(pds, gds) def test_index_rename_inplace(): pds = pd.Index([1, 2, 3], name="asdf") - gds = as_index(pds) + gds = Index(pds) # inplace=False should yield a deep copy gds_renamed_deep = gds.rename("new_name", inplace=False) @@ -280,7 +274,7 @@ def test_index_rename_preserves_arg(): assert idx1.name == "orig_name" # a new object but referencing the same data - idx3 = as_index(idx1, name="last_name") + idx3 = Index(idx1, name="last_name") assert idx3.name == "last_name" assert idx1.name == "orig_name" @@ -456,7 +450,7 @@ def test_from_pandas_gen(): def test_index_names(): - idx = cudf.core.index.as_index([1, 2, 3], name="idx") + idx = Index([1, 2, 3], name="idx") assert idx.names == ("idx",) @@ -874,8 +868,8 @@ def test_index_equals(data, other): pd_data = pd.Index(data) pd_other = pd.Index(other) - gd_data = cudf.core.index.as_index(data) - gd_other = cudf.core.index.as_index(other) + gd_data = Index(data) + gd_other = Index(other) expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) @@ -920,8 +914,8 @@ def test_index_categories_equal(data, other): pd_data = pd.Index(data).astype("category") pd_other = pd.Index(other) - gd_data = cudf.core.index.as_index(data).astype("category") - gd_other = cudf.core.index.as_index(other) + gd_data = Index(data).astype("category") + gd_other = Index(other) expected = pd_data.equals(pd_other) actual = gd_data.equals(gd_other) @@ -970,7 +964,7 @@ def test_index_equal_misc(data, other): pd_data = pd.Index(data) pd_other = other - gd_data = cudf.core.index.as_index(data) + gd_data = Index(data) gd_other = other expected = pd_data.equals(pd_other) @@ -1089,8 +1083,8 @@ def test_index_empty_append_name_conflict(): ], ) def test_index_append_error(data, other): - gd_data = cudf.core.index.as_index(data) - gd_other = cudf.core.index.as_index(other) + gd_data = Index(data) + gd_other = Index(other) got_dtype = ( gd_other.dtype diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index dd731fab8f3..f143112a45f 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -21,7 +21,6 @@ import cudf from cudf.api.extensions import no_default from cudf.core.column import as_column -from cudf.core.index import as_index from cudf.testing._utils import ( assert_eq, assert_exceptions_equal, @@ -158,8 +157,6 @@ def test_multiindex_swaplevel(): def test_string_index(): - from cudf.core.index import Index - pdf = pd.DataFrame(np.random.rand(5, 5)) gdf = cudf.from_pandas(pdf) stringIndex = ["a", "b", "c", "d", "e"] @@ -170,11 +167,11 @@ def test_string_index(): pdf.index = stringIndex gdf.index = stringIndex assert_eq(pdf, gdf) - stringIndex = Index(["a", "b", "c", "d", "e"], name="name") + stringIndex = cudf.Index(["a", "b", "c", "d", "e"], name="name") pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) - stringIndex = as_index(as_column(["a", "b", "c", "d", "e"]), name="name") + stringIndex = cudf.Index(as_column(["a", "b", "c", "d", "e"]), name="name") pdf.index = stringIndex.to_pandas() gdf.index = stringIndex assert_eq(pdf, gdf) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index de771a56e77..801c530da43 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -16,7 +16,7 @@ import cudf from cudf import concat from cudf.core.column.string import StringColumn -from cudf.core.index import Index, as_index +from cudf.core.index import Index from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -1500,7 +1500,7 @@ def test_strings_partition(data): assert_eq(ps.str.partition(","), gs.str.partition(",")) assert_eq(ps.str.partition("-"), gs.str.partition("-")) - gi = as_index(data, name="new name") + gi = cudf.Index(data, name="new name") pi = pd.Index(data, name="new name") assert_eq(pi.str.partition(), gi.str.partition()) assert_eq(pi.str.partition(","), gi.str.partition(",")) @@ -1639,7 +1639,7 @@ def test_strings_strip_tests(data, to_strip): ps.str.lstrip(to_strip=to_strip), gs.str.lstrip(to_strip=to_strip) ) - gi = as_index(data) + gi = cudf.Index(data) pi = pd.Index(data) assert_eq(pi.str.strip(to_strip=to_strip), gi.str.strip(to_strip=to_strip)) @@ -1696,7 +1696,7 @@ def test_strings_filling_tests(data, width, fillchar): gs.str.rjust(width=width, fillchar=fillchar), ) - gi = as_index(data) + gi = cudf.Index(data) pi = pd.Index(data) assert_eq( @@ -1731,7 +1731,7 @@ def test_strings_zfill_tests(data, width): assert_eq(ps.str.zfill(width=width), gs.str.zfill(width=width)) - gi = as_index(data) + gi = cudf.Index(data) pi = pd.Index(data) assert_eq(pi.str.zfill(width=width), gi.str.zfill(width=width)) @@ -1763,7 +1763,7 @@ def test_strings_pad_tests(data, width, side, fillchar): gs.str.pad(width=width, side=side, fillchar=fillchar), ) - gi = as_index(data) + gi = cudf.Index(data) pi = pd.Index(data) assert_eq( @@ -1807,7 +1807,7 @@ def test_string_wrap(data, width): ), ) - gi = as_index(data) + gi = cudf.Index(data) pi = pd.Index(data) assert_eq( @@ -1941,7 +1941,7 @@ def test_string_replace_with_backrefs(find, replace): expected = ps.str.replace(find, replace, regex=True) assert_eq(got, expected) - got = as_index(gs).str.replace_with_backrefs(find, replace) + got = cudf.Index(gs).str.replace_with_backrefs(find, replace) expected = pd.Index(ps).str.replace(find, replace, regex=True) assert_eq(got, expected) @@ -2227,7 +2227,7 @@ def test_string_str_rindex(data, sub, er): assert_eq(ps.str.rindex(sub), gs.str.rindex(sub), check_dtype=False) assert_eq( pd.Index(ps).str.rindex(sub), - as_index(gs).str.rindex(sub), + cudf.Index(gs).str.rindex(sub), exact=False, ) @@ -2336,7 +2336,7 @@ def test_string_str_match(data, pat): assert_eq(ps.str.match(pat), gs.str.match(pat)) assert_eq( - pd.Index(pd.Index(ps).str.match(pat)), as_index(gs).str.match(pat) + pd.Index(pd.Index(ps).str.match(pat)), cudf.Index(gs).str.match(pat) ) @@ -2363,7 +2363,7 @@ def test_string_str_translate(data): ) assert_eq( pd.Index(ps).str.translate(str.maketrans({"a": "z"})), - as_index(gs).str.translate(str.maketrans({"a": "z"})), + cudf.Index(gs).str.translate(str.maketrans({"a": "z"})), ) assert_eq( ps.str.translate(str.maketrans({"a": "z", "i": "$", "z": "1"})), @@ -2373,7 +2373,7 @@ def test_string_str_translate(data): pd.Index(ps).str.translate( str.maketrans({"a": "z", "i": "$", "z": "1"}) ), - as_index(gs).str.translate( + cudf.Index(gs).str.translate( str.maketrans({"a": "z", "i": "$", "z": "1"}) ), ) @@ -2389,7 +2389,7 @@ def test_string_str_translate(data): pd.Index(ps).str.translate( str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) ), - as_index(gs).str.translate( + cudf.Index(gs).str.translate( str.maketrans({"+": "-", "-": "$", "?": "!", "B": "."}) ), ) @@ -2779,8 +2779,8 @@ def test_string_str_byte_count(data, expected): actual = sr.str.byte_count() assert_eq(expected, actual) - si = as_index(data) - expected = as_index(expected, dtype="int32") + si = cudf.Index(data) + expected = cudf.Index(expected, dtype="int32") actual = si.str.byte_count() assert_eq(expected, actual) @@ -2828,8 +2828,8 @@ def test_str_isinteger(data, expected): actual = sr.str.isinteger() assert_eq(expected, actual) - sr = as_index(data) - expected = as_index(expected) + sr = cudf.Index(data) + expected = cudf.Index(expected) actual = sr.str.isinteger() assert_eq(expected, actual) @@ -2884,8 +2884,8 @@ def test_str_isfloat(data, expected): actual = sr.str.isfloat() assert_eq(expected, actual) - sr = as_index(data) - expected = as_index(expected) + sr = cudf.Index(data) + expected = cudf.Index(expected) actual = sr.str.isfloat() assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/text/test_text_methods.py b/python/cudf/cudf/tests/text/test_text_methods.py index 6ecead862bb..6bd3b99bae1 100644 --- a/python/cudf/cudf/tests/text/test_text_methods.py +++ b/python/cudf/cudf/tests/text/test_text_methods.py @@ -539,7 +539,7 @@ def test_character_tokenize_series(): def test_character_tokenize_index(): - sr = cudf.core.index.as_index( + sr = cudf.Index( [ "hello world", "sdf", @@ -550,7 +550,7 @@ def test_character_tokenize_index(): ), ] ) - expected = cudf.core.index.as_index( + expected = cudf.Index( [ "h", "e", @@ -648,8 +648,8 @@ def test_character_tokenize_index(): actual = sr.str.character_tokenize() assert_eq(expected, actual) - sr = cudf.core.index.as_index(["a"]) - expected = cudf.core.index.as_index(["a"]) + sr = cudf.Index(["a"]) + expected = cudf.Index(["a"]) actual = sr.str.character_tokenize() assert_eq(expected, actual)