Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove internal usage of core.index.as_index in favor of cudf.Index #15851

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from cudf.core.column import as_column
from cudf.core.copy_types import BooleanMask
from cudf.core.index import RangeIndex, as_index
from cudf.core.index import Index, RangeIndex
from cudf.core.indexed_frame import IndexedFrame
from cudf.core.scalar import Scalar
from cudf.options import get_option
Expand Down Expand Up @@ -107,7 +107,7 @@ def factorize(values, sort=False, use_na_sentinel=True, size_hint=None):
dtype="int64" if get_option("mode.pandas_compatible") else None,
).values

return labels, cats.values if return_cupy_array else as_index(cats)
return labels, cats.values if return_cupy_array else Index(cats)


def _linear_interpolation(column, index=None):
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/column/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,6 @@ def _return_or_inplace(
else:
return cudf.Series(new_col, name=self._parent.name)
elif isinstance(self._parent, cudf.BaseIndex):
return cudf.core.index.as_index(
new_col, name=self._parent.name
)
return cudf.Index(new_col, name=self._parent.name)
else:
return self._parent._mimic_inplace(new_col, inplace=False)
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4391,7 +4391,7 @@ def code_points(self) -> SeriesOrIndex:
if isinstance(self._parent, cudf.Series):
return cudf.Series(new_col, name=self._parent.name)
elif isinstance(self._parent, cudf.BaseIndex):
return cudf.core.index.as_index(new_col, name=self._parent.name)
return cudf.Index(new_col, name=self._parent.name)
else:
return new_col

Expand Down Expand Up @@ -4706,7 +4706,7 @@ def character_tokenize(self) -> SeriesOrIndex:
index = self._parent.index.repeat(lengths)
return cudf.Series(result_col, name=self._parent.name, index=index)
elif isinstance(self._parent, cudf.BaseIndex):
return cudf.core.index.as_index(result_col, name=self._parent.name)
return cudf.Index(result_col, name=self._parent.name)
else:
return result_col

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/cut.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from collections import abc

Expand Down Expand Up @@ -292,7 +292,7 @@ def cut(
)

# we return a categorical index, as we don't have a Categorical method
categorical_index = cudf.core.index.as_index(col)
categorical_index = cudf.Index(col)

if isinstance(orig_x, (pd.Series, cudf.Series)):
# if we have a series input we return a series output
Expand Down
36 changes: 17 additions & 19 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ def __init__(
data = data.reindex(index)
index = data.index
else:
index = as_index(index)
index = cudf.Index(index)
else:
index = data.index

Expand Down Expand Up @@ -761,7 +761,7 @@ def __init__(
if index is None:
self._index = RangeIndex(0)
else:
self._index = as_index(index)
self._index = cudf.Index(index)
if columns is not None:
rangeindex = isinstance(
columns, (range, pd.RangeIndex, cudf.RangeIndex)
Expand Down Expand Up @@ -875,7 +875,7 @@ def _init_from_series_list(self, data, columns, index):
# When `index` is `None`, the final index of
# the resulting dataframe will be the union of
# all the Series' names.
final_index = as_index(_get_union_of_series_names(data))
final_index = cudf.Index(_get_union_of_series_names(data))
else:
# When an `index` is passed, the final index of
# resulting dataframe will be whatever
Expand Down Expand Up @@ -919,7 +919,7 @@ def _init_from_series_list(self, data, columns, index):
f"not match length of index ({index_length})"
)

final_index = as_index(index)
final_index = cudf.Index(index)

series_lengths = list(map(len, data))
data = numeric_normalize_types(*data)
Expand All @@ -943,7 +943,7 @@ def _init_from_series_list(self, data, columns, index):
# Setting `final_columns` to self._index so
# that the resulting `transpose` will have
# columns set to `final_columns`
self._index = as_index(final_columns)
self._index = cudf.Index(final_columns)

transpose = self.T
else:
Expand Down Expand Up @@ -987,9 +987,9 @@ def _init_from_list_like(self, data, index=None, columns=None):
if index is None:
index = RangeIndex(start=0, stop=len(data))
else:
index = as_index(index)
index = cudf.Index(index)

self._index = as_index(index)
self._index = cudf.Index(index)
# list-of-dicts case
if len(data) > 0 and isinstance(data[0], dict):
data = DataFrame.from_pandas(pd.DataFrame(data))
Expand Down Expand Up @@ -1095,7 +1095,7 @@ def _init_from_dict_like(

self._index = RangeIndex(0, num_rows)
else:
self._index = as_index(index)
self._index = cudf.Index(index)

if len(data):
self._data.multiindex = True
Expand Down Expand Up @@ -1410,7 +1410,7 @@ def __setitem__(self, arg, value):
new_columns, verify=False
)
if isinstance(value, (pd.Series, Series)):
self._index = as_index(value.index)
self._index = cudf.Index(value.index)
elif len(value) > 0:
self._index = RangeIndex(length)
return
Expand Down Expand Up @@ -1728,7 +1728,7 @@ def _concat(
for cols in columns:
table_index = None
if 1 == first_data_column_position:
table_index = cudf.core.index.as_index(cols[0])
table_index = cudf.Index(cols[0])
elif first_data_column_position > 1:
table_index = DataFrame._from_data(
data=dict(
Expand Down Expand Up @@ -1780,9 +1780,7 @@ def _concat(
if not isinstance(out.index, MultiIndex) and isinstance(
out.index.dtype, cudf.CategoricalDtype
):
out = out.set_index(
cudf.core.index.as_index(out.index._values)
)
out = out.set_index(cudf.Index(out.index._values))
for name, col in out._data.items():
out._data[name] = col._with_type_metadata(
tables[0]._data[name].dtype
Expand Down Expand Up @@ -2828,7 +2826,7 @@ def reindex(
if columns is None:
df = self
else:
columns = as_index(columns)
columns = cudf.Index(columns)
intersection = self._data.to_pandas_index().intersection(
columns.to_pandas()
)
Expand Down Expand Up @@ -3245,7 +3243,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
if len(self) == 0:
if isinstance(value, (pd.Series, Series)):
if not ignore_index:
self.index = as_index(value.index)
self.index = cudf.Index(value.index)
elif (length := len(value)) > 0:
if num_cols != 0:
ca = self._data._from_columns_like_self(
Expand Down Expand Up @@ -5656,7 +5654,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False):
}

if not is_scalar(index):
new_index = as_index(index)
new_index = cudf.Index(index)
else:
new_index = None

Expand Down Expand Up @@ -5740,7 +5738,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
}

if index is not None:
index = as_index(index)
index = cudf.Index(index)

if isinstance(columns, (pd.Index, cudf.Index)):
level_names = tuple(columns.names)
Expand Down Expand Up @@ -6173,7 +6171,7 @@ def count(self, axis=0, numeric_only=False):
for col in self._data.names
]
},
as_index(self._data.names),
cudf.Index(self._data.names),
)

_SUPPORT_AXIS_LOOKUP = {
Expand Down Expand Up @@ -6300,7 +6298,7 @@ def _reduce(
source._data.names, names=source._data.level_names
)
else:
idx = as_index(source._data.names)
idx = cudf.Index(source._data.names)
return Series._from_data({None: as_column(result)}, idx)
elif axis == 1:
return source._apply_cupy_method_axis_1(op, **kwargs)
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,10 @@ def categories(self) -> "cudf.core.index.Index":
Index(['b', 'a'], dtype='object')
"""
if self._categories is None:
return cudf.core.index.as_index(
return cudf.Index(
cudf.core.column.column_empty(0, dtype="object", masked=False)
)
return cudf.core.index.as_index(self._categories, copy=False)
return cudf.Index(self._categories, copy=False)

@property
def type(self):
Expand Down
6 changes: 2 additions & 4 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2800,15 +2800,13 @@ def keys(self):
nkeys = len(self._key_columns)

if nkeys == 0:
return cudf.core.index.as_index([], name=None)
return cudf.Index([], name=None)
elif nkeys > 1:
return cudf.MultiIndex._from_data(
dict(zip(range(nkeys), self._key_columns))
)._set_names(self.names)
else:
return cudf.core.index.as_index(
self._key_columns[0], name=self.names[0]
)
return cudf.Index(self._key_columns[0], name=self.names[0])

@property
def values(self) -> cudf.core.frame.Frame:
Expand Down
30 changes: 11 additions & 19 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ def _concat(cls, objs):
result = _concat_range_index(non_empties)
else:
data = concat_columns([o._values for o in non_empties])
result = as_index(data)
result = Index(data)

names = {obj.name for obj in objs}
if len(names) == 1:
Expand Down Expand Up @@ -1431,7 +1431,7 @@ def __repr__(self):
def __getitem__(self, index):
res = self._get_elements_from_column(index)
if isinstance(res, ColumnBase):
res = as_index(res, name=self.name)
res = Index(res, name=self.name)
return res

@property # type: ignore
Expand Down Expand Up @@ -1951,7 +1951,7 @@ def microsecond(self):
>>> datetime_index.microsecond
Index([0, 1, 2], dtype='int32')
""" # noqa: E501
return as_index(
return Index(
(
# Need to manually promote column to int32 because
# pandas-matching binop behaviour requires that this
Expand Down Expand Up @@ -2215,7 +2215,7 @@ def _get_dt_field(self, field):
mask=out_column.base_mask,
offset=out_column.offset,
)
return as_index(out_column, name=self.name)
return Index(out_column, name=self.name)

def _is_boolean(self):
return False
Expand Down Expand Up @@ -2545,29 +2545,23 @@ def days(self):
Number of days for each element.
"""
# Need to specifically return `int64` to avoid overflow.
return as_index(
arbitrary=self._values.days, name=self.name, dtype="int64"
)
return Index(self._values.days, name=self.name, dtype="int64")

@property # type: ignore
@_cudf_nvtx_annotate
def seconds(self):
"""
Number of seconds (>= 0 and less than 1 day) for each element.
"""
return as_index(
arbitrary=self._values.seconds, name=self.name, dtype="int32"
)
return Index(self._values.seconds, name=self.name, dtype="int32")

@property # type: ignore
@_cudf_nvtx_annotate
def microseconds(self):
"""
Number of microseconds (>= 0 and less than 1 second) for each element.
"""
return as_index(
arbitrary=self._values.microseconds, name=self.name, dtype="int32"
)
return Index(self._values.microseconds, name=self.name, dtype="int32")

@property # type: ignore
@_cudf_nvtx_annotate
Expand All @@ -2576,9 +2570,7 @@ def nanoseconds(self):
Number of nanoseconds (>= 0 and less than 1 microsecond) for each
element.
"""
return as_index(
arbitrary=self._values.nanoseconds, name=self.name, dtype="int32"
)
return Index(self._values.nanoseconds, name=self.name, dtype="int32")

@property # type: ignore
@_cudf_nvtx_annotate
Expand Down Expand Up @@ -2716,7 +2708,7 @@ def codes(self):
"""
The category codes of this categorical.
"""
return as_index(self._values.codes)
return Index(self._values.codes)

@property # type: ignore
@_cudf_nvtx_annotate
Expand Down Expand Up @@ -3160,15 +3152,15 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex:
elif step is None:
# First non-empty index had only one element
if obj.start == start:
result = as_index(concat_columns([x._values for x in indexes]))
result = Index(concat_columns([x._values for x in indexes]))
return result
step = obj.start - start

non_consecutive = (step != obj.step and len(obj) > 1) or (
next_ is not None and obj.start != next_
)
if non_consecutive:
result = as_index(concat_columns([x._values for x in indexes]))
result = Index(concat_columns([x._values for x in indexes]))
return result
if step is not None:
next_ = obj[-1] + step
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3640,7 +3640,7 @@ def _align_to_index(
sort: bool = True,
allow_non_unique: bool = False,
) -> Self:
index = cudf.core.index.as_index(index)
index = cudf.Index(index)

if self.index.equals(index):
return self
Expand Down Expand Up @@ -3715,7 +3715,7 @@ def _reindex(
raise ValueError(
"cannot reindex on an axis with duplicate labels"
)
index = cudf.core.index.as_index(
index = cudf.Index(
index, name=getattr(index, "name", self.index.name)
)

Expand Down
7 changes: 4 additions & 3 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
BaseIndex,
_get_indexer_basic,
_lexsorted_equal_range,
as_index,
)
from cudf.core.join._join_helpers import _match_join_keys
from cudf.utils.dtypes import is_column_like
Expand Down Expand Up @@ -824,7 +823,7 @@ def _index_and_downcast(self, result, index, index_key):
# it into an Index and name the final index values according
# to that column's name.
*_, last_column = index._data.columns
out_index = as_index(last_column)
out_index = cudf.Index(last_column)
out_index.name = index.names[-1]
index = out_index
elif out_index._num_columns > 1:
Expand Down Expand Up @@ -1082,7 +1081,9 @@ def get_level_values(self, level):
raise KeyError(f"Level not found: '{level}'")
else:
level_idx = colnames.index(level)
level_values = as_index(self._data[level], name=self.names[level_idx])
level_values = cudf.Index(
self._data[level], name=self.names[level_idx]
)
return level_values

def _is_numeric(self):
Expand Down
Loading
Loading