diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 131463e8871..f2be0e3bd6e 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -10,7 +10,7 @@ import warnings from collections import defaultdict from collections.abc import Iterable, Sequence -from typing import Any, Optional, Set, TypeVar +from typing import Any, Optional, TypeVar import cupy import numpy as np @@ -554,6 +554,7 @@ def _align_input_series_indices(data, index): return data, index + # The `constructor*` properties are used by `dask` (and `dask_cudf`) @property def _constructor(self): return DataFrame @@ -1456,7 +1457,7 @@ def _get_columns_by_label(self, labels, downcast=False): new_data, index=self.index, name=labels ) return out - out = self._constructor()._from_data( + out = self.__class__()._from_data( new_data, index=self.index, columns=new_data.to_pandas_index() ) return out @@ -3158,20 +3159,6 @@ def take(self, positions, keep_index=True): out.columns = self.columns return out - def __copy__(self): - return self.copy(deep=True) - - def __deepcopy__(self, memo=None): - """ - Parameters - ---------- - memo, default None - Standard signature. Unused - """ - if memo is None: - memo = {} - return self.copy(deep=True) - @annotate("INSERT", color="green", domain="cudf_python") def insert(self, loc, name, value): """ Add a column to DataFrame at the index specified by loc. 
@@ -7769,8 +7756,6 @@ def explode(self, column, ignore_index=False): return super()._explode(column, ignore_index) - _accessors = set() # type: Set[Any] - def from_pandas(obj, nan_as_null=None): """ diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5c4186c4ac7..4a434be42ce 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -48,6 +48,12 @@ class Frame(libcudf.table.Table): _data: "ColumnAccessor" + @classmethod + def __init_subclass__(cls): + # All subclasses contain a set _accessors that is used to hold custom + # accessors defined by user APIs (see cudf/api/extensions/accessor.py). + cls._accessors = set() + @classmethod def _from_table(cls, table: Frame): return cls(table._data, index=table._index) @@ -608,7 +614,7 @@ def _get_columns_by_index(self, indices): """ data = self._data.select_by_index(indices) - return self._constructor( + return self.__class__( data, columns=data.to_pandas_index(), index=self.index ) @@ -3295,6 +3301,211 @@ def _reindex( return self._mimic_inplace(result, inplace=inplace) +class SingleColumnFrame(Frame): + """A one-dimensional frame. + + Frames with only a single column share certain logic that is encoded in + this class. + """ + + @property + def name(self): + """The name of this object.""" + return next(iter(self._data.names)) + + @name.setter + def name(self, value): + self._data[value] = self._data.pop(self.name) + + @property + def ndim(self): + """Dimension of the data (always 1).""" + return 1 + + @property + def shape(self): + """Returns a tuple representing the dimensionality of this object. + """ + return (len(self),) + + def __iter__(self): + cudf.utils.utils.raise_iteration_error(obj=self) + + def __len__(self): + return len(self._column) + + def __bool__(self): + raise TypeError( + f"The truth value of a {type(self)} is ambiguous. Use " + "a.empty, a.bool(), a.item(), a.any() or a.all()." 
+ ) + + @property + def _column(self): + return self._data[self.name] + + @_column.setter + def _column(self, value): + self._data[self.name] = value + + @property + def values(self): + """ + Return a CuPy representation of the data. + + Returns + ------- + out : cupy.ndarray + A device representation of the underlying data. + + Examples + -------- + >>> import cudf + >>> ser = cudf.Series([1, -10, 100, 20]) + >>> ser.values + array([ 1, -10, 100, 20]) + >>> type(ser.values) + <class 'cupy.core.core.ndarray'> + >>> index = cudf.Index([1, -10, 100, 20]) + >>> index.values + array([ 1, -10, 100, 20]) + >>> type(index.values) + <class 'cupy.core.core.ndarray'> + """ + return self._column.values + + @property + def values_host(self): + """ + Return a NumPy representation of the data. + + Returns + ------- + out : numpy.ndarray + A host representation of the underlying data. + + Examples + -------- + >>> import cudf + >>> ser = cudf.Series([1, -10, 100, 20]) + >>> ser.values_host + array([ 1, -10, 100, 20]) + >>> type(ser.values_host) + <class 'numpy.ndarray'> + >>> index = cudf.Index([1, -10, 100, 20]) + >>> index.values_host + array([ 1, -10, 100, 20]) + >>> type(index.values_host) + <class 'numpy.ndarray'> + """ + return self._column.values_host + + def tolist(self): + + raise TypeError( + "cuDF does not support conversion to host memory " + "via the `tolist()` method. Consider using " + "`.to_arrow().to_pylist()` to construct a Python list." + ) + + to_list = tolist + + def to_gpu_array(self, fillna=None): + """Get a dense numba device array for the data. + + Parameters + ---------- + fillna : str or None + See *fillna* in ``.to_array``. + + Notes + ----- + + if ``fillna`` is ``None``, null values are skipped. Therefore, the + output size could be smaller. 
+ + Returns + ------- + numba.DeviceNDArray + + Examples + -------- + >>> import cudf + >>> s = cudf.Series([10, 20, 30, 40, 50]) + >>> s + 0 10 + 1 20 + 2 30 + 3 40 + 4 50 + dtype: int64 + >>> s.to_gpu_array() + <numba.cuda.cudadrv.devicearray.DeviceNDArray object at 0x...> + """ + return self._column.to_gpu_array(fillna=fillna) + + @classmethod + def from_arrow(cls, array): + """Create from PyArrow Array/ChunkedArray. + + Parameters + ---------- + array : PyArrow Array/ChunkedArray + PyArrow Object which has to be converted. + + Raises + ------ + TypeError for invalid input type. + + Returns + ------- + SingleColumnFrame + + Examples + -------- + >>> import cudf + >>> import pyarrow as pa + >>> cudf.Index.from_arrow(pa.array(["a", "b", None])) + StringIndex(['a' 'b' None], dtype='object') + >>> cudf.Series.from_arrow(pa.array(["a", "b", None])) + 0 a + 1 b + 2 <NA> + dtype: object + """ + return cls(cudf.core.column.column.ColumnBase.from_arrow(array)) + + def to_arrow(self): + """ + Convert to a PyArrow Array. + + Returns + ------- + PyArrow Array + + Examples + -------- + >>> import cudf + >>> sr = cudf.Series(["a", "b", None]) + >>> sr.to_arrow() + <pyarrow.lib.StringArray object at 0x...> + [ + "a", + "b", + null + ] + >>> ind = cudf.Index(["a", "b", None]) + >>> ind.to_arrow() + <pyarrow.lib.StringArray object at 0x...> + [ + "a", + "b", + null + ] + """ + return self._column.to_arrow() + + def _get_replacement_values_for_columns( to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any] ) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 0ffe0c11fef..5f390be79e2 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -4,7 +4,7 @@ import pickle from numbers import Number -from typing import Any, Dict, Set, Type +from typing import Any, Dict, Type import cupy import numpy as np @@ -29,7 +29,7 @@ ) from cudf.core.column.string import StringMethods as StringMethods from cudf.core.dtypes import IntervalDtype -from cudf.core.frame import Frame +from cudf.core.frame import SingleColumnFrame 
from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( @@ -45,35 +45,7 @@ from cudf.utils.utils import cached_property, search_range -def _to_frame(this_index, index=True, name=None): - """Create a DataFrame with a column containing this Index - - Parameters - ---------- - index : boolean, default True - Set the index of the returned DataFrame as the original Index - name : str, default None - Name to be used for the column - - Returns - ------- - DataFrame - cudf DataFrame - """ - - if name is not None: - col_name = name - elif this_index.name is None: - col_name = 0 - else: - col_name = this_index.name - - return cudf.DataFrame( - {col_name: this_index._values}, index=this_index if index else None - ) - - -class Index(Frame, Serializable): +class Index(SingleColumnFrame, Serializable): dtype: DtypeObj @@ -180,12 +152,6 @@ def drop_duplicates(self, keep="first"): """ # noqa: E501 return super().drop_duplicates(keep=keep) - @property - def shape(self): - """Returns a tuple representing the dimensionality of the Index. - """ - return (len(self),) - def serialize(self): header = {} header["index_column"] = {} @@ -277,81 +243,6 @@ def get_level_values(self, level): else: raise KeyError(f"Requested level with name {level} " "not found") - def __iter__(self): - cudf.utils.utils.raise_iteration_error(obj=self) - - @classmethod - def from_arrow(cls, array): - """Convert PyArrow Array/ChunkedArray to Index - - Parameters - ---------- - array : PyArrow Array/ChunkedArray - PyArrow Object which has to be converted to Index - - Raises - ------ - TypeError for invalid input type. 
- - Returns - ------- - cudf Index - - Examples - -------- - >>> import cudf - >>> import pyarrow as pa - >>> cudf.Index.from_arrow(pa.array(["a", "b", None])) - StringIndex(['a' 'b' None], dtype='object') - """ - - return cls(cudf.core.column.column.ColumnBase.from_arrow(array)) - - def to_arrow(self): - """Convert Index to PyArrow Array - - Returns - ------- - PyArrow Array - - Examples - -------- - >>> import cudf - >>> ind = cudf.Index(["a", "b", None]) - >>> ind.to_arrow() - - [ - "a", - "b", - null - ] - """ - - return self._data.columns[0].to_arrow() - - @property - def values_host(self): - """ - Return a numpy representation of the Index. - - Only the values in the Index will be returned. - - Returns - ------- - out : numpy.ndarray - The values of the Index. - - Examples - -------- - >>> import cudf - >>> index = cudf.Index([1, -10, 100, 20]) - >>> index.values_host - array([ 1, -10, 100, 20]) - >>> type(index.values_host) - - """ - return self._values.values_host - @classmethod def deserialize(cls, header, frames): h = header["index_column"] @@ -362,12 +253,6 @@ def deserialize(cls, header, frames): index = col_typ.deserialize(h, frames[: header["frame_count"]]) return idx_typ(index, name=name) - @property - def ndim(self): - """Dimension of the data. Apart from MultiIndex ndim is always 1. - """ - return 1 - @property def names(self): """ @@ -388,18 +273,6 @@ def names(self, values): self.name = values[0] - @property - def name(self): - """ - Returns the name of the Index. - """ - return next(iter(self._data.names)) - - @name.setter - def name(self, value): - col = self._data.pop(self.name) - self._data[value] = col - def dropna(self, how="any"): """ Return an Index with null values removed. @@ -641,25 +514,32 @@ def argsort(self, ascending=True, **kwargs): indices = self._values.argsort(ascending=ascending, **kwargs) return cupy.asarray(indices) - @property - def values(self): - """ - Return an array representing the data in the Index. 
+ def to_frame(self, index=True, name=None): + """Create a DataFrame with a column containing this Index + + Parameters + ---------- + index : boolean, default True + Set the index of the returned DataFrame as the original Index + name : str, default None + Name to be used for the column Returns ------- - array : A cupy array of data in the Index. - - Examples - -------- - >>> import cudf - >>> index = cudf.Index([1, -10, 100, 20]) - >>> index.values - array([ 1, -10, 100, 20]) - >>> type(index.values) - + DataFrame + cudf DataFrame """ - return self._values.values + + if name is not None: + col_name = name + elif self.name is None: + col_name = 0 + else: + col_name = self.name + + return cudf.DataFrame( + {col_name: self._values}, index=self if index else None + ) def any(self): """ @@ -686,16 +566,6 @@ def to_pandas(self): """ return pd.Index(self._values.to_pandas(), name=self.name) - def tolist(self): - - raise TypeError( - "cuDF does not support conversion to host memory " - "via `tolist()` method. Consider using " - "`.to_arrow().to_pylist()` to construct a Python list." 
- ) - - to_list = tolist - @ioutils.doc_to_dlpack() def to_dlpack(self): """{docstring}""" @@ -1557,9 +1427,7 @@ def _from_table(cls, table): @classmethod def _from_data(cls, data, index=None): - return cls._from_table(Frame(data=data)) - - _accessors = set() # type: Set[Any] + return cls._from_table(SingleColumnFrame(data=data)) @property def _constructor_expanddim(self): @@ -1606,7 +1474,7 @@ def __new__( if step == 0: raise ValueError("Step must not be zero.") - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) if isinstance(start, range): therange = start start = therange.start @@ -1815,7 +1683,7 @@ def is_contiguous(self): @property def size(self): - return self.__len__() + return len(self) def find_label_range(self, first=None, last=None): """Find subrange in the ``RangeIndex``, marked by their positions, that @@ -1856,25 +1724,6 @@ def find_label_range(self, first=None, last=None): return begin, end - @copy_docstring(_to_frame) # type: ignore - def to_frame(self, index=True, name=None): - return _to_frame(self, index, name) - - def to_gpu_array(self, fillna=None): - """Get a dense numba device array for the data. - - Parameters - ---------- - fillna : str or None - Replacement value to fill in place of nulls. - - Notes - ----- - if ``fillna`` is ``None``, null values are skipped. Therefore, the - output size could be smaller. - """ - return self._values.to_gpu_array(fillna=fillna) - def to_pandas(self): return pd.RangeIndex( start=self._start, @@ -1978,7 +1827,7 @@ def __new__(cls, values, **kwargs): Column's name. Otherwise if this name is different from the value Column's, the values Column will be cloned to adopt this name. 
""" - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) out._initialize(values, **kwargs) return out @@ -2043,9 +1892,6 @@ def copy(self, name=None, deep=False, dtype=None, names=None): def __sizeof__(self): return self._values.__sizeof__() - def __len__(self): - return len(self._values) - def __repr__(self): max_seq_items = get_option("max_seq_items") or len(self) mr = 0 @@ -2135,10 +1981,6 @@ def __getitem__(self, index): else: return res - @copy_docstring(_to_frame) # type: ignore - def to_frame(self, index=True, name=None): - return _to_frame(self, index, name) - @property def dtype(self): """ @@ -2201,7 +2043,7 @@ class NumericIndex(GenericIndex): def __new__(cls, data=None, dtype=None, copy=False, name=None): - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) dtype = _index_to_dtype[cls] if copy: data = column.as_column(data, dtype=dtype).copy() @@ -2323,7 +2165,7 @@ def __new__( # pandas dtindex creation first which. For now # just make sure we handle np.datetime64 arrays # and then just dispatch upstream - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) if freq is not None: raise NotImplementedError("Freq is not yet supported") @@ -2578,7 +2420,7 @@ def __new__( name=None, ) -> "TimedeltaIndex": - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) if freq is not None: raise NotImplementedError("freq is not yet supported") @@ -2710,7 +2552,7 @@ def __new__( ) if copy: data = column.as_column(data, dtype=dtype).copy(deep=True) - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) kwargs = _setdefault_name(data, name=name) if isinstance(data, CategoricalColumn): data = data @@ -2936,7 +2778,7 @@ def __new__( ) -> "IntervalIndex": if copy: data = column.as_column(data, dtype=dtype).copy() - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) kwargs = _setdefault_name(data, name=name) if isinstance(data, IntervalColumn): data = data @@ -3009,7 +2851,7 @@ class 
StringIndex(GenericIndex): """ def __new__(cls, values, copy=False, **kwargs): - out = Frame.__new__(cls) + out = SingleColumnFrame.__new__(cls) kwargs = _setdefault_name(values, **kwargs) if isinstance(values, StringColumn): values = values.copy(deep=copy) diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 7970b9fa3dc..a732abc0705 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -2,6 +2,7 @@ from typing import Any, Union +import cupy as cp import numpy as np import pandas as pd from nvtx import annotate @@ -58,7 +59,9 @@ def get_label_range_or_mask(index, start, stop, step): if start is not None and stop is not None: if start > stop: return slice(0, 0, None) - boolean_mask = (index >= start) and (index <= stop) + # TODO: Once Index binary ops are updated to support logical_and, + # can use that instead of using cupy. + boolean_mask = cp.logical_and((index >= start), (index <= stop)) elif start is not None: boolean_mask = index >= start else: diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index a4748632aab..ca029198e52 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -19,7 +19,7 @@ from cudf.core._compat import PANDAS_GE_120 from cudf.core.column import column from cudf.core.column_accessor import ColumnAccessor -from cudf.core.frame import Frame +from cudf.core.frame import Frame, SingleColumnFrame from cudf.core.index import Index, as_index @@ -572,7 +572,7 @@ def from_arrow(cls, table): names=['a', 'b']) """ - return super(Index, cls).from_arrow(table) + return super(SingleColumnFrame, cls).from_arrow(table) def to_arrow(self): """Convert MultiIndex to PyArrow Table @@ -606,7 +606,7 @@ def to_arrow(self): ] """ - return super(Index, self).to_arrow() + return super(SingleColumnFrame, self).to_arrow() @property def codes(self): @@ -1048,9 +1048,6 @@ def deserialize(cls, header, frames): names = 
pickle.loads(header["names"]) return MultiIndex(names=names, source_data=source_data) - def __iter__(self): - cudf.utils.utils.raise_iteration_error(obj=self) - def __getitem__(self, index): # TODO: This should be a take of the _source_data only match = self.take(index) @@ -1107,29 +1104,6 @@ def get_level_values(self, level): ) return level_values - def _to_frame(self): - - # for each column of codes - # replace column with mapping from integers to levels - df = self.codes.copy(deep=False) - for idx, col in enumerate(df.columns): - # use merge as a replace fn - level = cudf.DataFrame( - { - "idx": column.arange( - len(self.levels[idx]), dtype=df[col].dtype - ), - "level": self.levels[idx], - } - ) - code = cudf.DataFrame({"idx": df[col]}) - df[col] = code.merge(level).level - return df - - @property - def _values(self): - return list([i for i in self]) - @classmethod def _concat(cls, objs): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4cc5fb56a4c..5ee40d576b6 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -7,7 +7,7 @@ from collections import abc as abc from numbers import Number from shutil import get_terminal_size -from typing import Any, Optional, Set +from typing import Any, Optional from uuid import uuid4 import cupy @@ -38,7 +38,7 @@ from cudf.core.column.string import StringMethods from cudf.core.column.struct import StructMethods from cudf.core.column_accessor import ColumnAccessor -from cudf.core.frame import Frame, _drop_rows_by_labels +from cudf.core.frame import SingleColumnFrame, _drop_rows_by_labels from cudf.core.groupby.groupby import SeriesGroupBy from cudf.core.index import Index, RangeIndex, as_index from cudf.core.indexing import _SeriesIlocIndexer, _SeriesLocIndexer @@ -61,7 +61,8 @@ ) -class Series(Frame, Serializable): +class Series(SingleColumnFrame, Serializable): + # The `constructor*` properties are used by `dask` (and `dask_cudf`) @property def 
_constructor(self): return Series @@ -265,8 +266,7 @@ def __init__( @classmethod def _from_table(cls, table, index=None): - name = next(iter(table._data.keys())) - data = next(iter(table._data.values())) + name, data = next(iter(table._data.items())) if index is None: if table._index is not None: index = Index._from_table(table._index) @@ -289,14 +289,6 @@ def _from_data( out.name = name return out - @property - def _column(self): - return self._data[self.name] - - @_column.setter - def _column(self, value): - self._data[self.name] = value - def __contains__(self, item): return item in self._index @@ -341,52 +333,6 @@ def from_pandas(cls, s, nan_as_null=None): """ return cls(s, nan_as_null=nan_as_null) - @property - def values(self): - """ - Return a CuPy representation of the Series. - - Only the values in the Series will be returned. - - Returns - ------- - out : cupy.ndarray - The values of the Series. - - Examples - -------- - >>> import cudf - >>> ser = cudf.Series([1, -10, 100, 20]) - >>> ser.values - array([ 1, -10, 100, 20]) - >>> type(ser.values) - - """ - return self._column.values - - @property - def values_host(self): - """ - Return a numpy representation of the Series. - - Only the values in the Series will be returned. - - Returns - ------- - out : numpy.ndarray - The values of the Series. - - Examples - -------- - >>> import cudf - >>> ser = cudf.Series([1, -10, 100, 20]) - >>> ser.values_host - array([ 1, -10, 100, 20]) - >>> type(ser.values_host) - - """ - return self._column.values_host - def serialize(self): header = {} frames = [] @@ -401,12 +347,6 @@ def serialize(self): return header, frames - @property - def shape(self): - """Returns a tuple representing the dimensionality of the Series. - """ - return (len(self),) - @property def dt(self): """ @@ -435,23 +375,6 @@ def dt(self): "Can only use .dt accessor with datetimelike values" ) - @property - def ndim(self): - """Dimension of the data. Series ndim is always 1. 
- """ - return 1 - - @property - def name(self): - """Returns name of the Series. - """ - return self._data.names[0] - - @name.setter - def name(self, value): - col = self._data.pop(self.name) - self._data[value] = col - @classmethod def deserialize(cls, header, frames): index_nframes = header["index_frame_count"] @@ -487,64 +410,11 @@ def _get_columns_by_label(self, labels, downcast=False): new_data = super()._get_columns_by_label(labels, downcast) return ( - self._constructor(data=new_data, index=self.index) + self.__class__(data=new_data, index=self.index) if len(new_data) > 0 - else self._constructor(dtype=self.dtype, name=self.name) + else self.__class__(dtype=self.dtype, name=self.name) ) - @classmethod - def from_arrow(cls, array): - """ - Convert from PyArrow Array/ChunkedArray to Series. - - Parameters - ---------- - array : PyArrow Array/ChunkedArray - PyArrow Object which has to be converted to cudf Series. - - Raises - ------ - TypeError for invalid input type. - - Returns - ------- - cudf Series - - Examples - -------- - >>> import cudf - >>> import pyarrow as pa - >>> cudf.Series.from_arrow(pa.array(["a", "b", None])) - 0 a - 1 b - 2 - dtype: object - """ - - return cls(cudf.core.column.ColumnBase.from_arrow(array)) - - def to_arrow(self): - """ - Convert Series to a PyArrow Array. - - Returns - ------- - PyArrow Array - - Examples - -------- - >>> import cudf - >>> sr = cudf.Series(["a", "b", None]) - >>> sr.to_arrow() - - [ - "a", - "b", - null - ] - """ - return self._column.to_arrow() - def drop( self, labels=None, @@ -667,14 +537,6 @@ def drop( if not inplace: return out - def __copy__(self, deep=True): - return self.copy(deep) - - def __deepcopy__(self, memo=None): - if memo is None: - memo = {} - return self.copy() - def append(self, to_append, ignore_index=False, verify_integrity=False): """Append values from another ``Series`` or array-like object. If ``ignore_index=True``, the index is reset. 
@@ -1047,11 +909,6 @@ def memory_usage(self, index=True, deep=False): n += self._index.memory_usage(deep=deep) return n - def __len__(self): - """Returns the size of the ``Series`` including null values. - """ - return len(self._column) - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method == "__call__": return get_appropriate_dispatched_func( @@ -1186,12 +1043,9 @@ def __getitem__(self, arg): else: return self.loc[arg] - def __iter__(self): - cudf.utils.utils.raise_iteration_error(obj=self) - - iteritems = __iter__ + iteritems = SingleColumnFrame.__iter__ - items = __iter__ + items = SingleColumnFrame.__iter__ def to_dict(self, into=dict): raise TypeError( @@ -1251,22 +1105,6 @@ def take(self, indices, keep_index=True): data = self._column.take(col_inds, keep_index=False) return self._copy_construct(data=data, index=None) - def __bool__(self): - """Always raise TypeError when converting a Series - into a boolean. - """ - raise TypeError(f"can't compute boolean for {type(self)}") - - def tolist(self): - - raise TypeError( - "cuDF does not support conversion to host memory " - "via `tolist()` method. Consider using " - "`.to_arrow().to_pylist()` to construct a Python list." - ) - - to_list = tolist - def head(self, n=5): """ Return the first `n` rows. @@ -3149,40 +2987,6 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): return result_series._column.any() - def to_gpu_array(self, fillna=None): - """Get a dense numba device array for the data. - - Parameters - ---------- - fillna : str or None - See *fillna* in ``.to_array``. - - Notes - ----- - - if ``fillna`` is ``None``, null values are skipped. Therefore, the - output size could be smaller. 
- - Returns - ------- - numba DeviceNDArray - - Examples - -------- - >>> import cudf - >>> s = cudf.Series([10, 20, 30, 40, 50]) - >>> s - 0 10 - 1 20 - 2 30 - 3 40 - 4 50 - dtype: int64 - >>> s.to_gpu_array() - - """ - return self._column.to_gpu_array(fillna=fillna) - def to_pandas(self, index=True, nullable=False, **kwargs): """ Convert to a Pandas Series. @@ -6527,8 +6331,6 @@ def explode(self, ignore_index=False): return super()._explode(self._column_names[0], ignore_index) - _accessors = set() # type: Set[Any] - truediv_int_dtype_corrections = { "int8": "float32", diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 4890ccc289e..e5e36ba7e21 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4377,12 +4377,6 @@ def test_constructor_properties(): df[key1] = val1 df[key2] = val2 - # Correct use of _constructor (for DataFrame) - assert_eq(df, df._constructor({key1: val1, key2: val2})) - - # Correct use of _constructor (for cudf.Series) - assert_eq(df[key1], df[key2]._constructor(val1, name=key1)) - # Correct use of _constructor_sliced (for DataFrame) assert_eq(df[key1], df._constructor_sliced(val1, name=key1)) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 21a431dd540..158dffc3884 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1799,7 +1799,7 @@ def test_index_tolist(data, dtype): TypeError, match=re.escape( r"cuDF does not support conversion to host memory " - r"via `tolist()` method. Consider using " + r"via the `tolist()` method. Consider using " r"`.to_arrow().to_pylist()` to construct a Python list." 
), ): diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index e87ab3730dd..ca819c7f59b 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -90,7 +90,9 @@ def test_pickle_index(): idx = GenericIndex(np.arange(nelem), name="a") pickled = pickle.dumps(idx) out = pickle.loads(pickled) - assert idx == out + # TODO: Once operations like `all` are supported on Index objects, we can + # just use that without calling values first. + assert (idx == out).values.all() def test_pickle_buffer(): diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 0dc53fa29e9..0cc0ad57745 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -372,7 +372,7 @@ def test_series_tolist(data): TypeError, match=re.escape( r"cuDF does not support conversion to host memory " - r"via `tolist()` method. Consider using " + r"via the `tolist()` method. Consider using " r"`.to_arrow().to_pylist()` to construct a Python list." ), ):