diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 9cb86ca1cd2..c9c00692174 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -342,6 +342,26 @@ def select_by_label(self, key: Any) -> ColumnAccessor: return self._select_by_label_with_wildcard(key) return self._select_by_label_grouped(key) + def get_labels_by_index(self, index: Any) -> tuple: + """Get the labels corresponding to the provided column indices. + + Parameters + ---------- + index : integer, integer slice, or list-like of integers + The column indexes. + + Returns + ------- + tuple + """ + if isinstance(index, slice): + start, stop, step = index.indices(len(self._data)) + return self.names[start:stop:step] + elif pd.api.types.is_integer(index): + return (self.names[index],) + else: + return tuple(self.names[i] for i in index) + def select_by_index(self, index: Any) -> ColumnAccessor: """ Return a ColumnAccessor composed of the columns @@ -355,13 +375,7 @@ def select_by_index(self, index: Any) -> ColumnAccessor: ------- ColumnAccessor """ - if isinstance(index, slice): - start, stop, step = index.indices(len(self._data)) - keys = self.names[start:stop:step] - elif pd.api.types.is_integer(index): - keys = (self.names[index],) - else: - keys = tuple(self.names[i] for i in index) + keys = self.get_labels_by_index(index) data = {k: self._data[k] for k in keys} return self.__class__( data, multiindex=self.multiindex, level_names=self.level_names, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 39ae9c774e5..afd087c63cf 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -372,9 +372,9 @@ class _DataFrameIlocIndexer(_DataFrameIndexer): def _getitem_tuple_arg(self, arg): # Iloc Step 1: # Gather the columns specified by the second tuple arg - columns_df = self._frame._get_columns_by_index(arg[1]) - - columns_df._index = self._frame._index + columns_df = self._frame._from_data( + self._frame._data.select_by_index(arg[1]), self._frame._index + ) # Iloc Step 2: # Gather the rows specified by the first tuple arg @@ -422,9 +422,9 @@ def _getitem_tuple_arg(self, arg): @_cudf_nvtx_annotate def _setitem_tuple_arg(self, key, value): - columns = self._frame._get_columns_by_index(key[1]) - - for col in columns: + # TODO: Determine if this usage is prevalent enough to expose this + # selection logic at a higher level than ColumnAccessor. + for col in self._frame._data.get_labels_by_index(key[1]): self._frame[col].iloc[key[0]] = value def _getitem_scalar(self, arg): @@ -612,7 +612,8 @@ def __init__( new_df = self._from_arrays(data, index=index, columns=columns) self._data = new_df._data - self.index = new_df._index + self._index = new_df._index + self._check_data_index_length_match() elif hasattr(data, "__array_interface__"): arr_interface = data.__array_interface__ if len(arr_interface["descr"]) == 1: @@ -621,7 +622,8 @@ def __init__( else: new_df = self.from_records(data, index=index, columns=columns) self._data = new_df._data - self.index = new_df._index + self._index = new_df._index + self._check_data_index_length_match() else: if is_list_like(data): if len(data) > 0 and is_scalar(data[0]): @@ -632,7 +634,8 @@ def __init__( new_df = DataFrame(data=data, index=index) self._data = new_df._data - self.index = new_df._index + self._index = new_df._index + self._check_data_index_length_match() elif len(data) > 0 and isinstance(data[0], Series): self._init_from_series_list( data=data, columns=columns, index=index @@ -653,6 +656,15 @@ def __init__( if dtype: self._data = self.astype(dtype)._data + def _check_data_index_length_match(df: DataFrame) -> None: + # Validate that the number of rows in the data matches the index if the + # data is not empty. This is a helper for the constructor. + if df._data.nrows > 0 and df._data.nrows != len(df._index): + raise ValueError( + f"Shape of passed values is {df.shape}, indices imply " + f"({len(df._index)}, {df._num_columns})" + ) + @_cudf_nvtx_annotate def _init_from_series_list(self, data, columns, index): if index is None: @@ -856,9 +868,7 @@ def _from_data( index: Optional[BaseIndex] = None, columns: Any = None, ) -> DataFrame: - out = super()._from_data(data, index) - if index is None: - out.index = RangeIndex(out._data.nrows) + out = super()._from_data(data=data, index=index) if columns is not None: out.columns = columns return out @@ -5601,7 +5611,9 @@ def stack(self, level=-1, dropna=True): """ assert level in (None, -1) repeated_index = self.index.repeat(self.shape[1]) - name_index = Frame({0: self._column_names}).tile(self.shape[0]) + name_index = cudf.DataFrame._from_data({0: self._column_names}).tile( + self.shape[0] + ) new_index = list(repeated_index._columns) + [name_index._columns[0]] if isinstance(self._index, MultiIndex): index_names = self._index.names + [None] diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 2802009b848..b0a0436655c 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -143,11 +143,7 @@ def _num_columns(self) -> int: @property def _num_rows(self) -> int: - if self._index is not None: - return len(self._index) - if len(self._data) == 0: - return 0 - return len(self._data.columns[0]) + return 0 if self._num_columns == 0 else len(self._data.columns[0]) @property def _column_names(self) -> Tuple[Any, ...]: # TODO: Tuple[str]? @@ -184,69 +180,38 @@ def deserialize(cls, header, frames): @classmethod @_cudf_nvtx_annotate - def _from_data( - cls, - data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, - ): + def _from_data(cls, data: MutableMapping): obj = cls.__new__(cls) - Frame.__init__(obj, data, index) + Frame.__init__(obj, data) return obj @classmethod @_cudf_nvtx_annotate def _from_columns( - cls, - columns: List[ColumnBase], - column_names: abc.Iterable[str], - index_names: Optional[List[str]] = None, + cls, columns: List[ColumnBase], column_names: abc.Iterable[str], ): - """Construct a `Frame` object from a list of columns. - - If `index_names` is set, the first `len(index_names)` columns are - used to construct the index of the frame. - """ - index = None - n_index_columns = 0 - if index_names is not None: - n_index_columns = len(index_names) - index = cudf.core.index._index_from_columns( - columns[:n_index_columns] - ) - if isinstance(index, cudf.MultiIndex): - index.names = index_names - else: - index.name = index_names[0] - - data = { - name: columns[i + n_index_columns] - for i, name in enumerate(column_names) - } + """Construct a `Frame` object from a list of columns.""" + data = {name: columns[i] for i, name in enumerate(column_names)} - return cls._from_data(data, index) + return cls._from_data(data) @_cudf_nvtx_annotate def _from_columns_like_self( self, columns: List[ColumnBase], column_names: Optional[abc.Iterable[str]] = None, - index_names: Optional[List[str]] = None, ): - """Construct a `Frame` from a list of columns with metadata from self. + """Construct a Frame from a list of columns with metadata from self. If `column_names` is None, use column names from self. - If `index_names` is set, the first `len(index_names)` columns are - used to construct the index of the frame. """ if column_names is None: column_names = self._column_names - frame = self.__class__._from_columns( - columns, column_names, index_names - ) - return frame._copy_type_metadata(self, include_index=bool(index_names)) + frame = self.__class__._from_columns(columns, column_names) + return frame._copy_type_metadata(self) def _mimic_inplace( - self: T, result: Frame, inplace: bool = False + self: T, result: T, inplace: bool = False ) -> Optional[Frame]: if inplace: for col in self._data: @@ -255,7 +220,6 @@ def _mimic_inplace( result._data[col], inplace=True ) self._data = result._data - self._index = result._index return None else: return result @@ -426,92 +390,6 @@ def memory_usage(self, deep=False): def __len__(self): return self._num_rows - @_cudf_nvtx_annotate - def copy(self: T, deep: bool = True) -> T: - """ - Make a copy of this object's indices and data. - - When ``deep=True`` (default), a new object will be created with a - copy of the calling object's data and indices. Modifications to - the data or indices of the copy will not be reflected in the - original object (see notes below). - When ``deep=False``, a new object will be created without copying - the calling object's data or index (only references to the data - and index are copied). Any changes to the data of the original - will be reflected in the shallow copy (and vice versa). - - Parameters - ---------- - deep : bool, default True - Make a deep copy, including a copy of the data and the indices. - With ``deep=False`` neither the indices nor the data are copied. - - Returns - ------- - copy : Series or DataFrame - Object type matches caller. - - Examples - -------- - >>> s = cudf.Series([1, 2], index=["a", "b"]) - >>> s - a 1 - b 2 - dtype: int64 - >>> s_copy = s.copy() - >>> s_copy - a 1 - b 2 - dtype: int64 - - **Shallow copy versus default (deep) copy:** - - >>> s = cudf.Series([1, 2], index=["a", "b"]) - >>> deep = s.copy() - >>> shallow = s.copy(deep=False) - - Shallow copy shares data and index with original. - - >>> s is shallow - False - >>> s._column is shallow._column and s.index is shallow.index - True - - Deep copy has own copy of data and index. - - >>> s is deep - False - >>> s.values is deep.values or s.index is deep.index - False - - Updates to the data shared by shallow copy and original is reflected - in both; deep copy remains unchanged. - - >>> s['a'] = 3 - >>> shallow['b'] = 4 - >>> s - a 3 - b 4 - dtype: int64 - >>> shallow - a 3 - b 4 - dtype: int64 - >>> deep - a 1 - b 2 - dtype: int64 - """ - new_frame = self.__class__.__new__(self.__class__) - new_frame._data = self._data.copy(deep=deep) - - if self._index is not None: - new_frame._index = self._index.copy(deep=deep) - else: - new_frame._index = None - - return new_frame - @_cudf_nvtx_annotate def astype(self, dtype, copy=False, **kwargs): result = {} @@ -525,7 +403,7 @@ def astype(self, dtype, copy=False, **kwargs): return result @_cudf_nvtx_annotate - def equals(self, other, **kwargs): + def equals(self, other): """ Test whether two objects contain the same elements. This function allows two Series or DataFrames to be compared against @@ -584,28 +462,19 @@ def equals(self, other, **kwargs): """ if self is other: return True - - check_types = kwargs.get("check_types", True) - - if check_types: - if type(self) is not type(other): - return False - - if other is None or len(self) != len(other): - return False - - # check data: - for self_col, other_col in zip( - self._data.values(), other._data.values() + if ( + other is None + or not isinstance(other, type(self)) + or len(self) != len(other) ): - if not self_col.equals(other_col, check_dtypes=check_types): - return False + return False - # check index: - if self._index is None: - return other._index is None - else: - return self._index.equals(other._index) + return all( + self_col.equals(other_col, check_dtypes=True) + for self_col, other_col in zip( + self._data.values(), other._data.values() + ) + ) @_cudf_nvtx_annotate def _get_columns_by_label(self, labels, downcast=False): @@ -615,30 +484,6 @@ def _get_columns_by_label(self, labels, downcast=False): """ return self._data.select_by_label(labels) - @_cudf_nvtx_annotate - def _get_columns_by_index(self, indices): - """ - Returns columns of the Frame specified by `labels` - - """ - data = self._data.select_by_index(indices) - return self.__class__._from_data( - data, columns=data.to_pandas_index(), index=self.index - ) - - def _as_column(self): - """ - _as_column : Converts a single columned Frame to Column - """ - assert ( - self._num_columns == 1 - and self._index is None - and self._column_names[0] is None - ), """There should be only one data column, - no index and None as the name to use this method""" - - return self._data[None].copy(deep=False) - @property def values(self): """ @@ -884,6 +729,10 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): 3 4 dtype: int64 """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.clip is deprecated and will be removed.", FutureWarning, + ) if axis != 1: raise NotImplementedError("`axis is not yet supported in clip`") @@ -901,13 +750,10 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): if len(lower) != self._num_columns: raise ValueError( - """Length of lower/upper should be - equal to number of columns in - DataFrame/Series/Index/MultiIndex""" + "Length of lower/upper should be equal to number of columns" ) - output = self.copy(deep=False) - if output.ndim == 1: + if self.ndim == 1: # In case of series and Index, # swap lower and upper if lower > upper if ( @@ -917,11 +763,12 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1): ): lower[0], upper[0] = upper[0], lower[0] - for i, name in enumerate(self._data): - output._data[name] = self._data[name].clip(lower[i], upper[i]) - + data = { + name: col.clip(lower[i], upper[i]) + for i, (name, col) in enumerate(self._data.items()) + } + output = self._from_data(data, self._index) output._copy_type_metadata(self, include_index=False) - return self._mimic_inplace(output, inplace=inplace) @_cudf_nvtx_annotate @@ -1198,7 +1045,7 @@ def fillna( Returns ------- - result : DataFrame + result : DataFrame, Series, or Index Copy with nulls filled. Examples @@ -1327,8 +1174,7 @@ def fillna( filled_data[col_name] = col.copy(deep=True) return self._mimic_inplace( - self._from_data(data=filled_data, index=self._index), - inplace=inplace, + self._from_data(data=filled_data), inplace=inplace, ) @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 586401de150..e60cf1f2103 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -113,7 +113,7 @@ def _index_from_data(data: MutableMapping, name: Any = None): index_class_type = IntervalIndex else: index_class_type = cudf.MultiIndex - return index_class_type._from_data(data, None, name) + return index_class_type._from_data(data, name) def _index_from_columns( @@ -375,7 +375,7 @@ def equals(self, other): other._step, ): return True - return Int64Index._from_data(self._data).equals(other) + return self._as_int64().equals(other) @_cudf_nvtx_annotate def serialize(self): @@ -841,7 +841,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented + @classmethod @_cudf_nvtx_annotate + def _from_data( + cls, data: MutableMapping, name: Any = None + ) -> GenericIndex: + out = super()._from_data(data=data) + if name is not None: + out.name = name + return out + def _binaryop( self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> SingleColumnFrame: @@ -915,22 +924,28 @@ def equals(self, other, **kwargs): True if “other” is an Index and it has the same elements as calling index; False otherwise. """ - if not isinstance(other, BaseIndex): + if ( + other is None + or not isinstance(other, BaseIndex) + or len(self) != len(other) + ): return False - check_types = False + check_dtypes = False self_is_categorical = isinstance(self, CategoricalIndex) other_is_categorical = isinstance(other, CategoricalIndex) if self_is_categorical and not other_is_categorical: other = other.astype(self.dtype) - check_types = True + check_dtypes = True elif other_is_categorical and not self_is_categorical: self = self.astype(other.dtype) - check_types = True + check_dtypes = True try: - return super().equals(other, check_types=check_types) + return self._column.equals( + other._column, check_dtypes=check_dtypes + ) except TypeError: return False diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index f527f7a1e1f..342a4e52101 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -8,7 +8,18 @@ import warnings from collections import Counter, abc from functools import cached_property -from typing import Any, Callable, Dict, Optional, Tuple, Type, TypeVar, Union +from typing import ( + Any, + Callable, + Dict, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) from uuid import uuid4 import cupy as cp @@ -26,6 +37,7 @@ is_list_dtype, is_list_like, ) +from cudf.core._base_index import BaseIndex from cudf.core.column import ColumnBase from cudf.core.column_accessor import ColumnAccessor from cudf.core.frame import Frame, _drop_rows_by_labels @@ -175,6 +187,145 @@ def to_dict(self, *args, **kwargs): # noqa: D102 "`.to_pandas().to_dict()` to construct a Python dictionary." ) + @property + def _num_rows(self) -> int: + # Important to use the index because the data may be empty. + return len(self._index) + + @classmethod + def _from_data( + cls, data: MutableMapping, index: Optional[BaseIndex] = None, + ): + out = super()._from_data(data) + out._index = RangeIndex(out._data.nrows) if index is None else index + return out + + @classmethod + @_cudf_nvtx_annotate + def _from_columns( + cls, + columns: List[ColumnBase], + column_names: List[str], + index_names: Optional[List[str]] = None, + ): + """Construct a `Frame` object from a list of columns. + + If `index_names` is set, the first `len(index_names)` columns are + used to construct the index of the frame. + """ + data_columns = columns + + n_index_columns = len(index_names) if index_names else 0 + index_columns = columns[:n_index_columns] + data_columns = columns[n_index_columns:] + + out = super()._from_columns(data_columns, column_names) + + if index_names is not None: + out._index = cudf.core.index._index_from_columns(index_columns) + if isinstance(out._index, cudf.MultiIndex): + out._index.names = index_names + else: + out._index.name = index_names[0] + + return out + + @_cudf_nvtx_annotate + def _from_columns_like_self( + self, + columns: List[ColumnBase], + column_names: Optional[abc.Iterable[str]] = None, + index_names: Optional[List[str]] = None, + ): + """Construct a `Frame` from a list of columns with metadata from self. + + If `index_names` is set, the first `len(index_names)` columns are + used to construct the index of the frame. + """ + frame = self.__class__._from_columns( + columns, column_names, index_names + ) + return frame._copy_type_metadata(self, include_index=bool(index_names)) + + def _mimic_inplace( + self: T, result: T, inplace: bool = False + ) -> Optional[Frame]: + if inplace: + self._index = result._index + return super()._mimic_inplace(result, inplace) + + def copy(self: T, deep: bool = True) -> T: + """Make a copy of this object's indices and data. + + When ``deep=True`` (default), a new object will be created with a + copy of the calling object's data and indices. Modifications to + the data or indices of the copy will not be reflected in the + original object (see notes below). + When ``deep=False``, a new object will be created without copying + the calling object's data or index (only references to the data + and index are copied). Any changes to the data of the original + will be reflected in the shallow copy (and vice versa). + + Parameters + ---------- + deep : bool, default True + Make a deep copy, including a copy of the data and the indices. + With ``deep=False`` neither the indices nor the data are copied. + + Returns + ------- + copy : Series or DataFrame + Object type matches caller. + + Examples + -------- + >>> s = cudf.Series([1, 2], index=["a", "b"]) + >>> s + a 1 + b 2 + dtype: int64 + >>> s_copy = s.copy() + >>> s_copy + a 1 + b 2 + dtype: int64 + + **Shallow copy versus default (deep) copy:** + + >>> s = cudf.Series([1, 2], index=["a", "b"]) + >>> deep = s.copy() + >>> shallow = s.copy(deep=False) + + Updates to the data shared by shallow copy and original is reflected + in both; deep copy remains unchanged. + + >>> s['a'] = 3 + >>> shallow['b'] = 4 + >>> s + a 3 + b 4 + dtype: int64 + >>> shallow + a 3 + b 4 + dtype: int64 + >>> deep + a 1 + b 2 + dtype: int64 + """ + return self._from_data( + self._data.copy(deep=deep), + # Indexes are immutable so copies can always be shallow. + self._index.copy(deep=False), + ) + + @_cudf_nvtx_annotate + def equals(self, other): # noqa: D102 + if not super().equals(other): + return False + return self._index.equals(other._index) + @property def index(self): """Get the labels for the rows.""" @@ -753,6 +904,18 @@ def _split(self, splits, keep_index=True): for i in range(len(splits) + 1) ] + @_cudf_nvtx_annotate + def fillna( + self, value=None, method=None, axis=None, inplace=False, limit=None + ): # noqa: D102 + old_index = self._index + ret = super().fillna(value, method, axis, inplace, limit) + if inplace: + self._index = old_index + else: + ret._index = old_index + return ret + def add_prefix(self, prefix): """ Prefix labels with string `prefix`. @@ -1062,7 +1225,9 @@ def _align_to_index( result = result.sort_values(sort_col_id) del result[sort_col_id] - result = self.__class__._from_data(result._data, index=result.index) + result = self.__class__._from_data( + data=result._data, index=result.index + ) result._data.multiindex = self._data.multiindex result._data._level_names = self._data._level_names result.index.names = self.index.names diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index d94c2ae3e93..1b946a140c6 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -8,7 +8,7 @@ from collections.abc import Sequence from functools import cached_property from numbers import Integral -from typing import Any, List, MutableMapping, Optional, Tuple, Union +from typing import Any, List, MutableMapping, Tuple, Union import cupy import numpy as np @@ -278,14 +278,8 @@ def set_names(self, names, level=None, inplace=False): @classmethod @_cudf_nvtx_annotate - def _from_data( - cls, - data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, - name: Any = None, - ) -> MultiIndex: - assert index is None - obj = cls.from_frame(cudf.DataFrame._from_data(data)) + def _from_data(cls, data: MutableMapping, name: Any = None,) -> MultiIndex: + obj = cls.from_frame(cudf.DataFrame._from_data(data=data)) if name is not None: obj.name = name return obj diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ef5850ecc17..c36c6a78aec 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -459,12 +459,9 @@ def _from_data( index: Optional[BaseIndex] = None, name: Any = None, ) -> Series: - """ - Construct the Series from a ColumnAccessor - """ - out: Series = super()._from_data(data, index, name) - if index is None: - out._index = RangeIndex(out._data.nrows) + out = super()._from_data(data=data, index=index) + if name is not None: + out.name = name return out @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index b35d653e28f..de10261315c 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -3,16 +3,7 @@ from __future__ import annotations -from typing import ( - Any, - Dict, - MutableMapping, - Optional, - Tuple, - Type, - TypeVar, - Union, -) +from typing import Any, Dict, Optional, Tuple, Type, TypeVar, Union import cupy import numpy as np @@ -67,20 +58,6 @@ def _scan(self, op, axis=None, *args, **kwargs): return super()._scan(op, axis=axis, *args, **kwargs) - @classmethod - @_cudf_nvtx_annotate - def _from_data( - cls, - data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, - name: Any = None, - ): - - out = super()._from_data(data, index) - if name is not None: - out.name = name - return out - @property # type: ignore @_cudf_nvtx_annotate def name(self): diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 5bde75c2e21..136deb59334 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1565,18 +1565,10 @@ def test_dataframe_cupy_wrong_dimensions(): def test_dataframe_cupy_array_wrong_index(): d_ary = cupy.empty((2, 3), dtype=np.int32) - with pytest.raises( - ValueError, - match="Length mismatch: Expected axis has 2 elements, " - "new values have 1 elements", - ): + with pytest.raises(ValueError): cudf.DataFrame(d_ary, index=["a"]) - with pytest.raises( - ValueError, - match="Length mismatch: Expected axis has 2 elements, " - "new values have 1 elements", - ): + with pytest.raises(ValueError): cudf.DataFrame(d_ary, index="a")