diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8bdb36fc27d..4c565ee6330 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5808,7 +5808,7 @@ def _prepare_for_rowwise_op(self, method, skipna): is_pure_dt = all(is_datetime_dtype(dt) for dt in self.dtypes) if not is_pure_dt: - filtered = self.select_dtypes(include=[np.number, np.bool]) + filtered = self.select_dtypes(include=[np.number, np.bool_]) else: filtered = self.copy(deep=False) @@ -6587,8 +6587,8 @@ def kurtosis( msg = "Kurtosis only supports int, float, and bool dtypes." raise NotImplementedError(msg) - self = self.select_dtypes(include=[np.number, np.bool]) - return self._apply_support_method( + filtered = self.select_dtypes(include=[np.number, np.bool_]) + return filtered._apply_support_method( "kurtosis", axis=axis, skipna=skipna, @@ -6636,8 +6636,8 @@ def skew( msg = "Skew only supports int, float, and bool dtypes." raise NotImplementedError(msg) - self = self.select_dtypes(include=[np.number, np.bool]) - return self._apply_support_method( + filtered = self.select_dtypes(include=[np.number, np.bool_]) + return filtered._apply_support_method( "skew", axis=axis, skipna=skipna, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index dedefeaf9a2..662ed05e2df 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1195,7 +1195,7 @@ def scatter_by_map( map_index = as_column(map_index) # Convert float to integer - if map_index.dtype == np.float: + if map_index.dtype.kind == "f": map_index = map_index.astype(np.int32) # Convert string or categorical to integer diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88f3f8c4c89..97066b71946 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1997,7 +1997,14 @@ def __repr__(self): output = ( preprocess.astype("str") .to_pandas() - .astype("category") + .astype( + dtype=pd.CategoricalDtype( + categories=preprocess.dtype.categories.astype( + "str" + ).to_pandas(), + ordered=preprocess.dtype.ordered, + ) + ) .__repr__() ) break_idx = output.find("ordered=") diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index cf372286b7e..653004eaee4 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -173,7 +173,7 @@ def _loc_to_iloc(self, arg): else: arg = Series(column.as_column(arg)) - if arg.dtype in [np.bool, np.bool_]: + if arg.dtype in (bool, np.bool_): return arg else: indices = indices_from_labels(self._sr, arg) diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 4ea32c77724..a3467e6fbe0 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. + import numpy as np from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar @@ -251,7 +252,7 @@ def __repr__(self): def _binop_result_dtype_or_error(self, other, op): if op in {"__eq__", "__ne__", "__lt__", "__gt__", "__le__", "__ge__"}: - return np.bool + return np.bool_ out_dtype = get_allowed_combinations_for_operator( self.dtype, other.dtype, op diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index be03fb147ff..cce8b17bea4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1072,7 +1072,16 @@ def __repr__(self): show_dimensions = get_option("display.show_dimensions") if preprocess._column.categories.dtype.kind == "f": pd_series = ( - preprocess.astype("str").to_pandas().astype("category") + preprocess.astype("str") + .to_pandas() + .astype( + dtype=pd.CategoricalDtype( + categories=preprocess.dtype.categories.astype( + "str" + ).to_pandas(), + ordered=preprocess.dtype.ordered, + ) + ) ) else: pd_series = preprocess.to_pandas() diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index 0c7b93fde7b..1f35bc93c78 100644 --- a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. from itertools import product from math import floor @@ -22,7 +22,7 @@ def test_applymap_round(nelem, masked): # Make mask bitmask = utils.random_bitmask(nelem) boolmask = np.asarray( - utils.expand_bits_to_bytes(bitmask), dtype=np.bool + utils.expand_bits_to_bytes(bitmask), dtype=np.bool_ )[:nelem] data[~boolmask] = np.nan diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index c821755f670..579716f8277 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -290,15 +290,15 @@ def test_validity_add(nelem, lhs_nulls, rhs_nulls): res = lhs + rhs if lhs_nulls == "some" and rhs_nulls == "some": res_mask = np.asarray( - utils.expand_bits_to_bytes(lhs_mask & rhs_mask), dtype=np.bool + utils.expand_bits_to_bytes(lhs_mask & rhs_mask), dtype=np.bool_ )[:nelem] if lhs_nulls == "some" and rhs_nulls == "none": res_mask = np.asarray( - utils.expand_bits_to_bytes(lhs_mask), dtype=np.bool + utils.expand_bits_to_bytes(lhs_mask), dtype=np.bool_ )[:nelem] if lhs_nulls == "none" and rhs_nulls == "some": res_mask = np.asarray( - utils.expand_bits_to_bytes(rhs_mask), dtype=np.bool + utils.expand_bits_to_bytes(rhs_mask), dtype=np.bool_ )[:nelem] # Fill NA values na_value = -10000 diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a3bad0ab5a6..69f6ecfeb17 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1214,7 +1214,7 @@ def test_dataframe_concat_different_numerical_columns(dtype1, dtype2): def test_dataframe_concat_different_column_types(): - df1 = cudf.Series([42], dtype=np.float) + df1 = cudf.Series([42], dtype=np.float64) df2 = cudf.Series(["a"], dtype="category") with pytest.raises(ValueError): cudf.concat([df1, df2]) @@ -2353,7 +2353,7 @@ def check_frame_series_equality(left, right): def test_tail_for_string(): gdf = cudf.DataFrame() - gdf["id"] = cudf.Series(["a", "b"], dtype=np.object) + gdf["id"] = cudf.Series(["a", "b"], dtype=np.object_) gdf["v"] = cudf.Series([1, 2]) assert_eq(gdf.tail(3), gdf.to_pandas().tail(3)) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index 8692057aa58..969cf1bf549 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -821,8 +821,8 @@ def test_join_empty_table_dtype(): "column_a", [ ( - pd.Series([None, 1, 2, 3, 4, 5, 6, 7]).astype(np.float), - pd.Series([8, 9, 10, 11, 12, None, 14, 15]).astype(np.float), + pd.Series([None, 1, 2, 3, 4, 5, 6, 7], dtype=np.float64), + pd.Series([8, 9, 10, 11, 12, None, 14, 15], dtype=np.float64), ) ], ) @@ -830,8 +830,8 @@ def test_join_empty_table_dtype(): "column_b", [ ( - pd.Series([0, 1, 0, None, 1, 0, 0, 0]).astype(np.float), - pd.Series([None, 1, 2, 1, 2, 2, 0, 0]).astype(np.float), + pd.Series([0, 1, 0, None, 1, 0, 0, 0], dtype=np.float64), + pd.Series([None, 1, 2, 1, 2, 2, 0, 0], dtype=np.float64), ) ], ) diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py index 15b1acdfc08..24c60f12a2f 100644 --- a/python/cudf/cudf/tests/test_pandas_interop.py +++ b/python/cudf/cudf/tests/test_pandas_interop.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -23,8 +23,8 @@ def test_to_pandas(): # Notice, the dtype differ when Pandas and cudf boolean series # contains None/NaN - assert df["c"].dtype == np.bool - assert pdf["c"].dtype == np.object + assert df["c"].dtype == np.bool_ + assert pdf["c"].dtype == np.object_ assert len(df["a"]) == len(pdf["a"]) assert len(df["b"]) == len(pdf["b"]) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 66e09f61869..7c274734980 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1433,12 +1433,25 @@ def test_categorical_series_with_nan_repr(): 4 NaN 5 dtype: category - Categories (4, float64): [1.0, 10.0, 2.0, NaN] + Categories (4, float64): [1.0, 2.0, 10.0, NaN] """ ) assert series.__repr__().split() == expected_repr.split() + sliced_expected_repr = textwrap.dedent( + """ + 2 NaN + 3 10.0 + 4 NaN + 5 + dtype: category + Categories (4, float64): [1.0, 2.0, 10.0, NaN] + """ + ) + + assert series[2:].__repr__().split() == sliced_expected_repr.split() + def test_categorical_dataframe_with_nan_repr(): series = cudf.Series( @@ -1469,7 +1482,14 @@ def test_categorical_index_with_nan_repr(): expected_repr = ( "CategoricalIndex([1.0, 2.0, NaN, 10.0, NaN, ], " - "categories=[1.0, 10.0, 2.0, NaN], ordered=False, dtype='category')" + "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')" ) assert cat_index.__repr__() == expected_repr + + sliced_expected_repr = ( + "CategoricalIndex([NaN, 10.0, NaN, ], " + "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')" + ) + + assert cat_index[2:].__repr__() == sliced_expected_repr diff --git a/python/cudf/cudf/tests/utils.py b/python/cudf/cudf/tests/utils.py index ae60aad8db2..1163c3085e4 100644 --- a/python/cudf/cudf/tests/utils.py +++ b/python/cudf/cudf/tests/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. import re from collections.abc import Mapping, Sequence @@ -259,7 +259,9 @@ def gen_rand(dtype, size, **kwargs): elif dtype.kind == "b": low = kwargs.get("low", 0) high = kwargs.get("high", 2) - return np.random.randint(low=low, high=high, size=size).astype(np.bool) + return np.random.randint(low=low, high=high, size=size).astype( + np.bool_ + ) elif dtype.kind == "M": low = kwargs.get("low", 0) time_unit, _ = np.datetime_data(dtype) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index e8b8c53312a..03a39f6fb4b 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -194,7 +194,7 @@ def wrapper(*args, **kwargs): return wrapper -def get_null_series(size, dtype=np.bool): +def get_null_series(size, dtype=np.bool_): """ Creates a null series of provided dtype and size