rapidsai · rapids-bot · Mar 2, 2021 · Mar 1, 2021 · Mar 1, 2021 · Mar 1, 2021
@@ -5808,7 +5808,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
         is_pure_dt = all(is_datetime_dtype(dt) for dt in self.dtypes)
 
         if not is_pure_dt:
-            filtered = self.select_dtypes(include=[np.number, np.bool])
+            filtered = self.select_dtypes(include=[np.number, np.bool_])
         else:
             filtered = self.copy(deep=False)
 
@@ -6587,8 +6587,8 @@ def kurtosis(
             msg = "Kurtosis only supports int, float, and bool dtypes."
             raise NotImplementedError(msg)
 
-        self = self.select_dtypes(include=[np.number, np.bool])
-        return self._apply_support_method(
+        filtered = self.select_dtypes(include=[np.number, np.bool_])
+        return filtered._apply_support_method(
             "kurtosis",
             axis=axis,
             skipna=skipna,
@@ -6636,8 +6636,8 @@ def skew(
             msg = "Skew only supports int, float, and bool dtypes."
             raise NotImplementedError(msg)
 
-        self = self.select_dtypes(include=[np.number, np.bool])
-        return self._apply_support_method(
+        filtered = self.select_dtypes(include=[np.number, np.bool_])
+        return filtered._apply_support_method(
             "skew",
             axis=axis,
             skipna=skipna,

@@ -1195,7 +1195,7 @@ def scatter_by_map(
             map_index = as_column(map_index)
 
         # Convert float to integer
-        if map_index.dtype == np.float:
+        if map_index.dtype.kind == "f":
             map_index = map_index.astype(np.int32)
 
         # Convert string or categorical to integer

@@ -1997,7 +1997,14 @@ def __repr__(self):
                 output = (
                     preprocess.astype("str")
                     .to_pandas()
-                    .astype("category")
+                    .astype(
+                        dtype=pd.CategoricalDtype(
+                            categories=preprocess.dtype.categories.astype(
+                                "str"
+                            ).to_pandas(),
+                            ordered=preprocess.dtype.ordered,
+                        )
+                    )
                     .__repr__()
                 )
                 break_idx = output.find("ordered=")

@@ -173,7 +173,7 @@ def _loc_to_iloc(self, arg):
 
         else:
             arg = Series(column.as_column(arg))
-            if arg.dtype in [np.bool, np.bool_]:
+            if arg.dtype in (bool, np.bool_):
                 return arg
             else:
                 indices = indices_from_labels(self._sr, arg)

@@ -1,4 +1,5 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+
 import numpy as np
 
 from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar
@@ -251,7 +252,7 @@ def __repr__(self):
 
     def _binop_result_dtype_or_error(self, other, op):
         if op in {"__eq__", "__ne__", "__lt__", "__gt__", "__le__", "__ge__"}:
-            return np.bool
+            return np.bool_
 
         out_dtype = get_allowed_combinations_for_operator(
             self.dtype, other.dtype, op

@@ -1072,7 +1072,16 @@ def __repr__(self):
             show_dimensions = get_option("display.show_dimensions")
             if preprocess._column.categories.dtype.kind == "f":
                 pd_series = (
-                    preprocess.astype("str").to_pandas().astype("category")
+                    preprocess.astype("str")
+                    .to_pandas()
+                    .astype(
+                        dtype=pd.CategoricalDtype(
+                            categories=preprocess.dtype.categories.astype(
+                                "str"
+                            ).to_pandas(),
+                            ordered=preprocess.dtype.ordered,
+                        )
+                    )
                 )
             else:
                 pd_series = preprocess.to_pandas()

@@ -1,4 +1,4 @@
-# Copyright (c) 2018, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
 from itertools import product
 from math import floor
@@ -22,7 +22,7 @@ def test_applymap_round(nelem, masked):
         # Make mask
         bitmask = utils.random_bitmask(nelem)
         boolmask = np.asarray(
-            utils.expand_bits_to_bytes(bitmask), dtype=np.bool
+            utils.expand_bits_to_bytes(bitmask), dtype=np.bool_
         )[:nelem]
         data[~boolmask] = np.nan
 

@@ -290,15 +290,15 @@ def test_validity_add(nelem, lhs_nulls, rhs_nulls):
     res = lhs + rhs
     if lhs_nulls == "some" and rhs_nulls == "some":
         res_mask = np.asarray(
-            utils.expand_bits_to_bytes(lhs_mask & rhs_mask), dtype=np.bool
+            utils.expand_bits_to_bytes(lhs_mask & rhs_mask), dtype=np.bool_
         )[:nelem]
     if lhs_nulls == "some" and rhs_nulls == "none":
         res_mask = np.asarray(
-            utils.expand_bits_to_bytes(lhs_mask), dtype=np.bool
+            utils.expand_bits_to_bytes(lhs_mask), dtype=np.bool_
         )[:nelem]
     if lhs_nulls == "none" and rhs_nulls == "some":
         res_mask = np.asarray(
-            utils.expand_bits_to_bytes(rhs_mask), dtype=np.bool
+            utils.expand_bits_to_bytes(rhs_mask), dtype=np.bool_
         )[:nelem]
     # Fill NA values
     na_value = -10000

@@ -1214,7 +1214,7 @@ def test_dataframe_concat_different_numerical_columns(dtype1, dtype2):
 
 
 def test_dataframe_concat_different_column_types():
-    df1 = cudf.Series([42], dtype=np.float)
+    df1 = cudf.Series([42], dtype=np.float64)
     df2 = cudf.Series(["a"], dtype="category")
     with pytest.raises(ValueError):
         cudf.concat([df1, df2])
@@ -2353,7 +2353,7 @@ def check_frame_series_equality(left, right):
 
 def test_tail_for_string():
     gdf = cudf.DataFrame()
-    gdf["id"] = cudf.Series(["a", "b"], dtype=np.object)
+    gdf["id"] = cudf.Series(["a", "b"], dtype=np.object_)
     gdf["v"] = cudf.Series([1, 2])
     assert_eq(gdf.tail(3), gdf.to_pandas().tail(3))
 

@@ -821,17 +821,17 @@ def test_join_empty_table_dtype():
     "column_a",
     [
         (
-            pd.Series([None, 1, 2, 3, 4, 5, 6, 7]).astype(np.float),
-            pd.Series([8, 9, 10, 11, 12, None, 14, 15]).astype(np.float),
+            pd.Series([None, 1, 2, 3, 4, 5, 6, 7], dtype=np.float64),
+            pd.Series([8, 9, 10, 11, 12, None, 14, 15], dtype=np.float64),
         )
     ],
 )
 @pytest.mark.parametrize(
     "column_b",
     [
         (
-            pd.Series([0, 1, 0, None, 1, 0, 0, 0]).astype(np.float),
-            pd.Series([None, 1, 2, 1, 2, 2, 0, 0]).astype(np.float),
+            pd.Series([0, 1, 0, None, 1, 0, 0, 0], dtype=np.float64),
+            pd.Series([None, 1, 2, 1, 2, 2, 0, 0], dtype=np.float64),
         )
     ],
 )

@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
 import numpy as np
 import pandas as pd
@@ -23,8 +23,8 @@ def test_to_pandas():
 
     # Notice, the dtype differ when Pandas and cudf boolean series
     # contains None/NaN
-    assert df["c"].dtype == np.bool
-    assert pdf["c"].dtype == np.object
+    assert df["c"].dtype == np.bool_
+    assert pdf["c"].dtype == np.object_
 
     assert len(df["a"]) == len(pdf["a"])
     assert len(df["b"]) == len(pdf["b"])

@@ -1433,12 +1433,25 @@ def test_categorical_series_with_nan_repr():
     4     NaN
     5    <NA>
     dtype: category
-    Categories (4, float64): [1.0, 10.0, 2.0, NaN]
+    Categories (4, float64): [1.0, 2.0, 10.0, NaN]
     """
     )
 
     assert series.__repr__().split() == expected_repr.split()
 
+    sliced_expected_repr = textwrap.dedent(
+        """
+        2     NaN
+        3    10.0
+        4     NaN
+        5    <NA>
+        dtype: category
+        Categories (4, float64): [1.0, 2.0, 10.0, NaN]
+        """
+    )
+
+    assert series[2:].__repr__().split() == sliced_expected_repr.split()
+
 
 def test_categorical_dataframe_with_nan_repr():
     series = cudf.Series(
@@ -1469,7 +1482,14 @@ def test_categorical_index_with_nan_repr():
 
     expected_repr = (
         "CategoricalIndex([1.0, 2.0, NaN, 10.0, NaN, <NA>], "
-        "categories=[1.0, 10.0, 2.0, NaN], ordered=False, dtype='category')"
+        "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')"
     )
 
     assert cat_index.__repr__() == expected_repr
+
+    sliced_expected_repr = (
+        "CategoricalIndex([NaN, 10.0, NaN, <NA>], "
+        "categories=[1.0, 2.0, 10.0, NaN], ordered=False, dtype='category')"
+    )
+
+    assert cat_index[2:].__repr__() == sliced_expected_repr
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 import re
 from collections.abc import Mapping, Sequence
@@ -259,7 +259,9 @@ def gen_rand(dtype, size, **kwargs):
     elif dtype.kind == "b":
         low = kwargs.get("low", 0)
         high = kwargs.get("high", 2)
-        return np.random.randint(low=low, high=high, size=size).astype(np.bool)
+        return np.random.randint(low=low, high=high, size=size).astype(
+            np.bool_
+        )
     elif dtype.kind == "M":
         low = kwargs.get("low", 0)
         time_unit, _ = np.datetime_data(dtype)

@@ -194,7 +194,7 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def get_null_series(size, dtype=np.bool):
+def get_null_series(size, dtype=np.bool_):
     """
     Creates a null series of provided dtype and size