From 60c7c876f9a6fa7be44ccbe81902c7c113d6df5b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 4 Aug 2021 08:35:06 -0400
Subject: [PATCH 01/20] Replace np.dtype -> cudf.dtype

---
 python/cudf/cudf/__init__.py                  |  1 +
 python/cudf/cudf/api/types.py                 | 10 ++++++++++
 python/cudf/cudf/core/column/column.py        |  8 ++++----
 python/cudf/cudf/core/column/datetime.py      |  4 ++--
 python/cudf/cudf/core/column/numerical.py     |  6 +++---
 python/cudf/cudf/core/column/string.py        |  8 ++++----
 python/cudf/cudf/core/column/timedelta.py     |  4 ++--
 python/cudf/cudf/core/scalar.py               |  3 ++-
 python/cudf/cudf/core/series.py               |  2 +-
 python/cudf/cudf/testing/_utils.py            |  2 +-
 python/cudf/cudf/testing/dataset_generator.py | 10 +++++-----
 python/cudf/cudf/tests/test_binops.py         | 12 ++++++------
 python/cudf/cudf/tests/test_categorical.py    |  2 +-
 python/cudf/cudf/tests/test_contains.py       |  5 +++--
 python/cudf/cudf/tests/test_joining.py        |  2 +-
 python/cudf/cudf/tests/test_label_encode.py   |  3 ++-
 python/cudf/cudf/tests/test_numerical.py      |  2 +-
 python/cudf/cudf/tests/test_reductions.py     | 14 +++++++-------
 python/cudf/cudf/tests/test_repr.py           |  4 ++--
 python/cudf/cudf/tests/test_scalar.py         |  4 ++--
 python/cudf/cudf/tests/test_udf_binops.py     |  3 ++-
 python/cudf/cudf/tests/test_unaops.py         |  4 ++--
 python/cudf/cudf/utils/dtypes.py              |  8 ++++----
 python/cudf/cudf/utils/utils.py               |  2 +-
 24 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index 2d52b517242..23621f1e315 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -15,6 +15,7 @@
     register_index_accessor,
     register_series_accessor,
 )
+from cudf.api.types import dtype
 from cudf.core import (
     NA,
     BaseIndex,
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index 01af22f70bf..a1237e34366 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -27,6 +27,16 @@
 )
 
 
+def dtype(arbitrary):
+    try:
+        return np.dtype(arbitrary)
+    except TypeError:
+        pass
+    if isinstance(arbitrary, cudf.core.dtypes._BaseDtype):
+        return arbitrary
+    return pd.api.types.pandas_type(arbitrary)
+
+
 def is_numeric_dtype(obj):
     """Check whether the provided array or dtype is of a numeric dtype.
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d449d52927e..f2945694a16 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -432,7 +432,7 @@ def view(self, dtype: Dtype) -> ColumnBase:
 
         """
 
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
 
         if dtype.kind in ("o", "u", "s"):
             raise TypeError(
@@ -2078,11 +2078,11 @@ def as_column(
                             data
                         )
                     dtype = pd.api.types.pandas_dtype(dtype)
-                    np_type = np.dtype(dtype).type
+                    np_type = cudf.dtype(dtype).type
                     if np_type == np.bool_:
                         pa_type = pa.bool_()
                     else:
-                        pa_type = np_to_pa_dtype(np.dtype(dtype))
+                        pa_type = np_to_pa_dtype(cudf.dtype(dtype))
                 data = as_column(
                     pa.array(
                         arbitrary,
@@ -2131,7 +2131,7 @@ def _construct_array(
     Construct a CuPy or NumPy array from `arbitrary`
     """
     try:
-        dtype = dtype if dtype is None else np.dtype(dtype)
+        dtype = dtype if dtype is None else cudf.dtype(dtype)
         arbitrary = cupy.asarray(arbitrary, dtype=dtype)
     except (TypeError, ValueError):
         native_dtype = dtype
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index f3d1880b290..4d99308d128 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -71,7 +71,7 @@ def __init__(
         mask : Buffer; optional
             The validity mask
         """
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
         if size is None:
@@ -236,7 +236,7 @@ def __cuda_array_interface__(self) -> Mapping[builtins.str, Any]:
         return output
 
     def as_datetime_column(self, dtype: Dtype, **kwargs) -> DatetimeColumn:
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         if dtype == self.dtype:
             return self
         return libcudf.unary.cast(self, dtype=dtype)
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index a3f4a82a7dc..0955039dafd 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -53,7 +53,7 @@ def __init__(
             The dtype associated with the data Buffer
         mask : Buffer, optional
         """
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
         if size is None:
@@ -253,7 +253,7 @@ def as_decimal_column(
         return libcudf.unary.cast(self, dtype)
 
     def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn:
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         if dtype == self.dtype:
             return self
         return libcudf.unary.cast(self, dtype)
@@ -608,7 +608,7 @@ def _safe_cast_to_int(col: ColumnBase, dtype: DtypeObj) -> ColumnBase:
     else:
         raise TypeError(
             f"Cannot safely cast non-equivalent "
-            f"{col.dtype.type.__name__} to {np.dtype(dtype).type.__name__}"
+            f"{col.dtype.type.__name__} to {cudf.dtype(dtype).type.__name__}"
         )
 
 
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 92c57477465..ed776b62470 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5054,7 +5054,7 @@ def __contains__(self, item: ScalarLike) -> bool:
     def as_numerical_column(
         self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.NumericalColumn":
-        out_dtype = np.dtype(dtype)
+        out_dtype = cudf.dtype(dtype)
 
         if out_dtype.kind in {"i", "u"}:
             if not libstrings.is_integer(self).all():
@@ -5096,7 +5096,7 @@ def _as_datetime_or_timedelta_column(self, dtype, format):
     def as_datetime_column(
         self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.DatetimeColumn":
-        out_dtype = np.dtype(dtype)
+        out_dtype = cudf.dtype(dtype)
 
         # infer on host from the first not na element
         # or return all null column if all values
@@ -5120,7 +5120,7 @@ def as_datetime_column(
     def as_timedelta_column(
         self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.TimeDeltaColumn":
-        out_dtype = np.dtype(dtype)
+        out_dtype = cudf.dtype(dtype)
         format = "%D days %H:%M:%S"
         return self._as_datetime_or_timedelta_column(out_dtype, format)
 
@@ -5379,7 +5379,7 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase":
             raise ValueError(
                 "Can not produce a view of a string column with nulls"
             )
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         str_byte_offset = self.base_children[0].element_indexing(self.offset)
         str_end_byte_offset = self.base_children[0].element_indexing(
             self.offset + self.size
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index a27c20cc50c..2c893fafae7 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -60,7 +60,7 @@ def __init__(
             The number of null values.
             If None, it is calculated automatically.
         """
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         if data.size % dtype.itemsize:
             raise ValueError("Buffer size must be divisible by element size")
         if size is None:
@@ -353,7 +353,7 @@ def as_string_column(
             )
 
     def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn:
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
         if dtype == self.dtype:
             return self
         return libcudf.unary.cast(self, dtype=dtype)
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index c6663a25684..75be36a1b16 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -5,6 +5,7 @@
 import pyarrow as pa
 from pandas._libs.missing import NAType as pd_NAType
 
+import cudf
 from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar
 from cudf.core.column.column import ColumnBase
 from cudf.core.dtypes import Decimal64Dtype, ListDtype, StructDtype
@@ -171,7 +172,7 @@ def _preprocess_host_value(self, value, dtype):
                 dtype = value.dtype
 
         if not isinstance(dtype, Decimal64Dtype):
-            dtype = np.dtype(dtype)
+            dtype = cudf.dtype(dtype)
 
         if not valid:
             value = NA
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index fb197fbc90d..0080207c908 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3774,7 +3774,7 @@ def one_hot_encoding(self, cats, dtype="float64"):
             cats = cats.to_pandas()
         else:
             cats = pd.Series(cats, dtype="object")
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
 
         def encode(cat):
             if cat is None:
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 672e83e6f64..68914c9b0e2 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -245,7 +245,7 @@ def _get_args_kwars_for_assert_exceptions(func_args_and_kwargs):
 
 
 def gen_rand(dtype, size, **kwargs):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if dtype.kind == "f":
         res = np.random.random(size=size).astype(dtype)
         if kwargs.get("positive_only", False):
diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py
index 5e03068f818..4a475c52777 100644
--- a/python/cudf/cudf/testing/dataset_generator.py
+++ b/python/cudf/cudf/testing/dataset_generator.py
@@ -380,7 +380,7 @@ def rand_dataframe(
                 )
             )
         else:
-            dtype = np.dtype(dtype)
+            dtype = cudf.dtype(dtype)
             if dtype.kind in ("i", "u"):
                 column_params.append(
                     ColumnParameters(
@@ -428,7 +428,7 @@ def rand_dataframe(
                             dtype=dtype, size=cardinality
                         ),
                         is_sorted=False,
-                        dtype=np.dtype(dtype),
+                        dtype=cudf.dtype(dtype),
                     )
                 )
             elif dtype.kind == "m":
@@ -440,7 +440,7 @@ def rand_dataframe(
                             dtype=dtype, size=cardinality
                         ),
                         is_sorted=False,
-                        dtype=np.dtype(dtype),
+                        dtype=cudf.dtype(dtype),
                     )
                 )
             elif dtype.kind == "b":
@@ -450,7 +450,7 @@ def rand_dataframe(
                         null_frequency=null_frequency,
                         generator=boolean_generator(cardinality),
                         is_sorted=False,
-                        dtype=np.dtype(dtype),
+                        dtype=cudf.dtype(dtype),
                     )
                 )
             else:
@@ -538,7 +538,7 @@ def get_values_for_nested_data(dtype, lists_max_length):
     Returns list of values based on dtype.
     """
     cardinality = np.random.randint(0, lists_max_length)
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if dtype.kind in ("i", "u"):
         values = int_generator(dtype=dtype, size=cardinality)()
     elif dtype.kind == "f":
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 8277b8e7b32..c49b6d794d9 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -931,7 +931,7 @@ def test_ufunc_ops(lhs, rhs, ops):
 def dtype_scalar(val, dtype):
     if dtype == "str":
         return str(val)
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if dtype.type in {np.datetime64, np.timedelta64}:
         res, _ = np.datetime_data(dtype)
         return dtype.type(val, res)
@@ -1695,13 +1695,13 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype):
     )
 
     if dtype == "datetime64[s]":
-        val = np.dtype(dtype).type(4, "s")
+        val = cudf.dtype(dtype).type(4, "s")
     elif dtype == "timedelta64[s]":
-        val = np.dtype(dtype).type(4, "s")
+        val = cudf.dtype(dtype).type(4, "s")
     elif dtype == "category":
         val = np.int64(4)
     else:
-        val = np.dtype(dtype).type(4)
+        val = cudf.dtype(dtype).type(4)
 
     expected = val == data.to_pandas()
     got = val == data
@@ -2793,11 +2793,11 @@ def test_column_null_scalar_comparison(dtype, null_scalar, cmpop):
     # a new series where all the elements are <NA>.
 
     if isinstance(null_scalar, np.datetime64):
-        if np.dtype(dtype).kind not in "mM":
+        if cudf.dtype(dtype).kind not in "mM":
             pytest.skip()
         null_scalar = null_scalar.astype(dtype)
 
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
 
     data = [1, 2, 3, 4, 5]
     sr = cudf.Series(data, dtype=dtype)
diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
index d8e10a62a12..51327038c39 100644
--- a/python/cudf/cudf/tests/test_categorical.py
+++ b/python/cudf/cudf/tests/test_categorical.py
@@ -799,7 +799,7 @@ def test_categorical_setitem_with_nan():
 @pytest.mark.parametrize("dtype", list(NUMERIC_TYPES) + ["object"])
 @pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]])
 def test_series_construction_with_nulls(input_obj, dtype):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     input_obj = [
         dtype.type(v) if v is not cudf.NA else cudf.NA for v in input_obj
     ]
diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py
index b6650600261..f06142f4cc9 100644
--- a/python/cudf/cudf/tests/test_contains.py
+++ b/python/cudf/cudf/tests/test_contains.py
@@ -4,6 +4,7 @@
 import pandas as pd
 import pytest
 
+import cudf
 from cudf import Series
 from cudf.core.index import RangeIndex, as_index
 from cudf.testing._utils import (
@@ -82,7 +83,7 @@ def test_rangeindex_contains():
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES)
 def test_lists_contains(dtype):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     inner_data = np.array([1, 2, 3], dtype=dtype)
 
     data = Series([inner_data])
@@ -96,7 +97,7 @@ def test_lists_contains(dtype):
 
 @pytest.mark.parametrize("dtype", DATETIME_TYPES + TIMEDELTA_TYPES)
 def test_lists_contains_datetime(dtype):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     inner_data = np.array([1, 2, 3])
 
     unit, _ = np.datetime_data(dtype)
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 7b56f864272..da5b85b4e37 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -810,7 +810,7 @@ def test_join_datetimes_index(dtype):
     pdf = pdf_lhs.join(pdf_rhs, sort=True)
     gdf = gdf_lhs.join(gdf_rhs, sort=True)
 
-    assert gdf["d"].dtype == np.dtype(dtype)
+    assert gdf["d"].dtype == cudf.dtype(dtype)
 
     assert_join_results_equal(pdf, gdf, how="inner")
 
diff --git a/python/cudf/cudf/tests/test_label_encode.py b/python/cudf/cudf/tests/test_label_encode.py
index 29a787768f2..f513aa7a134 100644
--- a/python/cudf/cudf/tests/test_label_encode.py
+++ b/python/cudf/cudf/tests/test_label_encode.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pytest
 
+import cudf
 from cudf.core import DataFrame, Series
 
 
@@ -18,7 +19,7 @@ def _random_int(nelem, dtype):
 
 
 def _random(nelem, dtype):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if dtype.kind in {"i", "u"}:
         return _random_int(nelem, dtype)
     elif dtype.kind == "f":
diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py
index 7a766a49a62..2e1ce5cddfc 100644
--- a/python/cudf/cudf/tests/test_numerical.py
+++ b/python/cudf/cudf/tests/test_numerical.py
@@ -390,7 +390,7 @@ def test_to_numeric_error(data, errors):
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES)
 @pytest.mark.parametrize("input_obj", [[1, cudf.NA, 3]])
 def test_series_construction_with_nulls(dtype, input_obj):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     # numpy case
 
     expect = pd.Series(input_obj, dtype=cudf_dtypes_to_pandas_dtypes[dtype])
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index 7cbc56f943c..b7bbefb8c58 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -25,7 +25,7 @@
 
 @pytest.mark.parametrize("dtype,nelem", params)
 def test_sum(dtype, nelem):
-    dtype = np.dtype(dtype).type
+    dtype = cudf.dtype(dtype).type
     data = gen_rand(dtype, nelem)
     sr = Series(data)
 
@@ -69,8 +69,8 @@ def test_sum_decimal(dtype, nelem):
 @pytest.mark.parametrize("dtype,nelem", params)
 def test_product(dtype, nelem):
     np.random.seed(0)
-    dtype = np.dtype(dtype).type
-    if np.dtype(dtype).kind in {"u", "i"}:
+    dtype = cudf.dtype(dtype).type
+    if cudf.dtype(dtype).kind in {"u", "i"}:
         data = np.ones(nelem, dtype=dtype)
         # Set at most 30 items to [0..2) to keep the value within 2^32
         for _ in range(30):
@@ -107,7 +107,7 @@ def test_product_decimal(dtype):
 
 @pytest.mark.parametrize("dtype,nelem", params)
 def test_sum_of_squares(dtype, nelem):
-    dtype = np.dtype(dtype).type
+    dtype = cudf.dtype(dtype).type
     data = gen_rand(dtype, nelem)
     sr = Series(data)
 
@@ -115,7 +115,7 @@ def test_sum_of_squares(dtype, nelem):
     # got = dtype(got)
     expect = (data ** 2).sum()
 
-    if np.dtype(dtype).kind in {"u", "i"}:
+    if cudf.dtype(dtype).kind in {"u", "i"}:
         if 0 <= expect <= np.iinfo(dtype).max:
             np.testing.assert_array_almost_equal(expect, got)
         else:
@@ -141,7 +141,7 @@ def test_sum_of_squares_decimal(dtype):
 
 @pytest.mark.parametrize("dtype,nelem", params)
 def test_min(dtype, nelem):
-    dtype = np.dtype(dtype).type
+    dtype = cudf.dtype(dtype).type
     data = gen_rand(dtype, nelem)
     sr = Series(data)
 
@@ -167,7 +167,7 @@ def test_min_decimal(dtype, nelem):
 
 @pytest.mark.parametrize("dtype,nelem", params)
 def test_max(dtype, nelem):
-    dtype = np.dtype(dtype).type
+    dtype = cudf.dtype(dtype).type
     data = gen_rand(dtype, nelem)
     sr = Series(data)
 
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index 4906349ecba..3ef0e2edaed 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -24,11 +24,11 @@ def test_null_series(nrows, dtype):
     data = cudf.Series(np.random.randint(1, 9, size))
     column = data.set_mask(mask)
     sr = cudf.Series(column).astype(dtype)
-    if dtype != "category" and np.dtype(dtype).kind in {"u", "i"}:
+    if dtype != "category" and cudf.dtype(dtype).kind in {"u", "i"}:
         ps = pd.Series(
             sr._column.data_array_view.copy_to_host(),
             dtype=cudf_dtypes_to_pandas_dtypes.get(
-                np.dtype(dtype), np.dtype(dtype)
+                cudf.dtype(dtype), cudf.dtype(dtype)
             ),
         )
         ps[sr.isnull().to_pandas()] = pd.NA
diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py
index 605005f41fc..a9919900256 100644
--- a/python/cudf/cudf/tests/test_scalar.py
+++ b/python/cudf/cudf/tests/test_scalar.py
@@ -198,7 +198,7 @@ def test_null_scalar(dtype):
     s = cudf.Scalar(None, dtype=dtype)
     assert s.value is cudf.NA
     assert s.dtype == (
-        np.dtype(dtype)
+        cudf.dtype(dtype)
         if not isinstance(dtype, cudf.Decimal64Dtype)
         else dtype
     )
@@ -239,7 +239,7 @@ def test_generic_null_scalar_construction_fails(value):
 def test_scalar_dtype_and_validity(dtype):
     s = cudf.Scalar(1, dtype=dtype)
 
-    assert s.dtype == np.dtype(dtype)
+    assert s.dtype == cudf.dtype(dtype)
     assert s.is_valid() is True
 
 
diff --git a/python/cudf/cudf/tests/test_udf_binops.py b/python/cudf/cudf/tests/test_udf_binops.py
index df7361ab183..5a5aca615ba 100644
--- a/python/cudf/cudf/tests/test_udf_binops.py
+++ b/python/cudf/cudf/tests/test_udf_binops.py
@@ -6,6 +6,7 @@
 from numba.cuda import compile_ptx
 from numba.np import numpy_support
 
+import cudf
 from cudf import _lib as libcudf
 from cudf.core import Series
 from cudf.utils import dtypes as dtypeutils
@@ -27,7 +28,7 @@ def test_generic_ptx(dtype):
     def generic_function(a, b):
         return a ** 3 + b
 
-    nb_type = numpy_support.from_dtype(np.dtype(dtype))
+    nb_type = numpy_support.from_dtype(cudf.dtype(dtype))
     type_signature = (nb_type, nb_type)
 
     ptx_code, output_type = compile_ptx(
diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py
index 2089f764724..c549dd2712b 100644
--- a/python/cudf/cudf/tests/test_unaops.py
+++ b/python/cudf/cudf/tests/test_unaops.py
@@ -35,7 +35,7 @@ def test_series_invert(dtype):
 def test_series_not(dtype):
     import pandas as pd
 
-    dtype = np.dtype(dtype).type
+    dtype = cudf.dtype(dtype).type
     arr = pd.Series(np.random.choice([True, False], 1000)).astype(dtype)
     if dtype is not np.bool_:
         arr = arr * (np.random.random(1000) * 100).astype(dtype)
@@ -134,7 +134,7 @@ def generate_valid_scalar_unaop_combos():
 
 @pytest.mark.parametrize("slr,dtype,op", generate_valid_scalar_unaop_combos())
 def test_scalar_unary_operations(slr, dtype, op):
-    slr_host = np.dtype(dtype).type(slr)
+    slr_host = cudf.dtype(dtype).type(slr)
     slr_device = cudf.Scalar(slr, dtype=dtype)
 
     expect = op(slr_host)
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index e1ae87e5089..db7de4441ec 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -140,7 +140,7 @@ def np_to_pa_dtype(dtype):
             return pa.duration(time_unit)
         # default fallback unit is ns
         return pa.duration("ns")
-    return _np_pa_dtypes[np.dtype(dtype).type]
+    return _np_pa_dtypes[cudf.dtype(dtype).type]
 
 
 def get_numeric_type_info(dtype):
@@ -202,7 +202,7 @@ def cudf_dtype_to_pa_type(dtype):
     ):
         return dtype.to_arrow()
     else:
-        return np_to_pa_dtype(np.dtype(dtype))
+        return np_to_pa_dtype(cudf.dtype(dtype))
 
 
 def cudf_dtype_from_pa_type(typ):
@@ -404,7 +404,7 @@ def check_cast_unsupported_dtype(dtype):
     if isinstance(dtype, pd.core.arrays.numpy_.PandasDtype):
         dtype = dtype.numpy_dtype
     else:
-        dtype = np.dtype(dtype)
+        dtype = cudf.dtype(dtype)
 
     if dtype in cudf._lib.types.np_to_cudf_types:
         return dtype
@@ -438,7 +438,7 @@ def get_time_unit(obj):
 
 
 def _get_nan_for_dtype(dtype):
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if pd.api.types.is_datetime64_dtype(
         dtype
     ) or pd.api.types.is_timedelta64_dtype(dtype):
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 209f61ad399..5804c794f97 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -70,7 +70,7 @@ def scalar_broadcast_to(scalar, size, dtype=None):
     scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
     dtype = scalar.dtype
 
-    if np.dtype(dtype).kind in ("O", "U"):
+    if cudf.dtype(dtype).kind in ("O", "U"):
         gather_map = column.full(size, 0, dtype="int32")
         scalar_str_col = column.as_column([scalar], dtype="str")
         return scalar_str_col[gather_map]

From 5e50f522b3d8b235b5a6d4362148b0bbb6dd94d5 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 4 Aug 2021 09:29:05 -0400
Subject: [PATCH 02/20] First stab at cudf.dtype

---
 python/cudf/cudf/api/types.py         | 19 +++++++++++++++++--
 python/cudf/cudf/core/dtypes.py       |  6 ++++++
 python/cudf/cudf/tests/test_dtypes.py | 21 +++++++++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index a1237e34366..fc999ae422b 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -29,12 +29,27 @@
 
 def dtype(arbitrary):
     try:
-        return np.dtype(arbitrary)
+        np_dtype = np.dtype(arbitrary)
+        if np_dtype.name == "float16":
+            np_dtype = np.dtype("float32")
+        elif np_dtype.name in ("object", "str"):
+            np_dtype = np.dtype("object")
+        return np_dtype
     except TypeError:
         pass
     if isinstance(arbitrary, cudf.core.dtypes._BaseDtype):
         return arbitrary
-    return pd.api.types.pandas_type(arbitrary)
+    elif isinstance(arbitrary, pd.CategoricalDtype):
+        return cudf.CategoricalDtype.from_pandas(arbitrary)
+    elif isinstance(arbitrary, pd.IntervalDtype):
+        return cudf.IntervalDtype.from_pandas(arbitrary)
+    pd_dtype = pd.api.types.pandas_dtype(arbitrary)
+    try:
+        return pd_dtype.numpy_dtype
+    except AttributeError:
+        # no NumPy type corresponding to this type
+        # always object?
+        return np.dtype("object")
 
 
 def is_numeric_dtype(obj):
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 6dbe55d0bb8..4062b734bb3 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -559,6 +559,12 @@ def to_arrow(self):
             pa.from_numpy_dtype(self.subtype), self.closed
         )
 
+    @classmethod
+    def from_pandas(cls, pd_dtype: pd.IntervalDtype) -> "IntervalDtype":
+        return cls(
+            subtype=pd_dtype.subtype
+        )  # TODO: needs `closed` when we upgrade Pandas
+
 
 def is_categorical_dtype(obj):
     """Check whether an array-like or dtype is of the Categorical dtype.
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index 41d7f5d215e..d21f67d1def 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -257,3 +257,24 @@ def test_lists_of_structs_dtype(data):
 
     assert_column_array_dtype_equal(got._column, expected)
     assert expected.equals(got._column.to_arrow())
+
+
+@pytest.mark.parametrize(
+    "in_dtype,expect",
+    [
+        (np.dtype("int8"), np.dtype("int8")),
+        (np.int8, np.dtype("int8")),
+        (np.float16, np.dtype("float32")),
+        (pd.Int8Dtype(), np.dtype("int8")),
+        (pd.StringDtype(), np.dtype("object")),
+        ("int8", np.dtype("int8")),
+        ("boolean", np.dtype("bool")),
+        (int, np.dtype("int64")),
+        (float, np.dtype("float64")),
+        (cudf.ListDtype("int64"), cudf.ListDtype("int64")),
+        ("float16", np.dtype("float32")),
+        (np.dtype("U"), np.dtype("object")),
+    ],
+)
+def test_dtype(in_dtype, expect):
+    assert_eq(cudf.dtype(in_dtype), expect)

From 367b743167a29a8841eba42ddacf6ee0476b0d44 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 4 Aug 2021 10:12:04 -0400
Subject: [PATCH 03/20] Handle datetimes/timedeltas in cudf.dtype

---
 python/cudf/cudf/api/types.py         |  4 ++++
 python/cudf/cudf/tests/test_dtypes.py | 10 ++++++++++
 2 files changed, 14 insertions(+)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index fc999ae422b..f1d87a6761b 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -34,6 +34,10 @@ def dtype(arbitrary):
             np_dtype = np.dtype("float32")
         elif np_dtype.name in ("object", "str"):
             np_dtype = np.dtype("object")
+        elif np_dtype.str == "<m8":
+            np_dtype = np.dtype("<m8[ns]")
+        elif np_dtype.str == "<M8":
+            np_dtype = np.dtype("<M8[ns]")
         return np_dtype
     except TypeError:
         pass
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index d21f67d1def..cc2554eb78b 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -274,6 +274,16 @@ def test_lists_of_structs_dtype(data):
         (cudf.ListDtype("int64"), cudf.ListDtype("int64")),
         ("float16", np.dtype("float32")),
         (np.dtype("U"), np.dtype("object")),
+        ("timedelta64", np.dtype("<m8[ns]")),
+        ("timedelta64[ns]", np.dtype("<m8[ns]")),
+        ("timedelta64[ms]", np.dtype("<m8[ms]")),
+        ("timedelta64[D]", np.dtype("<m8[D]")),
+        ("<m8[s]", np.dtype("<m8[s]")),
+        ("datetime64", np.dtype("<M8[ns]")),
+        ("datetime64[ns]", np.dtype("<M8[ns]")),
+        ("datetime64[ms]", np.dtype("<M8[ms]")),
+        ("datetime64[D]", np.dtype("<M8[D]")),
+        ("<M8[s]", np.dtype("<M8[s]")),
     ],
 )
 def test_dtype(in_dtype, expect):

From d04a5f190bd620bb95e0d4fa41ea22e491930e98 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 4 Aug 2021 12:02:49 -0400
Subject: [PATCH 04/20] Fix test_binops_with_lhs_numpy_scalar for str dtype

---
 python/cudf/cudf/tests/test_binops.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index c49b6d794d9..d8761057683 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -1700,6 +1700,8 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype):
         val = cudf.dtype(dtype).type(4, "s")
     elif dtype == "category":
         val = np.int64(4)
+    elif dtype == "str":
+        val = str(4)
     else:
         val = cudf.dtype(dtype).type(4)
 

From 85351e99bfd54c8dec2efbe4ef65fd6372ff1bc8 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 5 Aug 2021 12:35:30 -0400
Subject: [PATCH 05/20] Handle disallowed numpy types

---
 python/cudf/cudf/api/types.py         | 7 +++++--
 python/cudf/cudf/tests/test_dtypes.py | 8 ++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index f1d87a6761b..8547ec0310c 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -32,15 +32,18 @@ def dtype(arbitrary):
         np_dtype = np.dtype(arbitrary)
         if np_dtype.name == "float16":
             np_dtype = np.dtype("float32")
-        elif np_dtype.name in ("object", "str"):
+        elif np_dtype.kind in ("OU"):
             np_dtype = np.dtype("object")
         elif np_dtype.str == "<m8":
             np_dtype = np.dtype("<m8[ns]")
         elif np_dtype.str == "<M8":
             np_dtype = np.dtype("<M8[ns]")
-        return np_dtype
     except TypeError:
         pass
+    else:
+        if np_dtype.kind not in "biufUOMm":
+            raise TypeError(f"Unsupported type {np_dtype}")
+        return np_dtype
     if isinstance(arbitrary, cudf.core.dtypes._BaseDtype):
         return arbitrary
     elif isinstance(arbitrary, pd.CategoricalDtype):
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index cc2554eb78b..ad67327a105 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -288,3 +288,11 @@ def test_lists_of_structs_dtype(data):
 )
 def test_dtype(in_dtype, expect):
     assert_eq(cudf.dtype(in_dtype), expect)
+
+
+@pytest.mark.parametrize(
+    "in_dtype", ["complex", np.complex128, complex, "S", "a", "V"]
+)
+def test_dtype_raise(in_dtype):
+    with pytest.raises(TypeError):
+        cudf.dtype(in_dtype)

From 67cca8a09b1cb54b806ac138d37e1990f14d601e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Thu, 5 Aug 2021 14:09:05 -0400
Subject: [PATCH 06/20] Update python/cudf/cudf/tests/test_dtypes.py

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/tests/test_dtypes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index ad67327a105..36c05e40261 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -269,6 +269,8 @@ def test_lists_of_structs_dtype(data):
         (pd.StringDtype(), np.dtype("object")),
         ("int8", np.dtype("int8")),
         ("boolean", np.dtype("bool")),
+        ("bool_", np.dtype("bool")),
+        (np.bool_, np.dtype("bool")),
         (int, np.dtype("int64")),
         (float, np.dtype("float64")),
         (cudf.ListDtype("int64"), cudf.ListDtype("int64")),

From a10eae01af842740ca88775650817deed095b9d1 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 6 Aug 2021 13:50:26 -0400
Subject: [PATCH 07/20] Some fixes

---
 python/cudf/cudf/api/types.py                 | 39 +++++++++++++------
 python/cudf/cudf/core/column/column.py        | 20 +++++-----
 python/cudf/cudf/testing/dataset_generator.py | 12 +++---
 3 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index 8547ec0310c..bbcbfcbbe17 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -28,35 +28,52 @@
 
 
 def dtype(arbitrary):
+    """
+    Returns the cuDF-supported dtype corresponding to `arbitrary`
+
+    Inputs
+    ------
+    arbitrary: dtype or scalar-like
+
+    Returns
+    -------
+    dtype: the cuDF-supported dtype that best matches `arbitrary`
+    """
+    # first, try interpreting arbitrary as a NumPy dtype that we support:
     try:
         np_dtype = np.dtype(arbitrary)
         if np_dtype.name == "float16":
             np_dtype = np.dtype("float32")
         elif np_dtype.kind in ("OU"):
             np_dtype = np.dtype("object")
-        elif np_dtype.str == "<m8":
-            np_dtype = np.dtype("<m8[ns]")
-        elif np_dtype.str == "<M8":
-            np_dtype = np.dtype("<M8[ns]")
     except TypeError:
         pass
     else:
         if np_dtype.kind not in "biufUOMm":
             raise TypeError(f"Unsupported type {np_dtype}")
         return np_dtype
+
+    #  next, check if `arbitrary` is one of our extension types:
     if isinstance(arbitrary, cudf.core.dtypes._BaseDtype):
         return arbitrary
-    elif isinstance(arbitrary, pd.CategoricalDtype):
-        return cudf.CategoricalDtype.from_pandas(arbitrary)
-    elif isinstance(arbitrary, pd.IntervalDtype):
-        return cudf.IntervalDtype.from_pandas(arbitrary)
+
+    # use `pandas_dtype` to try and interpret
+    # `arbitrary` as a Pandas extension type.
+    #  Return the corresponding NumPy/cuDF type.
     pd_dtype = pd.api.types.pandas_dtype(arbitrary)
     try:
         return pd_dtype.numpy_dtype
     except AttributeError:
-        # no NumPy type corresponding to this type
-        # always object?
-        return np.dtype("object")
+        if isinstance(pd_dtype, pd.CategoricalDtype):
+            return cudf.CategoricalDtype.from_pandas(pd_dtype)
+        elif isinstance(pd_dtype, pd.StringDtype):
+            return np.dtype("object")
+        elif isinstance(pd_dtype, pd.IntervalDtype):
+            return cudf.IntervalDtype.from_pandas(pd_dtype)
+        else:
+            raise TypeError(
+                f"Cannot interpret {arbitrary} as a valid cuDF dtype"
+            )
 
 
 def is_numeric_dtype(obj):
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 14c086c6710..5f4f5702cd6 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -53,7 +53,6 @@
     is_scalar,
     is_string_dtype,
     is_struct_dtype,
-    pandas_dtype,
 )
 from cudf.core.abc import Serializable
 from cudf.core.buffer import Buffer
@@ -889,7 +888,7 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
             return self.as_numerical_column(dtype, **kwargs)
         elif is_categorical_dtype(dtype):
             return self.as_categorical_column(dtype, **kwargs)
-        elif pandas_dtype(dtype).type in {
+        elif cudf.dtype(dtype).type in {
             np.str_,
             np.object_,
             str,
@@ -1299,7 +1298,7 @@ def column_empty(
 ) -> ColumnBase:
     """Allocate a new column like the given row_count and dtype.
     """
-    dtype = pandas_dtype(dtype)
+    dtype = cudf.dtype(dtype)
     children = ()  # type: Tuple[ColumnBase, ...]
 
     if is_struct_dtype(dtype):
@@ -1364,7 +1363,7 @@ def build_column(
     offset : int, optional
     children : tuple, optional
     """
-    dtype = pandas_dtype(dtype)
+    dtype = cudf.dtype(dtype)
 
     if _is_non_decimal_numeric_dtype(dtype):
         assert data is not None
@@ -1769,9 +1768,9 @@ def as_column(
         col = ColumnBase.from_arrow(arbitrary)
         if isinstance(arbitrary, pa.NullArray):
             if type(dtype) == str and dtype == "empty":
-                new_dtype = pandas_dtype(arbitrary.type.to_pandas_dtype())
+                new_dtype = np.dtype(arbitrary.type.to_pandas_dtype())
             else:
-                new_dtype = pandas_dtype(dtype)
+                new_dtype = np.dtype(dtype)
             col = col.astype(new_dtype)
 
         return col
@@ -1865,7 +1864,7 @@ def as_column(
             arbitrary = np.ascontiguousarray(arbitrary)
 
         if dtype is not None:
-            arbitrary = arbitrary.astype(dtype)
+            arbitrary = arbitrary.astype(np.dtype(dtype))
 
         if arb_dtype.kind == "M":
 
@@ -2034,12 +2033,11 @@ def as_column(
                         return cudf.core.column.Decimal32Column.from_arrow(
                             data
                         )
-                    dtype = pd.api.types.pandas_dtype(dtype)
-                    np_type = cudf.dtype(dtype).type
+                    np_type = np.dtype(dtype).type
                     if np_type == np.bool_:
                         pa_type = pa.bool_()
                     else:
-                        pa_type = np_to_pa_dtype(cudf.dtype(dtype))
+                        pa_type = np_to_pa_dtype(np.dtype(dtype))
                 data = as_column(
                     pa.array(
                         arbitrary,
@@ -2280,7 +2278,7 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase:
 def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
     """Concatenate a sequence of columns."""
     if len(objs) == 0:
-        dtype = pandas_dtype(None)
+        dtype = cudf.dtype(None)
         return column_empty(0, dtype=dtype, masked=True)
 
     # If all columns are `NumericalColumn` with different dtypes,
diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py
index 4a475c52777..cdea22a05af 100644
--- a/python/cudf/cudf/testing/dataset_generator.py
+++ b/python/cudf/cudf/testing/dataset_generator.py
@@ -18,6 +18,7 @@
 from pyarrow import parquet as pq
 
 import cudf
+from cudf.utils.dtypes import np_to_pa_dtype
 
 
 class ColumnParameters:
@@ -94,6 +95,7 @@ def _write(tbl, path, format):
 def _generate_column(column_params, num_rows):
     # If cardinality is specified, we create a set to sample from.
     # Otherwise, we simply use the given generator to generate each value.
+
     if column_params.cardinality is not None:
         # Construct set of values to sample from where
         # set size = cardinality
@@ -127,7 +129,7 @@ def _generate_column(column_params, num_rows):
         if hasattr(column_params.dtype, "to_arrow"):
             arrow_type = column_params.dtype.to_arrow()
         elif column_params.dtype is not None:
-            arrow_type = pa.from_numpy_dtype(column_params.dtype)
+            arrow_type = np_to_pa_dtype(cudf.dtype(column_params.dtype))
         else:
             arrow_type = None
 
@@ -227,15 +229,15 @@ def get_dataframe(parameters, use_threads):
         ):
             arrow_type = pa.dictionary(
                 index_type=pa.int64(),
-                value_type=pa.from_numpy_dtype(
-                    type(next(iter(column_params.generator)))
+                value_type=np_to_pa_dtype(
+                    cudf.dtype(type(next(iter(column_params.generator))))
                 ),
             )
         elif hasattr(column_params.dtype, "to_arrow"):
             arrow_type = column_params.dtype.to_arrow()
         else:
-            arrow_type = pa.from_numpy_dtype(
-                type(next(iter(column_params.generator)))
+            arrow_type = np_to_pa_dtype(
+                cudf.dtype(type(next(iter(column_params.generator))))
                 if column_params.dtype is None
                 else column_params.dtype
             )

From 89ac918ce902aee3cb90e4772791130ae2b7e03e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 9 Aug 2021 10:00:35 -0400
Subject: [PATCH 08/20] Remaining failures

---
 python/cudf/cudf/testing/_utils.py    | 2 +-
 python/cudf/cudf/tests/test_dtypes.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 68914c9b0e2..b101835e626 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -284,7 +284,7 @@ def gen_rand(dtype, size, **kwargs):
         return pd.to_datetime(
             np.random.randint(low=low, high=high, size=size), unit=time_unit
         )
-    elif dtype.kind == "U":
+    elif dtype.kind in ("O", "U"):
         return pd.util.testing.rands_array(10, size)
     raise NotImplementedError(f"dtype.kind={dtype.kind}")
 
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index ad67327a105..98a35312527 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -274,12 +274,12 @@ def test_lists_of_structs_dtype(data):
         (cudf.ListDtype("int64"), cudf.ListDtype("int64")),
         ("float16", np.dtype("float32")),
         (np.dtype("U"), np.dtype("object")),
-        ("timedelta64", np.dtype("<m8[ns]")),
+        ("timedelta64", np.dtype("<m8")),
         ("timedelta64[ns]", np.dtype("<m8[ns]")),
         ("timedelta64[ms]", np.dtype("<m8[ms]")),
         ("timedelta64[D]", np.dtype("<m8[D]")),
         ("<m8[s]", np.dtype("<m8[s]")),
-        ("datetime64", np.dtype("<M8[ns]")),
+        ("datetime64", np.dtype("<M8")),
         ("datetime64[ns]", np.dtype("<M8[ns]")),
         ("datetime64[ms]", np.dtype("<M8[ms]")),
         ("datetime64[D]", np.dtype("<M8[D]")),

From 64a32908a301154d4cf93bc917e444bc0851ac1d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 9 Aug 2021 17:38:51 -0400
Subject: [PATCH 09/20] Style

---
 python/cudf/cudf/api/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index bbcbfcbbe17..5d495f3e21d 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -29,7 +29,7 @@
 
 def dtype(arbitrary):
     """
-    Returns the cuDF-supported dtype corresponding to `arbitrary`
+    Return the cuDF-supported dtype corresponding to `arbitrary`.
 
     Inputs
     ------

From a62ab3214f8dfe6f1d068eaf24dcbf446d87e73d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Mon, 9 Aug 2021 18:55:47 -0400
Subject: [PATCH 10/20] Update python/cudf/cudf/api/types.py

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/cudf/cudf/api/types.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index 5d495f3e21d..e1a04d5ea81 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -31,8 +31,8 @@ def dtype(arbitrary):
     """
     Return the cuDF-supported dtype corresponding to `arbitrary`.
 
-    Inputs
-    ------
+    Parameters
+    ------------
     arbitrary: dtype or scalar-like
 
     Returns

From f79e59f12e9f2f82f0a95f74da27b1b1a4364cc3 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 10 Aug 2021 15:30:06 -0400
Subject: [PATCH 11/20] np.dtype -> cudf.dtype

---
 python/cudf/cudf/__init__.py                  | 19 ++--
 python/cudf/cudf/_fuzz_testing/utils.py       | 54 +++++------
 python/cudf/cudf/_lib/__init__.py             |  1 +
 python/cudf/cudf/_lib/aggregation.pyx         |  6 +-
 python/cudf/cudf/_lib/binaryop.pyx            |  3 +-
 python/cudf/cudf/_lib/copying.pyx             |  2 +-
 python/cudf/cudf/_lib/orc.pyx                 |  2 +-
 python/cudf/cudf/_lib/parquet.pyx             |  2 +-
 python/cudf/cudf/_lib/scalar.pyx              |  4 +-
 python/cudf/cudf/_lib/string_casting.pyx      | 24 ++---
 python/cudf/cudf/_lib/transform.pyx           |  7 +-
 python/cudf/cudf/_lib/types.pyx               | 25 +++---
 python/cudf/cudf/api/types.py                 | 53 +----------
 python/cudf/cudf/comm/gpuarrow.py             |  3 +-
 python/cudf/cudf/core/__init__.py             | 30 -------
 python/cudf/cudf/core/_internals/__init__.py  |  2 -
 python/cudf/cudf/core/buffer.py               |  5 +-
 python/cudf/cudf/core/column/column.py        | 49 +++++-----
 python/cudf/cudf/core/column/datetime.py      | 12 +--
 python/cudf/cudf/core/column/numerical.py     | 28 +++---
 python/cudf/cudf/core/column/string.py        | 66 +++++++-------
 python/cudf/cudf/core/column/timedelta.py     | 14 +--
 python/cudf/cudf/core/cut.py                  |  3 +-
 python/cudf/cudf/core/dataframe.py            | 17 ++--
 python/cudf/cudf/core/dtypes.py               | 51 ++++++++++-
 python/cudf/cudf/core/frame.py                |  7 +-
 python/cudf/cudf/core/index.py                |  6 +-
 python/cudf/cudf/core/scalar.py               | 21 +++--
 python/cudf/cudf/core/tools/datetimes.py      |  2 +-
 python/cudf/cudf/core/tools/numeric.py        | 14 +--
 python/cudf/cudf/tests/test_binops.py         |  2 +-
 python/cudf/cudf/tests/test_column.py         |  2 +-
 python/cudf/cudf/tests/test_copying.py        |  2 +-
 python/cudf/cudf/tests/test_datetime.py       |  2 +-
 python/cudf/cudf/tests/test_factorize.py      |  2 +-
 python/cudf/cudf/tests/test_groupby.py        |  2 +-
 python/cudf/cudf/tests/test_label_encode.py   |  2 +-
 python/cudf/cudf/tests/test_monotonic.py      |  2 +-
 python/cudf/cudf/tests/test_numpy_interop.py  |  2 +-
 python/cudf/cudf/tests/test_onehot.py         |  4 +-
 python/cudf/cudf/tests/test_pack.py           |  2 +-
 python/cudf/cudf/tests/test_pandas_interop.py |  2 +-
 python/cudf/cudf/tests/test_pickling.py       |  2 +-
 python/cudf/cudf/tests/test_query.py          |  2 +-
 python/cudf/cudf/tests/test_rank.py           |  2 +-
 python/cudf/cudf/tests/test_reductions.py     |  2 +-
 python/cudf/cudf/tests/test_sorting.py        |  2 +-
 python/cudf/cudf/tests/test_sparse_df.py      |  2 +-
 python/cudf/cudf/tests/test_transform.py      |  2 +-
 python/cudf/cudf/tests/test_udf_binops.py     |  3 +-
 python/cudf/cudf/tests/test_unaops.py         |  2 +-
 python/cudf/cudf/utils/dtypes.py              | 90 ++++++++-----------
 python/cudf/cudf/utils/utils.py               |  2 +-
 python/dask_cudf/dask_cudf/backends.py        |  4 +-
 54 files changed, 318 insertions(+), 355 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index 23621f1e315..55145aaa166 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -8,6 +8,7 @@
 
 import rmm
 
+from cudf.api.types import dtype
 from cudf import core, datasets, testing
 from cudf._version import get_versions
 from cudf.api.extensions import (
@@ -15,35 +16,35 @@
     register_index_accessor,
     register_series_accessor,
 )
-from cudf.api.types import dtype
-from cudf.core import (
+from cudf.core.scalar import (
     NA,
+    Scalar,
+)
+from cudf.core.index import (
     BaseIndex,
     CategoricalIndex,
-    DataFrame,
     DatetimeIndex,
     Float32Index,
     Float64Index,
     Index,
+    GenericIndex,
     Int8Index,
     Int16Index,
     Int32Index,
     Int64Index,
     IntervalIndex,
-    MultiIndex,
     RangeIndex,
-    Scalar,
-    Series,
     TimedeltaIndex,
     UInt8Index,
     UInt16Index,
     UInt32Index,
     UInt64Index,
-    cut,
-    from_pandas,
     interval_range,
-    merge,
 )
+from cudf.core.dataframe import DataFrame, from_pandas, merge
+from cudf.core.series import Series
+from cudf.core.multiindex import MultiIndex
+from cudf.core.cut import cut
 from cudf.core.algorithms import factorize
 from cudf.core.dtypes import (
     CategoricalDtype,
diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py
index fe9ed4d4934..83ab02351f2 100644
--- a/python/cudf/cudf/_fuzz_testing/utils.py
+++ b/python/cudf/cudf/_fuzz_testing/utils.py
@@ -18,44 +18,44 @@
 ALL_POSSIBLE_VALUES = "ALL_POSSIBLE_VALUES"
 
 _PANDAS_TO_AVRO_SCHEMA_MAP = {
-    np.dtype("int8"): "int",
+    cudf.dtype("int8"): "int",
     pd.Int8Dtype(): ["int", "null"],
     pd.Int16Dtype(): ["int", "null"],
     pd.Int32Dtype(): ["int", "null"],
     pd.Int64Dtype(): ["long", "null"],
     pd.BooleanDtype(): ["boolean", "null"],
     pd.StringDtype(): ["string", "null"],
-    np.dtype("bool_"): "boolean",
-    np.dtype("int16"): "int",
-    np.dtype("int32"): "int",
-    np.dtype("int64"): "long",
-    np.dtype("O"): "string",
-    np.dtype("str"): "string",
-    np.dtype("float32"): "float",
-    np.dtype("float64"): "double",
-    np.dtype("<M8[ns]"): {"type": "long", "logicalType": "timestamp-millis"},
-    np.dtype("<M8[ms]"): {"type": "long", "logicalType": "timestamp-millis"},
-    np.dtype("<M8[us]"): {"type": "long", "logicalType": "timestamp-micros"},
+    cudf.dtype("bool_"): "boolean",
+    cudf.dtype("int16"): "int",
+    cudf.dtype("int32"): "int",
+    cudf.dtype("int64"): "long",
+    cudf.dtype("O"): "string",
+    cudf.dtype("str"): "string",
+    cudf.dtype("float32"): "float",
+    cudf.dtype("float64"): "double",
+    cudf.dtype("<M8[ns]"): {"type": "long", "logicalType": "timestamp-millis"},
+    cudf.dtype("<M8[ms]"): {"type": "long", "logicalType": "timestamp-millis"},
+    cudf.dtype("<M8[us]"): {"type": "long", "logicalType": "timestamp-micros"},
 }
 
 PANDAS_TO_ORC_TYPES = {
-    np.dtype("int8"): pyorc.TinyInt(),
+    cudf.dtype("int8"): pyorc.TinyInt(),
     pd.Int8Dtype(): pyorc.TinyInt(),
     pd.Int16Dtype(): pyorc.SmallInt(),
     pd.Int32Dtype(): pyorc.Int(),
     pd.Int64Dtype(): pyorc.BigInt(),
     pd.BooleanDtype(): pyorc.Boolean(),
-    np.dtype("bool_"): pyorc.Boolean(),
-    np.dtype("int16"): pyorc.SmallInt(),
-    np.dtype("int32"): pyorc.Int(),
-    np.dtype("int64"): pyorc.BigInt(),
-    np.dtype("O"): pyorc.String(),
+    cudf.dtype("bool_"): pyorc.Boolean(),
+    cudf.dtype("int16"): pyorc.SmallInt(),
+    cudf.dtype("int32"): pyorc.Int(),
+    cudf.dtype("int64"): pyorc.BigInt(),
+    cudf.dtype("O"): pyorc.String(),
     pd.StringDtype(): pyorc.String(),
-    np.dtype("float32"): pyorc.Float(),
-    np.dtype("float64"): pyorc.Double(),
-    np.dtype("<M8[ns]"): pyorc.Timestamp(),
-    np.dtype("<M8[ms]"): pyorc.Timestamp(),
-    np.dtype("<M8[us]"): pyorc.Timestamp(),
+    cudf.dtype("float32"): pyorc.Float(),
+    cudf.dtype("float64"): pyorc.Double(),
+    cudf.dtype("<M8[ns]"): pyorc.Timestamp(),
+    cudf.dtype("<M8[ms]"): pyorc.Timestamp(),
+    cudf.dtype("<M8[us]"): pyorc.Timestamp(),
 }
 
 ORC_TO_PANDAS_TYPES = {
@@ -64,10 +64,10 @@
     pyorc.Boolean().name: pd.BooleanDtype(),
     pyorc.SmallInt().name: pd.Int16Dtype(),
     pyorc.BigInt().name: pd.Int64Dtype(),
-    pyorc.String().name: np.dtype("O"),
-    pyorc.Float().name: np.dtype("float32"),
-    pyorc.Double().name: np.dtype("float64"),
-    pyorc.Timestamp().name: np.dtype("<M8[ns]"),
+    pyorc.String().name: cudf.dtype("O"),
+    pyorc.Float().name: cudf.dtype("float32"),
+    pyorc.Double().name: cudf.dtype("float64"),
+    pyorc.Timestamp().name: cudf.dtype("<M8[ns]"),
 }
 
 
diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py
index 0293518a5d9..02f0444e413 100644
--- a/python/cudf/cudf/_lib/__init__.py
+++ b/python/cudf/cudf/_lib/__init__.py
@@ -15,6 +15,7 @@
     interop,
     join,
     json,
+    labeling,
     merge,
     null_mask,
     nvtext,
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 4c94452c73d..072bc99eee2 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -27,6 +27,8 @@ from cudf._lib.types import Interpolation
 cimport cudf._lib.cpp.aggregation as libcudf_aggregation
 cimport cudf._lib.cpp.types as libcudf_types
 
+import cudf
+
 
 class AggregationKind(Enum):
     SUM = libcudf_aggregation.aggregation.Kind.SUM
@@ -251,7 +253,7 @@ cdef class Aggregation:
         nb_type = numpy_support.from_dtype(kwargs['dtype'])
         type_signature = (nb_type[:],)
         compiled_op = cudautils.compile_udf(op, type_signature)
-        output_np_dtype = np.dtype(compiled_op[1])
+        output_np_dtype = cudf.dtype(compiled_op[1])
         cpp_str = compiled_op[0].encode('UTF-8')
         if output_np_dtype not in np_to_cudf_types:
             raise TypeError(
@@ -395,7 +397,7 @@ cdef class RollingAggregation:
         nb_type = numpy_support.from_dtype(kwargs['dtype'])
         type_signature = (nb_type[:],)
         compiled_op = cudautils.compile_udf(op, type_signature)
-        output_np_dtype = np.dtype(compiled_op[1])
+        output_np_dtype = cudf.dtype(compiled_op[1])
         cpp_str = compiled_op[0].encode('UTF-8')
         if output_np_dtype not in np_to_cudf_types:
             raise TypeError(
diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx
index e8305ecaf2d..7e0be09236f 100644
--- a/python/cudf/cudf/_lib/binaryop.pyx
+++ b/python/cudf/cudf/_lib/binaryop.pyx
@@ -28,6 +28,7 @@ from cudf.utils.dtypes import is_scalar, is_string_dtype
 
 cimport cudf._lib.cpp.binaryop as cpp_binaryop
 from cudf._lib.cpp.binaryop cimport binary_operator
+import cudf
 
 
 class BinaryOperation(IntEnum):
@@ -211,7 +212,7 @@ def binaryop_udf(Column lhs, Column rhs, udf_ptx, dtype):
     cdef type_id tid = (
         <type_id> (
             <underlying_type_t_type_id> (
-                np_to_cudf_types[np.dtype(dtype)]
+                np_to_cudf_types[cudf.dtype(dtype)]
             )
         )
     )
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index fb58bf96098..a5789e4d0ae 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -787,7 +787,7 @@ cdef class _CPackedColumns:
         """
         Construct a ``PackedColumns`` object from a ``cudf.DataFrame``.
         """
-        from cudf.core import RangeIndex, dtypes
+        from cudf import RangeIndex, dtypes
 
         cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns)
 
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index e15b569ed85..ef392b164a0 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -97,7 +97,7 @@ cpdef read_orc(object filepaths_or_buffers,
             if timestamp_type is None else
             <type_id>(
                 <underlying_type_t_type_id> (
-                    np_to_cudf_types[np.dtype(timestamp_type)]
+                    np_to_cudf_types[cudf.dtype(timestamp_type)]
                 )
             )
         ),
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 471aa3107d9..95ae2202f68 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -199,7 +199,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
             meta_dtype = cols_dtype_map.get(col, None)
             df._data[col] = cudf.core.column.column_empty(
                 row_count=0,
-                dtype=np.dtype(meta_dtype)
+                dtype=cudf.dtype(meta_dtype)
             )
 
     # Set the index column
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index cf1d577bd8f..95fa5d4d20d 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -81,7 +81,7 @@ cdef class DeviceScalar:
         dtype : dtype
             A NumPy dtype.
         """
-        self._dtype = dtype if dtype.kind != 'U' else np.dtype('object')
+        self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
         self._set_value(value, self._dtype)
 
     def _set_value(self, value, dtype):
@@ -560,7 +560,7 @@ def _is_null_host_scalar(slr):
 def _create_proxy_nat_scalar(dtype):
     cdef DeviceScalar result = DeviceScalar.__new__(DeviceScalar)
 
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if dtype.char in 'mM':
         nat = dtype.type('NaT').astype(dtype)
         if dtype.type == np.datetime64:
diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 8f65cc9fee5..8d7e307c5fb 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -56,6 +56,8 @@ from cudf._lib.cpp.strings.convert.convert_urls cimport (
 )
 from cudf._lib.cpp.types cimport data_type, type_id
 
+import cudf
+
 
 def floating_to_string(Column input_col):
     cdef column_view input_column_view = input_col.view()
@@ -115,7 +117,7 @@ def stod(Column input_col, **kwargs):
     A Column with strings cast to double
     """
 
-    return string_to_floating(input_col, np.dtype("float64"))
+    return string_to_floating(input_col, cudf.dtype("float64"))
 
 
 def ftos(Column input_col):
@@ -147,7 +149,7 @@ def stof(Column input_col, **kwargs):
     A Column with strings cast to float
     """
 
-    return string_to_floating(input_col, np.dtype("float32"))
+    return string_to_floating(input_col, cudf.dtype("float32"))
 
 
 def integer_to_string(Column input_col):
@@ -208,7 +210,7 @@ def stoi8(Column input_col, **kwargs):
     A Column with strings cast to int8
     """
 
-    return string_to_integer(input_col, np.dtype("int8"))
+    return string_to_integer(input_col, cudf.dtype("int8"))
 
 
 def i16tos(Column input_col):
@@ -240,7 +242,7 @@ def stoi16(Column input_col):
     A Column with strings cast to int16
     """
 
-    return string_to_integer(input_col, np.dtype("int16"))
+    return string_to_integer(input_col, cudf.dtype("int16"))
 
 
 def itos(Column input_col):
@@ -272,7 +274,7 @@ def stoi(Column input_col):
     A Column with strings cast to int32
     """
 
-    return string_to_integer(input_col, np.dtype("int32"))
+    return string_to_integer(input_col, cudf.dtype("int32"))
 
 
 def ltos(Column input_col):
@@ -304,7 +306,7 @@ def stol(Column input_col, **kwargs):
     A Column with strings cast to int64
     """
 
-    return string_to_integer(input_col, np.dtype("int64"))
+    return string_to_integer(input_col, cudf.dtype("int64"))
 
 
 def ui8tos(Column input_col):
@@ -336,7 +338,7 @@ def stoui8(Column input_col, **kwargs):
     A Column with strings cast to uint8
     """
 
-    return string_to_integer(input_col, np.dtype("uint8"))
+    return string_to_integer(input_col, cudf.dtype("uint8"))
 
 
 def ui16tos(Column input_col):
@@ -368,7 +370,7 @@ def stoui16(Column input_col, **kwargs):
     A Column with strings cast to uint16
     """
 
-    return string_to_integer(input_col, np.dtype("uint16"))
+    return string_to_integer(input_col, cudf.dtype("uint16"))
 
 
 def uitos(Column input_col):
@@ -400,7 +402,7 @@ def stoui(Column input_col, **kwargs):
     A Column with strings cast to uint32
     """
 
-    return string_to_integer(input_col, np.dtype("uint32"))
+    return string_to_integer(input_col, cudf.dtype("uint32"))
 
 
 def ultos(Column input_col):
@@ -432,7 +434,7 @@ def stoul(Column input_col, **kwargs):
     A Column with strings cast to uint64
     """
 
-    return string_to_integer(input_col, np.dtype("uint64"))
+    return string_to_integer(input_col, cudf.dtype("uint64"))
 
 
 def _to_booleans(Column input_col, object string_true="True"):
@@ -745,7 +747,7 @@ def htoi(Column input_col, **kwargs):
     cdef column_view input_column_view = input_col.view()
     cdef type_id tid = <type_id> (
         <underlying_type_t_type_id> (
-            np_to_cudf_types[kwargs.get('dtype', np.dtype("int64"))]
+            np_to_cudf_types[kwargs.get('dtype', cudf.dtype("int64"))]
         )
     )
     cdef data_type c_out_type = data_type(tid)
diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index 63abdb8314c..67fc1c441b0 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -58,8 +58,9 @@ def mask_to_bools(object mask_buffer, size_type begin_bit, size_type end_bit):
     Given a mask buffer, returns a boolean column representng bit 0 -> False
     and 1 -> True within range of [begin_bit, end_bit),
     """
-    if not isinstance(mask_buffer, cudf.core.Buffer):
-        raise TypeError("mask_buffer is not an instance of cudf.core.Buffer")
+    if not isinstance(mask_buffer, cudf.core.buffer.Buffer):
+        raise TypeError("mask_buffer is not an instance of "
+                        "cudf.core.buffer. Buffer")
     cdef bitmask_type* bit_mask = <bitmask_type*><uintptr_t>(mask_buffer.ptr)
 
     cdef unique_ptr[column] result
@@ -98,7 +99,7 @@ def transform(Column input, op):
     nb_signature = (nb_type,)
     compiled_op = cudautils.compile_udf(op, nb_signature)
     c_str = compiled_op[0].encode('UTF-8')
-    np_dtype = np.dtype(compiled_op[1])
+    np_dtype = cudf.dtype(compiled_op[1])
 
     try:
         c_tid = <type_id> (
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index d93e1b75376..d3a4c45f213 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -30,6 +30,7 @@ from cudf.utils.dtypes import (
 )
 
 cimport cudf._lib.cpp.types as libcudf_types
+import cudf
 
 
 class TypeId(IntEnum):
@@ -188,11 +189,11 @@ cdef dtype_from_lists_column_view(column_view cv):
     cdef column_view child = lv.get()[0].child()
 
     if child.type().id() == libcudf_types.type_id.LIST:
-        return ListDtype(dtype_from_lists_column_view(child))
+        return cudf.ListDtype(dtype_from_lists_column_view(child))
     elif child.type().id() == libcudf_types.type_id.EMPTY:
-        return ListDtype(np.dtype("int8"))
+        return cudf.ListDtype("int8")
     else:
-        return ListDtype(
+        return cudf.ListDtype(
             dtype_from_column_view(child)
         )
 
@@ -201,7 +202,7 @@ cdef dtype_from_structs_column_view(column_view cv):
         str(i): dtype_from_column_view(cv.child(i))
         for i in range(cv.num_children())
     }
-    return StructDtype(fields)
+    return cudf.StructDtype(fields)
 
 cdef dtype_from_column_view(column_view cv):
     cdef libcudf_types.type_id tid = cv.type().id()
@@ -210,26 +211,26 @@ cdef dtype_from_column_view(column_view cv):
     elif tid == libcudf_types.type_id.STRUCT:
         return dtype_from_structs_column_view(cv)
     elif tid == libcudf_types.type_id.DECIMAL64:
-        return Decimal64Dtype(
-            precision=Decimal64Dtype.MAX_PRECISION,
+        return cudf.Decimal64Dtype(
+            precision=cudf.Decimal64Dtype.MAX_PRECISION,
             scale=-cv.type().scale()
         )
     elif tid == libcudf_types.type_id.DECIMAL32:
-        return Decimal32Dtype(
-            precision=Decimal32Dtype.MAX_PRECISION,
+        return cudf.Decimal32Dtype(
+            precision=cudf.Decimal32Dtype.MAX_PRECISION,
             scale=-cv.type().scale()
         )
     else:
         return cudf_to_np_types[<underlying_type_t_type_id>(tid)]
 
 cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
-    if is_list_dtype(dtype):
+    if cudf.api.types.is_list_dtype(dtype):
         tid = libcudf_types.type_id.LIST
-    elif is_struct_dtype(dtype):
+    elif cudf.api.types.is_struct_dtype(dtype):
         tid = libcudf_types.type_id.STRUCT
-    elif is_decimal64_dtype(dtype):
+    elif cudf.api.types.is_decimal64_dtype(dtype):
         tid = libcudf_types.type_id.DECIMAL64
-    elif is_decimal32_dtype(dtype):
+    elif cudf.api.types.is_decimal32_dtype(dtype):
         tid = libcudf_types.type_id.DECIMAL32
     else:
         tid = <libcudf_types.type_id> (
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index 5d495f3e21d..bf296e11178 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -14,9 +14,9 @@
 from pandas.api import types as pd_types
 
 import cudf
-from cudf._lib.scalar import DeviceScalar
 from cudf.core.dtypes import (  # noqa: F401
     _BaseDtype,
+    dtype,
     is_categorical_dtype,
     is_decimal32_dtype,
     is_decimal64_dtype,
@@ -27,55 +27,6 @@
 )
 
 
-def dtype(arbitrary):
-    """
-    Return the cuDF-supported dtype corresponding to `arbitrary`.
-
-    Inputs
-    ------
-    arbitrary: dtype or scalar-like
-
-    Returns
-    -------
-    dtype: the cuDF-supported dtype that best matches `arbitrary`
-    """
-    # first, try interpreting arbitrary as a NumPy dtype that we support:
-    try:
-        np_dtype = np.dtype(arbitrary)
-        if np_dtype.name == "float16":
-            np_dtype = np.dtype("float32")
-        elif np_dtype.kind in ("OU"):
-            np_dtype = np.dtype("object")
-    except TypeError:
-        pass
-    else:
-        if np_dtype.kind not in "biufUOMm":
-            raise TypeError(f"Unsupported type {np_dtype}")
-        return np_dtype
-
-    #  next, check if `arbitrary` is one of our extension types:
-    if isinstance(arbitrary, cudf.core.dtypes._BaseDtype):
-        return arbitrary
-
-    # use `pandas_dtype` to try and interpret
-    # `arbitrary` as a Pandas extension type.
-    #  Return the corresponding NumPy/cuDF type.
-    pd_dtype = pd.api.types.pandas_dtype(arbitrary)
-    try:
-        return pd_dtype.numpy_dtype
-    except AttributeError:
-        if isinstance(pd_dtype, pd.CategoricalDtype):
-            return cudf.CategoricalDtype.from_pandas(pd_dtype)
-        elif isinstance(pd_dtype, pd.StringDtype):
-            return np.dtype("object")
-        elif isinstance(pd_dtype, pd.IntervalDtype):
-            return cudf.IntervalDtype.from_pandas(pd_dtype)
-        else:
-            raise TypeError(
-                f"Cannot interpret {arbitrary} as a valid cuDF dtype"
-            )
-
-
 def is_numeric_dtype(obj):
     """Check whether the provided array or dtype is of a numeric dtype.
 
@@ -173,7 +124,7 @@ def is_scalar(val):
         Return True if given object is scalar.
     """
     return (
-        isinstance(val, DeviceScalar)
+        isinstance(val, cudf._lib.scalar.DeviceScalar)
         or isinstance(val, cudf.Scalar)
         or isinstance(val, cudf.core.tools.datetimes.DateOffset)
         or pd_types.is_scalar(val)
diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py
index 451572224c6..85b4bf20e5c 100644
--- a/python/cudf/cudf/comm/gpuarrow.py
+++ b/python/cudf/cudf/comm/gpuarrow.py
@@ -6,10 +6,11 @@
 import pandas as pd
 import pyarrow as pa
 
+from cudf import Series
 from cudf._lib.gpuarrow import (
     CudaRecordBatchStreamReader as _CudaRecordBatchStreamReader,
 )
-from cudf.core import Series, column
+from cudf.core import column
 from cudf.utils.utils import mask_bitsize, mask_dtype
 
 
diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py
index 5eaa5b52fd4..ec4878b332d 100644
--- a/python/cudf/cudf/core/__init__.py
+++ b/python/cudf/cudf/core/__init__.py
@@ -1,31 +1 @@
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
-
-from cudf.core import _internals, buffer, column, column_accessor, common
-from cudf.core.buffer import Buffer
-from cudf.core.dataframe import DataFrame, from_pandas, merge
-from cudf.core.index import (
-    BaseIndex,
-    CategoricalIndex,
-    DatetimeIndex,
-    Float32Index,
-    Float64Index,
-    GenericIndex,
-    Index,
-    Int8Index,
-    Int16Index,
-    Int32Index,
-    Int64Index,
-    IntervalIndex,
-    RangeIndex,
-    TimedeltaIndex,
-    UInt8Index,
-    UInt16Index,
-    UInt32Index,
-    UInt64Index,
-    interval_range,
-)
-from cudf.core.multiindex import MultiIndex
-from cudf.core.scalar import NA, Scalar
-from cudf.core.series import Series
-import cudf.core.udf
-from cudf.core.cut import cut
diff --git a/python/cudf/cudf/core/_internals/__init__.py b/python/cudf/cudf/core/_internals/__init__.py
index 53d186def85..6faeeffdbec 100644
--- a/python/cudf/cudf/core/_internals/__init__.py
+++ b/python/cudf/cudf/core/_internals/__init__.py
@@ -1,3 +1 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
-
-from cudf.core._internals.where import where
diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py
index c6875052685..bb121023a68 100644
--- a/python/cudf/cudf/core/buffer.py
+++ b/python/cudf/cudf/core/buffer.py
@@ -157,7 +157,9 @@ def _buffer_data_from_array_interface(array_interface):
     ptr = array_interface["data"][0]
     if ptr is None:
         ptr = 0
+    # Keep np.dtype (not cudf.dtype) here: itemsize must be the array's real
+    # element size, and cudf.dtype reports float16 as float32.
     itemsize = np.dtype(array_interface["typestr"]).itemsize
     shape = (
         array_interface["shape"] if len(array_interface["shape"]) > 0 else (1,)
     )
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5f4f5702cd6..7c1eeb06a98 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -64,7 +64,6 @@
 )
 from cudf.utils import ioutils, utils
 from cudf.utils.dtypes import (
-    check_cast_unsupported_dtype,
     cudf_dtype_from_pa_type,
     get_time_unit,
     min_unsigned_type,
@@ -241,7 +240,12 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
         """
         if not isinstance(array, (pa.Array, pa.ChunkedArray)):
             raise TypeError("array should be PyArrow array or chunked array")
+
+        if array.type == pa.float16():
+            array = pa.Array.from_pandas(array.to_numpy().astype("float32"))
+
         data = pa.table([array], [None])
+
         if isinstance(array.type, pa.DictionaryType):
             indices_table = pa.table(
                 {
@@ -500,7 +504,10 @@ def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase:
         else:
             # Need to create a gather map for given slice with stride
             gather_map = arange(
-                start=start, stop=stop, step=stride, dtype=np.dtype(np.int32),
+                start=start,
+                stop=stop,
+                step=stride,
+                dtype=cudf.dtype(np.int32),
             )
             return self.take(gather_map)
 
@@ -543,7 +550,7 @@ def __setitem__(self, key: Any, value: Any):
                     start=key_start,
                     stop=key_stop,
                     step=key_stride,
-                    dtype=np.dtype(np.int32),
+                    dtype=cudf.dtype(np.int32),
                 )
                 nelem = len(key)
             else:
@@ -950,7 +957,7 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase:
             cats = cats._column.dropna(drop_nan=False)
             min_type = min_unsigned_type(len(cats), 8)
             labels = labels - 1
-            if np.dtype(min_type).itemsize < labels.dtype.itemsize:
+            if cudf.dtype(min_type).itemsize < labels.dtype.itemsize:
                 labels = labels.astype(min_type)
 
         return build_categorical_column(
@@ -1311,7 +1318,7 @@ def column_empty(
         data = None
         children = (
             build_column(
-                data=Buffer.empty(row_count * np.dtype("int32").itemsize),
+                data=Buffer.empty(row_count * cudf.dtype("int32").itemsize),
                 dtype="int32",
             ),
         )
@@ -1320,7 +1327,7 @@ def column_empty(
         children = (
             full(row_count + 1, 0, dtype="int32"),
             build_column(
-                data=Buffer.empty(row_count * np.dtype("int8").itemsize),
+                data=Buffer.empty(row_count * cudf.dtype("int8").itemsize),
                 dtype="int8",
             ),
         )
@@ -1719,9 +1726,9 @@ def as_column(
 
     elif hasattr(arbitrary, "__cuda_array_interface__"):
         desc = arbitrary.__cuda_array_interface__
-        current_dtype = np.dtype(desc["typestr"])
+        current_dtype = np.dtype(desc["typestr"])  # actual source dtype
 
-        arb_dtype = check_cast_unsupported_dtype(current_dtype)
+        arb_dtype = cudf.dtype(current_dtype)
 
         if desc.get("mask", None) is not None:
             # Extract and remove the mask from arbitrary before
@@ -1768,9 +1775,9 @@ def as_column(
         col = ColumnBase.from_arrow(arbitrary)
         if isinstance(arbitrary, pa.NullArray):
             if type(dtype) == str and dtype == "empty":
-                new_dtype = np.dtype(arbitrary.type.to_pandas_dtype())
+                new_dtype = cudf.dtype(arbitrary.type.to_pandas_dtype())
             else:
-                new_dtype = np.dtype(dtype)
+                new_dtype = cudf.dtype(dtype)
             col = col.astype(new_dtype)
 
         return col
@@ -1787,7 +1794,7 @@ def as_column(
         elif arbitrary.dtype == np.bool_:
             data = as_column(cupy.asarray(arbitrary), dtype=arbitrary.dtype)
         elif arbitrary.dtype.kind in ("f"):
-            arb_dtype = check_cast_unsupported_dtype(arbitrary.dtype)
+            arb_dtype = cudf.dtype(arbitrary.dtype)
             data = as_column(
                 cupy.asarray(arbitrary, dtype=arb_dtype),
                 nan_as_null=nan_as_null,
@@ -1825,7 +1832,7 @@ def as_column(
         ):
             arbitrary = None
             if dtype is None:
-                dtype = np.dtype("float64")
+                dtype = cudf.dtype("float64")
 
         data = as_column(
             utils.scalar_broadcast_to(arbitrary, length, dtype=dtype)
@@ -1840,7 +1847,7 @@ def as_column(
         # CUDF assumes values are always contiguous
         desc = arbitrary.__array_interface__
         shape = desc["shape"]
-        arb_dtype = np.dtype(desc["typestr"])
+        arb_dtype = cudf.dtype(desc["typestr"])
         # CUDF assumes values are always contiguous
         if len(shape) > 1:
             raise ValueError("Data must be 1-dimensional")
@@ -1872,7 +1879,7 @@ def as_column(
             cast_dtype = time_unit in ("D", "W", "M", "Y")
 
             if cast_dtype:
-                arbitrary = arbitrary.astype(np.dtype("datetime64[s]"))
+                arbitrary = arbitrary.astype(cudf.dtype("datetime64[s]"))
 
             buffer = Buffer(arbitrary.view("|u1"))
             mask = None
@@ -1892,7 +1899,7 @@ def as_column(
             cast_dtype = time_unit in ("D", "W", "M", "Y")
 
             if cast_dtype:
-                arbitrary = arbitrary.astype(np.dtype("timedelta64[s]"))
+                arbitrary = arbitrary.astype(cudf.dtype("timedelta64[s]"))
 
             buffer = Buffer(arbitrary.view("|u1"))
             mask = None
@@ -1931,9 +1938,7 @@ def as_column(
             if dtype is not None:
                 data = data.astype(dtype)
         elif arb_dtype.kind in ("f"):
-            arb_dtype = check_cast_unsupported_dtype(
-                arb_dtype if dtype is None else dtype
-            )
+            arb_dtype = cudf.dtype(arb_dtype if dtype is None else dtype)
             data = as_column(
                 cupy.asarray(arbitrary, dtype=arb_dtype),
                 nan_as_null=nan_as_null,
@@ -1946,9 +1951,9 @@ def as_column(
             arb_dtype = arbitrary.dtype
         else:
             if arbitrary.dtype == pd.StringDtype():
-                arb_dtype = np.dtype("O")
+                arb_dtype = cudf.dtype("O")
             else:
-                arb_dtype = check_cast_unsupported_dtype(arbitrary.dtype)
+                arb_dtype = cudf.dtype(arbitrary.dtype)
                 if arb_dtype != arbitrary.dtype.numpy_dtype:
                     arbitrary = arbitrary.astype(arb_dtype)
         if (
@@ -2100,7 +2105,7 @@ def _construct_array(
             arbitrary,
             dtype=native_dtype
             if native_dtype is None
-            else np.dtype(native_dtype),
+            else cudf.dtype(native_dtype),
         )
     return arbitrary
 
@@ -2109,7 +2114,7 @@ def _data_from_cuda_array_interface_desc(obj) -> Buffer:
     desc = obj.__cuda_array_interface__
     ptr = desc["data"][0]
     nelem = desc["shape"][0] if len(desc["shape"]) > 0 else 1
-    dtype = np.dtype(desc["typestr"])
+    dtype = np.dtype(desc["typestr"])  # true itemsize; cudf.dtype widens f2
 
     data = Buffer(data=ptr, size=nelem * dtype.itemsize, owner=obj)
     return data
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 4d99308d128..bf5bba9d288 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -264,7 +264,7 @@ def as_string_column(
             )
         if len(self) > 0:
             return string._datetime_to_str_typecast_functions[
-                np.dtype(self.dtype)
+                cudf.dtype(self.dtype)
             ](self, format)
         else:
             return cast(
@@ -316,7 +316,7 @@ def binary_operator(
             return rhs._datetime_binop(self, op, reflect=reflect)
         lhs: Union[ScalarLike, ColumnBase] = self
         if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"):
-            out_dtype = np.dtype(np.bool_)  # type: Dtype
+            out_dtype = cudf.dtype(np.bool_)  # type: Dtype
         elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype):
             out_dtype = cudf.core.column.timedelta._timedelta_add_result_dtype(
                 rhs, lhs
@@ -389,13 +389,13 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
             to_res, _ = np.datetime_data(to_dtype)
             self_res, _ = np.datetime_data(self.dtype)
 
-            max_int = np.iinfo(np.dtype("int64")).max
+            max_int = np.iinfo(cudf.dtype("int64")).max
 
             max_dist = np.timedelta64(
-                self.max().astype(np.dtype("int64"), copy=False), self_res
+                self.max().astype(cudf.dtype("int64"), copy=False), self_res
             )
             min_dist = np.timedelta64(
-                self.min().astype(np.dtype("int64"), copy=False), self_res
+                self.min().astype(cudf.dtype("int64"), copy=False), self_res
             )
 
             self_delta_dtype = np.timedelta64(0, self_res).dtype
@@ -408,7 +408,7 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
                 return True
             else:
                 return False
-        elif to_dtype == np.dtype("int64") or to_dtype == np.dtype("O"):
+        elif to_dtype == cudf.dtype("int64") or to_dtype == cudf.dtype("O"):
             # can safely cast to representation, or string
             return True
         else:
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 0955039dafd..03033f86ffc 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -121,14 +121,14 @@ def binary_operator(
         self, binop: str, rhs: BinaryOperand, reflect: bool = False,
     ) -> ColumnBase:
         int_dtypes = [
-            np.dtype("int8"),
-            np.dtype("int16"),
-            np.dtype("int32"),
-            np.dtype("int64"),
-            np.dtype("uint8"),
-            np.dtype("uint16"),
-            np.dtype("uint32"),
-            np.dtype("uint64"),
+            cudf.dtype("int8"),
+            cudf.dtype("int16"),
+            cudf.dtype("int32"),
+            cudf.dtype("int64"),
+            cudf.dtype("uint8"),
+            cudf.dtype("uint16"),
+            cudf.dtype("uint32"),
+            cudf.dtype("uint64"),
         ]
         if rhs is None:
             out_dtype = self.dtype
@@ -158,7 +158,7 @@ def binary_operator(
                     (np.isscalar(tmp) and (0 == tmp))
                     or ((isinstance(tmp, NumericalColumn)) and (0.0 in tmp))
                 ):
-                    out_dtype = np.dtype("float64")
+                    out_dtype = cudf.dtype("float64")
 
         if binop in {"lt", "gt", "le", "ge", "eq", "ne", "NULL_EQUALS"}:
             out_dtype = "bool"
@@ -183,13 +183,13 @@ def normalize_binop_value(
             if isinstance(other, cudf.Scalar):
                 return other
             other_dtype = np.promote_types(self.dtype, other_dtype)
-            if other_dtype == np.dtype("float16"):
-                other_dtype = np.dtype("float32")
+            if other_dtype == np.dtype("float16"):  # cudf.dtype widens it
+                other_dtype = cudf.dtype("float32")
                 other = other_dtype.type(other)
             if self.dtype.kind == "b":
                 other_dtype = min_signed_type(other)
             if np.isscalar(other):
-                other = np.dtype(other_dtype).type(other)
+                other = cudf.dtype(other_dtype).type(other)
                 return other
             else:
                 ary = utils.scalar_broadcast_to(
@@ -202,7 +202,7 @@ def normalize_binop_value(
             raise TypeError(f"cannot broadcast {type(other)}")
 
     def int2ip(self) -> "cudf.core.column.StringColumn":
-        if self.dtype != np.dtype("int64"):
+        if self.dtype != cudf.dtype("int64"):
             raise TypeError("Only int64 type can be converted to ip")
 
         return libcudf.string_casting.int2ip(self)
@@ -212,7 +212,7 @@ def as_string_column(
     ) -> "cudf.core.column.StringColumn":
         if len(self) > 0:
             return string._numeric_to_str_typecast_functions[
-                np.dtype(self.dtype)
+                cudf.dtype(self.dtype)
             ](self)
         else:
             return cast(
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 5f38f2c698e..f22aee2fbf3 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -53,47 +53,47 @@ def str_to_boolean(column: StringColumn):
 
 
 _str_to_numeric_typecast_functions = {
-    np.dtype("int8"): str_cast.stoi8,
-    np.dtype("int16"): str_cast.stoi16,
-    np.dtype("int32"): str_cast.stoi,
-    np.dtype("int64"): str_cast.stol,
-    np.dtype("uint8"): str_cast.stoui8,
-    np.dtype("uint16"): str_cast.stoui16,
-    np.dtype("uint32"): str_cast.stoui,
-    np.dtype("uint64"): str_cast.stoul,
-    np.dtype("float32"): str_cast.stof,
-    np.dtype("float64"): str_cast.stod,
-    np.dtype("bool"): str_to_boolean,
+    cudf.dtype("int8"): str_cast.stoi8,
+    cudf.dtype("int16"): str_cast.stoi16,
+    cudf.dtype("int32"): str_cast.stoi,
+    cudf.dtype("int64"): str_cast.stol,
+    cudf.dtype("uint8"): str_cast.stoui8,
+    cudf.dtype("uint16"): str_cast.stoui16,
+    cudf.dtype("uint32"): str_cast.stoui,
+    cudf.dtype("uint64"): str_cast.stoul,
+    cudf.dtype("float32"): str_cast.stof,
+    cudf.dtype("float64"): str_cast.stod,
+    cudf.dtype("bool"): str_to_boolean,
 }
 
 _numeric_to_str_typecast_functions = {
-    np.dtype("int8"): str_cast.i8tos,
-    np.dtype("int16"): str_cast.i16tos,
-    np.dtype("int32"): str_cast.itos,
-    np.dtype("int64"): str_cast.ltos,
-    np.dtype("uint8"): str_cast.ui8tos,
-    np.dtype("uint16"): str_cast.ui16tos,
-    np.dtype("uint32"): str_cast.uitos,
-    np.dtype("uint64"): str_cast.ultos,
-    np.dtype("float32"): str_cast.ftos,
-    np.dtype("float64"): str_cast.dtos,
-    np.dtype("bool"): str_cast.from_booleans,
+    cudf.dtype("int8"): str_cast.i8tos,
+    cudf.dtype("int16"): str_cast.i16tos,
+    cudf.dtype("int32"): str_cast.itos,
+    cudf.dtype("int64"): str_cast.ltos,
+    cudf.dtype("uint8"): str_cast.ui8tos,
+    cudf.dtype("uint16"): str_cast.ui16tos,
+    cudf.dtype("uint32"): str_cast.uitos,
+    cudf.dtype("uint64"): str_cast.ultos,
+    cudf.dtype("float32"): str_cast.ftos,
+    cudf.dtype("float64"): str_cast.dtos,
+    cudf.dtype("bool"): str_cast.from_booleans,
 }
 
 _datetime_to_str_typecast_functions = {
     # TODO: support Date32 UNIX days
-    # np.dtype("datetime64[D]"): str_cast.int2timestamp,
-    np.dtype("datetime64[s]"): str_cast.int2timestamp,
-    np.dtype("datetime64[ms]"): str_cast.int2timestamp,
-    np.dtype("datetime64[us]"): str_cast.int2timestamp,
-    np.dtype("datetime64[ns]"): str_cast.int2timestamp,
+    # cudf.dtype("datetime64[D]"): str_cast.int2timestamp,
+    cudf.dtype("datetime64[s]"): str_cast.int2timestamp,
+    cudf.dtype("datetime64[ms]"): str_cast.int2timestamp,
+    cudf.dtype("datetime64[us]"): str_cast.int2timestamp,
+    cudf.dtype("datetime64[ns]"): str_cast.int2timestamp,
 }
 
 _timedelta_to_str_typecast_functions = {
-    np.dtype("timedelta64[s]"): str_cast.int2timedelta,
-    np.dtype("timedelta64[ms]"): str_cast.int2timedelta,
-    np.dtype("timedelta64[us]"): str_cast.int2timedelta,
-    np.dtype("timedelta64[ns]"): str_cast.int2timedelta,
+    cudf.dtype("timedelta64[s]"): str_cast.int2timedelta,
+    cudf.dtype("timedelta64[ms]"): str_cast.int2timedelta,
+    cudf.dtype("timedelta64[us]"): str_cast.int2timedelta,
+    cudf.dtype("timedelta64[ns]"): str_cast.int2timedelta,
 }
 
 
@@ -4895,7 +4895,7 @@ def __init__(
             Two non-null columns containing the string data and offsets
             respectively
         """
-        dtype = np.dtype("object")
+        dtype = cudf.dtype("object")
 
         if size is None:
             for child in children:
@@ -5240,7 +5240,7 @@ def deserialize(cls, header: dict, frames: list) -> StringColumn:
         return col
 
     def can_cast_safely(self, to_dtype: Dtype) -> bool:
-        to_dtype = np.dtype(to_dtype)
+        to_dtype = cudf.dtype(to_dtype)
 
         if self.dtype == to_dtype:
             return True
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 2c893fafae7..37bff1907fa 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -137,7 +137,7 @@ def _binary_op_floordiv(
                     rhs = cudf.Scalar(None, "float64")
             else:
                 rhs = rhs.astype(common_dtype).astype("float64")
-            out_dtype = np.dtype("int64")
+            out_dtype = cudf.dtype("int64")
         elif rhs.dtype.kind in ("f", "i", "u"):
             out_dtype = self.dtype
         else:
@@ -204,7 +204,7 @@ def _binary_op_truediv(
             else:
                 rhs = rhs.astype(common_dtype).astype("float64")
 
-            out_dtype = np.dtype("float64")
+            out_dtype = cudf.dtype("float64")
         elif rhs.dtype.kind in ("f", "i", "u"):
             out_dtype = self.dtype
         else:
@@ -344,7 +344,7 @@ def as_string_column(
             )
         if len(self) > 0:
             return string._timedelta_to_str_typecast_functions[
-                np.dtype(self.dtype)
+                cudf.dtype(self.dtype)
             ](self, format=format)
         else:
             return cast(
@@ -575,9 +575,9 @@ def nanoseconds(self) -> "cudf.core.column.NumericalColumn":
 
 
 def determine_out_dtype(lhs_dtype: Dtype, rhs_dtype: Dtype) -> Dtype:
-    if np.can_cast(np.dtype(lhs_dtype), np.dtype(rhs_dtype)):
+    if np.can_cast(cudf.dtype(lhs_dtype), cudf.dtype(rhs_dtype)):
         return rhs_dtype
-    elif np.can_cast(np.dtype(rhs_dtype), np.dtype(lhs_dtype)):
+    elif np.can_cast(cudf.dtype(rhs_dtype), cudf.dtype(lhs_dtype)):
         return lhs_dtype
     else:
         raise TypeError(f"Cannot type-cast {lhs_dtype} and {rhs_dtype}")
@@ -594,7 +594,7 @@ def _timedelta_add_result_dtype(
         lhs_unit = units.index(lhs_time_unit)
         rhs_time_unit = cudf.utils.dtypes.get_time_unit(rhs)
         rhs_unit = units.index(rhs_time_unit)
-        out_dtype = np.dtype(f"datetime64[{units[max(lhs_unit, rhs_unit)]}]")
+        out_dtype = cudf.dtype(f"datetime64[{units[max(lhs_unit, rhs_unit)]}]")
     else:
         raise TypeError(
             f"Addition of {lhs.dtype} with {rhs.dtype} "
@@ -619,7 +619,7 @@ def _timedelta_sub_result_dtype(
         lhs_unit = units.index(lhs_time_unit)
         rhs_time_unit = cudf.utils.dtypes.get_time_unit(rhs)
         rhs_unit = units.index(rhs_time_unit)
-        out_dtype = np.dtype(f"datetime64[{units[max(lhs_unit, rhs_unit)]}]")
+        out_dtype = cudf.dtype(f"datetime64[{units[max(lhs_unit, rhs_unit)]}]")
     else:
         raise TypeError(
             f"Subtraction of {lhs.dtype} with {rhs.dtype} "
diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py
index 7811f477170..91f623a3cd3 100644
--- a/python/cudf/cudf/core/cut.py
+++ b/python/cudf/cudf/core/cut.py
@@ -5,7 +5,6 @@
 import pandas as pd
 
 import cudf
-from cudf._lib.labeling import label_bins
 from cudf.core.column import as_column, build_categorical_column
 from cudf.core.index import IntervalIndex, interval_range
 from cudf.utils.dtypes import is_list_like
@@ -240,7 +239,7 @@ def cut(
         # the input arr must be changed to the same type as the edges
         input_arr = input_arr.astype(left_edges.dtype)
     # get the indexes for the appropriate number
-    index_labels = label_bins(
+    index_labels = cudf._lib.labeling.label_bins(
         input_arr, left_edges, left_inclusive, right_edges, right_inclusive
     )
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index bc068413efb..7d2fe5dfb98 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -23,6 +23,7 @@
 from pandas.io.formats.printing import pprint_thing
 
 import cudf
+import cudf.core.common
 from cudf import _lib as libcudf
 from cudf.api.types import is_bool_dtype, is_dict_like
 from cudf.core import column, reshape
@@ -3533,12 +3534,12 @@ def as_gpu_matrix(self, columns=None, order="F"):
         if ncol < 1:
             # This is the case for empty dataframe - construct empty cupy array
             matrix = cupy.empty(
-                shape=(0, 0), dtype=np.dtype("float64"), order=order
+                shape=(0, 0), dtype=cudf.dtype("float64"), order=order
             )
             return cuda.as_cuda_array(matrix)
 
         if any(
-            (is_categorical_dtype(c) or np.issubdtype(c, np.dtype("object")))
+            (is_categorical_dtype(c) or np.issubdtype(c, cudf.dtype("object")))
             for c in cols
         ):
             raise TypeError("non-numeric data not yet supported")
@@ -3552,7 +3553,7 @@ def as_gpu_matrix(self, columns=None, order="F"):
                 )
         cupy_dtype = dtype
         if np.issubdtype(cupy_dtype, np.datetime64):
-            cupy_dtype = np.dtype("int64")
+            cupy_dtype = cudf.dtype("int64")
 
         if order not in ("F", "C"):
             raise ValueError(
@@ -5752,7 +5753,7 @@ def to_records(self, index=True):
         """
         members = [("index", self.index.dtype)] if index else []
         members += [(col, self[col].dtype) for col in self._data.names]
-        dtype = np.dtype(members)
+        dtype = np.dtype(members)  # record dtype; cudf.dtype rejects it
         ret = np.recarray(len(self), dtype=dtype)
         if index:
             ret["index"] = self.index.to_array()
@@ -6137,12 +6138,12 @@ def isin(self, values):
                     isinstance(
                         self[col]._column, cudf.core.column.CategoricalColumn
                     )
-                    or np.issubdtype(self[col].dtype, np.dtype("object"))
+                    or np.issubdtype(self[col].dtype, cudf.dtype("object"))
                 ) or (
                     isinstance(
                         values._column, cudf.core.column.CategoricalColumn
                     )
-                    or np.issubdtype(values.dtype, np.dtype("object"))
+                    or np.issubdtype(values.dtype, cudf.dtype("object"))
                 ):
                     result[col] = utils.scalar_broadcast_to(False, len(self))
                 else:
@@ -7209,7 +7210,7 @@ def _apply_support_method(self, method, axis=0, *args, **kwargs):
                                 prepared._data[col]
                             )
                             if not is_datetime_dtype(common_dtype)
-                            else np.dtype("float64")
+                            else cudf.dtype("float64")
                         )
                         .fillna(np.nan)
                     )
@@ -8081,7 +8082,7 @@ def _get_union_of_indices(indexes):
     if len(indexes) == 1:
         return indexes[0]
     else:
-        merged_index = cudf.core.Index._concat(indexes)
+        merged_index = cudf.Index._concat(indexes)
         merged_index = merged_index.drop_duplicates()
         _, inds = merged_index._values.sort_by_values()
         return merged_index.take(inds)
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 4062b734bb3..03da08097e2 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -21,6 +21,55 @@
 from cudf.core.buffer import Buffer
 
 
+def dtype(arbitrary):
+    """
+    Return the cuDF-supported dtype corresponding to `arbitrary`.
+
+    Parameters
+    ----------
+    arbitrary: dtype or scalar-like
+
+    Returns
+    -------
+    dtype: the cuDF-supported dtype that best matches `arbitrary`
+    """
+    # NumPy first: float16 is widened to float32; "O"/"U" map to object.
+    try:
+        np_dtype = np.dtype(arbitrary)
+        if np_dtype.name == "float16":
+            np_dtype = np.dtype("float32")
+        elif np_dtype.kind in ("OU"):
+            np_dtype = np.dtype("object")
+    except TypeError:
+        pass
+    else:
+        if np_dtype.kind not in "biufUOMm":
+            raise TypeError(f"Unsupported type {np_dtype}")
+        return np_dtype
+
+    # Not a NumPy dtype: cuDF extension dtypes are returned unchanged.
+    if isinstance(arbitrary, cudf.core.dtypes._BaseDtype):
+        return arbitrary
+
+    # Last resort: let pandas parse `arbitrary` (e.g. a pandas extension
+    # dtype) and translate the result to its NumPy/cuDF counterpart;
+    # anything without a counterpart raises TypeError below.
+    pd_dtype = pd.api.types.pandas_dtype(arbitrary)
+    try:
+        return pd_dtype.numpy_dtype
+    except AttributeError:
+        if isinstance(pd_dtype, pd.CategoricalDtype):
+            return cudf.CategoricalDtype.from_pandas(pd_dtype)
+        elif isinstance(pd_dtype, pd.StringDtype):
+            return np.dtype("object")
+        elif isinstance(pd_dtype, pd.IntervalDtype):
+            return cudf.IntervalDtype.from_pandas(pd_dtype)
+        else:
+            raise TypeError(
+                f"Cannot interpret {arbitrary} as a valid cuDF dtype"
+            )
+
+
 class _BaseDtype(ExtensionDtype, Serializable):
     # Base type for all cudf-specific dtypes
     pass
@@ -157,7 +206,7 @@ def element_type(self) -> Dtype:
         elif isinstance(self._typ.value_type, pa.StructType):
             return StructDtype.from_arrow(self._typ.value_type)
         else:
-            return np.dtype(self._typ.value_type.to_pandas_dtype()).name
+            return cudf.dtype(self._typ.value_type.to_pandas_dtype()).name
 
     @property
     def leaf_type(self):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 14b8ebe801f..e26248340f5 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -884,8 +884,9 @@ def where(self, cond, other=None, inplace=False):
         4    <NA>
         dtype: int64
         """
+        import cudf.core._internals.where
 
-        return cudf.core._internals.where(
+        return cudf.core._internals.where.where(
             frame=self, cond=cond, other=other, inplace=inplace
         )
 
@@ -3304,7 +3305,7 @@ def _reindex(
         if index is not None:
             index = cudf.core.index.as_index(index)
 
-            if isinstance(index, cudf.core.MultiIndex):
+            if isinstance(index, cudf.MultiIndex):
                 idx_dtype_match = (
                     df.index._source_data.dtypes == index._source_data.dtypes
                 ).all()
@@ -4016,7 +4017,7 @@ def _get_replacement_values_for_columns(
                 col: [value]
                 if _is_non_decimal_numeric_dtype(columns_dtype_map[col])
                 else cudf.utils.utils.scalar_broadcast_to(
-                    value, (len(to_replace),), np.dtype(type(value)),
+                    value, (len(to_replace),), cudf.dtype(type(value)),
                 )
                 for col in columns_dtype_map
             }
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 97ee0948209..64041e23763 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -646,12 +646,12 @@ def append(self, other):
                 if is_mixed_with_object_dtype(this, other):
                     got_dtype = (
                         other.dtype
-                        if this.dtype == np.dtype("object")
+                        if this.dtype == cudf.dtype("object")
                         else this.dtype
                     )
                     raise TypeError(
                         f"cudf does not support appending an Index of "
-                        f"dtype `{np.dtype('object')}` with an Index "
+                        f"dtype `{cudf.dtype('object')}` with an Index "
                         f"of dtype `{got_dtype}`, please type-cast "
                         f"either one of them to same dtypes."
                     )
@@ -1629,7 +1629,7 @@ def dtype(self):
         """
         `dtype` of the range of values in RangeIndex.
         """
-        return np.dtype(np.int64)
+        return cudf.dtype(np.int64)
 
     @property
     def is_contiguous(self):
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index 75be36a1b16..cda9e9414e4 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -6,7 +6,6 @@
 from pandas._libs.missing import NAType as pd_NAType
 
 import cudf
-from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar
 from cudf.core.column.column import ColumnBase
 from cudf.core.dtypes import Decimal64Dtype, ListDtype, StructDtype
 from cudf.core.index import BaseIndex
@@ -68,7 +67,7 @@ def __init__(self, value, dtype=None):
                 self._host_dtype = value._host_dtype
             else:
                 self._device_value = value._device_value
-        elif isinstance(value, DeviceScalar):
+        elif isinstance(value, cudf._lib.scalar.DeviceScalar):
             self._device_value = value
         else:
             self._host_value, self._host_dtype = self._preprocess_host_value(
@@ -86,7 +85,7 @@ def _is_device_value_current(self):
     @property
     def device_value(self):
         if self._device_value is None:
-            self._device_value = DeviceScalar(
+            self._device_value = cudf._lib.scalar.DeviceScalar(
                 self._host_value, self._host_dtype
             )
         return self._device_value
@@ -102,7 +101,7 @@ def value(self):
     def dtype(self):
         if self._is_host_value_current:
             if isinstance(self._host_value, str):
-                return np.dtype("object")
+                return cudf.dtype("object")
             else:
                 return self._host_dtype
         else:
@@ -111,13 +110,13 @@ def dtype(self):
     def is_valid(self):
         if not self._is_host_value_current:
             self._device_value_to_host()
-        return not _is_null_host_scalar(self._host_value)
+        return not cudf._lib.scalar._is_null_host_scalar(self._host_value)
 
     def _device_value_to_host(self):
         self._host_value = self._device_value._to_host_scalar()
 
     def _preprocess_host_value(self, value, dtype):
-        valid = not _is_null_host_scalar(value)
+        valid = not cudf._lib.scalar._is_null_host_scalar(value)
 
         if isinstance(value, list):
             if dtype is not None:
@@ -187,7 +186,7 @@ def _sync(self):
         if self._is_host_value_current and self._is_device_value_current:
             return
         elif self._is_host_value_current and not self._is_device_value_current:
-            self._device_value = DeviceScalar(
+            self._device_value = cudf._lib.scalar.DeviceScalar(
                 self._host_value, self._host_dtype
             )
         elif self._is_device_value_current and not self._is_host_value_current:
@@ -324,10 +323,10 @@ def _binop_result_dtype_or_error(self, other, op):
                     and self.dtype.char == other.dtype.char == "M"
                 ):
                     res, _ = np.datetime_data(max(self.dtype, other.dtype))
-                    return np.dtype("m8" + f"[{res}]")
+                    return cudf.dtype("m8" + f"[{res}]")
                 return np.result_type(self.dtype, other.dtype)
 
-        return np.dtype(out_dtype)
+        return cudf.dtype(out_dtype)
 
     def _scalar_binop(self, other, op):
         if isinstance(other, (ColumnBase, Series, BaseIndex, np.ndarray)):
@@ -358,9 +357,9 @@ def _unaop_result_type_or_error(self, op):
 
         if op in {"__ceil__", "__floor__"}:
             if self.dtype.char in "bBhHf?":
-                return np.dtype("float32")
+                return cudf.dtype("float32")
             else:
-                return np.dtype("float64")
+                return cudf.dtype("float64")
         return self.dtype
 
     def _scalar_unaop(self, op):
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 00f60cfc8b5..75ecb9b90be 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -495,7 +495,7 @@ def __init__(self, n=1, normalize=False, **kwds):
                     dtype = "int16"
                 else:
                     unit = self._UNITS_TO_CODES[k]
-                    dtype = np.dtype(f"timedelta64[{unit}]")
+                    dtype = cudf.dtype(f"timedelta64[{unit}]")
                 scalars[k] = cudf.Scalar(v, dtype=dtype)
 
         self._scalars = scalars
diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py
index 6d31c1ba74d..d5c4df12246 100644
--- a/python/cudf/cudf/core/tools/numeric.py
+++ b/python/cudf/cudf/core/tools/numeric.py
@@ -109,7 +109,7 @@ def to_numeric(arg, errors="raise", downcast=None):
     dtype = col.dtype
 
     if is_datetime_dtype(dtype) or is_timedelta_dtype(dtype):
-        col = col.as_numerical_column(np.dtype("int64"))
+        col = col.as_numerical_column(cudf.dtype("int64"))
     elif is_categorical_dtype(dtype):
         cat_dtype = col.dtype.type
         if _is_non_decimal_numeric_dtype(cat_dtype):
@@ -140,7 +140,7 @@ def to_numeric(arg, errors="raise", downcast=None):
         raise ValueError("Unrecognized datatype")
 
     # str->float conversion may require lower precision
-    if col.dtype == np.dtype("f"):
+    if col.dtype == cudf.dtype("f"):
         col = col.as_numerical_column("d")
 
     if downcast:
@@ -150,13 +150,13 @@ def to_numeric(arg, errors="raise", downcast=None):
             "unsigned": list(np.typecodes["UnsignedInteger"]),
         }
         float_types = list(np.typecodes["Float"])
-        idx = float_types.index(np.dtype(np.float32).char)
+        idx = float_types.index(cudf.dtype(np.float32).char)
         downcast_type_map["float"] = float_types[idx:]
 
         type_set = downcast_type_map[downcast]
 
         for t in type_set:
-            downcast_dtype = np.dtype(t)
+            downcast_dtype = cudf.dtype(t)
             if downcast_dtype.itemsize <= col.dtype.itemsize:
                 if col.can_cast_safely(downcast_dtype):
                     col = libcudf.unary.cast(col, downcast_dtype)
@@ -197,7 +197,7 @@ def _convert_str_col(col, errors, _downcast=None):
 
     is_integer = libstrings.is_integer(col)
     if is_integer.all():
-        return col.as_numerical_column(dtype=np.dtype("i8"))
+        return col.as_numerical_column(dtype=cudf.dtype("i8"))
 
     col = _proc_inf_empty_strings(col)
 
@@ -210,9 +210,9 @@ def _convert_str_col(col, errors, _downcast=None):
                     "limited by float32 precision."
                 )
             )
-            return col.as_numerical_column(dtype=np.dtype("f"))
+            return col.as_numerical_column(dtype=cudf.dtype("f"))
         else:
-            return col.as_numerical_column(dtype=np.dtype("d"))
+            return col.as_numerical_column(dtype=cudf.dtype("d"))
     else:
         if errors == "coerce":
             col = libcudf.string_casting.stod(col)
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index d8761057683..22dfd1aa145 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -13,7 +13,7 @@
 import pytest
 
 import cudf
-from cudf.core import Series
+from cudf import Series
 from cudf.core.index import as_index
 from cudf.testing import _utils as utils
 from cudf.utils.dtypes import (
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index 761b2f32f18..cc4c98b611f 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -362,7 +362,7 @@ def test_column_view_string_slice(slc):
 )
 def test_as_column_buffer(data, expected):
     actual_column = cudf.core.column.as_column(
-        cudf.core.Buffer(data), dtype=data.dtype
+        cudf.core.buffer.Buffer(data), dtype=data.dtype
     )
     assert_eq(cudf.Series(actual_column), cudf.Series(expected))
 
diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py
index 0965b5298a4..21a6a9172db 100644
--- a/python/cudf/cudf/tests/test_copying.py
+++ b/python/cudf/cudf/tests/test_copying.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.core import Series
+from cudf import Series
 from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq
 
 
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 5f5a0a78414..4e17e4e52df 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -12,7 +12,7 @@
 import pytest
 
 import cudf
-from cudf.core import DataFrame, Series
+from cudf import DataFrame, Series
 from cudf.core.index import DatetimeIndex
 from cudf.testing._utils import (
     DATETIME_TYPES,
diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py
index 3df0031745e..46cbc9d2b52 100644
--- a/python/cudf/cudf/tests/test_factorize.py
+++ b/python/cudf/cudf/tests/test_factorize.py
@@ -6,7 +6,7 @@
 import pytest
 
 import cudf
-from cudf.core import DataFrame, Index
+from cudf import DataFrame, Index
 from cudf.testing._utils import assert_eq
 
 
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index de7d8e35bce..10217a2193f 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -13,7 +13,7 @@
 import rmm
 
 import cudf
-from cudf.core import DataFrame, Series
+from cudf import DataFrame, Series
 from cudf.core._compat import PANDAS_GE_110
 from cudf.testing._utils import (
     DATETIME_TYPES,
diff --git a/python/cudf/cudf/tests/test_label_encode.py b/python/cudf/cudf/tests/test_label_encode.py
index f513aa7a134..bac324d9c1c 100644
--- a/python/cudf/cudf/tests/test_label_encode.py
+++ b/python/cudf/cudf/tests/test_label_encode.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.core import DataFrame, Series
+from cudf import DataFrame, Series
 
 
 def _random_float(nelem, dtype):
diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py
index e9c828ec0f5..7643bfdf050 100644
--- a/python/cudf/cudf/tests/test_monotonic.py
+++ b/python/cudf/cudf/tests/test_monotonic.py
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.core import MultiIndex, Series
+from cudf import MultiIndex, Series
 from cudf.core.index import (
     CategoricalIndex,
     DatetimeIndex,
diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py
index e5efe2f027d..55b5a38c3e5 100644
--- a/python/cudf/cudf/tests/test_numpy_interop.py
+++ b/python/cudf/cudf/tests/test_numpy_interop.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from cudf.core import DataFrame, Series
+from cudf import DataFrame, Series
 from cudf.testing._utils import assert_eq
 
 
diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py
index bbec4594e15..0a3ead6cf31 100644
--- a/python/cudf/cudf/tests/test_onehot.py
+++ b/python/cudf/cudf/tests/test_onehot.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.core import DataFrame, GenericIndex, Series
+from cudf import DataFrame, Index, Series
 from cudf.testing import _utils as utils
 
 
@@ -86,7 +86,7 @@ def test_onehot_generic_index():
     indices = np.random.randint(low=0, high=100, size=size)
     df = DataFrame()
     values = np.random.randint(low=0, high=4, size=size)
-    df["fo"] = Series(values, index=GenericIndex(indices))
+    df["fo"] = Series(values, index=Index(indices))
     out = df.one_hot_encoding(
         "fo", cats=df.fo.unique(), prefix="fo", dtype=np.int32
     )
diff --git a/python/cudf/cudf/tests/test_pack.py b/python/cudf/cudf/tests/test_pack.py
index c735a71d5e1..8f54e17c0c3 100644
--- a/python/cudf/cudf/tests/test_pack.py
+++ b/python/cudf/cudf/tests/test_pack.py
@@ -18,8 +18,8 @@
 import numpy as np
 import pandas as pd
 
+from cudf import DataFrame, GenericIndex, Series
 from cudf._lib.copying import pack, unpack
-from cudf.core import DataFrame, GenericIndex, Series
 from cudf.testing._utils import assert_eq
 
 
diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py
index a8a45fc3c28..c90d6f23c2d 100644
--- a/python/cudf/cudf/tests/test_pandas_interop.py
+++ b/python/cudf/cudf/tests/test_pandas_interop.py
@@ -4,7 +4,7 @@
 import pandas as pd
 
 import cudf
-from cudf.core import DataFrame
+from cudf import DataFrame
 from cudf.testing._utils import assert_eq
 
 
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index 48a25fcfadb..0f8b46cee35 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -6,7 +6,7 @@
 import pandas as pd
 import pytest
 
-from cudf.core import DataFrame, GenericIndex, Series
+from cudf import DataFrame, GenericIndex, Series
 from cudf.core.buffer import Buffer
 from cudf.testing._utils import assert_eq
 
diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py
index 8dc5df2dd7c..07c6cce5cd3 100644
--- a/python/cudf/cudf/tests/test_query.py
+++ b/python/cudf/cudf/tests/test_query.py
@@ -11,7 +11,7 @@
 import pytest
 
 import cudf
-from cudf.core import DataFrame
+from cudf import DataFrame
 from cudf.testing._utils import assert_eq
 from cudf.utils import queryutils
 
diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py
index 3c98496def3..563278e3a8f 100644
--- a/python/cudf/cudf/tests/test_rank.py
+++ b/python/cudf/cudf/tests/test_rank.py
@@ -6,7 +6,7 @@
 import pandas as pd
 import pytest
 
-from cudf.core import DataFrame
+from cudf import DataFrame
 from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
 
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index b7bbefb8c58..5b6a91e33ce 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -11,7 +11,7 @@
 import pytest
 
 import cudf
-from cudf.core import Series
+from cudf import Series
 from cudf.core.dtypes import Decimal64Dtype
 from cudf.testing import _utils as utils
 from cudf.testing._utils import NUMERIC_TYPES, assert_eq, gen_rand
diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py
index 759feedf2d5..ef9f853bd11 100644
--- a/python/cudf/cudf/tests/test_sorting.py
+++ b/python/cudf/cudf/tests/test_sorting.py
@@ -7,7 +7,7 @@
 import pandas as pd
 import pytest
 
-from cudf.core import DataFrame, Series
+from cudf import DataFrame, Series
 from cudf.core.column import NumericalColumn
 from cudf.testing._utils import (
     DATETIME_TYPES,
diff --git a/python/cudf/cudf/tests/test_sparse_df.py b/python/cudf/cudf/tests/test_sparse_df.py
index 50c8f3f41a8..e10ad8e5306 100644
--- a/python/cudf/cudf/tests/test_sparse_df.py
+++ b/python/cudf/cudf/tests/test_sparse_df.py
@@ -6,8 +6,8 @@
 import pytest
 from numba import cuda
 
+from cudf import DataFrame, Series
 from cudf.comm.gpuarrow import GpuArrowReader
-from cudf.core import DataFrame, Series
 from cudf.testing._utils import assert_eq
 
 
diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py
index 582d5a43edf..0c246554082 100644
--- a/python/cudf/cudf/tests/test_transform.py
+++ b/python/cudf/cudf/tests/test_transform.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from cudf.core import Series
+from cudf import Series
 from cudf.testing._utils import NUMERIC_TYPES
 
 supported_types = NUMERIC_TYPES
diff --git a/python/cudf/cudf/tests/test_udf_binops.py b/python/cudf/cudf/tests/test_udf_binops.py
index 5a5aca615ba..4d6188acf8c 100644
--- a/python/cudf/cudf/tests/test_udf_binops.py
+++ b/python/cudf/cudf/tests/test_udf_binops.py
@@ -7,8 +7,7 @@
 from numba.np import numpy_support
 
 import cudf
-from cudf import _lib as libcudf
-from cudf.core import Series
+from cudf import Series, _lib as libcudf
 from cudf.utils import dtypes as dtypeutils
 
 
diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py
index c549dd2712b..25ebe6fa710 100644
--- a/python/cudf/cudf/tests/test_unaops.py
+++ b/python/cudf/cudf/tests/test_unaops.py
@@ -9,7 +9,7 @@
 import pytest
 
 import cudf
-from cudf.core import Series
+from cudf import Series
 from cudf.testing import _utils as utils
 
 _unaops = [operator.abs, operator.invert, operator.neg, np.ceil, np.floor]
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 71173faf9d7..9511bb389e7 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -54,16 +54,16 @@
 }
 
 cudf_dtypes_to_pandas_dtypes = {
-    np.dtype("uint8"): pd.UInt8Dtype(),
-    np.dtype("uint16"): pd.UInt16Dtype(),
-    np.dtype("uint32"): pd.UInt32Dtype(),
-    np.dtype("uint64"): pd.UInt64Dtype(),
-    np.dtype("int8"): pd.Int8Dtype(),
-    np.dtype("int16"): pd.Int16Dtype(),
-    np.dtype("int32"): pd.Int32Dtype(),
-    np.dtype("int64"): pd.Int64Dtype(),
-    np.dtype("bool_"): pd.BooleanDtype(),
-    np.dtype("object"): pd.StringDtype(),
+    cudf.dtype("uint8"): pd.UInt8Dtype(),
+    cudf.dtype("uint16"): pd.UInt16Dtype(),
+    cudf.dtype("uint32"): pd.UInt32Dtype(),
+    cudf.dtype("uint64"): pd.UInt64Dtype(),
+    cudf.dtype("int8"): pd.Int8Dtype(),
+    cudf.dtype("int16"): pd.Int16Dtype(),
+    cudf.dtype("int32"): pd.Int32Dtype(),
+    cudf.dtype("int64"): pd.Int64Dtype(),
+    cudf.dtype("bool_"): pd.BooleanDtype(),
+    cudf.dtype("object"): pd.StringDtype(),
 }
 
 pyarrow_dtypes_to_pandas_dtypes = {
@@ -80,16 +80,16 @@
 }
 
 pandas_dtypes_to_cudf_dtypes = {
-    pd.UInt8Dtype(): np.dtype("uint8"),
-    pd.UInt16Dtype(): np.dtype("uint16"),
-    pd.UInt32Dtype(): np.dtype("uint32"),
-    pd.UInt64Dtype(): np.dtype("uint64"),
-    pd.Int8Dtype(): np.dtype("int8"),
-    pd.Int16Dtype(): np.dtype("int16"),
-    pd.Int32Dtype(): np.dtype("int32"),
-    pd.Int64Dtype(): np.dtype("int64"),
-    pd.BooleanDtype(): np.dtype("bool_"),
-    pd.StringDtype(): np.dtype("object"),
+    pd.UInt8Dtype(): cudf.dtype("uint8"),
+    pd.UInt16Dtype(): cudf.dtype("uint16"),
+    pd.UInt32Dtype(): cudf.dtype("uint32"),
+    pd.UInt64Dtype(): cudf.dtype("uint64"),
+    pd.Int8Dtype(): cudf.dtype("int8"),
+    pd.Int16Dtype(): cudf.dtype("int16"),
+    pd.Int32Dtype(): cudf.dtype("int32"),
+    pd.Int64Dtype(): cudf.dtype("int64"),
+    pd.BooleanDtype(): cudf.dtype("bool_"),
+    pd.StringDtype(): cudf.dtype("object"),
 }
 
 pandas_dtypes_alias_to_cudf_alias = {
@@ -105,10 +105,10 @@
 }
 
 if PANDAS_GE_120:
-    cudf_dtypes_to_pandas_dtypes[np.dtype("float32")] = pd.Float32Dtype()
-    cudf_dtypes_to_pandas_dtypes[np.dtype("float64")] = pd.Float64Dtype()
-    pandas_dtypes_to_cudf_dtypes[pd.Float32Dtype()] = np.dtype("float32")
-    pandas_dtypes_to_cudf_dtypes[pd.Float64Dtype()] = np.dtype("float64")
+    cudf_dtypes_to_pandas_dtypes[cudf.dtype("float32")] = pd.Float32Dtype()
+    cudf_dtypes_to_pandas_dtypes[cudf.dtype("float64")] = pd.Float64Dtype()
+    pandas_dtypes_to_cudf_dtypes[pd.Float32Dtype()] = cudf.dtype("float32")
+    pandas_dtypes_to_cudf_dtypes[pd.Float64Dtype()] = cudf.dtype("float64")
     pandas_dtypes_alias_to_cudf_alias["Float32"] = "float32"
     pandas_dtypes_alias_to_cudf_alias["Float64"] = "float64"
 
@@ -351,7 +351,7 @@ def min_signed_type(x, min_size=8):
     that can represent the integer ``x``
     """
     for int_dtype in np.sctypes["int"]:
-        if (np.dtype(int_dtype).itemsize * 8) >= min_size:
+        if (cudf.dtype(int_dtype).itemsize * 8) >= min_size:
             if np.iinfo(int_dtype).min <= x <= np.iinfo(int_dtype).max:
                 return int_dtype
     # resort to using `int64` and let numpy raise appropriate exception:
@@ -364,7 +364,7 @@ def min_unsigned_type(x, min_size=8):
     that can represent the integer ``x``
     """
     for int_dtype in np.sctypes["uint"]:
-        if (np.dtype(int_dtype).itemsize * 8) >= min_size:
+        if (cudf.dtype(int_dtype).itemsize * 8) >= min_size:
             if 0 <= x <= np.iinfo(int_dtype).max:
                 return int_dtype
     # resort to using `uint64` and let numpy raise appropriate exception:
@@ -388,9 +388,9 @@ def min_column_type(x, expected_type):
         max_bound_dtype = np.min_scalar_type(x.max())
         min_bound_dtype = np.min_scalar_type(x.min())
         result_type = np.promote_types(max_bound_dtype, min_bound_dtype)
-        if result_type == np.dtype("float16"):
+        if result_type == cudf.dtype("float16"):
             # cuDF does not support float16 dtype
-            result_type = np.dtype("float32")
+            result_type = cudf.dtype("float32")
         return result_type
 
     if np.issubdtype(expected_type, np.integer):
@@ -405,32 +405,12 @@ def get_min_float_dtype(col):
     max_bound_dtype = np.min_scalar_type(float(col.max()))
     min_bound_dtype = np.min_scalar_type(float(col.min()))
     result_type = np.promote_types(max_bound_dtype, min_bound_dtype)
-    if result_type == np.dtype("float16"):
+    if result_type == cudf.dtype("float16"):
         # cuDF does not support float16 dtype
-        result_type = np.dtype("float32")
+        result_type = cudf.dtype("float32")
     return result_type
 
 
-def check_cast_unsupported_dtype(dtype):
-    if is_categorical_dtype(dtype):
-        return dtype
-
-    if isinstance(dtype, pd.core.arrays.numpy_.PandasDtype):
-        dtype = dtype.numpy_dtype
-    else:
-        dtype = cudf.dtype(dtype)
-
-    if dtype in cudf._lib.types.np_to_cudf_types:
-        return dtype
-
-    if dtype == np.dtype("float16"):
-        return np.dtype("float32")
-
-    raise NotImplementedError(
-        f"Cannot cast {dtype} dtype, as it is not supported by CuDF."
-    )
-
-
 def is_mixed_with_object_dtype(lhs, rhs):
     return (lhs.dtype == "object" and rhs.dtype != "object") or (
         rhs.dtype == "object" and lhs.dtype != "object"
@@ -550,7 +530,7 @@ def find_common_type(dtypes):
                 [dtype for dtype in dtypes if is_decimal_dtype(dtype)]
             )
         else:
-            return np.dtype("O")
+            return cudf.dtype("O")
 
     # Corner case 1:
     # Resort to np.result_type to handle "M" and "m" types separately
@@ -567,9 +547,9 @@ def find_common_type(dtypes):
         dtypes.add(np.result_type(*td_dtypes))
 
     common_dtype = np.find_common_type(list(dtypes), [])
-    if common_dtype == np.dtype("float16"):
+    if common_dtype == cudf.dtype("float16"):
         # cuDF does not support float16 dtype
-        return np.dtype("float32")
+        return cudf.dtype("float32")
     else:
         return common_dtype
 
@@ -582,9 +562,9 @@ def _can_cast(from_dtype, to_dtype):
     cudf specific dtypes.
     """
     if isinstance(from_dtype, type):
-        from_dtype = np.dtype(from_dtype)
+        from_dtype = cudf.dtype(from_dtype)
     if isinstance(to_dtype, type):
-        to_dtype = np.dtype(to_dtype)
+        to_dtype = cudf.dtype(to_dtype)
 
     # TODO : Add precision & scale checking for
     # decimal types in future
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 58d133d16d8..c9d38c8399e 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -17,7 +17,7 @@
 from cudf.utils.dtypes import to_cudf_compatible_scalar
 
 # The size of the mask in bytes
-mask_dtype = np.dtype(np.int32)
+mask_dtype = cudf.dtype(np.int32)
 mask_bitsize = mask_dtype.itemsize * 8
 
 
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 53543b9e886..0cc2821e8ef 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -67,10 +67,10 @@ def _nonempty_index(idx):
         return cudf.core.index.GenericIndex(
             np.arange(2, dtype=idx.dtype), name=idx.name
         )
-    elif isinstance(idx, cudf.core.MultiIndex):
+    elif isinstance(idx, cudf.core.multiindex.MultiIndex):
         levels = [meta_nonempty(lev) for lev in idx.levels]
         codes = [[0, 0] for i in idx.levels]
-        return cudf.core.MultiIndex(
+        return cudf.core.multiindex.MultiIndex(
             levels=levels, codes=codes, names=idx.names
         )
 

From 3eba47c14447ee5f36451fd80eebd9a9bcd18d0b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 11 Aug 2021 13:57:09 -0400
Subject: [PATCH 12/20] Progress

---
 python/cudf/cudf/_lib/copying.pyx      |  7 ++++---
 python/cudf/cudf/core/column/column.py |  3 ---
 python/cudf/cudf/core/dataframe.py     |  2 +-
 python/cudf/cudf/utils/dtypes.py       | 23 +++++++----------------
 4 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index a5789e4d0ae..aa279e84d91 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -787,12 +787,13 @@ cdef class _CPackedColumns:
         """
         Construct a ``PackedColumns`` object from a ``cudf.DataFrame``.
         """
-        from cudf import RangeIndex, dtypes
+        import cudf.core.dtypes
+        from cudf import RangeIndex
 
         cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns)
 
         if keep_index and (
-            not isinstance(input_table.index, RangeIndex)
+            not isinstance(input_table.index, cudf.RangeIndex)
             or input_table.index.start != 0
             or input_table.index.stop != len(input_table)
             or input_table.index.step != 1
@@ -805,7 +806,7 @@ cdef class _CPackedColumns:
         p.column_names = input_table._column_names
         p.column_dtypes = {}
         for name, col in input_table._data.items():
-            if isinstance(col.dtype, dtypes._BaseDtype):
+            if isinstance(col.dtype, cudf.core.dtypes._BaseDtype):
                 p.column_dtypes[name] = col.dtype
 
         p.c_obj = move(cpp_copying.pack(input_table_view))
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 7b7bef0e2d1..7a02e98ef12 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -261,9 +261,6 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
         if not isinstance(array, (pa.Array, pa.ChunkedArray)):
             raise TypeError("array should be PyArrow array or chunked array")
 
-        if array.type == pa.float16():
-            array = pa.Array.from_pandas(array.to_numpy().astype("float32"))
-
         data = pa.table([array], [None])
 
         if isinstance(array.type, pa.DictionaryType):
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a9ddc77963c..d18c9adb7a1 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5756,7 +5756,7 @@ def to_records(self, index=True):
         """
         members = [("index", self.index.dtype)] if index else []
         members += [(col, self[col].dtype) for col in self._data.names]
-        dtype = cudf.dtype(members)
+        dtype = np.dtype(members)
         ret = np.recarray(len(self), dtype=dtype)
         if index:
             ret["index"] = self.index.to_array()
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 9511bb389e7..81727aad9a2 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -388,27 +388,22 @@ def min_column_type(x, expected_type):
         max_bound_dtype = np.min_scalar_type(x.max())
         min_bound_dtype = np.min_scalar_type(x.min())
         result_type = np.promote_types(max_bound_dtype, min_bound_dtype)
-        if result_type == cudf.dtype("float16"):
-            # cuDF does not support float16 dtype
-            result_type = cudf.dtype("float32")
-        return result_type
 
-    if np.issubdtype(expected_type, np.integer):
+    elif np.issubdtype(expected_type, np.integer):
         max_bound_dtype = np.min_scalar_type(x.max())
         min_bound_dtype = np.min_scalar_type(x.min())
-        return np.promote_types(max_bound_dtype, min_bound_dtype)
+        result_type = np.promote_types(max_bound_dtype, min_bound_dtype)
+    else:
+        result_type = x.dtype
 
-    return x.dtype
+    return cudf.dtype(result_type)
 
 
 def get_min_float_dtype(col):
     max_bound_dtype = np.min_scalar_type(float(col.max()))
     min_bound_dtype = np.min_scalar_type(float(col.min()))
     result_type = np.promote_types(max_bound_dtype, min_bound_dtype)
-    if result_type == cudf.dtype("float16"):
-        # cuDF does not support float16 dtype
-        result_type = cudf.dtype("float32")
-    return result_type
+    return cudf.dtype(result_type)
 
 
 def is_mixed_with_object_dtype(lhs, rhs):
@@ -547,11 +542,7 @@ def find_common_type(dtypes):
         dtypes.add(np.result_type(*td_dtypes))
 
     common_dtype = np.find_common_type(list(dtypes), [])
-    if common_dtype == cudf.dtype("float16"):
-        # cuDF does not support float16 dtype
-        return cudf.dtype("float32")
-    else:
-        return common_dtype
+    return cudf.dtype(common_dtype)
 
 
 def _can_cast(from_dtype, to_dtype):

From 048629c4b31db385e855c2eb0e1578186aaffa8f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 11 Aug 2021 14:32:27 -0400
Subject: [PATCH 13/20] Reject float128 and normalize pandas numpy dtypes in dtype()

---
 python/cudf/cudf/core/dtypes.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 0e5646c6026..c5988d76207 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -38,6 +38,8 @@ def dtype(arbitrary):
         np_dtype = np.dtype(arbitrary)
         if np_dtype.name == "float16":
             np_dtype = np.dtype("float32")
+        elif np_dtype.name == "float128":
+            raise NotImplementedError()
         elif np_dtype.kind in ("OU"):
             np_dtype = np.dtype("object")
     except TypeError:
@@ -56,7 +58,7 @@ def dtype(arbitrary):
     #  Return the corresponding NumPy/cuDF type.
     pd_dtype = pd.api.types.pandas_dtype(arbitrary)
     try:
-        return pd_dtype.numpy_dtype
+        return dtype(pd_dtype.numpy_dtype)
     except AttributeError:
         if isinstance(pd_dtype, pd.CategoricalDtype):
             return cudf.CategoricalDtype.from_pandas(pd_dtype)

From 40736c46cce9615e91f0895b31264a9a3cc7df53 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 11 Aug 2021 14:42:26 -0400
Subject: [PATCH 14/20] Early returns

---
 python/cudf/cudf/core/dtypes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index c5988d76207..ead0b6453c1 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -37,11 +37,11 @@ def dtype(arbitrary):
     try:
         np_dtype = np.dtype(arbitrary)
         if np_dtype.name == "float16":
-            np_dtype = np.dtype("float32")
+            return np.dtype("float32")
         elif np_dtype.name == "float128":
             raise NotImplementedError()
         elif np_dtype.kind in ("OU"):
-            np_dtype = np.dtype("object")
+            return np.dtype("object")
     except TypeError:
         pass
     else:

From 550c7ba3a8e3105dd6852031e6fefb0d0b3c4dac Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 11 Aug 2021 14:54:29 -0400
Subject: [PATCH 15/20] More tests

---
 python/cudf/cudf/tests/test_dtypes.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index 0c3769fab67..a5a9109e13c 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -286,6 +286,21 @@ def test_lists_of_structs_dtype(data):
         ("datetime64[ms]", np.dtype("<M8[ms]")),
         ("datetime64[D]", np.dtype("<M8[D]")),
         ("<M8[s]", np.dtype("<M8[s]")),
+        (cudf.ListDtype("int64"), cudf.ListDtype("int64")),
+        ("category", cudf.CategoricalDtype()),
+        (
+            cudf.CategoricalDtype(categories=("a", "b", "c")),
+            cudf.CategoricalDtype(categories=("a", "b", "c")),
+        ),
+        (
+            pd.CategoricalDtype(categories=("a", "b", "c")),
+            cudf.CategoricalDtype(categories=("a", "b", "c")),
+        ),
+        (
+            # this is a pandas.core.arrays.numpy_.PandasDtype...
+            pd.array([1], dtype="int16").dtype,
+            np.dtype("int16"),
+        ),
     ],
 )
 def test_dtype(in_dtype, expect):

From 72d6304adc31fba55772c543b4bf2d68a27cdcfd Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 11 Aug 2021 18:25:26 -0400
Subject: [PATCH 16/20] Resolve circular import issues

---
 python/cudf/cudf/_lib/scalar.pyx                      | 11 +++++------
 python/cudf/cudf/_lib/string_casting.pyx              |  7 ++-----
 .../cudf/_lib/strings/convert/convert_fixed_point.pyx |  8 ++------
 python/cudf/cudf/_lib/table.pyx                       |  6 +++---
 python/cudf/cudf/core/column/categorical.py           |  5 +++--
 5 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 95fa5d4d20d..fe11d5e2627 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -35,6 +35,7 @@ from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 
 from cudf._lib.interop import from_arrow, to_arrow
 
+cimport cudf._lib.cpp.types as libcudf_types
 from cudf._lib.cpp.scalar.scalar cimport (
     duration_scalar,
     fixed_point_scalar,
@@ -60,9 +61,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 )
 from cudf._lib.utils cimport data_from_table_view
 
-from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype, is_struct_dtype
-
-cimport cudf._lib.cpp.types as libcudf_types
+import cudf
 
 
 cdef class DeviceScalar:
@@ -120,9 +119,9 @@ cdef class DeviceScalar:
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.Decimal64Dtype):
             result = _get_py_decimal_from_fixed_point(self.c_value)
-        elif is_struct_dtype(self.dtype):
+        elif cudf.api.types.is_struct_dtype(self.dtype):
             result = _get_py_dict_from_struct(self.c_value)
-        elif is_list_dtype(self.dtype):
+        elif cudf.api.types.is_list_dtype(self.dtype):
             result = _get_py_list_from_list(self.c_value)
         elif pd.api.types.is_string_dtype(self.dtype):
             result = _get_py_string_from_string(self.c_value)
@@ -309,7 +308,7 @@ cdef _set_decimal64_from_scalar(unique_ptr[scalar]& s,
                                 object value,
                                 object dtype,
                                 bool valid=True):
-    value = _decimal_to_int64(value) if valid else 0
+    value = cudf.utils.dtypes._decimal_to_int64(value) if valid else 0
     s.reset(
         new fixed_point_scalar[decimal64](
             <int64_t>np.int64(value), scale_type(-dtype.scale), valid
diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 8d7e307c5fb..25e4149183e 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -10,10 +10,6 @@ from cudf._lib.scalar cimport DeviceScalar
 
 from cudf._lib.types import np_to_cudf_types
 
-from cudf._lib.types cimport underlying_type_t_type_id
-
-from cudf.core.column.column import as_column
-
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -55,6 +51,7 @@ from cudf._lib.cpp.strings.convert.convert_urls cimport (
     url_encode as cpp_url_encode,
 )
 from cudf._lib.cpp.types cimport data_type, type_id
+from cudf._lib.types cimport underlying_type_t_type_id
 
 import cudf
 
@@ -590,7 +587,7 @@ def istimestamp(
 
     """
     if input_col.size == 0:
-        return as_column([], dtype=kwargs.get('dtype'))
+        return cudf.core.column.as_column([], dtype=kwargs.get('dtype'))
     cdef column_view input_column_view = input_col.view()
     cdef string c_timestamp_format = <string>str(format).encode('UTF-8')
     cdef unique_ptr[column] c_result
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
index 6eb8984b869..e35ab6489c6 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -6,11 +6,6 @@ from cudf._lib.column cimport Column
 
 from cudf._lib.types import np_to_cudf_types
 
-from cudf._lib.cpp.types cimport DECIMAL64
-from cudf._lib.types cimport underlying_type_t_type_id
-
-from cudf.core.column.column import as_column
-
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -22,7 +17,8 @@ from cudf._lib.cpp.strings.convert.convert_fixed_point cimport (
     is_fixed_point as cpp_is_fixed_point,
     to_fixed_point as cpp_to_fixed_point,
 )
-from cudf._lib.cpp.types cimport data_type, type_id
+from cudf._lib.cpp.types cimport DECIMAL64, data_type, type_id
+from cudf._lib.types cimport underlying_type_t_type_id
 
 
 def from_decimal(Column input_col):
diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index 09cb05a076d..2981a46a54a 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -4,8 +4,6 @@ import itertools
 
 import numpy as np
 
-from cudf.core.column_accessor import ColumnAccessor
-
 from cython.operator cimport dereference
 from libc.stdint cimport uintptr_t
 from libcpp.memory cimport unique_ptr
@@ -19,6 +17,8 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport mutable_table_view, table_view
 from cudf._lib.cpp.types cimport size_type
 
+import cudf
+
 
 cdef class Table:
     def __init__(self, object data=None, object index=None):
@@ -34,7 +34,7 @@ cdef class Table:
         """
         if data is None:
             data = {}
-        self._data = ColumnAccessor(data)
+        self._data = cudf.core.column_accessor.ColumnAccessor(data)
         self._index = index
 
     @property
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index f435e0fa88c..a486d70f047 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -22,7 +22,6 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._lib.scalar import as_device_scalar
 from cudf._lib.transform import bools_to_mask
 from cudf._typing import ColumnLike, Dtype, ScalarLike
 from cudf.core.buffer import Buffer
@@ -884,7 +883,9 @@ def _fill(
             return self if inplace else self.copy()
 
         fill_code = self._encode(fill_value)
-        fill_scalar = as_device_scalar(fill_code, self.codes.dtype)
+        fill_scalar = cudf._lib.scalar.as_device_scalar(
+            fill_code, self.codes.dtype
+        )
 
         result = self if inplace else self.copy()
 

From c8925f55a9a797528e60059e63f873b04465dc60 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 12 Aug 2021 10:48:37 -0400
Subject: [PATCH 17/20] Unused import

---
 python/cudf/cudf/_lib/copying.pyx | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index aa279e84d91..e00ed6bc647 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -787,8 +787,7 @@ cdef class _CPackedColumns:
         """
         Construct a ``PackedColumns`` object from a ``cudf.DataFrame``.
         """
-        import cudf.core.dtypes
-        from cudf import RangeIndex
+        import cudf.core.dtype
 
         cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns)
 

From 26df99a62c63539ae5dd99b6173e3c3be22d3b04 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 12 Aug 2021 10:49:00 -0400
Subject: [PATCH 18/20] Space

---
 python/cudf/cudf/_lib/transform.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index 67fc1c441b0..9fada59640e 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -60,7 +60,7 @@ def mask_to_bools(object mask_buffer, size_type begin_bit, size_type end_bit):
     """
     if not isinstance(mask_buffer, cudf.core.buffer.Buffer):
         raise TypeError("mask_buffer is not an instance of "
-                        "cudf.core.buffer. Buffer")
+                        "cudf.core.buffer.Buffer")
     cdef bitmask_type* bit_mask = <bitmask_type*><uintptr_t>(mask_buffer.ptr)
 
     cdef unique_ptr[column] result

From fec34d919f94af656aa0de814ed30b44083cf2d7 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 12 Aug 2021 10:58:40 -0400
Subject: [PATCH 19/20] Add interval tests

---
 python/cudf/cudf/tests/test_dtypes.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index a5a9109e13c..ee6cc7b6df6 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -301,6 +301,9 @@ def test_lists_of_structs_dtype(data):
             pd.array([1], dtype="int16").dtype,
             np.dtype("int16"),
         ),
+        (pd.IntervalDtype("int"), cudf.IntervalDtype("int64")),
+        (cudf.IntervalDtype("int"), cudf.IntervalDtype("int64")),
+        (pd.IntervalDtype("int64"), cudf.IntervalDtype("int64")),
     ],
 )
 def test_dtype(in_dtype, expect):

From 5fc19a92e53089a68a5ad4526923b941bd495706 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 12 Aug 2021 13:28:58 -0400
Subject: [PATCH 20/20] Fix mistyped cudf.core.dtypes import in copying.pyx

---
 python/cudf/cudf/_lib/copying.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index e00ed6bc647..ed31574b4a5 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -787,7 +787,7 @@ cdef class _CPackedColumns:
         """
         Construct a ``PackedColumns`` object from a ``cudf.DataFrame``.
         """
-        import cudf.core.dtype
+        import cudf.core.dtypes
 
         cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns)