From fd5d5963711ff9e7a295fc99ab1507ec300d395e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 16 Jan 2024 14:03:48 -0800
Subject: [PATCH 1/2] Remove **kwargs from astype

---
 python/cudf/cudf/core/column/column.py   | 10 ++++------
 python/cudf/cudf/core/dataframe.py       | 10 ++++++++--
 python/cudf/cudf/core/frame.py           | 13 +++++--------
 python/cudf/cudf/core/indexed_frame.py   | 11 ++++++++---
 python/cudf/cudf/core/series.py          | 10 ++++++++--
 python/cudf/cudf/tests/test_dataframe.py | 17 +++++++++--------
 6 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index c13ec33c51c..0b21dc72444 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -958,9 +958,7 @@ def distinct_count(self, dropna: bool = True) -> int:
     def can_cast_safely(self, to_dtype: Dtype) -> bool:
         raise NotImplementedError()
 
-    def astype(
-        self, dtype: Dtype, copy: bool = False, format: str | None = None
-    ) -> ColumnBase:
+    def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase:
         if copy:
             col = self.copy()
         else:
@@ -1000,7 +998,7 @@ def astype(
                     f"Casting to {dtype} is not supported, use "
                     "`.astype('str')` instead."
                 )
-            return col.as_string_column(dtype, format=format)
+            return col.as_string_column(dtype)
         elif isinstance(dtype, (ListDtype, StructDtype)):
             if not col.dtype == dtype:
                 raise NotImplementedError(
@@ -1012,9 +1010,9 @@ def astype(
         elif isinstance(dtype, cudf.core.dtypes.DecimalDtype):
             return col.as_decimal_column(dtype)
         elif np.issubdtype(cast(Any, dtype), np.datetime64):
-            return col.as_datetime_column(dtype, format=format)
+            return col.as_datetime_column(dtype)
         elif np.issubdtype(cast(Any, dtype), np.timedelta64):
-            return col.as_timedelta_column(dtype, format=format)
+            return col.as_timedelta_column(dtype)
         else:
             return col.as_numerical_column(dtype)
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f9cf180ff44..2f18c194fde 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -19,6 +19,7 @@
     Callable,
     Dict,
     List,
+    Literal,
     MutableMapping,
     Optional,
     Set,
@@ -1774,7 +1775,12 @@ def _concat(
 
         return out
 
-    def astype(self, dtype, copy=False, errors="raise", **kwargs):
+    def astype(
+        self,
+        dtype,
+        copy: bool = False,
+        errors: Literal["raise", "ignore"] = "raise",
+    ):
         if is_dict_like(dtype):
             if len(set(dtype.keys()) - set(self._data.names)) > 0:
                 raise KeyError(
@@ -1783,7 +1789,7 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
                 )
         else:
             dtype = {cc: dtype for cc in self._data.names}
-        return super().astype(dtype, copy, errors, **kwargs)
+        return super().astype(dtype, copy, errors)
 
     def _clean_renderable_dataframe(self, output):
         """
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5f7a86e86d8..d5781fad966 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -286,14 +286,11 @@ def __len__(self):
         return self._num_rows
 
     @_cudf_nvtx_annotate
-    def astype(self, dtype, copy=False, **kwargs):
-        result_data = {}
-        for col_name, col in self._data.items():
-            dt = dtype.get(col_name, col.dtype)
-            if not is_dtype_equal(dt, col.dtype):
-                result_data[col_name] = col.astype(dt, copy=copy, **kwargs)
-            else:
-                result_data[col_name] = col.copy() if copy else col
+    def astype(self, dtype, copy: bool = False):
+        result_data = {
+            col_name: col.astype(dtype.get(col_name, col.dtype), copy=copy)
+            for col_name, col in self._data.items()
+        }
 
         return ColumnAccessor._create_unsafe(
             data=result_data,
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 2a35ac0f959..8896205c626 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -14,6 +14,7 @@
     Callable,
     Dict,
     List,
+    Literal,
     MutableMapping,
     Optional,
     Tuple,
@@ -3753,7 +3754,12 @@ def _append(
 
         return cudf.concat(to_concat, ignore_index=ignore_index, sort=sort)
 
-    def astype(self, dtype, copy=False, errors="raise", **kwargs):
+    def astype(
+        self,
+        dtype,
+        copy: bool = False,
+        errors: Literal["raise", "ignore"] = "raise",
+    ):
         """Cast the object to the given dtype.
 
         Parameters
@@ -3774,7 +3780,6 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
             -   ``raise`` : allow exceptions to be raised
             -   ``ignore`` : suppress exceptions. On error return original
                 object.
-        **kwargs : extra arguments to pass on to the constructor
 
         Returns
         -------
@@ -3865,7 +3870,7 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
             raise ValueError("invalid error value specified")
 
         try:
-            data = super().astype(dtype, copy, **kwargs)
+            data = super().astype(dtype, copy)
         except Exception as e:
             if errors == "raise":
                 raise e
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index bc1eaef86db..55100343306 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -12,6 +12,7 @@
 from typing import (
     Any,
     Dict,
+    Literal,
     MutableMapping,
     Optional,
     Sequence,
@@ -2141,7 +2142,12 @@ def nullmask(self):
         return cudf.Series(self._column.nullmask)
 
     @_cudf_nvtx_annotate
-    def astype(self, dtype, copy=False, errors="raise", **kwargs):
+    def astype(
+        self,
+        dtype,
+        copy: bool = False,
+        errors: Literal["raise", "ignore"] = "raise",
+    ):
         if is_dict_like(dtype):
             if len(dtype) > 1 or self.name not in dtype:
                 raise KeyError(
@@ -2150,7 +2156,7 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
                 )
         else:
             dtype = {self.name: dtype}
-        return super().astype(dtype, copy, errors, **kwargs)
+        return super().astype(dtype, copy, errors)
 
     @_cudf_nvtx_annotate
     def sort_index(self, axis=0, *args, **kwargs):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 30530e9d2a3..115635269ae 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 import array as arr
 import contextlib
@@ -5114,15 +5114,16 @@ def test_df_astype_to_categorical_ordered(ordered):
 
 
 @pytest.mark.parametrize(
-    "dtype,args",
-    [(dtype, {}) for dtype in ALL_TYPES]
-    + [("category", {"ordered": True}), ("category", {"ordered": False})],
+    "dtype",
+    [dtype for dtype in ALL_TYPES]
+    + [
+        cudf.CategoricalDtype(ordered=True),
+        cudf.CategoricalDtype(ordered=False),
+    ],
 )
-def test_empty_df_astype(dtype, args):
+def test_empty_df_astype(dtype):
     df = cudf.DataFrame()
-    kwargs = {}
-    kwargs.update(args)
-    assert_eq(df, df.astype(dtype=dtype, **kwargs))
+    assert_eq(df, df.astype(dtype=dtype))
 
 
 @pytest.mark.parametrize(

From 6dd5d5222df635990add4d5da3abd8241b149631 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 18 Jan 2024 10:12:41 -0800
Subject: [PATCH 2/2] Add pandas astype comparison

---
 python/cudf/cudf/tests/test_dataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 115635269ae..d75db7dfaae 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -5123,7 +5123,9 @@ def test_df_astype_to_categorical_ordered(ordered):
 )
 def test_empty_df_astype(dtype):
     df = cudf.DataFrame()
-    assert_eq(df, df.astype(dtype=dtype))
+    result = df.astype(dtype=dtype)
+    assert_eq(df, result)
+    assert_eq(df.to_pandas().astype(dtype=dtype), result)
 
 
 @pytest.mark.parametrize(