From 040f5773c37d89cdbc0baf57fa70a4b3dcab4fc9 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 11 May 2023 18:16:37 +0100
Subject: [PATCH] Run no_implicit_optional to rewrite types

PEP 484 prohibits implicit Optional types, so

    def bad(x: int = None):
        pass

is invalid. Since version 0.983, mypy prohibits this usage by default,
so rewrite all of the type annotations (except x: Any = None) using
https://github.com/hauntsaninja/no_implicit_optional.
---
 python/cudf/cudf/_lib/column.pyi              |  4 +-
 python/cudf/cudf/core/buffer/buffer.py        |  4 +-
 python/cudf/cudf/core/buffer/spill_manager.py |  6 +-
 .../cudf/cudf/core/buffer/spillable_buffer.py |  8 +-
 python/cudf/cudf/core/buffer/utils.py         |  4 +-
 python/cudf/cudf/core/column/categorical.py   | 21 +++--
 python/cudf/cudf/core/column/column.py        | 69 +++++++++--------
 python/cudf/cudf/core/column/datetime.py      | 34 +++++----
 python/cudf/cudf/core/column/decimal.py       |  7 +-
 python/cudf/cudf/core/column/interval.py      |  8 +-
 python/cudf/cudf/core/column/numerical.py     | 20 ++---
 .../cudf/cudf/core/column/numerical_base.py   | 31 +++++---
 python/cudf/cudf/core/column/string.py        | 76 +++++++++++--------
 python/cudf/cudf/core/column/struct.py        |  5 +-
 python/cudf/cudf/core/column/timedelta.py     | 21 ++---
 python/cudf/cudf/core/column_accessor.py      |  2 +-
 python/cudf/cudf/core/dataframe.py            |  4 +-
 python/cudf/cudf/core/index.py                |  1 -
 python/cudf/cudf/core/multiindex.py           |  3 -
 python/cudf/cudf/core/single_column_frame.py  |  1 -
 20 files changed, 196 insertions(+), 133 deletions(-)

diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi
index 013cba3ae03..bd53801a972 100644
--- a/python/cudf/cudf/_lib/column.pyi
+++ b/python/cudf/cudf/_lib/column.pyi
@@ -29,8 +29,8 @@ class Column:
         size: int,
         dtype: Dtype,
         mask: Optional[Buffer] = None,
-        offset: int = None,
-        null_count: int = None,
+        offset: Optional[int] = None,
+        null_count: Optional[int] = None,
         children: Tuple[ColumnBase, ...] = (),
     ) -> None: ...
     @property
diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py
index 2262730d8a1..abf1ec47e3d 100644
--- a/python/cudf/cudf/core/buffer/buffer.py
+++ b/python/cudf/cudf/core/buffer/buffer.py
@@ -5,7 +5,7 @@
 import math
 import pickle
 from types import SimpleNamespace
-from typing import Any, Dict, Mapping, Sequence, Tuple, Type, TypeVar
+from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, TypeVar
 
 import numpy
 
@@ -42,7 +42,7 @@ def host_memory_allocation(nbytes: int) -> memoryview:
 def cuda_array_interface_wrapper(
     ptr: int,
     size: int,
-    owner: object = None,
+    owner: Optional[object] = None,
     readonly=False,
     typestr="|u1",
     version=0,
diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
index d2a87af3869..7f8399ba522 100644
--- a/python/cudf/cudf/core/buffer/spill_manager.py
+++ b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -225,7 +225,7 @@ def __init__(
         self,
         *,
         spill_on_demand: bool = False,
-        device_memory_limit: int = None,
+        device_memory_limit: Optional[int] = None,
         statistic_level: int = 0,
     ) -> None:
         self._lock = threading.Lock()
@@ -358,7 +358,7 @@ def spill_device_memory(self, nbytes: int) -> int:
                     buf.lock.release()
         return spilled
 
-    def spill_to_device_limit(self, device_limit: int = None) -> int:
+    def spill_to_device_limit(self, device_limit: Optional[int] = None) -> int:
         """Try to spill device memory until device limit
 
         Notice, by default this is a no-op.
diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py
index c71841a5a26..169b52b828e 100644
--- a/python/cudf/cudf/core/buffer/spillable_buffer.py
+++ b/python/cudf/cudf/core/buffer/spillable_buffer.py
@@ -448,7 +448,9 @@ def __cuda_array_interface__(self) -> dict:
             "version": 0,
         }
 
-    def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview:
+    def memoryview(
+        self, *, offset: int = 0, size: Optional[int] = None
+    ) -> memoryview:
         size = self._size if size is None else size
         with self.lock:
             if self.spillable:
@@ -573,7 +575,9 @@ def deserialize(cls, header: dict, frames: list):
         # copied.
         return SpillableBuffer.deserialize(header, frames)
 
-    def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview:
+    def memoryview(
+        self, *, offset: int = 0, size: Optional[int] = None
+    ) -> memoryview:
         size = self._size if size is None else size
         return self._base.memoryview(offset=self._offset + offset, size=size)
 
diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py
index 2fe332a12fe..85e4762641e 100644
--- a/python/cudf/cudf/core/buffer/utils.py
+++ b/python/cudf/cudf/core/buffer/utils.py
@@ -16,8 +16,8 @@
 def as_buffer(
     data: Union[int, Any],
     *,
-    size: int = None,
-    owner: object = None,
+    size: Optional[int] = None,
+    owner: Optional[object] = None,
     exposed: bool = False,
 ) -> Buffer:
     """Factory function to wrap `data` in a Buffer object.
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 52f7c0b957f..c6d7f779884 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -710,10 +710,10 @@ class CategoricalColumn(column.ColumnBase):
     def __init__(
         self,
         dtype: CategoricalDtype,
-        mask: Buffer = None,
-        size: int = None,
+        mask: Optional[Buffer] = None,
+        size: Optional[int] = None,
         offset: int = 0,
-        null_count: int = None,
+        null_count: Optional[int] = None,
         children: Tuple["column.ColumnBase", ...] = (),
     ):
 
@@ -889,7 +889,7 @@ def _fill(
         return result
 
     def slice(
-        self, start: int, stop: int, stride: int = None
+        self, start: int, stop: int, stride: Optional[int] = None
     ) -> "column.ColumnBase":
         codes = self.codes.slice(start, stop, stride)
         return cudf.core.column.build_categorical_column(
@@ -962,7 +962,9 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]:
             " if you need this functionality."
         )
 
-    def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
+    def to_pandas(
+        self, index: Optional[pd.Index] = None, **kwargs
+    ) -> pd.Series:
         if self.categories.dtype.kind == "f":
             new_mask = bools_to_mask(self.notnull())
             col = column.build_categorical_column(
@@ -1219,7 +1221,10 @@ def notnull(self) -> ColumnBase:
         return result
 
     def fillna(
-        self, fill_value: Any = None, method: Any = None, dtype: Dtype = None
+        self,
+        fill_value: Any = None,
+        method: Any = None,
+        dtype: Optional[Dtype] = None,
     ) -> CategoricalColumn:
         """
         Fill null values with *fill_value*
@@ -1237,7 +1242,7 @@ def fillna(
                     try:
                         fill_value = self._encode(fill_value)
                         fill_value = self.codes.dtype.type(fill_value)
-                    except (ValueError) as err:
+                    except ValueError as err:
                         err_msg = "fill value must be in categories"
                         raise ValueError(err_msg) from err
             else:
@@ -1641,7 +1646,7 @@ def _create_empty_categorical_column(
 
 
 def pandas_categorical_as_column(
-    categorical: ColumnLike, codes: ColumnLike = None
+    categorical: ColumnLike, codes: Optional[ColumnLike] = None
 ) -> CategoricalColumn:
     """Creates a CategoricalColumn from a pandas.Categorical
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 042a1060fae..6557001f884 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -201,7 +201,9 @@ def __repr__(self):
             f"dtype: {self.dtype}"
         )
 
-    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
+    def to_pandas(
+        self, index: Optional[pd.Index] = None, **kwargs
+    ) -> "pd.Series":
         """Convert object to pandas type.
 
         The default implementation falls back to PyArrow for the conversion.
@@ -548,7 +550,9 @@ def element_indexing(self, index: int):
 
         return libcudf.copying.get_element(self, idx).value
 
-    def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase:
+    def slice(
+        self, start: int, stop: int, stride: Optional[int] = None
+    ) -> ColumnBase:
         stride = 1 if stride is None else stride
         if start < 0:
             start = start + len(self)
@@ -699,8 +703,8 @@ def _check_scatter_key_length(
     def fillna(
         self: T,
         value: Any = None,
-        method: str = None,
-        dtype: Dtype = None,
+        method: Optional[str] = None,
+        dtype: Optional[Dtype] = None,
     ) -> T:
         """Fill null values with ``value``.
 
@@ -1097,7 +1101,6 @@ def apply_boolean_mask(self, mask) -> ColumnBase:
     def argsort(
         self, ascending: bool = True, na_position: str = "last"
     ) -> "cudf.core.column.NumericalColumn":
-
         return self.as_frame()._get_sorted_inds(
             ascending=ascending, na_position=na_position
         )
@@ -1244,14 +1247,19 @@ def normalize_binop_value(
     ) -> Union[ColumnBase, ScalarLike]:
         raise NotImplementedError
 
-    def _minmax(self, skipna: bool = None):
+    def _minmax(self, skipna: Optional[bool] = None):
         result_col = self._process_for_reduction(skipna=skipna)
         if isinstance(result_col, ColumnBase):
             return libcudf.reduce.minmax(result_col)
         return result_col
 
     def _reduce(
-        self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs
+        self,
+        op: str,
+        skipna: Optional[bool] = None,
+        min_count: int = 0,
+        *args,
+        **kwargs,
     ) -> ScalarLike:
         """Compute {op} of column values.
 
@@ -1273,7 +1281,7 @@ def contains_na_entries(self) -> bool:
         return self.null_count != 0
 
     def _process_for_reduction(
-        self, skipna: bool = None, min_count: int = 0
+        self, skipna: Optional[bool] = None, min_count: int = 0
     ) -> Union[ColumnBase, ScalarLike]:
         skipna = True if skipna is None else skipna
 
@@ -1314,8 +1322,8 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
     def _label_encoding(
         self,
         cats: ColumnBase,
-        dtype: Dtype = None,
-        na_sentinel: ScalarLike = None,
+        dtype: Optional[Dtype] = None,
+        na_sentinel: Optional[ScalarLike] = None,
     ):
         """
         Convert each value in `self` into an integer code, with `cats`
@@ -1389,9 +1397,9 @@ def _return_sentinel_column():
 
 def column_empty_like(
     column: ColumnBase,
-    dtype: Dtype = None,
+    dtype: Optional[Dtype] = None,
     masked: bool = False,
-    newsize: int = None,
+    newsize: Optional[int] = None,
 ) -> ColumnBase:
     """Allocate a new column like the given *column*"""
     if dtype is None:
@@ -1494,10 +1502,10 @@ def build_column(
     data: Union[Buffer, None],
     dtype: Dtype,
     *,
-    size: int = None,
-    mask: Buffer = None,
+    size: Optional[int] = None,
+    mask: Optional[Buffer] = None,
     offset: int = 0,
-    null_count: int = None,
+    null_count: Optional[int] = None,
     children: Tuple[ColumnBase, ...] = (),
 ) -> ColumnBase:
     """
@@ -1666,10 +1674,10 @@ def build_column(
 def build_categorical_column(
     categories: ColumnBase,
     codes: ColumnBase,
-    mask: Buffer = None,
-    size: int = None,
+    mask: Optional[Buffer] = None,
+    size: Optional[int] = None,
     offset: int = 0,
-    null_count: int = None,
+    null_count: Optional[int] = None,
     ordered: bool = False,
 ) -> "cudf.core.column.CategoricalColumn":
     """
@@ -1757,10 +1765,10 @@ def build_interval_column(
 def build_list_column(
     indices: ColumnBase,
     elements: ColumnBase,
-    mask: Buffer = None,
-    size: int = None,
+    mask: Optional[Buffer] = None,
+    size: Optional[int] = None,
     offset: int = 0,
-    null_count: int = None,
+    null_count: Optional[int] = None,
 ) -> "cudf.core.column.ListColumn":
     """
     Build a ListColumn
@@ -1803,10 +1811,10 @@ def build_struct_column(
     names: Sequence[str],
     children: Tuple[ColumnBase, ...],
     dtype: Optional[Dtype] = None,
-    mask: Buffer = None,
-    size: int = None,
+    mask: Optional[Buffer] = None,
+    size: Optional[int] = None,
     offset: int = 0,
-    null_count: int = None,
+    null_count: Optional[int] = None,
 ) -> "cudf.core.column.StructColumn":
     """
     Build a StructColumn
@@ -1863,9 +1871,9 @@ def _make_copy_replacing_NaT_with_null(column):
 
 def as_column(
     arbitrary: Any,
-    nan_as_null: bool = None,
-    dtype: Dtype = None,
-    length: int = None,
+    nan_as_null: Optional[bool] = None,
+    dtype: Optional[Dtype] = None,
+    length: Optional[int] = None,
 ):
     """Create a Column from an arbitrary object
 
@@ -2106,7 +2114,6 @@ def as_column(
 
             data = build_column(data=buffer, mask=mask, dtype=arbitrary.dtype)
         elif arb_dtype.kind == "m":
-
             time_unit = get_time_unit(arbitrary)
             cast_dtype = time_unit in ("D", "W", "M", "Y")
 
@@ -2466,7 +2473,7 @@ def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]:
 
 def arange(
     start: Union[int, float],
-    stop: Union[int, float] = None,
+    stop: Optional[Union[int, float]] = None,
     step: Union[int, float] = 1,
     dtype=None,
 ) -> cudf.core.column.NumericalColumn:
@@ -2524,7 +2531,9 @@ def arange(
     )
 
 
-def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase:
+def full(
+    size: int, fill_value: ScalarLike, dtype: Optional[Dtype] = None
+) -> ColumnBase:
     """
     Returns a column of given size and dtype, filled with a given value.
 
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 4c65a631adc..c0a2a6ac546 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -6,7 +6,7 @@
 import locale
 import re
 from locale import nl_langinfo
-from typing import Any, Mapping, Sequence, cast
+from typing import Any, Mapping, Optional, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -125,10 +125,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: DtypeObj,
-        mask: Buffer = None,
-        size: int = None,  # TODO: make non-optional
+        mask: Optional[Buffer] = None,
+        size: Optional[int] = None,  # TODO: make non-optional
         offset: int = 0,
-        null_count: int = None,
+        null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
 
@@ -202,7 +202,10 @@ def day_of_year(self) -> ColumnBase:
         return self.get_dt_field("day_of_year")
 
     def to_pandas(
-        self, index: pd.Index = None, nullable: bool = False, **kwargs
+        self,
+        index: Optional[pd.Index] = None,
+        nullable: bool = False,
+        **kwargs,
     ) -> "cudf.Series":
         # Workaround until following issue is fixed:
         # https://issues.apache.org/jira/browse/ARROW-9772
@@ -363,7 +366,7 @@ def mean(
 
     def std(
         self,
-        skipna: bool = None,
+        skipna: Optional[bool] = None,
         min_count: int = 0,
         dtype: Dtype = np.float64,
         ddof: int = 1,
@@ -375,7 +378,7 @@ def std(
             * _unit_to_nanoseconds_conversion[self.time_unit],
         )
 
-    def median(self, skipna: bool = None) -> pd.Timestamp:
+    def median(self, skipna: Optional[bool] = None) -> pd.Timestamp:
         return pd.Timestamp(
             self.as_numerical.median(skipna=skipna), unit=self.time_unit
         )
@@ -451,7 +454,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
 
     def fillna(
-        self, fill_value: Any = None, method: str = None, dtype: Dtype = None
+        self,
+        fill_value: Any = None,
+        method: Optional[str] = None,
+        dtype: Optional[Dtype] = None,
     ) -> DatetimeColumn:
         if fill_value is not None:
             if cudf.utils.utils._isnat(fill_value):
@@ -495,7 +501,6 @@ def isin(self, values: Sequence) -> ColumnBase:
 
     def can_cast_safely(self, to_dtype: Dtype) -> bool:
         if np.issubdtype(to_dtype, np.datetime64):
-
             to_res, _ = np.datetime_data(to_dtype)
             self_res, _ = np.datetime_data(self.dtype)
 
@@ -542,10 +547,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: pd.DatetimeTZDtype,
-        mask: Buffer = None,
-        size: int = None,
+        mask: Optional[Buffer] = None,
+        size: Optional[int] = None,
         offset: int = 0,
-        null_count: int = None,
+        null_count: Optional[int] = None,
     ):
         super().__init__(
             data=data,
@@ -558,7 +563,10 @@ def __init__(
         self._dtype = dtype
 
     def to_pandas(
-        self, index: pd.Index = None, nullable: bool = False, **kwargs
+        self,
+        index: Optional[pd.Index] = None,
+        nullable: bool = False,
+        **kwargs,
     ) -> "cudf.Series":
         return self._local_time.to_pandas().dt.tz_localize(
             self.dtype.tz, ambiguous="NaT", nonexistent="NaT"
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 9fc7663ffca..420637c1924 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -2,7 +2,7 @@
 
 import warnings
 from decimal import Decimal
-from typing import Any, Sequence, Union, cast
+from typing import Any, Optional, Sequence, Union, cast
 
 import cupy as cp
 import numpy as np
@@ -103,7 +103,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
         return result
 
     def fillna(
-        self, value: Any = None, method: str = None, dtype: Dtype = None
+        self,
+        value: Any = None,
+        method: Optional[str] = None,
+        dtype: Optional[Dtype] = None,
     ):
         """Fill null values with ``value``.
 
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index 657403a6082..1b9caa42ecf 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+from typing import Optional
+
 import pandas as pd
 import pyarrow as pa
 
@@ -124,7 +126,9 @@ def as_interval_column(self, dtype, **kwargs):
         else:
             raise ValueError("dtype must be IntervalDtype")
 
-    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
+    def to_pandas(
+        self, index: Optional[pd.Index] = None, **kwargs
+    ) -> "pd.Series":
         # Note: This does not handle null values in the interval column.
         # However, this exact sequence (calling __from_arrow__ on the output of
         # self.to_arrow) is currently the best known way to convert interval
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 87e73d212ef..840858c4bdb 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -81,10 +81,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: DtypeObj,
-        mask: Buffer = None,
-        size: int = None,  # TODO: make this non-optional
+        mask: Optional[Buffer] = None,
+        size: Optional[int] = None,  # TODO: make this non-optional
         offset: int = 0,
-        null_count: int = None,
+        null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
 
@@ -428,11 +428,11 @@ def _process_values_for_isin(
 
         return lhs, rhs
 
-    def _can_return_nan(self, skipna: bool = None) -> bool:
+    def _can_return_nan(self, skipna: Optional[bool] = None) -> bool:
         return not skipna and self.has_nulls(include_nan=True)
 
     def _process_for_reduction(
-        self, skipna: bool = None, min_count: int = 0
+        self, skipna: Optional[bool] = None, min_count: int = 0
     ) -> Union[NumericalColumn, ScalarLike]:
         skipna = True if skipna is None else skipna
 
@@ -516,8 +516,8 @@ def find_and_replace(
     def fillna(
         self,
         fill_value: Any = None,
-        method: str = None,
-        dtype: Dtype = None,
+        method: Optional[str] = None,
+        dtype: Optional[Dtype] = None,
         fill_nan: bool = True,
     ) -> NumericalColumn:
         """
@@ -684,7 +684,6 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
             ):
                 return True
             else:
-
                 filled = self.fillna(0)
                 return (
                     cudf.Series(filled).astype(to_dtype).astype(filled.dtype)
@@ -720,7 +719,10 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
         return self
 
     def to_pandas(
-        self, index: pd.Index = None, nullable: bool = False, **kwargs
+        self,
+        index: Optional[pd.Index] = None,
+        nullable: bool = False,
+        **kwargs,
     ) -> "pd.Series":
         if nullable and self.dtype in np_dtypes_to_pandas_dtypes:
             pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype]
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index bb7711a3ead..08c2f7cc7b1 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -1,9 +1,9 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 """Define an interface for columns that can perform numerical operations."""
 
 from __future__ import annotations
 
-from typing import cast
+from typing import Optional, cast
 
 import numpy as np
 
@@ -40,10 +40,10 @@ class NumericalBaseColumn(ColumnBase, Scannable):
         "cummax",
     }
 
-    def _can_return_nan(self, skipna: bool = None) -> bool:
+    def _can_return_nan(self, skipna: Optional[bool] = None) -> bool:
         return not skipna and self.has_nulls()
 
-    def kurtosis(self, skipna: bool = None) -> float:
+    def kurtosis(self, skipna: Optional[bool] = None) -> float:
         skipna = True if skipna is None else skipna
 
         if len(self) == 0 or self._can_return_nan(skipna=skipna):
@@ -68,7 +68,7 @@ def kurtosis(self, skipna: bool = None) -> float:
         kurt = term_one_section_one * term_one_section_two - 3 * term_two
         return kurt
 
-    def skew(self, skipna: bool = None) -> ScalarLike:
+    def skew(self, skipna: Optional[bool] = None) -> ScalarLike:
         skipna = True if skipna is None else skipna
 
         if len(self) == 0 or self._can_return_nan(skipna=skipna):
@@ -122,26 +122,39 @@ def quantile(
             )
         return result
 
-    def mean(self, skipna: bool = None, min_count: int = 0, dtype=np.float64):
+    def mean(
+        self,
+        skipna: Optional[bool] = None,
+        min_count: int = 0,
+        dtype=np.float64,
+    ):
         return self._reduce(
             "mean", skipna=skipna, min_count=min_count, dtype=dtype
         )
 
     def var(
-        self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1
+        self,
+        skipna: Optional[bool] = None,
+        min_count: int = 0,
+        dtype=np.float64,
+        ddof=1,
     ):
         return self._reduce(
             "var", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
         )
 
     def std(
-        self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1
+        self,
+        skipna: Optional[bool] = None,
+        min_count: int = 0,
+        dtype=np.float64,
+        ddof=1,
     ):
         return self._reduce(
             "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
         )
 
-    def median(self, skipna: bool = None) -> NumericalBaseColumn:
+    def median(self, skipna: Optional[bool] = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
 
         if self._can_return_nan(skipna=skipna):
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 1a09fc0b985..8e83d0c72b6 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -259,12 +259,14 @@ def byte_count(self) -> SeriesOrIndex:
         )
 
     @overload
-    def cat(self, sep: str = None, na_rep: str = None) -> str:
+    def cat(
+        self, sep: Optional[str] = None, na_rep: Optional[str] = None
+    ) -> str:
         ...
 
     @overload
     def cat(
-        self, others, sep: str = None, na_rep: str = None
+        self, others, sep: Optional[str] = None, na_rep: Optional[str] = None
     ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]:
         ...
 
@@ -792,7 +794,7 @@ def contains(
             result_col = libstrings.contains_multiple(input_column, pat)
         return self._return_or_inplace(result_col)
 
-    def like(self, pat: str, esc: str = None) -> SeriesOrIndex:
+    def like(self, pat: str, esc: Optional[str] = None) -> SeriesOrIndex:
         """
         Test if a like pattern matches a string of a Series or Index.
 
@@ -1072,7 +1074,10 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex:
         )
 
     def slice(
-        self, start: int = None, stop: int = None, step: int = None
+        self,
+        start: Optional[int] = None,
+        stop: Optional[int] = None,
+        step: Optional[int] = None,
     ) -> SeriesOrIndex:
         """
         Slice substrings from each element in the Series or Index.
@@ -2047,7 +2052,7 @@ def istitle(self) -> SeriesOrIndex:
         return self._return_or_inplace(libstrings.is_title(self._column))
 
     def filter_alphanum(
-        self, repl: str = None, keep: bool = True
+        self, repl: Optional[str] = None, keep: bool = True
     ) -> SeriesOrIndex:
         """
         Remove non-alphanumeric characters from strings in this column.
@@ -2133,7 +2138,10 @@ def slice_from(
         )
 
     def slice_replace(
-        self, start: int = None, stop: int = None, repl: str = None
+        self,
+        start: Optional[int] = None,
+        stop: Optional[int] = None,
+        repl: Optional[str] = None,
     ) -> SeriesOrIndex:
         """
         Replace the specified section of each string with a new string.
@@ -2221,7 +2229,9 @@ def slice_replace(
             ),
         )
 
-    def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex:
+    def insert(
+        self, start: int = 0, repl: Optional[str] = None
+    ) -> SeriesOrIndex:
         """
         Insert the specified string into each string in the specified
         position.
@@ -2401,10 +2411,10 @@ def get_json_object(
 
     def split(
         self,
-        pat: str = None,
+        pat: Optional[str] = None,
         n: int = -1,
         expand: bool = False,
-        regex: bool = None,
+        regex: Optional[bool] = None,
     ) -> SeriesOrIndex:
         """
         Split strings around given separator/delimiter.
@@ -2569,10 +2579,10 @@ def split(
 
     def rsplit(
         self,
-        pat: str = None,
+        pat: Optional[str] = None,
         n: int = -1,
         expand: bool = False,
-        regex: bool = None,
+        regex: Optional[bool] = None,
     ) -> SeriesOrIndex:
         """
         Split strings around given separator/delimiter.
@@ -3221,7 +3231,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex:
             libstrings.rjust(self._column, width, fillchar)
         )
 
-    def strip(self, to_strip: str = None) -> SeriesOrIndex:
+    def strip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
         r"""
         Remove leading and trailing characters.
 
@@ -3280,7 +3290,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex:
             libstrings.strip(self._column, cudf.Scalar(to_strip, "str"))
         )
 
-    def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
+    def lstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
         r"""
         Remove leading and trailing characters.
 
@@ -3327,7 +3337,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
             libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str"))
         )
 
-    def rstrip(self, to_strip: str = None) -> SeriesOrIndex:
+    def rstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
         r"""
         Remove leading and trailing characters.
 
@@ -3980,7 +3990,9 @@ def removeprefix(self, prefix: str) -> SeriesOrIndex:
         )
         return self._return_or_inplace(result)
 
-    def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex:
+    def find(
+        self, sub: str, start: int = 0, end: Optional[int] = None
+    ) -> SeriesOrIndex:
         """
         Return lowest indexes in each strings in the Series/Index
         where the substring is fully contained between ``[start:end]``.
@@ -4036,7 +4048,7 @@ def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex:
         return self._return_or_inplace(result_col)
 
     def rfind(
-        self, sub: str, start: int = 0, end: int = None
+        self, sub: str, start: int = 0, end: Optional[int] = None
     ) -> SeriesOrIndex:
         """
         Return highest indexes in each strings in the Series/Index
@@ -4097,7 +4109,7 @@ def rfind(
         return self._return_or_inplace(result_col)
 
     def index(
-        self, sub: str, start: int = 0, end: int = None
+        self, sub: str, start: int = 0, end: Optional[int] = None
     ) -> SeriesOrIndex:
         """
         Return lowest indexes in each strings where the substring
@@ -4159,7 +4171,7 @@ def index(
             return result
 
     def rindex(
-        self, sub: str, start: int = 0, end: int = None
+        self, sub: str, start: int = 0, end: Optional[int] = None
     ) -> SeriesOrIndex:
         """
         Return highest indexes in each strings where the substring
@@ -4426,7 +4438,7 @@ def translate(self, table: dict) -> SeriesOrIndex:
         )
 
     def filter_characters(
-        self, table: dict, keep: bool = True, repl: str = None
+        self, table: dict, keep: bool = True, repl: Optional[str] = None
     ) -> SeriesOrIndex:
         """
         Remove characters from each string using the character ranges
@@ -4877,7 +4889,7 @@ def ngrams_tokenize(
         )
 
     def replace_tokens(
-        self, targets, replacements, delimiter: str = None
+        self, targets, replacements, delimiter: Optional[str] = None
     ) -> SeriesOrIndex:
         """
         The targets tokens are searched for within each string in the series
@@ -4962,8 +4974,8 @@ def replace_tokens(
     def filter_tokens(
         self,
         min_token_length: int,
-        replacement: str = None,
-        delimiter: str = None,
+        replacement: Optional[str] = None,
+        delimiter: Optional[str] = None,
     ) -> SeriesOrIndex:
         """
         Remove tokens from within each string in the series that are
@@ -5351,10 +5363,10 @@ class StringColumn(column.ColumnBase):
 
     def __init__(
         self,
-        mask: Buffer = None,
-        size: int = None,  # TODO: make non-optional
+        mask: Optional[Buffer] = None,
+        size: Optional[int] = None,  # TODO: make non-optional
         offset: int = 0,
-        null_count: int = None,
+        null_count: Optional[int] = None,
         children: Tuple["column.ColumnBase", ...] = (),
     ):
         dtype = cudf.api.types.dtype("object")
@@ -5484,8 +5496,8 @@ def to_arrow(self) -> pa.Array:
 
     def sum(
         self,
-        skipna: bool = None,
-        dtype: Dtype = None,
+        skipna: Optional[bool] = None,
+        dtype: Optional[Dtype] = None,
         min_count: int = 0,
     ):
         result_col = self._process_for_reduction(
@@ -5616,7 +5628,10 @@ def values(self) -> cupy.ndarray:
         raise TypeError("String Arrays is not yet implemented in cudf")
 
     def to_pandas(
-        self, index: pd.Index = None, nullable: bool = False, **kwargs
+        self,
+        index: Optional[pd.Index] = None,
+        nullable: bool = False,
+        **kwargs,
     ) -> "pd.Series":
         if nullable:
             pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow())
@@ -5686,8 +5701,8 @@ def find_and_replace(
     def fillna(
         self,
         fill_value: Any = None,
-        method: str = None,
-        dtype: Dtype = None,
+        method: Optional[str] = None,
+        dtype: Optional[Dtype] = None,
     ) -> StringColumn:
         if fill_value is not None:
             if not is_scalar(fill_value):
@@ -5835,7 +5850,6 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase":
 
 
 def _get_cols_list(parent_obj, others):
-
     parent_index = (
         parent_obj.index if isinstance(parent_obj, cudf.Series) else parent_obj
     )
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 6838d711641..6306bd1f32d 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 from functools import cached_property
+from typing import Optional
 
 import pandas as pd
 import pyarrow as pa
@@ -57,7 +58,9 @@ def to_arrow(self):
             pa_type, len(self), buffers, children=children
         )
 
-    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
+    def to_pandas(
+        self, index: Optional[pd.Index] = None, **kwargs
+    ) -> "pd.Series":
         # We cannot go via Arrow's `to_pandas` because of the following issue:
         # https://issues.apache.org/jira/browse/ARROW-12680
 
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index e7979fa4d27..e1d913742ec 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import datetime
-from typing import Any, Sequence, cast
+from typing import Any, Optional, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -80,10 +80,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: Dtype,
-        size: int = None,  # TODO: make non-optional
-        mask: Buffer = None,
+        size: Optional[int] = None,  # TODO: make non-optional
+        mask: Optional[Buffer] = None,
         offset: int = 0,
-        null_count: int = None,
+        null_count: Optional[int] = None,
     ):
         dtype = cudf.dtype(dtype)
 
@@ -251,7 +251,10 @@ def time_unit(self) -> str:
         return self._time_unit
 
     def fillna(
-        self, fill_value: Any = None, method: str = None, dtype: Dtype = None
+        self,
+        fill_value: Any = None,
+        method: Optional[str] = None,
+        dtype: Optional[Dtype] = None,
     ) -> TimeDeltaColumn:
         if fill_value is not None:
             if cudf.utils.utils._isnat(fill_value):
@@ -313,7 +316,7 @@ def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta:
             unit=self.time_unit,
         )
 
-    def median(self, skipna: bool = None) -> pd.Timedelta:
+    def median(self, skipna: Optional[bool] = None) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical.median(skipna=skipna), unit=self.time_unit
         )
@@ -340,9 +343,9 @@ def quantile(
 
     def sum(
         self,
-        skipna: bool = None,
+        skipna: Optional[bool] = None,
         min_count: int = 0,
-        dtype: Dtype = None,
+        dtype: Optional[Dtype] = None,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             # Since sum isn't overridden in Numerical[Base]Column, mypy only
@@ -356,7 +359,7 @@ def sum(
 
     def std(
         self,
-        skipna: bool = None,
+        skipna: Optional[bool] = None,
         min_count: int = 0,
         dtype: Dtype = np.float64,
         ddof: int = 1,
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 707eda3f5e6..832d5acf2de 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -101,7 +101,7 @@ class ColumnAccessor(abc.MutableMapping):
 
     def __init__(
         self,
-        data: Union[abc.MutableMapping, ColumnAccessor] = None,
+        data: Union[abc.MutableMapping, ColumnAccessor, None] = None,
         multiindex: bool = False,
         level_names=None,
     ):
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8c8f0119b3f..79e97fa6455 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1988,8 +1988,8 @@ def from_dict(
         cls,
         data: dict,
         orient: str = "columns",
-        dtype: Dtype = None,
-        columns: list = None,
+        dtype: Optional[Dtype] = None,
+        columns: Optional[list] = None,
     ) -> DataFrame:
         """
         Construct DataFrame from dict of array-like or dicts.
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 281290e1788..0ec06f8d81f 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2601,7 +2601,6 @@ def __init__(
         copy=False,
         name=None,
     ):
-
         if freq is not None:
             raise NotImplementedError("freq is not yet supported")
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 4a9bc89fa34..edabdb34435 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -101,7 +101,6 @@ def __init__(
         name=None,
         **kwargs,
     ):
-
         if sortorder is not None:
             raise NotImplementedError("sortorder is not yet supported")
         if name is not None:
@@ -811,7 +810,6 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length):
 
     @_cudf_nvtx_annotate
     def _index_and_downcast(self, result, index, index_key):
-
         if isinstance(index_key, (numbers.Number, slice)):
             index_key = [index_key]
         if (
@@ -1069,7 +1067,6 @@ def _is_interval(self):
     @classmethod
     @_cudf_nvtx_annotate
     def _concat(cls, objs):
-
         source_data = [o.to_frame(index=False) for o in objs]
 
         # TODO: Verify if this is really necessary or if we can rely on
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index c4128621148..037ac9c378e 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -140,7 +140,6 @@ def to_numpy(
         return super().to_numpy(dtype, copy, na_value).flatten()
 
     def tolist(self):  # noqa: D102
-
         raise TypeError(
             "cuDF does not support conversion to host memory "
             "via the `tolist()` method. Consider using "