From 30169059560e6e849695cbcb049b713ff976dca5 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 12 Dec 2023 15:50:41 -0800
Subject: [PATCH 1/9] Refactor scalar handling in as_column

---
 python/cudf/cudf/core/column/column.py | 28 ++++++++------------------
 python/cudf/cudf/tests/test_column.py  |  7 +++++++
 2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 430b71989d6..5db7a59a238 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2171,33 +2171,23 @@ def as_column(
         if dtype is not None:
             data = data.astype(dtype)
 
-    elif isinstance(arbitrary, (pd.Timestamp, pd.Timedelta)):
-        # This will always treat NaTs as nulls since it's not technically a
-        # discrete value like NaN
-        length = length or 1
-        data = as_column(
-            pa.array(pd.Series([arbitrary] * length), from_pandas=True)
-        )
-        if dtype is not None:
-            data = data.astype(dtype)
-
-    elif np.isscalar(arbitrary) and not isinstance(arbitrary, memoryview):
+    elif is_scalar(arbitrary) and not isinstance(arbitrary, memoryview):
         length = length or 1
         if (
-            (nan_as_null is True)
+            nan_as_null is True
             and isinstance(arbitrary, (np.floating, float))
             and np.isnan(arbitrary)
         ):
             arbitrary = None
             if dtype is None:
                 dtype = cudf.dtype("float64")
+        elif arbitrary is None and dtype is None:
+            dtype = cudf.dtype("object")
+        arbitrary = cudf.Scalar(arbitrary, dtype=dtype)
+        data = ColumnBase.from_scalar(arbitrary, length)
 
-        data = as_column(full(length, arbitrary, dtype=dtype))
-        if not nan_as_null and not is_decimal_dtype(data.dtype):
-            if np.issubdtype(data.dtype, np.floating):
-                data = data.fillna(np.nan)
-            elif np.issubdtype(data.dtype, np.datetime64):
-                data = data.fillna(np.datetime64("NaT"))
+        if dtype is not None:
+            data = data.astype(dtype)
 
     elif hasattr(arbitrary, "__array_interface__"):
         # CUDF assumes values are always contiguous
@@ -2315,8 +2305,6 @@ def as_column(
         data = as_column(
             np.asarray(arbitrary), dtype=dtype, nan_as_null=nan_as_null
         )
-    elif isinstance(arbitrary, cudf.Scalar):
-        data = ColumnBase.from_scalar(arbitrary, length if length else 1)
     else:
         try:
             data = as_column(
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index 0546638f388..2b09054ea16 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -552,3 +552,10 @@ def test_astype_with_aliases(alias, expect_dtype, data):
     gd_data = cudf.Series.from_pandas(pd_data)
 
     assert_eq(pd_data.astype(expect_dtype), gd_data.astype(alias))
+
+
+def test_as_column_None_as_object_type():
+    result = as_column(None, length=1)
+    assert isinstance(result, cudf.core.column.string.StringColumn)
+    assert len(result) == 1
+    assert result.null_count == 1

From 6d2a3108b218f7c69e248b49c5d38a30b6c5835a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 13 Dec 2023 13:49:48 -0800
Subject: [PATCH 2/9] Add exception for Interval

---
 python/cudf/cudf/core/column/column.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5db7a59a238..b4e0b70a47e 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2173,6 +2173,14 @@ def as_column(
 
     elif is_scalar(arbitrary) and not isinstance(arbitrary, memoryview):
         length = length or 1
+        if isinstance(arbitrary, pd.Interval):
+            # No cudf.Scalar support yet
+            return as_column(
+                pd.Series([arbitrary] * length),
+                nan_as_null=nan_as_null,
+                dtype=dtype,
+                length=length,
+            )
         if (
             nan_as_null is True
             and isinstance(arbitrary, (np.floating, float))

From 9b701bfc8cbd0ce5c96db56e401319954af4a757 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 18 Dec 2023 16:33:03 -0800
Subject: [PATCH 3/9] Remove unused is_decimal_dtype import (lint)

---
 python/cudf/cudf/core/column/column.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index b6797f1e6f2..de9d08ff92d 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -55,7 +55,6 @@
     is_categorical_dtype,
     is_datetime64_dtype,
     is_datetime64tz_dtype,
-    is_decimal_dtype,
     is_dtype_equal,
     is_integer_dtype,
     is_interval_dtype,

From 5e7cb98a0930763787f49bb5b50fc441443d23b0 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 18 Dec 2023 16:57:51 -0800
Subject: [PATCH 4/9] Return data directly from the scalar branch of as_column

---
 python/cudf/cudf/core/column/column.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index de9d08ff92d..f998ac82d46 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2184,6 +2184,7 @@ def as_column(
 
         if dtype is not None:
             data = data.astype(dtype)
+        return data
 
     elif hasattr(arbitrary, "__array_interface__"):
         # CUDF assumes values are always contiguous

From 3c87fa88e23fd0ef9af9ed83d399ba0e8451a201 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 2 Jan 2024 17:59:29 -0800
Subject: [PATCH 5/9] Use as_column instead of full

---
 python/cudf/cudf/core/column/__init__.py    |  3 +-
 python/cudf/cudf/core/column/categorical.py | 14 +++---
 python/cudf/cudf/core/column/column.py      | 56 ++++-----------------
 python/cudf/cudf/core/column/decimal.py     |  6 +--
 python/cudf/cudf/core/column/numerical.py   |  5 +-
 python/cudf/cudf/core/column/string.py      | 14 ++++--
 python/cudf/cudf/core/dataframe.py          | 24 +++++----
 python/cudf/cudf/core/indexed_frame.py      | 16 +++---
 python/cudf/cudf/core/multiindex.py         |  4 +-
 python/cudf/cudf/core/series.py             |  7 ++-
 python/cudf/cudf/core/tools/datetimes.py    |  6 +--
 python/cudf/cudf/core/window/rolling.py     |  6 +--
 python/cudf/cudf/io/parquet.py              | 16 +++---
 python/cudf/cudf/tests/test_column.py       |  2 +-
 python/cudf/cudf/tests/test_testing.py      |  8 +--
 python/cudf/cudf/utils/utils.py             |  6 +--
 python/dask_cudf/dask_cudf/backends.py      |  8 +--
 17 files changed, 84 insertions(+), 117 deletions(-)

diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py
index aba4ded4f9d..c0bca2c3c95 100644
--- a/python/cudf/cudf/core/column/__init__.py
+++ b/python/cudf/cudf/core/column/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 """
 isort: skip_file
@@ -19,7 +19,6 @@
     column_empty_like_same_mask,
     concat_columns,
     deserialize_columns,
-    full,
     serialize_columns,
 )
 from cudf.core.column.datetime import DatetimeColumn  # noqa: F401
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 7036a9ee870..569a6118abf 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -937,8 +937,8 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn:
                 )
             return other
 
-        ary = column.full(
-            len(self), self._encode(other), dtype=self.codes.dtype
+        ary = column.as_column(
+            self._encode(other), length=len(self), dtype=self.codes.dtype
         )
         return column.build_categorical_column(
             categories=self.dtype.categories._values,
@@ -1618,11 +1618,9 @@ def _create_empty_categorical_column(
     return column.build_categorical_column(
         categories=column.as_column(dtype.categories),
         codes=column.as_column(
-            column.full(
-                categorical_column.size,
-                _DEFAULT_CATEGORICAL_VALUE,
-                categorical_column.codes.dtype,
-            )
+            _DEFAULT_CATEGORICAL_VALUE,
+            length=categorical_column.size,
+            dtype=categorical_column.codes.dtype,
         ),
         offset=categorical_column.offset,
         size=categorical_column.size,
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index f998ac82d46..b06ec30dd79 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -877,7 +877,7 @@ def isin(self, values: Sequence) -> ColumnBase:
         except ValueError:
             # pandas functionally returns all False when cleansing via
             # typecasting fails
-            return full(len(self), False, dtype="bool")
+            return as_column(False, length=len(self), dtype="bool")
 
         return lhs._obtain_isin_result(rhs)
 
@@ -904,9 +904,9 @@ def _isin_earlystop(self, rhs: ColumnBase) -> Union[ColumnBase, None]:
             if self.null_count and rhs.null_count:
                 return self.isnull()
             else:
-                return cudf.core.column.full(len(self), False, dtype="bool")
+                return as_column(False, length=len(self), dtype="bool")
         elif self.null_count == 0 and (rhs.null_count == len(rhs)):
-            return cudf.core.column.full(len(self), False, dtype="bool")
+            return as_column(False, length=len(self), dtype="bool")
         else:
             return None
 
@@ -1388,9 +1388,7 @@ def _label_encoding(
             na_sentinel = cudf.Scalar(-1)
 
         def _return_sentinel_column():
-            return cudf.core.column.full(
-                size=len(self), fill_value=na_sentinel, dtype=dtype
-            )
+            return as_column(na_sentinel, dtype=dtype, length=len(self))
 
         if dtype is None:
             dtype = min_scalar_type(max(len(cats), na_sentinel), 8)
@@ -1485,7 +1483,9 @@ def column_empty(
     elif isinstance(dtype, ListDtype):
         data = None
         children = (
-            full(row_count + 1, 0, dtype=libcudf.types.size_type_dtype),
+            as_column(
+                0, length=row_count + 1, dtype=libcudf.types.size_type_dtype
+            ),
             column_empty(row_count, dtype=dtype.element_type),
         )
     elif isinstance(dtype, CategoricalDtype):
@@ -1504,7 +1504,9 @@ def column_empty(
     elif dtype.kind in "OU" and not isinstance(dtype, DecimalDtype):
         data = None
         children = (
-            full(row_count + 1, 0, dtype=libcudf.types.size_type_dtype),
+            as_column(
+                0, length=row_count + 1, dtype=libcudf.types.size_type_dtype
+            ),
             build_column(
                 data=as_buffer(
                     rmm.DeviceBuffer(
@@ -2694,42 +2696,6 @@ def arange(
     )
 
 
-def full(
-    size: int, fill_value: ScalarLike, dtype: Optional[Dtype] = None
-) -> ColumnBase:
-    """
-    Returns a column of given size and dtype, filled with a given value.
-
-    Parameters
-    ----------
-    size : int
-        size of the expected column.
-    fill_value : scalar
-         A scalar value to fill a new array.
-    dtype : default None
-        Data type specifier. It is inferred from other arguments by default.
-
-    Returns
-    -------
-    Column
-
-    Examples
-    --------
-    >>> import cudf
-    >>> col = cudf.core.column.full(size=5, fill_value=7, dtype='int8')
-    >>> col
-    <cudf.core.column.numerical.NumericalColumn object at 0x7fa0912e8b90>
-    >>> cudf.Series(col)
-    0    7
-    1    7
-    2    7
-    3    7
-    4    7
-    dtype: int8
-    """
-    return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size)
-
-
 def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
     """Concatenate a sequence of columns."""
     if len(objs) == 0:
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 02e03f92745..4c01c8a8d15 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 import warnings
 from decimal import Decimal
@@ -65,8 +65,8 @@ def as_string_column(
     def __pow__(self, other):
         if isinstance(other, int):
             if other == 0:
-                res = cudf.core.column.full(
-                    size=len(self), fill_value=1, dtype=self.dtype
+                res = cudf.core.column.as_column(
+                    1, dtype=self.dtype, length=len(self)
                 )
                 if self.nullable:
                     res = res.set_mask(self.mask)
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index f40886bf153..9493368f625 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -41,7 +41,6 @@
     as_column,
     build_column,
     column,
-    full,
     string,
 )
 from cudf.core.dtypes import CategoricalDtype
@@ -506,7 +505,7 @@ def find_and_replace(
             )
         if len(replacement_col) == 1 and len(to_replace_col) > 1:
             replacement_col = column.as_column(
-                full(len(to_replace_col), replacement[0], self.dtype)
+                replacement[0], length=len(to_replace_col), dtype=self.dtype
             )
         elif len(replacement_col) == 1 and len(to_replace_col) == 0:
             return self.copy()
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 7bf81f3e2d3..c7e07a621e4 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -5488,7 +5488,9 @@ def __init__(
 
         if len(children) == 0 and size != 0:
             # all nulls-column:
-            offsets = column.full(size + 1, 0, dtype=size_type_dtype)
+            offsets = column.as_column(
+                0, length=size + 1, dtype=size_type_dtype
+            )
 
             chars = cudf.core.column.column_empty(0, dtype="int8")
             children = (offsets, chars)
@@ -5885,8 +5887,8 @@ def _binaryop(
                     "__eq__",
                     "__ne__",
                 }:
-                    return column.full(
-                        len(self), op == "__ne__", dtype="bool"
+                    return column.as_column(
+                        op == "__ne__", length=len(self), dtype="bool"
                     ).set_mask(self.mask)
                 else:
                     return NotImplemented
@@ -5895,7 +5897,9 @@ def _binaryop(
                 if isinstance(other, cudf.Scalar):
                     other = cast(
                         StringColumn,
-                        column.full(len(self), other, dtype="object"),
+                        column.as_column(
+                            other, length=len(self), dtype="object"
+                        ),
                     )
 
                 # Explicit types are necessary because mypy infers ColumnBase
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 01935fec8c3..e5251341680 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -1388,7 +1388,7 @@ def __setitem__(self, arg, value):
                             allow_non_unique=True,
                         )
                     if is_scalar(value):
-                        self._data[arg] = column.full(len(self), value)
+                        self._data[arg] = as_column(value, length=len(self))
                     else:
                         value = as_column(value)
                         self._data[arg] = value
@@ -1436,8 +1436,8 @@ def __setitem__(self, arg, value):
                 else:
                     for col in arg:
                         if is_scalar(value):
-                            self._data[col] = column.full(
-                                size=len(self), fill_value=value
+                            self._data[col] = as_column(
+                                value, length=len(self)
                             )
                         else:
                             self._data[col] = column.as_column(value)
@@ -3165,10 +3165,12 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
             )
 
         if _is_scalar_or_zero_d_array(value):
-            value = column.full(
-                len(self),
+            value = as_column(
                 value,
-                "str" if libcudf.scalar._is_null_host_scalar(value) else None,
+                length=len(self),
+                dtype="str"
+                if libcudf.scalar._is_null_host_scalar(value)
+                else None,
             )
 
         if len(self) == 0:
@@ -5830,7 +5832,7 @@ def isin(self, values):
         fill_value = cudf.Scalar(False)
 
         def make_false_column_like_self():
-            return column.full(len(self), fill_value, "bool")
+            return column.as_column(fill_value, length=len(self), dtype="bool")
 
         # Preprocess different input types into a mapping from column names to
         # a list of values to check.
@@ -5952,7 +5954,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
                 {
                     name: filtered._data[name]._get_mask_as_column()
                     if filtered._data[name].nullable
-                    else column.full(len(filtered._data[name]), True)
+                    else as_column(True, length=len(filtered._data[name]))
                     for name in filtered._data.names
                 }
             )
@@ -7772,8 +7774,8 @@ def func(left, right, output):
             return output
 
         for name in uncommon_columns:
-            output._data[name] = column.full(
-                size=len(output), fill_value=value, dtype="bool"
+            output._data[name] = as_column(
+                value, length=len(output), dtype="bool"
             )
         return output
 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index ab089ceb103..21c3d961a9e 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 """Base class for Frame types that have an index."""
 
 from __future__ import annotations
@@ -48,7 +48,7 @@
 )
 from cudf.core._base_index import BaseIndex
 from cudf.core.buffer import acquire_spill_lock
-from cudf.core.column import ColumnBase, as_column, full
+from cudf.core.column import ColumnBase, as_column
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.copy_types import BooleanMask, GatherMap
 from cudf.core.dtypes import ListDtype
@@ -2096,7 +2096,7 @@ def duplicated(self, subset=None, keep="first"):
         (result,) = libcudf.copying.scatter(
             [cudf.Scalar(False, dtype=bool)],
             distinct,
-            [full(len(self), True, dtype=bool)],
+            [as_column(True, length=len(self), dtype=bool)],
             bounds_check=False,
         )
         return cudf.Series(result, index=self.index)
@@ -2357,9 +2357,7 @@ def _apply(self, func, kernel_getter, *args, **kwargs):
 
         # Mask and data column preallocated
         ans_col = _return_arr_from_dtype(retty, len(self))
-        ans_mask = cudf.core.column.full(
-            size=len(self), fill_value=True, dtype="bool"
-        )
+        ans_mask = as_column(size=len(self), fill_value=True, dtype="bool")
         output_args = [(ans_col, ans_mask), len(self)]
         input_args = _get_input_args_from_frame(self)
         launch_args = output_args + input_args + list(args)
@@ -5252,10 +5250,10 @@ def _get_replacement_values_for_columns(
             values_columns = {
                 col: [value]
                 if _is_non_decimal_numeric_dtype(columns_dtype_map[col])
-                else full(
-                    len(to_replace),
+                else as_column(
                     value,
-                    cudf.dtype(type(value)),
+                    length=len(to_replace),
+                    dtype=cudf.dtype(type(value)),
                 )
                 for col in columns_dtype_map
             }
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 4f98a878792..ecc1dbcbb06 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -748,7 +748,7 @@ def isin(self, values, level=None):
             self_df = self.to_frame(index=False).reset_index()
             values_df = values_idx.to_frame(index=False)
             idx = self_df.merge(values_df, how="leftsemi")._data["index"]
-            res = cudf.core.column.full(size=len(self), fill_value=False)
+            res = column.as_column(False, length=len(self))
             res[idx] = True
             result = res.values
         else:
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 3562c83e797..aa41be9209b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -58,7 +58,6 @@
     arange,
     as_column,
     column,
-    full,
 )
 from cudf.core.column.categorical import (
     CategoricalAccessor as CategoricalAccessor,
@@ -1396,7 +1395,7 @@ def map(self, arg, na_action=None) -> "Series":
                 {
                     "x": arg.keys(),
                     "s": arg.values(),
-                    "bool": full(len(arg), True, dtype=self.dtype),
+                    "bool": as_column(True, length=len(arg), dtype=self.dtype),
                 }
             )
             res = lhs.merge(rhs, on="x", how="left").sort_values(
@@ -1416,7 +1415,7 @@ def map(self, arg, na_action=None) -> "Series":
                 {
                     "x": arg.keys(),
                     "s": arg,
-                    "bool": full(len(arg), True, dtype=self.dtype),
+                    "bool": as_column(True, length=len(arg), dtype=self.dtype),
                 }
             )
             res = lhs.merge(rhs, on="x", how="left").sort_values(
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 6ec9dcb5f44..c23e3de8115 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 import math
 import re
@@ -730,7 +730,7 @@ def _isin_datetimelike(
         rhs = cudf.core.column.as_column(values)
 
         if rhs.dtype.kind in {"f", "i", "u"}:
-            return cudf.core.column.full(len(lhs), False, dtype="bool")
+            return column.as_column(False, length=len(lhs), dtype="bool")
         rhs = rhs.astype(lhs.dtype)
         res = lhs._isin_earlystop(rhs)
         if res is not None:
@@ -738,7 +738,7 @@ def _isin_datetimelike(
     except ValueError:
         # pandas functionally returns all False when cleansing via
         # typecasting fails
-        return cudf.core.column.full(len(lhs), False, dtype="bool")
+        return column.as_column(False, length=len(lhs), dtype="bool")
 
     res = lhs._obtain_isin_result(rhs)
     return res
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index 8a92ea86d57..711a799aedf 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION
+# Copyright (c) 2020-2024, NVIDIA CORPORATION
 
 import itertools
 
@@ -245,8 +245,8 @@ def _apply_agg_column(self, source_column, agg_name):
             window = None
         else:
             preceding_window = as_column(self.window)
-            following_window = column.full(
-                self.window.size, 0, dtype=self.window.dtype
+            following_window = as_column(
+                0, length=self.window.size, dtype=self.window.dtype
             )
             window = None
 
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index bcc24a85cf9..4ad1eb97270 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import itertools
@@ -20,7 +20,7 @@
 import cudf
 from cudf._lib import parquet as libparquet
 from cudf.api.types import is_list_like
-from cudf.core.column import build_categorical_column, column_empty, full
+from cudf.core.column import as_column, build_categorical_column, column_empty
 from cudf.utils import ioutils
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
@@ -762,9 +762,9 @@ def _parquet_to_frame(
             _len = len(dfs[-1])
             if partition_categories and name in partition_categories:
                 # Build the categorical column from `codes`
-                codes = full(
-                    size=_len,
-                    fill_value=partition_categories[name].index(value),
+                codes = as_column(
+                    partition_categories[name].index(value),
+                    length=_len,
                 )
                 dfs[-1][name] = build_categorical_column(
                     categories=partition_categories[name],
@@ -788,10 +788,10 @@ def _parquet_to_frame(
                         masked=True,
                     )
                 else:
-                    dfs[-1][name] = full(
-                        size=_len,
-                        fill_value=value,
+                    dfs[-1][name] = as_column(
+                        value,
                         dtype=_dtype,
+                        length=_len,
                     )
 
     # Concatenate dfs and return.
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index 3c3e989fec6..cdf1b60aec6 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import cupy as cp
 import numpy as np
diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py
index e6658040663..bbb8ba687c4 100644
--- a/python/cudf/cudf/tests/test_testing.py
+++ b/python/cudf/cudf/tests/test_testing.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import numpy as np
 import pandas as pd
@@ -6,7 +6,7 @@
 import pytest
 
 import cudf
-from cudf.core.column.column import as_column, full
+from cudf.core.column.column import as_column
 from cudf.testing import (
     assert_frame_equal,
     assert_index_equal,
@@ -173,8 +173,8 @@ def test_assert_column_equal_dtype_edge_cases(other):
     assert_column_equal(base.slice(0, 0), other.slice(0, 0), check_dtype=False)
     assert_column_equal(other.slice(0, 0), base.slice(0, 0), check_dtype=False)
 
-    base = full(len(base), fill_value=cudf.NA, dtype=base.dtype)
-    other = full(len(other), fill_value=cudf.NA, dtype=other.dtype)
+    base = as_column(cudf.NA, length=len(base), dtype=base.dtype)
+    other = as_column(cudf.NA, length=len(other), dtype=other.dtype)
 
     assert_column_equal(base, other, check_dtype=False)
     assert_column_equal(other, base, check_dtype=False)
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index ec5693e14d2..95621cf9519 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import decimal
 import functools
@@ -396,8 +396,8 @@ def _all_bools_with_nulls(lhs, rhs, bool_fill_value):
     else:
         result_mask = None
 
-    result_col = column.full(
-        size=len(lhs), fill_value=bool_fill_value, dtype=cudf.dtype(np.bool_)
+    result_col = column.as_column(
+        bool_fill_value, dtype=cudf.dtype(np.bool_), length=len(lhs)
     )
     if result_mask is not None:
         result_col = result_col.set_mask(result_mask.as_mask())
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 387643587d1..9bcc9fb7be7 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import warnings
 from collections.abc import Iterator
@@ -106,8 +106,10 @@ def _get_non_empty_data(s):
         categories = (
             s.categories if len(s.categories) else [UNKNOWN_CATEGORIES]
         )
-        codes = cudf.core.column.full(
-            size=2, fill_value=0, dtype=cudf._lib.types.size_type_dtype
+        codes = cudf.core.column.as_column(
+            0,
+            dtype=cudf._lib.types.size_type_dtype,
+            length=2,
         )
         ordered = s.ordered
         data = cudf.core.column.build_categorical_column(

From 0811adc4d5e2379279c5c297fe5b9f74ac809b47 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 Jan 2024 17:02:33 -0800
Subject: [PATCH 6/9] Fix as_column call arguments in _apply (size/fill_value -> value/length)

---
 python/cudf/cudf/core/indexed_frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 21c3d961a9e..9e4458908c3 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -2357,7 +2357,7 @@ def _apply(self, func, kernel_getter, *args, **kwargs):
 
         # Mask and data column preallocated
         ans_col = _return_arr_from_dtype(retty, len(self))
-        ans_mask = as_column(size=len(self), fill_value=True, dtype="bool")
+        ans_mask = as_column(True, length=len(self), dtype="bool")
         output_args = [(ans_col, ans_mask), len(self)]
         input_args = _get_input_args_from_frame(self)
         launch_args = output_args + input_args + list(args)

From 30e77a2c042431b3af7f4a3f4bc4a1d98d468c0e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 4 Jan 2024 18:08:04 -0800
Subject: [PATCH 7/9] Unpack 0D arrays

---
 python/cudf/cudf/core/dataframe.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e5251341680..37bb1093989 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3165,12 +3165,16 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
             )
 
         if _is_scalar_or_zero_d_array(value):
+            dtype = None
+            if hasattr(value, "ndim"):
+                dtype = value.dtype
+                value = value.item()
+            if libcudf.scalar._is_null_host_scalar(value):
+                dtype = "str"
             value = as_column(
                 value,
                 length=len(self),
-                dtype="str"
-                if libcudf.scalar._is_null_host_scalar(value)
-                else None,
+                dtype=dtype,
             )
 
         if len(self) == 0:

From 5ebd6f588fa1bafc6f482b1b84d5761f8d114193 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 22 Jan 2024 15:03:23 -0800
Subject: [PATCH 8/9] For len 0 return empty of same dtype

---
 python/cudf/cudf/core/column/column.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index df09ffb6f57..e68756cbef8 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2134,7 +2134,10 @@ def as_column(
             data = data.astype(dtype)
 
     elif is_scalar(arbitrary) and not isinstance(arbitrary, memoryview):
-        length = length or 1
+        if length is None:
+            length = 1
+        elif length < 0:
+            raise ValueError(f"{length=} must be >=0.")
         if isinstance(arbitrary, pd.Interval):
             # No cudf.Scalar support yet
             return as_column(
@@ -2148,17 +2151,14 @@ def as_column(
             and isinstance(arbitrary, (np.floating, float))
             and np.isnan(arbitrary)
         ):
-            arbitrary = None
             if dtype is None:
-                dtype = cudf.dtype("float64")
-        elif arbitrary is None and dtype is None:
-            dtype = cudf.dtype("object")
+                dtype = getattr(arbitrary, "dtype", cudf.dtype("float64"))
+            arbitrary = None
         arbitrary = cudf.Scalar(arbitrary, dtype=dtype)
-        data = ColumnBase.from_scalar(arbitrary, length)
-
-        if dtype is not None:
-            data = data.astype(dtype)
-        return data
+        if length == 0:
+            return column_empty(length, dtype=arbitrary.dtype)
+        else:
+            return ColumnBase.from_scalar(arbitrary, length)
 
     elif hasattr(arbitrary, "__array_interface__"):
         # CUDF assumes values are always contiguous

From 35e8aa5f3b5354f27d9c5e1007b60c4c1550ec92 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:53:53 -0800
Subject: [PATCH 9/9] Fix usages

---
 python/cudf/cudf/core/dataframe.py  | 2 +-
 python/cudf/cudf/core/index.py      | 6 +++---
 python/cudf/cudf/core/multiindex.py | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 3dd45cc808f..712b1bee821 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3212,7 +3212,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
 
         if _is_scalar_or_zero_d_array(value):
             dtype = None
-            if hasattr(value, "ndim"):
+            if isinstance(value, (np.ndarray, cupy.ndarray)):
                 dtype = value.dtype
                 value = value.item()
             if libcudf.scalar._is_null_host_scalar(value):
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index c8eedae200b..541199127b6 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1227,9 +1227,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
             )
 
         needle = as_column(target)
-        result = cudf.core.column.full(
-            len(needle),
-            fill_value=-1,
+        result = as_column(
+            -1,
+            length=len(needle),
             dtype=libcudf.types.size_type_dtype,
         )
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 239307f824e..9a1dbac7e54 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1837,9 +1837,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
                 f"{method=} is not supported yet for MultiIndex."
             )
 
-        result = cudf.core.column.full(
-            len(target),
-            fill_value=-1,
+        result = column.as_column(
+            -1,
+            length=len(target),
             dtype=libcudf.types.size_type_dtype,
         )
         if not len(self):