From 3d0dc8abdbb5dd8d0e73ab542fcffefec95eb7bc Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 14 Oct 2024 06:56:29 -0700
Subject: [PATCH 01/19] fix

---
 .../cudf_polars/dsl/expressions/unary.py      | 14 +++++++++-
 .../tests/expressions/test_stringfunction.py  | 27 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index 3d4d15be1ce..6614308b5d7 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -33,7 +33,19 @@ class Cast(Expr):
     def __init__(self, dtype: plc.DataType, value: Expr) -> None:
         super().__init__(dtype)
         self.children = (value,)
-        if not dtypes.can_cast(value.dtype, self.dtype):
+        if (
+            self.dtype.id() == plc.TypeId.STRING
+            or value.dtype.id() == plc.TypeId.STRING
+        ):
+            if not (
+                self.dtype.id() == plc.TypeId.STRING
+                and value.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
+            ) or (
+                self.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
+                and value.dtype.id() == plc.TypeId.STRING
+            ):
+                raise NotImplementedError("Only string to float cast is supported")
+        elif not dtypes.can_cast(value.dtype, self.dtype):
             raise NotImplementedError(
                 f"Can't cast {self.dtype.id().name} to {value.dtype.id().name}"
             )
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index 4f6850ac977..adf89f53d8d 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -40,6 +40,28 @@ def ldf(with_nulls):
     )
 
 
+@pytest.fixture
+def str_to_float_data(with_nulls):
+    a = [
+        "1.1",
+        "2.2",
+        "3.3",
+        "4.4",
+        "5.5",
+        "6.6",
+        "7.7",
+        "8.8",
+        "9.9",
+        "10.10",
+        "11.11",
+        "12.12",
+    ]
+    if with_nulls:
+        a[4] = None
+        a[-3] = None
+    return pl.LazyFrame({"a": a})
+
+
 slice_cases = [
     (1, 3),
     (0, 3),
@@ -337,3 +359,8 @@ def test_unsupported_regex_raises(pattern):
 
     q = df.select(pl.col("a").str.contains(pattern, strict=True))
     assert_ir_translation_raises(q, NotImplementedError)
+
+
+def test_string_to_float(str_to_float_data):
+    query = str_to_float_data.select(pl.col("a").cast(pl.Float64))
+    assert_gpu_result_equal(query)

From 59ceb03f297008a5246914266f7f4a83ccd7dd71 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 14 Oct 2024 07:04:54 -0700
Subject: [PATCH 02/19] passing

---
 .../cudf_polars/dsl/expressions/unary.py      | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index 6614308b5d7..a7bdca631a1 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -38,11 +38,14 @@ def __init__(self, dtype: plc.DataType, value: Expr) -> None:
             or value.dtype.id() == plc.TypeId.STRING
         ):
             if not (
-                self.dtype.id() == plc.TypeId.STRING
-                and value.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
-            ) or (
-                self.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
-                and value.dtype.id() == plc.TypeId.STRING
+                (
+                    self.dtype.id() == plc.TypeId.STRING
+                    and value.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
+                )
+                or (
+                    self.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
+                    and value.dtype.id() == plc.TypeId.STRING
+                )
             ):
                 raise NotImplementedError("Only string to float cast is supported")
         elif not dtypes.can_cast(value.dtype, self.dtype):
@@ -60,7 +63,16 @@ def do_evaluate(
         """Evaluate this expression given a dataframe for context."""
         (child,) = self.children
         column = child.evaluate(df, context=context, mapping=mapping)
-        return Column(plc.unary.cast(column.obj, self.dtype)).sorted_like(column)
+        if (
+            self.dtype.id() == plc.TypeId.STRING
+            or column.obj.type().id() == plc.TypeId.STRING
+        ):
+            result = plc.strings.convert.convert_floats.to_floats(
+                column.obj, self.dtype
+            )
+        else:
+            result = plc.unary.cast(column.obj, self.dtype)
+        return Column(result).sorted_like(column)
 
     def collect_agg(self, *, depth: int) -> AggInfo:
         """Collect information about aggregations in groupbys."""

From be0fae9bdc34beb6fd2b8f984b8b64f07b9a2820 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 14 Oct 2024 07:44:12 -0700
Subject: [PATCH 03/19] more tests, needs refactor

---
 .../cudf_polars/dsl/expressions/unary.py      | 31 +++++--
 .../tests/expressions/test_stringfunction.py  | 87 ++++++++++++++++++-
 2 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index a7bdca631a1..8cf28801986 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -9,6 +9,7 @@
 
 import pyarrow as pa
 import pylibcudf as plc
+from pylibcudf.traits import is_floating_point, is_integral_not_bool
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
@@ -23,6 +24,10 @@
 __all__ = ["Cast", "UnaryFunction", "Len"]
 
 
+def _is_int_or_float(dtype: plc.DataType) -> bool:
+    return is_integral_not_bool(dtype) or is_floating_point(dtype)
+
+
 class Cast(Expr):
     """Class representing a cast of an expression."""
 
@@ -38,12 +43,9 @@ def __init__(self, dtype: plc.DataType, value: Expr) -> None:
             or value.dtype.id() == plc.TypeId.STRING
         ):
             if not (
-                (
-                    self.dtype.id() == plc.TypeId.STRING
-                    and value.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
-                )
+                (self.dtype.id() == plc.TypeId.STRING and _is_int_or_float(value.dtype))
                 or (
-                    self.dtype.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
+                    _is_int_or_float(self.dtype)
                     and value.dtype.id() == plc.TypeId.STRING
                 )
             ):
@@ -67,9 +69,22 @@ def do_evaluate(
             self.dtype.id() == plc.TypeId.STRING
             or column.obj.type().id() == plc.TypeId.STRING
         ):
-            result = plc.strings.convert.convert_floats.to_floats(
-                column.obj, self.dtype
-            )
+            if self.dtype.id() == plc.TypeId.STRING:
+                if is_floating_point(column.obj.type()):
+                    result = plc.strings.convert.convert_floats.from_floats(column.obj)
+                else:
+                    result = plc.strings.convert.convert_integers.from_integers(
+                        column.obj
+                    )
+            else:
+                if is_floating_point(self.dtype):
+                    result = plc.strings.convert.convert_floats.to_floats(
+                        column.obj, self.dtype
+                    )
+                else:
+                    result = plc.strings.convert.convert_integers.to_integers(
+                        column.obj, self.dtype
+                    )
         else:
             result = plc.unary.cast(column.obj, self.dtype)
         return Column(result).sorted_like(column)
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index adf89f53d8d..f6c64ec02a7 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -62,6 +62,72 @@ def str_to_float_data(with_nulls):
     return pl.LazyFrame({"a": a})
 
 
+@pytest.fixture
+def str_to_integer_data(with_nulls):
+    a = [
+        "1",
+        "2",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9",
+        "10",
+        "11",
+        "12",
+    ]
+    if with_nulls:
+        a[4] = None
+        a[-3] = None
+    return pl.LazyFrame({"a": a})
+
+
+@pytest.fixture
+def str_from_float_data(with_nulls):
+    a = [
+        1.1,
+        2.2,
+        3.3,
+        4.4,
+        5.5,
+        6.6,
+        7.7,
+        8.8,
+        9.9,
+        10.10,
+        11.11,
+        12.12,
+    ]
+    if with_nulls:
+        a[4] = None
+        a[-3] = None
+    return pl.LazyFrame({"a": a})
+
+
+@pytest.fixture
+def str_from_integer_data(with_nulls):
+    a = [
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+    ]
+    if with_nulls:
+        a[4] = None
+        a[-3] = None
+    return pl.LazyFrame({"a": a})
+
+
 slice_cases = [
     (1, 3),
     (0, 3),
@@ -361,6 +427,23 @@ def test_unsupported_regex_raises(pattern):
     assert_ir_translation_raises(q, NotImplementedError)
 
 
-def test_string_to_float(str_to_float_data):
-    query = str_to_float_data.select(pl.col("a").cast(pl.Float64))
+@pytest.mark.parametrize("dtype", [pl.Float32, pl.Float64])
+def test_string_to_float(str_to_float_data, dtype):
+    query = str_to_float_data.select(pl.col("a").cast(dtype))
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize("dtype", [pl.Int8, pl.Int16, pl.Int32, pl.Int64])
+def test_string_to_integer(str_to_integer_data, dtype):
+    query = str_to_integer_data.select(pl.col("a").cast(dtype))
+    assert_gpu_result_equal(query)
+
+
+def test_string_from_float(str_from_float_data):
+    query = str_from_float_data.select(pl.col("a").cast(pl.String))
+    assert_gpu_result_equal(query)
+
+
+def test_string_from_integer(str_from_integer_data):
+    query = str_from_integer_data.select(pl.col("a").cast(pl.String))
     assert_gpu_result_equal(query)

From 209b9060baf20c5dca3613b988b390aa9e577df4 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 14 Oct 2024 08:58:35 -0700
Subject: [PATCH 04/19] refactor

---
 .../tests/expressions/test_stringfunction.py  | 94 +++----------------
 1 file changed, 11 insertions(+), 83 deletions(-)

diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index f6c64ec02a7..bf59dab08ab 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -40,74 +40,21 @@ def ldf(with_nulls):
     )
 
 
-@pytest.fixture
-def str_to_float_data(with_nulls):
-    a = [
-        "1.1",
-        "2.2",
-        "3.3",
-        "4.4",
-        "5.5",
-        "6.6",
-        "7.7",
-        "8.8",
-        "9.9",
-        "10.10",
-        "11.11",
-        "12.12",
-    ]
-    if with_nulls:
-        a[4] = None
-        a[-3] = None
-    return pl.LazyFrame({"a": a})
-
-
-@pytest.fixture
-def str_to_integer_data(with_nulls):
-    a = [
-        "1",
-        "2",
-        "3",
-        "4",
-        "5",
-        "6",
-        "7",
-        "8",
-        "9",
-        "10",
-        "11",
-        "12",
-    ]
-    if with_nulls:
-        a[4] = None
-        a[-3] = None
-    return pl.LazyFrame({"a": a})
+@pytest.fixture(params=[pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64])
+def numeric_type(request):
+    return request.param
 
 
 @pytest.fixture
-def str_from_float_data(with_nulls):
-    a = [
-        1.1,
-        2.2,
-        3.3,
-        4.4,
-        5.5,
-        6.6,
-        7.7,
-        8.8,
-        9.9,
-        10.10,
-        11.11,
-        12.12,
-    ]
+def str_to_numeric_data(with_nulls):
+    a = ["1", "2", "3", "4", "5", "6"]
     if with_nulls:
         a[4] = None
-        a[-3] = None
     return pl.LazyFrame({"a": a})
 
 
 @pytest.fixture
-def str_from_integer_data(with_nulls):
+def str_from_numeric_data(with_nulls, numeric_type):
     a = [
         1,
         2,
@@ -115,17 +62,10 @@ def str_from_integer_data(with_nulls):
         4,
         5,
         6,
-        7,
-        8,
-        9,
-        10,
-        11,
-        12,
     ]
     if with_nulls:
         a[4] = None
-        a[-3] = None
-    return pl.LazyFrame({"a": a})
+    return pl.LazyFrame({"a": pl.Series(a, dtype=numeric_type)})
 
 
 slice_cases = [
@@ -427,23 +367,11 @@ def test_unsupported_regex_raises(pattern):
     assert_ir_translation_raises(q, NotImplementedError)
 
 
-@pytest.mark.parametrize("dtype", [pl.Float32, pl.Float64])
-def test_string_to_float(str_to_float_data, dtype):
-    query = str_to_float_data.select(pl.col("a").cast(dtype))
-    assert_gpu_result_equal(query)
-
-
-@pytest.mark.parametrize("dtype", [pl.Int8, pl.Int16, pl.Int32, pl.Int64])
-def test_string_to_integer(str_to_integer_data, dtype):
-    query = str_to_integer_data.select(pl.col("a").cast(dtype))
-    assert_gpu_result_equal(query)
-
-
-def test_string_from_float(str_from_float_data):
-    query = str_from_float_data.select(pl.col("a").cast(pl.String))
+def test_string_to_numeric(str_to_numeric_data, numeric_type):
+    query = str_to_numeric_data.select(pl.col("a").cast(numeric_type))
     assert_gpu_result_equal(query)
 
 
-def test_string_from_integer(str_from_integer_data):
-    query = str_from_integer_data.select(pl.col("a").cast(pl.String))
+def test_string_from_numeric(str_from_numeric_data):
+    query = str_from_numeric_data.select(pl.col("a").cast(pl.String))
     assert_gpu_result_equal(query)

From c9199ec01359fcd97efbf506f6ac73e93fc504de Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 15 Oct 2024 11:35:30 -0700
Subject: [PATCH 05/19] address reviews

---
 .../cudf_polars/dsl/expressions/unary.py      | 56 +++++++------------
 .../cudf_polars/cudf_polars/utils/dtypes.py   | 15 ++++-
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index 8cf28801986..c9e52e5056f 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -9,7 +9,7 @@
 
 import pyarrow as pa
 import pylibcudf as plc
-from pylibcudf.traits import is_floating_point, is_integral_not_bool
+from pylibcudf.traits import is_floating_point
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
@@ -24,10 +24,6 @@
 __all__ = ["Cast", "UnaryFunction", "Len"]
 
 
-def _is_int_or_float(dtype: plc.DataType) -> bool:
-    return is_integral_not_bool(dtype) or is_floating_point(dtype)
-
-
 class Cast(Expr):
     """Class representing a cast of an expression."""
 
@@ -38,21 +34,9 @@ class Cast(Expr):
     def __init__(self, dtype: plc.DataType, value: Expr) -> None:
         super().__init__(dtype)
         self.children = (value,)
-        if (
-            self.dtype.id() == plc.TypeId.STRING
-            or value.dtype.id() == plc.TypeId.STRING
-        ):
-            if not (
-                (self.dtype.id() == plc.TypeId.STRING and _is_int_or_float(value.dtype))
-                or (
-                    _is_int_or_float(self.dtype)
-                    and value.dtype.id() == plc.TypeId.STRING
-                )
-            ):
-                raise NotImplementedError("Only string to float cast is supported")
-        elif not dtypes.can_cast(value.dtype, self.dtype):
+        if not dtypes.can_cast(value.dtype, self.dtype):
             raise NotImplementedError(
-                f"Can't cast {self.dtype.id().name} to {value.dtype.id().name}"
+                f"Can't cast {value.dtype.id().name} to {self.dtype.id().name}"
             )
 
     def do_evaluate(
@@ -69,26 +53,28 @@ def do_evaluate(
             self.dtype.id() == plc.TypeId.STRING
             or column.obj.type().id() == plc.TypeId.STRING
         ):
-            if self.dtype.id() == plc.TypeId.STRING:
-                if is_floating_point(column.obj.type()):
-                    result = plc.strings.convert.convert_floats.from_floats(column.obj)
-                else:
-                    result = plc.strings.convert.convert_integers.from_integers(
-                        column.obj
-                    )
-            else:
-                if is_floating_point(self.dtype):
-                    result = plc.strings.convert.convert_floats.to_floats(
-                        column.obj, self.dtype
-                    )
-                else:
-                    result = plc.strings.convert.convert_integers.to_integers(
-                        column.obj, self.dtype
-                    )
+            result = self._handle_string_cast(column)
         else:
             result = plc.unary.cast(column.obj, self.dtype)
         return Column(result).sorted_like(column)
 
+    def _handle_string_cast(self, column: Column) -> plc.Column:
+        if self.dtype.id() == plc.TypeId.STRING:
+            if is_floating_point(column.obj.type()):
+                result = plc.strings.convert.convert_floats.from_floats(column.obj)
+            else:
+                result = plc.strings.convert.convert_integers.from_integers(column.obj)
+        else:
+            if is_floating_point(self.dtype):
+                result = plc.strings.convert.convert_floats.to_floats(
+                    column.obj, self.dtype
+                )
+            else:
+                result = plc.strings.convert.convert_integers.to_integers(
+                    column.obj, self.dtype
+                )
+        return result
+
     def collect_agg(self, *, depth: int) -> AggInfo:
         """Collect information about aggregations in groupbys."""
         # TODO: Could do with sort-based groupby and segmented filter
diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 4154a404e98..fb7ed0aaf2b 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -9,6 +9,7 @@
 
 import pyarrow as pa
 import pylibcudf as plc
+from pylibcudf.traits import is_floating_point, is_integral_not_bool
 from typing_extensions import assert_never
 
 import polars as pl
@@ -45,6 +46,10 @@ def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType:
     return typ
 
 
+def _is_int_or_float(dtype: plc.DataType) -> bool:
+    return is_integral_not_bool(dtype) or is_floating_point(dtype)
+
+
 def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     """
     Can we cast (via :func:`~.pylibcudf.unary.cast`) between two datatypes.
@@ -61,9 +66,13 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     True if casting is supported, False otherwise
     """
     return (
-        plc.traits.is_fixed_width(to)
-        and plc.traits.is_fixed_width(from_)
-        and plc.unary.is_supported_cast(from_, to)
+        (
+            plc.traits.is_fixed_width(to)
+            and plc.traits.is_fixed_width(from_)
+            and plc.unary.is_supported_cast(from_, to)
+        )
+        or (from_.id() == plc.TypeId.STRING and _is_int_or_float(to))
+        or (_is_int_or_float(from_) and to.id() == plc.TypeId.STRING)
     )
 
 

From 7feb1f39668319d71daecd990af4cde11408028e Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 16 Oct 2024 11:10:27 -0700
Subject: [PATCH 06/19] update tests

---
 python/cudf_polars/tests/expressions/test_casting.py   |  2 +-
 .../tests/expressions/test_numeric_binops.py           | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/python/cudf_polars/tests/expressions/test_casting.py b/python/cudf_polars/tests/expressions/test_casting.py
index 3e003054338..0722a0f198a 100644
--- a/python/cudf_polars/tests/expressions/test_casting.py
+++ b/python/cudf_polars/tests/expressions/test_casting.py
@@ -14,7 +14,7 @@
 _supported_dtypes = [(pl.Int8(), pl.Int64())]
 
 _unsupported_dtypes = [
-    (pl.String(), pl.Int64()),
+    (pl.Datetime("ns"), pl.Int64()),
 ]
 
 
diff --git a/python/cudf_polars/tests/expressions/test_numeric_binops.py b/python/cudf_polars/tests/expressions/test_numeric_binops.py
index 8f68bbc460c..fa1ec3c19e4 100644
--- a/python/cudf_polars/tests/expressions/test_numeric_binops.py
+++ b/python/cudf_polars/tests/expressions/test_numeric_binops.py
@@ -8,7 +8,6 @@
 
 from cudf_polars.testing.asserts import (
     assert_gpu_result_equal,
-    assert_ir_translation_raises,
 )
 
 dtypes = [
@@ -114,12 +113,3 @@ def test_binop_with_scalar(left_scalar, right_scalar):
     q = df.select(lop / rop)
 
     assert_gpu_result_equal(q)
-
-
-def test_numeric_to_string_cast_fails():
-    df = pl.DataFrame(
-        {"a": [1, 1, 2, 3, 3, 4, 1], "b": [None, 2, 3, 4, 5, 6, 7]}
-    ).lazy()
-    q = df.select(pl.col("a").cast(pl.String))
-
-    assert_ir_translation_raises(q, NotImplementedError)

From 6d699ac5d09366e99e9bb3b7893c636e1216b033 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 16 Oct 2024 14:40:28 -0700
Subject: [PATCH 07/19] handle runtime conversion failure

---
 .../cudf_polars/dsl/expressions/unary.py      | 38 +++++++++++++++----
 .../tests/expressions/test_stringfunction.py  | 10 +++++
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index c9e52e5056f..e6a93e3f656 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -9,8 +9,16 @@
 
 import pyarrow as pa
 import pylibcudf as plc
+from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats
+from pylibcudf.strings.convert.convert_integers import (
+    from_integers,
+    is_integer,
+    to_integers,
+)
 from pylibcudf.traits import is_floating_point
 
+from polars.exceptions import InvalidOperationError
+
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
 from cudf_polars.dsl.expressions.literal import Literal
@@ -61,18 +69,32 @@ def do_evaluate(
     def _handle_string_cast(self, column: Column) -> plc.Column:
         if self.dtype.id() == plc.TypeId.STRING:
             if is_floating_point(column.obj.type()):
-                result = plc.strings.convert.convert_floats.from_floats(column.obj)
+                result = from_floats(column.obj)
             else:
-                result = plc.strings.convert.convert_integers.from_integers(column.obj)
+                result = from_integers(column.obj)
         else:
             if is_floating_point(self.dtype):
-                result = plc.strings.convert.convert_floats.to_floats(
-                    column.obj, self.dtype
-                )
+                floats = is_float(column.obj)
+                if not plc.interop.to_arrow(
+                    plc.reduce.reduce(
+                        floats,
+                        plc.aggregation.all(),
+                        plc.DataType(plc.TypeId.BOOL8),
+                    )
+                ).as_py():
+                    raise InvalidOperationError("Conversion from `str` failed.")
+                result = to_floats(column.obj, self.dtype)
             else:
-                result = plc.strings.convert.convert_integers.to_integers(
-                    column.obj, self.dtype
-                )
+                integers = is_integer(column.obj)
+                if not plc.interop.to_arrow(
+                    plc.reduce.reduce(
+                        integers,
+                        plc.aggregation.all(),
+                        plc.DataType(plc.TypeId.BOOL8),
+                    )
+                ).as_py():
+                    raise InvalidOperationError("Conversion from `str` failed.")
+                result = to_integers(column.obj, self.dtype)
         return result
 
     def collect_agg(self, *, depth: int) -> AggInfo:
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index bf59dab08ab..d1655e5e466 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -375,3 +375,13 @@ def test_string_to_numeric(str_to_numeric_data, numeric_type):
 def test_string_from_numeric(str_from_numeric_data):
     query = str_from_numeric_data.select(pl.col("a").cast(pl.String))
     assert_gpu_result_equal(query)
+
+
+def test_string_to_numeric_invalid(numeric_type):
+    df = pl.LazyFrame({"a": ["a", "b", "c"]})
+    q = df.select(pl.col("a").cast(numeric_type))
+    assert_collect_raises(
+        q,
+        polars_except=pl.exceptions.InvalidOperationError,
+        cudf_except=pl.exceptions.ComputeError,
+    )

From 735c9e319650243708eb660d62adca9cccec997f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 24 Oct 2024 13:46:11 -0700
Subject: [PATCH 08/19] moving things

---
 .../cudf_polars/containers/column.py          | 52 +++++++++++++++++--
 .../cudf_polars/dsl/expressions/unary.py      | 49 +----------------
 2 files changed, 48 insertions(+), 53 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index 00186098e54..a30604abddf 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -9,6 +9,15 @@
 from typing import TYPE_CHECKING
 
 import pylibcudf as plc
+from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats
+from pylibcudf.strings.convert.convert_integers import (
+    from_integers,
+    is_integer,
+    to_integers,
+)
+from pylibcudf.traits import is_floating_point
+
+from polars.exceptions import InvalidOperationError
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -129,11 +138,44 @@ def astype(self, dtype: plc.DataType) -> Column:
         This only produces a copy if the requested dtype doesn't match
         the current one.
         """
-        if self.obj.type() != dtype:
-            return Column(plc.unary.cast(self.obj, dtype), name=self.name).sorted_like(
-                self
-            )
-        return self
+        if self.obj.type() == dtype:
+            return self
+
+        if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING:
+            result = self._handle_string_cast(dtype)
+        else:
+            result = plc.unary.cast(self.obj, dtype)
+        return Column(result).sorted_like(self)
+
+    def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
+        if dtype.id() == plc.TypeId.STRING:
+            if is_floating_point(self.obj.type()):
+                return from_floats(self.obj)
+            else:
+                return from_integers(self.obj)
+        else:
+            if is_floating_point(dtype):
+                floats = is_float(self.obj)
+                if not plc.interop.to_arrow(
+                    plc.reduce.reduce(
+                        floats,
+                        plc.aggregation.all(),
+                        plc.DataType(plc.TypeId.BOOL8),
+                    )
+                ).as_py():
+                    raise InvalidOperationError("Conversion from `str` failed.")
+                return to_floats(self.obj, dtype)
+            else:
+                integers = is_integer(self.obj)
+                if not plc.interop.to_arrow(
+                    plc.reduce.reduce(
+                        integers,
+                        plc.aggregation.all(),
+                        plc.DataType(plc.TypeId.BOOL8),
+                    )
+                ).as_py():
+                    raise InvalidOperationError("Conversion from `str` failed.")
+                return to_integers(self.obj, dtype)
 
     def copy_metadata(self, from_: pl.Series, /) -> Self:
         """
diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index 2e0ab12eeb3..5d9f50a916d 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -9,15 +9,6 @@
 
 import pyarrow as pa
 import pylibcudf as plc
-from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats
-from pylibcudf.strings.convert.convert_integers import (
-    from_integers,
-    is_integer,
-    to_integers,
-)
-from pylibcudf.traits import is_floating_point
-
-from polars.exceptions import InvalidOperationError
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
@@ -56,45 +47,7 @@ def do_evaluate(
         """Evaluate this expression given a dataframe for context."""
         (child,) = self.children
         column = child.evaluate(df, context=context, mapping=mapping)
-        if (
-            self.dtype.id() == plc.TypeId.STRING
-            or column.obj.type().id() == plc.TypeId.STRING
-        ):
-            result = self._handle_string_cast(column)
-        else:
-            result = plc.unary.cast(column.obj, self.dtype)
-        return Column(result).sorted_like(column)
-
-    def _handle_string_cast(self, column: Column) -> plc.Column:
-        if self.dtype.id() == plc.TypeId.STRING:
-            if is_floating_point(column.obj.type()):
-                result = from_floats(column.obj)
-            else:
-                result = from_integers(column.obj)
-        else:
-            if is_floating_point(self.dtype):
-                floats = is_float(column.obj)
-                if not plc.interop.to_arrow(
-                    plc.reduce.reduce(
-                        floats,
-                        plc.aggregation.all(),
-                        plc.DataType(plc.TypeId.BOOL8),
-                    )
-                ).as_py():
-                    raise InvalidOperationError("Conversion from `str` failed.")
-                result = to_floats(column.obj, self.dtype)
-            else:
-                integers = is_integer(column.obj)
-                if not plc.interop.to_arrow(
-                    plc.reduce.reduce(
-                        integers,
-                        plc.aggregation.all(),
-                        plc.DataType(plc.TypeId.BOOL8),
-                    )
-                ).as_py():
-                    raise InvalidOperationError("Conversion from `str` failed.")
-                result = to_integers(column.obj, self.dtype)
-        return result
+        return column.astype(self.dtype)
 
     def collect_agg(self, *, depth: int) -> AggInfo:
         """Collect information about aggregations in groupbys."""

From 9f2cc181f0b24b3a0c6622cb7e1e63f09c90c182 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 25 Oct 2024 09:18:28 -0700
Subject: [PATCH 09/19] implement and use is_numeric_not_bool

---
 cpp/include/cudf/utilities/traits.hpp         | 24 +++++++++++++++++++
 cpp/src/utilities/traits.cpp                  | 13 ++++++++++
 .../cudf_polars/containers/column.py          |  5 ++--
 .../cudf_polars/cudf_polars/utils/dtypes.py   | 10 +++-----
 .../pylibcudf/libcudf/utilities/traits.pxd    |  1 +
 .../pylibcudf/pylibcudf/tests/test_traits.py  |  5 ++++
 python/pylibcudf/pylibcudf/traits.pyx         |  6 +++++
 7 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index 3f37ae02151..c37fa460ca1 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -303,6 +303,30 @@ constexpr inline bool is_integral_not_bool()
  */
 bool is_integral_not_bool(data_type type);
 
+/**
+ * @brief Indicates whether the type `T` is a numeric type but not bool type.
+ *
+ * @tparam T  The type to verify
+ * @return true `T` is numeric but not bool
+ * @return false  `T` is not numeric or is bool
+ */
+template <typename T>
+constexpr inline bool is_numeric_not_bool()
+{
+  return cudf::is_numeric<T>() and not std::is_same_v<T, bool>;
+}
+
+/**
+ * @brief Indicates whether `type` is a numeric `data_type` but not BOOL8
+ *
+ * "Numeric" types are integral/floating point types such as `INT*` or `FLOAT*`.
+ *
+ * @param type The `data_type` to verify
+ * @return true `type` is numeric but not bool
+ * @return false `type` is not numeric or is bool
+ */
+bool is_numeric_not_bool(data_type type);
+
 /**
  * @brief Indicates whether the type `T` is a floating point type.
  *
diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp
index a68dc84e340..d65e46e3c80 100644
--- a/cpp/src/utilities/traits.cpp
+++ b/cpp/src/utilities/traits.cpp
@@ -171,6 +171,19 @@ bool is_integral_not_bool(data_type type)
   return cudf::type_dispatcher(type, is_integral_not_bool_impl{});
 }
 
+struct is_numeric_not_bool_impl {
+  template <typename T>
+  constexpr bool operator()()
+  {
+    return is_numeric_not_bool<T>();
+  }
+};
+
+bool is_numeric_not_bool(data_type type)
+{
+  return cudf::type_dispatcher(type, is_numeric_not_bool_impl{});
+}
+
 struct is_floating_point_impl {
   template <typename T>
   constexpr bool operator()()
diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index a30604abddf..e04c7ecf07f 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -142,10 +142,9 @@ def astype(self, dtype: plc.DataType) -> Column:
             return self
 
         if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING:
-            result = self._handle_string_cast(dtype)
+            return Column(self._handle_string_cast(dtype))
         else:
-            result = plc.unary.cast(self.obj, dtype)
-        return Column(result).sorted_like(self)
+            return Column(plc.unary.cast(self.obj, dtype)).sorted_like(self)
 
     def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
         if dtype.id() == plc.TypeId.STRING:
diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index fb7ed0aaf2b..52f4951dabc 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -9,7 +9,7 @@
 
 import pyarrow as pa
 import pylibcudf as plc
-from pylibcudf.traits import is_floating_point, is_integral_not_bool
+from pylibcudf.traits import is_numeric_not_bool
 from typing_extensions import assert_never
 
 import polars as pl
@@ -46,10 +46,6 @@ def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType:
     return typ
 
 
-def _is_int_or_float(dtype: plc.DataType) -> bool:
-    return is_integral_not_bool(dtype) or is_floating_point(dtype)
-
-
 def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     """
     Can we cast (via :func:`~.pylibcudf.unary.cast`) between two datatypes.
@@ -71,8 +67,8 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
             and plc.traits.is_fixed_width(from_)
             and plc.unary.is_supported_cast(from_, to)
         )
-        or (from_.id() == plc.TypeId.STRING and _is_int_or_float(to))
-        or (_is_int_or_float(from_) and to.id() == plc.TypeId.STRING)
+        or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
+        or (is_numeric_not_bool(from_) and to.id() == plc.TypeId.STRING)
     )
 
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
index 69765e44274..5533530754e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
@@ -9,6 +9,7 @@ cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
     cdef bool is_relationally_comparable(data_type)
     cdef bool is_equality_comparable(data_type)
     cdef bool is_numeric(data_type)
+    cdef bool is_numeric_not_bool(data_type)
     cdef bool is_index_type(data_type)
     cdef bool is_unsigned(data_type)
     cdef bool is_integral(data_type)
diff --git a/python/pylibcudf/pylibcudf/tests/test_traits.py b/python/pylibcudf/pylibcudf/tests/test_traits.py
index 2570e8abd51..2c1708304eb 100644
--- a/python/pylibcudf/pylibcudf/tests/test_traits.py
+++ b/python/pylibcudf/pylibcudf/tests/test_traits.py
@@ -20,6 +20,11 @@ def test_is_numeric():
     assert not plc.traits.is_numeric(plc.DataType(plc.TypeId.LIST))
 
 
+def test_is_numeric_not_bool():
+    assert plc.traits.is_numeric_not_bool(plc.DataType(plc.TypeId.FLOAT64))
+    assert not plc.traits.is_numeric_not_bool(plc.DataType(plc.TypeId.BOOL8))
+
+
 def test_is_index_type():
     assert plc.traits.is_index_type(plc.DataType(plc.TypeId.INT8))
     assert not plc.traits.is_index_type(plc.DataType(plc.TypeId.BOOL8))
diff --git a/python/pylibcudf/pylibcudf/traits.pyx b/python/pylibcudf/pylibcudf/traits.pyx
index 5a1c67e1f6c..9c52e0ac1ab 100644
--- a/python/pylibcudf/pylibcudf/traits.pyx
+++ b/python/pylibcudf/pylibcudf/traits.pyx
@@ -29,6 +29,12 @@ cpdef bool is_numeric(DataType typ):
     """
     return traits.is_numeric(typ.c_obj)
 
+cpdef bool is_numeric_not_bool(DataType typ):
+    """Checks if the given data type is numeric excluding booleans.
+
+    For details, see :cpp:func:`is_numeric_not_bool`.
+    """
+    return traits.is_numeric_not_bool(typ.c_obj)
 
 cpdef bool is_index_type(DataType typ):
     """Checks if the given data type is an index type.

From 9ecac4142b7821efba6dac127d30fc46829a620a Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 25 Oct 2024 10:03:18 -0700
Subject: [PATCH 10/19] update tests

---
 .../cudf_polars/cudf_polars/utils/dtypes.py   |  2 +-
 .../tests/expressions/test_stringfunction.py  | 76 +++++++++++++++++--
 2 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 52f4951dabc..491d57584b6 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -68,7 +68,7 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
             and plc.unary.is_supported_cast(from_, to)
         )
         or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
-        or (is_numeric_not_bool(from_) and to.id() == plc.TypeId.STRING)
+        or (to.id() == plc.TypeId.STRING and is_numeric_not_bool(from_))
     )
 
 
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index d1655e5e466..641608ffb0e 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -40,13 +40,23 @@ def ldf(with_nulls):
     )
 
 
-@pytest.fixture(params=[pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64])
+@pytest.fixture(params=[pl.Int8, pl.Int16, pl.Int32, pl.Int64])
+def integer_type(request):
+    return request.param
+
+
+@pytest.fixture(params=[pl.Float32, pl.Float64])
+def floating_type(request):
+    return request.param
+
+
+@pytest.fixture(params=[pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.Float32, pl.Float64])
 def numeric_type(request):
     return request.param
 
 
 @pytest.fixture
-def str_to_numeric_data(with_nulls):
+def str_to_integer_data(with_nulls):
     a = ["1", "2", "3", "4", "5", "6"]
     if with_nulls:
         a[4] = None
@@ -54,7 +64,28 @@ def str_to_numeric_data(with_nulls):
 
 
 @pytest.fixture
-def str_from_numeric_data(with_nulls, numeric_type):
+def str_to_float_data(with_nulls):
+    a = [
+        "1.1",
+        "2.2",
+        "3.3",
+        "4.4",
+        "5.5",
+        "6.6",
+        "inf",
+        "+inf",
+        "-inf",
+        "nan",
+        "-1.234",
+        "2e2",
+    ]
+    if with_nulls:
+        a[4] = None
+    return pl.LazyFrame({"a": a})
+
+
+@pytest.fixture
+def str_from_integer_data(with_nulls, integer_type):
     a = [
         1,
         2,
@@ -65,7 +96,26 @@ def str_from_numeric_data(with_nulls, numeric_type):
     ]
     if with_nulls:
         a[4] = None
-    return pl.LazyFrame({"a": pl.Series(a, dtype=numeric_type)})
+    return pl.LazyFrame({"a": pl.Series(a, dtype=integer_type)})
+
+
+@pytest.fixture
+def str_from_float_data(with_nulls, floating_type):
+    a = [
+        1.1,
+        2.2,
+        3.3,
+        4.4,
+        5.5,
+        6.6,
+        float("inf"),
+        float("+inf"),
+        float("-inf"),
+        float("nan"),
+    ]
+    if with_nulls:
+        a[4] = None
+    return pl.LazyFrame({"a": pl.Series(a, dtype=floating_type)})
 
 
 slice_cases = [
@@ -367,13 +417,23 @@ def test_unsupported_regex_raises(pattern):
     assert_ir_translation_raises(q, NotImplementedError)
 
 
-def test_string_to_numeric(str_to_numeric_data, numeric_type):
-    query = str_to_numeric_data.select(pl.col("a").cast(numeric_type))
+def test_string_to_integer(str_to_integer_data, integer_type):
+    query = str_to_integer_data.select(pl.col("a").cast(integer_type))
+    assert_gpu_result_equal(query)
+
+
+def test_string_from_integer(str_from_integer_data):
+    query = str_from_integer_data.select(pl.col("a").cast(pl.String))
+    assert_gpu_result_equal(query)
+
+
+def test_string_to_float(str_to_float_data, floating_type):
+    query = str_to_float_data.select(pl.col("a").cast(floating_type))
     assert_gpu_result_equal(query)
 
 
-def test_string_from_numeric(str_from_numeric_data):
-    query = str_from_numeric_data.select(pl.col("a").cast(pl.String))
+def test_string_from_float(str_from_float_data):
+    query = str_from_float_data.select(pl.col("a").cast(pl.String))
     assert_gpu_result_equal(query)
 
 

From cd800833f443dac342eecabf9bd339ca9f279ad4 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 28 Oct 2024 07:41:57 -0700
Subject: [PATCH 11/19] test, implement, and use is_order_preserving_cast

---
 .../cudf_polars/containers/column.py          |  7 ++-
 .../cudf_polars/cudf_polars/utils/dtypes.py   | 54 ++++++++++++++++++-
 python/cudf_polars/tests/utils/test_dtypes.py | 28 +++++++++-
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index e04c7ecf07f..a472daf53ca 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -19,6 +19,8 @@
 
 from polars.exceptions import InvalidOperationError
 
+from cudf_polars.utils.dtypes import is_order_preserving_cast
+
 if TYPE_CHECKING:
     from typing_extensions import Self
 
@@ -144,7 +146,10 @@ def astype(self, dtype: plc.DataType) -> Column:
         if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING:
             return Column(self._handle_string_cast(dtype))
         else:
-            return Column(plc.unary.cast(self.obj, dtype)).sorted_like(self)
+            result = Column(plc.unary.cast(self.obj, dtype))
+            if is_order_preserving_cast(self.obj.type(), dtype):
+                return result.sorted_like(self)
+            return result
 
     def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
         if dtype.id() == plc.TypeId.STRING:
diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 491d57584b6..75b76e8e25e 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -9,12 +9,21 @@
 
 import pyarrow as pa
 import pylibcudf as plc
-from pylibcudf.traits import is_numeric_not_bool
+from pylibcudf.traits import (
+    is_floating_point,
+    is_integral_not_bool,
+    is_numeric_not_bool,
+)
 from typing_extensions import assert_never
 
 import polars as pl
 
-__all__ = ["from_polars", "downcast_arrow_lists", "can_cast"]
+__all__ = [
+    "from_polars",
+    "downcast_arrow_lists",
+    "can_cast",
+    "is_order_preserving_cast",
+]
 
 
 def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType:
@@ -72,6 +81,47 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     )
 
 
+def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool:
+    """
+    Determine if a cast would preserve the order of the source data.
+
+    Parameters
+    ----------
+    from_
+        Source datatype
+    to
+        Target datatype
+
+    Returns
+    -------
+    True if the cast is order-preserving, False otherwise
+    """
+    if from_.id() == to.id():
+        return True
+
+    if is_integral_not_bool(from_) and is_integral_not_bool(to):
+        # True if signedness is the same and the target is larger
+        if plc.traits.is_unsigned(from_) == plc.traits.is_unsigned(to):
+            if plc.types.size_of(to) >= plc.types.size_of(from_):
+                return True
+        elif (plc.traits.is_unsigned(from_) and not plc.traits.is_unsigned(to)) and (
+            plc.types.size_of(to) > plc.types.size_of(from_)
+        ):
+            # Unsigned to signed is order preserving if target is large enough
+            # But signed to unsigned is never order preserving due to negative values
+            return True
+    elif is_floating_point(from_) and is_floating_point(to):
+        # True if the target is larger
+        if plc.types.size_of(to) > plc.types.size_of(from_):
+            return True
+    elif (is_integral_not_bool(from_) and is_floating_point(to)) and (
+        plc.types.size_of(to) > plc.types.size_of(from_)
+    ):
+        # Int64 fits in float64, but not in float32
+        return True
+    return False
+
+
 @cache
 def from_polars(dtype: pl.DataType) -> plc.DataType:
     """
diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py
index bbdb4faa256..5b510830329 100644
--- a/python/cudf_polars/tests/utils/test_dtypes.py
+++ b/python/cudf_polars/tests/utils/test_dtypes.py
@@ -3,11 +3,23 @@
 
 from __future__ import annotations
 
+import pylibcudf as plc
 import pytest
 
 import polars as pl
 
-from cudf_polars.utils.dtypes import from_polars
+from cudf_polars.utils.dtypes import from_polars, is_order_preserving_cast
+
+INT8 = plc.DataType(plc.TypeId.INT8)
+INT16 = plc.DataType(plc.TypeId.INT16)
+INT32 = plc.DataType(plc.TypeId.INT32)
+INT64 = plc.DataType(plc.TypeId.INT64)
+UINT8 = plc.DataType(plc.TypeId.UINT8)
+UINT16 = plc.DataType(plc.TypeId.UINT16)
+UINT32 = plc.DataType(plc.TypeId.UINT32)
+UINT64 = plc.DataType(plc.TypeId.UINT64)
+FLOAT32 = plc.DataType(plc.TypeId.FLOAT32)
+FLOAT64 = plc.DataType(plc.TypeId.FLOAT64)
 
 
 @pytest.mark.parametrize(
@@ -30,3 +42,17 @@
 def test_unhandled_dtype_conversion_raises(pltype):
     with pytest.raises(NotImplementedError):
         _ = from_polars(pltype)
+
+
+def test_is_order_preserving_cast():
+    assert is_order_preserving_cast(INT8, INT8)  # Same type
+    assert is_order_preserving_cast(INT8, INT16)  # Smaller type
+    assert is_order_preserving_cast(INT8, FLOAT32)  # Int to large enough float
+    assert is_order_preserving_cast(UINT8, UINT16)  # Unsigned to larger unsigned
+    assert is_order_preserving_cast(UINT8, FLOAT32)  # Unsigned to large enough float
+    assert is_order_preserving_cast(FLOAT32, FLOAT64)  # Float to larger float
+
+    assert not is_order_preserving_cast(INT16, INT8)  # Bigger type
+    assert not is_order_preserving_cast(INT8, UINT8)  # Different signedness
+    assert not is_order_preserving_cast(INT64, FLOAT32)  # Int to undersized float
+    assert not is_order_preserving_cast(FLOAT32, INT32)  # Float to undersized int

From f98f635f6f905e8921f6c766aefbcc786841613f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 28 Oct 2024 07:49:06 -0700
Subject: [PATCH 12/19] small fix

---
 .../cudf_polars/tests/expressions/test_stringfunction.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index 641608ffb0e..d2a1c12a109 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -86,14 +86,7 @@ def str_to_float_data(with_nulls):
 
 @pytest.fixture
 def str_from_integer_data(with_nulls, integer_type):
-    a = [
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-    ]
+    a = [1, 2, 3, 4, 5, 6]
     if with_nulls:
         a[4] = None
     return pl.LazyFrame({"a": pl.Series(a, dtype=integer_type)})

From 7f75375073b3e2eb89fc517efaf432dc69eecf85 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 28 Oct 2024 14:22:35 -0700
Subject: [PATCH 13/19] pass test_string_from_float

---
 .../tests/expressions/test_stringfunction.py   | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index d2a1c12a109..8d7d970eb07 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -75,6 +75,8 @@ def str_to_float_data(with_nulls):
         "inf",
         "+inf",
         "-inf",
+        "Inf",
+        "-Inf",
         "nan",
         "-1.234",
         "2e2",
@@ -425,8 +427,22 @@ def test_string_to_float(str_to_float_data, floating_type):
     assert_gpu_result_equal(query)
 
 
-def test_string_from_float(str_from_float_data):
+def test_string_from_float(request, str_from_float_data):
+    if str_from_float_data.collect_schema()["a"] == pl.Float32:
+        # libcudf will return a string representing the precision out to
+        # a certain number of hardcoded decimal places. This results in
+        # the fractional part being thrown away which causes discrepancies
+        # for certain numbers. For instance, the float32 representation of
+        # 1.1 is 1.100000023841858. When cast to a string, this will become
+        # 1.100000024. But the float64 representation of 1.1 is
+        # 1.1000000000000000888 which will result in libcudf truncating the
+        # final value to 1.1.
+        request.applymarker(pytest.mark.xfail(reason="libcudf truncation"))
     query = str_from_float_data.select(pl.col("a").cast(pl.String))
+
+    # libcudf reads float('inf') -> "inf"
+    # but polars reads float('inf') -> "Inf"
+    query = query.select(pl.col("a").str.to_lowercase())
     assert_gpu_result_equal(query)
 
 

From 9c9d395e84fade005839a6d4eb9a0d2d5b775e28 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 28 Oct 2024 15:14:49 -0700
Subject: [PATCH 14/19] add failing test to plugin xfail list

---
 python/cudf_polars/cudf_polars/testing/plugin.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index a3607159e01..a611fdab9f8 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -158,6 +158,7 @@ def pytest_configure(config: pytest.Config):
     "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU",
     "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU",
     "tests/unit/sql/test_cast.py::test_cast_errors[values2-values::int1-conversion from `i64` to `i8` failed]": "Casting that raises not supported on GPU",
+    "tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user recieves is wrong",
     "tests/unit/sql/test_miscellaneous.py::test_read_csv": "Incorrect handling of missing_is_null in read_csv",
     "tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception",
     "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match",

From 00bd36c771ad8b2b2dfae2ca969198a5f2642cf5 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 29 Oct 2024 09:58:39 -0500
Subject: [PATCH 15/19] Update python/cudf_polars/cudf_polars/testing/plugin.py

Co-authored-by: Lawrence Mitchell <wence@gmx.li>
---
 python/cudf_polars/cudf_polars/testing/plugin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index a611fdab9f8..34492858062 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -158,7 +158,7 @@ def pytest_configure(config: pytest.Config):
     "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU",
     "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU",
     "tests/unit/sql/test_cast.py::test_cast_errors[values2-values::int1-conversion from `i64` to `i8` failed]": "Casting that raises not supported on GPU",
-    "tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user recieves is wrong",
+    "tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user receives is wrong",
     "tests/unit/sql/test_miscellaneous.py::test_read_csv": "Incorrect handling of missing_is_null in read_csv",
     "tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception",
     "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match",

From c06f984e0c2602750406e3e65ddbb2e9cfde3bd2 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 29 Oct 2024 10:00:01 -0500
Subject: [PATCH 16/19] Update python/cudf_polars/cudf_polars/utils/dtypes.py

Co-authored-by: Lawrence Mitchell <wence@gmx.li>
---
 python/cudf_polars/cudf_polars/utils/dtypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 75b76e8e25e..99f07b47ebd 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -112,7 +112,7 @@ def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool:
             return True
     elif is_floating_point(from_) and is_floating_point(to):
         # True if the target is larger
-        if plc.types.size_of(to) > plc.types.size_of(from_):
+        if plc.types.size_of(to) >= plc.types.size_of(from_):
             return True
     elif (is_integral_not_bool(from_) and is_floating_point(to)) and (
         plc.types.size_of(to) > plc.types.size_of(from_)

From a68011a774650bf7a2e2a2d4258494363b8f8a15 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 29 Oct 2024 09:03:47 -0700
Subject: [PATCH 17/19] minor fixups

---
 python/cudf_polars/cudf_polars/utils/dtypes.py | 14 ++++++--------
 python/cudf_polars/tests/utils/test_dtypes.py  |  3 ++-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 99f07b47ebd..81b360eb9fc 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -110,16 +110,14 @@ def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool:
             # Unsigned to signed is order preserving if target is large enough
             # But signed to unsigned is never order preserving due to negative values
             return True
-    elif is_floating_point(from_) and is_floating_point(to):
-        # True if the target is larger
-        if plc.types.size_of(to) >= plc.types.size_of(from_):
-            return True
-    elif (is_integral_not_bool(from_) and is_floating_point(to)) and (
-        plc.types.size_of(to) > plc.types.size_of(from_)
+    elif (
+        is_floating_point(from_)
+        and is_floating_point(to)
+        and (plc.types.size_of(to) >= plc.types.size_of(from_))
     ):
-        # Int64 fits in float64, but not in float32
+        # True if the target is larger
         return True
-    return False
+    return is_integral_not_bool(from_) and is_floating_point(to)
 
 
 @cache
diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py
index 5b510830329..f9629ba8fb5 100644
--- a/python/cudf_polars/tests/utils/test_dtypes.py
+++ b/python/cudf_polars/tests/utils/test_dtypes.py
@@ -51,8 +51,9 @@ def test_is_order_preserving_cast():
     assert is_order_preserving_cast(UINT8, UINT16)  # Unsigned to larger unsigned
     assert is_order_preserving_cast(UINT8, FLOAT32)  # Unsigned to large enough float
     assert is_order_preserving_cast(FLOAT32, FLOAT64)  # Float to larger float
+    assert is_order_preserving_cast(INT64, FLOAT32)  # Int any float
 
     assert not is_order_preserving_cast(INT16, INT8)  # Bigger type
     assert not is_order_preserving_cast(INT8, UINT8)  # Different signedness
-    assert not is_order_preserving_cast(INT64, FLOAT32)  # Int to undersized float
     assert not is_order_preserving_cast(FLOAT32, INT32)  # Float to undersized int
+    assert not is_order_preserving_cast(FLOAT32, INT64)  # float to large int

From cf62714f582370863c940ac27fed82b5aa59af26 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 4 Nov 2024 16:32:18 -0800
Subject: [PATCH 18/19] allow float to int

---
 python/cudf_polars/cudf_polars/utils/dtypes.py | 4 +++-
 python/cudf_polars/tests/utils/test_dtypes.py  | 8 +++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index e7952699154..a90c283ee54 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -118,7 +118,9 @@ def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     ):
         # True if the target is larger
         return True
-    return is_integral_not_bool(from_) and is_floating_point(to)
+    return (is_integral_not_bool(from_) and is_floating_point(to)) or (
+        is_floating_point(from_) and is_integral_not_bool(to)
+    )
 
 
 @cache
diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py
index f9629ba8fb5..f63c2079e04 100644
--- a/python/cudf_polars/tests/utils/test_dtypes.py
+++ b/python/cudf_polars/tests/utils/test_dtypes.py
@@ -3,11 +3,12 @@
 
 from __future__ import annotations
 
-import pylibcudf as plc
 import pytest
 
 import polars as pl
 
+import pylibcudf as plc
+
 from cudf_polars.utils.dtypes import from_polars, is_order_preserving_cast
 
 INT8 = plc.DataType(plc.TypeId.INT8)
@@ -52,8 +53,9 @@ def test_is_order_preserving_cast():
     assert is_order_preserving_cast(UINT8, FLOAT32)  # Unsigned to large enough float
     assert is_order_preserving_cast(FLOAT32, FLOAT64)  # Float to larger float
     assert is_order_preserving_cast(INT64, FLOAT32)  # Int any float
+    assert is_order_preserving_cast(FLOAT32, INT32)  # Float to undersized int
+    assert is_order_preserving_cast(FLOAT32, INT64)  # float to large int
 
     assert not is_order_preserving_cast(INT16, INT8)  # Bigger type
     assert not is_order_preserving_cast(INT8, UINT8)  # Different signedness
-    assert not is_order_preserving_cast(FLOAT32, INT32)  # Float to undersized int
-    assert not is_order_preserving_cast(FLOAT32, INT64)  # float to large int
+    assert not is_order_preserving_cast(FLOAT64, FLOAT32)  # Smaller float

From 0344f53fe2a8bfc14ef2d1f211c5fdbb63b846c2 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 5 Nov 2024 08:35:58 -0800
Subject: [PATCH 19/19] small fixes

---
 python/cudf_polars/cudf_polars/containers/column.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index a472daf53ca..93d95346a37 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -8,6 +8,8 @@
 import functools
 from typing import TYPE_CHECKING
 
+from polars.exceptions import InvalidOperationError
+
 import pylibcudf as plc
 from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats
 from pylibcudf.strings.convert.convert_integers import (
@@ -17,8 +19,6 @@
 )
 from pylibcudf.traits import is_floating_point
 
-from polars.exceptions import InvalidOperationError
-
 from cudf_polars.utils.dtypes import is_order_preserving_cast
 
 if TYPE_CHECKING: