From 02d1de2122930c379e9ba88dbf13bf8cdc5b3acb Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 25 Feb 2022 15:35:25 -0600
Subject: [PATCH 1/3] Fix warning related to integer overflow in pandas.

---
 python/cudf/cudf/tests/test_csv.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index f3d69e1745e..b0fcf6be98f 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -4,6 +4,7 @@
 import os
 import re
 import shutil
+import warnings
 from collections import OrderedDict
 from io import BytesIO, StringIO
 from pathlib import Path
@@ -1322,7 +1323,18 @@ def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype):
 
     if gdf_dtype is not None:
         # require explicit `hex` dtype to parse hexadecimals
-        pdf = pd.DataFrame(data=values, dtype=pdf_dtype, columns=["hex_int"])
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                (
+                    "Values are too large to be losslessly cast to int32. In "
+                    "a future version this will raise OverflowError."
+                ),
+                category=FutureWarning,
+            )
+            pdf = pd.DataFrame(
+                data=values, dtype=pdf_dtype, columns=["hex_int"]
+            )
         gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"])
         np.testing.assert_array_equal(
             pdf["hex_int"], gdf["hex_int"].to_numpy()

From e015b2313ae231b8315a082a7801d9805d30cd97 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 28 Feb 2022 14:44:56 -0600
Subject: [PATCH 2/3] Refactor tests of overflowing hexadecimal values.

---
 python/cudf/cudf/tests/test_csv.py | 46 +++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index b0fcf6be98f..7dd735997d6 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -4,7 +4,6 @@
 import os
 import re
 import shutil
-import warnings
 from collections import OrderedDict
 from io import BytesIO, StringIO
 from pathlib import Path
@@ -1316,25 +1315,14 @@ def test_csv_reader_aligned_byte_range(tmpdir):
     [(None, None), ("int", "hex"), ("int32", "hex32"), ("int64", "hex64")],
 )
 def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype):
-    lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF", "9512c20b"]
+    lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF"]
     values = [int(hex_int, 16) for hex_int in lines]
 
     buffer = "\n".join(lines)
 
     if gdf_dtype is not None:
         # require explicit `hex` dtype to parse hexadecimals
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore",
-                (
-                    "Values are too large to be losslessly cast to int32. In "
-                    "a future version this will raise OverflowError."
-                ),
-                category=FutureWarning,
-            )
-            pdf = pd.DataFrame(
-                data=values, dtype=pdf_dtype, columns=["hex_int"]
-            )
+        pdf = pd.DataFrame(data=values, dtype=pdf_dtype, columns=["hex_int"])
         gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"])
         np.testing.assert_array_equal(
             pdf["hex_int"], gdf["hex_int"].to_numpy()
@@ -1346,6 +1334,36 @@ def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype):
         assert_eq(pdf, gdf)
 
 
+@pytest.mark.parametrize(
+    "np_dtype, gdf_dtype",
+    [("int_", "hex"), ("int32", "hex32"), ("int64", "hex64")],
+)
+def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype):
+    # This tests values which cause an overflow warning that will become an
+    # error in pandas. NumPy wraps the overflow silently up to the bounds of a
+    # signed int64.
+    lines = [
+        "0x0",
+        "-0x1000",
+        "0xfedcba",
+        "0xABCDEF",
+        "0xaBcDeF",
+        "0x9512c20b",
+        "0x7fffffff",
+        "0x7fffffffffffffff",
+        "-0x8000000000000000",
+    ]
+    values = [int(hex_int, 16) for hex_int in lines]
+    buffer = "\n".join(lines)
+
+    gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"])
+
+    np_dtype = getattr(np, np_dtype)
+    expected = np.array(values, dtype=np_dtype)
+    actual = gdf["hex_int"].to_numpy()
+    np.testing.assert_array_equal(expected, actual)
+
+
 @pytest.mark.parametrize("quoting", [0, 1, 2, 3])
 def test_csv_reader_pd_consistent_quotes(quoting):
     names = ["text"]

From d7045b1fbe0f1c571eebda5473ba895ef2b9ccef Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 1 Mar 2022 09:44:56 -0600
Subject: [PATCH 3/3] Use string as dtype.

---
 python/cudf/cudf/tests/test_csv.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index 7dd735997d6..6176184b670 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -1336,7 +1336,7 @@ def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype):
 
 @pytest.mark.parametrize(
     "np_dtype, gdf_dtype",
-    [("int_", "hex"), ("int32", "hex32"), ("int64", "hex64")],
+    [("int", "hex"), ("int32", "hex32"), ("int64", "hex64")],
 )
 def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype):
     # This tests values which cause an overflow warning that will become an
@@ -1358,7 +1358,6 @@ def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype):
 
     gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"])
 
-    np_dtype = getattr(np, np_dtype)
     expected = np.array(values, dtype=np_dtype)
     actual = gdf["hex_int"].to_numpy()
     np.testing.assert_array_equal(expected, actual)