diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index f3d69e1745e..6176184b670 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -1315,7 +1315,7 @@ def test_csv_reader_aligned_byte_range(tmpdir):
     [(None, None), ("int", "hex"), ("int32", "hex32"), ("int64", "hex64")],
 )
 def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype):
-    lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF", "9512c20b"]
+    lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF"]
     values = [int(hex_int, 16) for hex_int in lines]
 
     buffer = "\n".join(lines)
@@ -1334,6 +1334,38 @@ def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype):
         assert_eq(pdf, gdf)
 
 
+@pytest.mark.parametrize(
+    "np_dtype, gdf_dtype",
+    [("int", "hex"), ("int32", "hex32"), ("int64", "hex64")],
+)
+def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype):
+    # This tests values which cause an overflow warning that will become an
+    # error in pandas. NumPy wraps the overflow silently up to the bounds of a
+    # signed int64.
+    lines = [
+        "0x0",
+        "-0x1000",
+        "0xfedcba",
+        "0xABCDEF",
+        "0xaBcDeF",
+        "0x9512c20b",
+        "0x7fffffff",
+        "0x7fffffffffffffff",
+        "-0x8000000000000000",
+    ]
+    values = [int(hex_int, 16) for hex_int in lines]
+    buffer = "\n".join(lines)
+
+    gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"])
+
+    # Every input fits in int64, so build an int64 array and then cast:
+    # astype() wraps silently, whereas np.array(values, dtype="int32")
+    # raises OverflowError on NumPy >= 2 for out-of-range Python integers.
+    expected = np.array(values, dtype="int64").astype(np_dtype)
+    actual = gdf["hex_int"].to_numpy()
+    np.testing.assert_array_equal(expected, actual)
+
+
 @pytest.mark.parametrize("quoting", [0, 1, 2, 3])
 def test_csv_reader_pd_consistent_quotes(quoting):
     names = ["text"]