From 0e45d133ce3631ac8b74d8188dc1c9d3361ee717 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 15 Nov 2022 05:28:27 +0530 Subject: [PATCH] remove breaking behavior on nulls --- cpp/src/io/utilities/parsing_utils.cuh | 10 +++++----- cpp/tests/io/csv_test.cpp | 3 ++- cpp/tests/io/json_test.cpp | 7 ++++--- python/cudf/cudf/tests/test_csv.py | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index a78700db484..71c86553c4d 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -578,9 +578,9 @@ struct ConvertFunctor { return as_hex ? cudf::io::parse_numeric(begin, end, opts) : cudf::io::parse_numeric(begin, end, opts); }(); - if (value.has_value()) { static_cast(out_buffer)[row] = *value; } + static_cast(out_buffer)[row] = value.value_or(std::numeric_limits::quiet_NaN()); - return value.has_value(); + return true; } /** @@ -630,9 +630,9 @@ struct ConvertFunctor { } return cudf::io::parse_numeric(begin, end, opts); }(); - if (value.has_value()) { static_cast(out_buffer)[row] = *value; } + static_cast(out_buffer)[row] = value.value_or(std::numeric_limits::quiet_NaN()); - return value.has_value(); + return true; } /** @@ -659,7 +659,7 @@ struct ConvertFunctor { } return cudf::io::parse_numeric(begin, end, opts); }(); - if (value.has_value()) { static_cast(out_buffer)[row] = *value; } + static_cast(out_buffer)[row] = value.value_or(std::numeric_limits::quiet_NaN()); return value.has_value() and !std::isnan(*value); } diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index af2e0f5abb9..8acc6f8f6ee 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -1170,7 +1170,8 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint) const auto col_data = cudf::test::to_host(view.column(0)); // col_data.first contains the column data - // ignore all data because it is all nulls. + for (const auto& elem : col_data.first) + ASSERT_TRUE(std::isnan(elem)); // col_data.second contains the bitmasks ASSERT_EQ(0u, col_data.second[0]); } diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 5da4d8969d6..f7b21008f70 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -914,7 +914,8 @@ TEST_P(JsonReaderParamTest, InvalidFloatingPoint) const auto col_data = cudf::test::to_host(result.tbl->view().column(0)); // col_data.first contains the column data - // ignore all data because it is all nulls. + for (const auto& elem : col_data.first) + ASSERT_TRUE(std::isnan(elem)); // col_data.second contains the bitmasks ASSERT_EQ(0u, col_data.second[0]); } @@ -1495,7 +1496,7 @@ TEST_P(JsonReaderParamTest, JsonDtypeParsing) auto int_col = int_wrapper{ {0, 0, int_NA, 1, 1, int_NA, int_NA, int_NA, int_NA, 1, 0, int_NA, 1, 0, int_NA, int_NA}, - make_validity(validity)}; + cudf::test::iterators::nulls_at(std::vector{8})}; auto float_col = float_wrapper{{0.0, 0.0, double_NA, @@ -1534,7 +1535,7 @@ TEST_P(JsonReaderParamTest, JsonDtypeParsing) false, bool_NA, bool_NA}, - make_validity(validity)}; + cudf::test::iterators::nulls_at(std::vector{8})}; // Types to test const std::vector dtypes = { diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 02569285efe..e85d404d2c4 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -834,7 +834,7 @@ def test_csv_reader_bools_NA(): expected = pd.DataFrame( { "text": ["true", "false", "foo", "bar", "qux"], - "int": [1.0, 0.0, 1.0, 0.0, np.nan], + "int": [1, 0, 1, 0, 0], } ) # breaking behaviour is np.nan for qux