From ff3b64325d3ac48fc0e8e0e9e1cf6246dd4aa075 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 30 Nov 2022 17:13:12 -0800 Subject: [PATCH] Cover parsing to decimal types in `read_json` tests (#12229) Closes #9791 Adds `read_json` tests for decimal types. Tests are equivalent to existing `read_csv` tests for decimal types. Tests invoke both JSON parsers. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/12229 --- cpp/tests/io/csv_test.cpp | 28 ++++++------- cpp/tests/io/json_test.cpp | 85 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 14 deletions(-) diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 737e9d8281e..c0af2579299 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -95,31 +95,31 @@ struct CsvReaderNumericTypeTest : public CsvReaderTest { using SupportedNumericTypes = cudf::test::Types; TYPED_TEST_SUITE(CsvReaderNumericTypeTest, SupportedNumericTypes); -// Typed test to be instantiated for numeric::decimal32 and numeric::decimal64 template struct CsvFixedPointReaderTest : public CsvReaderTest { void run_tests(const std::vector& reference_strings, numeric::scale_type scale) { - cudf::test::strings_column_wrapper strings(reference_strings.begin(), reference_strings.end()); - auto input_column = cudf::strings::to_fixed_point(cudf::strings_column_view(strings), - data_type{type_to_id(), scale}); + cudf::test::strings_column_wrapper const strings(reference_strings.begin(), + reference_strings.end()); + auto const expected = cudf::strings::to_fixed_point( + cudf::strings_column_view(strings), data_type{type_to_id(), scale}); - std::string buffer = std::accumulate(reference_strings.begin(), - reference_strings.end(), - std::string{}, - [](const std::string& acc, const std::string& rhs) { - return acc.empty() ? rhs : (acc + "\n" + rhs); - }); + auto const buffer = std::accumulate(reference_strings.begin(), + reference_strings.end(), + std::string{}, + [](const std::string& acc, const std::string& rhs) { + return acc.empty() ? rhs : (acc + "\n" + rhs); + }); - cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options const in_opts = cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .dtypes({data_type{type_to_id(), scale}}) .header(-1); - const auto result = cudf::io::read_csv(in_opts); - const auto result_view = result.tbl->view(); + auto const result = cudf::io::read_csv(in_opts); + auto const result_view = result.tbl->view(); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*input_column, result_view.column(0)); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, result_view.column(0)); EXPECT_EQ(result_view.num_columns(), 1); } }; diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index f7b21008f70..3d5f047d52c 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -215,6 +216,53 @@ std::string to_records_orient(std::vector> co return (result + suffix); } +template +struct JsonFixedPointReaderTest : public JsonReaderTest { +}; + +template +struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest { + void run_test(std::vector const& reference_strings, + numeric::scale_type scale, + bool use_experimental_parser) + { + cudf::test::strings_column_wrapper const strings(reference_strings.begin(), + reference_strings.end()); + auto const expected = cudf::strings::to_fixed_point( + cudf::strings_column_view(strings), data_type{type_to_id(), scale}); + + auto const buffer = + std::accumulate(reference_strings.begin(), + reference_strings.end(), + std::string{}, + [](const std::string& acc, const std::string& rhs) { + return acc + (acc.empty() ? "" : "\n") + "{\"col0\":" + rhs + "}"; + }); + cudf::io::json_reader_options const in_opts = + cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) + .dtypes({data_type{type_to_id(), scale}}) + .lines(true) + .experimental(use_experimental_parser); + + auto const result = cudf::io::read_json(in_opts); + auto const result_view = result.tbl->view(); + + ASSERT_EQ(result_view.num_columns(), 1); + EXPECT_EQ(result.metadata.schema_info[0].name, "col0"); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, result_view.column(0)); + } + + void run_tests(std::vector const& reference_strings, numeric::scale_type scale) + { + // Test both parsers + run_test(reference_strings, scale, false); + run_test(reference_strings, scale, true); + } +}; + +TYPED_TEST_SUITE(JsonFixedPointReaderTest, cudf::test::FixedPointTypes); +TYPED_TEST_SUITE(JsonValidFixedPointReaderTest, cudf::test::FixedPointTypes); + // Parametrize qualifying JSON tests for executing both experimental reader and existing JSON lines // reader INSTANTIATE_TEST_CASE_P(JsonReaderParamTest, @@ -1563,4 +1611,41 @@ TEST_P(JsonReaderParamTest, JsonDtypeParsing) } } +TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnNegativeScale) +{ + this->run_tests({"1.23", "876e-2", "5.43e1", "-0.12", "0.25", "-0.23", "-0.27", "0.00", "0.00"}, + numeric::scale_type{-2}); +} + +TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnNoScale) +{ + this->run_tests({"123", "-87600e-2", "54.3e1", "-12", "25", "-23", "-27", "0", "0"}, + numeric::scale_type{0}); +} + +TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnPositiveScale) +{ + this->run_tests( + {"123000", "-87600000e-2", "54300e1", "-12000", "25000", "-23000", "-27000", "0000", "0000"}, + numeric::scale_type{3}); +} + +TYPED_TEST(JsonFixedPointReaderTest, EmptyValues) +{ + auto const buffer = std::string{"{\"col0\":}"}; + + cudf::io::json_reader_options const in_opts = + cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) + .dtypes({data_type{type_to_id(), 0}}) + .lines(true); + + auto const result = cudf::io::read_json(in_opts); + auto const result_view = result.tbl->view(); + + ASSERT_EQ(result_view.num_columns(), 1); + EXPECT_EQ(result_view.num_rows(), 1); + EXPECT_EQ(result.metadata.schema_info[0].name, "col0"); + EXPECT_EQ(result_view.column(0).null_count(), 1); +} + CUDF_TEST_PROGRAM_MAIN()