Skip to content

Commit

Permalink
Cover parsing to decimal types in read_json tests (#12229)
Browse files Browse the repository at this point in the history
Closes #9791

Adds `read_json` tests for decimal types. 
Tests are equivalent to existing `read_csv` tests for decimal types.
Tests invoke both JSON parsers.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)

URL: #12229
  • Loading branch information
vuule authored Dec 1, 2022
1 parent a442dd8 commit ff3b643
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 14 deletions.
28 changes: 14 additions & 14 deletions cpp/tests/io/csv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,31 +95,31 @@ struct CsvReaderNumericTypeTest : public CsvReaderTest {
// Element types that the CsvReaderNumericTypeTest typed suite is instantiated for.
using SupportedNumericTypes = cudf::test::Types<int64_t, double>;
TYPED_TEST_SUITE(CsvReaderNumericTypeTest, SupportedNumericTypes);

// Typed test to be instantiated for numeric::decimal32 and numeric::decimal64
// Fixture for CSV reader tests that parse a single column into the fixed-point type DecimalType.
template <typename DecimalType>
struct CsvFixedPointReaderTest : public CsvReaderTest {
// Reads `reference_strings` back through read_csv as one fixed-point column with the given
// `scale` and compares it against the same strings converted with cudf::strings::to_fixed_point.
void run_tests(const std::vector<std::string>& reference_strings, numeric::scale_type scale)
{
// Reference column: convert the input strings directly to the requested fixed-point type.
cudf::test::strings_column_wrapper strings(reference_strings.begin(), reference_strings.end());
auto input_column = cudf::strings::to_fixed_point(cudf::strings_column_view(strings),
data_type{type_to_id<DecimalType>(), scale});
cudf::test::strings_column_wrapper const strings(reference_strings.begin(),
reference_strings.end());
auto const expected = cudf::strings::to_fixed_point(
cudf::strings_column_view(strings), data_type{type_to_id<DecimalType>(), scale});

// CSV input: the reference strings joined with '\n', with no trailing newline.
std::string buffer = std::accumulate(reference_strings.begin(),
reference_strings.end(),
std::string{},
[](const std::string& acc, const std::string& rhs) {
return acc.empty() ? rhs : (acc + "\n" + rhs);
});
auto const buffer = std::accumulate(reference_strings.begin(),
reference_strings.end(),
std::string{},
[](const std::string& acc, const std::string& rhs) {
return acc.empty() ? rhs : (acc + "\n" + rhs);
});

// Request the fixed-point dtype for the only column.
// NOTE(review): header(-1) presumably tells the reader there is no header row - confirm.
cudf::io::csv_reader_options in_opts =
cudf::io::csv_reader_options const in_opts =
cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
.dtypes({data_type{type_to_id<DecimalType>(), scale}})
.header(-1);

const auto result = cudf::io::read_csv(in_opts);
const auto result_view = result.tbl->view();
auto const result = cudf::io::read_csv(in_opts);
auto const result_view = result.tbl->view();

// The parsed column must be equivalent to the reference, and it must be the only column.
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*input_column, result_view.column(0));
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, result_view.column(0));
EXPECT_EQ(result_view.num_columns(), 1);
}
};
Expand Down
85 changes: 85 additions & 0 deletions cpp/tests/io/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/io/datasource.hpp>
#include <cudf/io/json.hpp>
#include <cudf/strings/convert/convert_fixed_point.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
Expand Down Expand Up @@ -215,6 +216,53 @@ std::string to_records_orient(std::vector<std::map<std::string, std::string>> co
return (result + suffix);
}

// Base fixture for JSON reader tests that are typed on a fixed-point type.
// It adds no members of its own on top of JsonReaderTest.
template <typename DecimalType>
struct JsonFixedPointReaderTest : public JsonReaderTest {
};

// Fixture for tests that read well-formed fixed-point values through read_json and compare the
// parsed column against the same strings converted with cudf::strings::to_fixed_point.
template <typename DecimalType>
struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest<DecimalType> {
  // Runs a single comparison.
  //   reference_strings        textual values; each one becomes the "col0" field of one JSON line
  //   scale                    scale used for both the reference conversion and the reader dtype
  //   use_experimental_parser  forwarded to the reader's `experimental` option
  void run_test(std::vector<std::string> const& reference_strings,
                numeric::scale_type scale,
                bool use_experimental_parser)
  {
    data_type const decimal_type{type_to_id<DecimalType>(), scale};

    // Reference column: convert the input strings directly to the fixed-point type.
    cudf::test::strings_column_wrapper const input_strings(reference_strings.begin(),
                                                           reference_strings.end());
    auto const expected_column =
      cudf::strings::to_fixed_point(cudf::strings_column_view(input_strings), decimal_type);

    // JSON Lines input: one {"col0":<value>} record per string, separated by '\n' with no
    // trailing newline.
    std::string json_lines;
    for (auto const& value : reference_strings) {
      if (!json_lines.empty()) { json_lines += "\n"; }
      json_lines += "{\"col0\":" + value + "}";
    }

    cudf::io::source_info const source{json_lines.c_str(), json_lines.size()};
    cudf::io::json_reader_options const reader_opts =
      cudf::io::json_reader_options::builder(source)
        .dtypes({decimal_type})
        .lines(true)
        .experimental(use_experimental_parser);

    auto const parsed      = cudf::io::read_json(reader_opts);
    auto const parsed_view = parsed.tbl->view();

    // Stop early if the column count is wrong; the checks below index column 0.
    ASSERT_EQ(parsed_view.num_columns(), 1);
    EXPECT_EQ(parsed.metadata.schema_info[0].name, "col0");
    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_column, parsed_view.column(0));
  }

  // Runs the comparison twice: first with `experimental` off, then with it on.
  void run_tests(std::vector<std::string> const& reference_strings, numeric::scale_type scale)
  {
    for (bool const use_experimental_parser : {false, true}) {
      run_test(reference_strings, scale, use_experimental_parser);
    }
  }
};

// Register both fixed-point JSON fixtures as typed suites over cudf::test::FixedPointTypes.
TYPED_TEST_SUITE(JsonFixedPointReaderTest, cudf::test::FixedPointTypes);
TYPED_TEST_SUITE(JsonValidFixedPointReaderTest, cudf::test::FixedPointTypes);

// Parametrize qualifying JSON tests so that they run with both the experimental reader and the
// existing JSON lines reader
INSTANTIATE_TEST_CASE_P(JsonReaderParamTest,
Expand Down Expand Up @@ -1563,4 +1611,41 @@ TEST_P(JsonReaderParamTest, JsonDtypeParsing)
}
}

// Fractional inputs (plain and exponent notation, positive and negative) read with scale -2.
TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnNegativeScale)
{
  std::vector<std::string> const inputs{
    "1.23", "876e-2", "5.43e1", "-0.12", "0.25", "-0.23", "-0.27", "0.00", "0.00"};
  auto const scale = numeric::scale_type{-2};

  this->run_tests(inputs, scale);
}

// Inputs written as integers or in exponent notation, read with scale 0.
TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnNoScale)
{
  std::vector<std::string> const inputs{
    "123", "-87600e-2", "54.3e1", "-12", "25", "-23", "-27", "0", "0"};
  auto const scale = numeric::scale_type{0};

  this->run_tests(inputs, scale);
}

// Inputs with trailing zeros (plain and exponent notation), read with scale 3.
TYPED_TEST(JsonValidFixedPointReaderTest, SingleColumnPositiveScale)
{
  std::vector<std::string> const inputs{"123000",
                                        "-87600000e-2",
                                        "54300e1",
                                        "-12000",
                                        "25000",
                                        "-23000",
                                        "-27000",
                                        "0000",
                                        "0000"};
  auto const scale = numeric::scale_type{3};

  this->run_tests(inputs, scale);
}

// A field with no value must be read as a single null row of the requested fixed-point type.
TYPED_TEST(JsonFixedPointReaderTest, EmptyValues)
{
  // One JSON line whose only field, "col0", has nothing after the colon.
  std::string const json_line{"{\"col0\":}"};
  cudf::io::source_info const source{json_line.c_str(), json_line.size()};

  // `experimental` is not set here, so the reader runs with its default for that option.
  cudf::io::json_reader_options const reader_opts =
    cudf::io::json_reader_options::builder(source)
      .dtypes({data_type{type_to_id<TypeParam>(), 0}})
      .lines(true);

  auto const parsed      = cudf::io::read_json(reader_opts);
  auto const parsed_view = parsed.tbl->view();

  // Expect exactly one column named "col0" with one row, and that row is null.
  ASSERT_EQ(parsed_view.num_columns(), 1);
  EXPECT_EQ(parsed_view.num_rows(), 1);
  EXPECT_EQ(parsed.metadata.schema_info[0].name, "col0");
  EXPECT_EQ(parsed_view.column(0).null_count(), 1);
}

// NOTE(review): presumably expands to this test executable's main() - confirm against the macro
// definition, which is not visible here.
CUDF_TEST_PROGRAM_MAIN()

0 comments on commit ff3b643

Please sign in to comment.