Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into feat/nightly_versions
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr authored Oct 30, 2023
2 parents 084fc47 + abc0d41 commit 8077513
Show file tree
Hide file tree
Showing 9 changed files with 166 additions and 54 deletions.
1 change: 0 additions & 1 deletion cpp/include/cudf/io/detail/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#pragma once

#include <cudf/io/json.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand Down
4 changes: 4 additions & 0 deletions cpp/include/cudf/io/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,13 +512,15 @@ class json_reader_options_builder {
* @endcode
*
* @param options Settings for controlling reading behavior
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate device memory of the table in the returned
* table_with_metadata.
*
* @return The set of columns along with metadata
*/
table_with_metadata read_json(
json_reader_options options,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down Expand Up @@ -861,9 +863,11 @@ class json_writer_options_builder {
* @endcode
*
* @param options Settings for controlling writing behavior
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
void write_json(json_writer_options const& options,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
12 changes: 8 additions & 4 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ compression_type infer_compression_type(compression_type compression, source_inf
return compression_type::NONE;
}

table_with_metadata read_json(json_reader_options options, rmm::mr::device_memory_resource* mr)
table_with_metadata read_json(json_reader_options options,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();

Expand All @@ -210,10 +212,12 @@ table_with_metadata read_json(json_reader_options options, rmm::mr::device_memor
options.get_byte_range_offset(),
options.get_byte_range_size_with_padding());

return json::detail::read_json(datasources, options, cudf::get_default_stream(), mr);
return json::detail::read_json(datasources, options, stream, mr);
}

void write_json(json_writer_options const& options, rmm::mr::device_memory_resource* mr)
void write_json(json_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto sinks = make_datasinks(options.get_sink());
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for JSON writing");
Expand All @@ -222,7 +226,7 @@ void write_json(json_writer_options const& options, rmm::mr::device_memory_resou
sinks[0].get(),
options.get_table(),
options,
cudf::get_default_stream(),
stream,
mr);
}

Expand Down
1 change: 0 additions & 1 deletion cpp/src/io/json/nested_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cudf/io/types.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <map>
Expand Down
65 changes: 37 additions & 28 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,12 @@ struct column_to_strings_fn {
{
}

// Special member functions: the destructor is defaulted, while copy and move
// construction/assignment are all deleted, so a column_to_strings_fn instance
// can be neither copied nor moved.
// NOTE(review): this looks like the reason the type_dispatcher calls in this
// struct spell out `column_to_strings_fn const&` as the functor type, so that
// `*this` is handed over by reference instead of by value — confirm.
~column_to_strings_fn() = default;
column_to_strings_fn(column_to_strings_fn const&) = delete;
column_to_strings_fn& operator=(column_to_strings_fn const&) = delete;
column_to_strings_fn(column_to_strings_fn&&) = delete;
column_to_strings_fn& operator=(column_to_strings_fn&&) = delete;

// unsupported type of column:
template <typename column_type>
std::enable_if_t<is_not_handled<column_type>(), std::unique_ptr<column>> operator()(
Expand Down Expand Up @@ -614,17 +620,18 @@ struct column_to_strings_fn {

auto child_string_with_null = [&]() {
if (child_view.type().id() == type_id::STRUCT) {
return (*this).template operator()<cudf::struct_view>(
child_view,
children_names.size() > child_index ? children_names[child_index].children
: std::vector<column_name_info>{});
} else if (child_view.type().id() == type_id::LIST) {
return (*this).template operator()<cudf::list_view>(child_view,
return this->template operator()<cudf::struct_view>(child_view,
children_names.size() > child_index
? children_names[child_index].children
: std::vector<column_name_info>{});
} else if (child_view.type().id() == type_id::LIST) {
return this->template operator()<cudf::list_view>(child_view,
children_names.size() > child_index
? children_names[child_index].children
: std::vector<column_name_info>{});
} else {
return cudf::type_dispatcher(child_view.type(), *this, child_view);
return cudf::type_dispatcher<cudf::id_to_type_impl, column_to_strings_fn const&>(
child_view.type(), *this, child_view);
}
};
auto new_offsets = cudf::lists::detail::get_normalized_offsets(
Expand Down Expand Up @@ -679,27 +686,29 @@ struct column_to_strings_fn {
//
auto i_col_begin =
thrust::make_zip_iterator(thrust::counting_iterator<size_t>(0), column_begin);
std::transform(i_col_begin,
i_col_begin + num_columns,
std::back_inserter(str_column_vec),
[this, &children_names](auto const& i_current_col) {
auto const i = thrust::get<0>(i_current_col);
auto const& current_col = thrust::get<1>(i_current_col);
// Struct needs children's column names
if (current_col.type().id() == type_id::STRUCT) {
return (*this).template operator()<cudf::struct_view>(
current_col,
children_names.size() > i ? children_names[i].children
: std::vector<column_name_info>{});
} else if (current_col.type().id() == type_id::LIST) {
return (*this).template operator()<cudf::list_view>(
current_col,
children_names.size() > i ? children_names[i].children
: std::vector<column_name_info>{});
} else {
return cudf::type_dispatcher(current_col.type(), *this, current_col);
}
});
std::transform(
i_col_begin,
i_col_begin + num_columns,
std::back_inserter(str_column_vec),
[this, &children_names](auto const& i_current_col) {
auto const i = thrust::get<0>(i_current_col);
auto const& current_col = thrust::get<1>(i_current_col);
// Struct needs children's column names
if (current_col.type().id() == type_id::STRUCT) {
return this->template operator()<cudf::struct_view>(current_col,
children_names.size() > i
? children_names[i].children
: std::vector<column_name_info>{});
} else if (current_col.type().id() == type_id::LIST) {
return this->template operator()<cudf::list_view>(current_col,
children_names.size() > i
? children_names[i].children
: std::vector<column_name_info>{});
} else {
return cudf::type_dispatcher<cudf::id_to_type_impl, column_to_strings_fn const&>(
current_col.type(), *this, current_col);
}
});

// create string table view from str_column_vec:
//
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ ConfigureTest(
STREAM_TEXT_TEST streams/text/ngrams_test.cpp streams/text/tokenize_test.cpp STREAM_MODE testing
)
ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing)

# ##################################################################################################
# Install tests ####################################################################################
Expand Down
5 changes: 4 additions & 1 deletion cpp/tests/io/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/default_stream.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>
Expand Down Expand Up @@ -1422,7 +1423,9 @@ TEST_F(JsonReaderTest, JsonLongString)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());

cudf::table_view const expected = tbl_view;
std::map<std::string, data_type> types;
Expand Down
65 changes: 46 additions & 19 deletions cpp/tests/io/json_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/default_stream.hpp>
#include <cudf_test/iterator_utilities.hpp>

#include <cudf/detail/iterator.cuh>
Expand Down Expand Up @@ -49,22 +50,25 @@ TEST_F(JsonWriterTest, EmptyInput)
.build();

// Empty columns in table
cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
std::string const expected = R"([])";
EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size()));

// Empty columns in table - JSON Lines
out_buffer.clear();
out_options.enable_lines(true);
cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
std::string const expected_lines = "\n";
EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));

// Empty table - JSON Lines
cudf::table_view tbl_view2{};
out_options.set_table(tbl_view2);
out_buffer.clear();
cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
}

Expand All @@ -89,17 +93,22 @@ TEST_F(JsonWriterTest, ErrorCases)
.build();

// not enough column names
EXPECT_THROW(cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()),
cudf::logic_error);
EXPECT_THROW(
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
cudf::logic_error);

mt.schema_info.emplace_back("int16");
out_options.set_metadata(mt);
EXPECT_NO_THROW(cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()));
EXPECT_NO_THROW(cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()));

// rows_per_chunk must be at least 8
out_options.set_rows_per_chunk(0);
EXPECT_THROW(cudf::io::write_json(out_options, rmm::mr::get_current_device_resource()),
cudf::logic_error);
EXPECT_THROW(
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
cudf::logic_error);
}

TEST_F(JsonWriterTest, PlainTable)
Expand All @@ -121,7 +130,9 @@ TEST_F(JsonWriterTest, PlainTable)
.lines(false)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());

std::string const expected =
R"([{"col1":"a","col2":"d","int":1,"float":1.5,"int16":null},{"col1":"b","col2":"e","int":2,"float":2.5,"int16":2},{"col1":"c","col2":"f","int":3,"float":3.5,"int16":null}])";
Expand Down Expand Up @@ -151,7 +162,9 @@ TEST_F(JsonWriterTest, SimpleNested)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]}
{"a":6,"b":7,"c":{"d":8},"f":10.5}
{"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]}
Expand Down Expand Up @@ -183,7 +196,9 @@ TEST_F(JsonWriterTest, MixedNested)
.lines(false)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
std::string const expected =
R"([{"a":1,"b":2,"c":{"d":[3]},"f":5.5,"g":[{"h":1}]},)"
R"({"a":6,"b":7,"c":{"d":[8]},"f":10.5},)"
Expand Down Expand Up @@ -216,7 +231,8 @@ TEST_F(JsonWriterTest, WriteReadNested)
.na_rep("null")
.build();

cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]}
{"a":6,"b":7,"c":{"d":8},"f":10.5}
{"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]}
Expand Down Expand Up @@ -291,7 +307,8 @@ TEST_F(JsonWriterTest, WriteReadNested)
mt.schema_info[2].children.clear();
out_options.set_metadata(mt);
out_buffer.clear();
cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());

in_options = cudf::io::json_reader_options::builder(
cudf::io::source_info{out_buffer.data(), out_buffer.size()})
Expand All @@ -314,7 +331,8 @@ TEST_F(JsonWriterTest, WriteReadNested)
// without column names
out_options.set_metadata(cudf::io::table_metadata{});
out_buffer.clear();
cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
in_options = cudf::io::json_reader_options::builder(
cudf::io::source_info{out_buffer.data(), out_buffer.size()})
.lines(true)
Expand Down Expand Up @@ -352,7 +370,8 @@ TEST_F(JsonWriterTest, SpecialChars)
.na_rep("null")
.build();

cudf::io::write_json(out_options, rmm::mr::get_current_device_resource());
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
std::string const expected = R"({"\"a\"":1,"'b'":"abcd"}
{"\"a\"":6,"'b'":"b\b\f\n\r\t"}
{"\"a\"":1,"'b'":"\"c\""}
Expand Down Expand Up @@ -385,7 +404,9 @@ TEST_F(JsonWriterTest, NullList)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]}
{"a":[2,null,null,3],"b":null}
{"a":[null,null,4],"b":[[2,null],null]}
Expand Down Expand Up @@ -424,7 +445,9 @@ TEST_F(JsonWriterTest, ChunkedNested)
.na_rep("null")
.rows_per_chunk(8);

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
std::string const expected =
R"({"a":1,"b":-2,"c":{},"e":[{"f":1}]}
{"a":2,"b":-2,"c":{}}
Expand Down Expand Up @@ -480,7 +503,9 @@ TEST_F(JsonWriterTest, StructAllNullCombinations)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
std::string const expected = R"({}
{"e":1}
{"d":1}
Expand Down Expand Up @@ -542,7 +567,9 @@ TEST_F(JsonWriterTest, Unicode)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(), rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());

std::string const expected =
R"({"col1":"\"\\\/\b\f\n\r\t","col2":"C\u10ae\u226a\u31f3\u434f\u51f9\u6ca6\u738b\u8fbf\u9fb8\ua057\ubbdc\uc2a4\ud3f6\ue4fe\ufd20","int16":null}
Expand Down
Loading

0 comments on commit 8077513

Please sign in to comment.