Skip to content

Commit

Permalink
Remove mr param from write_csv and write_json (#16231)
Browse files Browse the repository at this point in the history
Fixes #16200

Authors:
  - Jayjeet Chakraborty (https://github.com/JayjeetAtGithub)
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)

URL: #16231
  • Loading branch information
JayjeetAtGithub authored Jul 10, 2024
1 parent 261f911 commit 64e3e8d
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 78 deletions.
4 changes: 1 addition & 3 deletions cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1756,11 +1756,9 @@ class csv_writer_options_builder {
*
* @param options Settings for controlling writing behavior
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
void write_csv(csv_writer_options const& options,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::cuda_stream_view stream = cudf::get_default_stream());

/** @} */ // end of group
} // namespace io
Expand Down
4 changes: 1 addition & 3 deletions cpp/include/cudf/io/detail/csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,12 @@ table_with_metadata read_csv(std::unique_ptr<cudf::io::datasource>&& source,
* @param column_names Column names for the output CSV
* @param options Settings for controlling behavior
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource to use for device memory allocation
*/
void write_csv(data_sink* sink,
table_view const& table,
host_span<std::string const> column_names,
csv_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
rmm::cuda_stream_view stream);

} // namespace csv
} // namespace detail
Expand Down
4 changes: 1 addition & 3 deletions cpp/include/cudf/io/detail/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,11 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
* @param table The set of columns
* @param options Settings for controlling behavior
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource to use for device memory allocation
*/
void write_json(data_sink* sink,
table_view const& table,
json_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
rmm::cuda_stream_view stream);

/**
* @brief Normalize single quotes to double quotes using FST
Expand Down
4 changes: 1 addition & 3 deletions cpp/include/cudf/io/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1018,11 +1018,9 @@ class json_writer_options_builder {
*
* @param options Settings for controlling writing behavior
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
void write_json(json_writer_options const& options,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::cuda_stream_view stream = cudf::get_default_stream());

/** @} */ // end of group
} // namespace io
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,13 @@ void write_csv(data_sink* out_sink,
table_view const& table,
host_span<std::string const> user_column_names,
csv_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
rmm::cuda_stream_view stream)
{
// write header: column names separated by delimiter:
// (even for tables with no rows)
//
write_chunked_begin(out_sink, table, user_column_names, options, stream, mr);
write_chunked_begin(
out_sink, table, user_column_names, options, stream, rmm::mr::get_current_device_resource());

if (table.num_rows() > 0) {
// no need to check same-size columns constraint; auto-enforced by table_view
Expand Down
14 changes: 4 additions & 10 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,7 @@ table_with_metadata read_json(json_reader_options options,
return json::detail::read_json(datasources, options, stream, mr);
}

void write_json(json_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
void write_json(json_writer_options const& options, rmm::cuda_stream_view stream)
{
auto sinks = make_datasinks(options.get_sink());
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for JSON writing");
Expand All @@ -226,8 +224,7 @@ void write_json(json_writer_options const& options,
sinks[0].get(),
options.get_table(),
options,
stream,
mr);
stream);
}

table_with_metadata read_csv(csv_reader_options options,
Expand All @@ -252,9 +249,7 @@ table_with_metadata read_csv(csv_reader_options options,
}

// Freeform API wraps the detail writer class API
void write_csv(csv_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
void write_csv(csv_writer_options const& options, rmm::cuda_stream_view stream)
{
using namespace cudf::io::detail;

Expand All @@ -266,8 +261,7 @@ void write_csv(csv_writer_options const& options,
options.get_table(),
options.get_names(),
options,
stream,
mr);
stream);
}

raw_orc_statistics read_raw_orc_statistics(source_info const& src_info,
Expand Down
8 changes: 3 additions & 5 deletions cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -805,8 +805,7 @@ void write_chunked(data_sink* out_sink,
strings_column_view const& str_column_view,
int const skip_last_chars,
json_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column.");
Expand All @@ -829,8 +828,7 @@ void write_chunked(data_sink* out_sink,
void write_json(data_sink* out_sink,
table_view const& table,
json_writer_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
std::vector<column_name_info> user_column_names = [&]() {
Expand Down Expand Up @@ -912,7 +910,7 @@ void write_json(data_sink* out_sink,
bool const include_line_terminator =
(&sub_view != &vector_views.back()) or options.is_enabled_lines();
auto const skip_last_chars = (include_line_terminator ? 0 : line_terminator.size());
write_chunked(out_sink, str_concat_col->view(), skip_last_chars, options, stream, mr);
write_chunked(out_sink, str_concat_col->view(), skip_last_chars, options, stream);
}
} else {
if (options.is_enabled_lines()) {
Expand Down
4 changes: 1 addition & 3 deletions cpp/tests/io/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1400,9 +1400,7 @@ TEST_F(JsonReaderTest, JsonLongString)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());

cudf::column_view int16_with_mask(repeat_times);
cudf::column_view int16(
Expand Down
64 changes: 19 additions & 45 deletions cpp/tests/io/json_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,22 @@ TEST_F(JsonWriterTest, EmptyInput)
.build();

// Empty columns in table
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());
std::string const expected = R"([])";
EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size()));

// Empty columns in table - JSON Lines
out_buffer.clear();
out_options.enable_lines(true);
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());
std::string const expected_lines = "\n";
EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));

// Empty table - JSON Lines
cudf::table_view tbl_view2{};
out_options.set_table(tbl_view2);
out_buffer.clear();
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());
EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
}

Expand All @@ -94,22 +91,17 @@ TEST_F(JsonWriterTest, ErrorCases)
.build();

// not enough column names
EXPECT_THROW(
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
cudf::logic_error);
EXPECT_THROW(cudf::io::write_json(out_options, cudf::test::get_default_stream()),
cudf::logic_error);

mt.schema_info.emplace_back("int16");
out_options.set_metadata(mt);
EXPECT_NO_THROW(cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()));
EXPECT_NO_THROW(cudf::io::write_json(out_options, cudf::test::get_default_stream()));

// chunk_rows must be at least 8
out_options.set_rows_per_chunk(0);
EXPECT_THROW(
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()),
cudf::logic_error);
EXPECT_THROW(cudf::io::write_json(out_options, cudf::test::get_default_stream()),
cudf::logic_error);
}

TEST_F(JsonWriterTest, PlainTable)
Expand All @@ -131,9 +123,7 @@ TEST_F(JsonWriterTest, PlainTable)
.lines(false)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());

std::string const expected =
R"([{"col1":"a","col2":"d","int":1,"float":1.5,"int16":null},{"col1":"b","col2":"e","int":2,"float":2.5,"int16":2},{"col1":"c","col2":"f","int":3,"float":3.5,"int16":null}])";
Expand Down Expand Up @@ -163,9 +153,7 @@ TEST_F(JsonWriterTest, SimpleNested)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());
std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]}
{"a":6,"b":7,"c":{"d":8},"f":10.5}
{"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]}
Expand Down Expand Up @@ -197,9 +185,7 @@ TEST_F(JsonWriterTest, MixedNested)
.lines(false)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());
std::string const expected =
R"([{"a":1,"b":2,"c":{"d":[3]},"f":5.5,"g":[{"h":1}]},)"
R"({"a":6,"b":7,"c":{"d":[8]},"f":10.5},)"
Expand Down Expand Up @@ -232,8 +218,7 @@ TEST_F(JsonWriterTest, WriteReadNested)
.na_rep("null")
.build();

cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());
std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]}
{"a":6,"b":7,"c":{"d":8},"f":10.5}
{"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]}
Expand Down Expand Up @@ -308,8 +293,7 @@ TEST_F(JsonWriterTest, WriteReadNested)
mt.schema_info[2].children.clear();
out_options.set_metadata(mt);
out_buffer.clear();
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());

in_options = cudf::io::json_reader_options::builder(
cudf::io::source_info{out_buffer.data(), out_buffer.size()})
Expand All @@ -332,8 +316,7 @@ TEST_F(JsonWriterTest, WriteReadNested)
// without column names
out_options.set_metadata(cudf::io::table_metadata{});
out_buffer.clear();
cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());
in_options = cudf::io::json_reader_options::builder(
cudf::io::source_info{out_buffer.data(), out_buffer.size()})
.lines(true)
Expand Down Expand Up @@ -371,8 +354,7 @@ TEST_F(JsonWriterTest, SpecialChars)
.na_rep("null")
.build();

cudf::io::write_json(
out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource());
cudf::io::write_json(out_options, cudf::test::get_default_stream());
std::string const expected = R"({"\"a\"":1,"'b'":"abcd"}
{"\"a\"":6,"'b'":"b\b\f\n\r\t"}
{"\"a\"":1,"'b'":"\"c\""}
Expand Down Expand Up @@ -405,9 +387,7 @@ TEST_F(JsonWriterTest, NullList)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());
std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]}
{"a":[2,null,null,3],"b":null}
{"a":[null,null,4],"b":[[2,null],null]}
Expand Down Expand Up @@ -446,9 +426,7 @@ TEST_F(JsonWriterTest, ChunkedNested)
.na_rep("null")
.rows_per_chunk(8);

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());
std::string const expected =
R"({"a":1,"b":-2,"c":{},"e":[{"f":1}]}
{"a":2,"b":-2,"c":{}}
Expand Down Expand Up @@ -504,9 +482,7 @@ TEST_F(JsonWriterTest, StructAllNullCombinations)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());
std::string const expected = R"({}
{"e":1}
{"d":1}
Expand Down Expand Up @@ -568,9 +544,7 @@ TEST_F(JsonWriterTest, Unicode)
.lines(true)
.na_rep("null");

cudf::io::write_json(options_builder.build(),
cudf::test::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream());

std::string const expected =
R"({"col1":"\"\\\/\b\f\n\r\t","col2":"C\u10ae\u226a\u31f3\u434f\u51f9\u6ca6\u738b\u8fbf\u9fb8\ua057\ubbdc\uc2a4\ud3f6\ue4fe\ufd20","int16":null}
Expand Down

0 comments on commit 64e3e8d

Please sign in to comment.