From 89fa93ba41682b5c2a9dda29ae1624ff4e1e4aca Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 8 Jul 2024 19:46:05 -0700 Subject: [PATCH 1/6] Remove mr param from write_csv --- cpp/include/cudf/io/csv.hpp | 4 +--- cpp/include/cudf/io/detail/csv.hpp | 4 +--- cpp/src/io/csv/writer_impl.cu | 6 +++--- cpp/src/io/functions.cpp | 6 ++---- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 68bb7fba00e..f532e9315d8 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1756,11 +1756,9 @@ class csv_writer_options_builder { * * @param options Settings for controlling writing behavior * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation */ void write_csv(csv_writer_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group } // namespace io diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 50c1a7c163d..2a70fa888f4 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -49,14 +49,12 @@ table_with_metadata read_csv(std::unique_ptr&& source, * @param column_names Column names for the output CSV * @param options Settings for controlling behavior * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource to use for device memory allocation */ void write_csv(data_sink* sink, table_view const& table, host_span column_names, csv_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); + rmm::cuda_stream_view stream); } // namespace csv } // namespace detail diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 63eb0b03c5f..743da631a45 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -430,13 +430,13 @@ void write_csv(data_sink* out_sink, table_view const& table, host_span user_column_names, csv_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + rmm::cuda_stream_view stream) { // write header: column names separated by delimiter: // (even for tables with no rows) // - write_chunked_begin(out_sink, table, user_column_names, options, stream, mr); + write_chunked_begin(out_sink, table, user_column_names, options, stream, + rmm::mr::get_current_device_resource()); if (table.num_rows() > 0) { // no need to check same-size columns constraint; auto-enforced by table_view diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 5daa55d4552..5b9dfd77bfd 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -253,8 +253,7 @@ table_with_metadata read_csv(csv_reader_options options, // Freeform API wraps the detail writer class API void write_csv(csv_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + rmm::cuda_stream_view stream) { using namespace cudf::io::detail; @@ -266,8 +265,7 @@ void write_csv(csv_writer_options const& options, options.get_table(), options.get_names(), options, - stream, - mr); + stream); } raw_orc_statistics read_raw_orc_statistics(source_info const& src_info, From 145e2104a4677048b92b60f3229cfd0b25a86605 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 8 Jul 2024 19:52:36 -0700 Subject: [PATCH 2/6] Remove mr param from write_json --- cpp/include/cudf/io/detail/json.hpp | 4 +--- cpp/include/cudf/io/json.hpp | 4 +--- cpp/src/io/functions.cpp | 6 ++---- cpp/src/io/json/write_json.cu | 6 +++--- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 540a584908d..6ff1c12831b 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -46,13 +46,11 @@ table_with_metadata read_json(host_span> sources, * @param table The set of columns * @param options Settings for controlling behavior * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource to use for device memory allocation */ void write_json(data_sink* sink, table_view const& table, json_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); + rmm::cuda_stream_view stream); /** * @brief Normalize single quotes to double quotes using FST diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 8de690482f9..07cfec440e1 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -1018,11 +1018,9 @@ class json_writer_options_builder { * * @param options Settings for controlling writing behavior * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation */ void write_json(json_writer_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group } // namespace io diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 5b9dfd77bfd..93299229027 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -216,8 +216,7 @@ table_with_metadata read_json(json_reader_options options, } void write_json(json_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + rmm::cuda_stream_view stream) { auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for JSON writing"); @@ -226,8 +225,7 @@ void write_json(json_writer_options const& options, sinks[0].get(), options.get_table(), options, - stream, - mr); + stream); } table_with_metadata read_csv(csv_reader_options options, diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 997d6fd99f8..f4da22c3e43 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -829,8 +829,7 @@ void write_chunked(data_sink* out_sink, void write_json(data_sink* out_sink, table_view const& table, json_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); std::vector user_column_names = [&]() { @@ -912,7 +911,8 @@ void write_json(data_sink* out_sink, bool const include_line_terminator = (&sub_view != &vector_views.back()) or options.is_enabled_lines(); auto const skip_last_chars = (include_line_terminator ? 0 : line_terminator.size()); - write_chunked(out_sink, str_concat_col->view(), skip_last_chars, options, stream, mr); + write_chunked(out_sink, str_concat_col->view(), skip_last_chars, options, stream, + rmm::mr::get_current_device_resource()); } } else { if (options.is_enabled_lines()) { From 9c4006db8c9911dfbd16d6a0b8fdf6163c91442a Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Mon, 8 Jul 2024 20:04:05 -0700 Subject: [PATCH 3/6] Run clang-format --- cpp/include/cudf/io/csv.hpp | 2 +- cpp/include/cudf/io/json.hpp | 2 +- cpp/src/io/csv/writer_impl.cu | 4 ++-- cpp/src/io/functions.cpp | 6 ++---- cpp/src/io/json/write_json.cu | 6 +++++- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index f532e9315d8..cc361f0918e 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1758,7 +1758,7 @@ class csv_writer_options_builder { * @param stream CUDA stream used for device memory operations and kernel launches */ void write_csv(csv_writer_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream()); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group } // namespace io diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 07cfec440e1..7af90766ad0 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -1020,7 +1020,7 @@ class json_writer_options_builder { * @param stream CUDA stream used for device memory operations and kernel launches */ void write_json(json_writer_options const& options, - rmm::cuda_stream_view stream = cudf::get_default_stream()); + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group } // namespace io diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 743da631a45..00a6dcb2286 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -435,8 +435,8 @@ void write_csv(data_sink* out_sink, // write header: column names separated by delimiter: // (even for tables with no rows) // - write_chunked_begin(out_sink, table, user_column_names, options, stream, - rmm::mr::get_current_device_resource()); + write_chunked_begin( + out_sink, table, user_column_names, options, stream, rmm::mr::get_current_device_resource()); if (table.num_rows() > 0) { // no need to check same-size columns constraint; auto-enforced by table_view diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 93299229027..d1811267942 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -215,8 +215,7 @@ table_with_metadata read_json(json_reader_options options, return json::detail::read_json(datasources, options, stream, mr); } -void write_json(json_writer_options const& options, - rmm::cuda_stream_view stream) +void write_json(json_writer_options const& options, rmm::cuda_stream_view stream) { auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for JSON writing"); @@ -250,8 +249,7 @@ table_with_metadata read_csv(csv_reader_options options, } // Freeform API wraps the detail writer class API -void write_csv(csv_writer_options const& options, - rmm::cuda_stream_view stream) +void write_csv(csv_writer_options const& options, rmm::cuda_stream_view stream) { using namespace cudf::io::detail; diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index f4da22c3e43..92f18b57321 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -911,7 +911,11 @@ void write_json(data_sink* out_sink, bool const include_line_terminator = (&sub_view != &vector_views.back()) or options.is_enabled_lines(); auto const skip_last_chars = (include_line_terminator ? 0 : line_terminator.size()); - write_chunked(out_sink, str_concat_col->view(), skip_last_chars, options, stream, + write_chunked(out_sink, + str_concat_col->view(), + skip_last_chars, + options, + stream, rmm::mr::get_current_device_resource()); } } else { From c3835315437ef69e2542499bde146b5326ea46bd Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Tue, 9 Jul 2024 01:07:34 -0700 Subject: [PATCH 4/6] Remove mr param from write_chunked --- cpp/src/io/json/write_json.cu | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 92f18b57321..c688c809e04 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -805,8 +805,7 @@ void write_chunked(data_sink* out_sink, strings_column_view const& str_column_view, int const skip_last_chars, json_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); @@ -911,12 +910,7 @@ void write_json(data_sink* out_sink, bool const include_line_terminator = (&sub_view != &vector_views.back()) or options.is_enabled_lines(); auto const skip_last_chars = (include_line_terminator ? 0 : line_terminator.size()); - write_chunked(out_sink, - str_concat_col->view(), - skip_last_chars, - options, - stream, - rmm::mr::get_current_device_resource()); + write_chunked(out_sink, str_concat_col->view(), skip_last_chars, options, stream); } } else { if (options.is_enabled_lines()) { From 22e150fc19e114f9d26f0a871c3d7a014e7335a1 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 10:47:24 -0700 Subject: [PATCH 5/6] Remove mr param from write_json tests --- cpp/tests/io/json_test.cpp | 4 +-- cpp/tests/io/json_writer.cpp | 59 +++++++++++------------------------- 2 files changed, 19 insertions(+), 44 deletions(-) diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 9c76c344157..993ab82f423 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -1400,9 +1400,7 @@ TEST_F(JsonReaderTest, JsonLongString) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); cudf::column_view int16_with_mask(repeat_times); cudf::column_view int16( diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json_writer.cpp index 946b939f456..11cd440494a 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -51,16 +51,14 @@ TEST_F(JsonWriterTest, EmptyInput) .build(); // Empty columns in table - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); std::string const expected = R"([])"; EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size())); // Empty columns in table - JSON Lines out_buffer.clear(); out_options.enable_lines(true); - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); std::string const expected_lines = "\n"; EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size())); @@ -68,8 +66,7 @@ TEST_F(JsonWriterTest, EmptyInput) cudf::table_view tbl_view2{}; out_options.set_table(tbl_view2); out_buffer.clear(); - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size())); } @@ -94,21 +91,19 @@ TEST_F(JsonWriterTest, ErrorCases) .build(); // not enough column names - EXPECT_THROW( - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), - cudf::logic_error); + EXPECT_THROW(cudf::io::write_json(out_options, cudf::test::get_default_stream()), + cudf::logic_error); mt.schema_info.emplace_back("int16"); out_options.set_metadata(mt); EXPECT_NO_THROW(cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource())); + out_options, cudf::test::get_default_stream()); // chunk_rows must be at least 8 out_options.set_rows_per_chunk(0); EXPECT_THROW( cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + out_options, cudf::test::get_default_stream()), cudf::logic_error); } @@ -131,9 +126,7 @@ TEST_F(JsonWriterTest, PlainTable) .lines(false) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"([{"col1":"a","col2":"d","int":1,"float":1.5,"int16":null},{"col1":"b","col2":"e","int":2,"float":2.5,"int16":2},{"col1":"c","col2":"f","int":3,"float":3.5,"int16":null}])"; @@ -163,9 +156,7 @@ TEST_F(JsonWriterTest, SimpleNested) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]} {"a":6,"b":7,"c":{"d":8},"f":10.5} {"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]} @@ -197,9 +188,7 @@ TEST_F(JsonWriterTest, MixedNested) .lines(false) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"([{"a":1,"b":2,"c":{"d":[3]},"f":5.5,"g":[{"h":1}]},)" R"({"a":6,"b":7,"c":{"d":[8]},"f":10.5},)" @@ -232,8 +221,7 @@ TEST_F(JsonWriterTest, WriteReadNested) .na_rep("null") .build(); - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); std::string const expected = R"({"a":1,"b":2,"c":{"d":3},"f":5.5,"g":[1]} {"a":6,"b":7,"c":{"d":8},"f":10.5} {"a":1,"b":2,"c":{"e":4},"f":5.5,"g":[2,null]} @@ -308,8 +296,7 @@ TEST_F(JsonWriterTest, WriteReadNested) mt.schema_info[2].children.clear(); out_options.set_metadata(mt); out_buffer.clear(); - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); in_options = cudf::io::json_reader_options::builder( cudf::io::source_info{out_buffer.data(), out_buffer.size()}) @@ -332,8 +319,7 @@ TEST_F(JsonWriterTest, WriteReadNested) // without column names out_options.set_metadata(cudf::io::table_metadata{}); out_buffer.clear(); - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); in_options = cudf::io::json_reader_options::builder( cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .lines(true) @@ -371,8 +357,7 @@ TEST_F(JsonWriterTest, SpecialChars) .na_rep("null") .build(); - cudf::io::write_json( - out_options, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::io::write_json(out_options, cudf::test::get_default_stream()); std::string const expected = R"({"\"a\"":1,"'b'":"abcd"} {"\"a\"":6,"'b'":"b\b\f\n\r\t"} {"\"a\"":1,"'b'":"\"c\""} @@ -405,9 +390,7 @@ TEST_F(JsonWriterTest, NullList) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"({"a":[null],"b":[[1,2,3],[null],[null,null,null],[4,null,5]]} {"a":[2,null,null,3],"b":null} {"a":[null,null,4],"b":[[2,null],null]} @@ -446,9 +429,7 @@ TEST_F(JsonWriterTest, ChunkedNested) .na_rep("null") .rows_per_chunk(8); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"({"a":1,"b":-2,"c":{},"e":[{"f":1}]} {"a":2,"b":-2,"c":{}} @@ -504,9 +485,7 @@ TEST_F(JsonWriterTest, StructAllNullCombinations) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"({} {"e":1} {"d":1} @@ -568,9 +547,7 @@ TEST_F(JsonWriterTest, Unicode) .lines(true) .na_rep("null"); - cudf::io::write_json(options_builder.build(), - cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::io::write_json(options_builder.build(), cudf::test::get_default_stream()); std::string const expected = R"({"col1":"\"\\\/\b\f\n\r\t","col2":"C\u10ae\u226a\u31f3\u434f\u51f9\u6ca6\u738b\u8fbf\u9fb8\ua057\ubbdc\uc2a4\ud3f6\ue4fe\ufd20","int16":null} From 7963902b92560a07ed01f467331b2e99a272a7a3 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 10 Jul 2024 11:37:17 -0700 Subject: [PATCH 6/6] Fix syntax error --- cpp/tests/io/json_writer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/tests/io/json_writer.cpp b/cpp/tests/io/json_writer.cpp index 11cd440494a..2c4e29a01b9 100644 --- a/cpp/tests/io/json_writer.cpp +++ b/cpp/tests/io/json_writer.cpp @@ -96,15 +96,12 @@ TEST_F(JsonWriterTest, ErrorCases) mt.schema_info.emplace_back("int16"); out_options.set_metadata(mt); - EXPECT_NO_THROW(cudf::io::write_json( - out_options, cudf::test::get_default_stream()); + EXPECT_NO_THROW(cudf::io::write_json(out_options, cudf::test::get_default_stream())); // chunk_rows must be at least 8 out_options.set_rows_per_chunk(0); - EXPECT_THROW( - cudf::io::write_json( - out_options, cudf::test::get_default_stream()), - cudf::logic_error); + EXPECT_THROW(cudf::io::write_json(out_options, cudf::test::get_default_stream()), + cudf::logic_error); } TEST_F(JsonWriterTest, PlainTable)