From aa28cd85290d5c389202d5979370bec1f647ec11 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 27 Oct 2023 22:38:20 +0000 Subject: [PATCH 1/4] Added streams to CSV reader and writer api --- cpp/include/cudf/io/csv.hpp | 4 + cpp/include/cudf/io/detail/csv.hpp | 1 - cpp/src/io/csv/writer_impl.cu | 26 ++++--- cpp/src/io/functions.cpp | 12 ++- cpp/tests/CMakeLists.txt | 1 + cpp/tests/streams/io/csv_test.cpp | 114 +++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 14 deletions(-) create mode 100644 cpp/tests/streams/io/csv_test.cpp diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index ac885c54356..435583e805d 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1307,6 +1307,7 @@ class csv_reader_options_builder { * @endcode * * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata * @@ -1314,6 +1315,7 @@ class csv_reader_options_builder { */ table_with_metadata read_csv( csv_reader_options options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group @@ -1715,9 +1717,11 @@ class csv_writer_options_builder { * @endcode * * @param options Settings for controlling writing behavior + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ void write_csv(csv_writer_options const& options, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 9fdc7a47fb9..40ddcf385b0 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -17,7 +17,6 @@ #pragma once #include -#include #include diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 8c586306ad5..570ca7be08f 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -146,6 +146,12 @@ struct column_to_strings_fn { { } + ~column_to_strings_fn() = default; + column_to_strings_fn(column_to_strings_fn const&) = delete; + column_to_strings_fn& operator=(column_to_strings_fn const&) = delete; + column_to_strings_fn(column_to_strings_fn&&) = delete; + column_to_strings_fn& operator=(column_to_strings_fn&&) = delete; + // Note: `null` replacement with `na_rep` deferred to `concatenate()` // instead of column-wise; might be faster // @@ -367,10 +373,10 @@ void write_chunked(data_sink* out_sink, CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); - cudf::string_scalar newline{options.get_line_terminator()}; + cudf::string_scalar newline(options.get_line_terminator(), true, stream); auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view, newline, - string_scalar("", false), + string_scalar("", false, stream), stream, rmm::mr::get_current_device_resource()); strings_column_view strings_column{p_str_col_w_nl->view()}; @@ -455,12 +461,14 @@ void write_csv(data_sink* out_sink, // populate vector of string-converted columns: // - std::transform(sub_view.begin(), - sub_view.end(), - std::back_inserter(str_column_vec), - [converter](auto const& current_col) { - return cudf::type_dispatcher(current_col.type(), converter, current_col); - }); + std::transform( + sub_view.begin(), + sub_view.end(), + std::back_inserter(str_column_vec), + [&converter = std::as_const(converter)](auto const& current_col) { + return cudf::type_dispatcher( + current_col.type(), converter, current_col); + }); // create string table view from str_column_vec: // @@ -479,7 +487,7 @@ void write_csv(data_sink* out_sink, strings::separator_on_nulls::YES, stream, rmm::mr::get_current_device_resource()); - cudf::string_scalar narep{options.get_na_rep()}; + cudf::string_scalar narep(options.get_na_rep(), true, stream); return cudf::strings::detail::replace_nulls( str_table_view.column(0), narep, stream, rmm::mr::get_current_device_resource()); }(); diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 29ebb1ddbde..d00c66edddd 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -226,7 +226,9 @@ void write_json(json_writer_options const& options, rmm::mr::device_memory_resou mr); } -table_with_metadata read_csv(csv_reader_options options, rmm::mr::device_memory_resource* mr) +table_with_metadata read_csv(csv_reader_options options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); @@ -241,12 +243,14 @@ table_with_metadata read_csv(csv_reader_options options, rmm::mr::device_memory_ return cudf::io::detail::csv::read_csv( // std::move(datasources[0]), options, - cudf::get_default_stream(), + stream, mr); } // Freeform API wraps the detail writer class API -void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resource* mr) +void write_csv(csv_writer_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using namespace cudf::io::detail; @@ -258,7 +262,7 @@ void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resourc options.get_table(), options.get_names(), options, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 95411668284..7561031062d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -651,6 +651,7 @@ ConfigureTest( ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_TEXT_TEST streams/text/ngrams_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_CSVIO_TEST streams/io/csv_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/io/csv_test.cpp b/cpp/tests/streams/io/csv_test.cpp new file mode 100644 index 00000000000..0aef69a8ba3 --- /dev/null +++ b/cpp/tests/streams/io/csv_test.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +class CSVTest : public cudf::test::BaseFixture {}; + +template +inline auto random_values(size_t size) +{ + std::vector values(size); + + using T1 = T; + using uniform_distribution = + typename std::conditional_t, + std::bernoulli_distribution, + std::conditional_t, + std::uniform_real_distribution, + std::uniform_int_distribution>>; + + static constexpr auto seed = 0xf00d; + static std::mt19937 engine{seed}; + static uniform_distribution dist{}; + std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); + + return values; +} + +TEST_F(CSVTest, CSVReader) +{ + constexpr auto num_rows = 10; + auto int8_values = random_values(num_rows); + auto int16_values = random_values(num_rows); + auto int32_values = random_values(num_rows); + auto int64_values = random_values(num_rows); + auto uint8_values = random_values(num_rows); + auto uint16_values = random_values(num_rows); + auto uint32_values = random_values(num_rows); + auto uint64_values = random_values(num_rows); + auto float32_values = random_values(num_rows); + auto float64_values = random_values(num_rows); + + auto filepath = temp_env->get_temp_dir() + "MultiColumn.csv"; + { + std::ostringstream line; + for (int i = 0; i < num_rows; ++i) { + line << std::to_string(int8_values[i]) << "," << int16_values[i] << "," << int32_values[i] + << "," << int64_values[i] << "," << std::to_string(uint8_values[i]) << "," + << uint16_values[i] << "," << uint32_values[i] << "," << uint64_values[i] << "," + << float32_values[i] << "," << float64_values[i] << "\n"; + } + std::ofstream outfile(filepath, std::ofstream::out); + outfile << line.str(); + } + + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .header(-1) + .dtypes({cudf::data_type{cudf::type_id::INT8}, + cudf::data_type{cudf::type_id::INT16}, + cudf::data_type{cudf::type_id::INT32}, + cudf::data_type{cudf::type_id::INT64}, + cudf::data_type{cudf::type_id::UINT8}, + cudf::data_type{cudf::type_id::UINT16}, + cudf::data_type{cudf::type_id::UINT32}, + cudf::data_type{cudf::type_id::UINT64}, + cudf::data_type{cudf::type_id::FLOAT32}, + cudf::data_type{cudf::type_id::FLOAT64}}); + auto result = cudf::io::read_csv(in_opts, cudf::test::get_default_stream()); +} + +TEST_F(CSVTest, CSVWriter) +{ + auto const input_strings = cudf::test::strings_column_wrapper{ + std::string{"All"} + "," + "the" + "," + "leaves", "are\"brown", "and\nthe\nsky\nis\ngrey"}; + auto const input_table = cudf::table_view{{input_strings}}; + + auto const filepath = temp_env->get_temp_dir() + "unquoted.csv"; + auto w_options = cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, input_table) + .include_header(false) + .inter_column_delimiter(',') + .quoting(cudf::io::quote_style::NONE); + cudf::io::write_csv(w_options.build(), cudf::test::get_default_stream()); +} From 75076fd201cc6bc827bffc2d42bbe4d65efad86a Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 13 Nov 2023 20:46:00 +0000 Subject: [PATCH 2/4] simplified tests --- cpp/include/cudf_test/column_wrapper.hpp | 16 ++- cpp/src/io/csv/writer_impl.cu | 17 ++-- cpp/tests/CMakeLists.txt | 4 +- cpp/tests/streams/io/csv_test.cpp | 122 +++++++++++------------ 4 files changed, 79 insertions(+), 80 deletions(-) diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index e94dfea9dcf..b9f2e0d9868 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -803,7 +803,8 @@ class strings_column_wrapper : public detail::column_wrapper { offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_bitmask = cudf::detail::make_device_uvector_sync( null_mask, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); - wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask, null_count); + wrapped = cudf::make_strings_column( + d_chars, d_offsets, d_bitmask, null_count, cudf::test::get_default_stream()); } /** @@ -1846,7 +1847,8 @@ class structs_column_wrapper : public detail::column_wrapper { child_column_wrappers.end(), std::back_inserter(child_columns), [&](auto const& column_wrapper) { - return std::make_unique(column_wrapper.get()); + return std::make_unique(column_wrapper.get(), + cudf::test::get_default_stream()); }); init(std::move(child_columns), validity); } @@ -1882,7 +1884,8 @@ class structs_column_wrapper : public detail::column_wrapper { child_column_wrappers.end(), std::back_inserter(child_columns), [&](auto const& column_wrapper) { - return std::make_unique(column_wrapper.get()); + return std::make_unique(column_wrapper.get(), + cudf::test::get_default_stream()); }); init(std::move(child_columns), validity_iter); } @@ -1906,8 +1909,11 @@ class structs_column_wrapper : public detail::column_wrapper { return cudf::test::detail::make_null_mask(validity.begin(), validity.end()); }(); - wrapped = cudf::make_structs_column( - num_rows, std::move(child_columns), null_count, std::move(null_mask)); + wrapped = cudf::make_structs_column(num_rows, + std::move(child_columns), + null_count, + std::move(null_mask), + cudf::test::get_default_stream()); } template diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 570ca7be08f..56cc9917821 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -166,8 +166,9 @@ struct column_to_strings_fn { std::enable_if_t, std::unique_ptr> operator()( column_view const& column) const { - return cudf::strings::detail::from_booleans( - column, options_.get_true_value(), options_.get_false_value(), stream_, mr_); + string_scalar true_string(options_.get_true_value(), true, stream_, mr_); + string_scalar false_string(options_.get_false_value(), true, stream_, mr_); + return cudf::strings::detail::from_booleans(column, true_string, false_string, stream_, mr_); } // strings: @@ -181,7 +182,7 @@ struct column_to_strings_fn { } // handle special characters: {delimiter, '\n', "} in row: - string_scalar delimiter{std::string{options_.get_inter_column_delimiter()}, true, stream_}; + string_scalar delimiter{std::string{options_.get_inter_column_delimiter()}, true, stream_, mr_}; auto d_column = column_device_view::create(column_v, stream_); escape_strings_fn fn{*d_column, delimiter.value(stream_)}; @@ -478,18 +479,20 @@ void write_csv(data_sink* out_sink, // concatenate columns in each row into one big string column // (using null representation and delimiter): // - std::string delimiter_str{options.get_inter_column_delimiter()}; + // std::string delimiter_str{options.get_inter_column_delimiter()}; auto str_concat_col = [&] { + cudf::string_scalar delimiter_str( + std::string{options.get_inter_column_delimiter()}, true, stream); + cudf::string_scalar options_narep(options.get_na_rep(), true, stream); if (str_table_view.num_columns() > 1) return cudf::strings::detail::concatenate(str_table_view, delimiter_str, - options.get_na_rep(), + options_narep, strings::separator_on_nulls::YES, stream, rmm::mr::get_current_device_resource()); - cudf::string_scalar narep(options.get_na_rep(), true, stream); return cudf::strings::detail::replace_nulls( - str_table_view.column(0), narep, stream, rmm::mr::get_current_device_resource()); + str_table_view.column(0), options_narep, stream, rmm::mr::get_current_device_resource()); }(); write_chunked(out_sink, str_concat_col->view(), options, stream, mr); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5039c66cacc..4be4e146fd0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -626,10 +626,12 @@ ConfigureTest( ConfigureTest(STREAM_BINARYOP_TEST streams/binaryop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_CSVIO_TEST streams/io/csv_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_NULL_MASK_TEST streams/null_mask_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) @@ -655,8 +657,6 @@ ConfigureTest( STREAM_TEXT_TEST streams/text/ngrams_test.cpp streams/text/tokenize_test.cpp STREAM_MODE testing ) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_CSVIO_TEST streams/io/csv_test.cpp STREAM_MODE testing) -ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) # ################################################################################################## # Install tests #################################################################################### diff --git a/cpp/tests/streams/io/csv_test.cpp b/cpp/tests/streams/io/csv_test.cpp index 0aef69a8ba3..6b903dbf78f 100644 --- a/cpp/tests/streams/io/csv_test.cpp +++ b/cpp/tests/streams/io/csv_test.cpp @@ -35,80 +35,70 @@ auto const temp_env = static_cast( class CSVTest : public cudf::test::BaseFixture {}; -template -inline auto random_values(size_t size) +TEST_F(CSVTest, CSVWriter) { - std::vector values(size); - - using T1 = T; - using uniform_distribution = - typename std::conditional_t, - std::bernoulli_distribution, - std::conditional_t, - std::uniform_real_distribution, - std::uniform_int_distribution>>; - - static constexpr auto seed = 0xf00d; - static std::mt19937 engine{seed}; - static uniform_distribution dist{}; - std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); - - return values; + constexpr auto num_rows = 10; + + std::vector zeros(num_rows, 0); + std::vector ones(num_rows, 1); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones[i], numeric::scale_type{12}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones[i], numeric::scale_type{-12}}; + }); + + cudf::test::fixed_width_column_wrapper col0(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col1(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col2(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col3(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col4(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col5(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col6(col6_data, col6_data + num_rows); + cudf::test::fixed_width_column_wrapper col7(col7_data, col7_data + num_rows); + + std::vector col8_data(num_rows, "rapids"); + cudf::test::strings_column_wrapper col8(col8_data.begin(), col8_data.end()); + + cudf::table_view tab({col0, col1, col2, col3, col4, col5, col6, col7, col8}); + + auto const filepath = temp_env->get_temp_dir() + "multicolumn.csv"; + auto w_options = cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, tab) + .include_header(false) + .inter_column_delimiter(','); + cudf::io::write_csv(w_options.build(), cudf::test::get_default_stream()); } TEST_F(CSVTest, CSVReader) { constexpr auto num_rows = 10; - auto int8_values = random_values(num_rows); - auto int16_values = random_values(num_rows); - auto int32_values = random_values(num_rows); - auto int64_values = random_values(num_rows); - auto uint8_values = random_values(num_rows); - auto uint16_values = random_values(num_rows); - auto uint32_values = random_values(num_rows); - auto uint64_values = random_values(num_rows); - auto float32_values = random_values(num_rows); - auto float64_values = random_values(num_rows); - - auto filepath = temp_env->get_temp_dir() + "MultiColumn.csv"; - { - std::ostringstream line; - for (int i = 0; i < num_rows; ++i) { - line << std::to_string(int8_values[i]) << "," << int16_values[i] << "," << int32_values[i] - << "," << int64_values[i] << "," << std::to_string(uint8_values[i]) << "," - << uint16_values[i] << "," << uint32_values[i] << "," << uint64_values[i] << "," - << float32_values[i] << "," << float64_values[i] << "\n"; - } - std::ofstream outfile(filepath, std::ofstream::out); - outfile << line.str(); - } - - cudf::io::csv_reader_options in_opts = - cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) - .header(-1) - .dtypes({cudf::data_type{cudf::type_id::INT8}, - cudf::data_type{cudf::type_id::INT16}, - cudf::data_type{cudf::type_id::INT32}, - cudf::data_type{cudf::type_id::INT64}, - cudf::data_type{cudf::type_id::UINT8}, - cudf::data_type{cudf::type_id::UINT16}, - cudf::data_type{cudf::type_id::UINT32}, - cudf::data_type{cudf::type_id::UINT64}, - cudf::data_type{cudf::type_id::FLOAT32}, - cudf::data_type{cudf::type_id::FLOAT64}}); - auto result = cudf::io::read_csv(in_opts, cudf::test::get_default_stream()); -} -TEST_F(CSVTest, CSVWriter) -{ - auto const input_strings = cudf::test::strings_column_wrapper{ - std::string{"All"} + "," + "the" + "," + "leaves", "are\"brown", "and\nthe\nsky\nis\ngrey"}; - auto const input_table = cudf::table_view{{input_strings}}; + std::vector zeros(num_rows, 0); + std::vector ones(num_rows, 1); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones[i], numeric::scale_type{12}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { + return numeric::decimal128{ones[i], numeric::scale_type{-12}}; + }); + + cudf::test::fixed_width_column_wrapper col0(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col1(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col2(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col3(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col4(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col5(zeros.begin(), zeros.end()); + cudf::test::fixed_width_column_wrapper col6(col6_data, col6_data + num_rows); + cudf::test::fixed_width_column_wrapper col7(col7_data, col7_data + num_rows); + + std::vector col8_data(num_rows, "rapids"); + cudf::test::strings_column_wrapper col8(col8_data.begin(), col8_data.end()); + + cudf::table_view tab({col0, col1, col2, col3, col4, col5, col6, col7, col8}); - auto const filepath = temp_env->get_temp_dir() + "unquoted.csv"; - auto w_options = cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, input_table) + auto const filepath = temp_env->get_temp_dir() + "multicolumn.csv"; + auto w_options = cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, tab) .include_header(false) - .inter_column_delimiter(',') - .quoting(cudf::io::quote_style::NONE); + .inter_column_delimiter(','); cudf::io::write_csv(w_options.build(), cudf::test::get_default_stream()); } From 656a5b670e31bb752c0e3b5dbed0461de6bda873 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 13 Nov 2023 22:22:23 +0000 Subject: [PATCH 3/4] code comment cleanup; initialization style --- cpp/src/io/csv/writer_impl.cu | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 56cc9917821..a5f080646ec 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -166,8 +166,8 @@ struct column_to_strings_fn { std::enable_if_t, std::unique_ptr> operator()( column_view const& column) const { - string_scalar true_string(options_.get_true_value(), true, stream_, mr_); - string_scalar false_string(options_.get_false_value(), true, stream_, mr_); + string_scalar true_string{options_.get_true_value(), true, stream_, mr_}; + string_scalar false_string{options_.get_false_value(), true, stream_, mr_}; return cudf::strings::detail::from_booleans(column, true_string, false_string, stream_, mr_); } @@ -374,10 +374,10 @@ void write_chunked(data_sink* out_sink, CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); - cudf::string_scalar newline(options.get_line_terminator(), true, stream); + cudf::string_scalar newline{options.get_line_terminator(), true, stream}; auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view, newline, - string_scalar("", false, stream), + string_scalar{"", false, stream}, stream, rmm::mr::get_current_device_resource()); strings_column_view strings_column{p_str_col_w_nl->view()}; @@ -479,11 +479,10 @@ void write_csv(data_sink* out_sink, // concatenate columns in each row into one big string column // (using null representation and delimiter): // - // std::string delimiter_str{options.get_inter_column_delimiter()}; auto str_concat_col = [&] { - cudf::string_scalar delimiter_str( - std::string{options.get_inter_column_delimiter()}, true, stream); - cudf::string_scalar options_narep(options.get_na_rep(), true, stream); + cudf::string_scalar delimiter_str{ + std::string{options.get_inter_column_delimiter()}, true, stream}; + cudf::string_scalar options_narep{options.get_na_rep(), true, stream}; if (str_table_view.num_columns() > 1) return cudf::strings::detail::concatenate(str_table_view, delimiter_str, From 08ac4a20d1afd64691e2dd372a488dafcd58b5dd Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 14 Nov 2023 17:46:12 +0000 Subject: [PATCH 4/4] default mr usage; test header cleanup --- cpp/src/io/csv/writer_impl.cu | 6 +++--- cpp/tests/streams/io/csv_test.cpp | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index a5f080646ec..6e9c634804c 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -166,8 +166,8 @@ struct column_to_strings_fn { std::enable_if_t, std::unique_ptr> operator()( column_view const& column) const { - string_scalar true_string{options_.get_true_value(), true, stream_, mr_}; - string_scalar false_string{options_.get_false_value(), true, stream_, mr_}; + string_scalar true_string{options_.get_true_value(), true, stream_}; + string_scalar false_string{options_.get_false_value(), true, stream_}; return cudf::strings::detail::from_booleans(column, true_string, false_string, stream_, mr_); } @@ -182,7 +182,7 @@ struct column_to_strings_fn { } // handle special characters: {delimiter, '\n', "} in row: - string_scalar delimiter{std::string{options_.get_inter_column_delimiter()}, true, stream_, mr_}; + string_scalar delimiter{std::string{options_.get_inter_column_delimiter()}, true, stream_}; auto d_column = column_device_view::create(column_v, stream_); escape_strings_fn fn{*d_column, delimiter.value(stream_)}; diff --git a/cpp/tests/streams/io/csv_test.cpp b/cpp/tests/streams/io/csv_test.cpp index 6b903dbf78f..88514fa412c 100644 --- a/cpp/tests/streams/io/csv_test.cpp +++ b/cpp/tests/streams/io/csv_test.cpp @@ -25,8 +25,6 @@ #include #include -#include -#include #include #include