Skip to content

Commit

Permalink
BUG FIX: CSV Writer ignores the header parameter when no metadata is provided (#8740)
Browse files (browse the repository at this point in the history)

Fixes: #6669

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - David Wendt (https://github.com/davidwendt)

URL: #8740
  • Loading branch information
skirui-source authored Oct 14, 2021
1 parent 4f47480 commit 800fd7b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
12 changes: 10 additions & 2 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,16 @@ void writer::impl::write_chunked_begin(table_view const& table,
const table_metadata* metadata,
rmm::cuda_stream_view stream)
{
if ((metadata != nullptr) && (options_.is_enabled_include_header())) {
auto const& column_names = metadata->column_names;
if (options_.is_enabled_include_header()) {
// need to generate column names if metadata is not provided
std::vector<std::string> generated_col_names;
if (metadata == nullptr) {
generated_col_names.resize(table.num_columns());
thrust::tabulate(generated_col_names.begin(), generated_col_names.end(), [](auto idx) {
return std::to_string(idx);
});
}
auto const& column_names = (metadata == nullptr) ? generated_col_names : metadata->column_names;
CUDF_EXPECTS(column_names.size() == static_cast<size_t>(table.num_columns()),
"Mismatch between number of column headers and table columns.");

Expand Down
34 changes: 32 additions & 2 deletions cpp/tests/io/csv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale)
auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv";

cudf_io::csv_writer_options writer_options =
cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table);
cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table)
.include_header(false);

cudf_io::write_csv(writer_options);

Expand Down Expand Up @@ -453,7 +454,8 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale)
auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv";

cudf_io::csv_writer_options writer_options =
cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table);
cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table)
.include_header(false);

cudf_io::write_csv(writer_options);

Expand Down Expand Up @@ -2198,4 +2200,32 @@ TEST_F(CsvReaderTest, DtypesMapInvalid)
EXPECT_THROW(cudf_io::read_csv(in_opts), cudf::logic_error);
}

// Round-trip check: a table written with a default-constructed set of CSV
// writer options (no metadata supplied) is read back and compared against
// the original data; the test also expects the column names read back to
// be "0" and "1".
TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch)
{
  auto const filepath = temp_env->get_temp_dir() + "issue.csv";

  // Source data: one int32 column and one string column, three rows each.
  auto ints    = column_wrapper<int32_t>{10, 20, 30};
  auto strings = column_wrapper<cudf::string_view>{"abc", "mno", "xyz"};
  cudf::table_view input_table(std::vector<cudf::column_view>{ints, strings});

  // Write the table to the temporary file without customizing the writer.
  cudf_io::csv_writer_options out_opts =
    cudf_io::csv_writer_options::builder(cudf_io::sink_info{filepath}, input_table);
  cudf_io::write_csv(out_opts);

  // Read the file back; only the column dtypes are specified on the reader.
  std::vector<data_type> const expected_dtypes{dtype<int32_t>(), dtype<cudf::string_view>()};
  cudf_io::csv_reader_options in_opts =
    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).dtypes(expected_dtypes);
  cudf_io::table_with_metadata result = cudf_io::read_csv(in_opts);

  // The data must survive the round trip.
  auto const result_view = result.tbl->view();
  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_view);

  // The column names read back are expected to be the column indices.
  auto const& names = result.metadata.column_names;
  EXPECT_EQ(names[0], "0");
  EXPECT_EQ(names[1], "1");
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit 800fd7b

Please sign in to comment.