Skip to content

Commit

Permalink
Adding support for writing an empty dataframe (#8490)
Browse files Browse the repository at this point in the history
Closes #6691

Removed the `CUDF_EXPECTS(table.num_columns() > 0, ...)` check from the CSV writer. Instead, when `table.num_columns() == 0`, the writer outputs only a line terminator, following Pandas behaviour.

Authors:
  - https://github.com/shaneding

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - Karthikeyan (https://github.com/karthikeyann)
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)

URL: #8490
  • Loading branch information
shaneding authored Jun 11, 2021
1 parent 6a23f20 commit 0a4e8a1
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 11 deletions.
10 changes: 6 additions & 4 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,6 @@ void writer::impl::write_chunked_begin(table_view const& table,
if ((metadata != nullptr) && (options_.is_enabled_include_header())) {
CUDF_EXPECTS(metadata->column_names.size() == static_cast<size_t>(table.num_columns()),
"Mismatch between number of column headers and table columns.");

std::string delimiter_str{options_.get_inter_column_delimiter()};

// avoid delimiter after last element:
Expand All @@ -295,7 +294,12 @@ void writer::impl::write_chunked_begin(table_view const& table,
std::copy(metadata->column_names.begin(),
metadata->column_names.end() - 1,
std::ostream_iterator<std::string>(ss, delimiter_str.c_str()));
ss << metadata->column_names.back() << options_.get_line_terminator();

if (metadata->column_names.size() > 0) {
ss << metadata->column_names.back() << options_.get_line_terminator();
} else {
ss << options_.get_line_terminator();
}

out_sink_->host_write(ss.str().data(), ss.str().size());
}
Expand Down Expand Up @@ -355,8 +359,6 @@ void writer::impl::write(table_view const& table,
const table_metadata* metadata,
rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(table.num_columns() > 0, "Empty table.");

// write header: column names separated by delimiter:
// (even for tables with no rows)
//
Expand Down
7 changes: 5 additions & 2 deletions cpp/tests/io/csv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1754,9 +1754,12 @@ TEST_F(CsvReaderTest, EmptyFileWithWriter)
auto filepath = temp_env->get_temp_dir() + "EmptyFileWithWriter.csv";

cudf::table_view empty_table;
write_csv_helper(filepath, empty_table, false);
cudf_io::csv_reader_options in_opts =
cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath});
auto result = cudf_io::read_csv(in_opts);

// TODO is it ok for write_csv to throw instead of just writing an empty file?
EXPECT_THROW(write_csv_helper(filepath, empty_table, false), cudf::logic_error);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty_table, result.tbl->view());
}

class TestSource : public cudf::io::datasource {
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,6 @@ cpdef write_csv(
--------
cudf.io.csv.to_csv
"""

cdef table_view input_table_view = \
table.view() if index is True else table.data_view()
cdef bool include_header_c = header
Expand Down
16 changes: 12 additions & 4 deletions python/cudf/cudf/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,10 +1583,8 @@ def test_csv_writer_column_and_header_options(

def test_csv_writer_empty_columns_parameter(cudf_mixed_dataframe):
df = cudf_mixed_dataframe

buffer = BytesIO()
with pytest.raises(RuntimeError):
df.to_csv(buffer, columns=[], index=False)
write_str = df.to_csv(columns=[], index=False)
assert_eq(write_str, "\n")


def test_csv_writer_multiindex(tmpdir):
Expand Down Expand Up @@ -1979,3 +1977,13 @@ def test_to_csv_compression_error():
error_message = "Writing compressed csv is not currently supported in cudf"
with pytest.raises(NotImplementedError, match=re.escape(error_message)):
df.to_csv("test.csv", compression=compression)


def test_empty_df_no_index():
    """Round-trip a column-less DataFrame through the CSV writer and reader.

    An empty ``cudf.DataFrame`` is written to an in-memory buffer with
    ``index=False``, read back with ``cudf.read_csv``, and the result is
    compared against the original frame.
    """
    expected = cudf.DataFrame({})

    # Serialize into memory instead of touching the filesystem.
    sink = BytesIO()
    expected.to_csv(sink, index=False)

    roundtripped = cudf.read_csv(sink)
    assert_eq(expected, roundtripped)

0 comments on commit 0a4e8a1

Please sign in to comment.