From 8306ea0029b871445dd6aaee1fb9b3510ab20c45 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic
Date: Tue, 25 Jul 2023 16:09:28 -0700
Subject: [PATCH] Fix writing of ORC files with empty child string columns (#13745)

Closes #13742

Fixes an OOB access in `rowgroup_char_counts_kernel` when the input column has no rows. This can happen with string columns with a parent list column.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Karthikeyan (https://github.com/karthikeyann)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/13745
---
 cpp/src/io/orc/dict_enc.cu |  4 +++-
 cpp/tests/io/orc_test.cpp  | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu
index 63e1e77308d..c069cb67cec 100644
--- a/cpp/src/io/orc/dict_enc.cu
+++ b/cpp/src/io/orc/dict_enc.cu
@@ -45,7 +45,9 @@ __global__ void rowgroup_char_counts_kernel(device_2dspan<size_type> char_counts
 
   auto const& offsets = str_col.child(strings_column_view::offsets_column_index);
   char_counts[str_col_idx][row_group_idx] =
-    offsets.element<size_type>(start_row + num_rows) - offsets.element<size_type>(start_row);
+    (num_rows == 0)
+      ? 0
+      : offsets.element<size_type>(start_row + num_rows) - offsets.element<size_type>(start_row);
 }
 
 void rowgroup_char_counts(device_2dspan<size_type> counts,
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index 2338edd9ccb..cff7b1cf081 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -1844,4 +1844,21 @@ TEST_F(OrcWriterTest, SlicedStringColumn)
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view());
 }
 
+TEST_F(OrcWriterTest, EmptyChildStringColumn)
+{
+  list_col<cudf::string_view> col{{}, {}};
+  table_view expected({col});
+
+  auto filepath = temp_env->get_temp_filepath("OrcEmptyChildStringColumn.orc");
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
+
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
+}
+
 CUDF_TEST_PROGRAM_MAIN()