Skip to content

Commit

Permalink
Fix writing of ORC files with empty child string columns (#13745)
Browse files Browse the repository at this point in the history
Closes #13742
Fixes an out-of-bounds (OOB) access in `rowgroup_char_counts_kernel` when the input column has no rows. This can happen when a string column is the child of a list column, for example when every list row is empty.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Karthikeyan (https://github.com/karthikeyann)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #13745
  • Loading branch information
vuule authored Jul 25, 2023
1 parent 120fc29 commit 8306ea0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
4 changes: 3 additions & 1 deletion cpp/src/io/orc/dict_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ __global__ void rowgroup_char_counts_kernel(device_2dspan<size_type> char_counts

auto const& offsets = str_col.child(strings_column_view::offsets_column_index);
char_counts[str_col_idx][row_group_idx] =
offsets.element<size_type>(start_row + num_rows) - offsets.element<size_type>(start_row);
(num_rows == 0)
? 0
: offsets.element<size_type>(start_row + num_rows) - offsets.element<size_type>(start_row);
}

void rowgroup_char_counts(device_2dspan<size_type> counts,
Expand Down
17 changes: 17 additions & 0 deletions cpp/tests/io/orc_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1844,4 +1844,21 @@ TEST_F(OrcWriterTest, SlicedStringColumn)
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view());
}

// Round-trips a list-of-strings column in which every list is empty, so the
// child string column holds no rows, and checks that the table read back
// equals the table that was written.
TEST_F(OrcWriterTest, EmptyChildStringColumn)
{
  // Two rows, each an empty list of strings.
  list_col<cudf::string_view> empty_lists{{}, {}};
  table_view input_table({empty_lists});

  auto orc_path = temp_env->get_temp_filepath("OrcEmptyChildStringColumn.orc");

  // Write the table to the temporary ORC file.
  cudf::io::sink_info const sink{orc_path};
  cudf::io::orc_writer_options write_opts =
    cudf::io::orc_writer_options::builder(sink, input_table);
  cudf::io::write_orc(write_opts);

  // Read the file back with use_index(false).
  cudf::io::source_info const source{orc_path};
  cudf::io::orc_reader_options read_opts =
    cudf::io::orc_reader_options::builder(source).use_index(false);
  auto const roundtrip = cudf::io::read_orc(read_opts);

  CUDF_TEST_EXPECT_TABLES_EQUAL(input_table, roundtrip.tbl->view());
}

// NOTE(review): presumably expands to the test executable's entry point; the
// macro is defined outside this file, so confirm against its definition.
CUDF_TEST_PROGRAM_MAIN()

0 comments on commit 8306ea0

Please sign in to comment.