From 2297f9a61e2f4153ab2e8a0631f7cfe7971ead14 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Fri, 14 Jun 2024 17:43:17 +0100
Subject: [PATCH] Fix initialization error in to_arrow for empty string views (#16033)

When converting an empty string view to arrow, we don't bother with
copies from device, but rather create the arrow arrays directly. The
offset buffer is therefore a singleton int32 array with zero in it.
Previously, the initialization of this array was incorrect, since
mutable_data() returns a uint8_t pointer, and so setting the single
element could leave 24 of the 32 bits uninitialized. Fix this by using
memset instead to zero out the full buffer.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16033
---
 cpp/src/interop/to_arrow.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 47aee982c32..2b3aa2f08f1 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -292,9 +292,9 @@ std::shared_ptr dispatch_to_arrow::operator()(
   auto child_arrays = fetch_child_array(input_view, {{}, {}}, ar_mr, stream);
   if (child_arrays.empty()) {
     // Empty string will have only one value in offset of 4 bytes
-    auto tmp_offset_buffer = allocate_arrow_buffer(4, ar_mr);
-    auto tmp_data_buffer = allocate_arrow_buffer(0, ar_mr);
-    tmp_offset_buffer->mutable_data()[0] = 0;
+    auto tmp_offset_buffer = allocate_arrow_buffer(sizeof(int32_t), ar_mr);
+    auto tmp_data_buffer = allocate_arrow_buffer(0, ar_mr);
+    memset(tmp_offset_buffer->mutable_data(), 0, sizeof(int32_t));
 
     return std::make_shared(
       0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer));