From 7e97f4c5b153a12558d827d004806b70da60a377 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 2 Dec 2021 10:29:55 -0600 Subject: [PATCH 1/4] Make sure we don't read an extra bitmask word at the end of a split copy if we don't need the slack bits. Prevents a potential read of unallocated memory. --- cpp/src/copying/contiguous_split.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index a9194ceea93..c0590d9add4 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -63,7 +63,7 @@ struct src_buf_info { int _offset_stack_pos, int _parent_offsets_index, bool _is_validity, - int _column_offset) + size_type _column_offset) : type(_type), offsets(_offsets), offset_stack_pos(_offset_stack_pos), @@ -195,9 +195,12 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, std::size_t idx = (num_bytes - remainder) / 4; uint32_t v = remainder > 0 ? (reinterpret_cast(src)[idx] - value_shift) : 0; while (remainder) { - uint32_t const next = bit_shift > 0 || remainder > 4 + // if we're doing a validity copy, do we need to read an extra bitmask word to OR it's relevant bits in? + auto const have_extra_rows = bit_shift > 0 && remainder == 4 ? (num_elements * 32) - num_rows < bit_shift : false; + uint32_t const next = (have_extra_rows || remainder > 4) ? (reinterpret_cast(src)[idx + 1] - value_shift) : 0; + uint32_t const val = (v >> bit_shift) | (next << (32 - bit_shift)); if (valid_count) { thread_valid_count += __popc(val); } reinterpret_cast(dst)[idx] = val; From 63b430d0c8e7c86ece94058af64de405bb7e1605 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Fri, 3 Dec 2021 17:44:43 -0600 Subject: [PATCH 2/4] Add test. --- cpp/src/copying/contiguous_split.cu | 12 +++++++----- cpp/tests/copying/split_tests.cpp | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index c0590d9add4..cd7156c7f94 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -195,13 +195,15 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, std::size_t idx = (num_bytes - remainder) / 4; uint32_t v = remainder > 0 ? (reinterpret_cast(src)[idx] - value_shift) : 0; while (remainder) { - // if we're doing a validity copy, do we need to read an extra bitmask word to OR it's relevant bits in? - auto const have_extra_rows = bit_shift > 0 && remainder == 4 ? (num_elements * 32) - num_rows < bit_shift : false; - uint32_t const next = (have_extra_rows || remainder > 4) + // if we're doing a validity copy, do we need to read an extra bitmask word to OR it's + // relevant bits in? + auto const have_extra_rows = + bit_shift > 0 && remainder == 4 ? (num_elements * 32) - num_rows < bit_shift : false; + uint32_t const next = (have_extra_rows || remainder > 4) ? (reinterpret_cast(src)[idx + 1] - value_shift) : 0; - - uint32_t const val = (v >> bit_shift) | (next << (32 - bit_shift)); + + uint32_t const val = (v >> bit_shift) | (next << (32 - bit_shift)); if (valid_count) { thread_valid_count += __popc(val); } reinterpret_cast(dst)[idx] = val; v = next; diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index f7714ce9ac7..b5a793ecd1c 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -1315,6 +1315,21 @@ TEST_F(ContiguousSplitUntypedTest, ProgressiveSizes) } } +TEST_F(ContiguousSplitUntypedTest, ValidityEdgeCase) +{ + // tests an edge case where the splits cause the final validity data to be copied + // to be < 32 full bits, making sure we don't unintentionally read past the end of the input + auto col = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, 512, cudf::mask_state::ALL_VALID); + auto result = cudf::contiguous_split(cudf::table_view{{*col}}, {510}); + auto expected = cudf::split(cudf::table_view{{*col}}, {510}); + + EXPECT_EQ(expected.size(), result.size()); + for (unsigned long index = 0; index < result.size(); index++) { + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected[index].column(0), result[index].table.column(0)); + } +} + // contiguous split with strings struct ContiguousSplitStringTableTest : public SplitTest { }; From 830120b0952d1a026d2891bbe942f6d2db6612b7 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 7 Dec 2021 16:52:24 -0600 Subject: [PATCH 3/4] Cleaned up some comments and clarified the logic a bit. --- cpp/src/copying/contiguous_split.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index cd7156c7f94..2e3d6da52cd 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -194,14 +194,14 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, if (value_shift || bit_shift) { std::size_t idx = (num_bytes - remainder) / 4; uint32_t v = remainder > 0 ? (reinterpret_cast(src)[idx] - value_shift) : 0; + auto const have_trailing_bits = ((num_elements * 32) - num_rows) < bit_shift; while (remainder) { - // if we're doing a validity copy, do we need to read an extra bitmask word to OR it's - // relevant bits in? - auto const have_extra_rows = - bit_shift > 0 && remainder == 4 ? (num_elements * 32) - num_rows < bit_shift : false; - uint32_t const next = (have_extra_rows || remainder > 4) - ? (reinterpret_cast(src)[idx + 1] - value_shift) - : 0; + // if we're at the very last word of a validity copy, we do not always need to read the next + // word to get the final trailing bits. + auto const read_trailing_bits = bit_shift > 0 && remainder == 4 && have_trailing_bits; + uint32_t const next = (read_trailing_bits || remainder > 4) + ? (reinterpret_cast(src)[idx + 1] - value_shift) + : 0; uint32_t const val = (v >> bit_shift) | (next << (32 - bit_shift)); if (valid_count) { thread_valid_count += __popc(val); } From 8c091112f50f5eee6b5522716bec7d01f2044608 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 8 Dec 2021 10:35:08 -0600 Subject: [PATCH 4/4] Small variable naming cleanup. --- cpp/src/copying/contiguous_split.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 2e3d6da52cd..bcedc2f62c6 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -194,7 +194,9 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, if (value_shift || bit_shift) { std::size_t idx = (num_bytes - remainder) / 4; uint32_t v = remainder > 0 ? (reinterpret_cast(src)[idx] - value_shift) : 0; - auto const have_trailing_bits = ((num_elements * 32) - num_rows) < bit_shift; + + constexpr size_type rows_per_element = 32; + auto const have_trailing_bits = ((num_elements * rows_per_element) - num_rows) < bit_shift; while (remainder) { // if we're at the very last word of a validity copy, we do not always need to read the next // word to get the final trailing bits.