From b16f9c42044047fb402f57c29180cea72bec4701 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 15 May 2023 07:58:29 -0400 Subject: [PATCH] Throw error if UNINITIALIZED is passed to cudf::state_null_count (#13292) Changes `cudf::state_null_count` to throw a `std::invalid_argument` error which `mask_state::UNINITIALIZED` is passed. This change is part of removing `UNKNOWN_NULL_COUNT` from cudf/libcudf. Reference: https://github.com/rapidsai/cudf/issues/11968 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13292 --- cpp/include/cudf/copying.hpp | 6 ++++ cpp/include/cudf/null_mask.hpp | 2 ++ cpp/src/bitmask/null_mask.cu | 5 ++- cpp/src/column/column_factories.cpp | 54 ++++++++++++++++------------- cpp/src/copying/copy.cpp | 2 +- cpp/tests/bitmask/bitmask_tests.cpp | 1 + 6 files changed, 41 insertions(+), 29 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 921ef5f65f1..cdb8a1c8a09 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -228,6 +228,9 @@ std::unique_ptr empty_like(scalar const& input); * * Supports only fixed-width types. * + * If the `mask_alloc` allocates a validity mask that mask is also uninitialized + * and the validity bits and the null count should be set by the caller. + * * @param[in] input Immutable view of input column to emulate * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN * @param[in] mr Device memory resource used to allocate the returned column's device memory @@ -244,6 +247,9 @@ std::unique_ptr allocate_like( * * Supports only fixed-width types. * + * If the `mask_alloc` allocates a validity mask that mask is also uninitialized + * and the validity bits and the null count should be set by the caller. + * * @param[in] input Immutable view of input column to emulate * @param[in] size The desired number of elements that the new column should have capacity for * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 360006c1eea..e8bc97e95b3 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -36,6 +36,8 @@ namespace cudf { * @brief Returns the null count for a null mask of the specified `state` * representing `size` elements. * + * @throw std::invalid_argument if state is UNINITIALIZED + * * @param state The state of the null mask * @param size The number of elements represented by the mask * @return The count of null elements diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index b98a2196748..4c22988900b 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,10 +50,9 @@ size_type state_null_count(mask_state state, size_type size) { switch (state) { case mask_state::UNALLOCATED: return 0; - case mask_state::UNINITIALIZED: return UNKNOWN_NULL_COUNT; case mask_state::ALL_NULL: return size; case mask_state::ALL_VALID: return 0; - default: CUDF_FAIL("Invalid null mask state."); + default: CUDF_FAIL("Invalid null mask state.", std::invalid_argument); } } diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 5f455e26e52..e147b12ad99 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -81,12 +81,13 @@ std::unique_ptr make_numeric_column(data_type type, CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of numeric elements @@ -100,12 +101,13 @@ std::unique_ptr make_fixed_point_column(data_type type, CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of timestamp elements @@ -119,12 +121,13 @@ std::unique_ptr make_timestamp_column(data_type type, CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of duration elements @@ -138,12 +141,13 @@ std::unique_ptr make_duration_column(data_type type, CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); - return std::make_unique(type, - size, - rmm::device_buffer{size * cudf::size_of(type), stream, mr}, - detail::create_null_mask(size, state, stream, mr), - state_null_count(state, size), - std::vector>{}); + return std::make_unique( + type, + size, + rmm::device_buffer{size * cudf::size_of(type), stream, mr}, + detail::create_null_mask(size, state, stream, mr), + state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size), + std::vector>{}); } // Allocate storage for a specified number of fixed width elements diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 2bcfeae20c4..9d4b02ffb4f 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -129,7 +129,7 @@ std::unique_ptr allocate_like(column_view const& input, size, rmm::device_buffer(size * size_of(input.type()), stream, mr), detail::create_null_mask(size, allocate_mask, stream, mr), - state_null_count(allocate_mask, input.size())); + 0); } } // namespace detail diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index a4bbb213930..4693fc8e342 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -37,6 +37,7 @@ TEST_F(BitmaskUtilitiesTest, StateNullCount) EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::UNALLOCATED, 42)); EXPECT_EQ(42, cudf::state_null_count(cudf::mask_state::ALL_NULL, 42)); EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::ALL_VALID, 42)); + EXPECT_THROW(cudf::state_null_count(cudf::mask_state::UNINITIALIZED, 42), std::invalid_argument); } TEST_F(BitmaskUtilitiesTest, BitmaskAllocationSize)