Skip to content

Commit

Permalink
Throw error if UNINITIALIZED is passed to cudf::state_null_count (#13292
Browse files Browse the repository at this point in the history
)

Changes `cudf::state_null_count` to throw a `std::invalid_argument` error which `mask_state::UNINITIALIZED` is passed. This change is part of removing `UNKNOWN_NULL_COUNT` from cudf/libcudf.

Reference: #11968

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)

URL: #13292
  • Loading branch information
davidwendt authored May 15, 2023
1 parent 2825d5a commit b16f9c4
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 29 deletions.
6 changes: 6 additions & 0 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ std::unique_ptr<column> empty_like(scalar const& input);
*
* Supports only fixed-width types.
*
* If the `mask_alloc` allocates a validity mask that mask is also uninitialized
* and the validity bits and the null count should be set by the caller.
*
* @param[in] input Immutable view of input column to emulate
* @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN
* @param[in] mr Device memory resource used to allocate the returned column's device memory
Expand All @@ -244,6 +247,9 @@ std::unique_ptr<column> allocate_like(
*
* Supports only fixed-width types.
*
* If the `mask_alloc` allocates a validity mask that mask is also uninitialized
* and the validity bits and the null count should be set by the caller.
*
* @param[in] input Immutable view of input column to emulate
* @param[in] size The desired number of elements that the new column should have capacity for
* @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ namespace cudf {
* @brief Returns the null count for a null mask of the specified `state`
* representing `size` elements.
*
* @throw std::invalid_argument if state is UNINITIALIZED
*
* @param state The state of the null mask
* @param size The number of elements represented by the mask
* @return The count of null elements
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -50,10 +50,9 @@ size_type state_null_count(mask_state state, size_type size)
{
switch (state) {
case mask_state::UNALLOCATED: return 0;
case mask_state::UNINITIALIZED: return UNKNOWN_NULL_COUNT;
case mask_state::ALL_NULL: return size;
case mask_state::ALL_VALID: return 0;
default: CUDF_FAIL("Invalid null mask state.");
default: CUDF_FAIL("Invalid null mask state.", std::invalid_argument);
}
}

Expand Down
54 changes: 29 additions & 25 deletions cpp/src/column/column_factories.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -81,12 +81,13 @@ std::unique_ptr<column> make_numeric_column(data_type type,
CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");

return std::make_unique<column>(type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
return std::make_unique<column>(
type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
}

// Allocate storage for a specified number of numeric elements
Expand All @@ -100,12 +101,13 @@ std::unique_ptr<column> make_fixed_point_column(data_type type,
CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");

return std::make_unique<column>(type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
return std::make_unique<column>(
type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
}

// Allocate storage for a specified number of timestamp elements
Expand All @@ -119,12 +121,13 @@ std::unique_ptr<column> make_timestamp_column(data_type type,
CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");

return std::make_unique<column>(type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
return std::make_unique<column>(
type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
}

// Allocate storage for a specified number of duration elements
Expand All @@ -138,12 +141,13 @@ std::unique_ptr<column> make_duration_column(data_type type,
CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");

return std::make_unique<column>(type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
return std::make_unique<column>(
type,
size,
rmm::device_buffer{size * cudf::size_of(type), stream, mr},
detail::create_null_mask(size, state, stream, mr),
state == mask_state::UNINITIALIZED ? 0 : state_null_count(state, size),
std::vector<std::unique_ptr<column>>{});
}

// Allocate storage for a specified number of fixed width elements
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ std::unique_ptr<column> allocate_like(column_view const& input,
size,
rmm::device_buffer(size * size_of(input.type()), stream, mr),
detail::create_null_mask(size, allocate_mask, stream, mr),
state_null_count(allocate_mask, input.size()));
0);
}

} // namespace detail
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/bitmask/bitmask_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ TEST_F(BitmaskUtilitiesTest, StateNullCount)
EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::UNALLOCATED, 42));
EXPECT_EQ(42, cudf::state_null_count(cudf::mask_state::ALL_NULL, 42));
EXPECT_EQ(0, cudf::state_null_count(cudf::mask_state::ALL_VALID, 42));
EXPECT_THROW(cudf::state_null_count(cudf::mask_state::UNINITIALIZED, 42), std::invalid_argument);
}

TEST_F(BitmaskUtilitiesTest, BitmaskAllocationSize)
Expand Down

0 comments on commit b16f9c4

Please sign in to comment.