Skip to content

Commit

Permalink
Remove null mask and null count from column_view constructors (#13311)
Browse files Browse the repository at this point in the history
This is a breaking change that removes default values. Removing the default null count value of `UNKNOWN_NULL_COUNT` is necessary since we are removing `UNKNOWN_NULL_COUNT` altogether. A potential alternative was setting the default to 0, which would correspond to a default null mask of `nullptr` (the current default). However, that change is potentially more dangerous if callers were previously setting the null mask to something other than `nullptr` without setting the null count and relying on the behavior of `UNKNOWN_NULL_COUNT`. Removing the default parameter altogether is therefore the safer option here.

Contributes to #11968.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #13311
  • Loading branch information
vyasr authored May 16, 2023
1 parent 6447ab4 commit 9619439
Show file tree
Hide file tree
Showing 28 changed files with 113 additions and 70 deletions.
36 changes: 18 additions & 18 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,17 +280,17 @@ class column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Optional, pointer to device memory containing the null
* @param null_mask Pointer to device memory containing the null
* indicator bitmask
* @param null_count Optional, the number of null elements.
* @param offset optional, index of the first element
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
*/
column_view_base(data_type type,
size_type size,
void const* data,
bitmask_type const* null_mask = nullptr,
size_type null_count = UNKNOWN_NULL_COUNT,
size_type offset = 0);
bitmask_type const* null_mask,
size_type null_count,
size_type offset = 0);
};

class mutable_column_view_base : public column_view_base {
Expand Down Expand Up @@ -374,18 +374,18 @@ class column_view : public detail::column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Optional, pointer to device memory containing the null
* @param null_mask Pointer to device memory containing the null
* indicator bitmask
* @param null_count Optional, the number of null elements.
* @param offset optional, index of the first element
* @param children optional, depending on the element type, child columns may
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
* @param children Optional, depending on the element type, child columns may
* contain additional data
*/
column_view(data_type type,
size_type size,
void const* data,
bitmask_type const* null_mask = nullptr,
size_type null_count = UNKNOWN_NULL_COUNT,
bitmask_type const* null_mask,
size_type null_count,
size_type offset = 0,
std::vector<column_view> const& children = {});

Expand Down Expand Up @@ -528,19 +528,19 @@ class mutable_column_view : public detail::column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Optional, pointer to device memory containing the null
* @param null_mask Pointer to device memory containing the null
indicator
* bitmask
* @param null_count Optional, the number of null elements.
* @param offset optional, index of the first element
* @param children optional, depending on the element type, child columns may
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
* @param children Optional, depending on the element type, child columns may
* contain additional data
*/
mutable_column_view(data_type type,
size_type size,
void* data,
bitmask_type* null_mask = nullptr,
size_type null_count = cudf::UNKNOWN_NULL_COUNT,
bitmask_type* null_mask,
size_type null_count,
size_type offset = 0,
std::vector<mutable_column_view> const& children = {});

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/convert/convert_datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ std::unique_ptr<column> from_timestamps(
column_view const& timestamps,
std::string_view format = "%Y-%m-%dT%H:%M:%SZ",
strings_column_view const& names = strings_column_view(column_view{
data_type{type_id::STRING}, 0, nullptr}),
data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/convert/convert_lists.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -60,7 +60,7 @@ std::unique_ptr<column> format_list_column(
lists_column_view const& input,
string_scalar const& na_rep = string_scalar("NULL"),
strings_column_view const& separators = strings_column_view(column_view{
data_type{type_id::STRING}, 0, nullptr}),
data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf_test/tdigest_utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
spans.end(),
expected_min->mutable_view().template begin<double>(),
column_min<T>{});
column_view result_min(data_type{type_id::FLOAT64}, tdv.size(), tdv.min_begin());
column_view result_min(data_type{type_id::FLOAT64}, tdv.size(), tdv.min_begin(), nullptr, 0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_min, *expected_min);

auto expected_max = cudf::make_fixed_width_column(
Expand All @@ -189,7 +189,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
spans.end(),
expected_max->mutable_view().template begin<double>(),
column_max<T>{});
column_view result_max(data_type{type_id::FLOAT64}, tdv.size(), tdv.max_begin());
column_view result_max(data_type{type_id::FLOAT64}, tdv.size(), tdv.max_begin(), nullptr, 0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_max, *expected_max);
}

Expand Down
11 changes: 7 additions & 4 deletions cpp/src/binaryop/compiled/binary_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ struct scalar_as_column_view {
return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
{
auto& h_scalar_type_view = static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(s));
auto col_v =
column_view(s.type(), 1, h_scalar_type_view.data(), (bitmask_type const*)s.validity_data());
auto col_v = column_view(s.type(),
1,
h_scalar_type_view.data(),
reinterpret_cast<bitmask_type const*>(s.validity_data()),
!s.is_valid());
return std::pair{col_v, std::unique_ptr<column>(nullptr)};
}
template <typename T, CUDF_ENABLE_IF(!is_fixed_width<T>())>
Expand All @@ -74,8 +77,8 @@ scalar_as_column_view::return_type scalar_as_column_view::operator()<cudf::strin
auto offsets_column = std::get<0>(cudf::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + 1, stream, mr));

auto chars_column_v =
column_view(data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data());
auto chars_column_v = column_view(
data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data(), nullptr, 0);
// Construct string column_view
auto col_v = column_view(s.type(),
1,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stri

// Since we are setting every row to the scalar, the fill() never needs to access
// any of the children in the strings column which would otherwise cause an exception.
column_view sc{value.type(), size, nullptr};
column_view sc{value.type(), size, nullptr, nullptr, 0};
auto& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);

// fill the column with the scalar
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ struct scalar_empty_like_functor_impl<cudf::list_view> {
auto ls = static_cast<list_scalar const*>(&input);

// TODO: add a manual constructor for lists_column_view.
column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr};
column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0};
std::vector<column_view> children;
children.push_back(offsets);
children.push_back(ls->view());
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ std::unique_ptr<column> scatter_gather_based_if_else(cudf::scalar const& lhs,
auto scatter_source = std::vector<std::reference_wrapper<const scalar>>{std::ref(lhs)};
auto scatter_map_column_view = cudf::column_view{cudf::data_type{cudf::type_id::INT32},
static_cast<cudf::size_type>(scatter_map_size),
scatter_map.begin()};
scatter_map.begin(),
nullptr,
0};

auto result = cudf::detail::scatter(
scatter_source, scatter_map_column_view, table_view{std::vector<column_view>{rhs}}, stream, mr);
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/copying/gather.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,7 +68,9 @@ std::unique_ptr<table> gather(table_view const& source_table,
"invalid gather map size");
auto map_col = column_view(data_type{type_to_id<size_type>()},
static_cast<size_type>(gather_map.size()),
gather_map.data());
gather_map.data(),
nullptr,
0);
return gather(source_table, map_col, bounds_policy, neg_indices, stream, mr);
}

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,9 @@ std::unique_ptr<table> scatter(table_view const& source,
"invalid scatter map size");
auto map_col = column_view(data_type{type_to_id<size_type>()},
static_cast<size_type>(scatter_map.size()),
scatter_map.data());
scatter_map.data(),
nullptr,
0);
return scatter(source, map_col, target, stream, mr);
}

Expand Down
6 changes: 4 additions & 2 deletions cpp/src/dictionary/detail/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
std::transform(columns.begin(), columns.end(), keys_views.begin(), [keys_type](auto cv) {
auto dict_view = dictionary_column_view(cv);
// empty column may not have keys so we create an empty column_view place-holder
if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr};
if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0};
auto keys = dict_view.keys();
CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match");
return keys;
Expand Down Expand Up @@ -245,7 +245,9 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
std::vector<column_view> indices_views(columns.size());
std::transform(columns.begin(), columns.end(), indices_views.begin(), [](auto cv) {
auto dict_view = dictionary_column_view(cv);
if (dict_view.is_empty()) return column_view{data_type{type_id::UINT32}, 0, nullptr};
if (dict_view.is_empty()) {
return column_view{data_type{type_id::UINT32}, 0, nullptr, nullptr, 0};
}
return dict_view.get_indices_annotated(); // nicely includes validity mask and view offset
});
auto all_indices = cudf::detail::concatenate(indices_views, stream, mr);
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/dictionary_factories.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -137,7 +137,8 @@ std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys,
new_type, indices_size, std::move(*(contents.data.release())), rmm::device_buffer{}, 0);
}
// If the new type does not match, then convert the data.
cudf::column_view cast_view{cudf::data_type{indices_type}, indices_size, contents.data->data()};
cudf::column_view cast_view{
cudf::data_type{indices_type}, indices_size, contents.data->data(), nullptr, 0};
return cudf::detail::cast(cast_view, new_type, stream, mr);
}();

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/remove_keys.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -178,7 +178,8 @@ std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& diction
rmm::device_uvector<uint32_t> keys_positions(keys_size, stream);
thrust::sequence(rmm::exec_policy(stream), keys_positions.begin(), keys_positions.end());
// wrap the indices for comparison in contains()
column_view keys_positions_view(data_type{type_id::UINT32}, keys_size, keys_positions.data());
column_view keys_positions_view(
data_type{type_id::UINT32}, keys_size, keys_positions.data(), nullptr, 0);
return cudf::detail::contains(indices_view, keys_positions_view, stream, mr);
}();
auto d_matches = matches->view().data<bool>();
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
column_view null_removed_map(
data_type(type_to_id<size_type>()),
arg_result->size(),
static_cast<void const*>(arg_result->view().template data<size_type>()));
static_cast<void const*>(arg_result->view().template data<size_type>()),
nullptr,
0);
auto gather_argminmax =
cudf::detail::gather(table_view({col}),
null_removed_map,
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/groupby/sort/aggregate.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -167,7 +167,9 @@ void aggregate_result_functor::operator()<aggregation::MIN>(aggregation const& a
column_view null_removed_map(
data_type(type_to_id<size_type>()),
argmin_result.size(),
static_cast<void const*>(argmin_result.template data<size_type>()));
static_cast<void const*>(argmin_result.template data<size_type>()),
nullptr,
0);
auto transformed_result =
cudf::detail::gather(table_view({values}),
null_removed_map,
Expand Down Expand Up @@ -207,7 +209,9 @@ void aggregate_result_functor::operator()<aggregation::MAX>(aggregation const& a
column_view null_removed_map(
data_type(type_to_id<size_type>()),
argmax_result.size(),
static_cast<void const*>(argmax_result.template data<size_type>()));
static_cast<void const*>(argmax_result.template data<size_type>()),
nullptr,
0);
auto transformed_result =
cudf::detail::gather(table_view({values}),
null_removed_map,
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/groupby/sort/sort_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,11 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre
column_ptr temp_labels = make_numeric_column(
data_type(type_to_id<size_type>()), _keys.num_rows(), mask_state::ALL_NULL, stream);

auto group_labels_view = cudf::column_view(
data_type(type_to_id<size_type>()), group_labels(stream).size(), group_labels(stream).data());
auto group_labels_view = cudf::column_view(data_type(type_to_id<size_type>()),
group_labels(stream).size(),
group_labels(stream).data(),
nullptr,
0);

auto scatter_map = key_sort_order(stream);

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ struct column_to_strings_fn {
return cudf::strings::detail::from_timestamps(
column,
format,
strings_column_view(column_view{data_type{type_id::STRING}, 0, nullptr}),
strings_column_view(make_empty_column(type_id::STRING)->view()),
stream_,
mr_);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ struct column_to_strings_fn {
return cudf::strings::detail::from_timestamps(
column,
format,
strings_column_view(column_view{data_type{type_id::STRING}, 0, nullptr}),
strings_column_view(column_view{data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
stream_,
mr_);
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/sort/segmented_sort_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,8 @@ std::unique_ptr<column> segmented_sorted_order_common(
// insert segment id before all columns.
std::vector<column_view> keys_with_segid;
keys_with_segid.reserve(keys.num_columns() + 1);
keys_with_segid.push_back(
column_view(data_type(type_to_id<size_type>()), segment_ids.size(), segment_ids.data()));
keys_with_segid.push_back(column_view(
data_type(type_to_id<size_type>()), segment_ids.size(), segment_ids.data(), nullptr, 0));
keys_with_segid.insert(keys_with_segid.end(), keys.begin(), keys.end());
auto segid_keys = table_view(keys_with_segid);

Expand Down
7 changes: 5 additions & 2 deletions cpp/src/text/generate_ngrams.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@ std::unique_ptr<cudf::column> generate_ngrams(cudf::strings_column_view const& s

// first create a new offsets vector removing nulls and empty strings from the input column
std::unique_ptr<cudf::column> non_empty_offsets_column = [&] {
cudf::column_view offsets_view(
cudf::data_type{cudf::type_id::INT32}, strings_count + 1, strings.offsets_begin());
cudf::column_view offsets_view(cudf::data_type{cudf::type_id::INT32},
strings_count + 1,
strings.offsets_begin(),
nullptr,
0);
auto table_offsets = cudf::detail::copy_if(
cudf::table_view({offsets_view}),
[d_strings, strings_count] __device__(cudf::size_type idx) {
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/text/tokenize.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -215,7 +215,8 @@ std::unique_ptr<cudf::column> character_tokenize(cudf::strings_column_view const
});

// create the output chars column -- just a copy of the input's chars column
cudf::column_view chars_view(cudf::data_type{cudf::type_id::INT8}, chars_bytes, d_chars);
cudf::column_view chars_view(
cudf::data_type{cudf::type_id::INT8}, chars_bytes, d_chars, nullptr, 0);
auto chars_column = std::make_unique<cudf::column>(chars_view, stream, mr);

// return new strings column
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/interop/dlpack_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,8 @@ TYPED_TEST(DLPackNumericTests, ToDlpack1D)

// Verify that data matches input column
constexpr cudf::data_type type{cudf::type_to_id<TypeParam>()};
cudf::column_view const result_view(type, tensor.shape[0], tensor.data, col_view.null_mask());
cudf::column_view const result_view(
type, tensor.shape[0], tensor.data, col_view.null_mask(), col_view.null_count());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_view, result_view);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/iterator/sizes_to_offsets_iterator_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ TYPED_TEST(SizesToOffsetsIteratorTestTyped, ExclusiveScan)

auto expected =
cudf::test::fixed_width_column_wrapper<T>(expected_values.begin(), expected_values.end());
auto result_col =
cudf::column_view(cudf::data_type(cudf::type_to_id<T>()), d_view.size(), result.data());
auto result_col = cudf::column_view(
cudf::data_type(cudf::type_to_id<T>()), d_view.size(), result.data(), nullptr, 0);

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_col, expected);
EXPECT_EQ(last.value(stream), expected_reduce);
Expand Down
Loading

0 comments on commit 9619439

Please sign in to comment.