Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove null mask and null count from column_view constructors #13311

Merged
merged 7 commits into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,17 +280,17 @@ class column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Optional, pointer to device memory containing the null
* @param null_mask Pointer to device memory containing the null
* indicator bitmask
* @param null_count Optional, the number of null elements.
* @param offset optional, index of the first element
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
*/
column_view_base(data_type type,
size_type size,
void const* data,
bitmask_type const* null_mask = nullptr,
size_type null_count = UNKNOWN_NULL_COUNT,
size_type offset = 0);
bitmask_type const* null_mask,
size_type null_count,
vyasr marked this conversation as resolved.
Show resolved Hide resolved
size_type offset = 0);
};

class mutable_column_view_base : public column_view_base {
Expand Down Expand Up @@ -374,18 +374,18 @@ class column_view : public detail::column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Optional, pointer to device memory containing the null
* @param null_mask Pointer to device memory containing the null
* indicator bitmask
* @param null_count Optional, the number of null elements.
* @param offset optional, index of the first element
* @param children optional, depending on the element type, child columns may
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
* @param children Optional, depending on the element type, child columns may
* contain additional data
*/
column_view(data_type type,
size_type size,
void const* data,
bitmask_type const* null_mask = nullptr,
size_type null_count = UNKNOWN_NULL_COUNT,
bitmask_type const* null_mask,
size_type null_count,
size_type offset = 0,
std::vector<column_view> const& children = {});

Expand Down Expand Up @@ -528,19 +528,19 @@ class mutable_column_view : public detail::column_view_base {
* @param type The element type
* @param size The number of elements
* @param data Pointer to device memory containing the column elements
* @param null_mask Optional, pointer to device memory containing the null
* @param null_mask Pointer to device memory containing the null
indicator
* bitmask
* @param null_count Optional, the number of null elements.
* @param offset optional, index of the first element
* @param children optional, depending on the element type, child columns may
* @param null_count The number of null elements.
* @param offset Optional, index of the first element
* @param children Optional, depending on the element type, child columns may
* contain additional data
*/
mutable_column_view(data_type type,
size_type size,
void* data,
bitmask_type* null_mask = nullptr,
size_type null_count = cudf::UNKNOWN_NULL_COUNT,
bitmask_type* null_mask,
size_type null_count,
size_type offset = 0,
std::vector<mutable_column_view> const& children = {});

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/convert/convert_datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ std::unique_ptr<column> from_timestamps(
column_view const& timestamps,
std::string_view format = "%Y-%m-%dT%H:%M:%SZ",
strings_column_view const& names = strings_column_view(column_view{
data_type{type_id::STRING}, 0, nullptr}),
data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/convert/convert_lists.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -60,7 +60,7 @@ std::unique_ptr<column> format_list_column(
lists_column_view const& input,
string_scalar const& na_rep = string_scalar("NULL"),
strings_column_view const& separators = strings_column_view(column_view{
data_type{type_id::STRING}, 0, nullptr}),
data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf_test/tdigest_utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
spans.end(),
expected_min->mutable_view().template begin<double>(),
column_min<T>{});
column_view result_min(data_type{type_id::FLOAT64}, tdv.size(), tdv.min_begin());
column_view result_min(data_type{type_id::FLOAT64}, tdv.size(), tdv.min_begin(), nullptr, 0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_min, *expected_min);

auto expected_max = cudf::make_fixed_width_column(
Expand All @@ -189,7 +189,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
spans.end(),
expected_max->mutable_view().template begin<double>(),
column_max<T>{});
column_view result_max(data_type{type_id::FLOAT64}, tdv.size(), tdv.max_begin());
column_view result_max(data_type{type_id::FLOAT64}, tdv.size(), tdv.max_begin(), nullptr, 0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_max, *expected_max);
}

Expand Down
11 changes: 7 additions & 4 deletions cpp/src/binaryop/compiled/binary_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ struct scalar_as_column_view {
return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
{
auto& h_scalar_type_view = static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(s));
auto col_v =
column_view(s.type(), 1, h_scalar_type_view.data(), (bitmask_type const*)s.validity_data());
auto col_v = column_view(s.type(),
1,
h_scalar_type_view.data(),
reinterpret_cast<bitmask_type const*>(s.validity_data()),
!s.is_valid());
return std::pair{col_v, std::unique_ptr<column>(nullptr)};
}
template <typename T, CUDF_ENABLE_IF(!is_fixed_width<T>())>
Expand All @@ -74,8 +77,8 @@ scalar_as_column_view::return_type scalar_as_column_view::operator()<cudf::strin
auto offsets_column = std::get<0>(cudf::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + 1, stream, mr));

auto chars_column_v =
column_view(data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data());
auto chars_column_v = column_view(
data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data(), nullptr, 0);
// Construct string column_view
auto col_v = column_view(s.type(),
1,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stri

// Since we are setting every row to the scalar, the fill() never needs to access
// any of the children in the strings column which would otherwise cause an exception.
column_view sc{value.type(), size, nullptr};
column_view sc{value.type(), size, nullptr, nullptr, 0};
auto& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);

// fill the column with the scalar
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ struct scalar_empty_like_functor_impl<cudf::list_view> {
auto ls = static_cast<list_scalar const*>(&input);

// TODO: add a manual constructor for lists_column_view.
column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr};
column_view offsets{cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0};
std::vector<column_view> children;
children.push_back(offsets);
children.push_back(ls->view());
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ std::unique_ptr<column> scatter_gather_based_if_else(cudf::scalar const& lhs,
auto scatter_source = std::vector<std::reference_wrapper<const scalar>>{std::ref(lhs)};
auto scatter_map_column_view = cudf::column_view{cudf::data_type{cudf::type_id::INT32},
static_cast<cudf::size_type>(scatter_map_size),
scatter_map.begin()};
scatter_map.begin(),
nullptr,
0};

auto result = cudf::detail::scatter(
scatter_source, scatter_map_column_view, table_view{std::vector<column_view>{rhs}}, stream, mr);
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/copying/gather.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,7 +68,9 @@ std::unique_ptr<table> gather(table_view const& source_table,
"invalid gather map size");
auto map_col = column_view(data_type{type_to_id<size_type>()},
static_cast<size_type>(gather_map.size()),
gather_map.data());
gather_map.data(),
nullptr,
0);
return gather(source_table, map_col, bounds_policy, neg_indices, stream, mr);
}

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,9 @@ std::unique_ptr<table> scatter(table_view const& source,
"invalid scatter map size");
auto map_col = column_view(data_type{type_to_id<size_type>()},
static_cast<size_type>(scatter_map.size()),
scatter_map.data());
scatter_map.data(),
nullptr,
0);
return scatter(source, map_col, target, stream, mr);
}

Expand Down
6 changes: 4 additions & 2 deletions cpp/src/dictionary/detail/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
std::transform(columns.begin(), columns.end(), keys_views.begin(), [keys_type](auto cv) {
auto dict_view = dictionary_column_view(cv);
// empty column may not have keys so we create an empty column_view place-holder
if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr};
if (dict_view.is_empty()) return column_view{keys_type, 0, nullptr, nullptr, 0};
auto keys = dict_view.keys();
CUDF_EXPECTS(keys.type() == keys_type, "key types of all dictionary columns must match");
return keys;
Expand Down Expand Up @@ -245,7 +245,9 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns,
std::vector<column_view> indices_views(columns.size());
std::transform(columns.begin(), columns.end(), indices_views.begin(), [](auto cv) {
auto dict_view = dictionary_column_view(cv);
if (dict_view.is_empty()) return column_view{data_type{type_id::UINT32}, 0, nullptr};
if (dict_view.is_empty()) {
return column_view{data_type{type_id::UINT32}, 0, nullptr, nullptr, 0};
}
return dict_view.get_indices_annotated(); // nicely includes validity mask and view offset
});
auto all_indices = cudf::detail::concatenate(indices_views, stream, mr);
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/dictionary_factories.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -137,7 +137,8 @@ std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys,
new_type, indices_size, std::move(*(contents.data.release())), rmm::device_buffer{}, 0);
}
// If the new type does not match, then convert the data.
cudf::column_view cast_view{cudf::data_type{indices_type}, indices_size, contents.data->data()};
cudf::column_view cast_view{
cudf::data_type{indices_type}, indices_size, contents.data->data(), nullptr, 0};
return cudf::detail::cast(cast_view, new_type, stream, mr);
}();

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/remove_keys.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -178,7 +178,8 @@ std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& diction
rmm::device_uvector<uint32_t> keys_positions(keys_size, stream);
thrust::sequence(rmm::exec_policy(stream), keys_positions.begin(), keys_positions.end());
// wrap the indices for comparison in contains()
column_view keys_positions_view(data_type{type_id::UINT32}, keys_size, keys_positions.data());
column_view keys_positions_view(
data_type{type_id::UINT32}, keys_size, keys_positions.data(), nullptr, 0);
return cudf::detail::contains(indices_view, keys_positions_view, stream, mr);
}();
auto d_matches = matches->view().data<bool>();
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
column_view null_removed_map(
data_type(type_to_id<size_type>()),
arg_result->size(),
static_cast<void const*>(arg_result->view().template data<size_type>()));
static_cast<void const*>(arg_result->view().template data<size_type>()),
nullptr,
0);
auto gather_argminmax =
cudf::detail::gather(table_view({col}),
null_removed_map,
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/groupby/sort/aggregate.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -167,7 +167,9 @@ void aggregate_result_functor::operator()<aggregation::MIN>(aggregation const& a
column_view null_removed_map(
data_type(type_to_id<size_type>()),
argmin_result.size(),
static_cast<void const*>(argmin_result.template data<size_type>()));
static_cast<void const*>(argmin_result.template data<size_type>()),
nullptr,
0);
auto transformed_result =
cudf::detail::gather(table_view({values}),
null_removed_map,
Expand Down Expand Up @@ -207,7 +209,9 @@ void aggregate_result_functor::operator()<aggregation::MAX>(aggregation const& a
column_view null_removed_map(
data_type(type_to_id<size_type>()),
argmax_result.size(),
static_cast<void const*>(argmax_result.template data<size_type>()));
static_cast<void const*>(argmax_result.template data<size_type>()),
nullptr,
0);
auto transformed_result =
cudf::detail::gather(table_view({values}),
null_removed_map,
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/groupby/sort/sort_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,11 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre
column_ptr temp_labels = make_numeric_column(
data_type(type_to_id<size_type>()), _keys.num_rows(), mask_state::ALL_NULL, stream);

auto group_labels_view = cudf::column_view(
data_type(type_to_id<size_type>()), group_labels(stream).size(), group_labels(stream).data());
auto group_labels_view = cudf::column_view(data_type(type_to_id<size_type>()),
group_labels(stream).size(),
group_labels(stream).data(),
nullptr,
0);

auto scatter_map = key_sort_order(stream);

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ struct column_to_strings_fn {
return cudf::strings::detail::from_timestamps(
column,
format,
strings_column_view(column_view{data_type{type_id::STRING}, 0, nullptr}),
strings_column_view(make_empty_column(type_id::STRING)->view()),
stream_,
mr_);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/json/write_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ struct column_to_strings_fn {
return cudf::strings::detail::from_timestamps(
column,
format,
strings_column_view(column_view{data_type{type_id::STRING}, 0, nullptr}),
strings_column_view(column_view{data_type{type_id::STRING}, 0, nullptr, nullptr, 0}),
stream_,
mr_);
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/sort/segmented_sort_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,8 @@ std::unique_ptr<column> segmented_sorted_order_common(
// insert segment id before all columns.
std::vector<column_view> keys_with_segid;
keys_with_segid.reserve(keys.num_columns() + 1);
keys_with_segid.push_back(
column_view(data_type(type_to_id<size_type>()), segment_ids.size(), segment_ids.data()));
keys_with_segid.push_back(column_view(
data_type(type_to_id<size_type>()), segment_ids.size(), segment_ids.data(), nullptr, 0));
keys_with_segid.insert(keys_with_segid.end(), keys.begin(), keys.end());
auto segid_keys = table_view(keys_with_segid);

Expand Down
7 changes: 5 additions & 2 deletions cpp/src/text/generate_ngrams.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@ std::unique_ptr<cudf::column> generate_ngrams(cudf::strings_column_view const& s

// first create a new offsets vector removing nulls and empty strings from the input column
std::unique_ptr<cudf::column> non_empty_offsets_column = [&] {
cudf::column_view offsets_view(
cudf::data_type{cudf::type_id::INT32}, strings_count + 1, strings.offsets_begin());
cudf::column_view offsets_view(cudf::data_type{cudf::type_id::INT32},
strings_count + 1,
strings.offsets_begin(),
nullptr,
0);
auto table_offsets = cudf::detail::copy_if(
cudf::table_view({offsets_view}),
[d_strings, strings_count] __device__(cudf::size_type idx) {
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/text/tokenize.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -215,7 +215,8 @@ std::unique_ptr<cudf::column> character_tokenize(cudf::strings_column_view const
});

// create the output chars column -- just a copy of the input's chars column
cudf::column_view chars_view(cudf::data_type{cudf::type_id::INT8}, chars_bytes, d_chars);
cudf::column_view chars_view(
cudf::data_type{cudf::type_id::INT8}, chars_bytes, d_chars, nullptr, 0);
auto chars_column = std::make_unique<cudf::column>(chars_view, stream, mr);

// return new strings column
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/interop/dlpack_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,8 @@ TYPED_TEST(DLPackNumericTests, ToDlpack1D)

// Verify that data matches input column
constexpr cudf::data_type type{cudf::type_to_id<TypeParam>()};
cudf::column_view const result_view(type, tensor.shape[0], tensor.data, col_view.null_mask());
cudf::column_view const result_view(
type, tensor.shape[0], tensor.data, col_view.null_mask(), col_view.null_count());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_view, result_view);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/iterator/sizes_to_offsets_iterator_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ TYPED_TEST(SizesToOffsetsIteratorTestTyped, ExclusiveScan)

auto expected =
cudf::test::fixed_width_column_wrapper<T>(expected_values.begin(), expected_values.end());
auto result_col =
cudf::column_view(cudf::data_type(cudf::type_to_id<T>()), d_view.size(), result.data());
auto result_col = cudf::column_view(
cudf::data_type(cudf::type_to_id<T>()), d_view.size(), result.data(), nullptr, 0);

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_col, expected);
EXPECT_EQ(last.value(stream), expected_reduce);
Expand Down
Loading