Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for struct scalars. #8220

Merged
merged 6 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ add_library(cudf
src/bitmask/null_mask.cu
src/column/column.cu
src/column/column_device_view.cu
src/column/column_factories.cpp
src/column/column_factories.cu
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
src/column/column_view.cpp
src/comms/ipc/ipc.cpp
src/copying/concatenate.cu
Expand Down
54 changes: 54 additions & 0 deletions cpp/include/cudf/scalar/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

Expand Down Expand Up @@ -572,5 +573,58 @@ class list_scalar : public scalar {
cudf::column _data;
};

/**
* @brief An owning class to represent a struct value in device memory
*
* The members of the struct are stored as the columns of a `table` owned by the scalar.
* The constructors that take data require every input column to have exactly 1 row.
*/
class struct_scalar : public scalar {
public:
/**
* @brief Construct a struct scalar of type STRUCT without supplying any member data.
*/
struct_scalar();
~struct_scalar() = default;
// Copy and move construction are defaulted; assignment in either form is disabled.
struct_scalar(struct_scalar&& other) = default;
struct_scalar(struct_scalar const& other) = default;
struct_scalar& operator=(struct_scalar const& other) = delete;
struct_scalar& operator=(struct_scalar&& other) = delete;

/**
* @brief Construct a new struct scalar object from table_view
*
* The input table_view is deep-copied. Every column of the input must have exactly 1 row.
* When `is_valid` is false, a null validity mask is pushed down onto each copied column.
*
* @param data The table data to copy.
* @param is_valid Whether the value held by the scalar is valid
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
struct_scalar(table_view const& data,
bool is_valid = true,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a new struct scalar object from a host_span of column_views
*
* The input column_views are deep-copied. Every column_view must have exactly 1 row.
* When `is_valid` is false, a null validity mask is pushed down onto each copied column.
*
* @param data The column_views to copy.
* @param is_valid Whether the value held by the scalar is valid
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
struct_scalar(host_span<const column_view> data,
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
bool is_valid = true,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a non-owning, immutable view to underlying device data
*
* @return A `table_view` over the columns held by this scalar
*/
table_view view() const;

private:
// Table holding the copied member columns of the struct.
table _data;

/**
* @brief Pushes a null validity mask down onto every column of `_data`.
*
* Invoked by the data-taking constructors when the scalar is constructed as invalid.
*
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
void superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
};

/** @} */ // end of group
} // namespace cudf
30 changes: 29 additions & 1 deletion cpp/include/cudf/scalar/scalar_factories.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -156,5 +156,33 @@ std::unique_ptr<scalar> make_fixed_point_scalar(
return std::make_unique<scalar_type_t<T>>(value, scale, true, stream, mr);
}

/**
* @brief Construct a struct scalar using the given table_view.
*
* The columns must have 1 row. The returned scalar is always constructed as valid.
*
* @param data The columnar data to store in the scalar object
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
* @return Owning pointer (typed as the `scalar` base class) to the new `struct_scalar`
*/
std::unique_ptr<scalar> make_struct_scalar(
table_view const& data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a struct scalar using the given span of column views.
*
* The columns must have 1 row. The returned scalar is always constructed as valid.
*
* @param data The columnar data to store in the scalar object
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
* @return Owning pointer (typed as the `scalar` base class) to the new `struct_scalar`
*/
std::unique_ptr<scalar> make_struct_scalar(
host_span<const column_view> data,
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
3 changes: 1 addition & 2 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class scalar;

// clang-format off
class list_scalar;
class struct_scalar;
class string_scalar;
template <typename T> class numeric_scalar;
template <typename T> class fixed_point_scalar;
Expand All @@ -74,8 +75,6 @@ template <typename T> class timestamp_scalar_device_view;
template <typename T> class duration_scalar_device_view;
// clang-format on

class struct_scalar;

class table;
class table_view;
class mutable_table_view;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/fill.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/dictionary/dictionary_factories.hpp>
Expand Down Expand Up @@ -230,8 +231,18 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stru
size_type size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("TODO. struct_view currently not supported.");
{
  // Builds a struct column of `size` rows, each a replica of the struct scalar `value`.

  // Bind by reference. A plain `auto` would deduce a value type and copy-construct the
  // struct_scalar (including the table it owns) only to read from the copy.
  auto const& ss = static_cast<scalar_type_t<cudf::struct_view> const&>(value);

  // A constant iterator yielding 0 makes the gather read row 0 of every member column
  // `size` times.
  auto iter = thrust::make_constant_iterator(0);

  auto children =
    detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr);

  // A valid scalar yields a column with no nulls and no mask; an invalid scalar yields a
  // column whose rows are all null.
  auto const is_valid = ss.is_valid();
  return make_structs_column(
    size,
    std::move(children->release()),
    is_valid ? 0 : size,
    is_valid ? rmm::device_buffer{}
             : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr),
    stream,
    mr);
}

std::unique_ptr<column> make_column_from_scalar(scalar const& s,
Expand Down
49 changes: 49 additions & 0 deletions cpp/src/scalar/scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
* limitations under the License.
*/

#include "structs/utilities.hpp"
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved

#include <cudf/column/column.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/string_view.hpp>
Expand Down Expand Up @@ -446,4 +449,50 @@ list_scalar::list_scalar(cudf::column&& data,

column_view list_scalar::view() const { return _data.view(); }

/**
 * @brief Default constructor: initializes the base scalar with the STRUCT type id and leaves
 * the member table default-constructed.
 */
struct_scalar::struct_scalar()
  : scalar(data_type(type_id::STRUCT))
{
}

/**
 * @brief Builds a struct scalar by copying the columns of `data`.
 *
 * Every column of `data` must contain exactly one row. When the scalar is constructed as
 * invalid, the null state is pushed down onto each copied column.
 *
 * @param data Table whose columns become the struct's members
 * @param is_valid Whether the value held by the scalar is valid
 * @param stream CUDA stream used for device memory operations
 * @param mr Device memory resource to use for device memory allocation
 */
struct_scalar::struct_scalar(table_view const& data,
                             bool is_valid,
                             rmm::cuda_stream_view stream,
                             rmm::mr::device_memory_resource* mr)
  : scalar(data_type(type_id::STRUCT), is_valid, stream, mr), _data(data, stream, mr)
{
  auto const has_single_row = [](column_view const& member) { return member.size() == 1; };
  CUDF_EXPECTS(std::all_of(data.begin(), data.end(), has_single_row),
               "Struct scalar inputs must have exactly 1 row");

  // Nothing more to do for a valid scalar; otherwise push the null state down.
  if (is_valid) { return; }
  superimpose_nulls(stream, mr);
}

/**
 * @brief Builds a struct scalar by copying the given column views.
 *
 * The views are gathered into a temporary `table_view` and construction is delegated to the
 * `table_view` constructor, so the one-row validation and the null push-down for invalid
 * scalars live in a single place instead of being repeated here.
 *
 * @param data Column views that become the struct's members; each must have exactly 1 row
 * @param is_valid Whether the value held by the scalar is valid
 * @param stream CUDA stream used for device memory operations
 * @param mr Device memory resource to use for device memory allocation
 */
struct_scalar::struct_scalar(host_span<const column_view> data,
                             bool is_valid,
                             rmm::cuda_stream_view stream,
                             rmm::mr::device_memory_resource* mr)
  // The temporary table_view outlives the delegated constructor call, which is all that is
  // required because the target constructor copies the data.
  : struct_scalar(
      table_view{std::vector<column_view>{data.begin(), data.end()}}, is_valid, stream, mr)
{
}

/**
 * @brief Returns a non-owning view of the table that stores the struct's members.
 */
table_view struct_scalar::view() const
{
  return _data.view();
}

/**
 * @brief Pushes a null validity mask down onto every column held by this scalar.
 *
 * A one-word, all-zero bitmask is copied to the device and handed to
 * `superimpose_parent_nulls` once per column of `_data`.
 *
 * @param stream CUDA stream used for device memory operations
 * @param mr Device memory resource to use for device memory allocation
 */
void struct_scalar::superimpose_nulls(rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
{
  // One bitmask word with every bit cleared, i.e. the scalar's single row is null.
  std::vector<bitmask_type> host_validity(1, bitmask_type{0});
  auto validity = cudf::detail::make_device_uvector_sync(host_validity, stream, mr);

  // The structs column factory passes `rmm::device_buffer::size()` (a byte count) as the
  // mask size, so pass the size of the word in bytes rather than the literal 1, which would
  // describe a mask smaller than a single bitmask word.
  // NOTE(review): assumes superimpose_parent_nulls keeps the byte-size contract of the
  // superimpose_parent_nullmask helper it replaced -- confirm against structs/utilities.hpp.
  std::size_t const validity_size_bytes = validity.size() * sizeof(bitmask_type);

  // Exactly one row, and it is null.
  size_type const null_count = 1;

  for (size_type i = 0; i < _data.num_columns(); ++i) {
    cudf::structs::detail::superimpose_parent_nulls(
      validity.data(), validity_size_bytes, null_count, _data.get_column(i), stream, mr);
  }
}

} // namespace cudf
16 changes: 15 additions & 1 deletion cpp/src/scalar/scalar_factories.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -97,6 +97,20 @@ std::unique_ptr<scalar> make_fixed_width_scalar(data_type type,
return type_dispatcher(type, scalar_construction_helper{}, stream, mr);
}

/**
 * @brief Factory that wraps the columns of `data` in a newly allocated, valid struct scalar.
 */
std::unique_ptr<scalar> make_struct_scalar(table_view const& data,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
{
  // Scalars produced by this factory are always valid.
  constexpr bool is_valid = true;
  return std::make_unique<struct_scalar>(data, is_valid, stream, mr);
}

/**
 * @brief Factory that wraps the given column views in a newly allocated, valid struct scalar.
 */
std::unique_ptr<scalar> make_struct_scalar(host_span<const column_view> data,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
{
  // Scalars produced by this factory are always valid.
  constexpr bool is_valid = true;
  return std::make_unique<struct_scalar>(data, is_valid, stream, mr);
}

namespace {
struct default_scalar_functor {
template <typename T>
Expand Down
67 changes: 9 additions & 58 deletions cpp/src/structs/structs_column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,17 @@
* limitations under the License.
*/

#include "structs/utilities.hpp"
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <memory>
namespace cudf {
namespace {
// Applies the validity of a parent struct to one of its member (child) columns.
//
// A child without a null mask receives a copy of the parent's mask and the parent's null
// count. A child that already has a mask gets it ANDed in place with the parent's mask and
// its null count reset to UNKNOWN_NULL_COUNT. When the child is itself a struct, the child's
// resulting mask is then applied to each of its own children in the same way.
void superimpose_parent_nullmask(bitmask_type const* parent_null_mask,
                                 std::size_t parent_null_mask_size,
                                 size_type parent_null_count,
                                 column& child,
                                 rmm::cuda_stream_view stream,
                                 rmm::mr::device_memory_resource* mr)
{
  if (child.nullable()) {
    // The child already carries a mask: combine it with the parent's, writing the result
    // back into the child's own mask.
    auto child_mask = child.mutable_view().null_mask();

    std::vector<bitmask_type const*> masks{parent_null_mask, child_mask};
    std::vector<size_type> begin_bits{0, 0};
    cudf::detail::inplace_bitmask_and(
      device_span<bitmask_type>(child_mask, num_bitmask_words(child.size())),
      masks,
      begin_bits,
      child.size(),
      stream,
      mr);
    child.set_null_count(UNKNOWN_NULL_COUNT);
  } else {
    // The child has no mask of its own: adopt a copy of the parent's.
    child.set_null_mask(rmm::device_buffer{parent_null_mask, parent_null_mask_size, stream, mr});
    child.set_null_count(parent_null_count);
  }

  // Only struct children have members that need the same treatment.
  if (child.type().id() != cudf::type_id::STRUCT) { return; }

  bitmask_type const* nested_parent_mask = child.mutable_view().null_mask();
  for (size_type idx = 0; idx < child.num_children(); ++idx) {
    superimpose_parent_nullmask(nested_parent_mask,
                                parent_null_mask_size,
                                UNKNOWN_NULL_COUNT,
                                child.child(idx),
                                stream,
                                mr);
  }
}
} // namespace

/// Column factory that adopts child columns.
std::unique_ptr<cudf::column> make_structs_column(
Expand All @@ -95,12 +45,13 @@ std::unique_ptr<cudf::column> make_structs_column(

if (!null_mask.is_empty()) {
for (auto& child : child_columns) {
superimpose_parent_nullmask(static_cast<bitmask_type const*>(null_mask.data()),
null_mask.size(),
null_count,
*child,
stream,
mr);
cudf::structs::detail::superimpose_parent_nulls(
static_cast<bitmask_type const*>(null_mask.data()),
null_mask.size(),
null_count,
*child,
stream,
mr);
}
}

Expand Down
Loading