Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for struct scalars. #8220

Merged
merged 6 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ add_library(cudf
src/column/column.cu
src/column/column_device_view.cu
src/column/column_factories.cpp
src/column/column_factories.cu
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
src/column/column_view.cpp
src/comms/ipc/ipc.cpp
src/copying/concatenate.cu
Expand Down Expand Up @@ -375,7 +376,7 @@ add_library(cudf
src/structs/copying/concatenate.cu
src/structs/structs_column_factories.cu
src/structs/structs_column_view.cpp
src/structs/utilities.cu
src/structs/utilities.cpp
src/table/table.cpp
src/table/table_device_view.cu
src/table/table_view.cpp
Expand Down
55 changes: 55 additions & 0 deletions cpp/include/cudf/scalar/scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

Expand Down Expand Up @@ -572,5 +573,59 @@ class list_scalar : public scalar {
cudf::column _data;
};

/**
* @brief An owning class to represent a struct value in device memory
*
* The struct's fields are held as the columns of an owned `table` (`_data`);
* `view()` exposes them as a non-owning `table_view`.
*/
class struct_scalar : public scalar {
public:
/**
* @brief Default constructor (defined out of line).
*/
struct_scalar();
~struct_scalar() = default;
// Copy and move construction are compiler-generated (member-wise).
struct_scalar(struct_scalar&& other) = default;
struct_scalar(struct_scalar const& other) = default;
// Assignment in either form is explicitly disabled.
struct_scalar& operator=(struct_scalar const& other) = delete;
struct_scalar& operator=(struct_scalar&& other) = delete;

/**
* @brief Construct a new struct scalar object from table_view
*
* The input table_view is deep-copied.
*
* @param data The table data to copy.
* @param is_valid Whether the value held by the scalar is valid
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
struct_scalar(table_view const& data,
bool is_valid = true,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a new struct scalar object from a host_span of column_views
*
* The input column_views are deep-copied.
*
* @param data The column_views to copy.
* @param is_valid Whether the value held by the scalar is valid
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation
*/
struct_scalar(host_span<column_view const> data,
bool is_valid = true,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a non-owning, immutable view to underlying device data
*
* @return A `table_view` over the table owned by this scalar
*/
table_view view() const;

private:
// Owned copy of the struct's field columns.
table _data;

// Construction helpers, defined out of line.
// NOTE(review): presumably `init` performs the setup shared by both data-taking
// constructors and `superimpose_nulls` propagates an invalid scalar's nullness
// to the child columns -- confirm against the implementation file.
void init(bool is_valid, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
void superimpose_nulls(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr);
};

/** @} */ // end of group
} // namespace cudf
30 changes: 29 additions & 1 deletion cpp/include/cudf/scalar/scalar_factories.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -168,5 +168,33 @@ std::unique_ptr<scalar> make_list_scalar(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a struct scalar using the given table_view.
*
* The columns must have 1 row.
*
* @param data The columnar data to store in the scalar object
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
* @return The new struct scalar, returned through a pointer to the `scalar` base class
*/
std::unique_ptr<scalar> make_struct_scalar(
table_view const& data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a struct scalar using the given span of column views.
*
* The columns must have 1 row.
*
* @param data The columnar data to store in the scalar object
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
* @return The new struct scalar, returned through a pointer to the `scalar` base class
*/
std::unique_ptr<scalar> make_struct_scalar(
host_span<column_view const> data,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
3 changes: 1 addition & 2 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class scalar;

// clang-format off
class list_scalar;
class struct_scalar;
class string_scalar;
template <typename T> class numeric_scalar;
template <typename T> class fixed_point_scalar;
Expand All @@ -74,8 +75,6 @@ template <typename T> class timestamp_scalar_device_view;
template <typename T> class duration_scalar_device_view;
// clang-format on

class struct_scalar;

class table;
class table_view;
class mutable_table_view;
Expand Down
85 changes: 0 additions & 85 deletions cpp/src/column/column_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/detail/fill.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -159,90 +158,6 @@ std::unique_ptr<column> make_fixed_width_column(data_type type,
/// clang-format on
}

/**
 * @brief Type-dispatched functor that builds a column of `size` rows, each equal to a scalar.
 *
 * The primary template goes through `make_fixed_width_column`; other types are
 * handled by explicit specializations of `operator()`.
 */
struct column_from_scalar_dispatch {
  /**
   * @param value  Scalar to replicate
   * @param size   Number of rows in the output column
   * @param stream CUDA stream used for device memory operations
   * @param mr     Device memory resource used for the output column
   * @return An all-null column when `value` is invalid, otherwise a column filled with `value`
   */
  template <typename T>
  std::unique_ptr<cudf::column> operator()(scalar const& value,
                                           size_type size,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr) const
  {
    bool const scalar_is_null = !value.is_valid();
    auto const initial_mask   = scalar_is_null ? mask_state::ALL_NULL : mask_state::UNALLOCATED;

    auto result = make_fixed_width_column(value.type(), size, initial_mask, stream, mr);
    // A null scalar needs no fill: the all-null mask already describes every row.
    if (scalar_is_null) { return result; }

    auto target = result->mutable_view();
    detail::fill_in_place(target, 0, size, value, stream);
    return result;
  }
};

/**
 * @brief Builds a strings column of `size` rows, each equal to the given string scalar.
 *
 * @param value  String scalar to replicate
 * @param size   Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr     Device memory resource used for the output column
 * @return An all-null strings column when `value` is invalid, otherwise a column
 *         holding `size` copies of the string with its null mask cleared
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::string_view>(
  scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr) const
{
  auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);

  if (!value.is_valid())
    return std::make_unique<column>(value.type(),
                                    size,
                                    rmm::device_buffer{0, stream, mr},
                                    null_mask,
                                    size);

  // Create a strings column_view with all nulls and no children.
  // Since we are setting every row to the scalar, the fill() never needs to access
  // any of the children in the strings column which would otherwise cause an exception.
  column_view sc{
    data_type{type_id::STRING}, size, nullptr, static_cast<bitmask_type*>(null_mask.data()), size};
  // Bind by reference: a plain `auto` would drop the reference from the cast and
  // copy-construct the whole string scalar just to read it.
  auto const& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);
  // fill the column with the scalar
  auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
  output->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);  // should be no nulls
  return output;
}

/**
 * @brief Dictionary columns cannot be created through this dispatcher.
 *
 * All arguments are ignored; the call always fails via `CUDF_FAIL`.
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::dictionary32>(
  scalar const&, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
  CUDF_FAIL("dictionary not supported when creating from scalar");
}

/**
 * @brief List columns cannot yet be created through this dispatcher.
 *
 * All arguments are ignored; the call always fails via `CUDF_FAIL`.
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::list_view>(
  scalar const&, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
  // TODO: implement list support. Until then, report what is unsupported instead of
  // a bare "TODO", matching the wording of the dictionary specialization.
  CUDF_FAIL("list not supported when creating from scalar");
}

/**
 * @brief Struct columns are not supported by this version of the dispatcher.
 *
 * All arguments are ignored; the call always fails via `CUDF_FAIL`.
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::struct_view>(
  scalar const&, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
  CUDF_FAIL("TODO. struct_view currently not supported.");
}

/**
 * @brief Creates a column of `size` rows, each equal to the scalar `s`.
 *
 * @param s      Scalar to replicate
 * @param size   Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr     Device memory resource used for the output column
 * @return An empty column of the scalar's type when `size` is zero, otherwise the
 *         result of dispatching on the scalar's type
 */
std::unique_ptr<column> make_column_from_scalar(scalar const& s,
                                                size_type size,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  auto const scalar_type = s.type();
  if (size != 0) {
    return type_dispatcher(scalar_type, column_from_scalar_dispatch{}, s, size, stream, mr);
  }
  return make_empty_column(scalar_type);
}

std::unique_ptr<column> make_dictionary_from_scalar(scalar const& s,
size_type size,
rmm::cuda_stream_view stream,
Expand Down
124 changes: 124 additions & 0 deletions cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/fill.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/fill.hpp>

namespace cudf {

namespace {

/**
 * @brief Type-dispatched functor that builds a column of `size` rows, each equal to a scalar.
 *
 * The primary template goes through `make_fixed_width_column`; other types are
 * handled by explicit specializations of `operator()`.
 */
struct column_from_scalar_dispatch {
  /**
   * @param value  Scalar to replicate
   * @param size   Number of rows in the output column
   * @param stream CUDA stream used for device memory operations
   * @param mr     Device memory resource used for the output column
   * @return An all-null column when `value` is invalid, otherwise a column filled with `value`
   */
  template <typename T>
  std::unique_ptr<cudf::column> operator()(scalar const& value,
                                           size_type size,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr) const
  {
    bool const scalar_is_null = !value.is_valid();
    auto const initial_mask   = scalar_is_null ? mask_state::ALL_NULL : mask_state::UNALLOCATED;

    auto result = make_fixed_width_column(value.type(), size, initial_mask, stream, mr);
    // A null scalar needs no fill: the all-null mask already describes every row.
    if (scalar_is_null) { return result; }

    auto target = result->mutable_view();
    detail::fill_in_place(target, 0, size, value, stream);
    return result;
  }
};

/**
 * @brief Builds a strings column of `size` rows, each equal to the given string scalar.
 *
 * @param value  String scalar to replicate
 * @param size   Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr     Device memory resource used for the output column
 * @return An all-null strings column when `value` is invalid, otherwise a column
 *         holding `size` copies of the string with its null mask cleared
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::string_view>(
  scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr) const
{
  auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);

  if (!value.is_valid()) {
    // The mask is not needed again on this path, so hand it to the column instead
    // of passing an lvalue that would be copied.
    return std::make_unique<column>(
      value.type(), size, rmm::device_buffer{0, stream, mr}, std::move(null_mask), size);
  }

  // Create a strings column_view with all nulls and no children.
  // Since we are setting every row to the scalar, the fill() never needs to access
  // any of the children in the strings column which would otherwise cause an exception.
  column_view sc{
    data_type{type_id::STRING}, size, nullptr, static_cast<bitmask_type*>(null_mask.data()), size};
  // Bind by reference: a plain `auto` would drop the reference from the cast and
  // copy-construct the whole string scalar just to read it.
  auto const& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);
  // fill the column with the scalar
  auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
  output->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);  // should be no nulls
  return output;
}

/**
 * @brief Dictionary columns cannot be created through this dispatcher.
 *
 * All arguments are ignored; the call always fails via `CUDF_FAIL`.
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::dictionary32>(
  scalar const&, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
  CUDF_FAIL("dictionary not supported when creating from scalar");
}

/**
 * @brief List columns cannot yet be created through this dispatcher.
 *
 * All arguments are ignored; the call always fails via `CUDF_FAIL`.
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::list_view>(
  scalar const&, size_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
  // TODO: implement list support. Until then, report what is unsupported instead of
  // a bare "TODO", matching the wording of the dictionary specialization.
  CUDF_FAIL("list not supported when creating from scalar");
}

/**
 * @brief Builds a structs column of `size` rows, each equal to the given struct scalar.
 *
 * Each child column is produced by gathering row 0 of the scalar's table `size` times.
 *
 * @param value  Struct scalar to replicate
 * @param size   Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr     Device memory resource used for the output column
 * @return A structs column whose parent rows are all null when `value` is invalid,
 *         and which has no parent null mask otherwise
 */
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::struct_view>(
  scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr) const
{
  // Bind by reference: a plain `auto` would drop the reference from the cast and
  // copy-construct the struct_scalar (including its owned table) just to read it.
  auto const& ss = static_cast<scalar_type_t<cudf::struct_view> const&>(value);
  // Gather map that yields index 0 for every output row.
  auto iter = thrust::make_constant_iterator(0);

  auto children =
    detail::gather(ss.view(), iter, iter + size, out_of_bounds_policy::NULLIFY, stream, mr);
  auto const is_valid = ss.is_valid();
  // `release()` already yields a temporary, so no std::move is needed around it.
  return make_structs_column(size,
                             children->release(),
                             is_valid ? 0 : size,
                             is_valid
                               ? rmm::device_buffer{}
                               : detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr),
                             stream,
                             mr);
}

} // anonymous namespace

/**
 * @brief Creates a column of `size` rows, each equal to the scalar `s`.
 *
 * @param s      Scalar to replicate
 * @param size   Number of rows in the output column
 * @param stream CUDA stream used for device memory operations
 * @param mr     Device memory resource used for the output column
 * @return An empty column of the scalar's type when `size` is zero, otherwise the
 *         result of dispatching on the scalar's type
 */
std::unique_ptr<column> make_column_from_scalar(scalar const& s,
                                                size_type size,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  auto const scalar_type = s.type();
  if (size != 0) {
    return type_dispatcher(scalar_type, column_from_scalar_dispatch{}, s, size, stream, mr);
  }
  // NOTE(review): the empty column is built from the type alone; for nested types
  // (e.g. STRUCT) the scalar's children are presumably not reflected -- confirm.
  return make_empty_column(scalar_type);
}

} // namespace cudf
Loading