Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branch 0.19 merge 0.18 #7310

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
# Copyright (c) 2018-2021, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -128,6 +128,7 @@ test:
- test -f $PREFIX/include/cudf/lists/contains.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
- test -f $PREFIX/include/cudf/lists/sorting.hpp
- test -f $PREFIX/include/cudf/merge.hpp
- test -f $PREFIX/include/cudf/null_mask.hpp
- test -f $PREFIX/include/cudf/partitioning.hpp
Expand Down
112 changes: 103 additions & 9 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION.
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -476,23 +476,50 @@ std::vector<column_view> split(column_view const& input, std::vector<size_type>
*/
std::vector<table_view> split(table_view const& input, std::vector<size_type> const& splits);

/**
 * @brief A table's columns serialized into a pair of contiguous buffers
 *
 * @ingroup copy_split
 *
 * The serialized form is split across two contiguous buffers: a host buffer holding the
 * table metadata and a device buffer holding the table data.
 */
struct packed_columns {
  /**
   * @brief Host-resident metadata bytes from which `unpack` reconstructs the columns.
   *
   * @ingroup copy_split
   */
  struct metadata {
    /// Takes ownership of the serialized metadata bytes by moving from `bytes`.
    metadata(std::vector<uint8_t>&& bytes) : data_(std::move(bytes)) {}

    /// @return Read-only pointer to the stored metadata bytes
    uint8_t const* data() const
    {
      return data_.data();
    }

    /// @return Number of stored metadata bytes
    size_t size() const
    {
      return data_.size();
    }

   private:
    std::vector<uint8_t> data_;  ///< Owned storage for the metadata bytes
  };

  std::unique_ptr<metadata> metadata_;           ///< Host-side table metadata
  std::unique_ptr<rmm::device_buffer> gpu_data;  ///< Device-side table data
};

/**
* @brief The result(s) of a `contiguous_split`
*
* @ingroup copy_split
*
* Each table_view resulting from a split operation performed by contiguous_split,
* will be returned wrapped in a `contiguous_split_result`. The table_view and internal
* will be returned wrapped in a `packed_table`. The table_view and internal
* column_views in this struct are not owned by a top level cudf::table or cudf::column.
* The backing memory is instead owned by the `all_data` field and in one
* The backing memory and metadata is instead owned by the `data` field and is in one
* contiguous block.
*
* The user is responsible for assuring that the `table` or any derived table_views do
* not outlive the memory owned by `all_data`
* not outlive the memory owned by `data`
*/
struct contiguous_split_result {
struct packed_table {
cudf::table_view table;
std::unique_ptr<rmm::device_buffer> all_data;
packed_columns data;
};

/**
Expand All @@ -502,7 +529,7 @@ struct contiguous_split_result {
* @ingroup copy_split
*
* The memory for the output views is allocated in a single contiguous `rmm::device_buffer` returned
* in the `contiguous_split_result`. There is no top-level owning table.
* in the `packed_table`. There is no top-level owning table.
*
* The returned views of `input` are constructed from a vector of indices, that indicate
* where each split should occur. The `i`th returned `table_view` is sliced as
Expand All @@ -514,7 +541,7 @@ struct contiguous_split_result {
*
* @note It is the caller's responsibility to ensure that the returned views
* do not outlive the viewed device memory contained in the `data` field of the
* returned contiguous_split_result.
* returned packed_table.
*
* @code{.pseudo}
* Example:
Expand All @@ -536,11 +563,78 @@ struct contiguous_split_result {
* @return The set of requested views of `input` indicated by the `splits` and the viewed memory
* buffer.
*/
std::vector<contiguous_split_result> contiguous_split(
std::vector<packed_table> contiguous_split(
cudf::table_view const& input,
std::vector<size_type> const& splits,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Deep-copy a `table_view` into a serialized contiguous memory format
*
* The metadata from the `table_view` is copied into a host vector of bytes and the data from the
* `table_view` is copied into a `device_buffer`. Pass the output of this function into
* `cudf::unpack` to deserialize.
*
* @param[in] input View of the table to pack
* @param[in] mr Optional. The resource to use for all returned device allocations; defaults to
* `rmm::mr::get_current_device_resource()`
* @return A `packed_columns` struct containing the serialized metadata and data in contiguous host
* and device memory respectively
*/
packed_columns pack(cudf::table_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Produce the metadata used for packing a table stored in a contiguous buffer.
*
* The metadata from the `table_view` is copied into a host vector of bytes which can be used to
* construct a `packed_columns` or `packed_table` structure. The caller is responsible for
* guaranteeing that all of the columns in the table point into `contiguous_buffer`.
*
* @param table View of the table to pack
* @param contiguous_buffer A contiguous buffer of device memory which contains the data referenced
* by the columns in `table`
* @param buffer_size The size of `contiguous_buffer` (presumably in bytes, given the `uint8_t`
* pointer -- TODO confirm).
* @return A `packed_columns::metadata` holding the bytes used to `unpack` a packed_columns struct.
*/
packed_columns::metadata pack_metadata(table_view const& table,
uint8_t const* contiguous_buffer,
size_t buffer_size);

/**
* @brief Deserialize the result of `cudf::pack`
*
* Converts the result of a serialized table into a `table_view` that points to the data stored in
* the contiguous device buffer contained in `input`.
*
* It is the caller's responsibility to ensure that the `table_view` in the output does not outlive
* the data in the input.
*
* No new device memory is allocated in this function.
*
* @param input The packed columns to unpack; taken by const reference, so ownership of its
* buffers stays with the caller
* @return The unpacked `table_view`, which refers to (and does not own) the device data in `input`
*/
table_view unpack(packed_columns const& input);

/**
* @brief Deserialize the result of `cudf::pack`
*
* Converts the result of a serialized table into a `table_view` that points to the data stored in
* the contiguous device buffer contained in `gpu_data` using the metadata contained in the host
* buffer `metadata`.
*
* It is the caller's responsibility to ensure that the `table_view` in the output does not outlive
* the data in the input.
*
* No new device memory is allocated in this function.
*
* NOTE(review): neither buffer's size is passed to this overload, so presumably `metadata` must
* be exactly the bytes produced by `pack()` / `pack_metadata()` -- confirm against the
* implementation.
*
* @param metadata The host-side metadata buffer resulting from the initial pack() call
* @param gpu_data The device-side contiguous buffer storing the data that will be referenced by
* the resulting `table_view`
* @return The unpacked `table_view`
*/
table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);

/**
* @brief Returns a new column, where each element is selected from either @p lhs or
* @p rhs based on the value of the corresponding element in @p boolean_mask
Expand Down
15 changes: 12 additions & 3 deletions cpp/include/cudf/detail/copy.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION.
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -92,13 +92,22 @@ std::unique_ptr<column> shift(
* @copydoc cudf::contiguous_split
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<contiguous_split_result> contiguous_split(
**/
std::vector<packed_table> contiguous_split(
cudf::table_view const& input,
std::vector<size_type> const& splits,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::pack
*
* Takes an explicit `stream` parameter in addition to the parameters documented for `cudf::pack`.
*
* @param stream Optional CUDA stream on which to execute kernels; defaults to
* `rmm::cuda_stream_default`
**/
packed_columns pack(cudf::table_view const& input,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy,
* rmm::mr::device_memory_resource*)
Expand Down
29 changes: 28 additions & 1 deletion cpp/include/cudf/detail/sorting.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -63,5 +63,32 @@ std::unique_ptr<table> sort_by_key(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::segmented_sorted_order
*
* Takes an explicit `stream` parameter in addition to the parameters documented for
* `cudf::segmented_sorted_order`. In this declaration `column_order` and `null_precedence` both
* default to empty vectors.
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches. Defaults
* to `rmm::cuda_stream_default`.
*/
std::unique_ptr<column> segmented_sorted_order(
table_view const& keys,
column_view const& segment_offsets,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::segmented_sort_by_key
*
* Takes an explicit `stream` parameter in addition to the parameters documented for
* `cudf::segmented_sort_by_key`. In this declaration `column_order` and `null_precedence` both
* default to empty vectors.
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches. Defaults
* to `rmm::cuda_stream_default`.
*/
std::unique_ptr<table> segmented_sort_by_key(
table_view const& values,
table_view const& keys,
column_view const& segment_offsets,
std::vector<order> const& column_order = {},
std::vector<null_order> const& null_precedence = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace cudf
14 changes: 9 additions & 5 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/detail/utilities.cuh>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/utilities.cuh>
Expand Down Expand Up @@ -333,7 +333,8 @@ struct list_child_constructor {
auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view);
auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view);

auto const num_child_rows{get_num_child_rows(list_offsets, stream)};
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

auto const child_null_mask =
source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable()
Expand Down Expand Up @@ -427,7 +428,8 @@ struct list_child_constructor {
auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view);
auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view);

int32_t num_child_rows{get_num_child_rows(list_offsets, stream)};
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

auto string_views = rmm::device_vector<string_view>(num_child_rows);

Expand Down Expand Up @@ -516,7 +518,8 @@ struct list_child_constructor {
auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view);
auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view);

auto num_child_rows = get_num_child_rows(list_offsets, stream);
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

auto child_list_views = rmm::device_uvector<unbound_list_view>(num_child_rows, stream, mr);

Expand Down Expand Up @@ -621,7 +624,8 @@ struct list_child_constructor {
auto const source_structs = source_lists_column_view.child();
auto const target_structs = target_lists_column_view.child();

auto const num_child_rows = get_num_child_rows(list_offsets, stream);
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

auto const num_struct_members =
std::distance(source_structs.child_begin(), source_structs.child_end());
Expand Down
46 changes: 0 additions & 46 deletions cpp/include/cudf/lists/detail/utilities.cuh

This file was deleted.

25 changes: 24 additions & 1 deletion cpp/include/cudf/lists/list_device_view.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,4 +190,27 @@ class list_device_view {
};
};

/**
 * @brief Functor that returns the size of the list at a given row index.
 *
 * Wraps a `column_device_view` whose type must be `LIST`. The size of a row is the difference
 * between consecutive entries of the offsets child column; a null row reports a size of zero.
 */
struct list_size_functor {
  column_device_view const d_column;  ///< The lists column whose row sizes are queried

  /**
   * @brief Constructs the functor and checks that `d_col` is a lists column.
   *
   * The check is a `release_assert` when compiled for device code (`__CUDA_ARCH__` defined) and a
   * `CUDF_EXPECTS` otherwise.
   *
   * @param d_col Device view of the lists column
   */
  CUDA_HOST_DEVICE_CALLABLE list_size_functor(column_device_view const& d_col) : d_column(d_col)
  {
#if defined(__CUDA_ARCH__)
    release_assert(d_col.type().id() == type_id::LIST && "Only list type column is supported");
#else
    CUDF_EXPECTS(d_col.type().id() == type_id::LIST, "Only list type column is supported");
#endif
  }

  /**
   * @brief Returns the number of elements in the list at row `idx`.
   *
   * Const-qualified because it only reads the (const) wrapped column view; this lets the functor
   * be invoked through a const object or reference as well as a mutable one.
   *
   * @param idx Row index into the lists column
   * @return Size of the list at `idx`, or 0 if that row is null
   */
  CUDA_DEVICE_CALLABLE size_type operator()(size_type idx) const
  {
    if (d_column.is_null(idx)) return size_type{0};
    // Shift the offsets pointer by the parent column's offset before indexing by row.
    auto const d_offsets =
      d_column.child(lists_column_view::offsets_column_index).data<size_type>() + d_column.offset();
    return d_offsets[idx + 1] - d_offsets[idx];
  }
};

} // namespace cudf
Loading