Skip to content

Commit

Permalink
Add detail interface for split and slice(table_view), refactors b…
Browse files Browse the repository at this point in the history
…oth function with `host_span` (#9226)

`cudf::detail::slice` performs a `segmented_count_unset_bits`, which requires stream ordering. However, the `split` interface that depends on it has no internal (detail) version that accepts a `stream` argument, and the same is true for `slice(table_view)`. This PR adds those stream-accepting detail interfaces.

In addition, the slice/split interfaces are refactored to accept a `host_span` for specifying indices/splits, and are overloaded for `std::initializer_list`. This allows the argument to be passed as either a container or a brace-init-list.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - H. Thomson Comer (https://github.com/thomcom)
  - David Wendt (https://github.com/davidwendt)

URL: #9226
  • Loading branch information
isVoid authored Oct 7, 2021
1 parent 187ab65 commit c6bc111
Show file tree
Hide file tree
Showing 12 changed files with 199 additions and 43 deletions.
36 changes: 28 additions & 8 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,10 +407,15 @@ std::unique_ptr<column> shift(
* the range [0, input.size()).
*
* @param input View of column to slice
* @param indices A vector of indices used to take slices of `input`.
* @param indices Indices used to take slices of `input`.
* @return Vector of views of `input` indicated by the ranges in `indices`.
*/
std::vector<column_view> slice(column_view const& input, std::vector<size_type> const& indices);
std::vector<column_view> slice(column_view const& input, host_span<size_type const> indices);
/**
* @ingroup copy_slice
* @copydoc cudf::slice(column_view const&, host_span<size_type const>)
*/
std::vector<column_view> slice(column_view const& input, std::initializer_list<size_type> indices);

/**
* @brief Slices a `table_view` into a set of `table_view`s according to a set of indices.
Expand Down Expand Up @@ -441,10 +446,15 @@ std::vector<column_view> slice(column_view const& input, std::vector<size_type>
* the range [0, input.size()).
*
* @param input View of table to slice
* @param indices A vector of indices used to take slices of `input`.
* @param indices Indices used to take slices of `input`.
* @return Vector of views of `input` indicated by the ranges in `indices`.
*/
std::vector<table_view> slice(table_view const& input, std::vector<size_type> const& indices);
std::vector<table_view> slice(table_view const& input, host_span<size_type const> indices);
/**
* @ingroup copy_slice
* @copydoc cudf::slice(table_view const&, host_span<size_type const>)
*/
std::vector<table_view> slice(table_view const& input, std::initializer_list<size_type> indices);

/**
* @brief Splits a `column_view` into a set of `column_view`s according to a set of indices
Expand Down Expand Up @@ -475,10 +485,15 @@ std::vector<table_view> slice(table_view const& input, std::vector<size_type> co
* @throws cudf::logic_error When the values in the `splits` are 'strictly decreasing'.
*
* @param input View of column to split
* @param splits A vector of indices where the view will be split
* @param splits Indices where the view will be split
* @return The set of requested views of `input` indicated by the `splits`.
*/
std::vector<column_view> split(column_view const& input, std::vector<size_type> const& splits);
std::vector<column_view> split(column_view const& input, host_span<size_type const> splits);
/**
* @ingroup copy_split
* @copydoc cudf::split(column_view const&, host_span<size_type const>)
*/
std::vector<column_view> split(column_view const& input, std::initializer_list<size_type> splits);

/**
* @brief Splits a `table_view` into a set of `table_view`s according to a set of indices
Expand Down Expand Up @@ -511,10 +526,15 @@ std::vector<column_view> split(column_view const& input, std::vector<size_type>
* @throws cudf::logic_error When the values in the `splits` are 'strictly decreasing'.
*
* @param input View of a table to split
* @param splits A vector of indices where the view will be split
* @param splits Indices where the view will be split
* @return The set of requested views of `input` indicated by the `splits`.
*/
std::vector<table_view> split(table_view const& input, std::vector<size_type> const& splits);
std::vector<table_view> split(table_view const& input, host_span<size_type const> splits);
/**
* @ingroup copy_split
* @copydoc cudf::split(table_view const&, host_span<size_type const>)
*/
std::vector<table_view> split(table_view const& input, std::initializer_list<size_type> splits);

/**
* @brief Column data in a serialized format
Expand Down
61 changes: 57 additions & 4 deletions cpp/include/cudf/detail/copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@
#include <cudf/column/column_view.hpp>
#include <cudf/copying.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <initializer_list>

namespace cudf {
namespace detail {
/**
Expand Down Expand Up @@ -67,21 +70,71 @@ ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type
}

/**
* @copydoc cudf::slice(column_view const&,std::vector<size_type> const&)
* @copydoc cudf::slice(column_view const&, host_span<size_type const>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<column_view> slice(column_view const& input,
std::vector<size_type> const& indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
/**
* @copydoc cudf::slice(column_view const&, std::initializer_list<size_type>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<column_view> slice(column_view const& input,
std::initializer_list<size_type> indices,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

/**
* @copydoc cudf::slice(table_view const&,std::vector<size_type> const&)
* @copydoc cudf::slice(table_view const&, host_span<size_type const>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<table_view> slice(table_view const& input,
host_span<size_type const> indices,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
/**
* @copydoc cudf::slice(table_view const&, std::initializer_list<size_type>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<table_view> slice(table_view const& input,
std::vector<size_type> const& indices,
std::initializer_list<size_type> indices,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

/**
* @copydoc cudf::split(column_view const&, host_span<size_type const>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<column_view> split(column_view const& input,
host_span<size_type const> splits,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
/**
* @copydoc cudf::split(column_view const&, std::initializer_list<size_type>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<column_view> split(column_view const& input,
std::initializer_list<size_type> splits,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

/**
* @copydoc cudf::split(table_view const&, host_span<size_type const>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<table_view> split(table_view const& input,
host_span<size_type const> splits,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
/**
* @copydoc cudf::split(table_view const&, std::initializer_list<size_type>)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<table_view> split(table_view const& input,
std::initializer_list<size_type> splits,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

/**
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/copying/sample.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ std::unique_ptr<table> sample(table_view const& input,
gather_map_mutable_view.begin<size_type>(),
thrust::default_random_engine(seed));

auto gather_map_view =
(n == num_rows) ? gather_map->view() : cudf::slice(gather_map->view(), {0, n})[0];
auto gather_map_view = (n == num_rows)
? gather_map->view()
: cudf::detail::slice(gather_map->view(), {0, n}, stream)[0];
return detail::gather(input,
gather_map_view.begin<size_type>(),
gather_map_view.end<size_type>(),
Expand Down
39 changes: 31 additions & 8 deletions cpp/src/copying/slice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
namespace cudf {
namespace detail {
std::vector<column_view> slice(column_view const& input,
std::vector<size_type> const& indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream)
{
CUDF_EXPECTS(indices.size() % 2 == 0, "indices size must be even");
Expand Down Expand Up @@ -64,16 +64,15 @@ std::vector<column_view> slice(column_view const& input,
}

std::vector<table_view> slice(table_view const& input,
std::vector<size_type> const& indices,
host_span<size_type const> indices,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(indices.size() % 2 == 0, "indices size must be even");
if (indices.empty()) { return {}; }

// 2d arrangement of column_views that represent the outgoing table_views sliced_table[i][j]
// where i is the i'th column of the j'th table_view
auto op = [&indices, stream](auto const& c) { return cudf::detail::slice(c, indices, stream); };
auto op = [&indices, &stream](auto const& c) { return cudf::detail::slice(c, indices, stream); };
auto f = thrust::make_transform_iterator(input.begin(), op);

auto sliced_table = std::vector<std::vector<cudf::column_view>>(f, f + input.num_columns());
Expand All @@ -93,20 +92,44 @@ std::vector<table_view> slice(table_view const& input,
return result;
}

/**
 * @brief Brace-init-list overload of `detail::slice` for a `column_view`.
 *
 * Views the elements of `indices` as a `host_span<size_type const>` and forwards to the
 * `host_span` overload together with `stream`.
 *
 * @param input View of column to slice
 * @param indices Indices used to take slices of `input`
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @return Vector of views of `input` indicated by the ranges in `indices`
 */
std::vector<column_view> slice(column_view const& input,
                               std::initializer_list<size_type> indices,
                               rmm::cuda_stream_view stream)
{
  // Non-owning view over the initializer_list's elements (pointer + element count).
  auto const indices_span = host_span<size_type const>(indices.begin(), indices.size());
  return slice(input, indices_span, stream);
}

/**
 * @brief Brace-init-list overload of `detail::slice` for a `table_view`.
 *
 * Views the elements of `indices` as a `host_span<size_type const>` and forwards to the
 * `host_span` overload together with `stream`.
 *
 * @param input View of table to slice
 * @param indices Indices used to take slices of `input`
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @return Vector of views of `input` indicated by the ranges in `indices`
 */
std::vector<table_view> slice(table_view const& input,
                              std::initializer_list<size_type> indices,
                              rmm::cuda_stream_view stream)
{
  return slice(input, host_span<size_type const>(indices.begin(), indices.size()), stream);
}

} // namespace detail

std::vector<cudf::column_view> slice(cudf::column_view const& input,
std::vector<size_type> const& indices)
std::vector<column_view> slice(column_view const& input, host_span<size_type const> indices)
{
CUDF_FUNC_RANGE();
return detail::slice(input, indices, rmm::cuda_stream_default);
}

std::vector<cudf::table_view> slice(cudf::table_view const& input,
std::vector<size_type> const& indices)
std::vector<table_view> slice(table_view const& input, host_span<size_type const> indices)
{
CUDF_FUNC_RANGE();
return detail::slice(input, indices, rmm::cuda_stream_default);
};

/**
 * @brief Public brace-init-list overload of `slice` for a `column_view`.
 *
 * Forwards to `detail::slice`, passing `rmm::cuda_stream_default` as the stream.
 *
 * @param input View of column to slice
 * @param indices Indices used to take slices of `input`
 * @return Vector of views of `input` indicated by the ranges in `indices`
 */
std::vector<column_view> slice(column_view const& input, std::initializer_list<size_type> indices)
{
  CUDF_FUNC_RANGE();
  // The public API has no stream parameter, so the default stream is used.
  auto const default_stream = rmm::cuda_stream_default;
  return detail::slice(input, indices, default_stream);
}

/**
 * @brief Public brace-init-list overload of `slice` for a `table_view`.
 *
 * Forwards to `detail::slice`, passing `rmm::cuda_stream_default` as the stream.
 *
 * @param input View of table to slice
 * @param indices Indices used to take slices of `input`
 * @return Vector of views of `input` indicated by the ranges in `indices`
 */
std::vector<table_view> slice(table_view const& input, std::initializer_list<size_type> indices)
{
  CUDF_FUNC_RANGE();
  return detail::slice(input, indices, rmm::cuda_stream_default);
}

} // namespace cudf
67 changes: 58 additions & 9 deletions cpp/src/copying/split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,22 @@
*/

#include <cudf/column/column.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <algorithm>

namespace cudf {
namespace detail {
namespace {
template <typename T>
std::vector<T> split(T const& input, size_type column_size, std::vector<size_type> const& splits)
std::vector<T> split(T const& input,
size_type column_size,
host_span<size_type const> splits,
rmm::cuda_stream_view stream)
{
if (splits.empty() or column_size == 0) { return std::vector<T>{input}; }
CUDF_EXPECTS(splits.back() <= column_size, "splits can't exceed size of input columns");
Expand All @@ -38,24 +44,67 @@ std::vector<T> split(T const& input, size_type column_size, std::vector<size_typ

indices.push_back(column_size); // This to include rest of the elements

return cudf::slice(input, indices);
return detail::slice(input, indices, stream);
}

}; // anonymous namespace

std::vector<cudf::column_view> split(cudf::column_view const& input,
std::vector<size_type> const& splits)
host_span<size_type const> splits,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
return split(input, input.size(), splits);
return split(input, input.size(), splits, stream);
}

std::vector<cudf::table_view> split(cudf::table_view const& input,
std::vector<size_type> const& splits)
host_span<size_type const> splits,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
std::vector<table_view> result{};
if (input.num_columns() == 0) { return result; }
return split(input, input.column(0).size(), splits);
return split(input, input.column(0).size(), splits, stream);
}

/**
 * @brief Brace-init-list overload of `detail::split` for a `column_view`.
 *
 * Views the elements of `splits` as a `host_span<size_type const>` and forwards to the
 * `host_span` overload together with `stream`.
 *
 * @param input View of column to split
 * @param splits Indices where the view will be split
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @return The set of requested views of `input` indicated by the `splits`
 */
std::vector<column_view> split(column_view const& input,
                               std::initializer_list<size_type> splits,
                               rmm::cuda_stream_view stream)
{
  // Non-owning view over the initializer_list's elements (pointer + element count).
  auto const splits_span = host_span<size_type const>(splits.begin(), splits.size());
  return split(input, splits_span, stream);
}

/**
 * @brief Brace-init-list overload of `detail::split` for a `table_view`.
 *
 * Views the elements of `splits` as a `host_span<size_type const>` and forwards to the
 * `host_span` overload of `detail::split` together with `stream`.
 *
 * @param input View of a table to split
 * @param splits Indices where the view will be split
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @return The set of requested views of `input` indicated by the `splits`
 */
std::vector<table_view> split(table_view const& input,
                              std::initializer_list<size_type> splits,
                              rmm::cuda_stream_view stream)
{
  // Non-owning view over the initializer_list's elements (pointer + element count).
  auto const splits_span = host_span<size_type const>(splits.begin(), splits.size());
  return detail::split(input, splits_span, stream);
}

} // namespace detail

std::vector<cudf::column_view> split(cudf::column_view const& input,
host_span<size_type const> splits)
{
CUDF_FUNC_RANGE();
return detail::split(input, splits, rmm::cuda_stream_default);
}

std::vector<cudf::table_view> split(cudf::table_view const& input,
host_span<size_type const> splits)
{
CUDF_FUNC_RANGE();
return detail::split(input, splits, rmm::cuda_stream_default);
}

/**
 * @brief Public brace-init-list overload of `split` for a `column_view`.
 *
 * Forwards to `detail::split`, passing `rmm::cuda_stream_default` as the stream.
 *
 * @param input View of column to split
 * @param splits Indices where the view will be split
 * @return The set of requested views of `input` indicated by the `splits`
 */
std::vector<column_view> split(column_view const& input, std::initializer_list<size_type> splits)
{
  CUDF_FUNC_RANGE();
  // The public API has no stream parameter, so the default stream is used.
  auto const default_stream = rmm::cuda_stream_default;
  return detail::split(input, splits, default_stream);
}

/**
 * @brief Public brace-init-list overload of `split` for a `table_view`.
 *
 * Forwards to `detail::split`, passing `rmm::cuda_stream_default` as the stream.
 *
 * @param input View of a table to split
 * @param splits Indices where the view will be split
 * @return The set of requested views of `input` indicated by the `splits`
 */
std::vector<table_view> split(table_view const& input, std::initializer_list<size_type> splits)
{
  CUDF_FUNC_RANGE();
  // The public API has no stream parameter, so the default stream is used.
  auto const default_stream = rmm::cuda_stream_default;
  return detail::split(input, splits, default_stream);
}

} // namespace cudf
6 changes: 4 additions & 2 deletions cpp/src/dictionary/encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

#include <cudf/column/column.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/transform.hpp>
Expand Down Expand Up @@ -53,7 +53,9 @@ std::unique_ptr<column> encode(column_view const& input_column,

if (keys_column->has_nulls()) {
keys_column = std::make_unique<column>(
slice(keys_column->view(), std::vector<size_type>{0, keys_column->size() - 1}).front(),
cudf::detail::slice(
keys_column->view(), std::vector<size_type>{0, keys_column->size() - 1}, stream)
.front(),
stream,
mr);
keys_column->set_null_mask(rmm::device_buffer{0, stream, mr}, 0); // remove the null-mask
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include "writer_impl.hpp"

#include <cudf/column/column_device_view.cuh>
#include <cudf/copying.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar.hpp>
Expand Down Expand Up @@ -423,7 +423,7 @@ void writer::impl::write(table_view const& table,
});

// split table_view into chunks:
vector_views = cudf::split(table, splits);
vector_views = cudf::detail::split(table, splits, stream);
}

// convert each chunk to CSV:
Expand Down
Loading

0 comments on commit c6bc111

Please sign in to comment.