rapidsai · nvdbaranec · Feb 4, 2021 · Feb 4, 2021 · Feb 4, 2021 · Feb 4, 2021
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
 {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
 {% set minor_version =  version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -128,6 +128,7 @@ test:
     - test -f $PREFIX/include/cudf/lists/contains.hpp
     - test -f $PREFIX/include/cudf/lists/gather.hpp
     - test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
+    - test -f $PREFIX/include/cudf/lists/sorting.hpp
     - test -f $PREFIX/include/cudf/merge.hpp
     - test -f $PREFIX/include/cudf/null_mask.hpp
     - test -f $PREFIX/include/cudf/partitioning.hpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -476,23 +476,50 @@ std::vector<column_view> split(column_view const& input, std::vector<size_type>
  */
 std::vector<table_view> split(table_view const& input, std::vector<size_type> const& splits);
 
+/**
+ * @brief Column data in a serialized format
+ *
+ * @ingroup copy_split
+ *
+ * Contains data from an array of columns in two contiguous buffers: one on host, which contains
+ * table metadata and one on device which contains the table data.
+ */
+struct packed_columns {
+  /**
+   * @brief Host-side metadata buffer used for reconstructing columns via unpack.
+   *
+   * @ingroup copy_split
+   */
+  struct metadata {
+    metadata(std::vector<uint8_t>&& v) : data_(std::move(v)) {}
+    uint8_t const* data() const { return data_.data(); }
+    size_t size() const { return data_.size(); }
+
+   private:
+    std::vector<uint8_t> data_;
+  };
+
+  std::unique_ptr<metadata> metadata_;
+  std::unique_ptr<rmm::device_buffer> gpu_data;
+};
+
 /**
  * @brief The result(s) of a `contiguous_split`
  *
  * @ingroup copy_split
  *
  * Each table_view resulting from a split operation performed by contiguous_split,
- * will be returned wrapped in a `contiguous_split_result`.  The table_view and internal
+ * will be returned wrapped in a `packed_table`.  The table_view and internal
  * column_views in this struct are not owned by a top level cudf::table or cudf::column.
- * The backing memory is instead owned by the `all_data` field and in one
+ * The backing memory and metadata is instead owned by the `data` field and is in one
  * contiguous block.
  *
  * The user is responsible for assuring that the `table` or any derived table_views do
- * not outlive the memory owned by `all_data`
+ * not outlive the memory owned by `data`
  */
-struct contiguous_split_result {
+struct packed_table {
   cudf::table_view table;
-  std::unique_ptr<rmm::device_buffer> all_data;
+  packed_columns data;
 };
 
 /**
@@ -502,7 +529,7 @@ struct contiguous_split_result {
  * @ingroup copy_split
  *
  * The memory for the output views is allocated in a single contiguous `rmm::device_buffer` returned
- * in the `contiguous_split_result`. There is no top-level owning table.
+ * in the `packed_table`. There is no top-level owning table.
  *
  * The returned views of `input` are constructed from a vector of indices, that indicate
  * where each split should occur. The `i`th returned `table_view` is sliced as
@@ -514,7 +541,7 @@ struct contiguous_split_result {
  *
  * @note It is the caller's responsibility to ensure that the returned views
  * do not outlive the viewed device memory contained in the `all_data` field of the
- * returned contiguous_split_result.
+ * returned packed_table.
  *
  * @code{.pseudo}
  * Example:
@@ -536,11 +563,78 @@ struct contiguous_split_result {
  * @return The set of requested views of `input` indicated by the `splits` and the viewed memory
  * buffer.
  */
-std::vector<contiguous_split_result> contiguous_split(
+std::vector<packed_table> contiguous_split(
   cudf::table_view const& input,
   std::vector<size_type> const& splits,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Deep-copy a `table_view` into a serialized contiguous memory format
+ *
+ * The metadata from the `table_view` is copied into a host vector of bytes and the data from the
+ * `table_view` is copied into a `device_buffer`. Pass the output of this function into
+ * `cudf::unpack` to deserialize.
+ *
+ * @param input View of the table to pack
+ * @param[in] mr Optional, The resource to use for all returned device allocations
+ * @return packed_columns A struct containing the serialized metadata and data in contiguous host
+ *         and device memory respectively
+ */
+packed_columns pack(cudf::table_view const& input,
+                    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Produce the metadata used for packing a table stored in a contiguous buffer.
+ *
+ * The metadata from the `table_view` is copied into a host vector of bytes which can be used to
+ * construct a `packed_columns` or `packed_table` structure.  The caller is responsible for
+ * guaranteeing that that all of the columns in the table point into `contiguous_buffer`.
+ *
+ * @param input View of the table to pack
+ * @param contgiuous_buffer A contiguous buffer of device memory which contains the data referenced
+ * by the columns in `table`
+ * @param buffer_size The size of `contiguous_buffer`.
+ * @return Vector of bytes representing the metadata used to `unpack` a packed_columns struct.
+ */
+packed_columns::metadata pack_metadata(table_view const& table,
+                                       uint8_t const* contiguous_buffer,
+                                       size_t buffer_size);
+
+/**
+ * @brief Deserialize the result of `cudf::pack`
+ *
+ * Converts the result of a serialized table into a `table_view` that points to the data stored in
+ * the contiguous device buffer contained in `input`.
+ *
+ * It is the caller's responsibility to ensure that the `table_view` in the output does not outlive
+ * the data in the input.
+ *
+ * No new device memory is allocated in this function.
+ *
+ * @param input The packed columns to unpack
+ * @return The unpacked `table_view`
+ */
+table_view unpack(packed_columns const& input);
+
+/**
+ * @brief Deserialize the result of `cudf::pack`
+ *
+ * Converts the result of a serialized table into a `table_view` that points to the data stored in
+ * the contiguous device buffer contained in `gpu_data` using the metadata contained in the host
+ * buffer `metadata`.
+ *
+ * It is the caller's responsibility to ensure that the `table_view` in the output does not outlive
+ * the data in the input.
+ *
+ * No new device memory is allocated in this function.
+ *
+ * @param metadata The host-side metadata buffer resulting from the initial pack() call
+ * @param gpu_data The device-side contiguous buffer storing the data that will be referenced by
+ * the resulting `table_view`
+ * @return The unpacked `table_view`
+ */
+table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);
+
 /**
  * @brief   Returns a new column, where each element is selected from either @p lhs or
  *          @p rhs based on the value of the corresponding element in @p boolean_mask

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -92,13 +92,22 @@ std::unique_ptr<column> shift(
  * @copydoc cudf::contiguous_split
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
- */
-std::vector<contiguous_split_result> contiguous_split(
+ **/
+std::vector<packed_table> contiguous_split(
   cudf::table_view const& input,
   std::vector<size_type> const& splits,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @copydoc cudf::pack
+ *
+ * @param stream Optional CUDA stream on which to execute kernels
+ **/
+packed_columns pack(cudf::table_view const& input,
+                    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+                    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy,
  * rmm::mr::device_memory_resource*)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,5 +63,32 @@ std::unique_ptr<table> sort_by_key(
   rmm::cuda_stream_view stream                   = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
+/**
+ * @copydoc cudf::segmented_sorted_order
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<column> segmented_sorted_order(
+  table_view const& keys,
+  column_view const& segment_offsets,
+  std::vector<order> const& column_order         = {},
+  std::vector<null_order> const& null_precedence = {},
+  rmm::cuda_stream_view stream                   = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::segmented_sort_by_key
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<table> segmented_sort_by_key(
+  table_view const& values,
+  table_view const& keys,
+  column_view const& segment_offsets,
+  std::vector<order> const& column_order         = {},
+  std::vector<null_order> const& null_precedence = {},
+  rmm::cuda_stream_view stream                   = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
+
 }  // namespace detail
 }  // namespace cudf
@@ -19,8 +19,8 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/get_value.cuh>
 #include <cudf/detail/valid_if.cuh>
-#include <cudf/lists/detail/utilities.cuh>
 #include <cudf/lists/list_device_view.cuh>
 #include <cudf/null_mask.hpp>
 #include <cudf/strings/detail/utilities.cuh>
@@ -333,7 +333,8 @@ struct list_child_constructor {
     auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view);
     auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view);
 
-    auto const num_child_rows{get_num_child_rows(list_offsets, stream)};
+    auto const num_child_rows{
+      cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};
 
     auto const child_null_mask =
       source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable()
@@ -427,7 +428,8 @@ struct list_child_constructor {
     auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view);
     auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view);
 
-    int32_t num_child_rows{get_num_child_rows(list_offsets, stream)};
+    auto const num_child_rows{
+      cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};
 
     auto string_views = rmm::device_vector<string_view>(num_child_rows);
 
@@ -516,7 +518,8 @@ struct list_child_constructor {
     auto source_lists = cudf::detail::lists_column_device_view(*source_column_device_view);
     auto target_lists = cudf::detail::lists_column_device_view(*target_column_device_view);
 
-    auto num_child_rows = get_num_child_rows(list_offsets, stream);
+    auto const num_child_rows{
+      cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};
 
     auto child_list_views = rmm::device_uvector<unbound_list_view>(num_child_rows, stream, mr);
 
@@ -621,7 +624,8 @@ struct list_child_constructor {
     auto const source_structs = source_lists_column_view.child();
     auto const target_structs = target_lists_column_view.child();
 
-    auto const num_child_rows = get_num_child_rows(list_offsets, stream);
+    auto const num_child_rows{
+      cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};
 
     auto const num_struct_members =
       std::distance(source_structs.child_begin(), source_structs.child_end());

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -190,4 +190,27 @@ class list_device_view {
   };
 };
 
+/**
+ * @brief returns size of the list by row index
+ *
+ */
+struct list_size_functor {
+  column_device_view const d_column;
+  CUDA_HOST_DEVICE_CALLABLE list_size_functor(column_device_view const& d_col) : d_column(d_col)
+  {
+#if defined(__CUDA_ARCH__)
+    release_assert(d_col.type().id() == type_id::LIST && "Only list type column is supported");
+#else
+    CUDF_EXPECTS(d_col.type().id() == type_id::LIST, "Only list type column is supported");
+#endif
+  }
+  CUDA_DEVICE_CALLABLE size_type operator()(size_type idx)
+  {
+    if (d_column.is_null(idx)) return size_type{0};
+    auto d_offsets =
+      d_column.child(lists_column_view::offsets_column_index).data<size_type>() + d_column.offset();
+    return d_offsets[idx + 1] - d_offsets[idx];
+  }
+};
+
 }  // namespace cudf