Skip to content

Commit

Permalink
Support create lists column from a list_scalar (#8185)
Browse files Browse the repository at this point in the history
This PR adds support for `list_scalar` to `make_column_from_scalar`. For 0-length columns, a well-formed `LIST` type column is returned, whose child column has the same column hierarchy as the row data stored in the `list_scalar`.

Example:
```
slr.data = [1, 2, 3] // An integer list of 1, 2, 3, `data` is an INT column
make_column_from_scalar(slr, 2) // List<int> column: {[1, 2, 3], [1, 2, 3]}, whose child column is an `INT` column.

slr.data = [[1, 2], [3]] // A list of integer lists, `data` is a List<int> column
make_column_from_scalar(slr, 0) // Well formed, 0-length List<List<int>> column, whose child column is a List<int> column.
```

Closes #8088

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Devavret Makkar (https://github.com/devavret)
  - Mark Harris (https://github.com/harrism)

URL: #8185
  • Loading branch information
isVoid authored May 20, 2021
1 parent 2b9fc62 commit c732cef
Show file tree
Hide file tree
Showing 6 changed files with 413 additions and 4 deletions.
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ test:
- test -f $PREFIX/include/cudf/join.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp
- test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp
- test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
- test -f $PREFIX/include/cudf/lists/detail/sorting.hpp
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cudf/column/column_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,8 @@ std::unique_ptr<cudf::column> make_structs_column(
*
* The output column will have the same type as `s.type()`
* The output column will contain all null rows if `s.is_valid()==false`
* The output column will be empty if `size==0`.
* The output column will be empty if `size==0`. For LIST scalars, the column hierarchy
* from @p s is preserved.
*
* @param[in] s The scalar to use for values in the column.
* @param[in] size The number of rows for the output column.
Expand Down
42 changes: 42 additions & 0 deletions cpp/include/cudf/lists/lists_column_factories.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <memory>

namespace cudf {
namespace lists {
namespace detail {

/**
 * @brief Internal API to construct a lists column from a `list_scalar`.
 *
 * For public use, call `cudf::make_column_from_scalar` instead.
 *
 * When `size == 0`, a well-formed, 0-length lists column is returned whose child column
 * has the same column hierarchy as the row data stored in @p value.
 *
 * @param[in] value The `list_scalar` to construct from
 * @param[in] size The number of rows for the output column.
 * @param[in] stream CUDA stream used for device memory operations and kernel launches.
 * @param[in] mr Device memory resource used to allocate the returned column's device memory.
 * @return A lists column with `size` rows, each row built from the list stored in @p value.
 */
std::unique_ptr<cudf::column> make_lists_column_from_scalar(
  list_scalar const& value,
  size_type size,
  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

}  // namespace detail
}  // namespace lists
}  // namespace cudf
8 changes: 6 additions & 2 deletions cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/detail/fill.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/lists/lists_column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/fill.hpp>

Expand All @@ -32,6 +33,7 @@ struct column_from_scalar_dispatch {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
if (size == 0) return make_empty_column(value.type());
if (!value.is_valid())
return make_fixed_width_column(value.type(), size, mask_state::ALL_NULL, stream, mr);
auto output_column =
Expand All @@ -49,6 +51,7 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stri
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
if (size == 0) return make_empty_column(value.type());
auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);

if (!value.is_valid())
Expand Down Expand Up @@ -84,7 +87,8 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::list
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
CUDF_FAIL("TODO");
auto lv = static_cast<list_scalar const*>(&value);
return lists::detail::make_lists_column_from_scalar(*lv, size, stream, mr);
}

template <>
Expand All @@ -94,6 +98,7 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stru
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
if (size == 0) CUDF_FAIL("0-length struct column is unsupported.");
auto ss = static_cast<scalar_type_t<cudf::struct_view> const&>(value);
auto iter = thrust::make_constant_iterator(0);

Expand All @@ -117,7 +122,6 @@ std::unique_ptr<column> make_column_from_scalar(scalar const& s,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (size == 0) return make_empty_column(s.type());
return type_dispatcher(s.type(), column_from_scalar_dispatch{}, s, size, stream, mr);
}

Expand Down
67 changes: 66 additions & 1 deletion cpp/src/lists/lists_column_factories.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,10 +16,75 @@

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/gather.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/sequence.h>

namespace cudf {
namespace lists {
namespace detail {

/**
 * @brief Constructs a lists column with `size` rows from the list stored in `value`.
 *
 * - `size == 0`: returns a 0-length lists column whose child is `empty_like(value.view())`,
 *   so the nested column hierarchy of the scalar is kept.
 * - `size == 1`: a one-row lists column is assembled from a copy of the scalar's data and
 *   returned directly.
 * - `size > 1`: a temporary one-row column view is assembled over the scalar's data and
 *   row 0 is gathered `size` times.
 *
 * If `value` is not valid, the one-row source column is built with an all-null mask.
 *
 * @param value  The `list_scalar` to construct from.
 * @param size   The number of rows for the output column.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr     Device memory resource used to allocate the returned column's device memory.
 * @return The constructed lists column.
 */
std::unique_ptr<cudf::column> make_lists_column_from_scalar(list_scalar const& value,
                                                            size_type size,
                                                            rmm::cuda_stream_view stream,
                                                            rmm::mr::device_memory_resource* mr)
{
  if (size == 0) {
    return make_lists_column(0,
                             make_empty_column(data_type{type_to_id<offset_type>()}),
                             empty_like(value.view()),
                             0,
                             cudf::detail::create_null_mask(0, mask_state::UNALLOCATED, stream, mr),
                             stream,
                             mr);
  }

  // The one-row column is the returned column only when `size == 1`. For larger sizes it is
  // merely the gather source, so it is allocated from the current device resource instead of
  // the caller's `mr`.
  auto one_row_mr = size == 1 ? mr : rmm::mr::get_current_device_resource();

  // Handcraft a 1-row column: offsets are {0, value.view().size()}.
  auto offsets = make_numeric_column(
    data_type{type_to_id<offset_type>()}, 2, mask_state::UNALLOCATED, stream, one_row_mr);
  auto m_offsets = offsets->mutable_view();
  thrust::sequence(rmm::exec_policy(stream),
                   m_offsets.begin<size_type>(),
                   m_offsets.end<size_type>(),
                   0,
                   value.view().size());
  size_type null_count = value.is_valid(stream) ? 0 : 1;
  auto null_mask_state = null_count ? mask_state::ALL_NULL : mask_state::UNALLOCATED;
  auto null_mask       = cudf::detail::create_null_mask(1, null_mask_state, stream, one_row_mr);

  if (size == 1) {
    auto child = std::make_unique<column>(value.view(), stream, one_row_mr);
    return make_lists_column(1,
                             std::move(offsets),
                             std::move(child),
                             null_count,
                             std::move(null_mask),
                             stream,
                             one_row_mr);
  }

  // Non-owning view over the temporary offsets/mask and the scalar's own data.
  auto children_views   = std::vector<column_view>{offsets->view(), value.view()};
  auto one_row_col_view = column_view(data_type{type_id::LIST},
                                      1,
                                      nullptr,
                                      static_cast<bitmask_type const*>(null_mask.data()),
                                      null_count,
                                      0,
                                      children_views);

  // Replicate row 0 `size` times. The gathered column is the function's result, so it must be
  // allocated from the caller-supplied `mr`, not from the resource used for temporaries.
  auto begin = thrust::make_constant_iterator(0);
  auto res   = cudf::detail::gather(table_view({one_row_col_view}),
                                  begin,
                                  begin + size,
                                  out_of_bounds_policy::DONT_CHECK,
                                  stream,
                                  mr);
  return std::move(res->release()[0]);
}

} // namespace detail
} // namespace lists

/**
* @copydoc cudf::make_lists_column
Expand Down
Loading

0 comments on commit c732cef

Please sign in to comment.