Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Initial support for struct columns #5807

Merged
merged 29 commits into from
Aug 13, 2020
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0683c40
[WIP] [struct] Initial commit
mythrocks Jul 17, 2020
8ed38e3
[WIP] [struct] Review comments
mythrocks Jul 30, 2020
673765b
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Jul 31, 2020
ec69d74
[struct] Added struct-support issue to CHANGELOG.md
mythrocks Jul 31, 2020
0261617
[struct] Switched to in-place bitmask_and()
mythrocks Aug 4, 2020
1577732
[struct] Minor cleanup.
mythrocks Aug 4, 2020
7e9d660
[struct] Added test for null-mask propagation (For non-null structs
mythrocks Aug 4, 2020
7e197d6
[struct] More tests and cleanup.
mythrocks Aug 4, 2020
8391618
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 4, 2020
d2216ce
[struct] Added structs to lists' tests.
mythrocks Aug 5, 2020
1d6b1ea
[struct] Added structs to lists' tests.
mythrocks Aug 5, 2020
1037904
[struct] Added doc for struct factory method.
mythrocks Aug 5, 2020
be7de05
[struct] clang-format
mythrocks Aug 6, 2020
3bdbe50
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 6, 2020
b4a879f
[struct] Added struct headers to meta.yaml
mythrocks Aug 6, 2020
5dbfd5e
[struct] Minor fixes from review:
mythrocks Aug 6, 2020
af89e97
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 6, 2020
91acab6
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 7, 2020
db85253
[struct] Switch tests to check column equivalence
mythrocks Aug 7, 2020
a48a5b7
[struct] clang-format changes in struct tests
mythrocks Aug 7, 2020
671ec1c
[struct] Corrected nullmask size check in struct factory
mythrocks Aug 7, 2020
6b9082e
[struct] Incorporated review changes.
mythrocks Aug 10, 2020
d1d7722
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 10, 2020
5ca14cb
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 11, 2020
bb93f7a
[struct] More review-related corrections
mythrocks Aug 12, 2020
906e04c
[struct] clang-format again.
mythrocks Aug 12, 2020
f6257b3
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 12, 2020
3380822
[struct] Added tests for empty columns.
mythrocks Aug 12, 2020
1c0ab90
Merge branch 'branch-0.15' into structs
harrism Aug 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
- PR #5811 Add `nvtext::edit_distance` API
- PR #5789 Add groupby support for duration types
- PR #5810 Make Cython subdirs packages and simplify package_data
- PR #5807 Initial support for struct columns
- PR #5817 Enable more `fixed_point` unit tests by introducing "scale-less" constructor
- PR #5822 Add `cudf_kafka` to `custreamz` run time conda dependency and fix bash syntax issue
- PR #5845 Add support for `mask_to_bools`
Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ test:
- test -f $PREFIX/include/cudf/strings/substring.hpp
- test -f $PREFIX/include/cudf/strings/translate.hpp
- test -f $PREFIX/include/cudf/strings/wrap.hpp
- test -f $PREFIX/include/cudf/structs/structs_column_view.hpp
- test -f $PREFIX/include/cudf/structs/struct_view.hpp
- test -f $PREFIX/include/cudf/table/table.hpp
- test -f $PREFIX/include/cudf/table/table_view.hpp
- test -f $PREFIX/include/cudf/transform.hpp
Expand Down
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,8 @@ add_library(cudf
src/lists/lists_column_view.cu
src/lists/copying/concatenate.cu
src/lists/copying/gather.cu
src/structs/structs_column_view.cu
src/structs/structs_column_factories.cu
src/text/detokenize.cu
src/text/edit_distance.cu
src/text/generate_ngrams.cu
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/lists/list_view.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/structs/struct_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/traits.hpp>
Expand Down
31 changes: 31 additions & 0 deletions cpp/include/cudf/column/column_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,37 @@ std::unique_ptr<cudf::column> make_lists_column(
cudaStream_t stream = 0,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**
 * @brief Constructs a STRUCT column using specified child columns as members.
 *
 * Specified child/member columns and null_mask are adopted by resultant
 * struct column. Both are taken by rvalue reference, so callers hand them
 * over (e.g. via std::move) rather than passing copies.
 *
 * A struct column requires that all specified child columns have the same
 * number of rows. A struct column's row count equals that of any/all
 * of its child columns. A single struct row at any index is comprised of
 * all the individual child column values at the same index, in the order
 * specified in the list of child columns.
 *
 * The specified null mask governs which struct row has a null value. This
 * is orthogonal to the null values of individual child columns.
 *
 * @param num_rows The number of struct values in the struct column.
 * @param child_columns The list of child/members that the struct is comprised of.
 * @param null_count The number of null values in the struct column.
 * @param null_mask The bits specifying the null struct values in the column.
 * @param stream Optional stream for use with all memory allocation and device kernels.
 * @param mr Optional resource to use for device memory allocation.
 * @return The constructed STRUCT column, which has adopted `child_columns` and `null_mask`.
 */
std::unique_ptr<cudf::column> make_structs_column(
size_type num_rows,
std::vector<std::unique_ptr<column>>&& child_columns,
size_type null_count,
rmm::device_buffer&& null_mask,
cudaStream_t stream = 0,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**
* @brief Return a column with size elements that are all equal to the
* given scalar.
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,16 @@ class column_view : public detail::column_view_base {
**/
size_type num_children() const noexcept { return _children.size(); }

/**
 * @brief Gets a const iterator positioned at the first child column-view.
 *
 * Children are visited in the order in which they are stored in this view.
 */
auto child_begin() const noexcept
{
  return std::cbegin(_children);
}

/**
 * @brief Gets a const iterator positioned one past the last child column-view.
 *
 * Together with child_begin() this delimits the ordered sequence of children.
 */
auto child_end() const noexcept
{
  return std::cend(_children);
}

private:
std::vector<column_view> _children{}; ///< Based on element type, children
///< may contain additional data
Expand Down
13 changes: 13 additions & 0 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,19 @@ struct column_gatherer_impl<list_view, MapItRoot> {
}
};

/**
 * @brief Placeholder specialization of column_gatherer_impl for struct_view.
 *
 * Gathering is not implemented for struct columns yet: the call operator
 * unconditionally invokes CUDF_FAIL and reads none of its arguments.
 *
 * @tparam MapItRoot Iterator type of the gather map (unused here).
 */
template <typename MapItRoot>
struct column_gatherer_impl<struct_view, MapItRoot> {
/**
 * @brief Always fails; no gather is performed.
 *
 * @param column Source column (unused).
 * @param gather_map_begin Beginning of the gather map (unused).
 * @param gather_map_end End of the gather map (unused).
 * @param nullify_out_of_bounds Unused.
 * @param stream Unused.
 * @param mr Unused.
 * @return Never returns normally — NOTE(review): assumes CUDF_FAIL throws; confirm
 *         against the macro's definition.
 */
std::unique_ptr<column> operator()(column_view const& column,
MapItRoot gather_map_begin,
MapItRoot gather_map_end,
bool nullify_out_of_bounds,
cudaStream_t stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FAIL("Gather not yet supported on struct_view.");
}
};

/**
* @brief Function object for gathering a type-erased
* column. To be used with the cudf::type_dispatcher.
Expand Down
43 changes: 39 additions & 4 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,54 @@ namespace detail {
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<size_type> segmented_count_set_bits(bitmask_type const* bitmask,
std::vector<size_type> const& indices,
std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices,
cudaStream_t stream = 0);

/**
* @copydoc cudf::segmented_count_unset_bits
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask,
std::vector<size_type> const& indices,
std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask,
std::vector<size_type> const &indices,
cudaStream_t stream = 0);

/**
 * @brief Computes the bitwise AND of a set of bitmasks and returns it as a new buffer.
 *
 * @param masks      Data pointers of the bitmasks to be combined
 * @param begin_bits For each mask, the bit offset at which ANDing starts
 * @param mask_size  Number of bits taken from each mask
 * @param stream     CUDA stream used for device memory operations and kernel launches
 * @param mr         Device memory resource used to allocate the returned device_buffer
 * @return rmm::device_buffer holding the resulting bitmask
 */
rmm::device_buffer bitmask_and(std::vector<bitmask_type const*> const& masks,
                               std::vector<size_type> const& begin_bits,
                               size_type mask_size,
                               cudaStream_t stream,
                               rmm::mr::device_memory_resource* mr);

/**
 * @brief Performs a bitwise AND of the specified bitmasks,
 *        and writes in place to destination
 *
 * Unlike bitmask_and(), this function returns nothing: the result is written
 * to `dest_mask`.
 *
 * @param dest_mask Destination to which the AND result is written
 * @param masks The list of data pointers of the bitmasks to be ANDed
 * @param begin_bits The bit offsets from which each mask is to be ANDed
 * @param mask_size The number of bits to be ANDed in each mask
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource. NOTE(review): the result goes to `dest_mask`
 *           rather than to a newly allocated buffer, so confirm what (if anything)
 *           the implementation allocates from this resource.
 */
void inplace_bitmask_and(bitmask_type* dest_mask,
                         std::vector<bitmask_type const*> const& masks,
                         std::vector<size_type> const& begin_bits,
                         size_type mask_size,
                         cudaStream_t stream,
                         rmm::mr::device_memory_resource* mr);

} // namespace detail

} // namespace cudf
34 changes: 34 additions & 0 deletions cpp/include/cudf/structs/struct_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

/**
* @file struct_view.hpp
* @brief Class definition for cudf::struct_view.
*/

namespace cudf {

/**
 * @brief A non-owning, immutable view of device data that represents
 * a struct with fields of arbitrary types (including primitives, lists,
 * and other structs)
 *
 * The class currently declares no members; it exists as a distinct C++ type
 * that can be named wherever a struct element type is required.
 */
class struct_view {
};

} // namespace cudf
44 changes: 44 additions & 0 deletions cpp/include/cudf/structs/structs_column_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>

namespace cudf {

/**
 * @brief View over a column_view that exposes only a restricted subset of the
 * column_view interface.
 *
 * Inherits privately from column_view, so none of the base-class members are
 * accessible to users except those re-exported by the using-declarations below.
 *
 * NOTE(review): presumably meant to wrap columns of type STRUCT — confirm any
 * type validation against the constructor's out-of-line definition.
 */
class structs_column_view : private column_view {
public:
// Foundation members:
// Copy/move construction, copy/move assignment and destruction are all defaulted.
structs_column_view(structs_column_view const&) = default;
structs_column_view(structs_column_view&&) = default;
~structs_column_view() = default;
structs_column_view& operator=(structs_column_view const&) = default;
structs_column_view& operator=(structs_column_view&&) = default;

/**
 * @brief Constructs a structs_column_view from an existing column_view.
 *
 * Marked explicit, so a column_view is never converted implicitly.
 * Only declared here; the definition lives outside this header.
 *
 * @param rhs The column_view to wrap.
 */
explicit structs_column_view(column_view const& rhs);

// Accessors re-exported from the privately inherited column_view.
using column_view::child_begin;
using column_view::child_end;
using column_view::has_nulls;
using column_view::null_count;
using column_view::null_mask;
using column_view::offset;
using column_view::size;

}; // class structs_column_view;

} // namespace cudf
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 4 additions & 0 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class column_view;
class mutable_column_view;
class string_view;
class list_view;
class struct_view;

class scalar;
template <typename T>
Expand Down Expand Up @@ -87,6 +88,8 @@ class duration_scalar_device_view;

class list_scalar;

class struct_scalar;

class table;
class table_view;
class mutable_table_view;
Expand Down Expand Up @@ -217,6 +220,7 @@ enum class type_id : int32_t {
LIST, ///< List elements
DECIMAL32, ///< Fixed-point type with int32_t
DECIMAL64, ///< Fixed-point type with int64_t
STRUCT, ///< Struct elements
// `NUM_TYPE_IDS` must be last!
NUM_TYPE_IDS ///< Total number of type ids
};
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cudf/wrappers/timestamps.hpp>

#include <simt/type_traits>
#include "cudf/structs/struct_view.hpp"

namespace cudf {

Expand Down Expand Up @@ -488,7 +489,7 @@ template <typename T>
constexpr inline bool is_compound()
{
return std::is_same<T, cudf::string_view>::value or std::is_same<T, cudf::dictionary32>::value or
std::is_same<T, cudf::list_view>::value;
std::is_same<T, cudf::list_view>::value or std::is_same<T, cudf::struct_view>::value;
}

struct is_compound_impl {
Expand Down Expand Up @@ -530,7 +531,7 @@ constexpr inline bool is_compound(data_type type)
template <typename T>
constexpr inline bool is_nested()
{
return std::is_same<T, cudf::list_view>::value;
return std::is_same<T, cudf::list_view>::value || std::is_same<T, cudf::struct_view>::value;
}

struct is_nested_impl {
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/utilities/type_dispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32);
CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST);
CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32);
CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64);
CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT);

template <typename T>
struct type_to_scalar_type_impl {
Expand Down Expand Up @@ -200,6 +201,12 @@ struct type_to_scalar_type_impl<cudf::list_view> {
// using ScalarDeviceType = cudf::list_scalar_device_view;
};

// Specialization that selects cudf::struct_scalar as the scalar type for cudf::struct_view.
// TODO: As with the cudf::list_view specialization above, no ScalarDeviceType is provided yet.
template <>
struct type_to_scalar_type_impl<cudf::struct_view> {
using ScalarType = cudf::struct_scalar;
// TODO: Enable the alias below once cudf::struct_scalar_device_view is available.
// using ScalarDeviceType = cudf::struct_scalar_device_view;
};

#ifndef MAP_TIMESTAMP_SCALAR
#define MAP_TIMESTAMP_SCALAR(Type) \
template <> \
Expand Down Expand Up @@ -422,6 +429,9 @@ CUDA_HOST_DEVICE_CALLABLE constexpr decltype(auto) type_dispatcher(cudf::data_ty
case type_id::DECIMAL64:
return f.template operator()<typename IdTypeMap<type_id::DECIMAL64>::type>(
std::forward<Ts>(args)...);
case type_id::STRUCT:
return f.template operator()<typename IdTypeMap<type_id::STRUCT>::type>(
std::forward<Ts>(args)...);
default: {
#ifndef __CUDA_ARCH__
CUDF_FAIL("Unsupported type_id.");
Expand Down
Loading