Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Initial support for struct columns #5807

Merged
merged 29 commits into from
Aug 13, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0683c40
[WIP] [struct] Initial commit
mythrocks Jul 17, 2020
8ed38e3
[WIP] [struct] Review comments
mythrocks Jul 30, 2020
673765b
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Jul 31, 2020
ec69d74
[struct] Added struct-support issue to CHANGELOG.md
mythrocks Jul 31, 2020
0261617
[struct] Switched to in-place bitmask_and()
mythrocks Aug 4, 2020
1577732
[struct] Minor cleanup.
mythrocks Aug 4, 2020
7e9d660
[struct] Added test for null-mask propagation (For non-null structs
mythrocks Aug 4, 2020
7e197d6
[struct] More tests and cleanup.
mythrocks Aug 4, 2020
8391618
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 4, 2020
d2216ce
[struct] Added structs to lists' tests.
mythrocks Aug 5, 2020
1d6b1ea
[struct] Added structs to lists' tests.
mythrocks Aug 5, 2020
1037904
[struct] Added doc for struct factory method.
mythrocks Aug 5, 2020
be7de05
[struct] clang-format
mythrocks Aug 6, 2020
3bdbe50
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 6, 2020
b4a879f
[struct] Added struct headers to meta.yaml
mythrocks Aug 6, 2020
5dbfd5e
[struct] Minor fixes from review:
mythrocks Aug 6, 2020
af89e97
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 6, 2020
91acab6
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 7, 2020
db85253
[struct] Switch tests to check column equivalence
mythrocks Aug 7, 2020
a48a5b7
[struct] clang-format changes in struct tests
mythrocks Aug 7, 2020
671ec1c
[struct] Corrected nullmask size check in struct factory
mythrocks Aug 7, 2020
6b9082e
[struct] Incorporated review changes.
mythrocks Aug 10, 2020
d1d7722
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 10, 2020
5ca14cb
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 11, 2020
bb93f7a
[struct] More review-related corrections
mythrocks Aug 12, 2020
906e04c
[struct] clang-format again.
mythrocks Aug 12, 2020
f6257b3
Merge remote-tracking branch 'origin/branch-0.15' into structs
mythrocks Aug 12, 2020
3380822
[struct] Added tests for empty columns.
mythrocks Aug 12, 2020
1c0ab90
Merge branch 'branch-0.15' into structs
harrism Aug 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,8 @@ add_library(cudf
src/lists/lists_column_view.cu
src/lists/copying/concatenate.cu
src/lists/copying/gather.cu
src/structs/structs_column_view.cu
src/structs/structs_column_factories.cu
src/text/detokenize.cu
src/text/generate_ngrams.cu
src/text/normalize.cu
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <thrust/iterator/transform_iterator.h>
#include <cudf/column/column_view.hpp>
#include <cudf/lists/list_view.cuh>
#include <cudf/structs/struct_view.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
Expand Down
8 changes: 8 additions & 0 deletions cpp/include/cudf/column/column_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,14 @@ std::unique_ptr<cudf::column> make_lists_column(
cudaStream_t stream = 0,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**
 * @brief Factory for a column of structs built from a set of child columns.
 *
 * NOTE(review): only the declaration is visible here; the parameter
 * descriptions below are inferred from names and types and should be
 * confirmed against the definition.
 *
 * @param num_rows Number of rows in the resulting column (presumably each
 *                 child is expected to have this many rows — TODO confirm)
 * @param child_column Child columns, accepted as an rvalue vector of owning
 *                     pointers (presumably one child per struct field)
 * @param null_count Number of null struct rows (presumably those described by
 *                   `null_mask`)
 * @param null_mask Device buffer, accepted as an rvalue, holding the
 *                  struct-level validity bitmask
 * @param stream CUDA stream argument; defaults to 0
 * @param mr Device memory resource argument; defaults to
 *           `rmm::mr::get_default_resource()`
 * @return Owning pointer to the constructed column
 */
std::unique_ptr<cudf::column> make_structs_column(
size_type num_rows,
std::vector<std::unique_ptr<column>>&& child_column,
size_type null_count,
rmm::device_buffer&& null_mask,
cudaStream_t stream = 0,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**
* @brief Return a column with size elements that are all equal to the
* given scalar.
Expand Down
3 changes: 3 additions & 0 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ class column_view : public detail::column_view_base {
**/
size_type num_children() const noexcept { return _children.size(); }

/**
 * @brief Returns a read-only iterator to the first entry of the child views.
 **/
auto child_begin() const noexcept { return _children.cbegin(); }
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
/**
 * @brief Returns a read-only iterator one past the last entry of the child views.
 **/
auto child_end() const noexcept { return _children.cend(); }

private:
std::vector<column_view> _children{}; ///< Based on element type, children
///< may contain additional data
Expand Down
13 changes: 13 additions & 0 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,19 @@ struct column_gatherer_impl<list_view, MapItRoot> {
}
};

/**
 * @brief Specialization of `column_gatherer_impl` selected for `struct_view`.
 *
 * Gathering struct columns is not implemented: the call operator ignores all
 * of its arguments and unconditionally reports failure through `CUDF_FAIL`.
 * The specialization exists so that code dispatching over element types has a
 * `struct_view` case to instantiate.
 */
template <typename MapItRoot>
struct column_gatherer_impl<struct_view, MapItRoot> {
// Signature matches the other specializations visible in this header; no
// argument is read.
std::unique_ptr<column> operator()(column_view const& column,
MapItRoot gather_map_begin,
MapItRoot gather_map_end,
bool nullify_out_of_bounds,
cudaStream_t stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FAIL("Gather not yet supported on struct_view.");
}
};

/**
* @brief Function object for gathering a type-erased
* column. To be used with the cudf::type_dispatcher.
Expand Down
15 changes: 15 additions & 0 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask,
std::vector<size_type> const& indices,
cudaStream_t stream = 0);

/**
 * @brief Returns a bitwise AND of the specified bitmasks
 *
 * `stream` and `mr` carry no default arguments in this declaration, so
 * callers must pass both explicitly.
 *
 * @param masks The list of data pointers of the bitmasks to be ANDed
 * @param begin_bits The bit offsets from which each mask is to be ANDed
 *                   (presumably one entry per element of `masks` — TODO confirm)
 * @param mask_size The number of bits to be ANDed in each mask
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned device_buffer
 * @return rmm::device_buffer Output bitmask
 */
rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
cudaStream_t stream,
rmm::mr::device_memory_resource *mr);
} // namespace detail

} // namespace cudf
35 changes: 35 additions & 0 deletions cpp/include/cudf/structs/struct_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

/**
* @file struct_view.hpp
* @brief Class definition for cudf::struct_view.
*/

namespace cudf {

/**
 * @brief A non-owning, immutable view of device data that represents
 * a struct with fields of arbitrary types (including primitives, lists,
 * and other structs)
 *
 * The class currently declares no members at all.
 * NOTE(review): it appears to act purely as a type tag (for example, as the
 * C++ type associated with struct columns during type dispatch) until real
 * element access is implemented — confirm.
 */
class struct_view {

};

} // namespace cudf
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
48 changes: 48 additions & 0 deletions cpp/include/cudf/structs/structs_column_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>

namespace cudf {


/**
 * @brief Narrowed view over a `column_view`, intended for struct columns.
 *
 * `column_view` is inherited privately, so only the members re-exported by
 * the using-declarations below are reachable through this type.
 */
class structs_column_view : private column_view {
 public:
  /// Builds the view from an existing `column_view` (defined out of line).
  explicit structs_column_view(column_view const& rhs);

  // Copy, move, and destruction are all compiler-generated.
  structs_column_view(structs_column_view const&) = default;
  structs_column_view(structs_column_view&&)      = default;
  structs_column_view& operator=(structs_column_view const&) = default;
  structs_column_view& operator=(structs_column_view&&) = default;
  ~structs_column_view()                                    = default;

  // Size and offset of the viewed rows.
  using column_view::offset;
  using column_view::size;

  // Null information.
  using column_view::has_nulls;
  using column_view::null_count;
  using column_view::null_mask;

  // Iteration over the child views.
  using column_view::child_begin;
  using column_view::child_end;
};  // class structs_column_view

} // namespace cudf;
4 changes: 4 additions & 0 deletions cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class column_view;
class mutable_column_view;
class string_view;
class list_view;
class struct_view;

class scalar;
template <typename T>
Expand All @@ -79,6 +80,8 @@ class duration_scalar_device_view;

class list_scalar;

class struct_scalar;

class table;
class table_view;
class mutable_table_view;
Expand Down Expand Up @@ -207,6 +210,7 @@ enum class type_id : int32_t {
DICTIONARY32, ///< Dictionary type using int32 indices
STRING, ///< String elements
LIST, ///< List elements
STRUCT, ///< Struct elements
// `NUM_TYPE_IDS` must be last!
NUM_TYPE_IDS ///< Total number of type ids
};
Expand Down
6 changes: 4 additions & 2 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cudf/wrappers/timestamps.hpp>

#include <type_traits>
#include "cudf/structs/struct_view.hpp"

namespace cudf {

Expand Down Expand Up @@ -455,7 +456,7 @@ template <typename T>
constexpr inline bool is_compound()
{
  // Reports whether `T` is one of the compound element types: string_view,
  // dictionary32, list_view or struct_view.
  //
  // The previous text returned after the list_view check and left the
  // struct_view term behind as an unreachable expression statement, so
  // struct_view was never reported as compound. Everything is now folded into
  // one return expression.
  return std::is_same<T, cudf::string_view>::value or std::is_same<T, cudf::dictionary32>::value or
         std::is_same<T, cudf::list_view>::value or std::is_same<T, cudf::struct_view>::value;
}

struct is_compound_impl {
Expand Down Expand Up @@ -497,7 +498,8 @@ constexpr inline bool is_compound(data_type type)
template <typename T>
constexpr inline bool is_nested()
{
  // Reports whether `T` is a nested element type: list_view or struct_view.
  //
  // The previous text had two consecutive return statements; the first one
  // (list_view only) always won, leaving the struct_view-aware return
  // unreachable. A single return is kept, spelled with `or` to match
  // is_compound() in this header.
  return std::is_same<T, cudf::list_view>::value or std::is_same<T, cudf::struct_view>::value;
}

struct is_nested_impl {
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/utilities/type_dispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ CUDF_TYPE_MAPPING(cudf::duration_us, type_id::DURATION_MICROSECONDS);
CUDF_TYPE_MAPPING(cudf::duration_ns, type_id::DURATION_NANOSECONDS);
CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32);
CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST);
CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT);

template <typename T>
struct type_to_scalar_type_impl {
Expand Down Expand Up @@ -184,6 +185,12 @@ struct type_to_scalar_type_impl<cudf::list_view> {
// using ScalarDeviceType = cudf::list_scalar_device_view;
};

// Maps cudf::struct_view to its host-side scalar class, cudf::struct_scalar.
// TODO: same open item as the cudf::list_view specialization above, whose
// ScalarDeviceType alias is likewise commented out.
template <>
struct type_to_scalar_type_impl<cudf::struct_view> {
using ScalarType = cudf::struct_scalar;
// TODO: enable once a device-side view for struct scalars exists:
// using ScalarDeviceType = cudf::struct_scalar_device_view;
};

#ifndef MAP_TIMESTAMP_SCALAR
#define MAP_TIMESTAMP_SCALAR(Type) \
template <> \
Expand Down Expand Up @@ -400,6 +407,9 @@ CUDA_HOST_DEVICE_CALLABLE constexpr decltype(auto) type_dispatcher(cudf::data_ty
case type_id::LIST:
return f.template operator()<typename IdTypeMap<type_id::LIST>::type>(
std::forward<Ts>(args)...);
case type_id::STRUCT:
return f.template operator()<typename IdTypeMap<type_id::STRUCT>::type>(
std::forward<Ts>(args)...);
default: {
#ifndef __CUDA_ARCH__
CUDF_FAIL("Unsupported type_id.");
Expand Down
61 changes: 31 additions & 30 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,36 @@ __global__ void offset_bitmask_and(bitmask_type *__restrict__ destination,
}
}

// convert [first_bit_index,last_bit_index) to
// [first_word_index,last_word_index)
/**
 * @brief Functor that maps a position `i` in an array of bit indices to a
 * bitmask word index.
 *
 * Used to convert [first_bit_index,last_bit_index) to
 * [first_word_index,last_word_index).
 */
struct to_word_index : public thrust::unary_function<size_type, size_type> {
  const bool _inclusive                 = false;
  size_type const *const _d_bit_indices = nullptr;

  /**
   * @brief Constructor of a functor that converts bit indices to bitmask word
   * indices.
   *
   * @param[in] inclusive Flag that indicates whether bit indices are inclusive
   * or exclusive.
   * @param[in] d_bit_indices Pointer to an array of bit indices
   */
  __host__ to_word_index(bool inclusive, size_type const *d_bit_indices)
    : _inclusive(inclusive), _d_bit_indices(d_bit_indices)
  {
  }

  /**
   * @brief Looks up the `i`-th bit index and returns its word index.
   *
   * When the functor is exclusive and the bit index does not sit at
   * intra-word position 0, the result is bumped to the following word.
   */
  __device__ size_type operator()(const size_type &i) const
  {
    auto const bit_index = _d_bit_indices[i];
    auto const word      = word_index(bit_index);
    bool const bump      = !_inclusive && intra_word_index(bit_index) != 0;
    return bump ? word + 1 : word;
  }
};

} // namespace

namespace detail {

// Bitwise AND of the masks
rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
Expand Down Expand Up @@ -378,35 +408,6 @@ rmm::device_buffer bitmask_and(std::vector<bitmask_type const *> const &masks,
return dest_mask;
}

// convert [first_bit_index,last_bit_index) to
// [first_word_index,last_word_index)
/**
 * @brief Functor that maps a position `i` in an array of bit indices to a
 * bitmask word index.
 *
 * Used to convert [first_bit_index,last_bit_index) to
 * [first_word_index,last_word_index).
 */
struct to_word_index : public thrust::unary_function<size_type, size_type> {
  const bool _inclusive                 = false;
  size_type const *const _d_bit_indices = nullptr;

  /**
   * @brief Constructor of a functor that converts bit indices to bitmask word
   * indices.
   *
   * @param[in] inclusive Flag that indicates whether bit indices are inclusive
   * or exclusive.
   * @param[in] d_bit_indices Pointer to an array of bit indices
   */
  __host__ to_word_index(bool inclusive, size_type const *d_bit_indices)
    : _inclusive(inclusive), _d_bit_indices(d_bit_indices)
  {
  }

  /**
   * @brief Looks up the `i`-th bit index and returns its word index.
   *
   * When the functor is exclusive and the bit index does not sit at
   * intra-word position 0, the result is bumped to the following word.
   */
  __device__ size_type operator()(const size_type &i) const
  {
    auto const bit_index = _d_bit_indices[i];
    auto const word      = word_index(bit_index);
    bool const bump      = !_inclusive && intra_word_index(bit_index) != 0;
    return bump ? word + 1 : word;
  }
};

} // namespace

namespace detail {
cudf::size_type count_set_bits(bitmask_type const *bitmask,
size_type start,
size_type stop,
Expand Down Expand Up @@ -662,7 +663,7 @@ rmm::device_buffer bitmask_and(table_view const &view,
}
}

if (masks.size() > 0) { return bitmask_and(masks, offsets, view.num_rows(), stream, mr); }
if (masks.size() > 0) { return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); }

return null_mask;
}
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/column/column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,13 @@ struct create_column_from_view {
{
CUDF_FAIL("list_view not supported yet");
}

// Overload enabled only when ColumnType is cudf::struct_view.
// Building a column from a struct view is not implemented, so this always
// reports failure through CUDF_FAIL.
template <typename ColumnType,
std::enable_if_t<std::is_same<ColumnType, cudf::struct_view>::value> * = nullptr>
std::unique_ptr<column> operator()()
{
CUDF_FAIL("struct_view not supported yet");
}
};
} // anonymous namespace

Expand Down
10 changes: 10 additions & 0 deletions cpp/src/column/column_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,16 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::list
CUDF_FAIL("TODO");
}

// Explicit specialization of column_from_scalar_dispatch::operator() for
// cudf::struct_view. Creating a column from a struct scalar is not
// implemented: every argument is ignored and the call always reports failure
// through CUDF_FAIL.
template <>
std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::struct_view>(
scalar const& value,
size_type size,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream) const
{
CUDF_FAIL("TODO. struct_view currently not supported.");
}

std::unique_ptr<column> make_column_from_scalar(scalar const& s,
size_type size,
rmm::mr::device_memory_resource* mr,
Expand Down
15 changes: 15 additions & 0 deletions cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,21 @@ struct copy_if_else_functor_impl<list_view, Left, Right, Filter> {
}
};

/**
 * @brief Specialization of `copy_if_else_functor_impl` selected for
 * `struct_view`.
 *
 * copy_if_else is not implemented for struct columns: the call operator
 * ignores all of its arguments and unconditionally reports failure through
 * `CUDF_FAIL`.
 */
template <typename Left, typename Right, typename Filter>
struct copy_if_else_functor_impl<struct_view, Left, Right, Filter> {
// Parameter list mirrors the other specializations; nothing is read.
std::unique_ptr<column> operator()(Left const& lhs,
Right const& rhs,
size_type size,
bool left_nullable,
bool right_nullable,
Filter filter,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream)
{
CUDF_FAIL("copy_if_else not supported for struct_view yet");
}
};

/**
* @brief Functor called by the `type_dispatcher` to invoke copy_if_else on combinations
* of column_view and scalar
Expand Down
Loading