Skip to content

Commit

Permalink
Add column type tests (#8505)
Browse files Browse the repository at this point in the history
Addresses column requests for #8357 

This PR adds nested type checks for `cudf::column`.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Robert Maynard (https://github.com/robertmaynard)

URL: #8505
  • Loading branch information
isVoid authored Jun 29, 2021
1 parent 2d9fd5f commit 0206fc9
Show file tree
Hide file tree
Showing 9 changed files with 335 additions and 17 deletions.
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ test:
- test -f $PREFIX/include/cudf/utilities/error.hpp
- test -f $PREFIX/include/cudf/utilities/traits.hpp
- test -f $PREFIX/include/cudf/utilities/type_dispatcher.hpp
- test -f $PREFIX/include/cudf/utilities/type_checks.hpp
- test -f $PREFIX/include/cudf/utilities/default_stream.hpp
- test -f $PREFIX/include/cudf/wrappers/dictionary.hpp
- test -f $PREFIX/include/cudf/wrappers/durations.hpp
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ add_library(cudf
src/unary/nan_ops.cu
src/unary/null_ops.cu
src/utilities/default_stream.cpp
src/utilities/type_checks.cpp
)

set_target_properties(cudf
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -89,7 +90,7 @@ std::unique_ptr<column> scatter_impl(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
assert_same_data_type(source, target);
CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types.");

auto const child_column_type = lists_column_view(target).child().type();

Expand Down
5 changes: 0 additions & 5 deletions cpp/include/cudf/lists/detail/scatter_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,6 @@ struct unbound_list_view {
size_type _size{}; // Number of elements in *this* list row.
};

/**
 * @brief Checks that the specified columns have matching schemas, all the way down.
 *
 * The check descends through the child columns of `lhs` and `rhs` in parallel.
 * A mismatch in type id or in the number of children is reported through
 * `CUDF_EXPECTS` in the definition.
 *
 * @param lhs The first column whose type hierarchy is checked
 * @param rhs The second column, whose type hierarchy is compared against `lhs`
 */
void assert_same_data_type(column_view const& lhs, column_view const& rhs);

std::unique_ptr<column> build_lists_child_column_recursive(
data_type child_column_type,
rmm::device_uvector<unbound_list_view> const& list_vector,
Expand Down
38 changes: 38 additions & 0 deletions cpp/include/cudf/utilities/type_checks.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column_view.hpp>

namespace cudf {

/**
 * @brief Compares the type of two `column_view`s
 *
 * This function returns true if the type of `lhs` equals that of `rhs`.
 * - For fixed point types, the scale is compared.
 * - For dictionary types, the types of the keys are compared if both columns
 *   have children; otherwise the columns match only if both are empty.
 * - For lists types, the types of the child columns are compared recursively.
 * - For struct types, the number of fields must match and the type of each
 *   field is compared recursively, in order.
 * - For all other types, the `data_type`s are compared for equality.
 *
 * @param lhs The first `column_view` to compare
 * @param rhs The second `column_view` to compare
 * @return true if column types match
 */
bool column_types_equal(column_view const& lhs, column_view const& rhs);

}  // namespace cudf
10 changes: 0 additions & 10 deletions cpp/src/lists/copying/scatter_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,6 @@ namespace cudf {
namespace lists {
namespace detail {

/**
 * @brief Checks that the specified columns have matching schemas, all the way down.
 *
 * Fails a `CUDF_EXPECTS` check if the type ids differ at any level, or if two
 * non-string columns have a different number of children.
 *
 * @param lhs The first column whose type hierarchy is checked
 * @param rhs The second column, whose type hierarchy is compared against `lhs`
 */
void assert_same_data_type(column_view const& lhs, column_view const& rhs)
{
  CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Mismatched Data types.");
  // Empty string column has no children
  CUDF_EXPECTS(lhs.type().id() == type_id::STRING or lhs.num_children() == rhs.num_children(),
               "Mismatched number of child columns.");

  // The check above tolerates differing child counts for STRING columns only.
  // In that case there are no child pairs to compare, and indexing `rhs` with
  // `lhs`'s child count would read past the end of `rhs`'s children.
  if (lhs.num_children() != rhs.num_children()) { return; }

  for (int i{0}; i < lhs.num_children(); ++i) { assert_same_data_type(lhs.child(i), rhs.child(i)); }
}

/**
* @brief Constructs null mask for a scattered list's child column
*
Expand Down
72 changes: 72 additions & 0 deletions cpp/src/utilities/type_checks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/utilities/type_checks.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>

namespace cudf {
namespace {

/**
 * @brief Functor used with `type_dispatcher` to compare the nested parts of two
 * columns' types.
 *
 * The caller (`column_types_equal`) has already established that the top-level
 * `data_type`s of both columns are equal before dispatching here, so each
 * overload only needs to inspect what the `data_type` alone does not capture.
 */
struct columns_equal_fn {
  /**
   * @brief Generic case: nothing beyond the already-compared `data_type` to check.
   *
   * @return Always true
   */
  template <typename T>
  bool operator()(column_view const&, column_view const&)
  {
    return true;
  }
};

/**
 * @brief Dictionary columns: compares the `data_type` of the keys columns.
 *
 * If either column has no children, the columns are considered equal only when
 * both are empty.
 */
template <>
bool columns_equal_fn::operator()<dictionary32>(column_view const& lhs, column_view const& rhs)
{
  auto const kidx = dictionary_column_view::keys_column_index;
  return lhs.num_children() > 0 and rhs.num_children() > 0
           ? lhs.child(kidx).type() == rhs.child(kidx).type()
           : lhs.is_empty() and rhs.is_empty();
}

/**
 * @brief Lists columns: recursively compares the types of the child columns.
 */
template <>
bool columns_equal_fn::operator()<list_view>(column_view const& lhs, column_view const& rhs)
{
  // NOTE(review): assumes both columns have a child at `child_column_index`;
  // confirm this holds for empty lists columns.
  // Copy the index by value (as the dictionary overload does) instead of
  // binding a reference to the static constant.
  auto const ci = lists_column_view::child_column_index;
  return column_types_equal(lhs.child(ci), rhs.child(ci));
}

/**
 * @brief Struct columns: requires the same number of fields and recursively
 * compares the type of each field, in order.
 */
template <>
bool columns_equal_fn::operator()<struct_view>(column_view const& lhs, column_view const& rhs)
{
  // The child-count check comes first so that `rhs.child(i)` below is only
  // evaluated for indices that exist in both columns.
  return lhs.num_children() == rhs.num_children() and
         std::all_of(thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(lhs.num_children()),
                     [&](auto i) { return column_types_equal(lhs.child(i), rhs.child(i)); });
}

}  // namespace

// Implementation note: this dispatches on a single type only. Double dispatch
// on both column types is avoided because it would grow the number of code
// paths to NxN for N types.
bool column_types_equal(column_view const& lhs, column_view const& rhs)
{
  // Short-circuit on the top-level `data_type`s; the dispatched functor is only
  // consulted when they already agree.
  return lhs.type() == rhs.type() and type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs);
}

} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ ConfigureTest(UTILITIES_TEST
utilities_tests/column_utilities_tests.cpp
utilities_tests/column_wrapper_tests.cpp
utilities_tests/lists_column_wrapper_tests.cpp
utilities_tests/default_stream_tests.cpp)
utilities_tests/default_stream_tests.cpp
utilities_tests/type_check_tests.cpp)

###################################################################################################
# - span tests -------------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 0206fc9

Please sign in to comment.