Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-21.08' into feature/ast_e…
Browse files Browse the repository at this point in the history
…quijoin
  • Loading branch information
vyasr committed Jun 29, 2021
2 parents 6c9251b + 1e53776 commit e1390bc
Show file tree
Hide file tree
Showing 110 changed files with 694 additions and 355 deletions.
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ fi

cd "$WORKSPACE/python/cudf"
gpuci_logger "Python py.test for cuDF"
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term

cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=1.0.1
- pyarrow=4.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -44,8 +44,8 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- dlpack>=0.5,<0.6.0a0
- arrow-cpp=1.0.1
- arrow-cpp-proc * cuda
- double-conversion
- rapidjson
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=1.0.1
- pyarrow=4.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -44,8 +44,8 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- dlpack>=0.5,<0.6.0a0
- arrow-cpp=1.0.1
- arrow-cpp-proc * cuda
- double-conversion
- rapidjson
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ requirements:
- setuptools
- numba >=0.53.1
- dlpack>=0.5,<0.6.0a0
- pyarrow 1.0.1
- pyarrow 4.0.1
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
Expand Down
3 changes: 2 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ requirements:
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 1.0.1
- arrow-cpp 4.0.1
- arrow-cpp-proc * cuda
- dlpack>=0.5,<0.6.0a0
run:
Expand Down Expand Up @@ -220,6 +220,7 @@ test:
- test -f $PREFIX/include/cudf/utilities/error.hpp
- test -f $PREFIX/include/cudf/utilities/traits.hpp
- test -f $PREFIX/include/cudf/utilities/type_dispatcher.hpp
- test -f $PREFIX/include/cudf/utilities/type_checks.hpp
- test -f $PREFIX/include/cudf/utilities/default_stream.hpp
- test -f $PREFIX/include/cudf/wrappers/dictionary.hpp
- test -f $PREFIX/include/cudf/wrappers/durations.hpp
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ requirements:
build:
- cmake >=3.20.1
host:
- libcudf {{ version }}
- librdkafka >=1.5.0,<1.5.3
- libcudf {{version}}
- librdkafka >=1.6.0,<1.7.0a0
run:
- {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not

Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ add_library(cudf
src/unary/nan_ops.cu
src/unary/null_ops.cu
src/utilities/default_stream.cpp
src/utilities/type_checks.cpp
)

set_target_properties(cudf
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/CUDF_GetArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3)

endfunction()

set(CUDF_VERSION_Arrow 1.0.1)
set(CUDF_VERSION_Arrow 4.0.1)

find_and_configure_arrow(${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3})
3 changes: 2 additions & 1 deletion cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/type_checks.hpp>

#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -89,7 +90,7 @@ std::unique_ptr<column> scatter_impl(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
assert_same_data_type(source, target);
CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types.");

auto const child_column_type = lists_column_view(target).child().type();

Expand Down
5 changes: 0 additions & 5 deletions cpp/include/cudf/lists/detail/scatter_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,6 @@ struct unbound_list_view {
size_type _size{}; // Number of elements in *this* list row.
};

/**
 * @brief Checks that the specified columns have matching schemas, all the way down.
 *
 * The type id of the two columns is compared, then the same check is applied
 * recursively to each pair of corresponding child columns. A mismatch is reported
 * through `CUDF_EXPECTS`.
 *
 * @param lhs The first column to compare
 * @param rhs The second column to compare
 */
void assert_same_data_type(column_view const& lhs, column_view const& rhs);

std::unique_ptr<column> build_lists_child_column_recursive(
data_type child_column_type,
rmm::device_uvector<unbound_list_view> const& list_vector,
Expand Down
38 changes: 38 additions & 0 deletions cpp/include/cudf/utilities/type_checks.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column_view.hpp>

namespace cudf {

/**
 * @brief Compares the type of two `column_view`s
 *
 * This function returns true if the type of `lhs` equals that of `rhs`.
 * - For fixed point types, the scale is compared.
 * - For dictionary types, the type of the keys is compared when both columns
 *   have children; otherwise the columns match only if both are empty.
 * - For lists types, the type of the child columns is compared recursively.
 * - For struct types, the number of fields must match and the type of each
 *   field is compared recursively, in order.
 * - For all other types, the `id` of `data_type` is compared.
 *
 * @param lhs The first `column_view` to compare
 * @param rhs The second `column_view` to compare
 * @return true if column types match
 */
bool column_types_equal(column_view const& lhs, column_view const& rhs);

}  // namespace cudf
10 changes: 0 additions & 10 deletions cpp/src/lists/copying/scatter_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,6 @@ namespace cudf {
namespace lists {
namespace detail {

/**
 * @brief Checks that the specified columns have matching schemas, all the way down.
 *
 * The type ids of `lhs` and `rhs` must agree, and so must the type ids of every pair
 * of corresponding descendants. Any mismatch is reported through `CUDF_EXPECTS`.
 *
 * @param lhs The first column to compare
 * @param rhs The second column to compare
 */
void assert_same_data_type(column_view const& lhs, column_view const& rhs)
{
  CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Mismatched Data types.");
  // Empty string column has no children
  CUDF_EXPECTS(lhs.type().id() == type_id::STRING or lhs.num_children() == rhs.num_children(),
               "Mismatched number of child columns.");

  // Child counts can only differ here through the STRING exemption above (one side is
  // an empty string column). There is then nothing to pair up, and indexing
  // `rhs.child(i)` up to `lhs.num_children()` would run past the children of `rhs`.
  if (lhs.num_children() != rhs.num_children()) { return; }

  for (int i{0}; i < lhs.num_children(); ++i) { assert_same_data_type(lhs.child(i), rhs.child(i)); }
}

/**
* @brief Constructs null mask for a scattered list's child column
*
Expand Down
72 changes: 72 additions & 0 deletions cpp/src/utilities/type_checks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/utilities/type_checks.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>

namespace cudf {
namespace {

/**
 * @brief Type-dispatched functor holding the per-type part of `column_types_equal`.
 *
 * `column_types_equal` invokes it only after the `data_type` of both columns has
 * compared equal, so each overload only has to inspect what `data_type` does not
 * capture, i.e. the children of nested columns.
 */
struct columns_equal_fn {
  /**
   * @brief Fallback for every type without a dedicated specialization: there is
   * nothing further to inspect, so the columns are reported as matching.
   */
  template <typename T>
  bool operator()(column_view const&, column_view const&)
  {
    return true;
  }
};

/**
 * @brief Dictionary columns: compare the `data_type` of the keys children when both
 * columns have children; otherwise the columns match only if both are empty.
 */
template <>
bool columns_equal_fn::operator()<dictionary32>(column_view const& lhs, column_view const& rhs)
{
  auto const kidx = dictionary_column_view::keys_column_index;
  return lhs.num_children() > 0 and rhs.num_children() > 0
           ? lhs.child(kidx).type() == rhs.child(kidx).type()
           : lhs.is_empty() and rhs.is_empty();
}

/**
 * @brief Lists columns: recurse into the child column of each side.
 */
template <>
bool columns_equal_fn::operator()<list_view>(column_view const& lhs, column_view const& rhs)
{
  // Copied by value, like `kidx` in the dictionary overload; binding a reference to
  // the static member constant would odr-use it for no benefit.
  auto const ci = lists_column_view::child_column_index;
  return column_types_equal(lhs.child(ci), rhs.child(ci));
}

/**
 * @brief Struct columns: the number of fields must match and every pair of fields,
 * taken in order, must itself have equal column types.
 */
template <>
bool columns_equal_fn::operator()<struct_view>(column_view const& lhs, column_view const& rhs)
{
  // The child-count check comes first so the lambda never indexes past `rhs`'s children.
  return lhs.num_children() == rhs.num_children() and
         std::all_of(thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(lhs.num_children()),
                     [&](auto i) { return column_types_equal(lhs.child(i), rhs.child(i)); });
}

}  // namespace

// Implementation note: the type is dispatched once, on `lhs`, after both types have
// been checked for equality. A double dispatch over (lhs, rhs) is deliberately avoided
// because it would grow the number of code paths to NxN for N types.
bool column_types_equal(column_view const& lhs, column_view const& rhs)
{
  auto const types_differ = lhs.type() != rhs.type();
  return types_differ ? false : type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs);
}

} // namespace cudf
3 changes: 2 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ ConfigureTest(UTILITIES_TEST
utilities_tests/column_utilities_tests.cpp
utilities_tests/column_wrapper_tests.cpp
utilities_tests/lists_column_wrapper_tests.cpp
utilities_tests/default_stream_tests.cpp)
utilities_tests/default_stream_tests.cpp
utilities_tests/type_check_tests.cpp)

###################################################################################################
# - span tests -------------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit e1390bc

Please sign in to comment.