diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 664e774c68a..8f83c169330 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ############################################## # cuDF GPU build and test script for CI # ############################################## @@ -176,6 +176,28 @@ else ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" done + ################################################################################ + # MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds) + ################################################################################ + if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then + if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then + gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda" + export GTEST_CUDF_RMM_MODE=cuda + COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" + mkdir -p "$WORKSPACE/test-results/" + for gt in gtests/*; do + test_name=$(basename ${gt}) + if [[ "$test_name" == "ERROR_TEST" ]]; then + continue + fi + echo "Running GoogleTest $test_name" + ${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log" + done + unset GTEST_CUDF_RMM_MODE + # test-results/*.cs.log are processed in gpuci + fi + fi + CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"` CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 83a4ffb7453..e7b92eddd9e 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - 
pyarrow=5.0.0=*cuda @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 1e9de119c88..6fe8ed0fafe 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index feaa0ae42b6..cc8d50a1717 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 6d56b0c0c94..46eefbc825f 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -29,7 +29,7 @@ requirements: - python - cython >=0.29,<0.30 - setuptools - - numba >=0.53.1 + - numba >=0.54 - dlpack>=0.5,<0.6.0a0 - pyarrow 5.0.0 *cuda - libcudf {{ version }} @@ -41,7 +41,7 @@ requirements: - typing_extensions - pandas >=1.0,<1.4.0dev0 - cupy >=9.5.0,<10.0.0a0 - - numba >=0.53.1 + - numba >=0.54 - numpy - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda - fastavro >=0.22.0 @@ -51,6 +51,7 @@ requirements: - 
nvtx >=0.2.1 - packaging - cachetools + - ptxcompiler # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler test: # [linux64] requires: # [linux64] diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7a556d2c0f6..50bdc30b292 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -734,6 +734,27 @@ set(install_code_string [=[ set(ArrowCUDA_DIR "${Arrow_DIR}") find_dependency(ArrowCUDA) +]=] +) + +if(CUDF_ENABLE_ARROW_PARQUET) + string( + APPEND + install_code_string + [=[ + if(NOT Parquet_DIR) + set(Parquet_DIR "${Arrow_DIR}") + endif() + set(ArrowDataset_DIR "${Arrow_DIR}") + find_dependency(ArrowDataset) + ]=] + ) +endif() + +string( + APPEND + install_code_string + [=[ if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index fa1e61e26fd..72b247ae748 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -208,7 +208,6 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp) # * binaryop benchmark ---------------------------------------------------------------------------- ConfigureBench( BINARYOP_BENCH binaryop/binaryop_benchmark.cpp binaryop/compiled_binaryop_benchmark.cpp - binaryop/jit_binaryop_benchmark.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp index 9de1112a9db..314d657679b 100644 --- a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp +++ b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp @@ -74,14 +74,14 @@ static void BM_binaryop_transform(benchmark::State& state) auto const op = cudf::binary_operator::ADD; auto result_data_type = cudf::data_type(cudf::type_to_id()); if (reuse_columns) { - auto result = cudf::jit::binary_operation(columns.at(0), columns.at(0), op, 
result_data_type); + auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type); for (cudf::size_type i = 0; i < tree_levels - 1; i++) { - result = cudf::jit::binary_operation(result->view(), columns.at(0), op, result_data_type); + result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type); } } else { - auto result = cudf::jit::binary_operation(columns.at(0), columns.at(1), op, result_data_type); + auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type); std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) { - result = cudf::jit::binary_operation(result->view(), col, op, result_data_type); + result = cudf::binary_operation(result->view(), col, op, result_data_type); }); } } diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp deleted file mode 100644 index 7fda4a50ea1..00000000000 --- a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include - -#include - -#include - -template -class JIT_BINARYOP : public cudf::benchmark { -}; - -template -void BM_binaryop(benchmark::State& state, cudf::binary_operator binop) -{ - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; - - auto data_it = thrust::make_counting_iterator(0); - cudf::test::fixed_width_column_wrapper input1(data_it, data_it + column_size); - cudf::test::fixed_width_column_wrapper input2(data_it, data_it + column_size); - - auto lhs = cudf::column_view(input1); - auto rhs = cudf::column_view(input2); - auto output_dtype = cudf::data_type(cudf::type_to_id()); - - // Call once for hot cache. - cudf::jit::binary_operation(lhs, rhs, binop, output_dtype); - - for (auto _ : state) { - cuda_event_timer timer(state, true); - cudf::jit::binary_operation(lhs, rhs, binop, output_dtype); - } -} - -// TODO tparam boolean for null. -#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ - BENCHMARK_TEMPLATE_DEFINE_F( \ - JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ - (::benchmark::State & st) \ - { \ - BM_binaryop(st, cudf::binary_operator::binop); \ - } \ - BENCHMARK_REGISTER_F(JIT_BINARYOP, binop) \ - ->Unit(benchmark::kMicrosecond) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ - -using namespace cudf; -using namespace numeric; - -// clang-format off -BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); -BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); -BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); -BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); 
-BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); -BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); -BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); -BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); -BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); -BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index c2ad25760b8..6ab1293ab6f 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -51,8 +51,8 @@ function(jit_preprocess_files) endfunction() jit_preprocess_files( - SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu - transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu + SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu transform/jit/kernel.cu + rolling/jit/kernel.cu ) add_custom_target( diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 
5fe37402096..ae1448da502 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -90,7 +90,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB rapids_cpm_find( Arrow ${VERSION} - GLOBAL_TARGETS arrow_shared arrow_cuda_shared + GLOBAL_TARGETS arrow_shared parquet_shared arrow_cuda_shared arrow_dataset_shared CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} @@ -142,6 +142,15 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ArrowCUDA_DIR "${Arrow_DIR}") find_package(Arrow REQUIRED QUIET) find_package(ArrowCUDA REQUIRED QUIET) + if(ENABLE_PARQUET) + if(NOT Parquet_DIR) + # Set this to enable `find_package(Parquet)` + set(Parquet_DIR "${Arrow_DIR}") + endif() + # Set this to enable `find_package(ArrowDataset)` + set(ArrowDataset_DIR "${Arrow_DIR}") + find_package(ArrowDataset REQUIRED QUIET) + endif() elseif(Arrow_ADDED) # Copy these files so we can avoid adding paths in Arrow_BINARY_DIR to # target_include_directories. That defeats ccache. 
@@ -182,24 +191,15 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() if(Arrow_ADDED) + set(arrow_code_string [=[ - if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) - add_library(arrow_shared ALIAS cudf::arrow_shared) - endif() - if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) - add_library(arrow_static ALIAS cudf::arrow_static) - endif() - ]=] - ) - set(arrow_cuda_code_string - [=[ - if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared)) - add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared) - endif() - if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static)) - add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static) - endif() + if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) + add_library(arrow_shared ALIAS cudf::arrow_shared) + endif() + if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) + add_library(arrow_static ALIAS cudf::arrow_static) + endif() ]=] ) @@ -212,6 +212,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB FINAL_CODE_BLOCK arrow_code_string ) + set(arrow_cuda_code_string + [=[ + if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared)) + add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared) + endif() + if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static)) + add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static) + endif() + ]=] + ) + rapids_export( BUILD ArrowCUDA VERSION ${VERSION} @@ -220,6 +231,49 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB NAMESPACE cudf:: FINAL_CODE_BLOCK arrow_cuda_code_string ) + + if(ENABLE_PARQUET) + + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + 
add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) + + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_dataset_code_string + ) + + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() endif() # We generate the arrow-config and arrowcuda-config files when we built arrow locally, so always # do `find_dependency` @@ -230,10 +284,18 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB # ArrowCUDA_DIR to be where Arrow was found, since Arrow packages ArrowCUDA.config in a # non-standard location rapids_export_package(BUILD ArrowCUDA cudf-exports) + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) rapids_export_find_package_root(BUILD ArrowCUDA [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + if(ENABLE_PARQUET) + rapids_export_find_package_root(BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + rapids_export_find_package_root(BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + endif() set(ARROW_FOUND "${ARROW_FOUND}" diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 55e5119040e..6a556bb4b34 100644 --- a/cpp/doxygen/Doxyfile +++ 
b/cpp/doxygen/Doxyfile @@ -2089,7 +2089,7 @@ ENABLE_PREPROCESSING = YES # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -MACRO_EXPANSION = NO +MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then # the macro expansion is limited to the macros specified with the PREDEFINED and @@ -2097,7 +2097,7 @@ MACRO_EXPANSION = NO # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_ONLY_PREDEF = NO +EXPAND_ONLY_PREDEF = YES # If the SEARCH_INCLUDES tag is set to YES, the include files in the # INCLUDE_PATH will be searched if a #include is found. @@ -2129,7 +2129,8 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = __device__= \ + __host__= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index fe548a36cf0..a514010c1f0 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -210,83 +210,5 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, cudf::data_type const& lhs, cudf::data_type const& rhs); -namespace jit { -/** - * @brief Performs a binary operation between a scalar and a column. - * - * The output contains the result of `op(lhs, rhs[i])` for all `0 <= i < rhs.size()` - * The scalar is the left operand and the column elements are the right operand. 
- * This distinction is significant in case of non-commutative binary operations - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand scalar - * @param rhs The right operand column - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Performs a binary operation between a column and a scalar. - * - * The output contains the result of `op(lhs[i], rhs)` for all `0 <= i < lhs.size()` - * The column elements are the left operand and the scalar is the right operand. 
- * This distinction is significant in case of non-commutative binary operations - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand column - * @param rhs The right operand scalar - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Performs a binary operation between two columns. - * - * The output contains the result of `op(lhs[i], rhs[i])` for all `0 <= i < lhs.size()` - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand column - * @param rhs The right operand column - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p lhs and @p rhs are different sizes - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -} // namespace jit /** @} */ // end of group } // namespace cudf diff --git 
a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index ce7731ef7d2..9fa31d0e01d 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -22,52 +22,9 @@ namespace cudf { //! Inner interfaces and implementations namespace detail { -namespace jit { -/** - * @copydoc cudf::jit::binary_operation(scalar const&, column_view const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::jit::binary_operation(column_view const&, scalar const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @copydoc cudf::jit::binary_operation(column_view const&, column_view const&, - * binary_operator, data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. 
- */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -} // namespace jit - -/** - * @copydoc cudf::jit::binary_operation(column_view const&, column_view const&, + * @copydoc cudf::binary_operation(column_view const&, column_view const&, * std::string const&, data_type, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 113c15f19a1..8e3db1c7b10 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -26,22 +26,25 @@ namespace detail { /** * @brief Computes the exclusive scan of a column. * - * The null values are skipped for the operation, and if an input element - * at `i` is null, then the output element at `i` will also be null. + * The null values are skipped for the operation, and if an input element at `i` is null, then the + * output element at `i` will also be null. * - * The identity value for the column type as per the aggregation type - * is used for the value of the first element in the output column. + * The identity value for the column type as per the aggregation type is used for the value of the + * first element in the output column. * - * @throws cudf::logic_error if column data_type is not an arithmetic type. + * Struct columns are allowed with aggregation types Min and Max. * - * @param input The input column view for the scan - * @param agg unique_ptr to aggregation operator applied by the scan - * @param null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. - * Any operation with a null results in a null. 
+ * @throws cudf::logic_error if column data_type is not an arithmetic type or struct type but the + * `agg` is not Min or Max. + * + * @param input The input column view for the scan. + * @param agg unique_ptr to aggregation operator applied by the scan. + * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in + * a null. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Column with scan results + * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @returns Column with scan results. */ std::unique_ptr scan_exclusive(column_view const& input, std::unique_ptr const& agg, @@ -52,22 +55,22 @@ std::unique_ptr scan_exclusive(column_view const& input, /** * @brief Computes the inclusive scan of a column. * - * The null values are skipped for the operation, and if an input element - * at `i` is null, then the output element at `i` will also be null. + * The null values are skipped for the operation, and if an input element at `i` is null, then the + * output element at `i` will also be null. * - * String columns are allowed with aggregation types Min and Max. + * String and struct columns are allowed with aggregation types Min and Max. * - * @throws cudf::logic_error if column data_type is not an arithmetic type - * or string type but the `agg` is not Min or Max + * @throws cudf::logic_error if column data_type is not an arithmetic type or string/struct types + * but the `agg` is not Min or Max. * - * @param input The input column view for the scan - * @param agg unique_ptr to aggregation operator applied by the scan - * @param null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. 
- * Any operation with a null results in a null. + * @param input The input column view for the scan. + * @param agg unique_ptr to aggregation operator applied by the scan. + * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in + * a null. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Column with scan results + * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @returns Column with scan results. */ std::unique_ptr scan_inclusive(column_view const& input, std::unique_ptr const& agg, @@ -76,24 +79,24 @@ std::unique_ptr scan_inclusive(column_view const& input, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row ranks for a column + * @brief Generate row ranks for a column. * - * @param order_by Input column to generate ranks for - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return rank values + * @param order_by Input column to generate ranks for. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return rank values. */ std::unique_ptr inclusive_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row dense ranks for a column + * @brief Generate row dense ranks for a column. 
* - * @param order_by Input column to generate ranks for - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return rank values + * @param order_by Input column to generate ranks for. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return rank values. */ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index a9407ed57ca..61a81e8a745 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -26,7 +26,7 @@ namespace lists { * @file */ -/* +/** * @brief Flag to specify whether a null list element will be ignored from concatenation, or the * entire concatenation result involving null list elements will be a null element. 
*/ diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index 884b412d3e2..56afa150dfc 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -91,6 +91,12 @@ class fixed_width_scalar_device_view_base : public detail::scalar_device_view_ba return *data(); } + /** + * @brief Stores the value in scalar + * + * @tparam T The desired type + * @param value The value to store in scalar + */ template __device__ void set_value(T value) { @@ -159,6 +165,11 @@ class fixed_width_scalar_device_view : public detail::fixed_width_scalar_device_ return fixed_width_scalar_device_view_base::value(); } + /** + * @brief Stores the value in scalar + * + * @param value The value to store in scalar + */ __device__ void set_value(T value) { fixed_width_scalar_device_view_base::set_value(value); } /** @@ -218,6 +229,11 @@ class fixed_point_scalar_device_view : public detail::scalar_device_view_base { { } + /** + * @brief Stores the value in scalar + * + * @param value The value to store in scalar + */ __device__ void set_value(rep_type value) { *_data = value; } /** diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index ec22186ea99..279bf44e7fc 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -50,7 +50,7 @@ namespace strings { * * @param input Lists column to format. * @param na_rep Replacment string for null elements. - * @param separator Strings to use for enclosing list components and separating elements. + * @param separators Strings to use for enclosing list components and separating elements. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column. 
*/ diff --git a/cpp/include/cudf/strings/string.cuh b/cpp/include/cudf/strings/string.cuh index 82da5ad8f10..d85d19d7f10 100644 --- a/cpp/include/cudf/strings/string.cuh +++ b/cpp/include/cudf/strings/string.cuh @@ -52,6 +52,43 @@ inline __device__ bool is_integer(string_view const& d_str) thrust::seq, begin, end, [] __device__(auto chr) { return chr >= '0' && chr <= '9'; }); } +/** + * @brief Returns true if input contains the not-a-number string. + * + * The following are valid for this function: "NAN" and "NaN" + * @param d_str input string + * @return true if input is as valid NaN string. + */ +inline __device__ bool is_nan_str(string_view const& d_str) +{ + auto const ptr = d_str.data(); + return (d_str.size_bytes() == 3) && (ptr[0] == 'N' || ptr[0] == 'n') && + (ptr[1] == 'A' || ptr[1] == 'a') && (ptr[2] == 'N' || ptr[2] == 'n'); +} + +/** + * @brief Returns true if input contains the infinity string. + * + * The following are valid for this function: "INF", "INFINITY", and "Inf" + * @param d_str input string + * @return true if input is as valid Inf string. + */ +inline __device__ bool is_inf_str(string_view const& d_str) +{ + auto const ptr = d_str.data(); + auto const size = d_str.size_bytes(); + + if (size != 3 && size != 8) return false; + + auto const prefix_valid = (ptr[0] == 'I' || ptr[0] == 'i') && (ptr[1] == 'N' || ptr[1] == 'n') && + (ptr[2] == 'F' || ptr[2] == 'f'); + + return prefix_valid && + ((size == 3) || ((ptr[3] == 'I' || ptr[3] == 'i') && (ptr[4] == 'N' || ptr[4] == 'n') && + (ptr[5] == 'I' || ptr[5] == 'i') && (ptr[6] == 'T' || ptr[6] == 't') && + (ptr[7] == 'Y' || ptr[7] == 'y'))); +} + /** * @brief Returns `true` if all characters in the string * are valid for conversion to a float type. @@ -65,8 +102,8 @@ inline __device__ bool is_integer(string_view const& d_str) * An empty string returns `false`. * No bounds checking is performed to verify if the value would fit * within a specific float type. 
- * The following strings are also allowed "NaN", "Inf" and, "-Inf" - * and will return true. + * The following strings are also allowed and will return true: + * "NaN", "NAN", "Inf", "INF", "INFINITY" * * @param d_str String to check. * @return true if string has valid float characters @@ -74,29 +111,32 @@ inline __device__ bool is_integer(string_view const& d_str) inline __device__ bool is_float(string_view const& d_str) { if (d_str.empty()) return false; - // strings allowed by the converter - if (d_str.compare("NaN", 3) == 0) return true; - if (d_str.compare("Inf", 3) == 0) return true; - if (d_str.compare("-Inf", 4) == 0) return true; bool decimal_found = false; bool exponent_found = false; size_type bytes = d_str.size_bytes(); const char* data = d_str.data(); // sign character allowed at the beginning of the string - size_type chidx = (*data == '-' || *data == '+') ? 1 : 0; - bool result = chidx < bytes; + size_type ch_idx = (*data == '-' || *data == '+') ? 1 : 0; + + bool result = ch_idx < bytes; + // check for nan and infinity strings + if (result && data[ch_idx] > '9') { + auto const inf_nan = string_view(data + ch_idx, bytes - ch_idx); + if (is_nan_str(inf_nan) || is_inf_str(inf_nan)) return true; + } + // check for float chars [0-9] and a single decimal '.' 
// and scientific notation [eE][+-][0-9] - for (; chidx < bytes; ++chidx) { - auto chr = data[chidx]; + for (; ch_idx < bytes; ++ch_idx) { + auto chr = data[ch_idx]; if (chr >= '0' && chr <= '9') continue; if (!decimal_found && chr == '.') { decimal_found = true; // no more decimals continue; } if (!exponent_found && (chr == 'e' || chr == 'E')) { - if (chidx + 1 < bytes) chr = data[chidx + 1]; - if (chr == '-' || chr == '+') ++chidx; + if (ch_idx + 1 < bytes) chr = data[ch_idx + 1]; + if (chr == '-' || chr == '+') ++ch_idx; decimal_found = true; // no decimal allowed in exponent exponent_found = true; // no more exponents continue; diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index c719c564a87..70ccac2f75d 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -67,7 +67,7 @@ __device__ weak_ordering compare_elements(Element lhs, Element rhs) } } // namespace detail -/* +/** * @brief A specialization for floating-point `Element` type relational comparison * to derive the order of the elements with respect to `lhs`. Specialization is to * handle `nan` in the order shown below. @@ -187,6 +187,7 @@ class element_equality_comparator { * * @param lhs_element_index The index of the first element * @param rhs_element_index The index of the second element + * @return True if both lhs and rhs element are both nulls and `nulls_are_equal` is true, or equal * */ template transform( bool is_ptx, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr generalized_masked_op( - table_view const& data_view, - std::string const& binary_udf, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Creates a null_mask from `input` by converting `NaN` to null and * preserving existing null values and also returns new null_count. 
diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 5fa07fd5568..e08bf6aa53a 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -50,6 +50,7 @@ class BaseFixture : public ::testing::Test { /** * @brief Returns pointer to `device_memory_resource` that should be used for * all tests inheriting from this fixture + * @return pointer to memory resource */ rmm::mr::device_memory_resource* mr() { return _mr; } }; @@ -170,6 +171,7 @@ class UniformRandomGenerator { /** * @brief Returns the next random number. + * @return generated random number */ template ()>* = nullptr> T generate() @@ -211,6 +213,7 @@ class TempDirTestEnvironment : public ::testing::Environment { /** * @brief Get a temporary filepath to use for the specified filename * + * @param filename name of the file to be placed in temporary directory. * @return std::string The temporary filepath */ std::string get_temp_filepath(std::string filename) { return tmpdir.path() + filename; } diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index f291b04776a..cd2ac9f3ec1 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -79,6 +79,7 @@ class column_wrapper { /** * @brief Releases internal unique_ptr to wrapped column + * @return unique_ptr to wrapped column */ std::unique_ptr release() { return std::move(wrapped); } @@ -1040,11 +1041,13 @@ class dictionary_column_wrapper : public detail::column_wrapper { /** * @brief Access keys column view + * @return column_view to keys column */ column_view keys() const { return cudf::dictionary_column_view{wrapped->view()}.keys(); } /** * @brief Access indices column view + * @return column_view to indices column */ column_view indices() const { return cudf::dictionary_column_view{wrapped->view()}.indices(); } diff --git a/cpp/include/cudf_test/file_utilities.hpp 
b/cpp/include/cudf_test/file_utilities.hpp index 90bf0cd99dc..8e242e5a4f3 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -24,6 +24,10 @@ #include +/** + * @brief RAII class for creating a temporary directory. + * + */ class temp_directory { std::string _path; @@ -49,5 +53,10 @@ class temp_directory { nftw(_path.c_str(), rm_files, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS); } + /** + * @brief Returns the path of the temporary directory + * + * @return string path of the temporary directory + */ const std::string& path() const { return _path; } }; diff --git a/cpp/include/cudf_test/table_utilities.hpp b/cpp/include/cudf_test/table_utilities.hpp index 831c9f5ac14..f2427c5b8c6 100644 --- a/cpp/include/cudf_test/table_utilities.hpp +++ b/cpp/include/cudf_test/table_utilities.hpp @@ -39,7 +39,7 @@ void expect_table_properties_equal(cudf::table_view lhs, cudf::table_view rhs); */ void expect_tables_equal(cudf::table_view lhs, cudf::table_view rhs); -/* +/** * @brief Verifies the equivalency of two tables. * * Treats null elements as equivalent. 
Columns that have nullability but no nulls, diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index e84e175eaca..7087b71a84e 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -18,7 +18,6 @@ */ #include "compiled/binary_ops.hpp" -#include "jit/util.hpp" #include @@ -126,113 +125,6 @@ bool is_same_scale_necessary(binary_operator op) } namespace jit { - -void binary_operation(mutable_column_view& out, - column_view const& lhs, - scalar const& rhs, - binary_operator op, - OperatorType op_type, - rmm::cuda_stream_view stream) -{ - if (is_null_dependent(op)) { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_s_with_validity") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, op_type)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - lhs.offset(), - rhs.is_valid(stream)); - } else { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_s") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, op_type)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); - } -} - -void binary_operation(mutable_column_view& out, - column_view const& lhs, - scalar const& 
rhs, - binary_operator op, - rmm::cuda_stream_view stream) -{ - return binary_operation(out, lhs, rhs, op, OperatorType::Direct, stream); -} - -void binary_operation(mutable_column_view& out, - scalar const& lhs, - column_view const& rhs, - binary_operator op, - rmm::cuda_stream_view stream) -{ - return binary_operation(out, rhs, lhs, op, OperatorType::Reverse, stream); -} - -void binary_operation(mutable_column_view& out, - column_view const& lhs, - column_view const& rhs, - binary_operator op, - rmm::cuda_stream_view stream) -{ - if (is_null_dependent(op)) { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v_with_validity") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - rhs.offset(), - rhs.null_mask(), - rhs.offset()); - } else { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); - } -} - void binary_operation(mutable_column_view& out, column_view const& lhs, column_view const& rhs, @@ 
-246,17 +138,15 @@ void binary_operation(mutable_column_view& out, std::string cuda_source = cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - .instantiate(output_type_name, // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct)); + std::string kernel_name = jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") + .instantiate(output_type_name, // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + std::string("cudf::binops::jit::UserDefinedOp")); cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel( - kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + .get_kernel(kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) ->launch(out.size(), cudf::jit::get_data_ptr(out), cudf::jit::get_data_ptr(lhs), @@ -330,7 +220,6 @@ namespace detail { // There are 3 overloads of each of the following functions: // - `make_fixed_width_column_for_output` -// - `fixed_point_binary_operation` // - `binary_operation` // The overloads are overloaded on the first two parameters of each function: @@ -419,126 +308,6 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh } }; -namespace jit { - -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // calls compiled ops for string types - if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return detail::binary_operation(lhs, rhs, 
op, output_type, stream, mr); - - // Check for datatype - CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); - CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); - CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - if (rhs.is_empty()) return out; - - auto out_view = out->mutable_view(); - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; -} - -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // calls compiled ops for string types - if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - - // Check for datatype - CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); - CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); - CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - if (lhs.is_empty()) return out; - - auto out_view = out->mutable_view(); - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; -} - -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match"); - - // calls compiled ops for string types - if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - - // Check for datatype - CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); - CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); - CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - if (lhs.is_empty() or rhs.is_empty()) return out; - - auto out_view = out->mutable_view(); - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; -} -} // namespace jit -} // namespace detail - -namespace jit { -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::jit::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr); -} - -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::jit::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr); -} - -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::jit::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr); -} -} // namespace jit - -namespace detail { 
std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, binary_operator op, diff --git a/cpp/src/binaryop/jit/kernel.cu b/cpp/src/binaryop/jit/kernel.cu index fcfe16f979d..c9cc61a4f34 100644 --- a/cpp/src/binaryop/jit/kernel.cu +++ b/cpp/src/binaryop/jit/kernel.cu @@ -18,66 +18,29 @@ * limitations under the License. */ -#include - #include #include #include #include #include +#include +#include + namespace cudf { namespace binops { namespace jit { -template -__global__ void kernel_v_s_with_validity(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data, - cudf::bitmask_type* output_mask, - cudf::bitmask_type const* mask, - cudf::size_type offset, - bool scalar_valid) -{ - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - - int start = tid + blkid * blksz; - int step = blksz * gridsz; - - for (cudf::size_type i = start; i < size; i += step) { - bool output_valid = false; - out_data[i] = TypeOpe::template operate( - lhs_data[i], - rhs_data[0], - mask ? 
cudf::bit_is_set(mask, offset + i) : true, - scalar_valid, - output_valid); - if (output_mask && !output_valid) cudf::clear_bit(output_mask, i); - } -} - -template -__global__ void kernel_v_s(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data) -{ - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - - int start = tid + blkid * blksz; - int step = blksz * gridsz; - - for (cudf::size_type i = start; i < size; i += step) { - out_data[i] = TypeOpe::template operate(lhs_data[i], rhs_data[0]); +struct UserDefinedOp { + template + static TypeOut operate(TypeLhs x, TypeRhs y) + { + TypeOut output; + using TypeCommon = typename cuda::std::common_type::type; + GENERIC_BINARY_OP(&output, static_cast(x), static_cast(y)); + return output; } -} +}; template __global__ void kernel_v_v(cudf::size_type size, diff --git a/cpp/src/binaryop/jit/operation.hpp b/cpp/src/binaryop/jit/operation.hpp deleted file mode 100644 index d117f2182f9..00000000000 --- a/cpp/src/binaryop/jit/operation.hpp +++ /dev/null @@ -1,646 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Copyright 2018-2019 BlazingDB, Inc. - * Copyright 2018 Christian Noboa Mardini - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include - -#include - -#include - -#pragma once - -using namespace cuda::std; - -namespace cudf { -namespace binops { -namespace jit { - -struct Add { - // Allow sum between chronos only when both input and output types - // are chronos. Unsupported combinations will fail to compile - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(is_chrono_v && is_chrono_v && is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return x + y; - } - - template || !is_chrono_v || - !is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) + static_cast(y)); - } -}; - -using RAdd = Add; - -struct Sub { - // Allow difference between chronos only when both input and output types - // are chronos. Unsupported combinations will fail to compile - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(is_chrono_v && is_chrono_v && is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return x - y; - } - - template || !is_chrono_v || - !is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) - static_cast(y)); - } -}; - -struct RSub { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Sub::operate(y, x); - } -}; - -struct Mul { - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) * static_cast(y)); - } - - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return DurationProduct(x, y); - } - - template && is_integral_v) || - (is_integral_v && is_duration_v)>* = nullptr> - static TypeOut DurationProduct(TypeLhs x, TypeRhs y) - { - return x * y; - } -}; - -using RMul = Mul; - -struct 
Div { - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) / static_cast(y)); - } - - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return DurationDivide(x, y); - } - - template || is_duration_v)>* = nullptr> - static TypeOut DurationDivide(TypeLhs x, TypeRhs y) - { - return x / y; - } -}; - -struct RDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Div::operate(y, x); - } -}; - -struct TrueDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) / static_cast(y)); - } -}; - -struct RTrueDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return TrueDiv::operate(y, x); - } -}; - -struct FloorDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return floor(static_cast(x) / static_cast(y)); - } -}; - -struct RFloorDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return FloorDiv::operate(y, x); - } -}; - -struct Mod { - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(is_integral_v::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) % static_cast(y)); - } - - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(isFloat::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return static_cast(fmodf(static_cast(x), static_cast(y))); - } - - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(isDouble::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return static_cast(fmod(static_cast(x), static_cast(y))); - } - - template && is_duration_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return x % y; - } -}; - -struct RMod { - template - static 
TypeOut operate(TypeLhs x, TypeRhs y) - { - return Mod::operate(y, x); - } -}; - -struct PyMod { - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return ((x % y) + y) % y; - } - - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - double x1 = static_cast(x); - double y1 = static_cast(y); - return fmod(fmod(x1, y1) + y1, y1); - } - - template && is_duration_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return ((x % y) + y) % y; - } -}; - -struct RPyMod { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return PyMod::operate(y, x); - } -}; - -struct Pow { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return pow(static_cast(x), static_cast(y)); - } -}; - -struct RPow { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Pow::operate(y, x); - } -}; - -struct Equal { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x == y); - } -}; - -using REqual = Equal; - -struct NotEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x != y); - } -}; - -using RNotEqual = NotEqual; - -struct Less { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x < y); - } -}; - -struct RLess { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y < x); - } -}; - -struct Greater { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x > y); - } -}; - -struct RGreater { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y > x); - } -}; - -struct LessEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x <= y); - } -}; - -struct RLessEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y <= x); - } -}; - -struct GreaterEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x >= y); - } -}; - -struct RGreaterEqual { - template - static TypeOut operate(TypeLhs x, 
TypeRhs y) - { - return (y >= x); - } -}; - -struct BitwiseAnd { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) & static_cast(y)); - } -}; - -using RBitwiseAnd = BitwiseAnd; - -struct BitwiseOr { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) | static_cast(y)); - } -}; - -using RBitwiseOr = BitwiseOr; - -struct BitwiseXor { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) ^ static_cast(y)); - } -}; - -using RBitwiseXor = BitwiseXor; - -struct LogicalAnd { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x && y); - } -}; - -using RLogicalAnd = LogicalAnd; - -struct LogicalOr { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x || y); - } -}; - -using RLogicalOr = LogicalOr; - -struct UserDefinedOp { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - TypeOut output; - using TypeCommon = typename common_type::type; - GENERIC_BINARY_OP(&output, static_cast(x), static_cast(y)); - return output; - } -}; - -struct ShiftLeft { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x << y); - } -}; - -struct RShiftLeft { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y << x); - } -}; - -struct ShiftRight { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x >> y); - } -}; - -struct RShiftRight { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y >> x); - } -}; - -struct ShiftRightUnsigned { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast>(x) >> y); - } -}; - -struct RShiftRightUnsigned { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast>(y) >> x); - } -}; - -struct LogBase { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (std::log(static_cast(x)) / std::log(static_cast(y))); - } -}; - -struct RLogBase { - template - static 
TypeOut operate(TypeLhs x, TypeRhs y) - { - return LogBase::operate(y, x); - } -}; - -struct NullEquals { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - if (!lhs_valid && !rhs_valid) return true; - if (lhs_valid && rhs_valid) return x == y; - return false; - } -}; - -struct RNullEquals { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - return NullEquals::operate(y, x, rhs_valid, lhs_valid, output_valid); - } -}; - -struct NullMax { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - if (!lhs_valid && !rhs_valid) { - output_valid = false; - return TypeOut{}; - } else if (lhs_valid && rhs_valid) { - return (TypeOut{x} > TypeOut{y}) ? TypeOut{x} : TypeOut{y}; - } else if (lhs_valid) - return TypeOut{x}; - else - return TypeOut{y}; - } -}; - -struct RNullMax { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - return NullMax::operate(y, x, rhs_valid, lhs_valid, output_valid); - } -}; - -struct NullMin { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - if (!lhs_valid && !rhs_valid) { - output_valid = false; - return TypeOut{}; - } else if (lhs_valid && rhs_valid) { - return (TypeOut{x} < TypeOut{y}) ? 
TypeOut{x} : TypeOut{y}; - } else if (lhs_valid) - return TypeOut{x}; - else - return TypeOut{y}; - } -}; - -struct RNullMin { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - return NullMin::operate(y, x, rhs_valid, lhs_valid, output_valid); - } -}; - -struct PMod { - // Ideally, these two specializations - one for integral types and one for non integral - // types shouldn't be required, as std::fmod should promote integral types automatically - // to double and call the std::fmod overload for doubles. Sadly, doing this in jitified - // code does not work - it is having trouble deciding between float/double overloads - template ::type>)>* = - nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using common_t = typename cuda::std::common_type::type; - common_t xconv{x}; - common_t yconv{y}; - auto rem = xconv % yconv; - if (rem < 0) rem = (rem + yconv) % yconv; - return TypeOut{rem}; - } - - template ::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using common_t = typename cuda::std::common_type::type; - common_t xconv{x}; - common_t yconv{y}; - auto rem = std::fmod(xconv, yconv); - if (rem < 0) rem = std::fmod(rem + yconv, yconv); - return TypeOut{rem}; - } -}; - -struct RPMod { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return PMod::operate(y, x); - } -}; - -struct ATan2 { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return TypeOut{std::atan2(double{x}, double{y})}; - } -}; - -struct RATan2 { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return TypeOut{ATan2::operate(y, x)}; - } -}; - -} // namespace jit -} // namespace binops -} // namespace cudf diff --git a/cpp/src/binaryop/jit/traits.hpp b/cpp/src/binaryop/jit/traits.hpp deleted file mode 100644 index 1033d38a668..00000000000 --- a/cpp/src/binaryop/jit/traits.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
- * - * Copyright 2018-2019 BlazingDB, Inc. - * Copyright 2018 Christian Noboa Mardini - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -// Include Jitify's cstddef header first -#include - -#include -#include -#include -#include - -#include -#include - -namespace cudf { -namespace binops { -namespace jit { - -// ------------------------------------------------------------------------- -// type_traits cannot tell the difference between float and double -template -constexpr bool isFloat = false; - -template -constexpr bool is_timestamp_v = - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v; - -template -constexpr bool is_duration_v = - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v; - -template -constexpr bool is_chrono_v = is_timestamp_v || is_duration_v; - -template <> -constexpr bool isFloat = true; - -template -constexpr bool isDouble = false; - -template <> -constexpr bool isDouble = true; - -} // namespace jit -} // namespace binops -} // namespace cudf diff --git a/cpp/src/binaryop/jit/util.hpp b/cpp/src/binaryop/jit/util.hpp deleted file mode 100644 index 34c42e28a8b..00000000000 --- a/cpp/src/binaryop/jit/util.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include - -namespace cudf { -namespace binops { -namespace jit { - -/** - * @brief Orientation of lhs and rhs in operator - */ -enum class OperatorType { - Direct, ///< Orientation of operands is op(lhs, rhs) - Reverse ///< Orientation of operands is op(rhs, lhs) -}; - -/** - * @brief Get the Operator Name - * - * @param op The binary operator as enum of type cudf::binary_operator - * @param type @see OperatorType - * @return std::string The name of the operator as string - */ -std::string inline get_operator_name(binary_operator op, OperatorType type) -{ - std::string const operator_name = [op] { - // clang-format off - switch (op) { - case binary_operator::ADD: return "Add"; - case binary_operator::SUB: return "Sub"; - case binary_operator::MUL: return "Mul"; - case binary_operator::DIV: return "Div"; - case binary_operator::TRUE_DIV: return "TrueDiv"; - case binary_operator::FLOOR_DIV: return "FloorDiv"; - case binary_operator::MOD: return "Mod"; - case binary_operator::PYMOD: return "PyMod"; - case binary_operator::POW: return "Pow"; - case binary_operator::EQUAL: return "Equal"; - case binary_operator::NOT_EQUAL: return "NotEqual"; - case binary_operator::LESS: return "Less"; - case binary_operator::GREATER: return "Greater"; - case binary_operator::LESS_EQUAL: return "LessEqual"; - case binary_operator::GREATER_EQUAL: return "GreaterEqual"; - case binary_operator::BITWISE_AND: return 
"BitwiseAnd"; - case binary_operator::BITWISE_OR: return "BitwiseOr"; - case binary_operator::BITWISE_XOR: return "BitwiseXor"; - case binary_operator::LOGICAL_AND: return "LogicalAnd"; - case binary_operator::LOGICAL_OR: return "LogicalOr"; - case binary_operator::GENERIC_BINARY: return "UserDefinedOp"; - case binary_operator::SHIFT_LEFT: return "ShiftLeft"; - case binary_operator::SHIFT_RIGHT: return "ShiftRight"; - case binary_operator::SHIFT_RIGHT_UNSIGNED: return "ShiftRightUnsigned"; - case binary_operator::LOG_BASE: return "LogBase"; - case binary_operator::ATAN2: return "ATan2"; - case binary_operator::PMOD: return "PMod"; - case binary_operator::NULL_EQUALS: return "NullEquals"; - case binary_operator::NULL_MAX: return "NullMax"; - case binary_operator::NULL_MIN: return "NullMin"; - default: return ""; - } - // clang-format on - }(); - - if (operator_name == "") { return "None"; } - - return "cudf::binops::jit::" + - (type == OperatorType::Direct ? operator_name : 'R' + operator_name); -} - -} // namespace jit -} // namespace binops -} // namespace cudf diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index b565e8dc6d8..ae3e3232e06 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -239,7 +239,13 @@ struct group_scan_functor()}, gather_map.size(), gather_map.data()); + // // Gather the children elements of the prefix min/max struct elements first. + // + // Typically, we should use `get_sliced_child` for each child column to properly handle the + // input if it is a sliced view. However, since the input to this function is just generated + // from groupby internal APIs which is never a sliced view, we just use `child_begin` and + // `child_end` iterators for simplicity. 
auto scanned_children = cudf::detail::gather( table_view(std::vector{values.child_begin(), values.child_end()}), diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 25c4bd65c8f..e53fb3589bc 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1496,15 +1496,23 @@ orc_table_view make_orc_table_view(table_view const& table, append_orc_column(col.child(lists_column_view::child_column_index), &orc_columns[new_col_idx], col_meta.child(lists_column_view::child_column_index)); - } else if (kind == TypeKind::STRUCT or kind == TypeKind::MAP) { - // MAP: skip to the list child - include grandchildren columns instead of children - auto const real_parent_col = - kind == TypeKind::MAP ? col.child(lists_column_view::child_column_index) : col; - for (auto child_idx = 0; child_idx != real_parent_col.num_children(); ++child_idx) { - append_orc_column(real_parent_col.child(child_idx), - &orc_columns[new_col_idx], - col_meta.child(child_idx)); + } else if (kind == TypeKind::STRUCT) { + for (auto child_idx = 0; child_idx != col.num_children(); ++child_idx) { + append_orc_column( + col.child(child_idx), &orc_columns[new_col_idx], col_meta.child(child_idx)); } + } else if (kind == TypeKind::MAP) { + // MAP: skip to the list child - include grandchildren columns instead of children + auto const real_parent_col = col.child(lists_column_view::child_column_index); + auto const& real_parent_meta = col_meta.child(lists_column_view::child_column_index); + CUDF_EXPECTS(real_parent_meta.num_children() == 2, + "Map struct column should have exactly two children"); + // process MAP key + append_orc_column( + real_parent_col.child(0), &orc_columns[new_col_idx], real_parent_meta.child(0)); + // process MAP value + append_orc_column( + real_parent_col.child(1), &orc_columns[new_col_idx], real_parent_meta.child(1)); } }; diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 
7a48b7d7301..d96bf93d10f 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -75,8 +75,8 @@ class cufile_shim { ~cufile_shim() { - driver_close(); - dlclose(cf_lib); + if (driver_close) driver_close(); + if (cf_lib) dlclose(cf_lib); } decltype(cuFileHandleRegister)* handle_register = nullptr; diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 02ecd6df4d9..70f5ca90539 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -14,13 +14,17 @@ * limitations under the License. */ -#include "scan.cuh" +#include +#include #include +#include #include #include #include #include +#include +#include #include #include @@ -150,6 +154,72 @@ struct scan_functor { } }; +template +struct scan_functor { + static std::unique_ptr invoke(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + // Op is used only to determined if we want to find the min or max element. + auto constexpr is_min_op = std::is_same_v; + + // Build indices of the scan operation results (ARGMIN/ARGMAX). + // When finding ARGMIN, we need to consider nulls as larger than non-null elements, and the + // opposite for ARGMAX. + auto gather_map = rmm::device_uvector(input.size(), stream); + auto const do_scan = [&](auto const& binop) { + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + gather_map.begin(), + binop); + }; + + auto constexpr null_precedence = is_min_op ? cudf::null_order::AFTER : cudf::null_order::BEFORE; + auto const flattened_input = cudf::structs::detail::flatten_nested_columns( + table_view{{input}}, {}, std::vector{null_precedence}); + auto const d_flattened_input_ptr = table_device_view::create(flattened_input, stream); + auto const flattened_null_precedences = + is_min_op ? 
cudf::detail::make_device_uvector_async(flattened_input.null_orders(), stream) + : rmm::device_uvector(0, stream); + + if (input.has_nulls()) { + auto const binop = cudf::reduction::detail::row_arg_minmax_fn( + input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op); + do_scan(binop); + } else { + auto const binop = cudf::reduction::detail::row_arg_minmax_fn( + input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op); + do_scan(binop); + } + + // Gather the children columns of the input column. Must use `get_sliced_child` to properly + // handle input in case it is a sliced view. + auto const input_children = [&] { + auto const it = cudf::detail::make_counting_transform_iterator( + 0, [structs_view = structs_column_view{input}, stream](auto const child_idx) { + return structs_view.get_sliced_child(child_idx); + }); + return std::vector(it, it + input.num_children()); + }(); + + // Gather the children elements of the prefix min/max struct elements for the output. + auto scanned_children = cudf::detail::gather(table_view{input_children}, + gather_map, + out_of_bounds_policy::DONT_CHECK, + negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + // Don't need to set a null mask because that will be handled at the caller. 
+ return make_structs_column(input.size(), + std::move(scanned_children), + UNKNOWN_NULL_COUNT, + rmm::device_buffer{0, stream, mr}); + } +}; + /** * @brief Dispatcher for running a Scan operation on an input column * @@ -161,7 +231,11 @@ struct scan_dispatcher { template static constexpr bool is_supported() { - return std::is_invocable_v && !cudf::is_dictionary(); + if constexpr (std::is_same_v) { + return std::is_same_v || std::is_same_v; + } else { + return std::is_invocable_v && !cudf::is_dictionary(); + } } public: @@ -209,6 +283,15 @@ std::unique_ptr scan_inclusive( output->set_null_mask(mask_scan(input, scan_type::INCLUSIVE, stream, mr), UNKNOWN_NULL_COUNT); } + // If the input is a structs column, we also need to push down nulls from the parent output column + // into the children columns. + if (input.type().id() == type_id::STRUCT && output->has_nulls()) { + for (size_type idx = 0; idx < output->num_children(); ++idx) { + structs::detail::superimpose_parent_nulls( + output->view().null_mask(), output->null_count(), output->child(idx), stream, mr); + } + } + return output; } } // namespace detail diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 366d4fe7d42..70b5f528213 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -45,7 +45,7 @@ namespace { * @brief This function converts the given string into a * floating point double value. * - * This will also map strings containing "NaN", "Inf" and "-Inf" + * This will also map strings containing "NaN", "Inf", etc. * to the appropriate float values. * * This function will also handle scientific notation format. 
@@ -55,16 +55,19 @@ __device__ inline double stod(string_view const& d_str) const char* in_ptr = d_str.data(); const char* end = in_ptr + d_str.size_bytes(); if (end == in_ptr) return 0.0; - // special strings - if (d_str.compare("NaN", 3) == 0) return std::numeric_limits::quiet_NaN(); - if (d_str.compare("Inf", 3) == 0) return std::numeric_limits::infinity(); - if (d_str.compare("-Inf", 4) == 0) return -std::numeric_limits::infinity(); double sign{1.0}; if (*in_ptr == '-' || *in_ptr == '+') { sign = (*in_ptr == '-' ? -1 : 1); ++in_ptr; } + // special strings: NaN, Inf + if ((in_ptr < end) && *in_ptr > '9') { + auto const inf_nan = string_view(in_ptr, static_cast(thrust::distance(in_ptr, end))); + if (string::is_nan_str(inf_nan)) return std::numeric_limits::quiet_NaN(); + if (string::is_inf_str(inf_nan)) return sign * std::numeric_limits::infinity(); + } + // Parse and store the mantissa as much as we can, // until we are about to exceed the limit of uint64_t constexpr uint64_t max_holding = (std::numeric_limits::max() - 9L) / 10L; diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu deleted file mode 100644 index 319ad730c53..00000000000 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace cudf { -namespace transformation { -namespace jit { - -template -struct Masked { - T value; - bool valid; -}; - -template -__device__ auto make_args(cudf::size_type id, TypeIn in_ptr, MaskType in_mask, OffsetType in_offset) -{ - bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; - return cuda::std::make_tuple(in_ptr[id], valid); -} - -template -__device__ auto make_args(cudf::size_type id, - InType in_ptr, - MaskType in_mask, // in practice, always cudf::bitmask_type const* - OffsetType in_offset, // in practice, always cudf::size_type - Arguments... args) -{ - bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; - return cuda::std::tuple_cat(cuda::std::make_tuple(in_ptr[id], valid), make_args(id, args...)); -} - -template -__global__ void generic_udf_kernel(cudf::size_type size, - TypeOut* out_data, - bool* out_mask, - Arguments... args) -{ - int const tid = threadIdx.x; - int const blkid = blockIdx.x; - int const blksz = blockDim.x; - int const gridsz = gridDim.x; - int const start = tid + blkid * blksz; - int const step = blksz * gridsz; - - Masked output; - for (cudf::size_type i = start; i < size; i += step) { - auto func_args = cuda::std::tuple_cat( - cuda::std::make_tuple(&output.value), - make_args(i, args...) 
// passed int64*, bool*, int64, int64*, bool*, int64 - ); - cuda::std::apply(GENERIC_OP, func_args); - out_data[i] = output.value; - out_mask[i] = output.valid; - } -} - -} // namespace jit -} // namespace transformation -} // namespace cudf diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 5230b853a79..0cca6699586 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -19,12 +19,10 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -65,80 +63,6 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -std::vector make_template_types(column_view outcol_view, table_view const& data_view) -{ - std::string mskptr_type = - cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())) + "*"; - std::string offset_type = - cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); - - std::vector template_types; - template_types.reserve((3 * data_view.num_columns()) + 1); - - template_types.push_back(cudf::jit::get_type_name(outcol_view.type())); - for (auto const& col : data_view) { - template_types.push_back(cudf::jit::get_type_name(col.type()) + "*"); - template_types.push_back(mskptr_type); - template_types.push_back(offset_type); - } - return template_types; -} - -void generalized_operation(table_view const& data_view, - std::string const& udf, - data_type output_type, - mutable_column_view outcol_view, - mutable_column_view outmsk_view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const template_types = make_template_types(outcol_view, data_view); - - std::string generic_kernel_name = - jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") - .instantiate(template_types); - - std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( - udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - - std::vector kernel_args; - 
kernel_args.reserve((data_view.num_columns() * 3) + 3); - - cudf::size_type size = outcol_view.size(); - const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view); - const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view); - kernel_args.insert(kernel_args.begin(), {&size, &outcol_ptr, &outmsk_ptr}); - - std::vector data_ptrs; - std::vector mask_ptrs; - std::vector offsets; - - data_ptrs.reserve(data_view.num_columns()); - mask_ptrs.reserve(data_view.num_columns()); - offsets.reserve(data_view.num_columns()); - - auto const iters = thrust::make_zip_iterator( - thrust::make_tuple(data_ptrs.begin(), mask_ptrs.begin(), offsets.begin())); - - std::for_each(iters, iters + data_view.num_columns(), [&](auto const& tuple_vals) { - kernel_args.push_back(&thrust::get<0>(tuple_vals)); - kernel_args.push_back(&thrust::get<1>(tuple_vals)); - kernel_args.push_back(&thrust::get<2>(tuple_vals)); - }); - - std::transform(data_view.begin(), data_view.end(), iters, [&](column_view const& col) { - return thrust::make_tuple(cudf::jit::get_data_ptr(col), col.null_mask(), col.offset()); - }); - - cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) - .get_kernel(generic_kernel_name, - {}, - {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, - {"-arch=sm_."}) - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) - ->launch(kernel_args.data()); -} - } // namespace jit } // namespace transformation @@ -165,24 +89,6 @@ std::unique_ptr transform(column_view const& input, return output; } -std::unique_ptr generalized_masked_op(table_view const& data_view, - std::string const& udf, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - std::unique_ptr output = make_fixed_width_column(output_type, data_view.num_rows()); - std::unique_ptr output_mask = - make_fixed_width_column(cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows()); - - transformation::jit::generalized_operation( - data_view, udf, 
output_type, *output, *output_mask, stream, mr); - - auto final_output_mask = cudf::bools_to_mask(*output_mask); - output.get()->set_null_mask(std::move(*(final_output_mask.first))); - return output; -} - } // namespace detail std::unique_ptr transform(column_view const& input, @@ -195,12 +101,4 @@ std::unique_ptr transform(column_view const& input, return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr); } -std::unique_ptr generalized_masked_op(table_view const& data_view, - std::string const& udf, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - return detail::generalized_masked_op(data_view, udf, output_type, rmm::cuda_stream_default, mr); -} - } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 98bade7e15f..c1c209b2413 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -164,7 +164,6 @@ ConfigureTest( BINARY_TEST binaryop/binop-verify-input-test.cpp binaryop/binop-null-test.cpp - binaryop/binop-integration-test.cpp binaryop/binop-compiled-test.cpp binaryop/binop-compiled-fixed_point-test.cpp binaryop/binop-generic-ptx-test.cpp diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 7925f0dd618..5020fbf898b 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -684,4 +684,44 @@ TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpThrows) cudf::logic_error); } +template +struct FixedPointTest_64_128_Reps : public cudf::test::BaseFixture { +}; + +using Decimal64And128Types = cudf::test::Types; +TYPED_TEST_SUITE(FixedPointTest_64_128_Reps, Decimal64And128Types); + +TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + for (auto const rhs_value : {10000000000000000, 
100000000000000000}) { + auto const lhs = fp_wrapper{{33041, 97290, 36438, 25379, 48473}, scale_type{2}}; + auto const rhs = make_fixed_point_scalar(rhs_value, scale_type{0}); + auto const trues = wrapper{{1, 1, 1, 1, 1}}; + auto const falses = wrapper{{0, 0, 0, 0, 0}}; + auto const bool_type = cudf::data_type{type_id::BOOL8}; + + auto const a = cudf::binary_operation(lhs, *rhs, binary_operator::LESS, bool_type); + auto const b = cudf::binary_operation(lhs, *rhs, binary_operator::LESS_EQUAL, bool_type); + auto const c = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER, bool_type); + auto const d = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER_EQUAL, bool_type); + auto const e = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER, bool_type); + auto const f = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER_EQUAL, bool_type); + auto const g = cudf::binary_operation(*rhs, lhs, binary_operator::LESS, bool_type); + auto const h = cudf::binary_operation(*rhs, lhs, binary_operator::LESS_EQUAL, bool_type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(a->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(b->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(c->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(e->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(f->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(g->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); + } +} + } // namespace cudf::test::binop diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 7a9f6135bcd..37212c30d80 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -679,3 +679,5 @@ TEST_F(BinaryOperationCompiledTest_NullOpsString, NullMin_Vector_Vector) } } // namespace cudf::test::binop + +CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/binaryop/binop-generic-ptx-test.cpp 
b/cpp/tests/binaryop/binop-generic-ptx-test.cpp index 6e35bdac41c..f4407834786 100644 --- a/cpp/tests/binaryop/binop-generic-ptx-test.cpp +++ b/cpp/tests/binaryop/binop-generic-ptx-test.cpp @@ -20,12 +20,18 @@ #include #include +#include #include namespace cudf { namespace test { namespace binop { struct BinaryOperationGenericPTXTest : public BinaryOperationTest { + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } + } }; TEST_F(BinaryOperationGenericPTXTest, CAdd_Vector_Vector_FP32_FP32_FP32) diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp deleted file mode 100644 index 427a21512a3..00000000000 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ /dev/null @@ -1,2716 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Copyright 2018-2019 BlazingDB, Inc. - * Copyright 2018 Christian Noboa Mardini - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include "cudf/utilities/error.hpp" - -namespace cudf { -namespace test { -namespace binop { - -constexpr debug_output_level verbosity{debug_output_level::ALL_ERRORS}; - -struct BinaryOperationIntegrationTest : public BinaryOperationTest { -}; - -TEST_F(BinaryOperationIntegrationTest, Add_Scalar_Vector_SI32_FP32_SI64) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = int64_t; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Vector_SI32_FP32_FP32) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = float; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Scalar_Vector_SI32_FP32_FP32) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = int64_t; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Scalar_SI08_SI16_SI32) -{ - using TypeOut = int8_t; - using TypeLhs = int16_t; - using TypeRhs = int32_t; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs 
= make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Vector_SI32_FP64_SI08) -{ - using TypeOut = int32_t; - using TypeLhs = double; - using TypeRhs = int8_t; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Scalar_SI64_FP64_SI32) -{ - using TypeOut = int64_t; - using TypeLhs = double; - using TypeRhs = int32_t; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_scalar(); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Vector_TimepointD_DurationS_TimepointUS) -{ - using TypeOut = cudf::timestamp_us; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::duration_s; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - 
ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Scalar_TimepointD_TimepointS_DurationS) -{ - using TypeOut = cudf::duration_s; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::timestamp_s; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(100); - auto rhs = cudf::scalar_type_t(typename TypeRhs::duration{34}, true); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Scalar_Vector_DurationS_DurationD_DurationMS) -{ - using TypeOut = cudf::duration_ms; - using TypeLhs = cudf::duration_s; - using TypeRhs = cudf::duration_D; - - using SUB = cudf::library::operation::Sub; - - auto lhs = cudf::scalar_type_t(TypeLhs{-9}); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using MUL = cudf::library::operation::Mul; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using MUL = cudf::library::operation::Mul; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, 
Mul_Scalar_Vector_SI32_DurationD_DurationMS) -{ - // Double the duration of days and convert the time interval to ms - using TypeOut = cudf::duration_ms; - using TypeLhs = int32_t; - using TypeRhs = cudf::duration_D; - - using MUL = cudf::library::operation::Mul; - - auto lhs = cudf::scalar_type_t(2); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Vector_Vector_DurationS_SI32_DurationNS) -{ - // Multiple each duration with some random value and promote the result - using TypeOut = cudf::duration_ns; - using TypeLhs = cudf::duration_s; - using TypeRhs = int32_t; - - using MUL = cudf::library::operation::Mul; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using DIV = cudf::library::operation::Div; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using DIV = cudf::library::operation::Div; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, 
Div_Scalar_Vector_DurationD_SI32_DurationS) -{ - using TypeOut = cudf::duration_s; - using TypeLhs = cudf::duration_D; - using TypeRhs = int64_t; - - using DIV = cudf::library::operation::Div; - - // Divide 2 days by an integer and convert the ticks to seconds - auto lhs = cudf::scalar_type_t(TypeLhs{2}); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Vector_Vector_DurationD_DurationS_DurationMS) -{ - using TypeOut = int64_t; - using TypeLhs = cudf::duration_D; - using TypeRhs = cudf::duration_s; - - using DIV = cudf::library::operation::Div; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, TrueDiv_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using TRUEDIV = cudf::library::operation::TrueDiv; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::TRUE_DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, TRUEDIV()); -} - -TEST_F(BinaryOperationIntegrationTest, FloorDiv_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using FLOORDIV = cudf::library::operation::FloorDiv; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::FLOOR_DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, FLOORDIV()); -} - -TEST_F(BinaryOperationIntegrationTest, 
FloorDiv_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using FLOORDIV = cudf::library::operation::FloorDiv; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::FLOOR_DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, FLOORDIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_FP64) -{ - using TypeOut = double; - using TypeLhs = double; - using TypeRhs = double; - - using MOD = cudf::library::operation::Mod; - - auto lhs = 
make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Scalar_DurationD_SI32_DurationUS) -{ - using TypeOut = cudf::duration_us; - using TypeLhs = cudf::duration_D; - using TypeRhs = int64_t; - - using MOD = cudf::library::operation::Mod; - - // Half the number of days and convert the remainder ticks to microseconds - auto lhs = make_random_wrapped_column(100); - auto rhs = cudf::scalar_type_t(2); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Scalar_DurationS_DurationMS_DurationUS) -{ - using TypeOut = cudf::duration_us; - using TypeLhs = cudf::duration_s; - using TypeRhs = cudf::duration_ms; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_FP64_SI64_SI64) -{ - using TypeOut = double; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using POW = cudf::library::operation::Pow; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::POW, data_type(type_to_id())); - - /** - * According to CUDA Programming Guide, 'E.1. Standard Functions', 'Table 7 - Double-Precision - * Mathematical Standard Library Functions with Maximum ULP Error' - * The pow function has 2 (full range) maximum ulp error. 
- */ - ASSERT_BINOP(*out, lhs, rhs, POW(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - using POW = cudf::library::operation::Pow; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::POW, data_type(type_to_id())); - /** - * According to CUDA Programming Guide, 'E.1. Standard Functions', 'Table 7 - Double-Precision - * Mathematical Standard Library Functions with Maximum ULP Error' - * The pow function has 2 (full range) maximum ulp error. - */ - ASSERT_BINOP(*out, lhs, rhs, POW(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) -{ - using TypeOut = int16_t; - using TypeLhs = int64_t; - using TypeRhs = int32_t; - - using AND = cudf::library::operation::BitwiseAnd; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::BITWISE_AND, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, AND()); -} - -TEST_F(BinaryOperationIntegrationTest, Or_Vector_Vector_SI64_SI16_SI32) -{ - using TypeOut = int64_t; - using TypeLhs = int16_t; - using TypeRhs = int32_t; - - using OR = cudf::library::operation::BitwiseOr; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::BITWISE_OR, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, OR()); -} - -TEST_F(BinaryOperationIntegrationTest, Xor_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int32_t; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using XOR = cudf::library::operation::BitwiseXor; - - auto lhs = make_random_wrapped_column(100); - auto rhs = 
make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::BITWISE_XOR, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, XOR()); -} - -TEST_F(BinaryOperationIntegrationTest, Logical_And_Vector_Vector_SI16_FP64_SI8) -{ - using TypeOut = int16_t; - using TypeLhs = double; - using TypeRhs = int8_t; - - using AND = cudf::library::operation::LogicalAnd; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOGICAL_AND, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, AND()); -} - -TEST_F(BinaryOperationIntegrationTest, Logical_Or_Vector_Vector_B8_SI16_SI64) -{ - using TypeOut = bool; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using OR = cudf::library::operation::LogicalOr; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOGICAL_OR, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, OR()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Scalar_Vector_B8_TSS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_s; - using TypeRhs = cudf::timestamp_s; - - using LESS = cudf::library::operation::Less; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Greater_Scalar_Vector_B8_TSMS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_ms; - using TypeRhs = cudf::timestamp_s; - - using GREATER = cudf::library::operation::Greater; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, 
cudf::binary_operator::GREATER, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Vector_Vector_B8_TSS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_s; - using TypeRhs = cudf::timestamp_s; - - using LESS = cudf::library::operation::Less; - - auto lhs = make_random_wrapped_column(10); - auto rhs = make_random_wrapped_column(10); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Greater_Vector_Vector_B8_TSMS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_ms; - using TypeRhs = cudf::timestamp_s; - - using GREATER = cudf::library::operation::Greater; - - cudf::test::UniformRandomGenerator rand_gen(1, 10); - auto itr = cudf::detail::make_counting_transform_iterator( - 0, [&rand_gen](auto row) { return rand_gen.generate() * 1000; }); - - cudf::test::fixed_width_column_wrapper lhs( - itr, itr + 100, make_validity_iter()); - - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Scalar_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS = cudf::library::operation::Less; - - auto lhs = cudf::string_scalar("eee"); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Vector_Scalar_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS = 
cudf::library::operation::Less; - - auto lhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto rhs = cudf::string_scalar("eee"); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS = cudf::library::operation::Less; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Greater_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using GREATER = cudf::library::operation::Greater; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER()); -} - -TEST_F(BinaryOperationIntegrationTest, Equal_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using EQUAL = cudf::library::operation::Equal; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, EQUAL()); -} - 
-TEST_F(BinaryOperationIntegrationTest, Equal_Vector_Scalar_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using EQUAL = cudf::library::operation::Equal; - - auto rhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto lhs = cudf::string_scalar(""); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, LessEqual_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS_EQUAL = cudf::library::operation::LessEqual; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS_EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS_EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, GreaterEqual_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using GREATER_EQUAL = cudf::library::operation::GreaterEqual; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER_EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER_EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Vector_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable 
for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Scalar_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_scalar(); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Vector_Scalar_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Vector_Vector_SI32) -{ - using TypeOut = int; - using 
TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Scalar_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_scalar(); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Vector_Scalar_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( 
- lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Vector_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - int num_els = 4; - - TypeLhs lhs[] = {-8, 78, -93, 0, -INT_MAX}; - cudf::test::fixed_width_column_wrapper lhs_w(lhs, lhs + num_els); - - TypeRhs shift[] = {1, 1, 3, 2, 16}; - cudf::test::fixed_width_column_wrapper shift_w(shift, shift + num_els); - - TypeOut expected[] = {2147483644, 39, 536870900, 0, 32768}; - cudf::test::fixed_width_column_wrapper expected_w(expected, expected + num_els); - - auto out = cudf::jit::binary_operation( - lhs_w, shift_w, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_w); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Scalar_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = make_random_wrapped_scalar(); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, 
data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Vector_Scalar_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, LogBase_Vector_Scalar_SI32_SI32_float) -{ - using TypeOut = int; // Cast the result value to int for easy comparison - using TypeLhs = int32_t; // All input types get converted into doubles - using TypeRhs = float; - - using LOG_BASE = cudf::library::operation::LogBase; - - // Make sure there are no zeros. 
The log value is purposefully cast to int for easy comparison - auto elements = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return i + 10; }); - fixed_width_column_wrapper lhs(elements, elements + 100); - // Find log to the base 10 - auto rhs = numeric_scalar(10); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOG_BASE, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LOG_BASE()); -} - -TEST_F(BinaryOperationIntegrationTest, LogBase_Scalar_Vector_float_SI32) -{ - using TypeOut = float; - using TypeLhs = int; - using TypeRhs = int; // Integral types promoted to double - - using LOG_BASE = cudf::library::operation::LogBase; - - // Make sure there are no zeros - auto elements = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return i + 30; }); - fixed_width_column_wrapper rhs(elements, elements + 100); - // Find log to the base 2 - auto lhs = numeric_scalar(2); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOG_BASE, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LOG_BASE()); -} - -TEST_F(BinaryOperationIntegrationTest, LogBase_Vector_Vector_double_SI64_SI32) -{ - using TypeOut = double; - using TypeLhs = int64_t; - using TypeRhs = int32_t; // Integral types promoted to double - - using LOG_BASE = cudf::library::operation::LogBase; - - // Make sure there are no zeros - auto elements = - cudf::detail::make_counting_transform_iterator(1, [](auto i) { return std::pow(2, i); }); - fixed_width_column_wrapper lhs(elements, elements + 50); - - // Find log to the base 7 - auto rhs_elements = cudf::detail::make_counting_transform_iterator(0, [](auto) { return 7; }); - fixed_width_column_wrapper rhs(rhs_elements, rhs_elements + 50); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOG_BASE, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LOG_BASE()); -} - -TEST_F(BinaryOperationIntegrationTest, 
NullAwareEqual_Vector_Scalar_B8_SI32_SI32) -{ - using TypeOut = bool; - using TypeLhs = int32_t; - using TypeRhs = int32_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX}, {true, true, true, false}}; - auto int_scalar = cudf::scalar_type_t(999); - - auto op_col = cudf::jit::binary_operation( - int_col, int_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, false, false, false}, {true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_ScalarInvalid_B8_SI32_SI32) -{ - using TypeOut = bool; - using TypeLhs = int32_t; - using TypeRhs = int32_t; - - auto int_col = fixed_width_column_wrapper{{-INT32_MAX, -37, 0, 499, 44, INT32_MAX}, - {false, true, false, true, true, false}}; - auto int_scalar = cudf::scalar_type_t(999); - int_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - int_col, int_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {true, false, true, false, false, true}, - {true, true, true, true, true, true}, - }, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_tsD_tsD) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::timestamp_D; - - cudf::test::fixed_width_column_wrapper ts_col{ - { - 999, // Random nullable field - 0, // This is the UNIX epoch - 1970-01-01 - 44376, // 2091-07-01 00:00:00 GMT - 47695, // 2100-08-02 00:00:00 GMT - 3, // Random nullable field - 66068, // 2150-11-21 00:00:00 GMT - 22270, // 2030-12-22 00:00:00 GMT - 111, // Random nullable field - }, - {false, true, true, true, false, true, true, false}}; - auto ts_scalar = cudf::scalar_type_t(typename TypeRhs::duration{44376}, true); - - auto op_col = 
cudf::jit::binary_operation( - ts_scalar, ts_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {false, false, true, false, false, false, false, false}, - {true, true, true, true, true, true, true, true}, - }, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Scalar_B8_string_string_EmptyString) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Empty string - cudf::string_scalar str_scalar(""); - - auto op_col = cudf::jit::binary_operation( - str_col, str_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, true, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_string_string_ValidString) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Match a valid string - cudf::string_scalar str_scalar(""); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, true, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Scalar_B8_string_string_NoMatch) -{ - using TypeOut = bool; - - // Try with non nullable input - auto str_col = - cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - // Matching a string that 
isn't present - cudf::string_scalar str_scalar("foo"); - - auto op_col = cudf::jit::binary_operation( - str_col, str_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_string_string_NullNonNull) -{ - using TypeOut = bool; - - // Try with all invalid input - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, true, true, true, true, true, true}); - // Matching a scalar that is invalid - cudf::string_scalar str_scalar("foo"); - str_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Scalar_B8_string_string_NullNonNull) -{ - using TypeOut = bool; - - // Try with all invalid input - auto str_col = - cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {false, false, false, false, false, false, false}); - // Matching a scalar that is valid - cudf::string_scalar str_scalar("foo"); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, 
NullAwareEqual_Scalar_Vector_B8_string_string_NullNull) -{ - using TypeOut = bool; - - // Try with all invalid input - auto str_col = - cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {false, false, false, false, false, false, false}); - // Matching a scalar that is invalid - cudf::string_scalar str_scalar("foo"); - str_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, true, true, true, true, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_string_string_MatchInvalid) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Matching an invalid string - cudf::string_scalar str_scalar("bb"); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_InvalidScalar_B8_string_string) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Valid string invalidated - cudf::string_scalar str_scalar("bb"); - str_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - str_col, str_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - 
*op_col, - fixed_width_column_wrapper{{false, true, false, false, false, true, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_tsD_tsD_NonNullable) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::timestamp_D; - - cudf::test::fixed_width_column_wrapper lhs_col{ - 0, // This is the UNIX epoch - 1970-01-01 - 44376, // 2091-07-01 00:00:00 GMT - 47695, // 2100-08-02 00:00:00 GMT - 66068, // 2150-11-21 00:00:00 GMT - 22270, // 2030-12-22 00:00:00 GMT - }; - ASSERT_EQ(column_view{lhs_col}.nullable(), false); - cudf::test::fixed_width_column_wrapper rhs_col{ - 0, // This is the UNIX epoch - 1970-01-01 - 44380, // Mismatched - 47695, // 2100-08-02 00:00:00 GMT - 66070, // Mismatched - 22270, // 2030-12-22 00:00:00 GMT - }; - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {true, false, true, false, true}, - {true, true, true, true, true}, - }, - verbosity); -} - -// Both vectors with mixed validity -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_MixMix) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {true, false, true, true, true, false, true}); - auto rhs_col = - cudf::test::strings_column_wrapper({"foo", "valid", "", "", "invalid", "inv", "ééé"}, - {true, true, true, true, false, false, true}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, true, true, false, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - 
-TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_MixValid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {true, false, true, true, true, false, true}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, false, true, true, true, false, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_MixInvalid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {true, false, true, true, true, false, true}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, true, false, false, false, true, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_ValidValid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // 
Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, true, true, true, true, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_ValidInvalid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_InvalidInvalid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, true, true, true, true, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_VectorAllInvalid_B8_SI32_SI32) -{ - using TypeOut = bool; - using TypeLhs = int32_t; - - auto lhs_col = fixed_width_column_wrapper{{-INT32_MAX, -37, 0, 499, 44, INT32_MAX}, - {false, false, 
false, false, false, false}}; - auto rhs_col = fixed_width_column_wrapper{{-47, 37, 12, 99, 4, -INT32_MAX}, - {false, false, false, false, false, false}}; - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {true, true, true, true, true, true}, - {true, true, true, true, true, true}, - }, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Scalar_SI64_SI32_SI8) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - using TypeRhs = int8_t; - - auto int_col = fixed_width_column_wrapper{ - {999, -37, 0, INT32_MAX}, - }; - auto int_scalar = cudf::scalar_type_t(77); - - auto op_col = cudf::jit::binary_operation( - int_col, int_scalar, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{77, -37, 0, 77}, {true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Scalar_Vector_FP64_SI32_SI64) -{ - using TypeOut = double; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, true, false, true, false, true, false}}; - auto int_scalar = cudf::scalar_type_t(INT32_MAX); - - auto op_col = cudf::jit::binary_operation( - int_scalar, int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{ - {INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Scalar_SI64_SI32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - using TypeRhs = float; - - auto 
int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, true, false, true, false, true, false}}; - auto float_scalar = cudf::scalar_type_t(-3.14f); - float_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - int_col, float_scalar, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{0, -37, 0, INT32_MAX, 0, -4379, 0}, - {false, true, false, true, false, true, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Scalar_Vector_SI8_SI8_FP32) -{ - using TypeOut = int8_t; - using TypeLhs = int8_t; - using TypeRhs = float; - - auto int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {false, false, false, false, false, false, false}}; - auto float_scalar = cudf::scalar_type_t(-3.14f); - float_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - float_scalar, int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{0, 0, 0, 0, 0, 0, 0}, - {false, false, false, false, false, false, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Vector_SI64_SI32_SI8) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, false, false, false, false, false, false}}; - auto another_int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {false, false, false, false, false, false, false}}; - - auto op_col = cudf::jit::binary_operation( - int_col, another_int_col, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{0, 0, 0, 0, 0, 0, 0}, - {false, false, false, false, 
false, false, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Vector_Vector_SI64_SI32_SI8) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - - auto int_col = fixed_width_column_wrapper{ - {999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, {true, true, true, true, true, true, true}}; - auto another_int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {false, false, false, false, false, false, false}}; - - auto op_col = cudf::jit::binary_operation( - int_col, another_int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Vector_tsD_tsD_tsD) -{ - cudf::test::fixed_width_column_wrapper lhs_col{ - { - 0, // This is the UNIX epoch - 1970-01-01 - 44376, // 2091-07-01 00:00:00 GMT - 47695, // 2100-08-02 00:00:00 GMT - 66068, // 2150-11-21 00:00:00 GMT - 22270, // 2030-12-22 00:00:00 GMT - }, - {true, false, true, true, false}}; - cudf::test::fixed_width_column_wrapper rhs_col{ - { - 0, // This is the UNIX epoch - 1970-01-01 - 44380, // Mismatched - 47695, // 2100-08-02 00:00:00 GMT - 66070, // Mismatched - 22270, // 2030-12-22 00:00:00 GMT - }, - {false, true, true, true, false}}; - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{ - {0, 44380, 47695, 66068, 0}, {true, true, true, true, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Vector_Vector_SI32_SI64_SI8) -{ - using TypeOut = int32_t; - using TypeLhs = int64_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, 
false, false, false, false, false, false}}; - auto another_int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {true, false, true, false, true, false, true}}; - - auto op_col = cudf::jit::binary_operation( - int_col, another_int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{9, 0, 0, 0, -47, 0, 55}, - {true, false, true, false, true, false, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Vector_Vector_string_string_string_Mix) -{ - auto lhs_col = cudf::test::strings_column_wrapper( - {"eee", "invalid", "", "", "", "", "ééé", "foo", "bar", "abc", "def"}, - {false, true, true, false, true, true, true, false, false, true, true}); - auto rhs_col = cudf::test::strings_column_wrapper( - {"eee", "goo", "", "", "", "", "ééé", "bar", "foo", "def", "abc"}, - {false, true, true, true, false, true, true, false, false, true, true}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_MAX, data_type{type_id::STRING}); - - auto exp_col = cudf::test::strings_column_wrapper( - {"", "invalid", "", "", "", "", "ééé", "", "", "def", "def"}, - {false, true, true, true, true, true, true, false, false, true, true}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, exp_col, verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Scalar_string_string_string_Mix) -{ - auto lhs_col = cudf::test::strings_column_wrapper( - {"eee", "invalid", "", "", "", "", "ééé", "foo", "bar", "abc", "foo"}, - {false, true, true, false, true, true, true, false, false, true, true}); - cudf::string_scalar str_scalar("foo"); - - // Returns a non-nullable column as all elements are valid - it will have the scalar - // value at the very least - auto op_col = cudf::jit::binary_operation( - lhs_col, str_scalar, cudf::binary_operator::NULL_MIN, data_type{type_id::STRING}); - - auto exp_col = 
cudf::test::strings_column_wrapper( - {"foo", "foo", "", "foo", "", "", "foo", "foo", "foo", "abc", "foo"}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, exp_col, verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Scalar_Vector_string_string_string_Mix) -{ - auto lhs_col = cudf::test::strings_column_wrapper( - {"eee", "invalid", "", "", "", "", "ééé", "foo", "bar", "abc", "foo"}, - {false, true, true, false, true, true, true, false, false, true, true}); - cudf::string_scalar str_scalar("foo"); - str_scalar.set_valid_async(false); - - // Returns the lhs_col - auto op_col = cudf::jit::binary_operation( - str_scalar, lhs_col, cudf::binary_operator::NULL_MAX, data_type{type_id::STRING}); - - auto exp_col = cudf::test::strings_column_wrapper( - {"", "invalid", "", "", "", "", "ééé", "", "", "abc", "foo"}, - {false, true, true, false, true, true, true, false, false, true, true}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, exp_col, verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, CastAdd_Vector_Vector_SI32_float_float) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = float; // Integral types promoted to double - - using ADD = cudf::library::operation::Add; - - auto lhs = cudf::test::fixed_width_column_wrapper{1.3f, 1.6f}; - auto rhs = cudf::test::fixed_width_column_wrapper{1.3f, 1.6f}; - auto expected = cudf::test::fixed_width_column_wrapper{2, 3}; - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Vector_TimepointD_DurationS_TimepointUS) -{ - using TypeOut = cudf::timestamp_us; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::duration_s; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, 
data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Scalar_DurationD_TimepointS_TimepointS) -{ - using TypeOut = cudf::timestamp_s; - using TypeLhs = cudf::duration_D; - using TypeRhs = cudf::timestamp_s; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = cudf::scalar_type_t(typename TypeRhs::duration{34}, true); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Scalar_Vector_DurationS_DurationD_DurationMS) -{ - using TypeOut = cudf::duration_ms; - using TypeLhs = cudf::duration_s; - using TypeRhs = cudf::duration_D; - - using ADD = cudf::library::operation::Add; - - auto lhs = cudf::scalar_type_t(TypeLhs{-9}); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Scalar_Vector_SI64_SI64_SI32) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int32_t; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = cudf::scalar_type_t(-12); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Scalar_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - auto lhs = cudf::scalar_type_t(-86099.68377); - auto rhs = fixed_width_column_wrapper{{90770.74881, -15456.4335, 32213.22119}}; - - auto out = 
cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - auto expected_result = - fixed_width_column_wrapper{{4671.0625, -8817.51953125, 10539.974609375}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_result); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Scalar_FP64) -{ - using TypeOut = double; - using TypeLhs = double; - using TypeRhs = double; - - auto lhs = fixed_width_column_wrapper{{90770.74881, -15456.4335, 32213.22119}}; - auto rhs = cudf::scalar_type_t(-86099.68377); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - auto expected_result = fixed_width_column_wrapper{ - {4671.0650400000013178, -15456.433499999999185, 32213.221190000000206}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_result); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_FP64_FP32_FP64) -{ - using TypeOut = double; - using TypeLhs = float; - using TypeRhs = double; - - auto lhs = fixed_width_column_wrapper{ - {24854.55893, 79946.87288, -86099.68377, -86099.68377, 1.0, 1.0, -1.0, -1.0}}; - auto rhs = fixed_width_column_wrapper{{90770.74881, - -15456.4335, - 36223.96138, - -15456.4335, - 2.1336193413893147E307, - -2.1336193413893147E307, - 2.1336193413893147E307, - -2.1336193413893147E307}}; - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - auto expected_result = fixed_width_column_wrapper{{24854.55859375, - 2664.7075000000040745, - 22572.196640000001935, - -8817.5200000000040745, - 1.0, - 1.0, - 0.0, - 0.0}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_result); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_FP64_SI32_SI64) -{ - using TypeOut = double; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - using PMOD = cudf::library::operation::PMod; - - auto lhs = make_random_wrapped_column(1000); - auto rhs = make_random_wrapped_column(1000); - - auto out = 
cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, PMOD()); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_SI64_SI32_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - using PMOD = cudf::library::operation::PMod; - - auto lhs = make_random_wrapped_column(1000); - auto rhs = make_random_wrapped_column(1000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, PMOD()); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_SI64_FP64_FP64) -{ - using TypeOut = int64_t; - using TypeLhs = double; - using TypeRhs = double; - - using PMOD = cudf::library::operation::PMod; - - auto lhs = make_random_wrapped_column(1000); - auto rhs = make_random_wrapped_column(1000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, PMOD()); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Scalar_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Scalar_FP64) -{ - using TypeOut = double; - using TypeLhs = double; - using TypeRhs = double; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_scalar(); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, 
data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Vector_FP64_FP32_FP64) -{ - using TypeOut = double; - using TypeLhs = float; - using TypeRhs = double; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Vector_FP64_SI32_SI64) -{ - using TypeOut = double; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { -}; - -template -using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_SUITE(FixedPointTestAllReps, cudf::test::FixedPointTypes); - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd) -{ - using namespace numeric; - using decimalXX = TypeParam; - - auto const sz = std::size_t{1000}; - - auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { - return decimalXX{i, scale_type{0}}; - }); - auto const vec1 = std::vector(begin, begin + sz); - auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); - auto expected = std::vector(sz); - - std::transform(std::cbegin(vec1), - std::cend(vec1), - 
std::cbegin(vec2), - std::begin(expected), - std::plus()); - - auto const lhs = wrapper(vec1.begin(), vec1.end()); - auto const rhs = wrapper(vec2.begin(), vec2.end()); - auto const expected_col = wrapper(expected.begin(), expected.end()); - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply) -{ - using namespace numeric; - using decimalXX = TypeParam; - - auto const sz = std::size_t{1000}; - - auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { - return decimalXX{i, scale_type{0}}; - }); - auto const vec1 = std::vector(begin, begin + sz); - auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); - auto expected = std::vector(sz); - - std::transform(std::cbegin(vec1), - std::cend(vec1), - std::cbegin(vec2), - std::begin(expected), - std::multiplies()); - - auto const lhs = wrapper(vec1.begin(), vec1.end()); - auto const rhs = wrapper(vec2.begin(), vec2.end()); - auto const expected_col = wrapper(expected.begin(), expected.end()); - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); -} - -template -using fp_wrapper = cudf::test::fixed_point_column_wrapper; - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = fp_wrapper{{10, 10, 10, 10, 10}, scale_type{0}}; - auto 
const expected = fp_wrapper{{110, 220, 330, 440, 550}, scale_type{-1}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; - auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{-1}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{-2}}; - auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{1}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv3) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(12, 
scale_type{-1}); - auto const expected = fp_wrapper{{0, 2, 4, 5}, scale_type{0}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv4) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto begin = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 11; }); - auto result_begin = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i * 11) / 12; }); - auto const lhs = fp_wrapper(begin, begin + 1000, scale_type{-1}); - auto const rhs = make_fixed_point_scalar(12, scale_type{-1}); - auto const expected = fp_wrapper(result_begin, result_begin + 1000, scale_type{0}); - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; - auto const expected = fp_wrapper{{210, 420, 630, 840, 1050}, scale_type{-2}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, 
FixedPointBinaryOpAdd3) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-3}}; - auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; - auto const expected = fp_wrapper{{2100, 4200, 6300, 8400, 10500}, scale_type{-3}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd4) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(100, scale_type{-2}); - auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::ADD, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd5) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(100, scale_type{-2}); - auto const rhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::ADD, lhs->type(), static_cast(rhs).type()); - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type); - - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd6) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col = fp_wrapper{{3, 4, 5, 6, 7, 8}, scale_type{0}}; - - auto const expected1 = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; - auto const expected2 = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; - auto const type1 = cudf::data_type{cudf::type_to_id(), 0}; - auto const type2 = cudf::data_type{cudf::type_to_id(), 1}; - auto const result1 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type1); - auto const result2 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type2); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointCast) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; - auto const expected = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; - auto const type = cudf::data_type{cudf::type_to_id(), 1}; - auto const result = cudf::cast(col, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiplyScalar) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(100, scale_type{-1}); - auto const expected = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-2}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::MUL, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type); - - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpSimplePlus) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{150, 200}, scale_type{-2}}; - auto const rhs = fp_wrapper{{2250, 1005}, scale_type{-3}}; - auto const expected = fp_wrapper{{3750, 3005}, scale_type{-3}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; - auto const col2 = fp_wrapper{{100, 200, 300, 400}, scale_type{-2}}; - auto const expected = wrapper(trues.begin(), trues.end()); - - auto const result = - cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; - auto const expected = wrapper(trues.begin(), trues.end()); - - auto const result = - cudf::binary_operation(col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0Null) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = 
device_storage_type_t; - - auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{0}}; - auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; - auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; - - auto const result = - cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale2Null) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{-2}}; - auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; - auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; - - auto const result = - cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualLessGreater) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const sz = std::size_t{1000}; - - // TESTING binary op ADD - - auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto e) { return e * 1000; }); - auto const vec1 = std::vector(begin, begin + sz); - auto const vec2 = std::vector(sz, 0); - - auto const iota_3 = fp_wrapper(vec1.begin(), vec1.end(), scale_type{-3}); - auto const zeros_3 = fp_wrapper(vec2.begin(), vec2.end(), scale_type{-1}); - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(iota_3).type(), - static_cast(zeros_3).type()); - auto const iota_3_after_add = cudf::binary_operation(zeros_3, iota_3, binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(iota_3, iota_3_after_add->view()); - - // TESTING binary op EQUAL, LESS, GREATER - - auto const trues = 
std::vector(sz, true); - auto const true_col = wrapper(trues.begin(), trues.end()); - - auto const btype = cudf::data_type{type_id::BOOL8}; - auto const equal_result = - cudf::binary_operation(iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, equal_result->view()); - - auto const less_result = - cudf::binary_operation(zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, less_result->view()); - - auto const greater_result = - cudf::binary_operation(iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMaxSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{40, 30, 20, 10, 0}, {1, 0, 1, 1, 0}, scale_type{-2}}; - auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 1, 1, 0, 0}, scale_type{-2}}; - auto const expected = fp_wrapper{{40, 20, 30, 10, 0}, {1, 1, 1, 1, 0}, scale_type{-2}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MAX, - static_cast(col1).type(), - static_cast(col2).type()); - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MAX, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMinSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{40, 30, 20, 10, 0}, {1, 1, 1, 0, 0}, scale_type{-1}}; - auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 0, 1, 1, 0}, scale_type{-1}}; - auto const expected = fp_wrapper{{10, 30, 20, 40, 0}, {1, 1, 1, 1, 0}, scale_type{-1}}; - - auto const 
type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MIN, - static_cast(col1).type(), - static_cast(col2).type()); - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MIN, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullEqualsSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{400, 300, 300, 100}, {1, 1, 1, 0}, scale_type{-2}}; - auto const col2 = fp_wrapper{{40, 200, 20, 400}, {1, 0, 1, 0}, scale_type{-1}}; - auto const expected = wrapper{{1, 0, 0, 1}, {1, 1, 1, 1}}; - - auto const result = cudf::binary_operation( - col1, col2, binary_operator::NULL_EQUALS, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; - auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; - auto const expected = fp_wrapper{{25, 75, 125, 175}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{100000, 300000, 500000, 700000}, scale_type{-3}}; - auto const rhs = fp_wrapper{{20, 20, 20, 20}, scale_type{-1}}; - auto const expected = fp_wrapper{{5000, 15000, 25000, 35000}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = 
cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div3) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10000, 30000, 50000, 70000}, scale_type{-2}}; - auto const rhs = fp_wrapper{{3, 9, 3, 3}, scale_type{0}}; - auto const expected = fp_wrapper{{3333, 3333, 16666, 23333}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div4) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; - auto const rhs = make_fixed_point_scalar(3, scale_type{0}); - auto const expected = fp_wrapper{{3, 10, 16, 23}, scale_type{1}}; - - auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div6) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(3000, scale_type{-3}); - auto const rhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - - auto const expected = fp_wrapper{{300, 100, 60, 42}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div7) -{ - using namespace numeric; - using decimalXX = TypeParam; - using 
RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(1200, scale_type{0}); - auto const rhs = fp_wrapper{{100, 200, 300, 500, 600, 800, 1200, 1300}, scale_type{-2}}; - - auto const expected = fp_wrapper{{12, 6, 4, 2, 2, 1, 1, 0}, scale_type{2}}; - - auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div8) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{4000, 6000, 80000}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(5000, scale_type{-3}); - auto const expected = fp_wrapper{{0, 1, 16}, scale_type{2}}; - - auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div9) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 20, 30}, scale_type{2}}; - auto const rhs = make_fixed_point_scalar(7, scale_type{1}); - auto const expected = fp_wrapper{{1, 2, 4}, scale_type{1}}; - - auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div10) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{100, 200, 300}, scale_type{1}}; - auto const rhs = make_fixed_point_scalar(7, scale_type{0}); - auto const expected = fp_wrapper{{14, 28, 42}, scale_type{1}}; - - auto const type = 
data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div11) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{1000, 2000, 3000}, scale_type{1}}; - auto const rhs = fp_wrapper{{7, 7, 7}, scale_type{0}}; - auto const expected = fp_wrapper{{142, 285, 428}, scale_type{1}}; - - auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpThrows) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; - auto const non_bool_type = data_type{type_to_id(), -2}; - EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), - cudf::logic_error); -} - -template -struct FixedPointTest_64_128_Reps : public cudf::test::BaseFixture { -}; - -using Decimal64And128Types = cudf::test::Types; -TYPED_TEST_SUITE(FixedPointTest_64_128_Reps, Decimal64And128Types); - -TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - for (auto const rhs_value : {10000000000000000, 100000000000000000}) { - auto const lhs = fp_wrapper{{33041, 97290, 36438, 25379, 48473}, scale_type{2}}; - auto const rhs = make_fixed_point_scalar(rhs_value, scale_type{0}); - auto const trues = wrapper{{1, 1, 1, 1, 1}}; - auto const falses = wrapper{{0, 0, 0, 0, 0}}; - auto const bool_type = cudf::data_type{type_id::BOOL8}; - - auto const a = cudf::binary_operation(lhs, *rhs, 
binary_operator::LESS, bool_type); - auto const b = cudf::binary_operation(lhs, *rhs, binary_operator::LESS_EQUAL, bool_type); - auto const c = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER, bool_type); - auto const d = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER_EQUAL, bool_type); - auto const e = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER, bool_type); - auto const f = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER_EQUAL, bool_type); - auto const g = cudf::binary_operation(*rhs, lhs, binary_operator::LESS, bool_type); - auto const h = cudf::binary_operation(*rhs, lhs, binary_operator::LESS_EQUAL, bool_type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(a->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(b->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(c->view(), falses); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(d->view(), falses); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(e->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(f->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(g->view(), falses); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); - } -} - -} // namespace binop -} // namespace test -} // namespace cudf - -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/binaryop/binop-null-test.cpp b/cpp/tests/binaryop/binop-null-test.cpp index 25ec3b30834..55ddde5ce5f 100644 --- a/cpp/tests/binaryop/binop-null-test.cpp +++ b/cpp/tests/binaryop/binop-null-test.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace cudf { namespace test { namespace binop { @@ -52,6 +54,12 @@ struct BinaryOperationNullTest : public BinaryOperationTest { default: CUDF_FAIL("Unknown mask state " + std::to_string(static_cast(state))); } } + + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } + } }; // namespace binop TEST_F(BinaryOperationNullTest, Scalar_Null_Vector_Valid) @@ -66,8 +74,8 @@ TEST_F(BinaryOperationNullTest, Scalar_Null_Vector_Valid) 
lhs.set_valid_async(false); auto rhs = make_random_wrapped_column(100, mask_state::ALL_VALID); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -83,8 +91,8 @@ TEST_F(BinaryOperationNullTest, Scalar_Valid_Vector_NonNullable) auto lhs = make_random_wrapped_scalar(); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -101,8 +109,8 @@ TEST_F(BinaryOperationNullTest, Scalar_Null_Vector_NonNullable) lhs.set_valid_async(false); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -118,8 +126,8 @@ TEST_F(BinaryOperationNullTest, Vector_Null_Scalar_Valid) auto lhs = make_random_wrapped_scalar(); auto rhs = make_random_wrapped_column(100, mask_state::ALL_NULL); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -135,8 +143,8 @@ TEST_F(BinaryOperationNullTest, Vector_Null_Vector_Valid) auto lhs = make_random_wrapped_column(100, mask_state::ALL_NULL); auto rhs = make_random_wrapped_column(100, mask_state::ALL_VALID); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + 
cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -152,8 +160,8 @@ TEST_F(BinaryOperationNullTest, Vector_Null_Vector_NonNullable) auto lhs = make_random_wrapped_column(100, mask_state::ALL_NULL); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -169,8 +177,8 @@ TEST_F(BinaryOperationNullTest, Vector_Valid_Vector_NonNullable) auto lhs = make_random_wrapped_column(100, mask_state::ALL_VALID); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -186,8 +194,8 @@ TEST_F(BinaryOperationNullTest, Vector_NonNullable_Vector_NonNullable) auto lhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } diff --git a/cpp/tests/binaryop/binop-verify-input-test.cpp b/cpp/tests/binaryop/binop-verify-input-test.cpp index 779dc7c4c1f..167fbc22bde 100644 --- a/cpp/tests/binaryop/binop-verify-input-test.cpp +++ b/cpp/tests/binaryop/binop-verify-input-test.cpp @@ -35,9 +35,9 @@ TEST_F(BinopVerifyInputTest, Vector_Scalar_ErrorOutputVectorType) auto lhs = make_random_wrapped_scalar(); auto rhs = make_random_wrapped_column(10); - 
EXPECT_THROW(cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_id::NUM_TYPE_IDS)), - cudf::logic_error); + EXPECT_THROW( + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_id::NUM_TYPE_IDS)), + cudf::logic_error); } TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize) @@ -49,9 +49,9 @@ TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize) auto lhs = make_random_wrapped_column(1); auto rhs = make_random_wrapped_column(10); - EXPECT_THROW(cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())), - cudf::logic_error); + EXPECT_THROW( + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())), + cudf::logic_error); } } // namespace binop diff --git a/cpp/tests/binaryop/util/runtime_support.h b/cpp/tests/binaryop/util/runtime_support.h new file mode 100644 index 00000000000..250d34a0879 --- /dev/null +++ b/cpp/tests/binaryop/util/runtime_support.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +inline bool can_do_runtime_jit() +{ + // We require a CUDA NVRTC of 11.5+ to do runtime jit + // as we need support for __int128 + + int runtime = 0; + auto error_value = cudaRuntimeGetVersion(&runtime); + return (error_value == cudaSuccess) && (runtime >= 11050); +} diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index 4ec347c4bc1..07eb595449c 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -18,8 +18,8 @@ #include #include -#include // include iterator header -#include //for meanvar +#include +#include // for meanvar #include #include @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -83,7 +84,17 @@ struct IteratorTest : public cudf::test::BaseFixture { EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items); auto dev_expected = cudf::detail::make_device_uvector_sync(expected); - bool result = thrust::equal(thrust::device, d_in, d_in_last, dev_expected.begin()); + // using a temporary vector and calling transform and all_of separately is + // equivalent to thrust::equal but compiles ~3x faster + auto dev_results = rmm::device_uvector(num_items, rmm::cuda_stream_default); + thrust::transform(thrust::device, + d_in, + d_in_last, + dev_expected.begin(), + dev_results.begin(), + thrust::equal_to{}); + auto result = thrust::all_of( + thrust::device, dev_results.begin(), dev_results.end(), thrust::identity{}); EXPECT_TRUE(result) << "thrust test"; } diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index 6d51f4a5c14..a8c135a726f 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -50,21 +50,15 @@ struct transformer_optional_meanvar { } }; -struct sum_if_not_null { - template - CUDA_HOST_DEVICE_CALLABLE thrust::optional operator()(const thrust::optional& lhs, - const 
thrust::optional& rhs) - { - return lhs.value_or(T{0}) + rhs.value_or(T{0}); - } +template +struct optional_to_meanvar { + CUDA_HOST_DEVICE_CALLABLE T operator()(const thrust::optional& v) { return v.value_or(T{0}); } }; // TODO: enable this test also at __CUDACC_DEBUG__ // This test causes fatal compilation error only at device debug mode. // Workaround: exclude this test only at device debug mode. #if !defined(__CUDACC_DEBUG__) -// This test computes `count`, `sum`, `sum_of_squares` at a single reduction call. -// It would be useful for `var`, `std` operation TYPED_TEST(NumericOptionalIteratorTest, mean_var_output) { using T = TypeParam; @@ -104,22 +98,27 @@ TYPED_TEST(NumericOptionalIteratorTest, mean_var_output) expected_value.value_squared = std::accumulate( replaced_array.begin(), replaced_array.end(), T{0}, [](T acc, T i) { return acc + i * i; }); - // std::cout << "expected = " << expected_value << std::endl; - // GPU test auto it_dev = d_col->optional_begin(cudf::contains_nulls::YES{}); auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer); - auto result = thrust::reduce(it_dev_squared, - it_dev_squared + d_col->size(), - thrust::optional{T_output{}}, - sum_if_not_null{}); + + // this can be computed with a single reduce and without a temporary output vector + // but the approach increases the compile time by ~2x + auto results = rmm::device_uvector(d_col->size(), rmm::cuda_stream_default); + thrust::transform(thrust::device, + it_dev_squared, + it_dev_squared + d_col->size(), + results.begin(), + optional_to_meanvar{}); + auto result = thrust::reduce(thrust::device, results.begin(), results.end(), T_output{}); + if (not std::is_floating_point()) { - EXPECT_EQ(expected_value, *result) << "optional iterator reduction sum"; + EXPECT_EQ(expected_value, result) << "optional iterator reduction sum"; } else { - EXPECT_NEAR(expected_value.value, result->value, 1e-3) << "optional iterator reduction sum"; - 
EXPECT_NEAR(expected_value.value_squared, result->value_squared, 1e-3) + EXPECT_NEAR(expected_value.value, result.value, 1e-3) << "optional iterator reduction sum"; + EXPECT_NEAR(expected_value.value_squared, result.value_squared, 1e-3) << "optional iterator reduction sum squared"; - EXPECT_EQ(expected_value.count, result->count) << "optional iterator reduction count"; + EXPECT_EQ(expected_value.count, result.count) << "optional iterator reduction count"; } } #endif diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 2c9279260e7..d8ee8f9d08d 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,6 @@ #include #include #include -#include #include diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index d1e983460d5..0892436eb47 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -397,3 +398,198 @@ TYPED_TEST(ScanDurationTest, Sum) EXPECT_THROW(cudf::scan(col, cudf::make_sum_aggregation(), cudf::scan_type::EXCLUSIVE), cudf::logic_error); } + +struct StructScanTest : public cudf::test::BaseFixture { +}; + +TEST_F(StructScanTest, StructScanMinMaxNoNull) +{ + using INTS_CW = cudf::test::fixed_width_column_wrapper; + using STRINGS_CW = cudf::test::strings_column_wrapper; + using STRUCTS_CW = cudf::test::structs_column_wrapper; + + auto const input = [] { + auto child1 = STRINGS_CW{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = INTS_CW{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return STRUCTS_CW{{child1, child2}}; + }(); + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "año", "año", "aaa", "aaa", "aaa", "aaa", "$1", "$1", "$1"}; + auto child2 = INTS_CW{1, 1, 1, 4, 4, 4, 4, 8, 8, 8}; + return 
STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "bit", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1"}; + auto child2 = INTS_CW{1, 2, 3, 3, 3, 3, 3, 3, 3, 3}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(StructScanTest, StructScanMinMaxSlicedInput) +{ + using INTS_CW = cudf::test::fixed_width_column_wrapper; + using STRINGS_CW = cudf::test::strings_column_wrapper; + using STRUCTS_CW = cudf::test::structs_column_wrapper; + constexpr int32_t dont_care{1}; + + auto const input_original = [] { + auto child1 = STRINGS_CW{"$dont_care", + "$dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "₹dont_care"}; + auto child2 = INTS_CW{dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return STRUCTS_CW{{child1, child2}}; + }(); + + auto const input = cudf::slice(input_original, {2, 12})[0]; + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "año", "año", "aaa", "aaa", "aaa", "aaa", "$1", "$1", "$1"}; + auto child2 = INTS_CW{1, 1, 1, 4, 4, 4, 4, 8, 8, 8}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "bit", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1"}; + auto child2 = INTS_CW{1, 2, 3, 3, 3, 3, 3, 3, 3, 3}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(StructScanTest, StructScanMinMaxWithNulls) +{ + using INTS_CW = cudf::test::fixed_width_column_wrapper; + using STRINGS_CW = cudf::test::strings_column_wrapper; + using STRUCTS_CW = cudf::test::structs_column_wrapper; + using cudf::test::iterators::nulls_at; + + auto const input = [] { + auto child1 = STRINGS_CW{{"año", + "bit", + "₹1" /*NULL*/, + "aaa" /*NULL*/, + "zit", + "bat", + "aab", + "$1" /*NULL*/, + "€1" /*NULL*/, + "wut"}, + nulls_at({2, 7})}; + auto child2 = INTS_CW{{1, 2, 3 /*NULL*/, 4 /*NULL*/, 5, 6, 7, 8 /*NULL*/, 9 /*NULL*/, 10}, + nulls_at({2, 7})}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; + }(); + + { + auto const expected = [] { + auto child1 = STRINGS_CW{ + "año", "año", "año", "" /*NULL*/, "año", "año", "aab", "aab", "" /*NULL*/, "aab"}; + auto child2 = INTS_CW{1, 1, 1, 0 /*NULL*/, 1, 1, 7, 7, 0 /*NULL*/, 7}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{ + "año", "bit", "bit", "" /*NULL*/, "zit", "zit", "zit", "zit", "" /*NULL*/, "zit"}; + auto child2 = INTS_CW{1, 2, 2, 0 /*NULL*/, 5, 5, 5, 5, 0 /*NULL*/, 5}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", + "año", + "año", + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/}; + auto child2 = INTS_CW{1, + 1, + 1, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/}; + return 
STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", + "bit", + "bit", + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/}; + auto child2 = INTS_CW{1, + 2, + 2, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 126bffa1e49..e6f4f6bb8d9 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -58,32 +58,20 @@ TEST_F(StringsConvertTest, IsFloat) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); cudf::test::strings_column_wrapper strings2( - {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); + {"-34", "9.8", "1234567890", "-917.2e5", "INF", "NAN", "-Inf", "INFINITY"}); results = cudf::strings::is_float(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); } TEST_F(StringsConvertTest, ToFloats32) { - std::vector h_strings{"1234", - nullptr, - "-876", - "543.2", - "-0.12", - ".25", - "-.002", - "", - "-0.0", - "1.2e4", - "NaN", - "abc123", - "123abc", - "456e", - "-1.78e+5", - "-122.33644782123456789", - "12e+309", - "3.4028236E38"}; + std::vector h_strings{ + "1234", nullptr, "-876", "543.2", + "-0.12", ".25", 
"-.002", "", + "-0.0", "1.2e4", "NAN", "abc123", + "123abc", "456e", "-1.78e+5", "-122.33644782123456789", + "12e+309", "3.4028236E38", "INF", "Infinity"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), @@ -135,24 +123,11 @@ TEST_F(StringsConvertTest, FromFloats32) TEST_F(StringsConvertTest, ToFloats64) { - std::vector h_strings{"1234", - nullptr, - "-876", - "543.2", - "-0.12", - ".25", - "-.002", - "", - "-0.0", - "1.28e256", - "NaN", - "abc123", - "123abc", - "456e", - "-1.78e+5", - "-122.33644782", - "12e+309", - "1.7976931348623159E308"}; + std::vector h_strings{ + "1234", nullptr, "-876", "543.2", "-0.12", ".25", + "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", + "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", + "-Inf", "-INFINITY"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst index cf08d1d791b..575d7442cdf 100644 --- a/docs/cudf/source/api_docs/groupby.rst +++ b/docs/cudf/source/api_docs/groupby.rst @@ -59,6 +59,7 @@ Computations / descriptive stats GroupBy.std GroupBy.sum GroupBy.var + GroupBy.corr The following methods are available in both ``SeriesGroupBy`` and ``DataFrameGroupBy`` objects, but may differ slightly, usually in that diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst index 04c4d42fa2a..f3269768025 100644 --- a/docs/cudf/source/basics/groupby.rst +++ b/docs/cudf/source/basics/groupby.rst @@ -127,6 +127,13 @@ Aggregations on groups is supported via the ``agg`` method: a 1 4 1 2.0 2 5 2 4.5 + >>> df.groupby("a").corr(method="pearson") + b c + a + 1 b 1.000000 0.866025 + c 0.866025 1.000000 + 2 b 1.000000 1.000000 + c 1.000000 1.000000 The following table summarizes the available aggregations and the types that support them: @@ -169,6 +176,9 @@ that support them: 
+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ | unique | ✅ | ✅ | ✅ | ✅ | | | | | +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | corr | ✅ | | | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + GroupBy apply ------------- diff --git a/java/pom.xml b/java/pom.xml index 87d43ec1272..c5a3bc64fad 100755 --- a/java/pom.xml +++ b/java/pom.xml @@ -297,9 +297,6 @@ LICENSE - - ${project.build.directory}/native-deps/ - @@ -499,14 +496,14 @@ copy-native-libs - validate + generate-resources copy-resources true ${skipNativeCopy} - ${project.build.directory}/native-deps/${os.arch}/${os.name} + ${project.build.outputDirectory}/${os.arch}/${os.name} ${native.build.path} diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 729444f460c..6d0d24baf99 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -30,6 +30,10 @@ */ public class ColumnView implements AutoCloseable, BinaryOperable { + static { + NativeDepsLoader.loadNativeDeps(); + } + public static final long UNKNOWN_NULL_COUNT = -1; protected long viewHandle; diff --git a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java index 8780ecc3aa3..9663fbcafb4 100755 --- a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java +++ b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java @@ -81,9 +81,7 @@ public static synchronized void loadNativeDeps() { /** * Allows other libraries to reuse the same native deps loading logic. Libraries will be searched - * for under ${os.arch}/${os.name}/ in the class path using the class loader for this class. 
It - * will also look for the libraries under ./target/native-deps/${os.arch}/${os.name} to help - * facilitate testing while building. + * for under ${os.arch}/${os.name}/ in the class path using the class loader for this class. *
* Because this just loads the libraries and loading the libraries themselves needs to be a * singleton operation it is recommended that any library using this provide their own wrapper @@ -203,12 +201,7 @@ private static File createFile(String os, String arch, String baseName) throws I File loc; URL resource = loader.getResource(path); if (resource == null) { - // It looks like we are not running from the jar, or there are issues with the jar - File f = new File("./target/native-deps/" + path); - if (!f.exists()) { - throw new FileNotFoundException("Could not locate native dependency " + path); - } - resource = f.toURI().toURL(); + throw new FileNotFoundException("Could not locate native dependency " + path); } try (InputStream in = resource.openStream()) { loc = File.createTempFile(baseName, ".so"); diff --git a/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java index d5d4059d18d..2a11b24b3a8 100644 --- a/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; -import ai.rapids.cudf.HostColumnVector.BasicType; import ai.rapids.cudf.HostColumnVector.ListType; import ai.rapids.cudf.HostColumnVector.StructType; @@ -40,7 +39,7 @@ import org.junit.jupiter.api.Test; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; diff --git a/java/src/test/java/ai/rapids/cudf/AssertUtils.java b/java/src/test/java/ai/rapids/cudf/AssertUtils.java new file mode 100644 index 00000000000..184e7dd0c57 --- /dev/null +++ b/java/src/test/java/ai/rapids/cudf/AssertUtils.java @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ai.rapids.cudf;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/** Static assertion helpers for comparing columns and tables in unit tests */
+public class AssertUtils {
+
+  /**
+   * Asserts that two columns match, reporting failures under the column name "unnamed"
+   * @param expect The expected result column
+   * @param cv The input column
+   */
+  public static void assertColumnsAreEqual(ColumnView expect, ColumnView cv) {
+    assertColumnsAreEqual(expect, cv, "unnamed");
+  }
+
+  /**
+   * Asserts that two columns match over all rows of expected, including their null counts
+   * @param expected The expected result column
+   * @param cv The input column
+   * @param colName The name of the column, used in failure messages
+   */
+  public static void assertColumnsAreEqual(ColumnView expected, ColumnView cv, String colName) {
+    assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false);
+  }
+
+  /**
+   * Asserts that two host columns match over all rows of expected, including their null counts
+   * @param expected The expected result host column
+   * @param cv The input host column
+   * @param colName The name of the host column, used in failure messages
+   */
+  public static void assertColumnsAreEqual(HostColumnVector expected, HostColumnVector cv, String colName) {
+    assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false);
+  }
+
+  /**
+   * Asserts that two Struct columns match, reporting failures under the column name "unnamed"
+   * @param expected The expected result Struct column
+   * @param cv The input Struct column
+   */
+  public static void assertStructColumnsAreEqual(ColumnView expected, ColumnView cv) {
+    assertPartialStructColumnsAreEqual(expected, 0, expected.getRowCount(), cv, "unnamed", true, false);
+  }
+
+  /**
+   * Copies both Struct columns to the host and asserts that the requested rows match
+   * @param expected The expected result Struct column
+   * @param rowOffset The first row of expected to compare
+   * @param length The number of rows to compare
+   * @param cv The input Struct column
+   * @param colName The name of the column, used in failure messages
+   * @param enableNullCountCheck Whether to also assert that the null counts are equal
+   * @param enableNullabilityCheck Whether to also assert that both agree on having a validity vector
+   */
+  public static void assertPartialStructColumnsAreEqual(ColumnView expected, long rowOffset, long length,
+      ColumnView cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) {
+    try (HostColumnVector hostExpected = expected.copyToHost();
+         HostColumnVector hostcv = cv.copyToHost()) {
+      assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCountCheck, enableNullabilityCheck);
+    }
+  }
+
+  /**
+   * Copies both columns to the host and asserts that the requested rows match
+   * @param expected The expected result column
+   * @param rowOffset The first row of expected to compare
+   * @param length The number of rows to compare
+   * @param cv The input column
+   * @param colName The name of the column, used in failure messages
+   * @param enableNullCheck Whether to also assert that the null counts are equal
+   * @param enableNullabilityCheck Whether to also assert that both agree on having a validity vector
+   */
+  public static void assertPartialColumnsAreEqual(ColumnView expected, long rowOffset, long length,
+      ColumnView cv, String colName, boolean enableNullCheck, boolean enableNullabilityCheck) {
+    try (HostColumnVector hostExpected = expected.copyToHost();
+         HostColumnVector hostcv = cv.copyToHost()) {
+      assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCheck, enableNullabilityCheck);
+    }
+  }
+
+  /**
+   * Asserts that rows [rowOffset, rowOffset + length) of expected match rows [0, length) of cv
+   * @param expected The expected result host column
+   * @param rowOffset start row index within expected
+   * @param length number of rows from starting offset; must equal the row count of cv
+   * @param cv The input host column
+   * @param colName The name of the host column, used in failure messages
+   * @param enableNullCountCheck Whether to also assert that the null counts are equal
+   * @param enableNullabilityCheck Whether to also assert that both agree on having a validity vector
+   */
+  public static void assertPartialColumnsAreEqual(HostColumnVectorCore expected, long rowOffset, long length,
+      HostColumnVectorCore cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) {
+    assertEquals(expected.getType(), cv.getType(), "Type For Column " + colName);
+    assertEquals(length, cv.getRowCount(), "Row Count For Column " + colName);
+    assertEquals(expected.getNumChildren(), cv.getNumChildren(), "Child Count for Column " + colName);
+    // TODO add in a proper check when null counts are supported by serializing a partitioned column
+    if (enableNullCountCheck) {
+      assertEquals(expected.getNullCount(), cv.getNullCount(), "Null Count For Column " + colName);
+    }
+    if (enableNullabilityCheck) {
+      assertEquals(expected.hasValidityVector(), cv.hasValidityVector(), "Column nullability is different than expected");
+    }
+    DType type = expected.getType();
+    for (long expectedRow = rowOffset; expectedRow < (rowOffset + length); expectedRow++) {
+      long tableRow = expectedRow - rowOffset; // cv is indexed from 0, expected from rowOffset
+      assertEquals(expected.isNull(expectedRow), cv.isNull(tableRow),
+          "NULL for Column " + colName + " Row " + tableRow);
+      if (!expected.isNull(expectedRow)) {
+        switch (type.typeId) {
+          case BOOL8: // fall through
+          case INT8: // fall through
+          case UINT8:
+            assertEquals(expected.getByte(expectedRow), cv.getByte(tableRow),
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case INT16: // fall through
+          case UINT16:
+            assertEquals(expected.getShort(expectedRow), cv.getShort(tableRow),
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case INT32: // fall through
+          case UINT32: // fall through
+          case TIMESTAMP_DAYS: // fall through
+          case DURATION_DAYS: // fall through
+          case DECIMAL32:
+            assertEquals(expected.getInt(expectedRow), cv.getInt(tableRow),
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case INT64: // fall through
+          case UINT64: // fall through
+          case DURATION_MICROSECONDS: // fall through
+          case DURATION_MILLISECONDS: // fall through
+          case DURATION_NANOSECONDS: // fall through
+          case DURATION_SECONDS: // fall through
+          case TIMESTAMP_MICROSECONDS: // fall through
+          case TIMESTAMP_MILLISECONDS: // fall through
+          case TIMESTAMP_NANOSECONDS: // fall through
+          case TIMESTAMP_SECONDS: // fall through
+          case DECIMAL64:
+            assertEquals(expected.getLong(expectedRow), cv.getLong(tableRow),
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case DECIMAL128:
+            assertEquals(expected.getBigDecimal(expectedRow), cv.getBigDecimal(tableRow),
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case FLOAT32:
+            CudfTestBase.assertEqualsWithinPercentage(expected.getFloat(expectedRow), cv.getFloat(tableRow), 0.0001,
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case FLOAT64:
+            CudfTestBase.assertEqualsWithinPercentage(expected.getDouble(expectedRow), cv.getDouble(tableRow), 0.0001,
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case STRING:
+            assertArrayEquals(expected.getUTF8(expectedRow), cv.getUTF8(tableRow),
+                "Column " + colName + " Row " + tableRow);
+            break;
+          case LIST:
+            // only the per-row child counts are compared here; child values are compared below
+            HostMemoryBuffer expectedOffsets = expected.getOffsets();
+            HostMemoryBuffer cvOffsets = cv.getOffsets();
+            int expectedChildRows = expectedOffsets.getInt((expectedRow + 1) * 4) -
+                expectedOffsets.getInt(expectedRow * 4);
+            int cvChildRows = cvOffsets.getInt((tableRow + 1) * 4) -
+                cvOffsets.getInt(tableRow * 4);
+            assertEquals(expectedChildRows, cvChildRows, "Child row count for Column " +
+                colName + " Row " + tableRow);
+            break;
+          case STRUCT:
+            // parent column only has validity which was checked above
+            break;
+          default:
+            throw new IllegalArgumentException(type + " is not supported yet");
+        }
+      }
+    }
+
+    if (type.isNestedType()) {
+      switch (type.typeId) {
+        case LIST:
+          int expectedChildRowOffset = 0;
+          int numChildRows = 0;
+          if (length > 0) {
+            HostMemoryBuffer expectedOffsets = expected.getOffsets();
+            expectedChildRowOffset = expectedOffsets.getInt(rowOffset * 4);
+            numChildRows = expectedOffsets.getInt((rowOffset + length) * 4) -
+                expectedChildRowOffset;
+          }
+          assertPartialColumnsAreEqual(expected.getNestedChildren().get(0), expectedChildRowOffset,
+              numChildRows, cv.getNestedChildren().get(0), colName + " list child",
+              enableNullCountCheck, enableNullabilityCheck);
+          break;
+        case STRUCT:
+          List<? extends HostColumnVectorCore> expectedChildren = expected.getNestedChildren();
+          List<? extends HostColumnVectorCore> cvChildren = cv.getNestedChildren();
+          for (int i = 0; i < expectedChildren.size(); i++) {
+            HostColumnVectorCore expectedChild = expectedChildren.get(i);
+            HostColumnVectorCore cvChild = cvChildren.get(i);
+            String childName = colName + " child " + i; // names the failing child in messages
+            assertEquals(length, cvChild.getRowCount(), "Row Count for Column " + childName);
+            assertPartialColumnsAreEqual(expectedChild, rowOffset, length, cvChild,
+                childName, enableNullCountCheck, enableNullabilityCheck);
+          }
+          break;
+        default:
+          throw new IllegalArgumentException(type + " is not supported yet");
+      }
+    }
+  }
+
+  /**
+   * Asserts that rows [rowOffset, rowOffset + length) of expected match all rows of table.
+   * @param expected the expected result table
+   * @param rowOffset the row number of expected to start checking from
+   * @param length the number of rows to check; must equal the row count of table
+   * @param table the input table to compare against expected
+   * @param enableNullCheck whether to also assert that the null counts are equal
+   * @param enableNullabilityCheck whether to also assert that columns agree on having a validity vector
+   */
+  public static void assertPartialTablesAreEqual(Table expected, long rowOffset, long length, Table table,
+                                                 boolean enableNullCheck, boolean enableNullabilityCheck) {
+    assertEquals(expected.getNumberOfColumns(), table.getNumberOfColumns());
+    assertEquals(length, table.getRowCount(), "ROW COUNT");
+    for (int col = 0; col < expected.getNumberOfColumns(); col++) {
+      ColumnVector expect = expected.getColumn(col);
+      ColumnVector cv = table.getColumn(col);
+      String name = String.valueOf(col);
+      if (rowOffset != 0 || length != expected.getRowCount()) {
+        name = name + " PART " + rowOffset + "-" + (rowOffset + length - 1);
+      }
+      assertPartialColumnsAreEqual(expect, rowOffset, length, cv, name, enableNullCheck, enableNullabilityCheck);
+    }
+  }
+
+  /**
+   * Asserts that the two tables match over all rows of expected, including null counts
+   * @param expected the expected result table
+   * @param table the input table to compare against expected
+   */
+  public static void assertTablesAreEqual(Table expected, Table table) {
+    assertPartialTablesAreEqual(expected, 0, expected.getRowCount(), table, true, false);
+  }
+
+  /** Asserts that {@code t} has exactly the column types in {@code expectedTypes}, in order. */
+  public static void assertTableTypes(DType[] expectedTypes, Table t) {
+    int len = t.getNumberOfColumns();
+    assertEquals(expectedTypes.length, len);
+    for (int i = 0; i < len; i++) {
+      assertEquals(expectedTypes[i], t.getColumn(i).getType(), "Types don't match at " + i);
+    }
+  }
+}
diff --git a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java
index 894861b8c44..0ca997d3c80 100644
--- a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java
+++
b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java @@ -27,7 +27,7 @@ import java.util.Arrays; import java.util.stream.IntStream; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static ai.rapids.cudf.TestUtils.*; import static org.junit.jupiter.api.Assertions.assertThrows; diff --git a/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java index 878fa7e4516..a26dbec4907 100644 --- a/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java @@ -127,9 +127,9 @@ public void testCastToByte() { ColumnVector expected1 = ColumnVector.fromBytes((byte)4, (byte)3, (byte)8); ColumnVector expected2 = ColumnVector.fromBytes((byte)100); ColumnVector expected3 = ColumnVector.fromBytes((byte)-23)) { - TableTest.assertColumnsAreEqual(expected1, byteColumnVector1); - TableTest.assertColumnsAreEqual(expected2, byteColumnVector2); - TableTest.assertColumnsAreEqual(expected3, byteColumnVector3); + AssertUtils.assertColumnsAreEqual(expected1, byteColumnVector1); + AssertUtils.assertColumnsAreEqual(expected2, byteColumnVector2); + AssertUtils.assertColumnsAreEqual(expected3, byteColumnVector3); } } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index a582541a0d4..fa9052029cc 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -34,8 +34,10 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertStructColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertTablesAreEqual; import static ai.rapids.cudf.QuantileMethod.*; -import static ai.rapids.cudf.TableTest.*; import static 
org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -86,8 +88,8 @@ void testTransformVector() { ColumnVector cv1 = cv.transform(ptx, true); ColumnVector cv2 = cv.transform(cuda, false); ColumnVector expected = ColumnVector.fromBoxedInts(2*2-2, 3*3-3, null, 4*4-4)) { - TableTest.assertColumnsAreEqual(expected, cv1); - TableTest.assertColumnsAreEqual(expected, cv2); + assertColumnsAreEqual(expected, cv1); + assertColumnsAreEqual(expected, cv2); } } @@ -252,7 +254,7 @@ void testStringCreation() { try (ColumnVector cv = ColumnVector.fromStrings("d", "sd", "sde", null, "END"); HostColumnVector host = cv.copyToHost(); ColumnVector backAgain = host.copyToDevice()) { - TableTest.assertColumnsAreEqual(cv, backAgain); + assertColumnsAreEqual(cv, backAgain); } } @@ -265,7 +267,7 @@ void testUTF8StringCreation() { null, "END".getBytes(StandardCharsets.UTF_8)); ColumnVector expected = ColumnVector.fromStrings("d", "sd", "sde", null, "END")) { - TableTest.assertColumnsAreEqual(expected, cv); + assertColumnsAreEqual(expected, cv); } } @@ -299,7 +301,7 @@ void testConcatNoNulls() { ColumnVector v2 = ColumnVector.fromInts(8, 9); ColumnVector v = ColumnVector.concatenate(v0, v1, v2); ColumnVector expected = ColumnVector.fromInts(1, 2, 3, 4, 5, 6, 7, 8, 9)) { - TableTest.assertColumnsAreEqual(expected, v); + assertColumnsAreEqual(expected, v); } } @@ -310,7 +312,7 @@ void testConcatWithNulls() { ColumnVector v2 = ColumnVector.fromBoxedDoubles(null, 9.0); ColumnVector v = ColumnVector.concatenate(v0, v1, v2); ColumnVector expected = ColumnVector.fromBoxedDoubles(1., 2., 3., 4., 5., 6., 7., null, 9.)) { - TableTest.assertColumnsAreEqual(expected, v); + assertColumnsAreEqual(expected, v); } } @@ -1882,13 +1884,13 @@ void testSubvector() { try (ColumnVector vec = ColumnVector.fromBoxedInts(1, 2, 3, null, 5); ColumnVector expected = ColumnVector.fromBoxedInts(2, 3, null, 5); ColumnVector found = vec.subVector(1, 5)) { - 
TableTest.assertColumnsAreEqual(expected, found); + assertColumnsAreEqual(expected, found); } try (ColumnVector vec = ColumnVector.fromStrings("1", "2", "3", null, "5"); ColumnVector expected = ColumnVector.fromStrings("2", "3", null, "5"); ColumnVector found = vec.subVector(1, 5)) { - TableTest.assertColumnsAreEqual(expected, found); + assertColumnsAreEqual(expected, found); } } @@ -2014,7 +2016,7 @@ void testTrimStringsWhiteSpace() { try (ColumnVector cv = ColumnVector.fromStrings(" 123", "123 ", null, " 123 ", "\t\t123\n\n"); ColumnVector trimmed = cv.strip(); ColumnVector expected = ColumnVector.fromStrings("123", "123", null, "123", "123")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2024,7 +2026,7 @@ void testTrimStrings() { Scalar one = Scalar.fromString(" 1"); ColumnVector trimmed = cv.strip(one); ColumnVector expected = ColumnVector.fromStrings("23", "23", null, "23", "\t\t123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2033,7 +2035,7 @@ void testLeftTrimStringsWhiteSpace() { try (ColumnVector cv = ColumnVector.fromStrings(" 123", "123 ", null, " 123 ", "\t\t123\n\n"); ColumnVector trimmed = cv.lstrip(); ColumnVector expected = ColumnVector.fromStrings("123", "123 ", null, "123 ", "123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2043,7 +2045,7 @@ void testLeftTrimStrings() { Scalar one = Scalar.fromString(" 1"); ColumnVector trimmed = cv.lstrip(one); ColumnVector expected = ColumnVector.fromStrings("23", "23 ", null, "231", "\t\t123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2052,7 +2054,7 @@ void testRightTrimStringsWhiteSpace() { try (ColumnVector cv = ColumnVector.fromStrings(" 123", "123 ", null, " 123 ", "\t\t123\n\n"); ColumnVector trimmed = cv.rstrip(); ColumnVector expected = 
ColumnVector.fromStrings(" 123", "123", null, " 123", "\t\t123")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2062,7 +2064,7 @@ void testRightTrimStrings() { Scalar one = Scalar.fromString(" 1"); ColumnVector trimmed = cv.rstrip(one); ColumnVector expected = ColumnVector.fromStrings("123", "123", null, "123", "\t\t123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2108,7 +2110,7 @@ void testCountElements() { Arrays.asList(1, 2, 3), Arrays.asList(1, 2, 3, 4)); ColumnVector lengths = cv.countElements(); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, null, 2, 3, 4)) { - TableTest.assertColumnsAreEqual(expected, lengths); + assertColumnsAreEqual(expected, lengths); } } @@ -2117,7 +2119,7 @@ void testStringLengths() { try (ColumnVector cv = ColumnVector.fromStrings("1", "12", null, "123", "1234"); ColumnVector lengths = cv.getCharLengths(); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, null, 3, 4)) { - TableTest.assertColumnsAreEqual(expected, lengths); + assertColumnsAreEqual(expected, lengths); } } @@ -2126,7 +2128,7 @@ void testGetByteCount() { try (ColumnVector cv = ColumnVector.fromStrings("1", "12", "123", null, "1234"); ColumnVector byteLengthVector = cv.getByteCount(); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 3, null, 4)) { - TableTest.assertColumnsAreEqual(expected, byteLengthVector); + assertColumnsAreEqual(expected, byteLengthVector); } } @@ -4919,11 +4921,12 @@ void testIsFloat() { try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector isFloat = floatStringCV.isFloat(); ColumnVector floats = floatStringCV.asFloats(); - ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, - Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, - -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null, 
423f); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, Float.NaN, Float.POSITIVE_INFINITY, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY, -0f, 0f, + Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, + 1.2e-24f, 0f, 0f, null, 423f); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isFloat); assertColumnsAreEqual(expectedFloats, floats); } @@ -4944,12 +4947,12 @@ void testIsDouble() { try (ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isDouble = doubleStringCV.isFloat(); ColumnVector doubles = doubleStringCV.asDoubles(); - ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, - Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, - Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, - 0d, null, 423d); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, + -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, + 1.2e-234d, 0d, 0d, null, 423d); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedDoubles, doubles); } diff --git 
a/java/src/test/java/ai/rapids/cudf/IfElseTest.java b/java/src/test/java/ai/rapids/cudf/IfElseTest.java index 86ddcc23416..a078befdf40 100644 --- a/java/src/test/java/ai/rapids/cudf/IfElseTest.java +++ b/java/src/test/java/ai/rapids/cudf/IfElseTest.java @@ -25,7 +25,7 @@ import java.util.stream.Stream; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.assertThrows; public class IfElseTest extends CudfTestBase { diff --git a/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java index dd03c4de69e..2fb8164534b 100644 --- a/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java @@ -117,8 +117,8 @@ public void testCastToInt() { ColumnVector expected1 = ColumnVector.fromInts(4, 3, 8); ColumnVector intColumnVector2 = shortColumnVector.asInts(); ColumnVector expected2 = ColumnVector.fromInts(100)) { - TableTest.assertColumnsAreEqual(expected1, intColumnVector1); - TableTest.assertColumnsAreEqual(expected2, intColumnVector2); + AssertUtils.assertColumnsAreEqual(expected1, intColumnVector1); + AssertUtils.assertColumnsAreEqual(expected2, intColumnVector2); } } diff --git a/java/src/test/java/ai/rapids/cudf/ScalarTest.java b/java/src/test/java/ai/rapids/cudf/ScalarTest.java index 0889363c2d0..86c340bb321 100644 --- a/java/src/test/java/ai/rapids/cudf/ScalarTest.java +++ b/java/src/test/java/ai/rapids/cudf/ScalarTest.java @@ -29,7 +29,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.*; public class ScalarTest extends CudfTestBase { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java 
b/java/src/test/java/ai/rapids/cudf/TableTest.java index 4512a08430c..fa221e19387 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -57,6 +57,11 @@ import java.util.stream.Collectors; import static ai.rapids.cudf.ColumnWriterOptions.mapColumn; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertPartialColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertPartialTablesAreEqual; +import static ai.rapids.cudf.AssertUtils.assertTableTypes; +import static ai.rapids.cudf.AssertUtils.assertTablesAreEqual; import static ai.rapids.cudf.ParquetWriterOptions.listBuilder; import static ai.rapids.cudf.ParquetWriterOptions.structBuilder; import static ai.rapids.cudf.Table.TestBuilder; @@ -70,11 +75,11 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class TableTest extends CudfTestBase { - private static final File TEST_PARQUET_FILE = new File("src/test/resources/acq.parquet"); - private static final File TEST_ORC_FILE = new File("src/test/resources/TestOrcFile.orc"); - private static final File TEST_ORC_TIMESTAMP_DATE_FILE = new File( - "src/test/resources/timestamp-date-test.orc"); - private static final File TEST_DECIMAL_PARQUET_FILE = new File("src/test/resources/decimal.parquet"); + private static final File TEST_PARQUET_FILE = TestUtils.getResourceAsFile("acq.parquet"); + private static final File TEST_ORC_FILE = TestUtils.getResourceAsFile("TestOrcFile.orc"); + private static final File TEST_ORC_TIMESTAMP_DATE_FILE = TestUtils.getResourceAsFile("timestamp-date-test.orc"); + private static final File TEST_DECIMAL_PARQUET_FILE = TestUtils.getResourceAsFile("decimal.parquet"); + private static final File TEST_SIMPLE_CSV_FILE = TestUtils.getResourceAsFile("simple.csv"); private static final Schema CSV_DATA_BUFFER_SCHEMA = Schema.builder() .column(DType.INT32, "A") @@ -94,242 +99,6 @@ public class TableTest extends 
CudfTestBase { "8|118.2|128\n" + "9|119.8|129").getBytes(StandardCharsets.UTF_8); - /** - * Checks and asserts that passed in columns match - * @param expect The expected result column - * @param cv The input column - */ - public static void assertColumnsAreEqual(ColumnView expect, ColumnView cv) { - assertColumnsAreEqual(expect, cv, "unnamed"); - } - - /** - * Checks and asserts that passed in columns match - * @param expected The expected result column - * @param cv The input column - * @param colName The name of the column - */ - public static void assertColumnsAreEqual(ColumnView expected, ColumnView cv, String colName) { - assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false); - } - - /** - * Checks and asserts that passed in host columns match - * @param expected The expected result host column - * @param cv The input host column - * @param colName The name of the host column - */ - public static void assertColumnsAreEqual(HostColumnVector expected, HostColumnVector cv, String colName) { - assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false); - } - - /** - * Checks and asserts that passed in Struct columns match - * @param expected The expected result Struct column - * @param cv The input Struct column - */ - public static void assertStructColumnsAreEqual(ColumnView expected, ColumnView cv) { - assertPartialStructColumnsAreEqual(expected, 0, expected.getRowCount(), cv, "unnamed", true, false); - } - - /** - * Checks and asserts that passed in Struct columns match - * @param expected The expected result Struct column - * @param rowOffset The row number to look from - * @param length The number of rows to consider - * @param cv The input Struct column - * @param colName The name of the column - * @param enableNullCountCheck Whether to check for nulls in the Struct column - * @param enableNullabilityCheck Whether the table have a validity mask - */ - public static void 
assertPartialStructColumnsAreEqual(ColumnView expected, long rowOffset, long length, - ColumnView cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) { - try (HostColumnVector hostExpected = expected.copyToHost(); - HostColumnVector hostcv = cv.copyToHost()) { - assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCountCheck, enableNullabilityCheck); - } - } - - /** - * Checks and asserts that passed in columns match - * @param expected The expected result column - * @param cv The input column - * @param colName The name of the column - * @param enableNullCheck Whether to check for nulls in the column - * @param enableNullabilityCheck Whether the table have a validity mask - */ - public static void assertPartialColumnsAreEqual(ColumnView expected, long rowOffset, long length, - ColumnView cv, String colName, boolean enableNullCheck, boolean enableNullabilityCheck) { - try (HostColumnVector hostExpected = expected.copyToHost(); - HostColumnVector hostcv = cv.copyToHost()) { - assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCheck, enableNullabilityCheck); - } - } - - /** - * Checks and asserts that passed in host columns match - * @param expected The expected result host column - * @param rowOffset start row index - * @param length number of rows from starting offset - * @param cv The input host column - * @param colName The name of the host column - * @param enableNullCountCheck Whether to check for nulls in the host column - */ - public static void assertPartialColumnsAreEqual(HostColumnVectorCore expected, long rowOffset, long length, - HostColumnVectorCore cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) { - assertEquals(expected.getType(), cv.getType(), "Type For Column " + colName); - assertEquals(length, cv.getRowCount(), "Row Count For Column " + colName); - assertEquals(expected.getNumChildren(), cv.getNumChildren(), 
"Child Count for Column " + colName); - if (enableNullCountCheck) { - assertEquals(expected.getNullCount(), cv.getNullCount(), "Null Count For Column " + colName); - } else { - // TODO add in a proper check when null counts are supported by serializing a partitioned column - } - if (enableNullabilityCheck) { - assertEquals(expected.hasValidityVector(), cv.hasValidityVector(), "Column nullability is different than expected"); - } - DType type = expected.getType(); - for (long expectedRow = rowOffset; expectedRow < (rowOffset + length); expectedRow++) { - long tableRow = expectedRow - rowOffset; - assertEquals(expected.isNull(expectedRow), cv.isNull(tableRow), - "NULL for Column " + colName + " Row " + tableRow); - if (!expected.isNull(expectedRow)) { - switch (type.typeId) { - case BOOL8: // fall through - case INT8: // fall through - case UINT8: - assertEquals(expected.getByte(expectedRow), cv.getByte(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case INT16: // fall through - case UINT16: - assertEquals(expected.getShort(expectedRow), cv.getShort(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case INT32: // fall through - case UINT32: // fall through - case TIMESTAMP_DAYS: - case DURATION_DAYS: - case DECIMAL32: - assertEquals(expected.getInt(expectedRow), cv.getInt(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case INT64: // fall through - case UINT64: // fall through - case DURATION_MICROSECONDS: // fall through - case DURATION_MILLISECONDS: // fall through - case DURATION_NANOSECONDS: // fall through - case DURATION_SECONDS: // fall through - case TIMESTAMP_MICROSECONDS: // fall through - case TIMESTAMP_MILLISECONDS: // fall through - case TIMESTAMP_NANOSECONDS: // fall through - case TIMESTAMP_SECONDS: - case DECIMAL64: - assertEquals(expected.getLong(expectedRow), cv.getLong(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case DECIMAL128: - 
assertEquals(expected.getBigDecimal(expectedRow), cv.getBigDecimal(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case FLOAT32: - assertEqualsWithinPercentage(expected.getFloat(expectedRow), cv.getFloat(tableRow), 0.0001, - "Column " + colName + " Row " + tableRow); - break; - case FLOAT64: - assertEqualsWithinPercentage(expected.getDouble(expectedRow), cv.getDouble(tableRow), 0.0001, - "Column " + colName + " Row " + tableRow); - break; - case STRING: - assertArrayEquals(expected.getUTF8(expectedRow), cv.getUTF8(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case LIST: - HostMemoryBuffer expectedOffsets = expected.getOffsets(); - HostMemoryBuffer cvOffsets = cv.getOffsets(); - int expectedChildRows = expectedOffsets.getInt((expectedRow + 1) * 4) - - expectedOffsets.getInt(expectedRow * 4); - int cvChildRows = cvOffsets.getInt((tableRow + 1) * 4) - - cvOffsets.getInt(tableRow * 4); - assertEquals(expectedChildRows, cvChildRows, "Child row count for Column " + - colName + " Row " + tableRow); - break; - case STRUCT: - // parent column only has validity which was checked above - break; - default: - throw new IllegalArgumentException(type + " is not supported yet"); - } - } - } - - if (type.isNestedType()) { - switch (type.typeId) { - case LIST: - int expectedChildRowOffset = 0; - int numChildRows = 0; - if (length > 0) { - HostMemoryBuffer expectedOffsets = expected.getOffsets(); - HostMemoryBuffer cvOffsets = cv.getOffsets(); - expectedChildRowOffset = expectedOffsets.getInt(rowOffset * 4); - numChildRows = expectedOffsets.getInt((rowOffset + length) * 4) - - expectedChildRowOffset; - } - assertPartialColumnsAreEqual(expected.getNestedChildren().get(0), expectedChildRowOffset, - numChildRows, cv.getNestedChildren().get(0), colName + " list child", - enableNullCountCheck, enableNullabilityCheck); - break; - case STRUCT: - List expectedChildren = expected.getNestedChildren(); - List cvChildren = cv.getNestedChildren(); - for 
(int i = 0; i < expectedChildren.size(); i++) { - HostColumnVectorCore expectedChild = expectedChildren.get(i); - HostColumnVectorCore cvChild = cvChildren.get(i); - String childName = colName + " child " + i; - assertEquals(length, cvChild.getRowCount(), "Row Count for Column " + colName); - assertPartialColumnsAreEqual(expectedChild, rowOffset, length, cvChild, - colName, enableNullCountCheck, enableNullabilityCheck); - } - break; - default: - throw new IllegalArgumentException(type + " is not supported yet"); - } - } - } - - /** - * Checks and asserts that the two tables from a given rowindex match based on a provided schema. - * @param expected the expected result table - * @param rowOffset the row number to start checking from - * @param length the number of rows to check - * @param table the input table to compare against expected - * @param enableNullCheck whether to check for nulls or not - * @param enableNullabilityCheck whether the table have a validity mask - */ - public static void assertPartialTablesAreEqual(Table expected, long rowOffset, long length, Table table, - boolean enableNullCheck, boolean enableNullabilityCheck) { - assertEquals(expected.getNumberOfColumns(), table.getNumberOfColumns()); - assertEquals(length, table.getRowCount(), "ROW COUNT"); - for (int col = 0; col < expected.getNumberOfColumns(); col++) { - ColumnVector expect = expected.getColumn(col); - ColumnVector cv = table.getColumn(col); - String name = String.valueOf(col); - if (rowOffset != 0 || length != expected.getRowCount()) { - name = name + " PART " + rowOffset + "-" + (rowOffset + length - 1); - } - assertPartialColumnsAreEqual(expect, rowOffset, length, cv, name, enableNullCheck, enableNullabilityCheck); - } - } - - /** - * Checks and asserts that the two tables match - * @param expected the expected result table - * @param table the input table to compare against expected - */ - public static void assertTablesAreEqual(Table expected, Table table) { - 
assertPartialTablesAreEqual(expected, 0, expected.getRowCount(), table, true, false); - } - void assertTablesHaveSameValues(HashMap[] expectedTable, Table table) { assertEquals(expectedTable.length, table.getNumberOfColumns()); int numCols = table.getNumberOfColumns(); @@ -358,16 +127,6 @@ void assertTablesHaveSameValues(HashMap[] expectedTable, Table } } - public static void assertTableTypes(DType[] expectedTypes, Table t) { - int len = t.getNumberOfColumns(); - assertEquals(expectedTypes.length, len); - for (int i = 0; i < len; i++) { - ColumnVector vec = t.getColumn(i); - DType type = vec.getType(); - assertEquals(expectedTypes[i], type, "Types don't match at " + i); - } - } - @Test void testMergeSimple() { try (Table table1 = new Table.TestBuilder() @@ -548,7 +307,7 @@ void testReadCSVPrune() { .column(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) .column(110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.2, 119.8) .build(); - Table table = Table.readCSV(schema, opts, new File("./src/test/resources/simple.csv"))) { + Table table = Table.readCSV(schema, opts, TEST_SIMPLE_CSV_FILE)) { assertTablesAreEqual(expected, table); } } @@ -675,7 +434,7 @@ void testReadCSV() { .column(120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L) .column("one", "two", "three", "four", "five", "six", "seven\ud801\uddb8", "eight\uBF68", "nine\u03E8", "ten") .build(); - Table table = Table.readCSV(schema, new File("./src/test/resources/simple.csv"))) { + Table table = Table.readCSV(schema, TEST_SIMPLE_CSV_FILE)) { assertTablesAreEqual(expected, table); } } diff --git a/java/src/test/java/ai/rapids/cudf/TestUtils.java b/java/src/test/java/ai/rapids/cudf/TestUtils.java index 5a799c666c2..a1acab5883b 100644 --- a/java/src/test/java/ai/rapids/cudf/TestUtils.java +++ b/java/src/test/java/ai/rapids/cudf/TestUtils.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,9 @@ package ai.rapids.cudf; +import java.io.File; +import java.net.URISyntaxException; +import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -211,4 +214,16 @@ static Double[] getDoubles(final long seed, final int size, int specialValues) { }); return result; } + + public static File getResourceAsFile(String resourceName) { + URL url = TestUtils.class.getClassLoader().getResource(resourceName); + if (url == null) { + throw new IllegalArgumentException("Unable to locate resource: " + resourceName); + } + try { + return new File(url.toURI()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } } diff --git a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java index 8bf1370a0f7..9a929cec98d 100644 --- a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java @@ -22,7 +22,7 @@ import java.util.function.Function; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.assertEquals; public class TimestampColumnVectorTest extends CudfTestBase { diff --git a/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java b/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java index 76970e8bf76..7fcb7cbd85b 100644 --- a/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java +++ b/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java @@ -22,7 +22,7 @@ import ai.rapids.cudf.HostColumnVector.Builder; import org.junit.jupiter.api.Test; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; public class UnaryOpTest extends CudfTestBase { 
private static final Double[] DOUBLES_1 = new Double[]{1.0, 10.0, -100.1, 5.3, 50.0, 100.0, null, Double.NaN, Double.POSITIVE_INFINITY, 1/9.0, Double.NEGATIVE_INFINITY, 500.0, -500.0}; diff --git a/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java b/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java index 2fb6792b409..e50da0a4d4d 100644 --- a/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java +++ b/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java @@ -36,7 +36,7 @@ import java.util.function.Function; import java.util.stream.Stream; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; public class CompiledExpressionTest extends CudfTestBase { @Test diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index f696a00d1ed..2461e7b09bc 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -102,6 +102,17 @@ from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import set_allocator +try: + from ptxcompiler.patch import patch_numba_codegen_if_needed +except ImportError: + pass +else: + # Patch Numba to support CUDA enhanced compatibility. + # See https://github.com/rapidsai/ptxcompiler for + # details. + patch_numba_codegen_if_needed() + del patch_numba_codegen_if_needed + cuda.set_memory_manager(rmm.RMMNumbaManager) cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 4f703724cef..68f7101b6ee 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -1,6 +1,6 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
-from enum import Enum +from enum import Enum, IntEnum import numba import numpy as np @@ -30,6 +30,7 @@ from cudf._lib.types import Interpolation cimport cudf._lib.cpp.aggregation as libcudf_aggregation cimport cudf._lib.cpp.types as libcudf_types +from cudf._lib.cpp.aggregation cimport underlying_type_t_correlation_type import cudf @@ -57,6 +58,22 @@ class AggregationKind(Enum): UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX CUDA = libcudf_aggregation.aggregation.Kind.CUDA + CORRELATION = libcudf_aggregation.aggregation.Kind.CORRELATION + + +class CorrelationType(IntEnum): + PEARSON = ( + + libcudf_aggregation.correlation_type.PEARSON + ) + KENDALL = ( + + libcudf_aggregation.correlation_type.KENDALL + ) + SPEARMAN = ( + + libcudf_aggregation.correlation_type.SPEARMAN + ) cdef class Aggregation: @@ -321,6 +338,22 @@ cdef class Aggregation: )) return agg + @classmethod + def corr(cls, method, libcudf_types.size_type min_periods): + cdef Aggregation agg = cls() + cdef libcudf_aggregation.correlation_type c_method = ( + ( + ( + CorrelationType[method.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation.make_correlation_aggregation[aggregation]( + c_method, min_periods + )) + return agg + cdef class RollingAggregation: """A Cython wrapper for rolling window aggregations. @@ -692,6 +725,24 @@ cdef class GroupbyAggregation: ) return agg + @classmethod + def corr(cls, method, libcudf_types.size_type min_periods): + cdef GroupbyAggregation agg = cls() + cdef libcudf_aggregation.correlation_type c_method = ( + ( + ( + CorrelationType[method.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation. + make_correlation_aggregation[groupby_aggregation]( + c_method, min_periods + )) + return agg + + cdef class GroupbyScanAggregation: """A Cython wrapper for groupby scan aggregations. 
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd index 13bfa49057c..3982b4fecbb 100644 --- a/python/cudf/cudf/_lib/cpp/aggregation.pxd +++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd @@ -1,5 +1,5 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector @@ -11,6 +11,7 @@ from cudf._lib.cpp.types cimport ( size_type, ) +ctypedef int32_t underlying_type_t_correlation_type cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: @@ -38,6 +39,8 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: COLLECT_SET 'cudf::aggregation::COLLECT_SET' PTX 'cudf::aggregation::PTX' CUDA 'cudf::aggregation::CUDA' + CORRELATION 'cudf::aggregation::CORRELATION' + Kind kind cdef cppclass rolling_aggregation: @@ -53,6 +56,11 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: CUDA 'cudf::udf_type::CUDA' PTX 'cudf::udf_type::PTX' + ctypedef enum correlation_type: + PEARSON 'cudf::correlation_type::PEARSON' + KENDALL 'cudf::correlation_type::KENDALL' + SPEARMAN 'cudf::correlation_type::SPEARMAN' + cdef unique_ptr[T] make_sum_aggregation[T]() except + cdef unique_ptr[T] make_product_aggregation[T]() except + @@ -106,3 +114,6 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: udf_type type, string user_defined_aggregator, data_type output_type) except + + + cdef unique_ptr[T] make_correlation_aggregation[T]( + correlation_type type, size_type min_periods) except + diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 3153427ce3c..590a371ff52 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -34,12 +34,6 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: bool is_ptx ) except + - cdef unique_ptr[column] 
generalized_masked_op( - const table_view& data_view, - string udf, - data_type output_type, - ) except + - cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input ) except + diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 0968d22d465..314542c9549 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. from collections import defaultdict @@ -54,7 +54,7 @@ _CATEGORICAL_AGGS = {"COUNT", "SIZE", "NUNIQUE", "UNIQUE"} _STRING_AGGS = {"COUNT", "SIZE", "MAX", "MIN", "NUNIQUE", "NTH", "COLLECT", "UNIQUE"} _LIST_AGGS = {"COLLECT"} -_STRUCT_AGGS = set() +_STRUCT_AGGS = {"CORRELATION"} _INTERVAL_AGGS = set() _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE", "NTH", "COLLECT"} diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index a0eb7c68183..96d25cb92c9 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -123,30 +123,6 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) -def masked_udf(incols, op, output_type): - cdef table_view data_view = table_view_from_table( - incols, ignore_index=True) - cdef string c_str = op.encode("UTF-8") - cdef type_id c_tid - cdef data_type c_dtype - - c_tid = ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[ - output_type - ] - ) - c_dtype = data_type(c_tid) - - with nogil: - c_output = move(libcudf_transform.generalized_masked_op( - data_view, - c_str, - c_dtype, - )) - - return Column.from_unique_ptr(move(c_output)) - - def table_encode(input): cdef table_view c_input = table_view_from_table( input, ignore_index=True) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a167383c65c..2a91abc5701 100644 --- a/python/cudf/cudf/core/column/string.py +++ 
b/python/cudf/cudf/core/column/string.py @@ -97,69 +97,6 @@ def str_to_boolean(column: StringColumn): cudf.dtype("timedelta64[ns]"): str_cast.int2timedelta, } -_NAN_INF_VARIATIONS = [ - "nan", - "NAN", - "Nan", - "naN", - "nAN", - "NAn", - "nAn", - "-inf", - "-INF", - "-InF", - "-inF", - "-iNF", - "-INf", - "-iNf", - "+inf", - "+INF", - "+InF", - "+inF", - "+iNF", - "+INf", - "+Inf", - "+iNf", - "inf", - "INF", - "InF", - "inF", - "iNF", - "INf", - "iNf", -] -_LIBCUDF_SUPPORTED_NAN_INF_VARIATIONS = [ - "NaN", - "NaN", - "NaN", - "NaN", - "NaN", - "NaN", - "NaN", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", -] - def _is_supported_regex_flags(flags): return flags == 0 or ( @@ -5309,16 +5246,6 @@ def as_numerical_column( "type due to presence of non-integer values." ) elif out_dtype.kind == "f": - # TODO: Replace this `replace` call with a - # case-insensitive method once following - # issue is fixed: https://github.com/rapidsai/cudf/issues/5217 - old_values = cudf.core.column.as_column(_NAN_INF_VARIATIONS) - new_values = cudf.core.column.as_column( - _LIBCUDF_SUPPORTED_NAN_INF_VARIATIONS - ) - string_col = libcudf.replace.replace( - string_col, old_values, new_values - ) if not libstrings.is_float(string_col).all(): raise ValueError( "Could not convert strings to float " diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 7f9f61ed3fd..f1d622362e2 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
import collections +import itertools import pickle import warnings @@ -13,7 +14,8 @@ from cudf._typing import DataFrameOrSeries from cudf.api.types import is_list_like from cudf.core.abc import Serializable -from cudf.core.column.column import arange +from cudf.core.column.column import arange, as_column +from cudf.core.multiindex import MultiIndex from cudf.utils.utils import GetAttrGetItemMixin, cached_property @@ -69,6 +71,8 @@ def __init__( """ self.obj = obj self._as_index = as_index + self._by = by + self._level = level self._sort = sort self._dropna = dropna @@ -777,6 +781,121 @@ def median(self): """Get the column-wise median of the values in each group.""" return self.agg("median") + def corr(self, method="pearson", min_periods=1): + """ + Compute pairwise correlation of columns, excluding NA/null values. + + Parameters + ---------- + method: {"pearson", "kendall", "spearman"} or callable, + default "pearson". Currently only the pearson correlation + coefficient is supported. + + min_periods: int, optional + Minimum number of observations required per pair of columns + to have a valid result. + + Returns + ---------- + DataFrame + Correlation matrix. + + Examples + -------- + >>> import cudf + >>> gdf = cudf.DataFrame({ + ... "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + ... "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + ... "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + ... 
"val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]}) + >>> gdf + id val1 val2 val3 + 0 a 5 4 4 + 1 a 4 5 5 + 2 a 6 6 6 + 3 b 4 1 1 + 4 b 8 2 2 + 5 b 7 9 9 + 6 c 4 8 8 + 7 c 5 5 5 + 8 c 2 1 1 + >>> gdf.groupby("id").corr(method="pearson") + val1 val2 val3 + id + a val1 1.000000 0.500000 0.500000 + val2 0.500000 1.000000 1.000000 + val3 0.500000 1.000000 1.000000 + b val1 1.000000 0.385727 0.385727 + val2 0.385727 1.000000 1.000000 + val3 0.385727 1.000000 1.000000 + c val1 1.000000 0.714575 0.714575 + val2 0.714575 1.000000 1.000000 + val3 0.714575 1.000000 1.000000 + """ + + if not method.lower() in ("pearson",): + raise NotImplementedError( + "Only pearson correlation is currently supported" + ) + + # create expanded dataframe consisting all combinations of the + # struct columns-pairs to be correlated + # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2')) + _cols = self.grouping.values.columns.tolist() + len_cols = len(_cols) + + new_df_data = {} + for x, y in itertools.combinations_with_replacement(_cols, 2): + new_df_data[(x, y)] = cudf.DataFrame._from_data( + {"x": self.obj._data[x], "y": self.obj._data[y]} + ).to_struct() + new_gb = cudf.DataFrame._from_data(new_df_data).groupby( + by=self.grouping.keys + ) + + try: + gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods)) + except RuntimeError as e: + if "Unsupported groupby reduction type-agg combination" in str(e): + raise TypeError( + "Correlation accepts only numerical column-pairs" + ) + raise + + # ensure that column-pair labels are arranged in ascending order + cols_list = [ + (y, x) if i > j else (x, y) + for j, y in enumerate(_cols) + for i, x in enumerate(_cols) + ] + cols_split = [ + cols_list[i : i + len_cols] + for i in range(0, len(cols_list), len_cols) + ] + + # interleave: combine the correlation results for each column-pair + # into a single column + res = cudf.DataFrame._from_data( + { + x: gb_corr.loc[:, i].interleave_columns() + for i, x in zip(cols_split, _cols) + } + ) + + # create a 
multiindex for the groupby correlated dataframe, + # to match pandas behavior + unsorted_idx = gb_corr.index.repeat(len_cols) + idx_sort_order = unsorted_idx._get_sorted_inds() + sorted_idx = unsorted_idx._gather(idx_sort_order) + if len(gb_corr): + # TO-DO: Should the operation below be done on the CPU instead? + sorted_idx._data[None] = as_column( + cudf.Series(_cols).tile(len(gb_corr.index)) + ) + res.index = MultiIndex._from_data(sorted_idx._data) + + return res + def var(self, ddof=1): """Compute the column-wise variance of the values in each group. diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d07caef11d5..d555b5c4033 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8924,3 +8924,118 @@ def test_frame_series_where_other(data): expected = gdf.where(gdf["b"] == 1, 0) actual = pdf.where(pdf["b"] == 1, 0) assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data, gkey", + [ + ( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + }, + ["id", "val1", "val2"], + ), + ( + { + "id": [0] * 4 + [1] * 3, + "a": [10, 3, 4, 2, -3, 9, 10], + "b": [10, 23, -4, 2, -3, 9, 19], + }, + ["id", "a"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val": [None, None, None, None, None, None], + }, + ["id"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val1": [None, 4, 6, 8, None, 2], + "val2": [4, 5, None, 2, 9, None], + }, + ["id"], + ), + ({"id": [1.0], "val1": [2.0], "val2": [3.0]}, ["id"]), + ], +) +@pytest.mark.parametrize( + "min_per", [0, 1, 2, 3, 4], +) +def test_pearson_corr_passing(data, gkey, min_per): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + actual = gdf.groupby(gkey).corr(method="pearson", min_periods=min_per) + expected = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per) + + 
assert_eq(expected, actual) + + +@pytest.mark.parametrize("method", ["kendall", "spearman"]) +def test_pearson_corr_unsupported_methods(method): + gdf = cudf.DataFrame( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + } + ) + + with pytest.raises( + NotImplementedError, + match="Only pearson correlation is currently supported", + ): + gdf.groupby("id").corr(method) + + +def test_pearson_corr_empty_columns(): + gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) + pdf = gdf.to_pandas() + + actual = gdf.groupby("id").corr("pearson") + expected = pdf.groupby("id").corr("pearson") + + assert_eq( + expected, actual, check_dtype=False, check_index_type=False, + ) + + +@pytest.mark.parametrize( + "data", + [ + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + }, + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + }, + ], +) +@pytest.mark.parametrize("gkey", ["id", "val1", "val2"]) +def test_pearson_corr_invalid_column_types(data, gkey): + with pytest.raises( + TypeError, match="Correlation accepts only numerical column-pairs", + ): + cudf.DataFrame(data).groupby(gkey).corr("pearson") + + +def test_pearson_corr_multiindex_dataframe(): + gdf = cudf.DataFrame( + {"a": [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [2, 3, 4, 5]} + ).set_index(["a", "b"]) + + actual = gdf.groupby(level="a").corr("pearson") + expected = gdf.to_pandas().groupby(level="a").corr("pearson") + + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 39fa7b11ce2..47c9448cf63 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ 
b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,5 +1,6 @@ import operator +import cupy as cp import pytest from numba import cuda, types from numba.cuda import compile_ptx @@ -71,8 +72,8 @@ def test_execute_masked_binary(op, ty): def func(x, y): return op(x, y) - @cuda.jit(debug=True) - def test_kernel(x, y): + @cuda.jit + def test_kernel(x, y, err): # Reference result with unmasked value u = func(x, y) @@ -87,14 +88,22 @@ def test_kernel(x, y): # Check masks are as expected, and unmasked result matches masked # result if r0.valid: - raise RuntimeError("Expected r0 to be invalid") + # TODO: ideally, we would raise an exception here rather + # than return an "error code", and that is what the + # previous version of this (and below) tests did. But, + # Numba kernels cannot currently use `debug=True` with + # CUDA enhanced compatibility. Once a solution to that is + # reached, we should switch back to raising exceptions + # here. + err[0] = 1 if not r1.valid: - raise RuntimeError("Expected r1 to be valid") + err[0] = 2 if u != r1.value: - print("Values: ", u, r1.value) - raise RuntimeError("u != r1.value") + err[0] = 3 - test_kernel[1, 1](1, 2) + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](1, 2, err) + assert err[0] == 0 @pytest.mark.parametrize("op", ops) @@ -187,18 +196,20 @@ def test_is_na(fn): device_fn = cuda.jit(device=True)(fn) - @cuda.jit(debug=True) - def test_kernel(): + @cuda.jit + def test_kernel(err): valid_is_na = device_fn(valid) invalid_is_na = device_fn(invalid) if valid_is_na: - raise RuntimeError("Valid masked value is NA and should not be") + err[0] = 1 if not invalid_is_na: - raise RuntimeError("Invalid masked value is not NA and should be") + err[0] = 2 - test_kernel[1, 1]() + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](err) + assert err[0] == 0 def func_lt_na(x): @@ -271,8 +282,8 @@ def test_na_masked_comparisons(fn, ty): device_fn = cuda.jit(device=True)(fn) - @cuda.jit(debug=True) - def test_kernel(): + 
@cuda.jit + def test_kernel(err): unmasked = ty(1) valid_masked = Masked(unmasked, True) invalid_masked = Masked(unmasked, False) @@ -281,12 +292,14 @@ def test_kernel(): invalid_cmp_na = device_fn(invalid_masked) if valid_cmp_na: - raise RuntimeError("Valid masked value compared True with NA") + err[0] = 1 if invalid_cmp_na: - raise RuntimeError("Invalid masked value compared True with NA") + err[0] = 2 - test_kernel[1, 1]() + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](err) + assert err[0] == 0 # xfail because scalars do not yet cast for a comparison to NA @@ -297,13 +310,15 @@ def test_na_scalar_comparisons(fn, ty): device_fn = cuda.jit(device=True)(fn) - @cuda.jit(debug=True) - def test_kernel(): + @cuda.jit + def test_kernel(err): unmasked = ty(1) unmasked_cmp_na = device_fn(unmasked) if unmasked_cmp_na: - raise RuntimeError("Unmasked value compared True with NA") + err[0] = 1 - test_kernel[1, 1]() + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](err) + assert err[0] == 0 diff --git a/python/cudf/cudf/tests/test_udf_binops.py b/python/cudf/cudf/tests/test_udf_binops.py index 4d6188acf8c..935c3868a68 100644 --- a/python/cudf/cudf/tests/test_udf_binops.py +++ b/python/cudf/cudf/tests/test_udf_binops.py @@ -6,11 +6,20 @@ from numba.cuda import compile_ptx from numba.np import numpy_support +import rmm + import cudf from cudf import Series, _lib as libcudf from cudf.utils import dtypes as dtypeutils +_driver_version = rmm._cuda.gpu.driverGetVersion() +_runtime_version = rmm._cuda.gpu.runtimeGetVersion() +_CUDA_JIT128INT_SUPPORTED = (_driver_version >= 11050) and ( + _runtime_version >= 11050 +) + +@pytest.mark.skipif(not _CUDA_JIT128INT_SUPPORTED, reason="requires CUDA 11.5") @pytest.mark.parametrize( "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"int8"})) ) diff --git a/python/dask_cudf/dask_cudf/io/csv.py b/python/dask_cudf/dask_cudf/io/csv.py index 132201a349e..ebb02e3b6d4 100644 --- a/python/dask_cudf/dask_cudf/io/csv.py +++ 
b/python/dask_cudf/dask_cudf/io/csv.py @@ -110,9 +110,17 @@ def _internal_read_csv(path, chunksize="256 MiB", **kwargs): if chunksize is None: return read_csv_without_chunksize(path, **kwargs) + # Let dask.dataframe generate meta dask_reader = make_reader(cudf.read_csv, "read_csv", "CSV") - usecols = kwargs.pop("usecols", None) - meta = dask_reader(filenames[0], **kwargs)._meta + kwargs1 = kwargs.copy() + usecols = kwargs1.pop("usecols", None) + dtype = kwargs1.pop("dtype", None) + meta = dask_reader(filenames[0], **kwargs1)._meta + names = meta.columns + if usecols or dtype: + # Regenerate meta with original kwargs if + # `usecols` or `dtype` was specified + meta = dask_reader(filenames[0], **kwargs)._meta dsk = {} i = 0 @@ -127,18 +135,13 @@ def _internal_read_csv(path, chunksize="256 MiB", **kwargs): chunksize, ) # specify which chunk of the file we care about if start != 0: - kwargs2[ - "names" - ] = meta.columns # no header in the middle of the file + kwargs2["names"] = names # no header in the middle of the file kwargs2["header"] = None - kwargs2["usecols"] = usecols dsk[(name, i)] = (apply, _read_csv, [fn, dtypes], kwargs2) i += 1 divisions = [None] * (len(dsk) + 1) - if usecols is not None: - meta = meta[usecols] return dd.core.new_dd_object(dsk, name, meta, divisions) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py index 98061f6c624..32960a90bd7 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py @@ -136,7 +136,8 @@ def test_read_csv_chunksize_none(tmp_path, compression, size): dd.assert_eq(df, df2) -def test_csv_reader_usecols(tmp_path): +@pytest.mark.parametrize("dtype", [{"b": str, "c": int}, None]) +def test_csv_reader_usecols(tmp_path, dtype): df = cudf.DataFrame( { "a": [1, 2, 3, 4] * 100, @@ -147,6 +148,6 @@ def test_csv_reader_usecols(tmp_path): csv_path = str(tmp_path / "usecols_data.csv") df.to_csv(csv_path, index=False) 
ddf = dask_cudf.from_cudf(df[["b", "c"]], npartitions=5) - ddf2 = dask_cudf.read_csv(csv_path, usecols=["b", "c"]) + ddf2 = dask_cudf.read_csv(csv_path, usecols=["b", "c"], dtype=dtype) dd.assert_eq(ddf, ddf2, check_divisions=False, check_index=False)