diff --git a/build.sh b/build.sh index bc49b76d44e..70b93427d5c 100755 --- a/build.sh +++ b/build.sh @@ -134,18 +134,20 @@ if hasArg clean; then done fi -if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then - CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" - echo "Building for the architecture of the GPU in the system..." -else - CUDF_CMAKE_CUDA_ARCHITECTURES="" - echo "Building for *ALL* supported GPU architectures..." -fi ################################################################################ # Configure, build, and install libcudf if buildAll || hasArg libcudf; then + + if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then + CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" + echo "Building for the architecture of the GPU in the system..." + else + CUDF_CMAKE_CUDA_ARCHITECTURES="" + echo "Building for *ALL* supported GPU architectures..." + fi + cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ ${CUDF_CMAKE_CUDA_ARCHITECTURES} \ @@ -192,19 +194,16 @@ fi # Build libcudf_kafka library if hasArg libcudf_kafka; then cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \ - ${CUDF_CMAKE_CUDA_ARCHITECTURES} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DBUILD_TESTS=${BUILD_TESTS} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} + cd ${KAFKA_LIB_BUILD_DIR} + cmake --build . -j${PARALLEL_LEVEL} ${VERBOSE_FLAG} + if [[ ${INSTALL_TARGET} != "" ]]; then cmake --build . -j${PARALLEL_LEVEL} --target install ${VERBOSE_FLAG} - else - cmake --build . -j${PARALLEL_LEVEL} --target libcudf_kafka ${VERBOSE_FLAG} - fi - - if [[ ${BUILD_TESTS} == "ON" ]]; then - cmake --build . -j${PARALLEL_LEVEL} --target build_tests_libcudf_kafka ${VERBOSE_FLAG} fi fi diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 61f551f4b6d..819a0dcf6bf 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -49,6 +49,9 @@ function sed_runner() { # cpp update sed_runner 's/'"CUDA_DATAFRAME VERSION .* LANGUAGES"'/'"CUDA_DATAFRAME VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt +# cpp libcudf_kafka update +sed_runner 's/'"CUDA_KAFKA VERSION .* LANGUAGES"'/'"CUDA_KAFKA VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/libcudf_kafka/CMakeLists.txt + # doxyfile update sed_runner 's/PROJECT_NUMBER = .*/PROJECT_NUMBER = '${NEXT_FULL_TAG}'/g' cpp/doxygen/Doxyfile diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 1be8a6b450a..39587b4bd05 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -33,7 +33,7 @@ build: requirements: build: - - cmake >=3.17.0 + - cmake >=3.18 host: - librmm {{ minor_version }}.* - cudatoolkit {{ cuda_version }}.* diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fc439ebfa7f..48562476070 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -554,12 +554,6 @@ if(CUDF_BUILD_BENCHMARKS) GIT_SHALLOW TRUE OPTIONS "BENCHMARK_ENABLE_TESTING OFF" "BENCHMARK_ENABLE_INSTALL OFF") - if(benchmark_ADDED) - install(TARGETS benchmark - benchmark_main - DESTINATION lib - EXPORT cudf-targets) - endif() add_subdirectory(benchmarks) endif() @@ -636,6 +630,15 @@ elseif(TARGET arrow_static) endif() endif() +if(TARGET gtest) + get_target_property(gtest_is_imported gtest IMPORTED) + if(NOT gtest_is_imported) + export(TARGETS gtest gmock gtest_main gmock_main + FILE ${CUDF_BINARY_DIR}/cudf-gtesting-targets.cmake + NAMESPACE GTest::) + endif() +endif() + export(EXPORT cudf-targets FILE ${CUDF_BINARY_DIR}/cudf-targets.cmake NAMESPACE cudf::) diff --git 
a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 7fd84b508ac..5aa7e0132f8 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -175,10 +175,12 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu) ################################################################################################### # - nvtext benchmark ------------------------------------------------------------------- ConfigureBench(TEXT_BENCH + text/ngrams_benchmark.cpp text/normalize_benchmark.cpp text/normalize_spaces_benchmark.cpp - text/tokenize_benchmark.cpp - text/subword_benchmark.cpp) + text/replace_benchmark.cpp + text/subword_benchmark.cpp + text/tokenize_benchmark.cpp) ################################################################################################### # - strings benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/text/ngrams_benchmark.cpp b/cpp/benchmarks/text/ngrams_benchmark.cpp new file mode 100644 index 00000000000..1fe8e3b7f2e --- /dev/null +++ b/cpp/benchmarks/text/ngrams_benchmark.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextNGrams : public cudf::benchmark { +}; + +enum class ngrams_type { tokens, characters }; + +static void BM_ngrams(benchmark::State& state, ngrams_type nt) +{ + auto const n_rows = static_cast(state.range(0)); + auto const max_str_length = static_cast(state.range(1)); + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (nt) { + case ngrams_type::tokens: nvtext::generate_ngrams(input); break; + case ngrams_type::characters: nvtext::generate_character_ngrams(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 5; + int const max_rowlen = 40; + int const len_mult = 2; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextNGrams, name) \ + (::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \ + BENCHMARK_REGISTER_F(TextNGrams, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(tokens) +NVTEXT_BENCHMARK_DEFINE(characters) diff --git a/cpp/benchmarks/text/replace_benchmark.cpp b/cpp/benchmarks/text/replace_benchmark.cpp new 
file mode 100644 index 00000000000..f5428aee225 --- /dev/null +++ b/cpp/benchmarks/text/replace_benchmark.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +class TextReplace : public cudf::benchmark { +}; + +static void BM_replace(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto const n_length = static_cast(state.range(1)); + + std::vector words{" ", "one ", "two ", "three ", "four ", + "five ", "six ", "sevén ", "eight ", "nine ", + "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", + "fifteen ", "sixteen ", "seventeen ", "eighteen ", "nineteen "}; + + std::default_random_engine generator; + std::uniform_int_distribution tokens_dist(0, words.size() - 1); + std::string row; // build a row of random tokens + while (static_cast(row.size()) < n_length) row += words[tokens_dist(generator)]; + + std::uniform_int_distribution position_dist(0, 16); + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [&](auto idx) { return row.c_str() + position_dist(generator); }); + cudf::test::strings_column_wrapper input(elements, elements + n_rows); + cudf::strings_column_view view(input); + + cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"}); + cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + nvtext::replace_tokens( + view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements)); + } + + state.SetBytesProcessed(state.iterations() * view.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_multiplier = 8; + int const min_row_length = 1 << 5; + int const max_row_length = 1 << 13; + int const length_multiplier = 4; + generate_string_bench_args( + b, min_rows, max_rows, row_multiplier, min_row_length, max_row_length, length_multiplier); +} + +#define NVTEXT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(TextReplace, name) \ + (::benchmark::State & st) { BM_replace(st); } \ + BENCHMARK_REGISTER_F(TextReplace, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +NVTEXT_BENCHMARK_DEFINE(replace) diff --git a/cpp/cmake/cudf-build-config.cmake.in b/cpp/cmake/cudf-build-config.cmake.in index d0c5a608e45..ed1926f20f0 100644 --- a/cpp/cmake/cudf-build-config.cmake.in +++ b/cpp/cmake/cudf-build-config.cmake.in @@ -2,6 +2,22 @@ cmake_minimum_required(VERSION 3.18) +set(_possible_targets_to_promote + cudf::cudf + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + cudf::cudftestutil + rmm::rmm + arrow_shared + arrow_cuda_shared ) +foreach(target IN LISTS _possible_targets_to_promote) + if(NOT TARGET ${target}) + list(APPEND _targets_to_promote ${target}) + endif() +endforeach() + 
set(CUDF_VERSION @CUDF_VERSION@) set(CUDF_VERSION_MAJOR @CUDF_VERSION_MAJOR@) set(CUDF_VERSION_MINOR @CUDF_VERSION_MINOR@) @@ -36,21 +52,29 @@ include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetThrust.cmake) # find rmm set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetRMM.cmake) -# find gtest -include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) # find arrow -if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") +else() + if(NOT DEFINED CUDF_USE_ARROW_STATIC) + set(CUDF_USE_ARROW_STATIC OFF) + endif() include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetArrow.cmake) endif() +# find GTest +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-gtesting-targets.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/cudf-gtesting-targets.cmake") +else() + # find gtest + include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) +endif() + list(POP_FRONT CMAKE_MODULE_PATH) -if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") -endif() -include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") endif() @@ -59,6 +83,12 @@ include("${CMAKE_CURRENT_LIST_DIR}/cudf-config-version.cmake") check_required_components(cudf) +foreach(target IN LISTS _targets_to_promote) + if(TARGET ${target}) + fix_cmake_global_defaults(${target}) + endif() +endforeach() + set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") include(FindPackageHandleStandardArgs) diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in index 14f8a661c2f..66c669851fa 100644 --- a/cpp/cmake/cudf-config.cmake.in +++ b/cpp/cmake/cudf-config.cmake.in @@ -23,21 +23,16 @@ targets: cudf::cudf - The main cudf library. 
This module offers an optional testing component which defines the -following IMPORTED GLOBAL targets: +following IMPORTED GLOBAL targets: cudf::cudftestutil - The main cudf testing library - cudf::gmock - cudf::gmock_main - cudf::gtest - cudf::gtest_main - Result Variables ^^^^^^^^^^^^^^^^ This module will set the following variables in your project:: - CUDF_FOUND + cudf_FOUND CUDF_VERSION CUDF_VERSION_MAJOR CUDF_VERSION_MINOR @@ -49,13 +44,11 @@ cmake_minimum_required(VERSION 3.18) set(_possible_targets_to_promote cudf::cudf - cudf::benchmark - cudf::benchmark_main - cudf::gmock - cudf::gtest - cudf::gmock_main - cudf::gtest_main cudf::cudftestutil + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main rmm::rmm arrow_shared arrow_cuda_shared ) @@ -101,17 +94,22 @@ include("${CMAKE_CURRENT_LIST_DIR}/cudf-targets.cmake") if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) - find_dependency(GTest @CUDF_MIN_VERSION_GTest@) + find_dependency(GTest @CUDF_MIN_VERSION_GTest@ CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake") + endif() include("${CMAKE_CURRENT_LIST_DIR}/cudf-config-version.cmake") check_required_components(cudf) -foreach(t IN LISTS _targets_to_promote) - if(TARGET ${t}) - set_target_properties(${t} PROPERTIES IMPORTED_GLOBAL TRUE) +foreach(target IN LISTS _targets_to_promote) + if(TARGET ${target}) + get_target_property(_already_global ${target} IMPORTED_GLOBAL) + if(NOT _already_global) + set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() endif() endforeach() set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") diff --git a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake index 666ba0fbb2c..9e4f3c137b1 100644 --- a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake @@ -26,7 +26,7 @@ function(find_and_configure_gtest VERSION) GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG release-${VERSION} GIT_SHALLOW TRUE - OPTIONS "INSTALL_GTEST OFF" + OPTIONS "INSTALL_GTEST ON" # googletest >= 1.10.0 provides a cmake config file -- use it if it exists FIND_PACKAGE_ARGUMENTS "CONFIG") # Add GTest aliases if they don't already exist. 
@@ -43,14 +43,6 @@ function(find_and_configure_gtest VERSION) fix_cmake_global_defaults(GTest::gmock) fix_cmake_global_defaults(GTest::gtest_main) fix_cmake_global_defaults(GTest::gmock_main) - if(GTest_ADDED) - install(TARGETS gmock - gtest - gmock_main - gtest_main - DESTINATION lib - EXPORT cudf-testing-targets) - endif() endfunction() set(CUDF_MIN_VERSION_GTest 1.10.0) diff --git a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake index e5d1f2f07a9..136947674f9 100644 --- a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake @@ -55,11 +55,6 @@ function(find_and_configure_rmm VERSION) # Make sure consumers of cudf can also see rmm::rmm fix_cmake_global_defaults(rmm::rmm) - - if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) - list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}") - set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) - endif() endfunction() set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 5a02f5bbe55..14d44b77fad 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -472,6 +472,13 @@ class alignas(16) column_device_view : public detail::column_device_view_base { return d_children[child_index]; } + /** + * @brief Returns the number of child columns + * + * @return The number of child columns + */ + __host__ __device__ size_type num_child_columns() const noexcept { return _num_children; } + protected: column_device_view* d_children{}; ///< Array of `column_device_view` ///< objects in device memory. diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 31196824845..43c2407d629 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -21,7 +21,6 @@ #include #include -#include namespace cudf { /** diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 8333cf41b77..182cbbdc3ec 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,9 +17,9 @@
 #include
 #include
+#include
 #include
-#include

 namespace cudf {
 /**
@@ -36,13 +36,13 @@ namespace cudf {
 *
 * Returns empty `device_buffer` if the column is not nullable
 *
- * @param views Vector of column views whose bitmask will to be concatenated
+ * @param views host_span of column views whose bitmasks will be concatenated
 * @param mr Device memory resource used for allocating the new device_buffer
 * @return rmm::device_buffer A `device_buffer` containing the bitmasks of all
 * the column views in the views vector
 */
 rmm::device_buffer concatenate_masks(
-  std::vector<column_view> const& views,
+  host_span<column_view const> views,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

 /**
@@ -51,14 +51,13 @@ rmm::device_buffer concatenate_masks(
 * @throws cudf::logic_error
 * If types of the input columns mismatch
 *
- * @param columns_to_concat The column views to be concatenated into a single
- * column
+ * @param columns_to_concat host_span of column views to be concatenated into a single column
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return Unique pointer to a single table having all the rows from the
 * elements of `columns_to_concat` respectively in the same order.
 */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns_to_concat,
+  host_span<column_view const> columns_to_concat,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

 /**
@@ -82,14 +81,13 @@ std::unique_ptr<column> concatenate(
 * @throws cudf::logic_error
 * If number of columns mismatch
 *
- * @param tables_to_concat The table views to be concatenated into a single
- * table
+ * @param tables_to_concat host_span of table views to be concatenated into a single table
 * @param mr Device memory resource used to allocate the returned table's device memory.
 * @return Unique pointer to a single table having all the rows from the
 * elements of `tables_to_concat` respectively in the same order.
 */
 std::unique_ptr<table> concatenate(
-  std::vector<table_view> const& tables_to_concat,
+  host_span<table_view const> tables_to_concat,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

 /** @} */  // end of group
diff --git a/cpp/include/cudf/detail/concatenate.cuh b/cpp/include/cudf/detail/concatenate.cuh
index a30ad6e853d..5f0399d6172 100644
--- a/cpp/include/cudf/detail/concatenate.cuh
+++ b/cpp/include/cudf/detail/concatenate.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include

 #include

@@ -34,8 +35,8 @@ namespace detail {
 *
 * @param stream CUDA stream used for device memory operations and kernel launches.
 */
-void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
-                       rmm::device_vector<size_type> const& d_offsets,
+void concatenate_masks(device_span<column_device_view const> d_views,
+                       device_span<size_type const> d_offsets,
                        bitmask_type* dest_mask,
                        size_type output_size,
                        rmm::cuda_stream_view stream);

@@ -45,7 +46,7 @@ void concatenate_masks(rmm::device_vector<column_device_view> const& d_views,
 *
 * @param stream CUDA stream used for device memory operations and kernel launches.
*/ -void concatenate_masks(std::vector const& views, +void concatenate_masks(host_span views, bitmask_type* dest_mask, rmm::cuda_stream_view stream); diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 43eb5203b37..f7f5567cd76 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -27,22 +28,22 @@ namespace cudf { //! Inner interfaces and implementations namespace detail { /** - * @copydoc cudf::concatenate(std::vector const&,rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr concatenate( - std::vector const& columns_to_concat, + host_span columns_to_concat, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::concatenate(std::vector const&,rmm::mr::device_memory_resource*) + * @copydoc cudf::concatenate(host_span,rmm::mr::device_memory_resource*) * * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
concatenate( - std::vector const& tables_to_concat, + host_span tables_to_concat, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/detail/groupby.hpp b/cpp/include/cudf/detail/groupby.hpp index ce5fdb92bd1..36a76c7b6de 100644 --- a/cpp/include/cudf/detail/groupby.hpp +++ b/cpp/include/cudf/detail/groupby.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include @@ -36,12 +37,12 @@ namespace hash { * @return true A hash-based groupby can be used * @return false A hash-based groupby cannot be used */ -bool can_use_hash_groupby(table_view const& keys, std::vector const& requests); +bool can_use_hash_groupby(table_view const& keys, host_span requests); // Hash-based groupby std::pair, std::vector> groupby( table_view const& keys, - std::vector const& requests, + host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index a68d649b8c8..bfc9673d3cb 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -93,7 +93,7 @@ struct sort_groupby_helper { */ std::unique_ptr sorted_values( column_view const& values, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -108,7 +108,7 @@ struct sort_groupby_helper { */ std::unique_ptr grouped_values( column_view const& values, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -117,7 +117,7 @@ struct sort_groupby_helper { * @return a new table in which each row is a unique row in the sorted key table. */ std::unique_ptr
unique_keys( - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -126,13 +126,13 @@ struct sort_groupby_helper { * @return a new table containing the sorted keys. */ std::unique_ptr
sorted_keys( - rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Get the number of groups in `keys` */ - size_type num_groups() { return group_offsets().size() - 1; } + size_type num_groups(rmm::cuda_stream_view stream) { return group_offsets(stream).size() - 1; } /** * @brief Return the effective number of keys @@ -141,7 +141,7 @@ struct sort_groupby_helper { * When include_null_keys = NO, returned value is the number of rows in `keys` * in which no element is null */ - size_type num_keys(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + size_type num_keys(rmm::cuda_stream_view stream); /** * @brief Get the sorted order of `keys`. @@ -156,7 +156,7 @@ struct sort_groupby_helper { * * @return the sort order indices for `keys`. */ - column_view key_sort_order(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view key_sort_order(rmm::cuda_stream_view stream); /** * @brief Get each group's offset into the sorted order of `keys`. @@ -169,13 +169,13 @@ struct sort_groupby_helper { * @return vector of offsets of the starting point of each group in the sorted * key table */ - index_vector const& group_offsets(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + index_vector const& group_offsets(rmm::cuda_stream_view stream); /** * @brief Get the group labels corresponding to the sorted order of `keys`. * * Each group is assigned a unique numerical "label" in - * `[0, 1, 2, ... , num_groups() - 1, num_groups())`. + * `[0, 1, 2, ... , num_groups() - 1, num_groups(stream))`. * For a row in sorted `keys`, its corresponding group label indicates which * group it belongs to. * @@ -184,7 +184,7 @@ struct sort_groupby_helper { * * @return vector of group labels for each row in the sorted key column */ - index_vector const& group_labels(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + index_vector const& group_labels(rmm::cuda_stream_view stream); private: /** @@ -192,7 +192,7 @@ struct sort_groupby_helper { * * Returns the group label for every row in the original `keys` table. For a * given unique key row, its group label is equivalent to what is returned by - * `group_labels()`. However, if a row contains a null value, and + * `group_labels(stream)`. However, if a row contains a null value, and * `include_null_keys == NO`, then its label is NULL. * * Computes and stores unsorted labels on first invocation and returns stored @@ -201,7 +201,7 @@ struct sort_groupby_helper { * @return A nullable column of `INT32` containing group labels in the order * of the unsorted key table */ - column_view unsorted_keys_labels(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view unsorted_keys_labels(rmm::cuda_stream_view stream); /** * @brief Get the column representing the row bitmask for the `keys` @@ -215,7 +215,7 @@ struct sort_groupby_helper { * Computes and stores bitmask on first invocation and returns stored column * on subsequent calls. 
*/ - column_view keys_bitmask_column(rmm::cuda_stream_view stream = rmm::cuda_stream_default); + column_view keys_bitmask_column(rmm::cuda_stream_view stream); private: column_ptr _key_sorted_order; ///< Indices to produce _keys in sorted order diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index b0870ef8d9a..77cb321a12c 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -53,7 +53,7 @@ void set_null_mask(bitmask_type *bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream); /** @@ -62,7 +62,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream); /** diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp index ae2e0f0ba38..c2fe2dce1fe 100644 --- a/cpp/include/cudf/dictionary/detail/concatenate.hpp +++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include @@ -36,7 +37,7 @@ namespace detail { * @return New column with concatenated results. */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index eb752a8a0ea..952075b1703 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -218,14 +218,15 @@ class fixed_point { using rep = Rep; /** - * @brief Constructor that will perform shifting to store value appropriately + * @brief Constructor that will perform shifting to store value appropriately (from floating point + * types) * - * @tparam T The type that you are constructing from (integral or floating) + * @tparam T The floating point type that you are constructing from * @param value The value that will be constructed from * @param scale The exponent that is applied to Rad to perform shifting */ template () && + typename cuda::std::enable_if_t() && is_supported_representation_type()>* = nullptr> CUDA_HOST_DEVICE_CALLABLE explicit fixed_point(T const& value, scale_type const& scale) : _value{static_cast(detail::shift(value, scale))}, _scale{scale} @@ -233,8 +234,25 @@ class fixed_point { } /** - * @brief Constructor that will not perform shifting (assumes value already - * shifted) + * @brief Constructor that will perform shifting to store value appropriately (from integral + * types) + * + * @tparam T The integral type that you are constructing from + * @param value The value that will be constructed from + * @param scale The exponent that is applied to Rad to perform shifting + */ + template () && + is_supported_representation_type()>* = nullptr> + CUDA_HOST_DEVICE_CALLABLE explicit 
fixed_point(T const& value, scale_type const& scale) + // `value` is cast to `Rep` to avoid overflow in cases where + // constructing to `Rep` that is wider than `T` + : _value{detail::shift(static_cast(value), scale)}, _scale{scale} + { + } + + /** + * @brief Constructor that will not perform shifting (assumes value already shifted) * * @param s scaled_integer that contains scale and already shifted value */ @@ -260,18 +278,33 @@ class fixed_point { fixed_point() : _value{0}, _scale{scale_type{0}} {} /** - * @brief Explicit conversion operator + * @brief Explicit conversion operator for casting to floating point types * - * @tparam U The type that is being explicitly converted to (integral or floating) + * @tparam U The floating point type that is being explicitly converted to * @return The `fixed_point` number in base 10 (aka human readable format) */ template ()>* = nullptr> - CUDA_HOST_DEVICE_CALLABLE explicit constexpr operator U() const + typename cuda::std::enable_if_t::value>* = nullptr> + explicit constexpr operator U() const { return detail::shift(static_cast(_value), detail::negate(_scale)); } + /** + * @brief Explicit conversion operator for casting to integral types + * + * @tparam U The integral type that is being explicitly converted to + * @return The `fixed_point` number in base 10 (aka human readable format) + */ + template ::value>* = nullptr> + explicit constexpr operator U() const + { + // Don't cast to U until converting to Rep because in certain cases casting to U before shifting + // will result in integer overflow (i.e. if U = int32_t, Rep = int64_t and _value > 2 billion) + return static_cast(detail::shift(_value, detail::negate(_scale))); + } + CUDA_HOST_DEVICE_CALLABLE operator scaled_integer() const { return scaled_integer{_value, _scale}; diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 1dfacd53e0d..19f87873873 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -163,7 +164,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> aggregate( - std::vector const& requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -218,7 +219,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> scan( - std::vector const& requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -277,18 +278,18 @@ class groupby { * aggregation requests. */ std::pair, std::vector> dispatch_aggregation( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); // Sort-based groupby std::pair, std::vector> sort_aggregate( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); std::pair, std::vector> sort_scan( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); }; diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index f9adc893b8e..30797443c35 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -42,7 +43,7 @@ namespace detail { * @return New column with concatenated results. */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 0d4de1a9beb..ae6c0cfdbd7 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -136,38 +137,32 @@ cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, s * `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2). * * Returns an empty vector if `bitmask == nullptr`. + * * @throws cudf::logic_error if `indices.size() % 2 != 0` - * @throws cudf::logic_error if `indices[2*i] < 0 or - * indices[2*i] > indices[(2*i)+1]` - * - * @param[in] bitmask Bitmask residing in device memory whose bits will be - * counted - * @param[in] indices A vector of indices used to specify ranges to count the - * number of set bits - * @return std::vector A vector storing the number of non-zero bits - * in the specified ranges + * @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]` + * + * @param[in] bitmask Bitmask residing in device memory whose bits will be counted + * @param[in] indices A host_span of indices specifying ranges to count the number of set bits + * @return A vector storing the number of non-zero bits in the specified ranges */ std::vector segmented_count_set_bits(bitmask_type const* bitmask, - std::vector const& indices); + host_span indices); /** * @brief Given a bitmask, counts the number of unset (0) bits in every range * `[indices[2*i], indices[(2*i)+1])` (where 0 <= i < indices.size() / 2). * * Returns an empty vector if `bitmask == nullptr`. + * * @throws cudf::logic_error if `indices.size() % 2 != 0` - * @throws cudf::logic_error if `indices[2*i] < 0 or - * indices[2*i] > indices[(2*i)+1]` - * - * @param[in] bitmask Bitmask residing in device memory whose bits will be - * counted - * @param[in] indices A vector of indices used to specify ranges to count the - * number of unset bits - * @return std::vector A vector storing the number of zero bits in - * the specified ranges + * @throws cudf::logic_error if `indices[2*i] < 0 or indices[2*i] > indices[(2*i)+1]` + * + * @param[in] bitmask Bitmask residing in device memory whose bits will be counted + * @param[in] indices A host_span of indices specifying ranges to count the number of unset bits + * @return A vector storing the number of zero bits in the specified ranges */ std::vector segmented_count_unset_bits(bitmask_type const* bitmask, - std::vector const& indices); + host_span indices); /** * @brief Creates a `device_buffer` from a slice of bitmask defined by a range diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp index 3e6fc6d67fc..0740039e896 100644 --- a/cpp/include/cudf/strings/detail/concatenate.hpp +++ b/cpp/include/cudf/strings/detail/concatenate.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include

 #include

@@ -41,7 +42,7 @@ namespace detail {
 * @return New column with concatenated results.
 */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp
index ef3da82cfeb..a098703e4b0 100644
--- a/cpp/include/cudf/structs/detail/concatenate.hpp
+++ b/cpp/include/cudf/structs/detail/concatenate.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include

 namespace cudf {
 namespace structs {
@@ -48,7 +49,7 @@ namespace detail {
 * @return New column with concatenated results.
 */
 std::unique_ptr<column> concatenate(
-  std::vector<column_view> const& columns,
+  host_span<column_view const> columns,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh
index 04d215ff7cb..5af3c29a3d9 100644
--- a/cpp/include/cudf/table/row_operators.cuh
+++ b/cpp/include/cudf/table/row_operators.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -91,6 +91,26 @@ __device__ weak_ordering relational_compare(Element lhs, Element rhs)
   return detail::compare_elements(lhs, rhs);
 }

+/**
+ * @brief Compare the nulls according to null order.
+ *
+ * @param lhs_is_null boolean representing if lhs is null
+ * @param rhs_is_null boolean representing if rhs is null
+ * @param null_precedence null order
+ * @return Indicates the relationship between null in lhs and rhs columns.
+ */
+inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
+{
+  if (lhs_is_null and rhs_is_null) { // null (table_device_view const& lhs, + table_device_view const& rhs); +extern template bool is_relationally_comparable( + mutable_table_device_view const& lhs, mutable_table_device_view const& rhs); +} // namespace detail } // namespace cudf
diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp
index 083366cc310..5cdecab9115 100644
--- a/cpp/include/cudf/table/table_view.hpp
+++ b/cpp/include/cudf/table/table_view.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -291,4 +291,21 @@ table_view scatter_columns(table_view const& source,
   std::vector<size_type> const& map,
   table_view const& target);

+namespace detail {
+/**
+ * @brief Indicates whether respective columns in input tables are relationally comparable.
+ * + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @return true all of respective columns on `lhs` and 'rhs` tables are comparable. + * @return false any of respective columns on `lhs` and 'rhs` tables are not comparable. + */ +template +bool is_relationally_comparable(TableView const& lhs, TableView const& rhs); + +extern template bool is_relationally_comparable(table_view const& lhs, + table_view const& rhs); +extern template bool is_relationally_comparable(mutable_table_view const& lhs, + mutable_table_view const& rhs); +} // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 7a3316a0571..727284194d8 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -260,12 +260,12 @@ class data_type { /** * @brief Returns the type identifier */ - CUDA_HOST_DEVICE_CALLABLE type_id id() const noexcept { return _id; } + constexpr type_id id() const noexcept { return _id; } /** * @brief Returns the scale (for fixed_point types) */ - CUDA_HOST_DEVICE_CALLABLE int32_t scale() const noexcept { return _fixed_point_scale; } + constexpr int32_t scale() const noexcept { return _fixed_point_scale; } private: type_id _id{type_id::EMPTY}; @@ -287,7 +287,7 @@ class data_type { * @return true `lhs` is equal to `rhs` * @return false `lhs` is not equal to `rhs` */ -inline bool operator==(data_type const& lhs, data_type const& rhs) +constexpr bool operator==(data_type const& lhs, data_type const& rhs) { // use std::tie in the future, breaks JITIFY currently return lhs.id() == rhs.id() && lhs.scale() == rhs.scale(); diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index c13e5ce44ae..999306d4ee7 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -126,16 +126,31 @@ struct host_span : public cudf::detail::span_base::value>* = nullptr> + // Constructor from container + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr host_span(C& in) : base(in.data(), in.size()) { } - template ::value>* = nullptr> + // Constructor from const container + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr host_span(C const& in) : base(in.data(), in.size()) { } + // Copy construction to support const conversion template ::value>* = nullptr> + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr device_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size()) { } - template ::value>* = nullptr> + template < + typename C, + // Only supported containers of types convertible to T + std::enable_if_t::value && + std::is_convertible().data()))> (*)[], + T (*)[]>::value>* = nullptr> constexpr device_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size()) { } diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index a307bf9d3f0..e178f5a6280 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -13,112 +13,51 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #=============================================================================
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
-project(CUDA_KAFKA VERSION 0.15.0 LANGUAGES C CXX CUDA)
-
-# TODO: Since we have no actual CUDA code in cudf_kafka this should be removed in the future
-# in favor of using FindCUDAToolkit to get the needed CUDA include headers
-if(NOT CMAKE_CUDA_COMPILER)
-  message(SEND_ERROR "CMake cannot locate a CUDA compiler")
-endif(NOT CMAKE_CUDA_COMPILER)
-
-###################################################################################################
-# - build type ------------------------------------------------------------------------------------
-
-# Set a default build type if none was specified
-set(DEFAULT_BUILD_TYPE "Release")
-
-if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
-  message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.")
-  set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE
-      STRING "Choose the type of build." FORCE)
-  # Set the possible values of build type for cmake-gui
-  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
-    "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
-endif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+project(CUDA_KAFKA VERSION 0.19.0 LANGUAGES CXX)
 ###################################################################################################
-# - compiler options ------------------------------------------------------------------------------
-
-set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# - Build options
+option(BUILD_TESTS "Build tests for libcudf_kafka" ON)
-# To apply RUNPATH to transitive dependencies (this is a temporary solution)
-set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags")
-set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags")
-
-# Build options
-option(BUILD_TESTS "Configure CMake to build tests" ON)
+message(VERBOSE "CUDF_KAFKA: Build gtests: ${BUILD_TESTS}")
 ###################################################################################################
-# - cmake modules ---------------------------------------------------------------------------------
-
-message(VERBOSE "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}")
-set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH})
+# - Dependencies
-include(FeatureSummary)
-include(CheckIncludeFiles)
-include(CheckLibraryExists)
+# CPM
+include(../cmake/thirdparty/CUDF_GetCPM.cmake)
-###################################################################################################
-# - conda environment -----------------------------------------------------------------------------
+# libcudf
+include(cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake)
-if("$ENV{CONDA_BUILD}" STREQUAL "1")
-    set(CMAKE_SYSTEM_PREFIX_PATH "$ENV{BUILD_PREFIX};$ENV{PREFIX};${CMAKE_SYSTEM_PREFIX_PATH}")
-    set(CONDA_INCLUDE_DIRS "$ENV{BUILD_PREFIX}/include" "$ENV{PREFIX}/include")
-    set(CONDA_LINK_DIRS "$ENV{BUILD_PREFIX}/lib" "$ENV{PREFIX}/lib")
-    message(VERBOSE "Conda build detected, CMAKE_SYSTEM_PREFIX_PATH set to: ${CMAKE_SYSTEM_PREFIX_PATH}")
-endif()
+# librdkafka
+include(cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake)
-###################################################################################################
-# - add gtest -------------------------------------------------------------------------------------
+# # GTests if enabled
+if (BUILD_TESTS)
+  # GoogleTest
+
include(../cmake/thirdparty/CUDF_GetGTest.cmake) -# TODO: This is currently using a nearly duplicate Google Test Module due to CMake source limitations. -# this should be standardized in the future to use the same Google Test Module as cudf -if(BUILD_TESTS) + # include CTest module -- automatically calls enable_testing() include(CTest) - include(ConfigureGoogleTest) - - if(GTEST_FOUND) - message(VERBOSE "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}") - include_directories(${GTEST_INCLUDE_DIR}) - add_subdirectory(${CMAKE_SOURCE_DIR}/tests) - else() - message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.") - endif(GTEST_FOUND) -endif(BUILD_TESTS) - -message(VERBOSE "CUDF_KAFKA_TEST_LIST set to: ${CUDF_KAFKA_TEST_LIST}") + add_subdirectory(tests) +endif() ################################################################################################### # - include paths --------------------------------------------------------------------------------- -if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") -endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_BINARY_DIR}/include" - "${CMAKE_BINARY_DIR}/include/jit" "${CMAKE_SOURCE_DIR}/include" "${CMAKE_SOURCE_DIR}/src") -if(CONDA_INCLUDE_DIRS) - include_directories("${CONDA_INCLUDE_DIRS}") -endif(CONDA_INCLUDE_DIRS) - ################################################################################################### # - library paths --------------------------------------------------------------------------------- link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc "${CMAKE_BINARY_DIR}/lib" - "${CMAKE_BINARY_DIR}" - "${GTEST_LIBRARY_DIR}" - "${RMM_LIBRARY}") - -if(CONDA_LINK_DIRS) - link_directories("${CONDA_LINK_DIRS}") -endif(CONDA_LINK_DIRS) + "${CMAKE_BINARY_DIR}") ################################################################################################### # - library target -------------------------------------------------------------------------------- @@ -127,37 +66,12 @@ add_library(cudf_kafka SHARED src/kafka_consumer.cpp ) -set_target_properties(cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN") - -# Include paths -include_directories("${CMAKE_SOURCE_DIR}/include" - "${CMAKE_CURRENT_SOURCE_DIR}/include/cudf") - -################################################################################################### -# cudf_kafka - librdkafka ------------------------------------------------------------------------- - -find_path(RDKAFKA_INCLUDE "librdkafka" HINTS "$ENV{RDKAFKA_ROOT}/include") -find_library(RDKAFKA++_LIBRARY "rdkafka++" HINTS "$ENV{RDKAFKA_ROOT}/lib" "$ENV{RDKAFKA_ROOT}/build") - -message(VERBOSE "RDKAFKA: RDKAFKA++_LIBRARY set to ${RDKAFKA++_LIBRARY}") -message(VERBOSE "RDKAFKA: RDKAFKA_INCLUDE set to ${RDKAFKA_INCLUDE}") - -target_link_libraries(cudf_kafka ${RDKAFKA++_LIBRARY}) -include_directories("${RDKAFKA_INCLUDE}") - ################################################################################################### # - cudf_kafka Install ---------------------------------------------------------------------------- -target_link_libraries(cudf_kafka cudf) +target_link_libraries(cudf_kafka cudf::cudf RDKAFKA::RDKAFKA) install(TARGETS cudf_kafka DESTINATION lib) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include DESTINATION include) - 
-add_custom_target(build_tests_libcudf_kafka - DEPENDS ${CUDF_KAFKA_TEST_LIST}) - -add_custom_target(test_libcudf_kafka - COMMAND ctest - DEPENDS build_tests_libcudf_kafka) diff --git a/cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake b/cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake deleted file mode 100644 index e2b3aff8546..00000000000 --- a/cpp/libcudf_kafka/cmake/Modules/ConfigureGoogleTest.cmake +++ /dev/null @@ -1,46 +0,0 @@ -set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest") - -set(GTEST_CMAKE_ARGS "") - -configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" - "${GTEST_ROOT}/CMakeLists.txt") - -file(MAKE_DIRECTORY "${GTEST_ROOT}/build") -file(MAKE_DIRECTORY "${GTEST_ROOT}/install") - -execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . - RESULT_VARIABLE GTEST_CONFIG - WORKING_DIRECTORY ${GTEST_ROOT}) - -if(GTEST_CONFIG) - message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG}) -endif(GTEST_CONFIG) - -set(PARALLEL_BUILD -j) -if($ENV{PARALLEL_LEVEL}) - set(NUM_JOBS $ENV{PARALLEL_LEVEL}) - set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") -endif($ENV{PARALLEL_LEVEL}) - -if(${NUM_JOBS}) - if(${NUM_JOBS} EQUAL 1) - message(VERBOSE "GTEST BUILD: Enabling Sequential CMake build") - elseif(${NUM_JOBS} GREATER 1) - message(VERBOSE "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") - endif(${NUM_JOBS} EQUAL 1) -else() - message(VERBOSE "GTEST BUILD: Enabling Parallel CMake build with all threads") -endif(${NUM_JOBS}) - -execute_process(COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD} - RESULT_VARIABLE GTEST_BUILD - WORKING_DIRECTORY ${GTEST_ROOT}/build) - -if(GTEST_BUILD) - message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD}) -endif(GTEST_BUILD) - -message(VERBOSE "GoogleTest installed here: " ${GTEST_ROOT}/install) -set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include") -set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib") -set(GTEST_FOUND TRUE) diff --git a/cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake b/cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake deleted file mode 100644 index 07692cd3d32..00000000000 --- a/cpp/libcudf_kafka/cmake/Templates/GoogleTest.CMakeLists.txt.cmake +++ /dev/null @@ -1,12 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -include(ExternalProject) - -ExternalProject_Add(GoogleTest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.8.0 - GIT_SHALLOW true - SOURCE_DIR "${GTEST_ROOT}/googletest" - BINARY_DIR "${GTEST_ROOT}/build" - INSTALL_DIR "${GTEST_ROOT}/install" - CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install) diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake new file mode 100644 index 00000000000..1f7c15d4f75 --- /dev/null +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetCUDF.cmake @@ -0,0 +1,46 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(cudfkafka_save_if_enabled var) + if(CUDF_KAFKA_${var}) + unset(${var} PARENT_SCOPE) + unset(${var} CACHE) + endif() +endfunction() + +function(cudfkafka_restore_if_enabled var) + if(CUDF_KAFKA_${var}) + set(${var} ON CACHE INTERNAL "" FORCE) + endif() +endfunction() + +function(find_and_configure_cudf VERSION) + cudfkafka_save_if_enabled(BUILD_TESTS) + cudfkafka_save_if_enabled(BUILD_BENCHMARKS) + CPMFindPackage(NAME cudf + VERSION ${VERSION} + GIT_REPOSITORY https://github.com/rapidsai/cudf.git + GIT_TAG branch-${VERSION} + GIT_SHALLOW TRUE + SOURCE_SUBDIR cpp + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF") + cudfkafka_restore_if_enabled(BUILD_TESTS) + cudfkafka_restore_if_enabled(BUILD_BENCHMARKS) +endfunction() + +set(CUDF_KAFKA_MIN_VERSION_cudf 0.19) +find_and_configure_cudf(${CUDF_KAFKA_MIN_VERSION_cudf}) diff --git a/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake new file mode 100644 index 00000000000..5c07db66668 --- /dev/null +++ b/cpp/libcudf_kafka/cmake/thirdparty/CUDF_KAFKA_GetRDKafka.cmake @@ -0,0 +1,25 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +find_path(RDKAFKA_INCLUDE "librdkafka" HINTS "$ENV{RDKAFKA_ROOT}/include") +find_library(RDKAFKA++_LIBRARY "rdkafka++" HINTS "$ENV{RDKAFKA_ROOT}/lib" "$ENV{RDKAFKA_ROOT}/build") + +if(RDKAFKA_INCLUDE AND RDKAFKA++_LIBRARY) + add_library(rdkafka INTERFACE) + target_link_libraries(rdkafka INTERFACE "${RDKAFKA++_LIBRARY}") + target_include_directories(rdkafka INTERFACE "${RDKAFKA_INCLUDE}") + add_library(RDKAFKA::RDKAFKA ALIAS rdkafka) +endif() \ No newline at end of file diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index af0ea1c8239..e813ed5439e 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,100 +14,25 @@ # limitations under the License. 
#============================================================================= -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) - -project(KAFKA_TESTS VERSION 0.15.0 LANGUAGES C CXX CUDA) - -# TODO: Since we have no actual CUDA code in cudf_kafka this should be removed in the future -# in favor of using FindCUDAToolkit to get the needed CUDA include headers -if(NOT CMAKE_CUDA_COMPILER) - message(SEND_ERROR "CMake cannot locate a CUDA compiler") -endif(NOT CMAKE_CUDA_COMPILER) - -################################################################################################### -# - build type ------------------------------------------------------------------------------------ - -# Set a default build type if none was specified -set(DEFAULT_BUILD_TYPE "Release") - -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.") - set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE - STRING "Choose the type of build." FORCE) - # Set the possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS - "Debug" "Release" "MinSizeRel" "RelWithDebInfo") -endif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - -################################################################################################### -# - compiler options ------------------------------------------------------------------------------ - -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# To apply RUNPATH to transitive dependencies (this is a temporary solution) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags") -set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags") - -################################################################################################### -# - conda environment ----------------------------------------------------------------------------- - -if("$ENV{CONDA_BUILD}" STREQUAL "1") - set(CMAKE_SYSTEM_PREFIX_PATH "$ENV{BUILD_PREFIX};$ENV{PREFIX};${CMAKE_SYSTEM_PREFIX_PATH}") - set(CONDA_INCLUDE_DIRS "$ENV{BUILD_PREFIX}/include" "$ENV{PREFIX}/include") - set(CONDA_LINK_DIRS "$ENV{BUILD_PREFIX}/lib" "$ENV{PREFIX}/lib") - message(STATUS "Conda build detected, CMAKE_SYSTEM_PREFIX_PATH set to: ${CMAKE_SYSTEM_PREFIX_PATH}") -endif() - ################################################################################################### # - compiler function ----------------------------------------------------------------------------- -set(CUDF_KAFKA_TEST_LIST CACHE INTERNAL "CUDF_KAFKA_TEST_LIST") - -function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) - add_executable(${CMAKE_TEST_NAME} - ${CMAKE_TEST_SRC}) - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(${CMAKE_TEST_NAME} gmock gtest gtest_main pthread cuda cudf_kafka) - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gtests") +function(ConfigureTest CMAKE_TEST_NAME ) + add_executable(${CMAKE_TEST_NAME} ${ARGN}) + set_target_properties(${CMAKE_TEST_NAME} + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) + target_include_directories(${CMAKE_TEST_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../include) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) - set(CUDF_KAFKA_TEST_LIST ${CUDF_KAFKA_TEST_LIST} ${CMAKE_TEST_NAME} CACHE INTERNAL "CUDF_KAFKA_TEST_LIST") -endfunction(ConfigureTest) +endfunction() 
################################################################################################### -# - include paths --------------------------------------------------------------------------------- - -if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") -endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - -include_directories("${CMAKE_BINARY_DIR}/include" - "${CMAKE_SOURCE_DIR}/include" - "${CMAKE_SOURCE_DIR}../../../tests" - "${CMAKE_SOURCE_DIR}" - "${CMAKE_SOURCE_DIR}/src" - "${GTEST_INCLUDE_DIR}") - -if(CONDA_INCLUDE_DIRS) - include_directories("${CONDA_INCLUDE_DIRS}") -endif(CONDA_INCLUDE_DIRS) +# - Kafka host tests ---------------------------------------------------------------------------------- +ConfigureTest(KAFKA_HOST_TEST + kafka_consumer_tests.cpp) ################################################################################################### -# - library paths --------------------------------------------------------------------------------- - -link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc - "${CMAKE_BINARY_DIR}/lib" - "${CMAKE_BINARY_DIR}" - "${GTEST_LIBRARY_DIR}" - "${RMM_LIBRARY}") - -if(CONDA_LINK_DIRS) - link_directories("${CONDA_LINK_DIRS}") -endif(CONDA_LINK_DIRS) - +### enable testing ################################################################################ ################################################################################################### -# - create tests ---------------------------------------------------------------------------------- -ConfigureTest(CUDF_KAFKA_HOST_READ kafka_consumer_tests.cpp) enable_testing() diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 845a5512c27..28d1411c30d 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include @@ -466,7 +466,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, } std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream) { CUDF_EXPECTS(indices.size() % 2 == 0, @@ -489,8 +489,8 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } size_type num_ranges = indices.size() / 2; - thrust::host_vector h_first_indices(num_ranges); - thrust::host_vector h_last_indices(num_ranges); + std::vector h_first_indices(num_ranges); + std::vector h_last_indices(num_ranges); thrust::stable_partition_copy(thrust::seq, std::begin(indices), std::end(indices), @@ -499,9 +499,9 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, h_last_indices.begin(), [](auto i) { return (i % 2) == 0; }); - rmm::device_vector d_first_indices = h_first_indices; - rmm::device_vector d_last_indices = h_last_indices; - rmm::device_vector d_null_counts(num_ranges, 0); + auto d_first_indices = make_device_uvector_async(h_first_indices, stream); + auto d_last_indices = make_device_uvector_async(h_last_indices, stream); + rmm::device_uvector d_null_counts(num_ranges, stream); auto word_num_set_bits = thrust::make_transform_iterator( thrust::make_counting_iterator(0), @@ -510,12 +510,12 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, thrust::make_counting_iterator(0), // We cannot use lambda as 
cub::DeviceSegmentedReduce::Sum() requires // first_word_indices and last_word_indices to have the same type. - to_word_index(true, d_first_indices.data().get())); + to_word_index(true, d_first_indices.data())); auto last_word_indices = thrust::make_transform_iterator( thrust::make_counting_iterator(0), // We cannot use lambda as cub::DeviceSegmentedReduce::Sum() requires // first_word_indices and last_word_indices to have the same type. - to_word_index(false, d_last_indices.data().get())); + to_word_index(false, d_last_indices.data())); // first allocate temporary memroy @@ -560,7 +560,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, std::vector ret(num_ranges); CUDA_TRY(cudaMemcpyAsync(ret.data(), - d_null_counts.data().get(), + d_null_counts.data(), num_ranges * sizeof(size_type), cudaMemcpyDeviceToHost, stream.value())); @@ -571,7 +571,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices, + host_span indices, rmm::cuda_stream_view stream) { if (indices.empty()) { @@ -669,7 +669,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, size_type start, s // Count non-zero bits in the specified ranges std::vector segmented_count_set_bits(bitmask_type const *bitmask, - std::vector const &indices) + host_span indices) { CUDF_FUNC_RANGE(); return detail::segmented_count_set_bits(bitmask, indices, rmm::cuda_stream_default); @@ -677,7 +677,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, // Count zero bits in the specified ranges std::vector segmented_count_unset_bits(bitmask_type const *bitmask, - std::vector const &indices) + host_span indices) { CUDF_FUNC_RANGE(); return detail::segmented_count_unset_bits(bitmask, indices, rmm::cuda_stream_default); diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 8cf9db465f3..1b948083982 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include -#include #include #include @@ -50,19 +50,18 @@ constexpr bool use_fused_kernel_heuristic(bool const has_nulls, size_t const num return has_nulls || num_columns > 4; } -auto create_device_views(std::vector const& views, rmm::cuda_stream_view stream) +auto create_device_views(host_span views, rmm::cuda_stream_view stream) { // Create device views for each input view using CDViewPtr = decltype( column_device_view::create(std::declval(), std::declval())); auto device_view_owners = std::vector(views.size()); - std::transform( - views.cbegin(), views.cend(), device_view_owners.begin(), [stream](auto const& col) { - // TODO creating this device view can invoke null count computation - // even though it isn't used. See this issue: - // https://github.com/rapidsai/cudf/issues/4368 - return column_device_view::create(col, stream); - }); + std::transform(views.begin(), views.end(), device_view_owners.begin(), [stream](auto const& col) { + // TODO creating this device view can invoke null count computation + // even though it isn't used. 
See this issue: + // https://github.com/rapidsai/cudf/issues/4368 + return column_device_view::create(col, stream); + }); // Assemble contiguous array of device views auto device_views = thrust::host_vector(); @@ -74,7 +73,7 @@ auto create_device_views(std::vector const& views, rmm::cuda_stream // TODO each of these device vector copies invoke stream synchronization // which appears to add unnecessary overhead. See this issue: // https://github.com/rapidsai/rmm/issues/120 - auto d_views = rmm::device_vector{device_views}; + auto d_views = make_device_uvector_async(device_views); // Compute the partition offsets auto offsets = thrust::host_vector(views.size() + 1); @@ -85,7 +84,7 @@ auto create_device_views(std::vector const& views, rmm::cuda_stream std::next(offsets.begin()), [](auto const& col) { return col.size(); }, thrust::plus{}); - auto const d_offsets = rmm::device_vector{offsets}; + auto d_offsets = make_device_uvector_async(offsets); auto const output_size = offsets.back(); return std::make_tuple( @@ -132,8 +131,8 @@ __global__ void concatenate_masks_kernel(column_device_view const* views, } } -void concatenate_masks(rmm::device_vector const& d_views, - rmm::device_vector const& d_offsets, +void concatenate_masks(device_span d_views, + device_span d_offsets, bitmask_type* dest_mask, size_type output_size, rmm::cuda_stream_view stream) @@ -141,14 +140,14 @@ void concatenate_masks(rmm::device_vector const& d_views, constexpr size_type block_size{256}; cudf::detail::grid_1d config(output_size, block_size); concatenate_masks_kernel<<>>( - d_views.data().get(), - d_offsets.data().get(), + d_views.data(), + d_offsets.data(), static_cast(d_views.size()), dest_mask, output_size); } -void concatenate_masks(std::vector const& views, +void concatenate_masks(host_span views, bitmask_type* dest_mask, rmm::cuda_stream_view stream) { @@ -214,7 +213,7 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views, } template -std::unique_ptr fused_concatenate(std::vector const& views, +std::unique_ptr fused_concatenate(host_span views, bool const has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -245,8 +244,8 @@ std::unique_ptr fused_concatenate(std::vector const& views, auto const kernel = has_nulls ? 
fused_concatenate_kernel : fused_concatenate_kernel; kernel<<>>( - d_views.data().get(), - d_offsets.data().get(), + d_views.data(), + d_offsets.data(), static_cast(d_views.size()), *d_out_view, d_valid_count.data()); @@ -257,7 +256,7 @@ std::unique_ptr fused_concatenate(std::vector const& views, } template -std::unique_ptr for_each_concatenate(std::vector const& views, +std::unique_ptr for_each_concatenate(host_span views, bool const has_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -289,7 +288,7 @@ std::unique_ptr for_each_concatenate(std::vector const& vie } struct concatenate_dispatch { - std::vector const& views; + host_span views; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr; @@ -298,7 +297,7 @@ struct concatenate_dispatch { std::unique_ptr operator()() { bool const has_nulls = - std::any_of(views.cbegin(), views.cend(), [](auto const& col) { return col.has_nulls(); }); + std::any_of(views.begin(), views.end(), [](auto const& col) { return col.has_nulls(); }); // Use a heuristic to guess when the fused kernel will be faster if (use_fused_kernel_heuristic(has_nulls, views.size())) { @@ -392,7 +391,7 @@ void bounds_and_type_check(ColIter begin, ColIter end) } // anonymous namespace // Concatenates the elements from a vector of column_views -std::unique_ptr concatenate(std::vector const& columns_to_concat, +std::unique_ptr concatenate(host_span columns_to_concat, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -411,15 +410,15 @@ std::unique_ptr concatenate(std::vector const& columns_to_c columns_to_concat.front().type(), concatenate_dispatch{columns_to_concat, stream, mr}); } -std::unique_ptr
concatenate(std::vector<table_view> const& tables_to_concat, +std::unique_ptr<table>
concatenate(host_span tables_to_concat, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (tables_to_concat.empty()) { return std::make_unique<table>
(); } table_view const first_table = tables_to_concat.front(); - CUDF_EXPECTS(std::all_of(tables_to_concat.cbegin(), - tables_to_concat.cend(), + CUDF_EXPECTS(std::all_of(tables_to_concat.begin(), + tables_to_concat.end(), [&first_table](auto const& t) { return t.num_columns() == first_table.num_columns(); }), @@ -428,8 +427,8 @@ std::unique_ptr
concatenate(std::vector const& tables_to_conc std::vector> concat_columns; for (size_type i = 0; i < first_table.num_columns(); ++i) { std::vector cols; - std::transform(tables_to_concat.cbegin(), - tables_to_concat.cend(), + std::transform(tables_to_concat.begin(), + tables_to_concat.end(), std::back_inserter(cols), [i](auto const& t) { return t.column(i); }); @@ -442,7 +441,7 @@ std::unique_ptr
concatenate(std::vector const& tables_to_conc } // namespace detail -rmm::device_buffer concatenate_masks(std::vector const& views, +rmm::device_buffer concatenate_masks(host_span views, rmm::mr::device_memory_resource* mr) { bool const has_nulls = @@ -465,14 +464,14 @@ rmm::device_buffer concatenate_masks(std::vector const& views, } // Concatenates the elements from a vector of column_views -std::unique_ptr concatenate(std::vector const& columns_to_concat, +std::unique_ptr concatenate(host_span columns_to_concat, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::concatenate(columns_to_concat, rmm::cuda_stream_default, mr); } -std::unique_ptr
concatenate(std::vector<table_view> const& tables_to_concat, +std::unique_ptr<table>
concatenate(host_span tables_to_concat, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 05349a5f968..cdf086e3f4a 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -62,8 +62,7 @@ struct compute_children_offsets_fn { * * @param columns The input dictionary columns. */ - compute_children_offsets_fn(std::vector const& columns) - : columns_ptrs{columns.size()} + compute_children_offsets_fn(host_span columns) : columns_ptrs{columns.size()} { std::transform( columns.begin(), columns.end(), columns_ptrs.begin(), [](auto& cv) { return &cv; }); @@ -187,7 +186,7 @@ struct dispatch_compute_indices { } // namespace -std::unique_ptr concatenate(std::vector const& columns, +std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index 40bc96c6103..e8d5c60f81a 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -18,13 +18,14 @@ #include #include +#include #include namespace cudf { namespace groupby { namespace detail { inline std::vector extract_results( - std::vector const& requests, cudf::detail::result_cache& cache) + host_span requests, cudf::detail::result_cache& cache) { std::vector results(requests.size()); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index cdd8ceb0a6c..34c57996af3 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -55,7 +55,7 @@ groupby::groupby(table_view const& keys, // Select hash vs. sort groupby implementation std::pair, std::vector> groupby::dispatch_aggregation( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -79,7 +79,7 @@ groupby::~groupby() = default; namespace { /// Make an empty table with appropriate types for requested aggs -auto empty_results(std::vector const& requests) +auto empty_results(host_span requests) { std::vector empty_results; @@ -102,7 +102,7 @@ auto empty_results(std::vector const& requests) } /// Verifies the agg requested on the request's values is valid -void verify_valid_requests(std::vector const& requests) +void verify_valid_requests(host_span requests) { CUDF_EXPECTS( std::all_of( @@ -143,7 +143,7 @@ void verify_valid_requests(std::vector const& requests) // Compute aggregation requests std::pair, std::vector> groupby::aggregate( - std::vector const& requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( @@ -156,12 +156,12 @@ std::pair, std::vector> groupby::aggr if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); } - return dispatch_aggregation(requests, 0, mr); + return dispatch_aggregation(requests, rmm::cuda_stream_default, mr); } // Compute scan requests std::pair, std::vector> groupby::scan( - std::vector const& requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( @@ -190,7 +190,7 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re if (values.num_columns()) { auto grouped_values = cudf::detail::gather(values, - helper().key_sort_order(), + helper().key_sort_order(rmm::cuda_stream_default), cudf::out_of_bounds_policy::DONT_CHECK, 
cudf::detail::negative_index_policy::NOT_ALLOWED, rmm::cuda_stream_default, diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index c54ecee9ccb..38aacbe59a7 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -110,7 +110,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; - rmm::device_vector const& gather_map; + device_span gather_map; size_type const map_size; Map const& map; bitmask_type const* __restrict__ row_bitmask; @@ -122,7 +122,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final column_view col, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, - rmm::device_vector const& gather_map, + device_span gather_map, size_type map_size, Map const& map, bitmask_type const* row_bitmask, @@ -272,7 +272,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final // flatten aggs to filter in single pass aggs std::tuple, std::vector> -flatten_single_pass_aggs(std::vector const& requests) +flatten_single_pass_aggs(host_span requests) { std::vector columns; std::vector agg_kinds; @@ -311,10 +311,10 @@ flatten_single_pass_aggs(std::vector const& requests) */ template void sparse_to_dense_results(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, - rmm::device_vector const& gather_map, + device_span gather_map, size_type map_size, Map const& map, bool keys_have_nulls, @@ -421,7 +421,7 @@ auto create_sparse_results_table(table_view const& flattened_values, */ template void compute_single_pass_aggs(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* sparse_results, Map& map, null_policy include_null_keys, @@ -469,10 +469,10 @@ void compute_single_pass_aggs(table_view const& keys, * `map`. */ template -std::pair, size_type> extract_populated_keys( +std::pair, size_type> extract_populated_keys( Map map, size_type num_keys, rmm::cuda_stream_view stream) { - rmm::device_vector populated_keys(num_keys); + rmm::device_uvector populated_keys(num_keys, stream); auto get_key = [] __device__(auto const& element) { size_type key, value; @@ -520,7 +520,7 @@ std::pair, size_type> extract_populated_keys( */ template std::unique_ptr
groupby_null_templated(table_view const& keys, - std::vector<aggregation_request> const& requests, + host_span<aggregation_request const> requests, cudf::detail::result_cache* cache, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -539,9 +539,9 @@ std::unique_ptr<table>
groupby_null_templated(table_view const& keys, // Extract the populated indices from the hash map and create a gather map. // Gathering using this map from sparse results will give dense results. - rmm::device_vector gather_map; - size_type map_size; - std::tie(gather_map, map_size) = extract_populated_keys(*map, keys.num_rows(), stream); + auto map_and_size = extract_populated_keys(*map, keys.num_rows(), stream); + rmm::device_uvector gather_map{std::move(map_and_size.first)}; + size_type const map_size = map_and_size.second; // Compact all results from sparse_results and insert into cache sparse_to_dense_results(keys, @@ -576,7 +576,7 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, * @return true A hash-based groupby should be used * @return false A hash-based groupby should not be used */ -bool can_use_hash_groupby(table_view const& keys, std::vector const& requests) +bool can_use_hash_groupby(table_view const& keys, host_span requests) { return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { return std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { @@ -588,7 +588,7 @@ bool can_use_hash_groupby(table_view const& keys, std::vector, std::vector> groupby( table_view const& keys, - std::vector const& requests, + host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index b171b19413b..4e2303c8b9b 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -70,8 +70,9 @@ void aggregrate_result_functor::operator()(aggregation agg, get_grouped_values().nullable() ? detail::group_count_valid( - get_grouped_values(), helper.group_labels(), helper.num_groups(), stream, mr) - : detail::group_count_all(helper.group_offsets(), helper.num_groups(), stream, mr)); + get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, mr) + : detail::group_count_all( + helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } template <> @@ -80,7 +81,9 @@ void aggregrate_result_functor::operator()(aggregation c if (cache.has_result(col_idx, agg)) return; cache.add_result( - col_idx, agg, detail::group_count_all(helper.group_offsets(), helper.num_groups(), stream, mr)); + col_idx, + agg, + detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } template <> @@ -88,10 +91,11 @@ void aggregrate_result_functor::operator()(aggregation const& { if (cache.has_result(col_idx, agg)) return; - cache.add_result(col_idx, - agg, - detail::group_sum( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + cache.add_result( + col_idx, + agg, + detail::group_sum( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); }; template <> @@ -102,9 +106,9 @@ void aggregrate_result_functor::operator()(aggregation cons cache.add_result(col_idx, agg, detail::group_argmax(get_grouped_values(), - helper.num_groups(), - helper.group_labels(), - helper.key_sort_order(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), stream, mr)); }; @@ -117,9 +121,9 @@ void aggregrate_result_functor::operator()(aggregation cons cache.add_result(col_idx, agg, detail::group_argmin(get_grouped_values(), - helper.num_groups(), - helper.group_labels(), - helper.key_sort_order(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), stream, mr)); }; @@ -132,7 +136,7 @@ void aggregrate_result_functor::operator()(aggregation const& auto result = [&]() { if (cudf::is_fixed_width(values.type())) { return detail::group_min( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr); + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); } else { auto argmin_agg = make_argmin_aggregation(); operator()(*argmin_agg); @@ -169,7 +173,7 @@ void aggregrate_result_functor::operator()(aggregation const& auto result = [&]() { if (cudf::is_fixed_width(values.type())) { return 
detail::group_max( - get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr); + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); } else { auto argmax_agg = make_argmax_aggregation(); operator()(*argmax_agg); @@ -238,7 +242,7 @@ void aggregrate_result_functor::operator()(aggregation co auto result = detail::group_var(get_grouped_values(), mean_result, group_sizes, - helper.group_labels(), + helper.group_labels(stream), var_agg._ddof, stream, mr); @@ -271,8 +275,8 @@ void aggregrate_result_functor::operator()(aggregation co auto result = detail::group_quantiles(get_sorted_values(), group_sizes, - helper.group_offsets(), - helper.num_groups(), + helper.group_offsets(stream), + helper.num_groups(stream), quantile_agg._quantiles, quantile_agg._interpolation, stream, @@ -291,8 +295,8 @@ void aggregrate_result_functor::operator()(aggregation cons auto result = detail::group_quantiles(get_sorted_values(), group_sizes, - helper.group_offsets(), - helper.num_groups(), + helper.group_offsets(stream), + helper.num_groups(stream), {0.5}, interpolation::LINEAR, stream, @@ -308,9 +312,9 @@ void aggregrate_result_functor::operator()(aggregation con auto nunique_agg = static_cast(agg); auto result = detail::group_nunique(get_sorted_values(), - helper.group_labels(), - helper.num_groups(), - helper.group_offsets(), + helper.group_labels(stream), + helper.num_groups(stream), + helper.group_offsets(stream), nunique_agg._null_handling, stream, mr); @@ -337,9 +341,9 @@ void aggregrate_result_functor::operator()(aggregation agg, detail::group_nth_element(get_grouped_values(), group_sizes, - helper.group_labels(), - helper.group_offsets(), - helper.num_groups(), + helper.group_labels(stream), + helper.group_offsets(stream), + helper.num_groups(stream), nth_element_agg._n, nth_element_agg._null_handling, stream, @@ -357,7 +361,7 @@ void aggregrate_result_functor::operator()(aggregatio if (cache.has_result(col_idx, agg)) return; auto result = detail::group_collect( - get_grouped_values(), helper.group_offsets(), helper.num_groups(), stream, mr); + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); cache.add_result(col_idx, agg, std::move(result)); }; @@ -373,7 +377,7 @@ void aggregrate_result_functor::operator()(aggregation if (cache.has_result(col_idx, agg)) { return; } auto const collect_result = detail::group_collect( - get_grouped_values(), helper.group_offsets(), helper.num_groups(), stream, mr); + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr); auto const nulls_equal = static_cast(agg)._null_equal; cache.add_result(col_idx, @@ -385,7 +389,7 @@ void aggregrate_result_functor::operator()(aggregation // Sort-based groupby std::pair, std::vector> groupby::sort_aggregate( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index 565320fbe80..afb92f8e141 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -64,7 +64,7 @@ struct store_result_functor { // It's overridden in scan implementation. return sorted_values->view(); else - return (grouped_values = helper.grouped_values(values))->view(); + return (grouped_values = helper.grouped_values(values, stream))->view(); }; /** @@ -76,7 +76,7 @@ struct store_result_functor { column_view get_sorted_values() { return sorted_values ? 
sorted_values->view() - : (sorted_values = helper.sorted_values(values))->view(); + : (sorted_values = helper.sorted_values(values, stream))->view(); }; protected: diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index 5c8e8b790d4..e6c10aa1056 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ std::unique_ptr group_nth_element(column_view const &values, auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), [] __device__(auto b) { return static_cast(b); }); - rmm::device_vector intra_group_index(values.size()); + rmm::device_uvector intra_group_index(values.size(), stream); // intra group index for valids only. thrust::exclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), @@ -77,9 +77,9 @@ std::unique_ptr group_nth_element(column_view const &values, bitmask_iterator, intra_group_index.begin()); // group_size to recalculate n if n<0 - rmm::device_vector group_count = [&] { + rmm::device_uvector group_count = [&] { if (n < 0) { - rmm::device_vector group_count(num_groups); + rmm::device_uvector group_count(num_groups, stream); thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), @@ -88,7 +88,7 @@ std::unique_ptr group_nth_element(column_view const &values, group_count.begin()); return group_count; } else { - return rmm::device_vector(); + return rmm::device_uvector(0, stream); } }(); // gather the valid index == n diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index fcadb2e71fb..c9f9e3cad9e 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -80,7 +80,7 @@ struct quantiles_functor { column_view const& group_sizes, cudf::device_span group_offsets, size_type const num_groups, - rmm::device_vector const& quantile, + device_span quantile, interpolation interpolation, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -112,7 +112,7 @@ struct quantiles_functor { *group_size_view, *result_view, group_offsets.data(), - quantile.data().get(), + quantile.data(), static_cast(quantile.size()), interpolation}); } else { @@ -125,7 +125,7 @@ struct quantiles_functor { *group_size_view, *result_view, group_offsets.data(), - quantile.data().get(), + quantile.data(), static_cast(quantile.size()), interpolation}); } diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 63de4ea8684..336a6777ffa 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -59,7 +59,7 @@ struct scan_result_functor final : store_result_functor { if (grouped_values) return grouped_values->view(); else - return (grouped_values = helper.grouped_values(values))->view(); + return (grouped_values = helper.grouped_values(values, stream))->view(); }; }; @@ -71,7 +71,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::sum_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::sum_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -82,7 +83,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::min_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::min_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -93,7 +95,8 @@ void scan_result_functor::operator()(aggregation const& agg) cache.add_result( col_idx, agg, - detail::max_scan(get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr)); + detail::max_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -101,13 +104,13 @@ void scan_result_functor::operator()(aggregation const& { if (cache.has_result(col_idx, agg)) return; - cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(), stream, mr)); + cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(stream), stream, mr)); } } // namespace detail // Sort-based groupby std::pair, std::vector> groupby::sort_scan( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 6a9da36e21b..5e944f75712 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -141,7 +141,7 @@ column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream) // presence of a null value within a row. This allows moving all rows that // contain a null value to the end of the sorted order. 
- auto augmented_keys = table_view({table_view({keys_bitmask_column()}), _keys}); + auto augmented_keys = table_view({table_view({keys_bitmask_column(stream)}), _keys}); _key_sorted_order = cudf::detail::stable_sorted_order( augmented_keys, @@ -164,7 +164,7 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( _group_offsets = std::make_unique(num_keys(stream) + 1, stream); auto device_input_table = table_device_view::create(_keys, stream); - auto sorted_order = key_sort_order().data(); + auto sorted_order = key_sort_order(stream).data(); decltype(_group_offsets->begin()) result_end; if (has_nulls(_keys)) { @@ -207,9 +207,9 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_labels( group_labels.end(), index_vector::value_type{0}); thrust::scatter(rmm::exec_policy(stream), - thrust::make_constant_iterator(1, decltype(num_groups())(1)), - thrust::make_constant_iterator(1, num_groups()), - group_offsets().begin() + 1, + thrust::make_constant_iterator(1, decltype(num_groups(stream))(1)), + thrust::make_constant_iterator(1, num_groups(stream)), + group_offsets(stream).begin() + 1, group_labels.begin()); thrust::inclusive_scan( @@ -226,9 +226,9 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre data_type(type_to_id()), _keys.num_rows(), mask_state::ALL_NULL, stream); auto group_labels_view = cudf::column_view( - data_type(type_to_id()), group_labels().size(), group_labels().data()); + data_type(type_to_id()), group_labels(stream).size(), group_labels(stream).data()); - auto scatter_map = key_sort_order(); + auto scatter_map = key_sort_order(stream); std::unique_ptr
t_unsorted_keys_labels = cudf::detail::scatter(table_view({group_labels_view}), @@ -267,7 +267,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { column_ptr values_sort_order = - cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(), values}), + cudf::detail::stable_sorted_order(table_view({unsorted_keys_labels(stream), values}), {}, std::vector(2, null_order::AFTER), stream, @@ -289,7 +289,7 @@ sort_groupby_helper::column_ptr sort_groupby_helper::sorted_values( sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto gather_map = key_sort_order(); + auto gather_map = key_sort_order(stream); auto grouped_values_table = cudf::detail::gather(table_view({values}), gather_map, @@ -304,14 +304,14 @@ sort_groupby_helper::column_ptr sort_groupby_helper::grouped_values( std::unique_ptr
sort_groupby_helper::unique_keys(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto idx_data = key_sort_order().data(); + auto idx_data = key_sort_order(stream).data(); auto gather_map_it = thrust::make_transform_iterator( - group_offsets().begin(), [idx_data] __device__(size_type i) { return idx_data[i]; }); + group_offsets(stream).begin(), [idx_data] __device__(size_type i) { return idx_data[i]; }); return cudf::detail::gather(_keys, gather_map_it, - gather_map_it + num_groups(), + gather_map_it + num_groups(stream), out_of_bounds_policy::DONT_CHECK, stream, mr); @@ -321,7 +321,7 @@ std::unique_ptr
sort_groupby_helper::sorted_keys(rmm::cuda_stream_view st rmm::mr::device_memory_resource* mr) { return cudf::detail::gather(_keys, - key_sort_order(), + key_sort_order(stream), cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, diff --git a/cpp/src/interop/from_arrow.cpp b/cpp/src/interop/from_arrow.cpp index 729b98d85a8..612e2111b66 100644 --- a/cpp/src/interop/from_arrow.cpp +++ b/cpp/src/interop/from_arrow.cpp @@ -150,8 +150,7 @@ struct dispatch_to_cudf_column { std::unique_ptr get_empty_type_column(size_type size) { - return std::make_unique( - data_type(type_id::EMPTY), size, std::move(rmm::device_buffer(0))); + return std::make_unique(data_type(type_id::EMPTY), size, rmm::device_buffer(0)); } /** diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index 99157a23fcb..e69a61bde66 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -17,6 +17,7 @@ #include "orc_common.h" #include "orc_gpu.h" +#include #include #include @@ -46,14 +47,16 @@ struct dictinit_state_s { }; /** - * @brief Return a 12-bit hash from a byte sequence + * @brief Return a 12-bit hash from a string */ -static inline __device__ uint32_t nvstr_init_hash(char const *ptr, uint32_t len) +static inline __device__ uint32_t hash_string(const string_view val) { - if (len != 0) { - return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); - } else { + if (val.empty()) { return 0; + } else { + char const *ptr = val.data(); + uint32_t len = val.size_bytes(); + return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); } } @@ -71,7 +74,8 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, { if (t == 0) { s->nnz = 0; } for (uint32_t i = 0; i < s->chunk.num_rows; i += block_size) { - const uint32_t *valid_map = s->chunk.valid_map_base; + const uint32_t *valid_map = s->chunk.leaf_column->null_mask(); + auto column_offset = s->chunk.leaf_column->offset(); uint32_t is_valid, nz_pos; if (t < block_size / 32) { if (!valid_map) { @@ -80,10 +84,10 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, uint32_t const row = s->chunk.start_row + i + t * 32; auto const chunk_end = s->chunk.start_row + s->chunk.num_rows; - auto const valid_map_idx = (row + s->chunk.column_offset) / 32; + auto const valid_map_idx = (row + column_offset) / 32; uint32_t valid = (row < chunk_end) ? 
valid_map[valid_map_idx] : 0; - auto const rows_in_next_word = (row + s->chunk.column_offset) & 0x1f; + auto const rows_in_next_word = (row + column_offset) & 0x1f; if (rows_in_next_word != 0) { auto const rows_in_current_word = 32 - rows_in_next_word; // Read next word if any rows are within the chunk @@ -111,12 +115,18 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, * @brief Gather all non-NULL string rows and compute total character data size * * @param[in] chunks DictionaryChunk device array [rowgroup][column] - * @param[in] num_columns Number of columns + * @param[in] num_columns Number of string columns */ // blockDim {block_size,1,1} template __global__ void __launch_bounds__(block_size, 2) - gpuInitDictionaryIndices(DictionaryChunk *chunks, uint32_t num_columns) + gpuInitDictionaryIndices(DictionaryChunk *chunks, + const table_device_view view, + uint32_t *dict_data, + uint32_t *dict_index, + size_t row_index_stride, + size_type *str_col_ids, + uint32_t num_columns) { __shared__ __align__(16) dictinit_state_s state_g; @@ -131,12 +141,21 @@ __global__ void __launch_bounds__(block_size, 2) dictinit_state_s *const s = &state_g; uint32_t col_id = blockIdx.x; uint32_t group_id = blockIdx.y; - const nvstrdesc_s *ck_data; - uint32_t *dict_data; uint32_t nnz, start_row, dict_char_count; int t = threadIdx.x; - if (t == 0) s->chunk = chunks[group_id * num_columns + col_id]; + if (t == 0) { + column_device_view *leaf_column_view = view.begin() + str_col_ids[col_id]; + s->chunk = chunks[group_id * num_columns + col_id]; + s->chunk.leaf_column = leaf_column_view; + s->chunk.dict_data = + dict_data + col_id * leaf_column_view->size() + group_id * row_index_stride; + s->chunk.dict_index = dict_index + col_id * leaf_column_view->size(); + s->chunk.start_row = group_id * row_index_stride; + s->chunk.num_rows = + min(row_index_stride, + max(static_cast(leaf_column_view->size() - s->chunk.start_row), size_t{0})); + } for (uint32_t i = 0; i < sizeof(s->map) / sizeof(uint32_t); i += block_size) { if (i + t < sizeof(s->map) / sizeof(uint32_t)) s->map.u32[i + t] = 0; } @@ -152,15 +171,15 @@ __global__ void __launch_bounds__(block_size, 2) nnz = s->nnz; dict_data = s->chunk.dict_data; start_row = s->chunk.start_row; - ck_data = static_cast(s->chunk.column_data_base) + start_row; for (uint32_t i = 0; i < nnz; i += block_size) { uint32_t ck_row = 0; uint32_t hash = 0; uint32_t len = 0; if (i + t < nnz) { - ck_row = s->dict[i + t]; - len = static_cast(ck_data[ck_row].count); - hash = nvstr_init_hash(ck_data[ck_row].ptr, len); + ck_row = s->dict[i + t]; + string_view string_val = s->chunk.leaf_column->element(ck_row + start_row); + len = static_cast(string_val.size_bytes()); + hash = hash_string(string_val); } len = block_reduce(temp_storage.reduce_storage).Sum(len); if (t == 0) s->chunk.string_char_count += len; @@ -200,10 +219,11 @@ __global__ void __launch_bounds__(block_size, 2) uint32_t ck_row = 0, pos = 0, hash = 0, pos_old, pos_new, sh, colliding_row; bool collision; if (i + t < nnz) { - ck_row = dict_data[i + t] - start_row; - hash = nvstr_init_hash(ck_data[ck_row].ptr, static_cast(ck_data[ck_row].count)); - sh = (hash & 1) ? 16 : 0; - pos_old = s->map.u16[hash]; + ck_row = dict_data[i + t] - start_row; + string_view string_val = s->chunk.leaf_column->element(ck_row + start_row); + hash = hash_string(string_val); + sh = (hash & 1) ? 
16 : 0; + pos_old = s->map.u16[hash]; } // The isolation of the atomicAdd, along with pos_old/pos_new is to guarantee deterministic // behavior for the first row in the hash map that will be used for early duplicate detection @@ -233,18 +253,16 @@ __global__ void __launch_bounds__(block_size, 2) for (uint32_t i = 0; i < nnz; i += block_size) { uint32_t ck_row = 0, ck_row_ref = 0, is_dupe = 0; if (i + t < nnz) { - const char *str1, *str2; - uint32_t len1, len2, hash; - ck_row = s->dict[i + t]; - str1 = ck_data[ck_row].ptr; - len1 = static_cast(ck_data[ck_row].count); - hash = nvstr_init_hash(str1, len1); - ck_row_ref = s->dict[(hash > 0) ? s->map.u16[hash - 1] : 0]; + ck_row = s->dict[i + t]; + string_view string_value = s->chunk.leaf_column->element(ck_row + start_row); + auto const string_length = static_cast(string_value.size_bytes()); + auto const hash = hash_string(string_value); + ck_row_ref = s->dict[(hash > 0) ? s->map.u16[hash - 1] : 0]; if (ck_row_ref != ck_row) { - str2 = ck_data[ck_row_ref].ptr; - len2 = static_cast(ck_data[ck_row_ref].count); - is_dupe = nvstr_is_equal(str1, len1, str2, len2); - dict_char_count += (is_dupe) ? 0 : len1; + string_view reference_string = + s->chunk.leaf_column->element(ck_row_ref + start_row); + is_dupe = (string_value == reference_string); + dict_char_count += (is_dupe) ? 0 : string_length; } } uint32_t dupes_in_block; @@ -269,6 +287,12 @@ __global__ void __launch_bounds__(block_size, 2) chunks[group_id * num_columns + col_id].string_char_count = s->chunk.string_char_count; chunks[group_id * num_columns + col_id].num_dict_strings = nnz - s->total_dupes; chunks[group_id * num_columns + col_id].dict_char_count = dict_char_count; + chunks[group_id * num_columns + col_id].leaf_column = s->chunk.leaf_column; + + chunks[group_id * num_columns + col_id].dict_data = s->chunk.dict_data; + chunks[group_id * num_columns + col_id].dict_index = s->chunk.dict_index; + chunks[group_id * num_columns + col_id].start_row = s->chunk.start_row; + chunks[group_id * num_columns + col_id].num_rows = s->chunk.num_rows; } } @@ -357,7 +381,6 @@ __global__ void __launch_bounds__(block_size) uint32_t num_strings; uint32_t *dict_data, *dict_index; uint32_t dict_char_count; - const nvstrdesc_s *str_data; int t = threadIdx.x; if (t == 0) s->stripe = stripes[stripe_id * num_columns + col_id]; @@ -366,21 +389,20 @@ __global__ void __launch_bounds__(block_size) num_strings = s->stripe.num_strings; dict_data = s->stripe.dict_data; if (!dict_data) return; - dict_index = s->stripe.dict_index; - str_data = static_cast(s->stripe.column_data_base); - dict_char_count = 0; + dict_index = s->stripe.dict_index; + string_view current_string = string_view::min(); + dict_char_count = 0; for (uint32_t i = 0; i < num_strings; i += block_size) { uint32_t cur = (i + t < num_strings) ? dict_data[i + t] : 0; uint32_t cur_len = 0; - const char *cur_ptr; - bool is_dupe = false; + bool is_dupe = false; if (i + t < num_strings) { - cur_ptr = str_data[cur].ptr; - cur_len = str_data[cur].count; + current_string = s->stripe.leaf_column->element(cur); + cur_len = current_string.size_bytes(); } if (i + t != 0 && i + t < num_strings) { uint32_t prev = dict_data[i + t - 1]; - is_dupe = nvstr_is_equal(cur_ptr, cur_len, str_data[prev].ptr, str_data[prev].count); + is_dupe = (current_string == (s->stripe.leaf_column->element(prev))); } dict_char_count += (is_dupe) ? 
0 : cur_len; uint32_t dupes_in_block; @@ -403,14 +425,14 @@ __global__ void __launch_bounds__(block_size) } /** - * @brief Launches kernel for initializing dictionary chunks - * - * @param[in] chunks DictionaryChunk device array [rowgroup][column] - * @param[in] num_columns Number of columns - * @param[in] num_rowgroups Number of row groups - * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` + * @copydoc cudf::io::orc::gpu::InitDictionaryIndices */ -void InitDictionaryIndices(DictionaryChunk *chunks, +void InitDictionaryIndices(const table_device_view &view, + DictionaryChunk *chunks, + uint32_t *dict_data, + uint32_t *dict_index, + size_t row_index_stride, + size_type *str_col_ids, uint32_t num_columns, uint32_t num_rowgroups, rmm::cuda_stream_view stream) @@ -418,20 +440,12 @@ void InitDictionaryIndices(DictionaryChunk *chunks, static constexpr int block_size = 512; dim3 dim_block(block_size, 1); dim3 dim_grid(num_columns, num_rowgroups); - gpuInitDictionaryIndices - <<>>(chunks, num_columns); + gpuInitDictionaryIndices<<>>( + chunks, view, dict_data, dict_index, row_index_stride, str_col_ids, num_columns); } /** - * @brief Launches kernel for building stripe dictionaries - * - * @param[in] stripes StripeDictionary device array [stripe][column] - * @param[in] stripes_host StripeDictionary host array [stripe][column] - * @param[in] chunks DictionaryChunk device array [rowgroup][column] - * @param[in] num_stripes Number of stripes - * @param[in] num_rowgroups Number of row groups - * @param[in] num_columns Number of columns - * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` + * @copydoc cudf::io::orc::gpu::BuildStripeDictionaries */ void BuildStripeDictionaries(StripeDictionary *stripes, StripeDictionary *stripes_host, @@ -447,18 +461,16 @@ void BuildStripeDictionaries(StripeDictionary *stripes, stripes, chunks, num_columns); for (uint32_t i = 0; i < num_stripes * num_columns; i++) { if (stripes_host[i].dict_data != nullptr) { - thrust::device_ptr p = thrust::device_pointer_cast(stripes_host[i].dict_data); - const nvstrdesc_s *str_data = - static_cast(stripes_host[i].column_data_base); + thrust::device_ptr dict_data_ptr = + thrust::device_pointer_cast(stripes_host[i].dict_data); + column_device_view *string_column = stripes_host[i].leaf_column; // NOTE: Requires the --expt-extended-lambda nvcc flag thrust::sort(rmm::exec_policy(stream), - p, - p + stripes_host[i].num_strings, - [str_data] __device__(const uint32_t &lhs, const uint32_t &rhs) { - return nvstr_is_lesser(str_data[lhs].ptr, - (uint32_t)str_data[lhs].count, - str_data[rhs].ptr, - (uint32_t)str_data[rhs].count); + dict_data_ptr, + dict_data_ptr + stripes_host[i].num_strings, + [string_column] __device__(const uint32_t &lhs, const uint32_t &rhs) { + return string_column->element(lhs) < + string_column->element(rhs); }); } } diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 7ad92e40cb4..55df0adf95b 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -124,16 +125,15 @@ struct RowGroup { * @brief Struct to describe an encoder data chunk */ struct EncChunk { - const uint32_t *valid_map_base; // base ptr of input valid bit map - size_type column_offset; // index of the first element relative to the base memory - const void *column_data_base; // base ptr of input column data - uint32_t start_row; // start row of this chunk - uint32_t num_rows; // number of rows in this chunk 
- uint32_t valid_rows; // max number of valid rows - uint8_t encoding_kind; // column encoding kind (orc::ColumnEncodingKind) - uint8_t type_kind; // column data type (orc::TypeKind) - uint8_t dtype_len; // data type length - uint8_t scale; // scale for decimals or timestamps + uint32_t start_row; // start row of this chunk + uint32_t num_rows; // number of rows in this chunk + uint8_t encoding_kind; // column encoding kind (orc::ColumnEncodingKind) + uint8_t type_kind; // column data type (orc::TypeKind) + uint8_t dtype_len; // data type length + uint8_t scale; // scale for decimals or timestamps + + uint32_t *dict_index; // dictionary index from row index + column_device_view *leaf_column; }; /** @@ -163,10 +163,7 @@ struct StripeStream { * @brief Struct to describe a dictionary chunk */ struct DictionaryChunk { - const uint32_t *valid_map_base; // base ptr of input valid bit map - size_type column_offset; // index of the first element relative to the base memory - const void *column_data_base; // base ptr of column data (ptr,len pair) - uint32_t *dict_data; // dictionary data (index of non-null rows) + uint32_t *dict_data; // dictionary data (index of non-null rows) uint32_t *dict_index; // row indices of corresponding string (row from dictionary index) uint32_t start_row; // start row of this chunk uint32_t num_rows; // num rows in this chunk @@ -175,20 +172,23 @@ struct DictionaryChunk { string_char_count; // total size of string data (NOTE: assumes less than 4G bytes per chunk) uint32_t num_dict_strings; // number of strings in dictionary uint32_t dict_char_count; // size of dictionary string data for this chunk + + column_device_view *leaf_column; //!< Pointer to string column }; /** * @brief Struct to describe a dictionary */ struct StripeDictionary { - const void *column_data_base; // base ptr of column data (ptr,len pair) - uint32_t *dict_data; // row indices of corresponding string (row from dictionary index) - uint32_t *dict_index; // dictionary index from row index - uint32_t column_id; // real column id - uint32_t start_chunk; // first chunk in stripe - uint32_t num_chunks; // number of chunks in the stripe - uint32_t num_strings; // number of unique strings in the dictionary - uint32_t dict_char_count; // total size of dictionary string data + uint32_t *dict_data; // row indices of corresponding string (row from dictionary index) + uint32_t *dict_index; // dictionary index from row index + uint32_t column_id; // real column id + uint32_t start_chunk; // first chunk in stripe + uint32_t num_chunks; // number of chunks in the stripe + uint32_t num_strings; // number of unique strings in the dictionary + uint32_t dict_char_count; // total size of dictionary string data + + column_device_view *leaf_column; //!< Pointer to string column }; /** @@ -313,6 +313,17 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, detail::device_2dspan enc_streams, rmm::cuda_stream_view stream = rmm::cuda_stream_default); +/** + * @brief Set leaf column element of EncChunk + * + * @param[in] view table device view representing input table + * @param[in,out] chunks encoder chunk device array [column][rowgroup] + * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` + */ +void set_chunk_columns(const table_device_view &view, + detail::device_2dspan chunks, + rmm::cuda_stream_view stream); + /** * @brief Launches kernel for compacting chunked column data prior to compression * @@ -350,15 +361,25 @@ void CompressOrcDataStreams(uint8_t *compressed_data, /** * @brief Launches 
kernel for initializing dictionary chunks * + * @param[in] view table device view representing input table * @param[in,out] chunks DictionaryChunk device array [rowgroup][column] + * @param[in] dict_data dictionary data (index of non-null rows) + * @param[in] dict_index row indices of corresponding string (row from dictionary index) + * @param[in] row_index_stride Rowgroup size in rows + * @param[in] str_col_ids List of columns that are strings type * @param[in] num_columns Number of columns * @param[in] num_rowgroups Number of row groups * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void InitDictionaryIndices(DictionaryChunk *chunks, +void InitDictionaryIndices(const table_device_view &view, + DictionaryChunk *chunks, + uint32_t *dict_data, + uint32_t *dict_index, + size_t row_index_stride, + size_type *str_col_ids, uint32_t num_columns, uint32_t num_rowgroups, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream); /** * @brief Launches kernel for building stripe dictionaries diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index aef32efaf6e..10932d36309 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -669,19 +669,20 @@ __global__ void __launch_bounds__(block_size) if (t * 8 < nrows) { uint32_t row = s->chunk.start_row + present_rows + t * 8; uint8_t valid = 0; - if (row < s->chunk.valid_rows) { - if (s->chunk.valid_map_base) { - size_type current_valid_offset = row + s->chunk.column_offset; - size_type next_valid_offset = current_valid_offset + min(32, s->chunk.valid_rows); + if (row < s->chunk.leaf_column->size()) { + if (s->chunk.leaf_column->nullable()) { + size_type current_valid_offset = row + s->chunk.leaf_column->offset(); + size_type next_valid_offset = + current_valid_offset + min(32, s->chunk.leaf_column->size()); bitmask_type mask = cudf::detail::get_mask_offset_word( - s->chunk.valid_map_base, 0, current_valid_offset, next_valid_offset); + s->chunk.leaf_column->null_mask(), 0, current_valid_offset, next_valid_offset); valid = 0xff & mask; } else { valid = 0xff; } - if (row + 7 > s->chunk.valid_rows) { - valid = valid & ((1 << (s->chunk.valid_rows & 7)) - 1); + if (row + 7 > s->chunk.leaf_column->size()) { + valid = valid & ((1 << (s->chunk.leaf_column->size() & 7)) - 1); } } s->valid_buf[(row >> 3) & 0x1ff] = valid; @@ -729,19 +730,18 @@ __global__ void __launch_bounds__(block_size) lengths_to_positions(s->buf.u32, 512, t); __syncthreads(); if (valid) { - int nz_idx = (s->nnz + s->buf.u32[t] - 1) & (maxnumvals - 1); - void const *base = s->chunk.column_data_base; + int nz_idx = (s->nnz + s->buf.u32[t] - 1) & (maxnumvals - 1); switch (s->chunk.type_kind) { case INT: case DATE: - case FLOAT: s->vals.u32[nz_idx] = static_cast(base)[row]; break; + case FLOAT: s->vals.u32[nz_idx] = s->chunk.leaf_column->element(row); break; case DOUBLE: - case LONG: s->vals.u64[nz_idx] = static_cast(base)[row]; break; - case SHORT: s->vals.u32[nz_idx] = static_cast(base)[row]; break; + case LONG: s->vals.u64[nz_idx] = s->chunk.leaf_column->element(row); break; + case SHORT: s->vals.u32[nz_idx] = s->chunk.leaf_column->element(row); break; case BOOLEAN: - case BYTE: s->vals.u8[nz_idx] = static_cast(base)[row]; break; + case BYTE: s->vals.u8[nz_idx] = s->chunk.leaf_column->element(row); break; case TIMESTAMP: { - int64_t ts = static_cast(base)[row]; + int64_t ts = s->chunk.leaf_column->element(row); int32_t ts_scale = kTimeScale[min(s->chunk.scale, 9)]; int64_t seconds = ts / 
ts_scale; int64_t nanos = (ts - seconds * ts_scale); @@ -772,16 +772,13 @@ __global__ void __launch_bounds__(block_size) } case STRING: if (s->chunk.encoding_kind == DICTIONARY_V2) { - uint32_t dict_idx = static_cast(base)[row]; - if (dict_idx > 0x7fffffffu) - dict_idx = static_cast(base)[dict_idx & 0x7fffffffu]; + uint32_t dict_idx = s->chunk.dict_index[row]; + if (dict_idx > 0x7fffffffu) dict_idx = s->chunk.dict_index[dict_idx & 0x7fffffffu]; s->vals.u32[nz_idx] = dict_idx; } else { - const nvstrdesc_s *str_desc = static_cast(base) + row; - const char *ptr = str_desc->ptr; - uint32_t count = static_cast(str_desc->count); - s->u.strenc.str_data[s->buf.u32[t] - 1] = ptr; - s->lengths.u32[nz_idx] = count; + string_view value = s->chunk.leaf_column->element(row); + s->u.strenc.str_data[s->buf.u32[t] - 1] = value.data(); + s->lengths.u32[nz_idx] = value.size_bytes(); } break; default: break; @@ -899,8 +896,8 @@ __global__ void __launch_bounds__(block_size) streams[col_id][group_id].lengths[t] = s->strm_pos[t]; if (!s->stream.data_ptrs[t]) { streams[col_id][group_id].data_ptrs[t] = - static_cast(const_cast(s->chunk.column_data_base)) + - s->chunk.start_row * s->chunk.dtype_len; + static_cast(const_cast(s->chunk.leaf_column->head())) + + (s->chunk.leaf_column->offset() + s->chunk.start_row) * s->chunk.dtype_len; } } } @@ -939,8 +936,8 @@ __global__ void __launch_bounds__(block_size) s->nrows = s->u.dict_stripe.num_strings; s->cur_row = 0; } - auto const str_desc = static_cast(s->u.dict_stripe.column_data_base); - auto const dict_data = s->u.dict_stripe.dict_data; + column_device_view *string_column = s->u.dict_stripe.leaf_column; + auto const dict_data = s->u.dict_stripe.dict_data; __syncthreads(); if (s->chunk.encoding_kind != DICTIONARY_V2) { return; // This column isn't using dictionary encoding -> bail out @@ -951,8 +948,13 @@ __global__ void __launch_bounds__(block_size) uint32_t string_idx = (t < numvals) ? dict_data[s->cur_row + t] : 0; if (cid == CI_DICTIONARY) { // Encoding string contents - const char *ptr = (t < numvals) ? str_desc[string_idx].ptr : 0; - uint32_t count = (t < numvals) ? static_cast(str_desc[string_idx].count) : 0; + const char *ptr = 0; + uint32_t count = 0; + if (t < numvals) { + auto string_val = string_column->element(string_idx); + ptr = string_val.data(); + count = string_val.size_bytes(); + } s->u.strenc.str_data[t] = ptr; StoreStringData(s->stream.data_ptrs[CI_DICTIONARY] + s->strm_pos[CI_DICTIONARY], &s->u.strenc, @@ -961,7 +963,10 @@ __global__ void __launch_bounds__(block_size) if (!t) { s->strm_pos[CI_DICTIONARY] += s->u.strenc.char_count; } } else { // Encoding string lengths - uint32_t count = (t < numvals) ? static_cast(str_desc[string_idx].count) : 0; + uint32_t count = + (t < numvals) + ? 
static_cast(string_column->element(string_idx).size_bytes()) + : 0; uint32_t nz_idx = (s->cur_row + t) & 0x3ff; if (t < numvals) s->lengths.u32[nz_idx] = count; __syncthreads(); @@ -982,6 +987,15 @@ __global__ void __launch_bounds__(block_size) if (t == 0) { strm_ptr->lengths[cid] = s->strm_pos[cid]; } } +__global__ void __launch_bounds__(512) + gpu_set_chunk_columns(const table_device_view view, device_2dspan chunks) +{ + // Set leaf_column member of EncChunk + for (size_type i = threadIdx.x; i < chunks.size().second; i += blockDim.x) { + chunks[blockIdx.x][i].leaf_column = view.begin() + blockIdx.x; + } +} + /** * @brief Merge chunked column data into a single contiguous stream * @@ -1189,6 +1203,16 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, <<>>(stripes, chunks, enc_streams); } +void set_chunk_columns(const table_device_view &view, + device_2dspan chunks, + rmm::cuda_stream_view stream) +{ + dim3 dim_block(512, 1); + dim3 dim_grid(chunks.size().first, 1); + + gpu_set_chunk_columns<<>>(view, chunks); +} + void CompactOrcDataStreams(device_2dspan strm_desc, device_2dspan enc_streams, rmm::cuda_stream_view stream) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index eb5e90bbeec..10050806552 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -42,7 +42,6 @@ namespace detail { namespace orc { using namespace cudf::io::orc; using namespace cudf::io; -using cudf::io::orc::gpu::nvstrdesc_s; struct row_group_index_info { int32_t pos = -1; // Position @@ -111,39 +110,6 @@ constexpr T to_clockscale(cudf::type_id timestamp_id) } // namespace -/** - * @brief Helper kernel for converting string data/offsets into nvstrdesc - * REMOVEME: Once we eliminate the legacy readers/writers, the kernels could be - * made to use the native offset+data layout. - */ -__global__ void stringdata_to_nvstrdesc(gpu::nvstrdesc_s *dst, - const size_type *offsets, - const char *strdata, - const uint32_t *nulls, - const size_type column_offset, - size_type column_size) -{ - size_type row = blockIdx.x * blockDim.x + threadIdx.x; - if (row < column_size) { - uint32_t is_valid = (nulls != nullptr) - ? (nulls[(row + column_offset) / 32] >> ((row + column_offset) % 32)) & 1 - : 1; - size_t count; - const char *ptr; - if (is_valid) { - size_type cur = offsets[row]; - size_type next = offsets[row + 1]; - ptr = strdata + cur; - count = (next > cur) ? next - cur : 0; - } else { - ptr = nullptr; - count = 0; - } - dst[row].ptr = ptr; - dst[row].count = count; - } -} - /** * @brief Helper class that adds ORC-specific column info */ @@ -160,31 +126,14 @@ class orc_column_view { rmm::cuda_stream_view stream) : _id(id), _str_id(str_id), - _string_type(col.type().id() == type_id::STRING), - _type_width(_string_type ? 0 : cudf::size_of(col.type())), + _is_string_type(col.type().id() == type_id::STRING), + _type_width(_is_string_type ? 
0 : cudf::size_of(col.type())), _data_count(col.size()), _null_count(col.null_count()), - _data(col.head() + col.offset() * _type_width), _nulls(col.null_mask()), - _column_offset(col.offset()), _clockscale(to_clockscale(col.type().id())), _type_kind(to_orc_type(col.type().id())) { - if (_string_type && _data_count > 0) { - strings_column_view view{col}; - _indexes = rmm::device_buffer(_data_count * sizeof(gpu::nvstrdesc_s), stream); - - stringdata_to_nvstrdesc<<<((_data_count - 1) >> 8) + 1, 256, 0, stream.value()>>>( - static_cast(_indexes.data()), - view.offsets().data() + view.offset(), - view.chars().data(), - _nulls, - _column_offset, - _data_count); - _data = _indexes.data(); - - stream.synchronize(); - } // Generating default name if name isn't present in metadata if (metadata && _id < metadata->column_names.size()) { _name = metadata->column_names[_id]; @@ -193,7 +142,7 @@ class orc_column_view { } } - auto is_string() const noexcept { return _string_type; } + auto is_string() const noexcept { return _is_string_type; } void set_dict_stride(size_t stride) noexcept { dict_stride = stride; } auto get_dict_stride() const noexcept { return dict_stride; } @@ -207,7 +156,7 @@ class orc_column_view { } auto host_dict_chunk(size_t rowgroup) const { - assert(_string_type); + assert(_is_string_type); return &dict[rowgroup * dict_stride + _str_id]; } auto device_dict_chunk() const { return d_dict; } @@ -223,7 +172,7 @@ class orc_column_view { } auto host_stripe_dict(size_t stripe) const { - assert(_string_type); + assert(_is_string_type); return &stripe_dict[stripe * dict_stride + _str_id]; } auto device_stripe_dict() const { return d_stripe_dict; } @@ -233,9 +182,7 @@ class orc_column_view { size_t data_count() const noexcept { return _data_count; } size_t null_count() const noexcept { return _null_count; } bool nullable() const noexcept { return (_nulls != nullptr); } - void const *data() const noexcept { return _data; } uint32_t const *nulls() const noexcept { return _nulls; } - size_type column_offset() const noexcept { return _column_offset; } uint8_t clockscale() const noexcept { return _clockscale; } void set_orc_encoding(ColumnEncodingKind e) { _encoding_kind = e; } @@ -245,17 +192,15 @@ class orc_column_view { private: // Identifier within set of columns and string columns, respectively - size_t _id = 0; - size_t _str_id = 0; - bool _string_type = false; - - size_t _type_width = 0; - size_t _data_count = 0; - size_t _null_count = 0; - void const *_data = nullptr; - uint32_t const *_nulls = nullptr; - size_type _column_offset = 0; - uint8_t _clockscale = 0; + size_t _id = 0; + size_t _str_id = 0; + bool _is_string_type = false; + + size_t _type_width = 0; + size_t _data_count = 0; + size_t _null_count = 0; + uint32_t const *_nulls = nullptr; + uint8_t _clockscale = 0; // ORC-related members std::string _name{}; @@ -263,7 +208,6 @@ class orc_column_view { ColumnEncodingKind _encoding_kind; // String dictionary-related members - rmm::device_buffer _indexes; size_t dict_stride = 0; gpu::DictionaryChunk const *dict = nullptr; gpu::StripeDictionary const *stripe_dict = nullptr; @@ -308,8 +252,10 @@ std::vector writer::impl::gather_stripe_info( return infos; } -void writer::impl::init_dictionaries(orc_column_view *columns, +void writer::impl::init_dictionaries(const table_device_view &view, + orc_column_view *columns, std::vector const &str_col_ids, + device_span d_str_col_ids, uint32_t *dict_data, uint32_t *dict_index, hostdevice_vector *dict) @@ -321,26 +267,17 @@ void 
writer::impl::init_dictionaries(orc_column_view *columns, auto &str_column = columns[str_col_ids[i]]; str_column.set_dict_stride(str_col_ids.size()); str_column.attach_dict_chunk(dict->host_ptr(), dict->device_ptr()); - - for (size_t g = 0; g < num_rowgroups; g++) { - auto *ck = &(*dict)[g * str_col_ids.size() + i]; - ck->valid_map_base = str_column.nulls(); - ck->column_offset = str_column.column_offset(); - ck->column_data_base = str_column.data(); - ck->dict_data = dict_data + i * str_column.data_count() + g * row_index_stride_; - ck->dict_index = dict_index + i * str_column.data_count(); // Indexed by abs row - ck->start_row = g * row_index_stride_; - ck->num_rows = std::min(row_index_stride_, - std::max(str_column.data_count() - ck->start_row, 0)); - ck->num_strings = 0; - ck->string_char_count = 0; - ck->num_dict_strings = 0; - ck->dict_char_count = 0; - } } - dict->host_to_device(stream); - gpu::InitDictionaryIndices(dict->device_ptr(), str_col_ids.size(), num_rowgroups, stream); + gpu::InitDictionaryIndices(view, + dict->device_ptr(), + dict_data, + dict_index, + row_index_stride_, + d_str_col_ids.data(), + d_str_col_ids.size(), + num_rowgroups, + stream); dict->device_to_host(stream, true); } @@ -358,19 +295,19 @@ void writer::impl::build_dictionaries(orc_column_view *columns, str_column.attach_stripe_dict(stripe_dict.host_ptr(), stripe_dict.device_ptr()); for (auto const &stripe : stripe_bounds) { - auto &sd = stripe_dict[stripe.id * str_col_ids.size() + col_idx]; - sd.column_data_base = str_column.host_dict_chunk(0)->column_data_base; - sd.dict_data = str_column.host_dict_chunk(stripe.first)->dict_data; - sd.dict_index = dict_index + col_idx * str_column.data_count(); // Indexed by abs row - sd.column_id = str_col_ids[col_idx]; - sd.start_chunk = stripe.first; - sd.num_chunks = stripe.size; - sd.dict_char_count = 0; + auto &sd = stripe_dict[stripe.id * str_col_ids.size() + col_idx]; + sd.dict_data = str_column.host_dict_chunk(stripe.first)->dict_data; + sd.dict_index = dict_index + col_idx * str_column.data_count(); // Indexed by abs row + sd.column_id = str_col_ids[col_idx]; + sd.start_chunk = stripe.first; + sd.num_chunks = stripe.size; + sd.dict_char_count = 0; sd.num_strings = std::accumulate(stripe.cbegin(), stripe.cend(), 0, [&](auto dt_str_cnt, auto rg_idx) { const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; return dt_str_cnt + dt.num_dict_strings; }); + sd.leaf_column = dict[col_idx].leaf_column; } if (enable_dictionary_) { @@ -384,8 +321,8 @@ void writer::impl::build_dictionaries(orc_column_view *columns, string_column_cost{}, [&](auto cost, auto rg_idx) -> string_column_cost { const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; - return {cost.dictionary + dt.dict_char_count + dt.num_dict_strings, - cost.direct + dt.string_char_count}; + return {cost.direct + dt.string_char_count, + cost.dictionary + dt.dict_char_count + dt.num_dict_strings}; }); // Disable dictionary if it does not reduce the output size if (col_cost.dictionary >= col_cost.direct) { @@ -593,15 +530,16 @@ struct segmented_valid_cnt_input { std::vector indices; }; -encoded_data writer::impl::encode_columns(host_span columns, +encoded_data writer::impl::encode_columns(const table_device_view &view, + host_span columns, std::vector const &str_col_ids, host_span stripe_bounds, orc_streams const &streams) { auto const num_columns = columns.size(); auto const num_rowgroups = stripes_size(stripe_bounds); - hostdevice_2dvector chunks(num_columns, num_rowgroups); - hostdevice_2dvector 
chunk_streams(num_columns, num_rowgroups); + hostdevice_2dvector chunks(num_columns, num_rowgroups, stream); + hostdevice_2dvector chunk_streams(num_columns, num_rowgroups, stream); auto const stream_offsets = streams.compute_offsets(columns, num_rowgroups); rmm::device_uvector encoded_data(stream_offsets.data_size(), stream); @@ -614,23 +552,17 @@ encoded_data writer::impl::encode_columns(host_span colum auto const rg_idx = *rg_idx_it; auto &ck = chunks[column.id()][rg_idx]; - ck.start_row = (rg_idx * row_index_stride_); - ck.num_rows = std::min(row_index_stride_, column.data_count() - ck.start_row); - ck.valid_rows = column.data_count(); + ck.start_row = (rg_idx * row_index_stride_); + ck.num_rows = std::min(row_index_stride_, column.data_count() - ck.start_row); ck.encoding_kind = column.orc_encoding(); ck.type_kind = column.orc_kind(); if (ck.type_kind == TypeKind::STRING) { - ck.valid_map_base = column.nulls(); - ck.column_offset = column.column_offset(); - ck.column_data_base = (ck.encoding_kind == DICTIONARY_V2) - ? column.host_stripe_dict(stripe.id)->dict_index - : column.data(); + ck.dict_index = (ck.encoding_kind == DICTIONARY_V2) + ? column.host_stripe_dict(stripe.id)->dict_index + : nullptr; ck.dtype_len = 1; } else { - ck.valid_map_base = column.nulls(); - ck.column_offset = column.column_offset(); - ck.column_data_base = column.data(); - ck.dtype_len = column.type_width(); + ck.dtype_len = column.type_width(); } ck.scale = column.clockscale(); // Only need to check row groups that end within the stripe @@ -730,6 +662,8 @@ encoded_data writer::impl::encode_columns(host_span colum chunks.host_to_device(stream); chunk_streams.host_to_device(stream); + gpu::set_chunk_columns(view, chunks, stream); + if (!str_col_ids.empty()) { auto d_stripe_dict = columns[str_col_ids[0]].device_stripe_dict(); gpu::EncodeStripeDictionaries( @@ -791,8 +725,8 @@ std::vector> writer::impl::gather_statistic_blobs( size_t num_chunks = num_rowgroups * columns.size(); std::vector> stat_blobs(num_stat_blobs); - hostdevice_vector stat_desc(columns.size()); - hostdevice_vector stat_merge(num_stat_blobs); + hostdevice_vector stat_desc(columns.size(), stream); + hostdevice_vector stat_merge(num_stat_blobs, stream); rmm::device_uvector stat_chunks(num_chunks + num_stat_blobs, stream); rmm::device_uvector stat_groups(num_chunks, stream); @@ -811,11 +745,8 @@ std::vector> writer::impl::gather_statistic_blobs( case TypeKind::STRING: desc->stats_dtype = dtype_string; break; default: desc->stats_dtype = dtype_none; break; } - desc->num_rows = column.data_count(); - desc->num_values = column.data_count(); - desc->valid_map_base = column.nulls(); - desc->column_offset = column.column_offset(); - desc->column_data_base = column.data(); + desc->num_rows = column.data_count(); + desc->num_values = column.data_count(); if (desc->stats_dtype == dtype_timestamp64) { // Timestamp statistics are in milliseconds switch (column.clockscale()) { @@ -869,8 +800,8 @@ std::vector> writer::impl::gather_statistic_blobs( stat_merge.device_ptr(), stat_chunks.data() + num_chunks, num_stat_blobs, stream); stat_merge.device_to_host(stream, true); - hostdevice_vector blobs(stat_merge[num_stat_blobs - 1].start_chunk + - stat_merge[num_stat_blobs - 1].num_chunks); + hostdevice_vector blobs( + stat_merge[num_stat_blobs - 1].start_chunk + stat_merge[num_stat_blobs - 1].num_chunks, stream); gpu::orc_encode_statistics(blobs.device_ptr(), stat_merge.device_ptr(), stat_chunks.data() + num_chunks, @@ -1061,6 +992,22 @@ void 
writer::impl::init_state() out_sink_->host_write(MAGIC, std::strlen(MAGIC)); } +rmm::device_uvector get_string_column_ids(const table_device_view &view, + rmm::cuda_stream_view stream) +{ + rmm::device_uvector string_column_ids(view.num_columns(), stream); + auto iter = thrust::make_counting_iterator(0); + auto end_iter = thrust::copy_if(rmm::exec_policy(stream), + iter, + iter + view.num_columns(), + string_column_ids.begin(), + [view] __device__(size_type index) { + return (view.column(index).type().id() == type_id::STRING); + }); + string_column_ids.resize(end_iter - string_column_ids.begin(), stream); + return string_column_ids; +} + void writer::impl::write(table_view const &table) { CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); @@ -1074,6 +1021,9 @@ void writer::impl::write(table_view const &table) "be specified"); } + auto device_columns = table_device_view::create(table, stream); + auto string_column_ids = get_string_column_ids(*device_columns, stream); + // Wrapper around cudf columns to attach ORC-specific type info std::vector orc_columns; orc_columns.reserve(num_columns); @@ -1093,9 +1043,15 @@ void writer::impl::write(table_view const &table) // Build per-column dictionary indices const auto num_rowgroups = div_by_rowgroups(num_rows); const auto num_dict_chunks = num_rowgroups * str_col_ids.size(); - hostdevice_vector dict(num_dict_chunks); + hostdevice_vector dict(num_dict_chunks, stream); if (!str_col_ids.empty()) { - init_dictionaries(orc_columns.data(), str_col_ids, dict_data.data(), dict_index.data(), &dict); + init_dictionaries(*device_columns, + orc_columns.data(), + str_col_ids, + string_column_ids, + dict_data.data(), + dict_index.data(), + &dict); } // Decide stripe boundaries early on, based on uncompressed size @@ -1103,23 +1059,22 @@ void writer::impl::write(table_view const &table) // Build stripe-level dictionaries const auto num_stripe_dict = stripe_bounds.size() * str_col_ids.size(); - hostdevice_vector stripe_dict(num_stripe_dict); + hostdevice_vector stripe_dict(num_stripe_dict, stream); if (!str_col_ids.empty()) { build_dictionaries( orc_columns.data(), str_col_ids, stripe_bounds, dict, dict_index.data(), stripe_dict); } auto streams = create_streams(orc_columns, stripe_bounds); - auto enc_data = encode_columns(orc_columns, str_col_ids, stripe_bounds, streams); + auto enc_data = encode_columns(*device_columns, orc_columns, str_col_ids, stripe_bounds, streams); // Assemble individual disparate column chunks into contiguous data streams const auto num_index_streams = (num_columns + 1); const auto num_data_streams = streams.size() - num_index_streams; - hostdevice_2dvector strm_descs(stripe_bounds.size(), num_data_streams); + hostdevice_2dvector strm_descs(stripe_bounds.size(), num_data_streams, stream); auto stripes = gather_stripes(num_rows, num_index_streams, stripe_bounds, &enc_data.streams, &strm_descs); - auto device_columns = table_device_view::create(table); // Gather column statistics std::vector> column_stats; if (enable_statistics_ && num_columns > 0 && num_rows > 0) { @@ -1160,8 +1115,8 @@ void writer::impl::write(table_view const &table) // Compress the data streams rmm::device_buffer compressed_data(compressed_bfr_size, stream); - hostdevice_vector comp_out(num_compressed_blocks); - hostdevice_vector comp_in(num_compressed_blocks); + hostdevice_vector comp_out(num_compressed_blocks, stream); + hostdevice_vector comp_in(num_compressed_blocks, stream); if (compression_kind_ != NONE) { 
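// [Editor's note] A minimal, hypothetical sketch (not part of this patch) of the accessor
// pattern the ORC/Parquet kernels above migrate to: instead of carrying a raw
// column_data_base pointer plus separate valid_map_base / column_offset fields, a kernel
// receives a cudf::column_device_view and reads values and validity through it directly.
// The kernel name and the byte-counting logic here are illustrative only.
#include <cudf/column/column_device_view.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

__global__ void sum_string_bytes_sketch(cudf::column_device_view const d_strings,
                                        cudf::size_type* d_total)
{
  cudf::size_type const row = blockIdx.x * blockDim.x + threadIdx.x;
  if (row >= d_strings.size()) return;
  // is_valid() consults the view's null mask and offset, replacing the manual
  // valid_map_base / column_offset bookkeeping removed above.
  if (!d_strings.is_valid(row)) return;
  // element<string_view>() replaces the removed nvstrdesc_s {ptr, count} descriptor.
  cudf::string_view const v = d_strings.element<cudf::string_view>(row);
  atomicAdd(d_total, v.size_bytes());
}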
strm_descs.host_to_device(stream); gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index f0ec3a70cec..352cb11440f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -186,14 +186,18 @@ class writer::impl { /** * @brief Builds up column dictionaries indices * + * @param view Table device view representing input table * @param columns List of columns * @param str_col_ids List of columns that are strings type + * @param d_str_col_ids List of columns that are strings type in device memory * @param dict_data Dictionary data memory * @param dict_index Dictionary index memory * @param dict List of dictionary chunks */ - void init_dictionaries(orc_column_view* columns, + void init_dictionaries(const table_device_view& view, + orc_column_view* columns, std::vector const& str_col_ids, + device_span d_str_col_ids, uint32_t* dict_data, uint32_t* dict_index, hostdevice_vector* dict); @@ -238,13 +242,15 @@ class writer::impl { /** * @brief Encodes the input columns into streams. * + * @param view Table device view representing input table * @param columns List of columns * @param str_col_ids List of columns that are strings type * @param stripe_bounds List of stripe boundaries * @param stream CUDA stream used for device memory operations and kernel launches * @return Encoded data and per-chunk stream descriptors */ - encoded_data encode_columns(host_span columns, + encoded_data encode_columns(const table_device_view& view, + host_span columns, std::vector const& str_col_ids, host_span stripe_bounds, orc_streams const& streams); diff --git a/cpp/src/io/parquet/page_dict.cu b/cpp/src/io/parquet/page_dict.cu index 46d471d5cf7..2676f30474d 100644 --- a/cpp/src/io/parquet/page_dict.cu +++ b/cpp/src/io/parquet/page_dict.cu @@ -52,8 +52,10 @@ inline __device__ uint32_t uint64_hash16(uint64_t v) return uint32_hash16((uint32_t)(v + (v >> 32))); } -inline __device__ uint32_t nvstr_hash16(const uint8_t *p, uint32_t len) +inline __device__ uint32_t hash_string(const string_view &val) { + const char *p = val.data(); + uint32_t len = val.size_bytes(); uint32_t hash = len; if (len > 0) { uint32_t align_p = 3 & reinterpret_cast(p); @@ -181,7 +183,7 @@ __global__ void __launch_bounds__(block_size, 1) } else if (dtype == INT96) { dtype_len_in = 8; } else { - dtype_len_in = (dtype == BYTE_ARRAY) ? 
sizeof(nvstrdesc_s) : dtype_len; + dtype_len_in = dtype_len; } __syncthreads(); while (s->row_cnt < s->ck.num_rows) { @@ -206,7 +208,7 @@ __global__ void __launch_bounds__(block_size, 1) if (dtype == BYTE_ARRAY) { auto str1 = s->col.leaf_column->element(row); len += str1.size_bytes(); - hash = nvstr_hash16(reinterpret_cast(str1.data()), str1.size_bytes()); + hash = hash_string(str1); // Walk the list of rows with the same hash next_addr = &s->hashmap[hash]; while ((next = atomicCAS(next_addr, 0, row + 1)) != 0) { diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 3b29394686f..51ec0013f1a 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -79,8 +79,10 @@ struct page_enc_state_s { /** * @brief Return a 12-bit hash from a byte sequence */ -inline __device__ uint32_t nvstr_init_hash(const uint8_t *ptr, uint32_t len) +inline __device__ uint32_t hash_string(const string_view &val) { + char const *ptr = val.data(); + uint32_t len = val.size_bytes(); if (len != 0) { return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); } else { @@ -199,7 +201,7 @@ __global__ void __launch_bounds__(block_size) // dtype_len, which determines how much memory we need to allocate for the fragment. dtype_len_in = 8; } else { - dtype_len_in = (dtype == BYTE_ARRAY) ? sizeof(nvstrdesc_s) : dtype_len; + dtype_len_in = dtype_len; } __syncthreads(); @@ -218,7 +220,7 @@ __global__ void __launch_bounds__(block_size) if (dtype == BYTE_ARRAY) { auto str = s->col.leaf_column->element(val_idx); len += str.size_bytes(); - hash = nvstr_init_hash(reinterpret_cast(str.data()), str.size_bytes()); + hash = hash_string(str); } else if (dtype_len_in == 8) { hash = uint64_init_hash(s->col.leaf_column->element(val_idx)); } else { @@ -1059,7 +1061,7 @@ __global__ void __launch_bounds__(128, 8) gpuEncodePages(EncPage *pages, } else if (dtype == INT96) { dtype_len_in = 8; } else { - dtype_len_in = (dtype == BYTE_ARRAY) ? sizeof(nvstrdesc_s) : dtype_len_out; + dtype_len_in = dtype_len_out; } dict_bits = (dtype == BOOLEAN) ? 
1 : (s->page.dict_bits_plus1 - 1); if (t == 0) { diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 31baf419f45..1e8a6920ea4 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -969,7 +969,7 @@ void writer::impl::write(table_view const &table) } // Create table_device_view so that corresponding column_device_view data // can be written into col_desc members - auto parent_column_table_device_view = table_device_view::create(single_streams_table); + auto parent_column_table_device_view = table_device_view::create(single_streams_table, stream); rmm::device_uvector leaf_column_views(0, stream); // Initialize column description diff --git a/cpp/src/io/statistics/column_stats.cu b/cpp/src/io/statistics/column_stats.cu index 128bd905259..52f21f0a9ad 100644 --- a/cpp/src/io/statistics/column_stats.cu +++ b/cpp/src/io/statistics/column_stats.cu @@ -187,12 +187,6 @@ gatherFloatColumnStats(stats_state_s *s, statistics_dtype dtype, uint32_t t, Sto } } -// FIXME: Use native libcudf string type -struct nvstrdesc_s { - const char *ptr; - size_t count; -}; - /** * @brief Gather statistics for string columns * diff --git a/cpp/src/io/statistics/column_stats.h b/cpp/src/io/statistics/column_stats.h index d1d414aa7b4..d7895de50ce 100644 --- a/cpp/src/io/statistics/column_stats.h +++ b/cpp/src/io/statistics/column_stats.h @@ -45,10 +45,7 @@ struct stats_column_desc { uint32_t num_rows; //!< number of rows in column uint32_t num_values; //!< Number of data values in column. Different from num_rows in case of //!< nested columns - const uint32_t *valid_map_base; //!< base of valid bit map for this column (null if not present) - size_type column_offset; //! < index of the first element relative to the base memory - const void *column_data_base; //!< base ptr to column data - int32_t ts_scale; //!< timestamp scale (>0: multiply by scale, <0: divide by -scale) + int32_t ts_scale; //!< timestamp scale (>0: multiply by scale, <0: divide by -scale) column_device_view *leaf_column; //!< Pointer to leaf column column_device_view *parent_column; //!< Pointer to parent column. Is nullptr if not list type. diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index b64e91c18bd..d827d03a6c0 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -442,7 +442,9 @@ std::pair, std::unique_ptr
> construct_join_output_ stream, rmm::mr::get_current_device_resource()); common_table = cudf::detail::concatenate( - {common_from_build->view(), common_from_probe->view()}, stream, mr); + std::vector({common_from_build->view(), common_from_probe->view()}), + stream, + mr); } joined_indices = concatenate_vector_pairs(complement_indices, joined_indices); } else { diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu index c6ca56085c8..facf2827f56 100644 --- a/cpp/src/lists/copying/concatenate.cu +++ b/cpp/src/lists/copying/concatenate.cu @@ -48,7 +48,7 @@ namespace { * @param[in] mr Device memory resource used to allocate the * returned column's device memory. */ -std::unique_ptr merge_offsets(std::vector const& columns, +std::unique_ptr merge_offsets(host_span columns, size_type total_list_count, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -90,7 +90,7 @@ std::unique_ptr merge_offsets(std::vector const& colu * @copydoc cudf::lists::detail::concatenate */ std::unique_ptr concatenate( - std::vector const& columns, + host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { @@ -119,7 +119,7 @@ std::unique_ptr concatenate( // if any of the input columns have nulls, construct the output mask bool const has_nulls = - std::any_of(columns.cbegin(), columns.cend(), [](auto const& col) { return col.has_nulls(); }); + std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); rmm::device_buffer null_mask = create_null_mask( total_list_count, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED); if (has_nulls) { diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 783e0b4b1de..cb142c2c1e2 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -450,7 +450,8 @@ std::unique_ptr replace_kernel_forwarder::operator()({values.keys(), replacements.keys()}), stream); return cudf::dictionary::detail::add_keys(input, new_keys->view(), stream, mr); }(); auto matched_view = cudf::dictionary_column_view(matched_input->view()); diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index b8cb5e45fec..34d6d5fa194 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -838,8 +838,8 @@ std::unique_ptr grouped_time_range_rolling_window(table_view const& grou index_vector group_offsets(0, stream), group_labels(0, stream); if (group_keys.num_columns() > 0) { sort_groupby_helper helper{group_keys, cudf::null_policy::INCLUDE, cudf::sorted::YES}; - group_offsets = index_vector(helper.group_offsets(), stream); - group_labels = index_vector(helper.group_labels(), stream); + group_offsets = index_vector(helper.group_offsets(stream), stream); + group_labels = index_vector(helper.group_labels(stream), stream); } // Assumes that `timestamp_column` is actually of a timestamp type. diff --git a/cpp/src/sort/is_sorted.cu b/cpp/src/sort/is_sorted.cu index 5c31e565530..d1a1169dae4 100644 --- a/cpp/src/sort/is_sorted.cu +++ b/cpp/src/sort/is_sorted.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
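// [Editor's note] Several concatenate internals above now take cudf::host_span<T const>
// rather than std::vector<T> const&. A hypothetical helper, shown only to illustrate the
// call-site flexibility this buys; the function name is not part of this patch.
#include <cudf/column/column_view.hpp>
#include <cudf/utilities/span.hpp>
#include <vector>

// Accepts any contiguous host range of column_views without copying it into a vector.
cudf::size_type total_rows(cudf::host_span<cudf::column_view const> cols)
{
  cudf::size_type n = 0;
  for (auto const& c : cols) n += c.size();
  return n;
}

// Usage: a std::vector<cudf::column_view> binds directly to the host_span parameter.
// A braced initializer list no longer deduces to the span overload, which is why the
// test changes near the end of this patch wrap arguments in an explicit std::vector.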
@@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -34,10 +35,12 @@ auto is_sorted(cudf::table_view const& in, std::vector const& null_precedence, rmm::cuda_stream_view stream) { - auto in_d = table_device_view::create(in); - rmm::device_vector d_column_order(column_order); + // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns + auto flattened = structs::detail::flatten_nested_columns(in, column_order, null_precedence); + auto in_d = table_device_view::create(std::get<0>(flattened), stream); + rmm::device_vector d_column_order(std::get<1>(flattened)); rmm::device_vector const d_null_precedence = - (has_nulls) ? rmm::device_vector{null_precedence} + (has_nulls) ? rmm::device_vector{std::get<2>(flattened)} : rmm::device_vector{}; auto ineq_op = row_lexicographic_comparator( *in_d, *in_d, d_column_order.data().get(), d_null_precedence.data().get()); diff --git a/cpp/src/sort/sort_impl.cuh b/cpp/src/sort/sort_impl.cuh index 4fc83d343d5..506334c2a3d 100644 --- a/cpp/src/sort/sort_impl.cuh +++ b/cpp/src/sort/sort_impl.cuh @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -112,7 +114,7 @@ std::unique_ptr sorted_order(table_view input, 0); // fast-path for single column sort - if (input.num_columns() == 1) { + if (input.num_columns() == 1 and not cudf::is_nested(input.column(0).type())) { auto const single_col = input.column(0); auto const col_order = column_order.empty() ? order::ASCENDING : column_order.front(); auto const null_prec = null_precedence.empty() ? null_order::BEFORE : null_precedence.front(); @@ -120,11 +122,13 @@ std::unique_ptr sorted_order(table_view input, : sorted_order(single_col, col_order, null_prec, stream, mr); } - auto device_table = table_device_view::create(input, stream); - rmm::device_vector d_column_order(column_order); + auto flattened = structs::detail::flatten_nested_columns(input, column_order, null_precedence); + auto& input_flattened = std::get<0>(flattened); + auto device_table = table_device_view::create(input_flattened, stream); + rmm::device_vector d_column_order(std::get<1>(flattened)); - if (has_nulls(input)) { - rmm::device_vector d_null_precedence(null_precedence); + if (has_nulls(input_flattened)) { + rmm::device_vector d_null_precedence(std::get<2>(flattened)); auto comparator = row_lexicographic_comparator( *device_table, *device_table, d_column_order.data().get(), d_null_precedence.data().get()); if (stable) { diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 65c6c8f2836..48358cb4a38 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
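// [Editor's note] Sketch of the flatten-then-compare flow that is_sorted and sorted_order
// adopt above, using the internal cudf::structs::detail::flatten_nested_columns API.
// The wrapper function is illustrative; the header path assumes cpp/src is on the include
// path, as it is inside the cudf build.
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <structs/utilities.hpp>  // internal header extended by this patch
#include <rmm/cuda_stream_view.hpp>
#include <tuple>
#include <vector>

void flatten_then_compare_sketch(cudf::table_view const& input,
                                 std::vector<cudf::order> const& column_order,
                                 std::vector<cudf::null_order> const& null_precedence,
                                 rmm::cuda_stream_view stream)
{
  // Tuple contract: 0 = flattened table_view, 1 = per-flat-column order,
  // 2 = per-flat-column null_order, 3 = owning BOOL8 validity columns that must stay
  // alive for as long as the flattened view is used.
  auto flattened = cudf::structs::detail::flatten_nested_columns(input, column_order, null_precedence);
  auto const& flat_table = std::get<0>(flattened);
  auto d_table = cudf::table_device_view::create(flat_table, stream);
  // From here a row_lexicographic_comparator is built over *d_table with std::get<1> and
  // std::get<2>, exactly as in sort_impl.cuh above. Note the single-column fast path is
  // now skipped for nested types, since structs only become comparable after flattening.
}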
@@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include +#include "thrust/iterator/transform_iterator.h" #include #include @@ -65,8 +67,7 @@ struct chars_size_transform { } }; -auto create_strings_device_views(std::vector const& views, - rmm::cuda_stream_view stream) +auto create_strings_device_views(host_span views, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); // Assemble contiguous array of device views @@ -77,33 +78,30 @@ auto create_strings_device_views(std::vector const& views, // Compute the partition offsets and size of offset column // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type - auto input_offsets = thrust::host_vector(views.size() + 1); + auto input_offsets = std::vector(views.size() + 1); auto offset_it = std::next(input_offsets.begin()); thrust::transform( - thrust::host, views.cbegin(), views.cend(), offset_it, [](auto const& col) -> size_t { + thrust::host, views.begin(), views.end(), offset_it, [](auto const& col) -> size_t { return static_cast(col.size()); }); thrust::inclusive_scan(thrust::host, offset_it, input_offsets.end(), offset_it); - auto const d_input_offsets = rmm::device_vector{input_offsets}; - auto const output_size = input_offsets.back(); + auto d_input_offsets = cudf::detail::make_device_uvector_async(input_offsets, stream); + auto const output_size = input_offsets.back(); // Compute the partition offsets and size of chars column // Note: Using 64-bit size_t so we can detect overflow of 32-bit size_type - // Note: Using separate transform and inclusive_scan because - // transform_inclusive_scan fails to compile with: - // error: the default constructor of "cudf::column_device_view" cannot be - // referenced -- it is a deleted function - auto d_partition_offsets = rmm::device_vector(views.size() + 1); - thrust::transform(rmm::exec_policy(stream), - device_views_ptr, - device_views_ptr + views.size(), - std::next(d_partition_offsets.begin()), - chars_size_transform{}); - thrust::inclusive_scan(rmm::exec_policy(stream), - d_partition_offsets.cbegin(), - d_partition_offsets.cend(), - d_partition_offsets.begin()); - auto const output_chars_size = d_partition_offsets.back(); + auto d_partition_offsets = rmm::device_uvector(views.size() + 1, stream); + size_t zero{0}; + d_partition_offsets.set_element_async(0, zero, stream); // zero first element + + thrust::transform_inclusive_scan(rmm::exec_policy(stream), + device_views_ptr, + device_views_ptr + views.size(), + std::next(d_partition_offsets.begin()), + chars_size_transform{}, + thrust::plus{}); + auto const output_chars_size = d_partition_offsets.back_element(stream); + stream.synchronize(); // ensure copy of output_chars_size is complete before returning return std::make_tuple(std::move(device_view_owners), device_views_ptr, @@ -205,7 +203,7 @@ __global__ void fused_concatenate_string_chars_kernel(column_device_view const* } } -std::unique_ptr concatenate(std::vector const& columns, +std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -257,8 +255,8 @@ std::unique_ptr concatenate(std::vector const& columns, : fused_concatenate_string_offset_kernel; kernel<<>>( d_views, - d_input_offsets.data().get(), - d_partition_offsets.data().get(), + d_input_offsets.data(), + d_partition_offsets.data(), static_cast(columns.size()), strings_count, d_new_offsets, @@ -277,7 +275,7 @@ std::unique_ptr concatenate(std::vector const& columns, auto const kernel = 
fused_concatenate_string_chars_kernel; kernel<<>>( d_views, - d_partition_offsets.data().get(), + d_partition_offsets.data(), static_cast(columns.size()), total_bytes, d_new_chars); diff --git a/cpp/src/structs/copying/concatenate.cu b/cpp/src/structs/copying/concatenate.cu index b2f861c7c8d..6f18c4bcbd4 100644 --- a/cpp/src/structs/copying/concatenate.cu +++ b/cpp/src/structs/copying/concatenate.cu @@ -36,7 +36,7 @@ namespace detail { /** * @copydoc cudf::structs::detail::concatenate */ -std::unique_ptr concatenate(std::vector const& columns, +std::unique_ptr concatenate(host_span columns, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -49,7 +49,7 @@ std::unique_ptr concatenate(std::vector const& columns, std::transform(ordered_children.begin(), ordered_children.end(), std::back_inserter(children), - [mr, stream](std::vector const& cols) { + [mr, stream](host_span cols) { return cudf::detail::concatenate(cols, stream, mr); }); @@ -57,7 +57,7 @@ std::unique_ptr concatenate(std::vector const& columns, // if any of the input columns have nulls, construct the output mask bool const has_nulls = - std::any_of(columns.cbegin(), columns.cend(), [](auto const& col) { return col.has_nulls(); }); + std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); rmm::device_buffer null_mask = create_null_mask(total_length, has_nulls ? mask_state::UNINITIALIZED : mask_state::UNALLOCATED); if (has_nulls) { diff --git a/cpp/src/structs/utilities.cu b/cpp/src/structs/utilities.cu index 09e6c5d949d..174e36a1628 100644 --- a/cpp/src/structs/utilities.cu +++ b/cpp/src/structs/utilities.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,10 @@ #include #include +#include +#include #include +#include namespace cudf { namespace structs { @@ -27,7 +30,7 @@ namespace detail { * @copydoc cudf::structs::detail::extract_ordered_struct_children */ std::vector> extract_ordered_struct_children( - std::vector const& struct_cols) + host_span struct_cols) { auto const num_children = struct_cols[0].num_children(); auto const num_cols = static_cast(struct_cols.size()); @@ -56,6 +59,103 @@ std::vector> extract_ordered_struct_children( return result; } +/** + * @brief Flattens struct columns to constituent non-struct columns in the input table. + * + */ +struct flattened_table { + // reference variables + table_view const& input; + std::vector const& column_order; + std::vector const& null_precedence; + // output + std::vector> validity_as_column; + std::vector flat_columns; + std::vector flat_column_order; + std::vector flat_null_precedence; + + flattened_table(table_view const& input, + std::vector const& column_order, + std::vector const& null_precedence) + : input(input), column_order(column_order), null_precedence(null_precedence) + { + } + + // Convert null_mask to BOOL8 columns and flatten the struct children in order. + void flatten_struct_column(structs_column_view const& col, + order col_order, + null_order col_null_order) + { + if (col.nullable()) { + validity_as_column.push_back(cudf::is_valid(col)); + validity_as_column.back()->set_null_mask(copy_bitmask(col)); + flat_columns.push_back(validity_as_column.back()->view()); + if (not column_order.empty()) flat_column_order.push_back(col_order); // doesn't matter. 
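// [Editor's note] Worked example (hypothetical schema, not from this patch) of what the
// flattening implemented by flatten_struct_column in this hunk produces. For an input table
//   col0: STRUCT<name: STRING, age: INT32>, nullable, column_order = ASCENDING,
//         null_precedence = AFTER
//   col1: FLOAT64, column_order = DESCENDING, null_precedence = AFTER
// the flattened outputs are
//   flat_columns         = [ BOOL8 validity(col0), STRING name, INT32 age, FLOAT64 col1 ]
//   flat_column_order    = [ ASCENDING, ASCENDING, ASCENDING, DESCENDING ]
//   flat_null_precedence = [ AFTER,     BEFORE,    BEFORE,    AFTER      ]
// i.e. the struct's own null mask is materialized as a leading BOOL8 column (cudf::is_valid
// plus copy_bitmask, as in the code here), the children inherit the struct's sort order,
// and child null_precedence defaults to null_order::BEFORE.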
+ if (not null_precedence.empty()) flat_null_precedence.push_back(col_null_order); + } + for (decltype(col.num_children()) i = 0; i < col.num_children(); ++i) { + auto const& child = col.get_sliced_child(i); + if (child.type().id() == type_id::STRUCT) { + flatten_struct_column(structs_column_view{child}, col_order, null_order::BEFORE); + // default spark behaviour is null_order::BEFORE + } else { + flat_columns.push_back(child); + if (not column_order.empty()) flat_column_order.push_back(col_order); + if (not null_precedence.empty()) flat_null_precedence.push_back(null_order::BEFORE); + // default spark behaviour is null_order::BEFORE + } + } + } + // Note: possibly expand for flattening list columns too. + + /** + * @copydoc flattened_table + * + * @return tuple with flattened table, flattened column order, flattened null precedence, + * vector of boolean columns (struct validity). + */ + auto operator()() + { + for (auto i = 0; i < input.num_columns(); ++i) { + auto const& col = input.column(i); + if (col.type().id() == type_id::STRUCT) { + flatten_struct_column(structs_column_view{col}, + (column_order.empty() ? order() : column_order[i]), + (null_precedence.empty() ? null_order() : null_precedence[i])); + } else { + flat_columns.push_back(col); + if (not column_order.empty()) flat_column_order.push_back(column_order[i]); + if (not null_precedence.empty()) flat_null_precedence.push_back(null_precedence[i]); + } + } + + return std::make_tuple(table_view{flat_columns}, + std::move(flat_column_order), + std::move(flat_null_precedence), + std::move(validity_as_column)); + } +}; + +/** + * @copydoc cudf::detail::flatten_nested_columns + */ +std::tuple, + std::vector, + std::vector>> +flatten_nested_columns(table_view const& input, + std::vector const& column_order, + std::vector const& null_precedence) +{ + std::vector> validity_as_column; + auto const has_struct = std::any_of( + input.begin(), input.end(), [](auto const& col) { return col.type().id() == type_id::STRUCT; }); + if (not has_struct) + return std::make_tuple(input, column_order, null_precedence, std::move(validity_as_column)); + + return flattened_table{input, column_order, null_precedence}(); +} + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index 1e0511cfd83..c0111d0bbde 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ #pragma once #include +#include +#include namespace cudf { namespace structs { @@ -45,7 +47,26 @@ namespace detail { * @return New column with concatenated results. */ std::vector> extract_ordered_struct_children( - std::vector const& struct_cols); + host_span struct_cols); + +/** + * @brief Flatten table with struct columns to table with constituent columns of struct columns. + * + * If a table does not have struct columns, same input arguments are returned. + * + * @param input input table to be flattened + * @param column_order column order for input table + * @param null_precedence null order for input table + * @return tuple with flattened table, flattened column order, flattened null precedence, + * vector of boolean columns (struct validity). 
+ */ +std::tuple, + std::vector, + std::vector>> +flatten_nested_columns(table_view const& input, + std::vector const& column_order, + std::vector const& null_precedence); } // namespace detail } // namespace structs diff --git a/cpp/src/table/table_device_view.cu b/cpp/src/table/table_device_view.cu index bdce1c325c5..62daeed6d79 100644 --- a/cpp/src/table/table_device_view.cu +++ b/cpp/src/table/table_device_view.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #include +#include + namespace cudf { namespace detail { template @@ -52,5 +54,45 @@ template class table_device_view_base; // Explicit instantiation for a device table of mutable views template class table_device_view_base; +namespace { +struct is_relationally_comparable_impl { + template + constexpr bool operator()() + { + return cudf::is_relationally_comparable(); + } +}; +} // namespace + +template +bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) +{ + return thrust::all_of(thrust::counting_iterator(0), + thrust::counting_iterator(lhs.num_columns()), + [lhs, rhs] __device__(auto const i) { + // Simplified this for compile time. (Ideally use double_type_dispatcher) + // TODO: possible to implement without double type dispatcher. + return lhs.column(i).type() == rhs.column(i).type() and + type_dispatcher(lhs.column(i).type(), + is_relationally_comparable_impl{}); + }); +} + +// Explicit extern template instantiation for a table of immutable views +extern template bool is_relationally_comparable(table_view const& lhs, + table_view const& rhs); + +// Explicit extern template instantiation for a table of mutable views +extern template bool is_relationally_comparable(mutable_table_view const& lhs, + mutable_table_view const& rhs); + +// Explicit extern template instantiation for a device table of immutable views +template bool is_relationally_comparable(table_device_view const& lhs, + table_device_view const& rhs); + +// Explicit extern template instantiation for a device table of mutable views +template bool is_relationally_comparable( + mutable_table_device_view const& lhs, mutable_table_device_view const& rhs); + } // namespace detail } // namespace cudf diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 3c583622ed8..4a41dacbd30 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -50,7 +50,7 @@ struct ngram_generator_fn { cudf::column_device_view const d_strings; cudf::size_type ngrams; cudf::string_view const d_separator; - int32_t const* d_offsets{}; + int32_t* d_offsets{}; char* d_chars{}; /** @@ -62,7 +62,7 @@ struct ngram_generator_fn { * @param idx Index of the kernel thread. * @return Number of bytes required for the string for this thread. */ - __device__ cudf::size_type operator()(cudf::size_type idx) + __device__ void operator()(cudf::size_type idx) { char* out_ptr = d_chars ? 
d_chars + d_offsets[idx] : nullptr; cudf::size_type bytes = 0; @@ -74,7 +74,7 @@ struct ngram_generator_fn { bytes += d_separator.size_bytes(); if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_separator); } - return bytes; + if (!d_chars) d_offsets[idx] = bytes; } }; @@ -109,11 +109,11 @@ std::unique_ptr generate_ngrams( if (d_strings.is_null(idx)) return false; return !d_strings.element(idx).empty(); }, - stream, - mr) + stream) ->release(); strings_count = table_offsets.front()->size() - 1; - return std::move(table_offsets.front()); + auto result = std::move(table_offsets.front()); + return result; }(); // this allows freeing the temporary table_offsets CUDF_EXPECTS(strings_count >= ngrams, "Insufficient number of strings to generate ngrams"); @@ -131,30 +131,13 @@ std::unique_ptr generate_ngrams( // compute the number of strings of ngrams auto const ngrams_count = strings_count - ngrams + 1; - // build output offsets by computing the output bytes for each generated ngram - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator( - 0, ngram_generator_fn{d_strings, ngrams, d_separator}); - auto offsets_column = cudf::strings::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + ngrams_count, stream, mr); - auto d_offsets = offsets_column->view().data(); - - // build the chars column - // generate the ngrams from the input strings and copy them into the chars data buffer - cudf::size_type const total_bytes = thrust::device_pointer_cast(d_offsets)[ngrams_count]; - auto chars_column = - cudf::strings::detail::create_chars_child_column(ngrams_count, 0, total_bytes, stream, mr); - char* const d_chars = chars_column->mutable_view().data(); - - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - ngrams_count, - ngram_generator_fn{d_strings, ngrams, d_separator, d_offsets, d_chars}); - chars_column->set_null_count(0); + auto children = cudf::strings::detail::make_strings_children( + ngram_generator_fn{d_strings, ngrams, d_separator}, ngrams_count, 0, stream, mr); // make the output strings column from the offsets and chars column return cudf::make_strings_column(ngrams_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), 0, rmm::device_buffer{0, stream, mr}, stream, diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index e63cbac1e72..cea53326895 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -99,7 +99,7 @@ TYPED_TEST(TypedColumnTest, ConcatenateNoColumns) TYPED_TEST(TypedColumnTest, ConcatenateColumnView) { - cudf::column original{this->type(), this->num_elements(), this->data, this->mask}; + column original{this->type(), this->num_elements(), this->data, this->mask}; std::vector indices{0, this->num_elements() / 3, this->num_elements() / 3, @@ -223,7 +223,7 @@ TEST_F(TableTest, ConcatenateTables) cols_table2.push_back(col3_table2.release()); Table t2(std::move(cols_table2)); - auto concat_table = cudf::concatenate({t1.view(), t2.view()}); + auto concat_table = cudf::concatenate(std::vector({t1, t2})); CUDF_TEST_EXPECT_TABLES_EQUAL(*concat_table, gold_table); } @@ -341,7 +341,8 @@ TEST_F(TableTest, SizeOverflowTest) auto many_chars = cudf::make_fixed_width_column(cudf::data_type{cudf::type_id::INT8}, size); cudf::table_view tbl({*many_chars}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error); 
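// [Editor's note] Minimal sketch of the call-site pattern the surrounding test edits adopt:
// cudf::concatenate now takes a host_span, so a braced list of views no longer deduces and
// is wrapped in an explicit std::vector first. The helper name is illustrative only.
#include <cudf/concatenate.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <memory>
#include <vector>

std::unique_ptr<cudf::table> concat_twice(cudf::table_view const& tbl)
{
  // Before: cudf::concatenate({tbl, tbl});  // initializer_list no longer matches host_span
  auto views = std::vector<cudf::table_view>({tbl, tbl});
  return cudf::concatenate(views);  // host_span<table_view const> binds to the vector
}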
+ EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), + cudf::logic_error); } // string column, overflow on chars @@ -356,7 +357,8 @@ TEST_F(TableTest, SizeOverflowTest) 1, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), + cudf::logic_error); } // string column, overflow on offsets (rows) @@ -372,7 +374,8 @@ TEST_F(TableTest, SizeOverflowTest) size, std::move(many_offsets), chars.release(), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl}), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector({tbl, tbl, tbl, tbl, tbl, tbl})), + cudf::logic_error); } // list, structs too long @@ -395,8 +398,8 @@ TEST_F(TableTest, SizeOverflowTest) 1, offsets.release(), std::move(struct_col), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}), - cudf::logic_error); + auto tables = std::vector({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}); + EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); } // struct, list child too long @@ -419,8 +422,8 @@ TEST_F(TableTest, SizeOverflowTest) auto col = cudf::make_structs_column(size, std::move(children), 0, rmm::device_buffer{0}); cudf::table_view tbl({*col}); - EXPECT_THROW(cudf::concatenate({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}), - cudf::logic_error); + auto tables = std::vector({tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl, tbl}); + EXPECT_THROW(cudf::concatenate(tables), cudf::logic_error); } } @@ -463,12 +466,14 @@ TEST_F(StructsColumnTest, ConcatenateStructs) // build expected output std::vector> expected_children; - expected_children.push_back( - cudf::concatenate({name_cols[0], name_cols[1], name_cols[2], name_cols[3]})); - expected_children.push_back( - cudf::concatenate({age_cols[0], age_cols[1], age_cols[2], age_cols[3]})); - expected_children.push_back( - cudf::concatenate({is_human_cols[0], is_human_cols[1], is_human_cols[2], is_human_cols[3]})); + auto name_col_vec = + std::vector({name_cols[0], name_cols[1], name_cols[2], name_cols[3]}); + auto age_col_vec = std::vector({age_cols[0], age_cols[1], age_cols[2], age_cols[3]}); + auto is_human_col_vec = std::vector( + {is_human_cols[0], is_human_cols[1], is_human_cols[2], is_human_cols[3]}); + expected_children.push_back(cudf::concatenate(name_col_vec)); + expected_children.push_back(cudf::concatenate(age_col_vec)); + expected_children.push_back(cudf::concatenate(is_human_col_vec)); std::vector struct_validity({1, 0, 1, 1, 1, 0}); auto expected = make_structs_column( 6, @@ -484,7 +489,7 @@ TEST_F(StructsColumnTest, ConcatenateStructs) src.push_back(structs_column_wrapper({name_cols[3], age_cols[3], is_human_cols[3]}, {1, 0})); // concatenate - auto result = cudf::concatenate({src[0], src[1], src[2], src[3]}); + auto result = cudf::concatenate(std::vector({src[0], src[1], src[2], src[3]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -536,9 +541,13 @@ TEST_F(StructsColumnTest, ConcatenateSplitStructs) // build expected output std::vector> expected_children; - expected_children.push_back(cudf::concatenate({split_names_cols[0], split_names_cols[1]})); - expected_children.push_back(cudf::concatenate({split_ages_cols[0], 
split_ages_cols[1]})); - expected_children.push_back(cudf::concatenate({split_is_human_cols[0], split_is_human_cols[1]})); + auto expected_names = std::vector({split_names_cols[0], split_names_cols[1]}); + auto expected_ages = std::vector({split_ages_cols[0], split_ages_cols[1]}); + auto expected_is_human = + std::vector({split_is_human_cols[0], split_is_human_cols[1]}); + expected_children.push_back(cudf::concatenate(expected_names)); + expected_children.push_back(cudf::concatenate(expected_ages)); + expected_children.push_back(cudf::concatenate(expected_is_human)); auto expected = make_structs_column(7, std::move(expected_children), 0, rmm::device_buffer{}); // concatenate as structs @@ -552,7 +561,8 @@ TEST_F(StructsColumnTest, ConcatenateSplitStructs) } // concatenate - auto result = cudf::concatenate({src[0], src[1]}); + + auto result = cudf::concatenate(std::vector({src[0], src[1]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -607,8 +617,11 @@ TEST_F(StructsColumnTest, ConcatenateStructsNested) // build expected output std::vector> expected_children; - expected_children.push_back(cudf::concatenate({inner_structs[0], inner_structs[1]})); - expected_children.push_back(cudf::concatenate({inner_lists[0], inner_lists[1]})); + + expected_children.push_back( + cudf::concatenate(std::vector({inner_structs[0], inner_structs[1]}))); + expected_children.push_back( + cudf::concatenate(std::vector({inner_lists[0], inner_lists[1]}))); auto expected = make_structs_column(11, std::move(expected_children), 0, rmm::device_buffer{}); // concatenate as structs @@ -621,7 +634,7 @@ TEST_F(StructsColumnTest, ConcatenateStructsNested) } // concatenate - auto result = cudf::concatenate({src[0], src[1]}); + auto result = cudf::concatenate(std::vector({src[0], src[1]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -635,7 +648,7 @@ TEST_F(ListsColumnTest, ConcatenateLists) cudf::test::lists_column_wrapper b{4, 5, 6, 7, 8, 9, 10}; cudf::test::lists_column_wrapper expected{{0, 1, 2, 3}, {4, 5, 6, 7, 8, 9, 10}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -646,7 +659,7 @@ TEST_F(ListsColumnTest, ConcatenateLists) cudf::test::lists_column_wrapper expected{ {0, 1, 1}, {2, 3}, {4, 5}, {6}, {8, 9, 9, 9}, {10, 11}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -657,7 +670,7 @@ TEST_F(ListsColumnTest, ConcatenateLists) cudf::test::lists_column_wrapper expected{ {0, 1}, {2, 3, 4, 5}, {6, 7, 8}, {9}, {10, 11}, {12, 13, 14, 15}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -674,7 +687,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper b{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{4, 5, 6, 7}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -684,7 +697,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper d{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{4, 5, 6, 7}; - auto result = cudf::concatenate({a, b, c, d}); + auto result = cudf::concatenate(std::vector({a, b, c, d})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -694,7 +707,7 @@ TEST_F(ListsColumnTest, 
ConcatenateEmptyLists) cudf::test::lists_column_wrapper b{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{LCW{}, {4, 5, 6, 7}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -704,7 +717,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper d{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{LCW{}, LCW{}, LCW{}, {4, 5, 6, 7}}; - auto result = cudf::concatenate({a, b, c, d}); + auto result = cudf::concatenate(std::vector({a, b, c, d})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -715,7 +728,7 @@ TEST_F(ListsColumnTest, ConcatenateEmptyLists) cudf::test::lists_column_wrapper d{4, 5, 6, 7}; cudf::test::lists_column_wrapper expected{{1, 2}, LCW{}, LCW{}, {4, 5, 6, 7}}; - auto result = cudf::concatenate({a, b, c, d}); + auto result = cudf::concatenate(std::vector({a, b, c, d})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -732,7 +745,7 @@ TEST_F(ListsColumnTest, ConcatenateListsWithNulls) cudf::test::lists_column_wrapper b{{{4, 6, 7}, valids}}; cudf::test::lists_column_wrapper expected{{{0, 1, 2, 3}, valids}, {{4, 6, 7}, valids}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -746,7 +759,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedLists) cudf::test::lists_column_wrapper expected{ {{0, 1}, {2}}, {{4, 5, 6, 7, 8, 9, 10}}, {{6, 7}}, {{8, 9, 10}, {11, 12}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -770,7 +783,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedLists) {{{31, 32}, {33, 34}}, {{35, 36}, {37, 38}}, {{39, 40}}}, {{{71, 72}, {74}}, {{75, 76, 77, 78}, {77, 78}}, {{79, 80, 81}}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -789,7 +802,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedEmptyLists) cudf::test::lists_column_wrapper expected{ {{LCW{}}}, {{0, 1}, {2, 3}}, {{6, 7}}, {LCW{}, {11, 12}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -815,7 +828,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedEmptyLists) {{{31, 32}, {33, 34}}, {{35, 36}, {37, 38}, {1, 2}}, {{39, 40}}}, {{{LCW{}}}}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -834,7 +847,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedListsWithNulls) cudf::test::lists_column_wrapper expected{{{{0, 1}, {2, 3}}, valids}, {{{4}, {6, 7}}, valids}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -848,7 +861,7 @@ TEST_F(ListsColumnTest, ConcatenateNestedListsWithNulls) {{6, 7}}, {{{{8, 9, 10}, valids}, {11, 12}}, valids}}; - auto result = cudf::concatenate({a, b}); + auto result = cudf::concatenate(std::vector({a, b})); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected); } @@ -864,7 +877,8 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper a{{{{LCW{}}}}}; cudf::test::lists_column_wrapper b{{{LCW{}}}}; cudf::test::lists_column_wrapper c{{LCW{}}}; - EXPECT_THROW(cudf::concatenate({a, b, 
c}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); } { @@ -872,20 +886,23 @@ TEST_F(ListsColumnTest, ConcatenateMismatchedHierarchies) cudf::test::lists_column_wrapper a{{{{{LCW{}}}}, valids.begin()}}; cudf::test::lists_column_wrapper b{{{LCW{}}}}; cudf::test::lists_column_wrapper c{{LCW{}}}; - EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); } { cudf::test::lists_column_wrapper a{{{{LCW{}}}}}; cudf::test::lists_column_wrapper b{1, 2, 3}; cudf::test::lists_column_wrapper c{{3, 4, 5}}; - EXPECT_THROW(cudf::concatenate({a, b, c}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b, c})), cudf::logic_error); } { cudf::test::lists_column_wrapper a{{{1, 2, 3}}}; cudf::test::lists_column_wrapper b{{4, 5}}; - EXPECT_THROW(cudf::concatenate({a, b}), cudf::logic_error); + + EXPECT_THROW(cudf::concatenate(std::vector({a, b})), cudf::logic_error); } } @@ -910,14 +927,16 @@ TEST_F(ListsColumnTest, SlicedColumns) {{4, 4, 4}, {5, 5}, {6, 6}}, {{-1, -1, -1, -1}, {-2}}, {{-3, -3, -3, -3}, {-4}}}; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{{{1, 1, 1}, {2, 2}, {3, 3}}, {{4, 4, 4}, {5, 5}, {6, 6}}, {{-5, -5, -5, -5}, {-6}}, {{-7, -7, -7, -7}, {-8}}}; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -926,14 +945,16 @@ TEST_F(ListsColumnTest, SlicedColumns) {{-1, -1, -1, -1}, {-2}}, {{-3, -3, -3, -3}, {-4}}, }; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{{{7, 7, 7}, {8, 8}, {9, 9}}, {{10, 10, 10}, {11, 11}, {12, 12}}, {{-5, -5, -5, -5}, {-6}}, {{-7, -7, -7, -7}, {-8}}}; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } @@ -958,7 +979,9 @@ TEST_F(ListsColumnTest, SlicedColumns) {LCW{}, {LCW{}}, {{6, 6}, {2}}}, {{LCW{}}}, {LCW{}, {LCW{}}}}; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); + cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{ @@ -967,7 +990,8 @@ TEST_F(ListsColumnTest, SlicedColumns) {{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, {{LCW{}}}, }; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -975,7 +999,8 @@ TEST_F(ListsColumnTest, SlicedColumns) {LCW{}, LCW{}, {{10, 10, 10}, {11, 11}, {12, 12}}, LCW{}}, {{LCW{}}}, {LCW{}, {LCW{}}}}; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); 
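The recurring edit in these concatenate tests is to build an explicit std::vector of views and pass that to cudf::concatenate instead of handing it a braced initializer list; the same pattern appears later for table_view concatenation in the ORC and Parquet chunked-writer tests. A minimal sketch of the new call pattern, with the template arguments (elided in the hunks above) filled in by assumption and an illustrative helper name:

    #include <cudf/column/column.hpp>
    #include <cudf/column/column_view.hpp>
    #include <cudf/concatenate.hpp>
    #include <cudf_test/column_wrapper.hpp>

    #include <memory>
    #include <vector>

    // Hypothetical helper, not part of the patch: concatenates two columns the
    // way the updated tests do.
    std::unique_ptr<cudf::column> concat_two_columns()
    {
      // Test wrappers convert implicitly to cudf::column_view.
      cudf::test::fixed_width_column_wrapper<int32_t> a{0, 1, 2, 3};
      cudf::test::fixed_width_column_wrapper<int32_t> b{4, 5, 6, 7};

      // Build the vector of views explicitly, then concatenate; the result owns
      // a single column holding {0, 1, 2, 3, 4, 5, 6, 7}.
      auto views = std::vector<cudf::column_view>({a, b});
      return cudf::concatenate(views);
    }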
cudf::test::lists_column_wrapper expected3{ @@ -984,7 +1009,8 @@ TEST_F(ListsColumnTest, SlicedColumns) {{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, {{LCW{}}}, }; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } } @@ -1015,14 +1041,16 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{{{-1, -1, -1, -1}, valids}, {-2}}, valids}, {{{{-3, -3, -3, -3}, valids}, {-4}}, valids}, {{{{-5, -5, -5, -5}, valids}, {-6}}, valids}}; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{{{{1, 1, 1}, valids}, {2, 2}, {{3, 3}, valids}}, {{{4, 4, 4}, {{5, 5}, valids}, {6, 6}}, valids}, {{7, 7, 7}, {8, 8}, {9, 9}}, {{{{-7, -7, -7, -7}, valids}, {-8}}, valids}}; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -1030,13 +1058,15 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{{{-1, -1, -1, -1}, valids}, {-2}}, valids}, {{{{-3, -3, -3, -3}, valids}, {-4}}, valids}, {{{{-5, -5, -5, -5}, valids}, {-6}}, valids}}; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{ {{{10, 10, 10}, {11, 11}, {{12, 12}, valids}}, valids}, {{{{-7, -7, -7, -7}, valids}, {-8}}, valids}}; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } @@ -1068,7 +1098,8 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{LCW{}, {{LCW{}}, valids}}, valids}, {{{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, valids}, }; - auto result0 = cudf::concatenate({split_a[0], split_b[0]}); + + auto result0 = cudf::concatenate(std::vector({split_a[0], split_b[0]})); cudf::test::expect_columns_equivalent(*result0, expected0); cudf::test::lists_column_wrapper expected1{ @@ -1079,7 +1110,8 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{{LCW{}, LCW{}}, valids}}, {{LCW{}}}, }; - auto result1 = cudf::concatenate({split_a[0], split_b[1]}); + + auto result1 = cudf::concatenate(std::vector({split_a[0], split_b[1]})); cudf::test::expect_columns_equivalent(*result1, expected1); cudf::test::lists_column_wrapper expected2{ @@ -1088,14 +1120,16 @@ TEST_F(ListsColumnTest, SlicedColumnsWithNulls) {{LCW{}, {{LCW{}}, valids}}, valids}, {{{{1, 2, 9}, LCW{}}, {{5, 6, 7, 8, 9}, {0}, {15, 17}}}, valids}, }; - auto result2 = cudf::concatenate({split_a[1], split_b[0]}); + + auto result2 = cudf::concatenate(std::vector({split_a[1], split_b[0]})); cudf::test::expect_columns_equivalent(*result2, expected2); cudf::test::lists_column_wrapper expected3{ {LCW{}, LCW{}, {{{10, 10, 10}, {{11, 11}, valids}, {12, 12}}, valids}, LCW{}}, {{LCW{}}}, }; - auto result3 = cudf::concatenate({split_a[1], split_b[1]}); + + auto result3 = cudf::concatenate(std::vector({split_a[1], split_b[1]})); cudf::test::expect_columns_equivalent(*result3, expected3); } } @@ -1140,11 +1174,12 @@ TEST_F(ListsColumnTest, 
ListOfStructs) } // build expected output - auto expected_child = - cudf::concatenate({inner_structs[0], inner_structs[1], inner_structs[2], inner_structs[3]}); + auto struct_views = std::vector( + {inner_structs[0], inner_structs[1], inner_structs[2], inner_structs[3]}); + auto expected_child = cudf::concatenate(struct_views); fixed_width_column_wrapper offsets_w{0, 1, 1, 1, 1, 4, 6, 6, 6, 10, 11}; - auto expected = make_lists_column( - 10, std::move(offsets_w.release()), std::move(expected_child), 0, rmm::device_buffer{}); + auto expected = + make_lists_column(10, offsets_w.release(), std::move(expected_child), 0, rmm::device_buffer{}); // lists std::vector> offsets; @@ -1154,7 +1189,7 @@ TEST_F(ListsColumnTest, ListOfStructs) offsets.push_back({0, 0, 4, 5}); // concatenate as lists - std::vector> src; + std::vector> src; for (size_t idx = 0; idx < inner_structs.size(); idx++) { int size = static_cast(offsets[idx]).size() - 1; src.push_back(make_lists_column( @@ -1162,7 +1197,7 @@ TEST_F(ListsColumnTest, ListOfStructs) } // concatenate - auto result = cudf::concatenate({*src[0], *src[1], *src[2], *src[3]}); + auto result = cudf::concatenate(std::vector({*src[0], *src[1], *src[2], *src[3]})); cudf::test::expect_columns_equivalent(*result, *expected); } @@ -1189,8 +1224,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointConcatentate) auto const b = fw_wrapper(vec.begin() + 300, vec.begin() + 700); auto const c = fw_wrapper(vec.begin() + 700, vec.end()); - auto const columns = std::vector{a, b, c}; - auto const results = cudf::concatenate(columns); + auto const results = cudf::concatenate(std::vector{a, b, c}); auto const expected = fw_wrapper(vec.begin(), vec.end()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); @@ -1208,8 +1242,7 @@ TEST_F(FixedPointTest, FixedPointConcatentate) auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2}); auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-2}); - auto const columns = std::vector{a, b, c}; - auto const results = cudf::concatenate(columns); + auto const results = cudf::concatenate(std::vector{a, b, c}); auto const expected = fp_wrapper(vec.begin(), vec.end(), scale_type{-2}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); @@ -1227,8 +1260,7 @@ TEST_F(FixedPointTest, FixedPointScaleMismatch) auto const b = fp_wrapper(vec.begin() + 300, vec.begin() + 700, scale_type{-2}); auto const c = fp_wrapper(vec.begin() + 700, vec.end(), /*****/ scale_type{-3}); - auto const columns = std::vector{a, b, c}; - EXPECT_THROW(cudf::concatenate(columns), cudf::logic_error); + EXPECT_THROW(cudf::concatenate(std::vector{a, b, c}), cudf::logic_error); } struct DictionaryConcatTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 9f5bbe2dcb9..d79307dcbf6 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -168,7 +168,7 @@ TEST_F(FromArrowTest, StructColumn) std::vector>{{"string", "integral", "bool", "nested_list", "struct"}}; auto str_col = cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"} .release(); auto str_col2 = cudf::test::strings_column_wrapper{{"CUDF", "ROCKS", "EVERYWHERE"}, {0, 1, 0}}.release(); @@ -198,7 +198,7 @@ TEST_F(FromArrowTest, StructColumn) cudf::table_view expected_cudf_table({struct_col->view()}); // Create Arrow table - std::vector str{"Samuel 
Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"}; + std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"}; std::vector str2{"CUDF", "ROCKS", "EVERYWHERE"}; auto str_array = get_arrow_array(str); auto int_array = get_arrow_array({48, 27, 25}); diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index c8e56711135..57275433516 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -270,7 +270,7 @@ TEST_F(ToArrowTest, StructColumn) std::vector>{{"string", "integral", "bool", "nested_list", "struct"}}; auto str_col = cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"} .release(); auto str_col2 = cudf::test::strings_column_wrapper{{"CUDF", "ROCKS", "EVERYWHERE"}, {0, 1, 0}}.release(); @@ -306,7 +306,7 @@ TEST_F(ToArrowTest, StructColumn) metadata.children_meta = {{"string"}, {"integral"}, {"bool"}, {"nested_list"}, sub_metadata}; // Create Arrow table - std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"}; + std::vector str{"Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"}; std::vector str2{"CUDF", "ROCKS", "EVERYWHERE"}; auto str_array = get_arrow_array(str); auto int_array = get_arrow_array({48, 27, 25}); diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index b0dc01ea001..108befa80a7 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -395,7 +396,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) auto col3_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 || i <= 60); }); + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); auto col5_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); @@ -657,7 +658,7 @@ TEST_F(OrcChunkedWriterTest, SimpleTable) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc"); cudf_io::chunked_orc_writer_options opts = @@ -677,7 +678,7 @@ TEST_F(OrcChunkedWriterTest, LargeTables) auto table1 = create_random_fixed_table(512, 4096, true); auto table2 = create_random_fixed_table(512, 8192, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc"); cudf_io::chunked_orc_writer_options opts = @@ -737,7 +738,7 @@ TEST_F(OrcChunkedWriterTest, Strings) cols.push_back(strings2.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc"); cudf_io::chunked_orc_writer_options opts = @@ -799,7 +800,7 @@ TEST_F(OrcChunkedWriterTest, ReadStripes) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table2, *table1, *table2}); + auto full_table = 
cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc"); cudf_io::chunked_orc_writer_options opts = @@ -863,7 +864,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc"); cudf_io::chunked_orc_writer_options opts = @@ -910,7 +911,7 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc"); cudf_io::chunked_orc_writer_options opts = diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 013457d8ed6..880f11aaeb2 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -472,7 +473,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) auto col3_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 || i <= 60); }); + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); auto col5_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); auto col6_mask = @@ -1218,7 +1219,7 @@ TEST_F(ParquetChunkedWriterTest, SimpleTable) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1238,7 +1239,7 @@ TEST_F(ParquetChunkedWriterTest, LargeTables) auto table1 = create_random_fixed_table(512, 4096, true); auto table2 = create_random_fixed_table(512, 8192, true); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1300,7 +1301,7 @@ TEST_F(ParquetChunkedWriterTest, Strings) cols.push_back(strings2.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1359,7 +1360,7 @@ TEST_F(ParquetChunkedWriterTest, ListColumn) auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0}); auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1}); - auto expected = cudf::concatenate({tbl0, tbl1}); + auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1413,7 +1414,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct) auto table_2 = table_view({*list_col_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = 
cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); @@ -1504,7 +1505,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) auto table_2 = table_view({*list_col_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); @@ -1639,7 +1640,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, false); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1678,7 +1679,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; auto table_2 = cudf::table_view({struct_2_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being"); @@ -1707,7 +1708,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) auto table1 = create_random_fixed_table(5, 5, false); auto table2 = create_random_fixed_table(5, 5, false); - auto full_table = cudf::concatenate({*table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); @@ -1764,7 +1765,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) auto table1 = table_view({col00, col10}); auto table2 = table_view({col01, col11}); - auto full_table = cudf::concatenate({table1, table2}); + auto full_table = cudf::concatenate(std::vector({table1, table2})); cudf_io::table_input_metadata metadata(table1); metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level @@ -1809,7 +1810,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; auto table_2 = cudf::table_view({struct_2_2}); - auto full_table = cudf::concatenate({table_1, table_2}); + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); cudf_io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being").set_nullability(false); @@ -1838,7 +1839,7 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroups) auto table1 = create_random_fixed_table(5, 5, true); auto table2 = create_random_fixed_table(5, 5, true); - auto full_table = cudf::concatenate({*table2, *table1, *table2}); + auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); cudf_io::chunked_parquet_writer_options args = @@ -1951,7 +1952,7 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); cudf_io::chunked_parquet_writer_options 
args = @@ -1998,7 +1999,7 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); - auto expected = cudf::concatenate({tbl1, tbl2}); + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); cudf_io::chunked_parquet_writer_options args = diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 451fa82d5a3..b7d98704aff 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -705,7 +705,7 @@ TEST_F(MergeTest, KeysWithNulls) auto valids2 = cudf::detail::make_counting_transform_iterator( 0, [](auto row) { return (row % 15 == 0) ? false : true; }); cudf::test::fixed_width_column_wrapper data2(data_iter, data_iter + nrows, valids2); - auto all_data = cudf::concatenate({data1, data2}); + auto all_data = cudf::concatenate(std::vector{{data1, data2}}); std::vector column_orders{cudf::order::ASCENDING, cudf::order::DESCENDING}; std::vector null_precedences{cudf::null_order::AFTER, cudf::null_order::BEFORE}; diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index 1e6bb2a70fb..abc9a9bfe9e 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -162,6 +162,73 @@ auto nulls_before() return strings_column_wrapper({"identical", "identical"}, {0, 1}); } +// ----- struct_view {"nestedInt" : {"Int" : 0 }, "float" : 1} + +template +typename std::enable_if::value, structs_column_wrapper>::type +ascending() +{ + using T1 = int32_t; + auto int_col = fixed_width_column_wrapper({std::numeric_limits::lowest(), + T1(-100), + T1(-10), + T1(-10), + T1(0), + T1(10), + T1(10), + T1(100), + std::numeric_limits::max()}); + auto nestedInt_col = structs_column_wrapper{{int_col}}; + auto float_col = ascending(); + return structs_column_wrapper{{nestedInt_col, float_col}}; +} + +template +typename std::enable_if::value, structs_column_wrapper>::type +descending() +{ + using T1 = int32_t; + auto int_col = fixed_width_column_wrapper({std::numeric_limits::max(), + T1(100), + T1(10), + T1(10), + T1(0), + T1(-10), + T1(-10), + T1(-100), + std::numeric_limits::lowest()}); + auto nestedInt_col = structs_column_wrapper{{int_col}}; + auto float_col = descending(); + return structs_column_wrapper{{nestedInt_col, float_col}}; +} + +template <> +auto empty() +{ + auto int_col = fixed_width_column_wrapper(); + auto col1 = structs_column_wrapper{{int_col}}; + auto col2 = fixed_width_column_wrapper(); + return structs_column_wrapper{{col1, col2}}; +} + +template <> +auto nulls_after() +{ + auto int_col = fixed_width_column_wrapper({1, 1}); + auto col1 = structs_column_wrapper{{int_col}}; + auto col2 = fixed_width_column_wrapper({1, 1}); + return structs_column_wrapper{{col1, col2}, {1, 0}}; +} + +template <> +auto nulls_before() +{ + auto int_col = fixed_width_column_wrapper({1, 1}); + auto col1 = structs_column_wrapper{{int_col}}; + auto col2 = fixed_width_column_wrapper({1, 1}); + return structs_column_wrapper{{col1, col2}, {0, 1}}; +} + } // namespace testdata } // anonymous namespace @@ -172,7 +239,8 @@ template struct IsSortedTest : public BaseFixture { }; -TYPED_TEST_CASE(IsSortedTest, ComparableTypes); +using 
SupportedTypes = Concat>; +TYPED_TEST_CASE(IsSortedTest, SupportedTypes); TYPED_TEST(IsSortedTest, NoColumns) { diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index 5359014a831..9eb082c513c 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -197,6 +197,386 @@ TYPED_TEST(Sort, WithAllValid) } } +TYPED_TEST(Sort, WithStructColumn) +{ + using T = TypeParam; + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Überwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + auto num_rows{std::distance(names.begin(), names.end())}; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}}.release(); + auto struct_col_view{struct_col->view()}; + EXPECT_EQ(num_rows, struct_col->size()); + + fixed_width_column_wrapper col1{{5, 4, 3, 5, 8, 9}}; + strings_column_wrapper col2({"d", "e", "a", "d", "k", "a"}); + fixed_width_column_wrapper col3{{10, 40, 70, 5, 2, 20}}; + table_view input{{col1, col2, col3, struct_col_view}}; + + fixed_width_column_wrapper expected{{2, 1, 0, 3, 4, 5}}; + std::vector column_order{ + order::ASCENDING, order::ASCENDING, order::DESCENDING, order::ASCENDING}; + + auto got = sorted_order(input, column_order); + + // Skip validating bools order. 
Valid true bools are all + // equivalent, and yield random order after thrust::sort + if (!std::is_same::value) { + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + } else { + // Run test for sort and sort_by_key + fixed_width_column_wrapper expected_for_bool{{2, 5, 3, 0, 1, 4}}; + run_sort_test(input, expected_for_bool, column_order); + } +} + +TYPED_TEST(Sort, WithNestedStructColumn) +{ + using T = TypeParam; + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Überwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + std::vector v{1, 1, 0, 1, 1, 0}; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + auto struct_col1 = cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, v}; + + auto ages_col2 = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + auto struct_col2 = cudf::test::structs_column_wrapper{{ages_col2, struct_col1}}.release(); + + auto struct_col_view{struct_col2->view()}; + + fixed_width_column_wrapper col1{{6, 6, 6, 6, 6, 6}}; + fixed_width_column_wrapper col2{{1, 1, 1, 2, 2, 2}}; + table_view input{{col1, col2, struct_col_view}}; + + fixed_width_column_wrapper expected{{3, 5, 4, 2, 1, 0}}; + std::vector column_order{order::ASCENDING, order::DESCENDING, order::ASCENDING}; + + auto got = sorted_order(input, column_order); + + // Skip validating bools order. Valid true bools are all + // equivalent, and yield random order after thrust::sort + if (!std::is_same::value) { + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + } else { + // Run test for sort and sort_by_key + fixed_width_column_wrapper expected_for_bool{{2, 5, 1, 3, 4, 0}}; + run_sort_test(input, expected_for_bool, column_order); + } +} + +TYPED_TEST(Sort, WithSingleStructColumn) +{ + using T = TypeParam; + + std::initializer_list names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Überwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + std::vector v{1, 1, 0, 1, 1, 0}; + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + auto ages_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}}; + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + auto struct_col = + cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, v}.release(); + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + + fixed_width_column_wrapper expected{{2, 5, 1, 3, 4, 0}}; + std::vector column_order{order::ASCENDING}; + + auto got = sorted_order(input, column_order); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); +} + +TYPED_TEST(Sort, WithSlicedStructColumn) +{ + using T = TypeParam; + /* + /+-------------+ + | s| + +--------------+ + 0 | {"bbe", 1, 7}| + 1 | {"bbe", 1, 8}| + 2 | {"aaa", 0, 1}| + 3 | {"abc", 0, 1}| + 4 | {"ab", 0, 9}| + 5 | {"za", 2, 5}| + 6 | {"b", 1, 7}| + 7 | { @, 3, 3}| + +--------------+ + */ + // clang-format off + using 
FWCW = cudf::test::fixed_width_column_wrapper; + std::vector string_valids{ 1, 1, 1, 1, 1, 1, 1, 0}; + std::initializer_list names = {"bbe", "bbe", "aaa", "abc", "ab", "za", "b", "x"}; + auto col2 = FWCW{{ 1, 1, 0, 0, 0, 2, 1, 3}}; + auto col3 = FWCW{{ 7, 8, 1, 1, 9, 5, 7, 3}}; + auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()}; + auto struct_col = structs_column_wrapper{{col1, col2, col3}}.release(); + // clang-format on + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + auto sliced_columns = cudf::split(struct_col_view, std::vector{3}); + auto sliced_tables = cudf::split(input, std::vector{3}); + std::vector column_order{order::ASCENDING}; + /* + asce_null_first sliced[3:] + /+-------------+ + | s| + +--------------+ + 7 | { @, 3, 3}| 7=4 + 2 | {"aaa", 0, 1}| + 4 | {"ab", 0, 9}| 4=1 + 3 | {"abc", 0, 1}| 3=0 + 6 | {"b", 1, 7}| 6=3 + 0 | {"bbe", 1, 7}| + 1 | {"bbe", 1, 8}| + 5 | {"za", 2, 5}| 5=2 + +--------------+ + */ + + // normal + fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; + auto got = sorted_order(input, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + + // table with sliced column + table_view input2{{sliced_columns[1]}}; + fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; + got = sorted_order(input2, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input2, expected2, column_order); + + // sliced table[1] + fixed_width_column_wrapper expected3{{4, 1, 0, 3, 2}}; + got = sorted_order(sliced_tables[1], column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); + // Run test for sort and sort_by_key + run_sort_test(sliced_tables[1], expected3, column_order); + + // sliced table[0] + fixed_width_column_wrapper expected4{{2, 0, 1}}; + got = sorted_order(sliced_tables[0], column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); + // Run test for sort and sort_by_key + run_sort_test(sliced_tables[0], expected4, column_order); +} + +TYPED_TEST(Sort, SlicedColumns) +{ + using T = TypeParam; + using FWCW = cudf::test::fixed_width_column_wrapper; + + // clang-format off + std::vector string_valids{ 1, 1, 1, 1, 1, 1, 1, 0}; + std::initializer_list names = {"bbe", "bbe", "aaa", "abc", "ab", "za", "b", "x"}; + auto col2 = FWCW{{ 7, 8, 1, 1, 9, 5, 7, 3}}; + auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()}; + // clang-format on + table_view input{{col1, col2}}; + auto sliced_columns1 = cudf::split(col1, std::vector{3}); + auto sliced_columns2 = cudf::split(col1, std::vector{3}); + auto sliced_tables = cudf::split(input, std::vector{3}); + std::vector column_order{order::ASCENDING, order::ASCENDING}; + + // normal + // fixed_width_column_wrapper expected{{2, 3, 7, 5, 0, 6, 1, 4}}; + fixed_width_column_wrapper expected{{7, 2, 4, 3, 6, 0, 1, 5}}; + auto got = sorted_order(input, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected, column_order); + + // table with sliced column + table_view input2{{sliced_columns1[1], sliced_columns2[1]}}; + // fixed_width_column_wrapper expected2{{0, 4, 2, 3, 1}}; + fixed_width_column_wrapper expected2{{4, 1, 0, 3, 2}}; + got = sorted_order(input2, column_order); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, 
got->view()); + // Run test for sort and sort_by_key + run_sort_test(input2, expected2, column_order); +} + +TYPED_TEST(Sort, WithStructColumnCombinations) +{ + using T = TypeParam; + using FWCW = cudf::test::fixed_width_column_wrapper; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | null| + 3 |{null, null}| + 4 | null| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + std::vector struct_valids{1, 1, 0, 1, 0, 1, 1, 1}; + auto col1 = FWCW{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = FWCW{{-1, -1, 9, -1, 9, -1, 1, 0}, {0, 0, 1, 0, 1, 0, 1, 1}}; + auto struct_col = cudf::test::structs_column_wrapper{{col1, col2}, struct_valids}.release(); + /* + desc_nulls_first desc_nulls_last asce_nulls_first asce_nulls_last + +------------+ +------------+ +------------+ +------------+ + | s| | s| | s| | s| + +------------+ +------------+ +------------+ +------------+ + 2 | null| 1 | {1, null}| 2 | null| 3 |{null, null}| + 4 | null| 0 | {0, null}| 4 | null| 5 |{null, null}| + 1 | {1, null}| 6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}| + 0 | {0, null}| 7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}| + 6 | {null, 1}| 3 |{null, null}| 7 | {null, 0}| 0 | {0, null}| + 7 | {null, 0}| 5 |{null, null}| 6 | {null, 1}| 1 | {1, null}| + 3 |{null, null}| 2 | null| 0 | {0, null}| 2 | null| + 5 |{null, null}| 4 | null| 1 | {1, null}| 4 | null| + +------------+ +------------+ +------------+ +------------+ + */ + // clang-format on + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + std::vector column_order1{order::DESCENDING}; + + // desc_nulls_first + fixed_width_column_wrapper expected1{{2, 4, 1, 0, 6, 7, 3, 5}}; + auto got = sorted_order(input, column_order1, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected1, column_order1, {null_order::AFTER}); + + // desc_nulls_last + fixed_width_column_wrapper expected2{{1, 0, 6, 7, 3, 5, 2, 4}}; + got = sorted_order(input, column_order1, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected2, column_order1, {null_order::BEFORE}); + + // asce_nulls_first + std::vector column_order2{order::ASCENDING}; + fixed_width_column_wrapper expected3{{2, 4, 3, 5, 7, 6, 0, 1}}; + got = sorted_order(input, column_order2, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected3, column_order2, {null_order::BEFORE}); + + // asce_nulls_last + fixed_width_column_wrapper expected4{{3, 5, 7, 6, 0, 1, 2, 4}}; + got = sorted_order(input, column_order2, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected4, column_order2, {null_order::AFTER}); +} + +TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls) +{ + using T = TypeParam; + using FWCW = cudf::test::fixed_width_column_wrapper; + + // clang-format off + /* + +------------+ + | s| + +------------+ + 0 | {0, null}| + 1 | {1, null}| + 2 | {9, 9}| + 3 |{null, null}| + 4 | {9, 9}| + 5 |{null, null}| + 6 | {null, 1}| + 7 | {null, 0}| + +------------+ + */ + auto col1 = FWCW{{ 0, 1, 9, -1, 9, -1, -1, -1}, {1, 1, 1, 0, 1, 0, 0, 0}}; + auto col2 = FWCW{{-1, -1, 9, -1, 9, -1, 1, 0}, {0, 0, 1, 0, 1, 0, 1, 1}}; + auto 
struct_col = cudf::test::structs_column_wrapper{{col1, col2}}.release(); + /* (nested columns are always nulls_first, spark requirement) + desc_nulls_* asce_nulls_* + +------------+ +------------+ + | s| | s| + +------------+ +------------+ + 2 | {9, 9}| 3 |{null, null}| + 4 | {9, 9}| 5 |{null, null}| + 1 | {1, null}| 7 | {null, 0}| + 0 | {0, null}| 6 | {null, 1}| + 6 | {null, 1}| 0 | {0, null}| + 7 | {null, 0}| 1 | {1, null}| + 3 |{null, null}| 2 | {9, 9}| + 5 |{null, null}| 4 | {9, 9}| + +------------+ +------------+ + */ + // clang-format on + auto struct_col_view{struct_col->view()}; + table_view input{{struct_col_view}}; + std::vector column_order{order::DESCENDING}; + + // desc_nulls_first + fixed_width_column_wrapper expected1{{2, 4, 1, 0, 6, 7, 3, 5}}; + auto got = sorted_order(input, column_order, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected1, column_order, {null_order::AFTER}); + + // desc_nulls_last + got = sorted_order(input, column_order, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected1, column_order, {null_order::BEFORE}); + + // asce_nulls_first + std::vector column_order2{order::ASCENDING}; + fixed_width_column_wrapper expected2{{3, 5, 7, 6, 0, 1, 2, 4}}; + got = sorted_order(input, column_order2, {null_order::BEFORE}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected2, column_order2, {null_order::BEFORE}); + + // asce_nulls_last + got = sorted_order(input, column_order2, {null_order::AFTER}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view()); + // Run test for sort and sort_by_key + run_sort_test(input, expected2, column_order2, {null_order::AFTER}); +} + TYPED_TEST(Sort, Stable) { using T = TypeParam; diff --git a/cpp/tests/structs/structs_column_tests.cu b/cpp/tests/structs/structs_column_tests.cu index 2a0856133ba..e1438c33044 100644 --- a/cpp/tests/structs/structs_column_tests.cu +++ b/cpp/tests/structs/structs_column_tests.cu @@ -68,7 +68,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) { auto names_col = cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"} .release(); int num_rows{names_col->size()}; @@ -95,7 +95,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnFactoryConstruction) // Check child columns for exactly correct values. 
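Taken together, the sort_test additions above exercise cudf::sorted_order over struct columns, including sliced struct columns and the nested-null orderings spelled out in the comment tables. Reduced to its core, the pattern those tests repeat looks roughly like this (includes and template arguments are assumptions here, since the hunks elide them; the helper name is illustrative):

    #include <cudf/column/column.hpp>
    #include <cudf/sorting.hpp>
    #include <cudf/table/table_view.hpp>
    #include <cudf_test/column_wrapper.hpp>

    #include <memory>
    #include <vector>

    // Returns an INT32 column of row indices that would sort the struct keys.
    std::unique_ptr<cudf::column> sort_struct_keys()
    {
      // A struct<string, int32> column with three rows.
      cudf::test::strings_column_wrapper names{"Vimes", "Carrot", "Angua"};
      cudf::test::fixed_width_column_wrapper<int32_t> ages{{48, 27, 25}};
      cudf::test::structs_column_wrapper people{{names, ages}};

      cudf::table_view keys{{people}};

      // One order / null_order entry per sort key; ascending with nulls first.
      std::vector<cudf::order> column_order{cudf::order::ASCENDING};
      std::vector<cudf::null_order> null_precedence{cudf::null_order::BEFORE};

      return cudf::sorted_order(keys, column_order, null_precedence);
    }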
vector_of_columns expected_children; expected_children.emplace_back(cudf::test::strings_column_wrapper{ - "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Uberwald"} + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"} .release()); expected_children.emplace_back( cudf::test::fixed_width_column_wrapper{48, 27, 25}.release()); @@ -116,7 +116,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestColumnWrapperConstruction) { std::initializer_list names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Überwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -174,7 +174,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestStructsContainingLists) std::initializer_list names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Überwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -234,7 +234,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfStructs) auto names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Überwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -300,7 +300,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestNullMaskPropagationForNonNullStruct auto names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Überwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; @@ -393,7 +393,7 @@ TYPED_TEST(TypedStructColumnWrapperTest, TestListsOfStructs) std::initializer_list names = {"Samuel Vimes", "Carrot Ironfoundersson", - "Angua von Uberwald", + "Angua von Überwald", "Cheery Littlebottom", "Detritus", "Mr Slant"}; diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index e8953ab9a30..15d014f9d9c 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -537,6 +537,9 @@ inline auto make_fixed_point_data_type(int32_t scale) return cudf::data_type{cudf::type_to_id(), scale}; } +struct FixedPointTestSingleType : public cudf::test::BaseFixture { +}; + template struct FixedPointTests : public cudf::test::BaseFixture { }; @@ -592,6 +595,18 @@ TYPED_TEST(FixedPointTests, CastToInt32) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TEST_F(FixedPointTestSingleType, CastDecimal64ToInt32) +{ + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const input = fp_wrapper{{7246212000}, numeric::scale_type{-5}}; + auto const expected = fw_wrapper{72462}; + auto const result = cudf::cast(input, make_data_type()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, CastToIntLarge) { using namespace numeric; @@ -659,6 +674,18 @@ TYPED_TEST(FixedPointTests, CastFromInt) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); } +TEST_F(FixedPointTestSingleType, CastInt32ToDecimal64) +{ + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + using fw_wrapper = cudf::test::fixed_width_column_wrapper; + + auto const input = fw_wrapper{-48938}; + auto const expected = fp_wrapper{{-4893800000LL}, numeric::scale_type{-5}}; + auto const result = cudf::cast(input, make_fixed_point_data_type(-5)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + TYPED_TEST(FixedPointTests, CastFromIntLarge) { using namespace numeric; diff --git a/docs/cudf/source/groupby.md b/docs/cudf/source/groupby.md index 7e96d4fe38c..5376df261e7 100644 --- a/docs/cudf/source/groupby.md +++ b/docs/cudf/source/groupby.md @@ -137,6 +137,7 @@ The following table summarizes the available 
aggregations and the types that sup | nunique | ✅ | ✅ | ✅ | ✅ | | | | nth | ✅ | ✅ | ✅ | | | | | collect | ✅ | ✅ | ✅ | | ✅ | | +| unique | ✅ | ✅ | ✅ | ✅ | | | ## GroupBy apply diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index e50a9e86ead..90fe3553abc 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -288,19 +288,34 @@ public final ColumnVector isNull() { /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is an integer, and FALSE if its not an integer. A null will be returned - * for null entries + * for null entries. * * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction. * i.e. If this method returns true for a value it could still result in an overflow or underflow * if you convert it to a Java integral type * - * @return - Boolean vector + * @return Boolean vector */ public final ColumnVector isInteger() { assert type.equals(DType.STRING); return new ColumnVector(isInteger(getNativeView())); } + /** + * Returns a Boolean vector with the same number of rows as this instance, that has + * TRUE for any entry that is an integer, and FALSE if its not an integer. A null will be returned + * for null entries. + * + * @param intType the data type that should be used for bounds checking. Note that only + * integer types are allowed. + * @return Boolean vector + */ + public final ColumnVector isInteger(DType intType) { + assert type.equals(DType.STRING); + return new ColumnVector(isIntegerWithType(getNativeView(), + intType.getTypeId().getNativeId(), intType.getScale())); + } + /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is a float, and FALSE if its not a float. A null will be returned @@ -373,7 +388,19 @@ public final ColumnVector findAndReplaceAll(ColumnView oldValues, ColumnView new * @return - ColumnVector with nulls replaced by scalar */ public final ColumnVector replaceNulls(Scalar scalar) { - return new ColumnVector(replaceNulls(getNativeView(), scalar.getScalarHandle())); + return new ColumnVector(replaceNullsScalar(getNativeView(), scalar.getScalarHandle())); + } + + /** + * Returns a ColumnVector with any null values replaced with the corresponding row in the + * specified replacement column. + * This column and the replacement column must have the same type and number of rows. 
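The column-based replaceNulls overload documented here forwards, via the replaceNullsColumn JNI entry point shown further below, to the libcudf replace_nulls overload that takes a replacement column of matching type and row count. A small sketch of that underlying call, with illustrative values and a hypothetical helper name:

    #include <cudf/column/column.hpp>
    #include <cudf/replace.hpp>
    #include <cudf_test/column_wrapper.hpp>

    #include <memory>

    // input:        {1, null, 3, null}
    // replacements: {9,    8, 7,    6}  ->  result {1, 8, 3, 6}
    std::unique_ptr<cudf::column> fill_nulls_from_column()
    {
      cudf::test::fixed_width_column_wrapper<int32_t> input{{1, 0, 3, 0}, {1, 0, 1, 0}};
      cudf::test::fixed_width_column_wrapper<int32_t> replacements{9, 8, 7, 6};

      // Both arguments convert to cudf::column_view; they must have the same
      // data type and number of rows, otherwise libcudf throws cudf::logic_error.
      return cudf::replace_nulls(input, replacements);
    }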
+ * + * @param replacements column of replacement values + * @return column with nulls replaced by corresponding row of replacements column + */ + public final ColumnVector replaceNulls(ColumnView replacements) { + return new ColumnVector(replaceNullsColumn(getNativeView(), replacements.getNativeView())); } /** @@ -2825,7 +2852,9 @@ private static native long rollingWindow( private static native long charLengths(long viewHandle) throws CudfException; - private static native long replaceNulls(long viewHandle, long scalarHandle) throws CudfException; + private static native long replaceNullsScalar(long viewHandle, long scalarHandle) throws CudfException; + + private static native long replaceNullsColumn(long viewHandle, long replaceViewHandle) throws CudfException; private static native long ifElseVV(long predVec, long trueVec, long falseVec) throws CudfException; @@ -2845,6 +2874,8 @@ private static native long rollingWindow( private static native long isInteger(long viewHandle); + private static native long isIntegerWithType(long viewHandle, int typeId, int typeScale); + private static native long isNotNanNative(long viewHandle); private static native long isNotNullNative(long viewHandle); diff --git a/java/src/main/java/ai/rapids/cudf/OrderByArg.java b/java/src/main/java/ai/rapids/cudf/OrderByArg.java new file mode 100644 index 00000000000..fbdd7035c76 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/OrderByArg.java @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +import java.io.Serializable; + +/** + * Provides the ordering for specific columns. + */ +public final class OrderByArg implements Serializable { + final int index; + final boolean isDescending; + final boolean isNullSmallest; + + OrderByArg(int index, boolean isDescending, boolean isNullSmallest) { + this.index = index; + this.isDescending = isDescending; + this.isNullSmallest = isNullSmallest; + } + + public static OrderByArg asc(final int index) { + return new OrderByArg(index, false, false); + } + + public static OrderByArg desc(final int index) { + return new OrderByArg(index, true, false); + } + + public static OrderByArg asc(final int index, final boolean isNullSmallest) { + return new OrderByArg(index, false, isNullSmallest); + } + + public static OrderByArg desc(final int index, final boolean isNullSmallest) { + return new OrderByArg(index, true, isNullSmallest); + } + + @Override + public String toString() { + return "ORDER BY " + index + + (isDescending ? " DESC " : " ASC ") + + (isNullSmallest ? 
"NULL SMALLEST" : "NULL LARGEST"); + } +} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 4da99d811f2..6e0b7d3bb94 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -25,7 +25,6 @@ import ai.rapids.cudf.HostColumnVector.StructType; import java.io.File; -import java.io.Serializable; import java.math.BigDecimal; import java.math.RoundingMode; import java.nio.ByteBuffer; @@ -1444,7 +1443,7 @@ public ColumnVector sortOrder(OrderByArg... args) { * responsible for cleaning up * the {@link ColumnVector} returned as part of the output {@link Table} *
<p>
- * Example usage: orderBy(true, Table.asc(0), Table.desc(3)...); + * Example usage: orderBy(true, OrderByArg.asc(0), OrderByArg.desc(3)...); * @param args Suppliers to initialize sortKeys. * @return Sorted Table */ @@ -1512,22 +1511,6 @@ public static Table merge(List
<Table>
tables, OrderByArg... args) { return merge(tables.toArray(new Table[tables.size()]), args); } - public static OrderByArg asc(final int index) { - return new OrderByArg(index, false, false); - } - - public static OrderByArg desc(final int index) { - return new OrderByArg(index, true, false); - } - - public static OrderByArg asc(final int index, final boolean isNullSmallest) { - return new OrderByArg(index, false, isNullSmallest); - } - - public static OrderByArg desc(final int index, final boolean isNullSmallest) { - return new OrderByArg(index, true, isNullSmallest); - } - /** * Returns count aggregation with only valid values. * Null values are skipped. @@ -2093,25 +2076,6 @@ public static Table fromPackedTable(ByteBuffer metadata, DeviceMemoryBuffer data // HELPER CLASSES ///////////////////////////////////////////////////////////////////////////// - public static final class OrderByArg implements Serializable { - final int index; - final boolean isDescending; - final boolean isNullSmallest; - - OrderByArg(int index, boolean isDescending, boolean isNullSmallest) { - this.index = index; - this.isDescending = isDescending; - this.isNullSmallest = isNullSmallest; - } - - @Override - public String toString() { - return "ORDER BY " + index + - (isDescending ? " DESC " : " ASC ") + - (isNullSmallest ? "NULL SMALLEST" : "NULL LARGEST"); - } - } - /** * class to encapsulate indices and table */ diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4132016d85c..dc1acc50b5f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -121,8 +121,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_lowerStrings(JNIEnv *env, CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNulls(JNIEnv *env, jclass, - jlong j_col, jlong j_scalar) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNullsScalar(JNIEnv *env, jclass, + jlong j_col, + jlong j_scalar) { JNI_NULL_CHECK(env, j_col, "column is null", 0); JNI_NULL_CHECK(env, j_scalar, "scalar is null", 0); try { @@ -135,6 +136,21 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNulls(JNIEnv *env, CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_replaceNullsColumn(JNIEnv *env, jclass, + jlong j_col, + jlong j_replace_col) { + JNI_NULL_CHECK(env, j_col, "column is null", 0); + JNI_NULL_CHECK(env, j_replace_col, "replacement column is null", 0); + try { + cudf::jni::auto_set_device(env); + auto col = reinterpret_cast(j_col); + auto replacements = reinterpret_cast(j_replace_col); + std::unique_ptr result = cudf::replace_nulls(*col, *replacements); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_ifElseVV(JNIEnv *env, jclass, jlong j_pred_vec, jlong j_true_vec, @@ -1788,6 +1804,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isInteger(JNIEnv *env, jo CATCH_STD(env, 0) } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_isIntegerWithType(JNIEnv *env, jobject, + jlong handle, + jint j_dtype, + jint scale) { + + JNI_NULL_CHECK(env, handle, "native view handle is null", 0) + + try { + cudf::jni::auto_set_device(env); + cudf::column_view *view = reinterpret_cast(handle); + cudf::data_type int_dtype = cudf::jni::make_data_type(j_dtype, scale); + std::unique_ptr result = cudf::strings::is_integer(*view, int_dtype); + return reinterpret_cast(result.release()); + } + 
CATCH_STD(env, 0) +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyColumnViewToCV(JNIEnv *env, jobject j_object, jlong handle) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 02fbe56431b..fe1cba5ceb1 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -1368,7 +1368,7 @@ void testFromScalarNullByte() { } @Test - void testReplaceEmptyColumn() { + void testReplaceNullsScalarEmptyColumn() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(); ColumnVector expected = ColumnVector.fromBoxedBooleans(); Scalar s = Scalar.fromBool(false); @@ -1378,7 +1378,7 @@ void testReplaceEmptyColumn() { } @Test - void testReplaceNullBoolsWithAllNulls() { + void testReplaceNullsScalarBoolsWithAllNulls() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(null, null, null, null); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false); Scalar s = Scalar.fromBool(false); @@ -1388,7 +1388,7 @@ void testReplaceNullBoolsWithAllNulls() { } @Test - void testReplaceSomeNullBools() { + void testReplaceNullsScalarSomeNullBools() { try (ColumnVector input = ColumnVector.fromBoxedBooleans(false, null, null, false); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, false); Scalar s = Scalar.fromBool(true); @@ -1398,7 +1398,7 @@ void testReplaceSomeNullBools() { } @Test - void testReplaceNullIntegersWithAllNulls() { + void testReplaceNullsScalarIntegersWithAllNulls() { try (ColumnVector input = ColumnVector.fromBoxedInts(null, null, null, null); ColumnVector expected = ColumnVector.fromBoxedInts(0, 0, 0, 0); Scalar s = Scalar.fromInt(0); @@ -1408,7 +1408,7 @@ void testReplaceNullIntegersWithAllNulls() { } @Test - void testReplaceSomeNullIntegers() { + void testReplaceNullsScalarSomeNullIntegers() { try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 999, 4, 999); Scalar s = Scalar.fromInt(999); @@ -1418,7 +1418,7 @@ void testReplaceSomeNullIntegers() { } @Test - void testReplaceNullsFailsOnTypeMismatch() { + void testReplaceNullsScalarFailsOnTypeMismatch() { try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); Scalar s = Scalar.fromBool(true)) { assertThrows(CudfException.class, () -> input.replaceNulls(s).close()); @@ -1434,6 +1434,44 @@ void testReplaceNullsWithNullScalar() { } } + @Test + void testReplaceNullsColumnEmptyColumn() { + try (ColumnVector input = ColumnVector.fromBoxedBooleans(); + ColumnVector r = ColumnVector.fromBoxedBooleans(); + ColumnVector expected = ColumnVector.fromBoxedBooleans(); + ColumnVector result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnBools() { + try (ColumnVector input = ColumnVector.fromBoxedBooleans(null, true, null, false); + ColumnVector r = ColumnVector.fromBoxedBooleans(false, null, true, true); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, false); + ColumnVector result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnIntegers() { + try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); + ColumnVector r = ColumnVector.fromBoxedInts(996, 997, 998, 909, null); + ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 998, 4, null); + ColumnVector 
result = input.replaceNulls(r)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testReplaceNullsColumnFailsOnTypeMismatch() { + try (ColumnVector input = ColumnVector.fromBoxedInts(1, 2, null, 4, null); + ColumnVector r = ColumnVector.fromBoxedBooleans(true)) { + assertThrows(CudfException.class, () -> input.replaceNulls(r).close()); + } + } + static QuantileMethod[] methods = {LINEAR, LOWER, HIGHER, MIDPOINT, NEAREST}; static double[] quantiles = {0.0, 0.25, 0.33, 0.5, 1.0}; @@ -3339,6 +3377,69 @@ void testNansToNulls() { } } + @Test + void testIsIntegerWithBounds() { + String[] intStrings = {"A", "nan", "Inf", "-Inf", "3.5", + String.valueOf(Byte.MIN_VALUE), + String.valueOf(Byte.MIN_VALUE + 1L), + String.valueOf(Byte.MIN_VALUE - 1L), + String.valueOf(Byte.MAX_VALUE), + String.valueOf(Byte.MAX_VALUE + 1L), + String.valueOf(Byte.MAX_VALUE - 1L), + String.valueOf(Short.MIN_VALUE), + String.valueOf(Short.MIN_VALUE + 1L), + String.valueOf(Short.MIN_VALUE - 1L), + String.valueOf(Short.MAX_VALUE), + String.valueOf(Short.MAX_VALUE + 1L), + String.valueOf(Short.MAX_VALUE - 1L), + String.valueOf(Integer.MIN_VALUE), + String.valueOf(Integer.MIN_VALUE + 1L), + String.valueOf(Integer.MIN_VALUE - 1L), + String.valueOf(Integer.MAX_VALUE), + String.valueOf(Integer.MAX_VALUE + 1L), + String.valueOf(Integer.MAX_VALUE - 1L), + String.valueOf(Long.MIN_VALUE), + String.valueOf(Long.MIN_VALUE + 1L), + "-9223372036854775809", + String.valueOf(Long.MAX_VALUE), + "9223372036854775808", + String.valueOf(Long.MAX_VALUE - 1L)}; + try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings); + ColumnVector isByte = intStringCV.isInteger(DType.INT8); + ColumnVector expectedByte = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, false, true, false, true, + false, false, false, false, false, false, + false, false, false, false, false, false, + false, false, false, false, false, false); + ColumnVector isShort = intStringCV.isInteger(DType.INT16); + ColumnVector expectedShort = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, false, true, false, true, + false, false, false, false, false, false, + false, false, false, false, false, false); + ColumnVector isInt = intStringCV.isInteger(DType.INT32); + ColumnVector expectedInt = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, false, true, false, true, + false, false, false, false, false, false); + ColumnVector isLong = intStringCV.isInteger(DType.INT64); + ColumnVector expectedLong = ColumnVector.fromBoxedBooleans( + false, false, false, false, false, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, true, true, true, true, + true, true, false, true, false, true)) { + assertColumnsAreEqual(expectedByte, isByte); + assertColumnsAreEqual(expectedShort, isShort); + assertColumnsAreEqual(expectedInt, isInt); + assertColumnsAreEqual(expectedLong, isLong); + } + } + @Test void testIsInteger() { String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647", diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 4eee3e97e6e..b6350a207c1 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -369,9 +369,9 @@ void testMergeSimple() { .column(3, 2, 1, 
2, null, 3, 5, 2) .column(1, 9, 7, 3, 5, 3, 1, 10) .build(); - Table sortedTable1 = table1.orderBy(Table.asc(0), Table.desc(1)); - Table sortedTable2 = table2.orderBy(Table.asc(0), Table.desc(1)); - Table merged = Table.merge(Arrays.asList(sortedTable1, sortedTable2), Table.asc(0), Table.desc(1))) { + Table sortedTable1 = table1.orderBy(OrderByArg.asc(0), OrderByArg.desc(1)); + Table sortedTable2 = table2.orderBy(OrderByArg.asc(0), OrderByArg.desc(1)); + Table merged = Table.merge(Arrays.asList(sortedTable1, sortedTable2), OrderByArg.asc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, merged); } } @@ -388,7 +388,7 @@ void testOrderByAD() { .column(2, 1, 4, 3, 5) .column(9, 7, 5, 3, 1) .build(); - Table sortedTable = table.orderBy(Table.asc(0), Table.desc(1))) { + Table sortedTable = table.orderBy(OrderByArg.asc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, sortedTable); } } @@ -405,7 +405,7 @@ void testSortOrderSimple() { .column(2, 1, 4, 3, 5) .column(9, 7, 5, 3, 1) .build(); - ColumnVector gatherMap = table.sortOrder(Table.asc(0), Table.desc(1)); + ColumnVector gatherMap = table.sortOrder(OrderByArg.asc(0), OrderByArg.desc(1)); Table sortedTable = table.gather(gatherMap)) { assertTablesAreEqual(expected, sortedTable); } @@ -423,7 +423,7 @@ void testOrderByDD() { .column(5, 4, 3, 2, 1) .column(1, 5, 3, 9, 7) .build(); - Table sortedTable = table.orderBy(Table.desc(0), Table.desc(1))) { + Table sortedTable = table.orderBy(OrderByArg.desc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, sortedTable); } } @@ -442,7 +442,7 @@ void testOrderByWithNulls() { .column("1", "0", "2", "4", "3") .column(7, 9, 5, 1, 3) .build(); - Table sortedTable = table.orderBy(Table.asc(0), Table.desc(1))) { + Table sortedTable = table.orderBy(OrderByArg.asc(0), OrderByArg.desc(1))) { assertTablesAreEqual(expected, sortedTable); } } @@ -461,7 +461,7 @@ void testOrderByWithNullsAndStrings() { .column(null, null, 4, 3, 5) .column(9, 7, 5, 3, 1) .build(); - Table sortedTable = table.orderBy(Table.asc(0))) { + Table sortedTable = table.orderBy(OrderByArg.asc(0))) { assertTablesAreEqual(expected, sortedTable); } } @@ -867,7 +867,7 @@ void testLeftJoinWithNulls() { .column(null, null, 203, null, null, null, null, 201, 202, 204) // right .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -891,7 +891,7 @@ void testLeftJoinOnNullKeys() { .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } @@ -902,7 +902,7 @@ void testLeftJoinOnNullKeys() { .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } } @@ -919,7 +919,7 @@ void testLeftJoin() { .column( 20, 21, 22, 23, 24, 25, 26, 27, 28, 29) .build(); Table joinedTable = leftTable.onColumns(0).leftJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true)); + Table 
orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); Table expected = new Table.TestBuilder() .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common .column( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) // left @@ -945,7 +945,7 @@ void testFullJoinWithNonCommonKeys() { .column(null, null, null, null, null, 201, 200, null, 203, 202, 204, 205) // right .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(0, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -968,7 +968,7 @@ void testFullJoinOnNullKeys() { .column( 200, 202, 200, 202, null, null, null, null, null, 201, null, 203, 204, 205) // right .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(0, true), Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true), OrderByArg.asc(1, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } @@ -980,7 +980,7 @@ void testFullJoinOnNullKeys() { .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(0), false); Table orderedJoinedTable = joinedTable.orderBy( - Table.asc(0, true), Table.asc(1, true), Table.asc(2, true))) { + OrderByArg.asc(0, true), OrderByArg.asc(1, true), OrderByArg.asc(2, true))) { assertTablesAreEqual(expectedResults, orderedJoinedTable); } } @@ -997,7 +997,7 @@ void testFullJoinWithOnlyCommonKeys() { .column(200, 201, 202, 203, 204, 205, 206, 207, 208, 209) .build(); Table joinedTable = leftTable.onColumns(0).fullJoin(rightTable.onColumns(new int[]{0}), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); Table expected = new Table.TestBuilder() .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left @@ -1023,7 +1023,7 @@ void testInnerJoinWithNonCommonKeys() { .column(202, 200, 201, 203) // right .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1046,7 +1046,7 @@ void testInnerJoinOnNullKeys() { .column(202, 200, 201, 203) // right .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } @@ -1057,7 +1057,7 @@ void testInnerJoinOnNullKeys() { .column(202, 200, 203) // right .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))){ + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))){ assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1074,7 +1074,7 @@ void testInnerJoinWithOnlyCommonKeys() { .column(200, 201, 202, 203, 204, 205, 206, 207, 208, 209) .build(); Table joinedTable = leftTable.onColumns(0).innerJoin(rightTable.onColumns(new int[]{0}), true); - Table orderedJoinedTable = 
joinedTable.orderBy(Table.asc(1, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true)); Table expected = new Table.TestBuilder() .column(360, 326, 254, 306, 109, 361, 251, 335, 301, 317) // common .column(100, 101, 102, 103, 104, 105, 106, 107, 108, 109) // left @@ -1099,7 +1099,7 @@ void testLeftSemiJoin() { .column(102, 107, 108, 109) .build(); Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1116,7 +1116,7 @@ void testLeftSemiJoinWithNulls() { .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") .build(); Table joinedTable = leftTable.onColumns(0, 2).leftSemiJoin(rightTable.onColumns(0, 1), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(0, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(0, true)); Table expected = new Table.TestBuilder() .column(254, 326, 361) .column(null, 11, 17) @@ -1143,7 +1143,7 @@ void testLeftSemiJoinOnNullKeys() { .column(102, 107, 108, 109) .build(); Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } @@ -1153,7 +1153,7 @@ void testLeftSemiJoinOnNullKeys() { .column(102, 107, 109) .build(); Table joinedTable = leftTable.onColumns(0).leftSemiJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1174,7 +1174,7 @@ void testLeftAntiJoin() { .column(100, 101, 103, 104, 105, 106) .build(); Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1196,7 +1196,7 @@ void testLeftAntiJoinOnNullKeys() { .column(100, 101, 103, 104, 105, 106) .build(); Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0)); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } @@ -1206,7 +1206,7 @@ void testLeftAntiJoinOnNullKeys() { .column(100, 101, 103, 104, 105, 106, 108) .build(); Table joinedTable = leftTable.onColumns(0).leftAntiJoin(rightTable.onColumns(0), false); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(1, true))) { + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -1224,7 +1224,7 @@ void testLeftAntiJoinWithNulls() { .column("20", "21", "22", "23", "24", "25", "26", "27", "28", "29") .build(); Table joinedTable = leftTable.onColumns(0, 2).leftAntiJoin(rightTable.onColumns(0, 1), true); - Table orderedJoinedTable = joinedTable.orderBy(Table.asc(2, true)); + Table orderedJoinedTable = joinedTable.orderBy(OrderByArg.asc(2, true)); Table expected = new Table.TestBuilder() .column( 360, 326, null, 306, null, 251, 301, 317) 
.column( 10, 11, null, 13, 14, 16, 18, 19) @@ -1249,8 +1249,8 @@ void testCrossJoin() { Table joinedTable = leftTable.crossJoin(rightTable); Table orderedJoinedTable = joinedTable.orderBy( - Table.asc(0, true), - Table.asc(1, true))) { + OrderByArg.asc(0, true), + OrderByArg.asc(1, true))) { assertTablesAreEqual(expected, orderedJoinedTable); } } @@ -2297,7 +2297,7 @@ void testGroupByUniqueCount() { try (Table t3 = t1 .groupBy(0, 1) .aggregate(Aggregation.nunique().onColumn(0)); - Table sorted = t3.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + Table sorted = t3.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column( "1", "1", "1", "1") .column( 0, 1, 3, 5) @@ -2318,7 +2318,7 @@ void testGroupByUniqueCountNulls() { try (Table t3 = t1 .groupBy(0, 1) .aggregate(Aggregation.nunique(true).onColumn(0)); - Table sorted = t3.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + Table sorted = t3.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column( "1", "1", "1", "1") .column( 0, 1, 3, 5) @@ -2370,8 +2370,8 @@ void testWindowingCount() { .decimal32Column(-1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) // Decimal GBY Key .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2406,8 +2406,8 @@ void testWindowingMin() { .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .decimal64Column(2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggCol = ColumnVector.decimalFromLongs(2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2444,8 +2444,8 @@ void testWindowingMax() { .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .decimal64Column(2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggCol = ColumnVector.decimalFromLongs(2, 7, 5, 1, 9, 7, 9, 8, 2, 
8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2479,7 +2479,7 @@ void testWindowingSum() { .column(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6) // OBY Key .column(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2509,8 +2509,8 @@ void testWindowingRowNumber() { .decimal64Column(1, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L) // Decimal OBY Key .decimal64Column(2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggColumn = ColumnVector.decimalFromLongs(2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2590,7 +2590,7 @@ void testWindowingCollect() { ).build(); ColumnVector expectSortedAggColumn = ColumnVector .fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, null, 0, 6, null)) { - try (Table sorted = raw.orderBy(Table.asc(0), Table.asc(1), Table.asc(2))) { + try (Table sorted = raw.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2))) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2652,8 +2652,8 @@ void testWindowingLead() { .decimal64Column(-2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector expectDecSortedAggColumn = ColumnVector.decimalFromLongs(-2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2745,8 +2745,8 @@ void testWindowingLag() { .decimal64Column(-2, 7L, 5L, 1L, 9L, 7L, 9L, 8L, 2L, 8L, 0L, 6L, 6L) // Decimal Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); - Table decSorted = unsorted.orderBy(Table.asc(0), Table.asc(4), Table.asc(5)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); + Table decSorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(4), OrderByArg.asc(5)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6); ColumnVector decExpectSortedAggColumn = ColumnVector.decimalFromLongs(-2, 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); @@ -2833,7 +2833,7 @@ void testWindowingMean() { 
.column( 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6) // OBY Key .column( 7, 5, 3, 7, 7, 9, 8, 4, 8, 0, 4, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectedSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 3, 7, 7, 9, 8, 4, 8, 0, 4, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectedSortedAggCol, sortedAggColumn); @@ -2859,7 +2859,7 @@ void testWindowingOnMultipleDifferentColumns() { .column( 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6) // OBY Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectedSortedAggCol = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectedSortedAggCol, sortedAggColumn); @@ -2909,7 +2909,7 @@ void testWindowingWithoutGroupByColumns() { .build(); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { - try (Table sorted = unsorted.orderBy(Table.asc(0))) { + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0))) { ColumnVector sortedAggColumn = sorted.getColumn(1); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2934,7 +2934,7 @@ void testTimeRangeWindowingCount() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2962,7 +2962,7 @@ void testTimeRangeWindowingLead() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -2991,7 +2991,7 @@ void testTimeRangeWindowingMax() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3029,7 +3029,7 @@ void testTimeRangeWindowingRowNumber() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column 
.build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3056,7 +3056,7 @@ void testTimeRangeWindowingCountDescendingTimestamps() { .timestampDayColumn( 7, 6, 6, 5, 5, 4, 4, 3, 3, 3, 2, 1, 1) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.desc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.desc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3093,7 +3093,7 @@ void testTimeRangeWindowingWithoutGroupByColumns() { try (Table unsorted = new Table.TestBuilder().timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(1); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3137,7 +3137,7 @@ void testTimeRangeWindowingCountUnboundedPreceding() { .timestampDayColumn( 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3167,7 +3167,7 @@ void testTimeRangeWindowingCountUnboundedASCWithNullsFirst() { .timestampDayColumn( X, X, X, 2, 3, 5, X, X, 1, 2, 4, 5, 7) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2, true)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2, true)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3239,7 +3239,7 @@ void testTimeRangeWindowingCountUnboundedDESCWithNullsFirst() { .timestampDayColumn( X, X, X, 5, 3, 2, X, X, 7, 5, 4, 2, 1) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.desc(2, false)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.desc(2, false)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); 
assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3316,7 +3316,7 @@ void testTimeRangeWindowingCountUnboundedASCWithNullsLast() { .timestampDayColumn( 2, 3, 5, X, X, X, 1, 2, 4, 5, 7, X, X) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.asc(2, false)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2, false)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3387,7 +3387,7 @@ void testTimeRangeWindowingCountUnboundedDESCWithNullsLast() { .timestampDayColumn( 5, 3, 2, X, X, X, 7, 5, 4, 2, 1, X, X) // Timestamp Key .column( 7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8) // Agg Column .build()) { - try (Table sorted = unsorted.orderBy(Table.asc(0), Table.asc(1), Table.desc(2, true)); + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.desc(2, true)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6, 8)) { ColumnVector sortedAggColumn = sorted.getColumn(3); assertColumnsAreEqual(expectSortedAggColumn, sortedAggColumn); @@ -3463,7 +3463,7 @@ void testGroupByCountWithNulls() { .column( 1, 1, 1, null, 1, 1) .build()) { try (Table tmp = t1.groupBy(0).aggregate(count(1), count(2), count(3)); - Table t3 = tmp.orderBy(Table.asc(0, true)); + Table t3 = tmp.orderBy(OrderByArg.asc(0, true)); HostColumnVector groupCol = t3.getColumn(0).copyToHost(); HostColumnVector countCol = t3.getColumn(1).copyToHost(); HostColumnVector nullCountCol = t3.getColumn(2).copyToHost(); @@ -3500,7 +3500,7 @@ void testGroupByCountWithNullsIncluded() { .column( 1, 1, 1, null, 1, 1) .build()) { try (Table tmp = t1.groupBy(0).aggregate(count(1, true), count(2, true), count(3, true), count(3)); - Table t3 = tmp.orderBy(Table.asc(0, true)); + Table t3 = tmp.orderBy(OrderByArg.asc(0, true)); HostColumnVector groupCol = t3.getColumn(0).copyToHost(); HostColumnVector countCol = t3.getColumn(1).copyToHost(); HostColumnVector nullCountCol = t3.getColumn(2).copyToHost(); @@ -3547,7 +3547,7 @@ void testGroupByCountWithCollapsingNulls() { .build(); try (Table tmp = t1.groupBy(options, 0).aggregate(count(1), count(2), count(3)); - Table t3 = tmp.orderBy(Table.asc(0, true)); + Table t3 = tmp.orderBy(OrderByArg.asc(0, true)); HostColumnVector groupCol = t3.getColumn(0).copyToHost(); HostColumnVector countCol = t3.getColumn(1).copyToHost(); HostColumnVector nullCountCol = t3.getColumn(2).copyToHost(); @@ -3615,7 +3615,7 @@ void testGroupByArgMax() { try (Table t3 = t1.groupBy(0, 1) .aggregate(Aggregation.argMax().onColumn(2)); Table sorted = t3 - .orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + .orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column(1, 1, 1, 1) .column(0, 1, 2, 3) @@ -3637,7 +3637,7 @@ void testGroupByArgMin() { try (Table t3 = t1.groupBy(0, 1) .aggregate(Aggregation.argMin().onColumn(2)); Table sorted = t3 - .orderBy(Table.asc(0), Table.asc(1), Table.asc(2)); + .orderBy(OrderByArg.asc(0), OrderByArg.asc(1), OrderByArg.asc(2)); Table expected = new Table.TestBuilder() .column(1, 1, 1, 1) .column(0, 1, 2, 3) @@ -3654,7 +3654,7 @@ void testGroupByMinBool() { .column(true, null, false, true, null, null) .column( 1, 1, 2, 2, 3, 3).build(); 
Table other = t1.groupBy(1).aggregate(min(0)); - Table ordered = other.orderBy(Table.asc(0)); + Table ordered = other.orderBy(OrderByArg.asc(0)); Table expected = new Table.TestBuilder() .column(1, 2, 3) .column (true, false, null) @@ -3669,7 +3669,7 @@ void testGroupByMaxBool() { .column(false, null, false, true, null, null) .column( 1, 1, 2, 2, 3, 3).build(); Table other = t1.groupBy(1).aggregate(max(0)); - Table ordered = other.orderBy(Table.asc(0)); + Table ordered = other.orderBy(OrderByArg.asc(0)); Table expected = new Table.TestBuilder() .column(1, 2, 3) .column (false, true, null) @@ -3695,7 +3695,7 @@ void testGroupByDuplicateAggregates() { .column( 1, 2, 2, 1).build()) { try (Table t3 = t1.groupBy(0, 1) .aggregate(max(2), min(2), min(2), max(2), min(2), count(1)); - Table t4 = t3.orderBy(Table.asc(2))) { + Table t4 = t3.orderBy(OrderByArg.asc(2))) { // verify t4 assertEquals(4, t4.getRowCount()); assertTablesAreEqual(t4, expected); diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 840f0c98987..7138bb49743 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -41,7 +41,7 @@ class AggregationKind(Enum): ALL = libcudf_aggregation.aggregation.Kind.ALL SUM_OF_SQUARES = libcudf_aggregation.aggregation.Kind.SUM_OF_SQUARES MEAN = libcudf_aggregation.aggregation.Kind.MEAN - VARIANCE = libcudf_aggregation.aggregation.Kind.VARIANCE + VAR = libcudf_aggregation.aggregation.Kind.VARIANCE STD = libcudf_aggregation.aggregation.Kind.STD MEDIAN = libcudf_aggregation.aggregation.Kind.MEDIAN QUANTILE = libcudf_aggregation.aggregation.Kind.QUANTILE @@ -50,13 +50,12 @@ class AggregationKind(Enum): NUNIQUE = libcudf_aggregation.aggregation.Kind.NUNIQUE NTH = libcudf_aggregation.aggregation.Kind.NTH_ELEMENT COLLECT = libcudf_aggregation.aggregation.Kind.COLLECT - COLLECT_SET = libcudf_aggregation.aggregation.Kind.COLLECT_SET + UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX CUDA = libcudf_aggregation.aggregation.Kind.CUDA cdef class Aggregation: - def __init__(self, op, **kwargs): self.c_obj = move(make_aggregation(op, kwargs)) @@ -246,7 +245,7 @@ cdef class _AggregationFactory: return agg @classmethod - def collect_set(cls): + def unique(cls): cdef Aggregation agg = Aggregation.__new__(Aggregation) agg.c_obj = move(libcudf_aggregation.make_collect_set_aggregation()) return agg diff --git a/python/cudf/cudf/_lib/cpp/concatenate.pxd b/python/cudf/cudf/_lib/cpp/concatenate.pxd index b5ec3bcb7d4..c776d23aa85 100644 --- a/python/cudf/cudf/_lib/cpp/concatenate.pxd +++ b/python/cudf/cudf/_lib/cpp/concatenate.pxd @@ -5,12 +5,22 @@ from libcpp.vector cimport vector from cudf._lib.cpp.column.column cimport column, column_view from cudf._lib.cpp.table.table cimport table, table_view -from rmm._lib.device_buffer cimport device_buffer +from cudf._lib.cpp.utilities.host_span cimport host_span +from rmm._lib.device_buffer cimport device_buffer cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: + # The versions of concatenate taking vectors don't exist in libcudf + # C++, but passing a vector works because a host_span is implicitly + # constructable from a vector. 
In case they are needed in the future, + # host_span versions can be added, e.g: + # + # cdef device_buffer concatenate_masks "cudf::concatenate_masks"( + # host_span[column_view] views + # ) except + + cdef device_buffer concatenate_masks "cudf::concatenate_masks"( - const vector[column_view] columns + const vector[column_view] views ) except + cdef unique_ptr[column] concatenate_columns "cudf::concatenate"( const vector[column_view] columns diff --git a/python/cudf/cudf/_lib/cpp/labeling.pxd b/python/cudf/cudf/_lib/cpp/labeling.pxd new file mode 100644 index 00000000000..996ae4f9e38 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/labeling.pxd @@ -0,0 +1,19 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.column.column_view cimport column_view + +cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: + ctypedef enum inclusive: + YES "cudf::inclusive::YES" + NO "cudf::inclusive::NO" + + cdef unique_ptr[column] label_bins ( + const column_view &input, + const column_view &left_edges, + inclusive left_inclusive, + const column_view &right_edges, + inclusive right_inclusive + ) except + diff --git a/python/cudf/cudf/_lib/cpp/lists/contains.pxd b/python/cudf/cudf/_lib/cpp/lists/contains.pxd new file mode 100644 index 00000000000..ec2f61d08fa --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/lists/contains.pxd @@ -0,0 +1,15 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from cudf._lib.cpp.scalar.scalar cimport scalar + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view + +from cudf._lib.cpp.column.column_view cimport column_view + +cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: + cdef unique_ptr[column] contains( + lists_column_view lists, + scalar search_key, + ) except + diff --git a/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd b/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd new file mode 100644 index 00000000000..cbbe3710347 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/utilities/host_span.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ +from libcpp.vector cimport vector + +cdef extern from "cudf/utilities/span.hpp" namespace "cudf" nogil: + cdef cppclass host_span[T]: + host_span() except + + host_span(vector[T]) except + diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 0f5cdc73d3b..713a2274a77 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -35,13 +35,15 @@ _GROUPBY_AGGS = { "median", "nunique", "nth", - "collect" + "collect", + "unique", } _CATEGORICAL_AGGS = { "count", "size", "nunique", + "unique", } _STRING_AGGS = { @@ -51,13 +53,15 @@ _STRING_AGGS = { "min", "nunique", "nth", - "collect" + "collect", + "unique", } _LIST_AGGS = { - "collect" + "collect", } + cdef class GroupBy: cdef unique_ptr[libcudf_groupby.groupby] c_obj cdef dict __dict__ @@ -145,12 +149,23 @@ cdef class GroupBy: vector[libcudf_groupby.aggregation_result] ] c_result - with nogil: - c_result = move( - self.c_obj.get()[0].aggregate( - c_agg_requests + try: + with nogil: + c_result = move( + self.c_obj.get()[0].aggregate( + c_agg_requests + ) ) - ) + except RuntimeError as e: + # TODO: remove this try..except after + # https://github.com/rapidsai/cudf/issues/7611 + # is resolved + if ("make_empty_column") in str(e): + raise NotImplementedError( + "Aggregation not supported for empty columns" + ) from e + else: + raise grouped_keys = Table.from_unique_ptr( move(c_result.first), diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx new file mode 100644 index 00000000000..1b553024347 --- /dev/null +++ b/python/cudf/cudf/_lib/labeling.pyx @@ -0,0 +1,47 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +import numpy as np +from enum import IntEnum + +from libc.stdint cimport uint32_t +from libcpp cimport bool as cbool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.column cimport Column +from cudf._lib.replace import replace_nulls + +from cudf._lib.cpp.labeling cimport inclusive +from cudf._lib.cpp.labeling cimport label_bins as cpp_label_bins +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.column.column_view cimport column_view + + +# Note that the parameter input shadows a Python built-in in the local scope, +# but I'm not too concerned about that since there's no use-case for actual +# input in this context. 
+def label_bins(Column input, Column left_edges, cbool left_inclusive, + Column right_edges, cbool right_inclusive): + cdef inclusive c_left_inclusive = \ + inclusive.YES if left_inclusive else inclusive.NO + cdef inclusive c_right_inclusive = \ + inclusive.YES if right_inclusive else inclusive.NO + + cdef column_view input_view = input.view() + cdef column_view left_edges_view = left_edges.view() + cdef column_view right_edges_view = right_edges.view() + + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_label_bins( + input_view, + left_edges_view, + c_left_inclusive, + right_edges_view, + c_right_inclusive, + ) + ) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 2971aad8313..7f745e58c67 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -17,6 +17,9 @@ from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.column.column cimport column +from cudf._lib.scalar cimport DeviceScalar +from cudf._lib.cpp.scalar.scalar cimport scalar + from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type, order, null_order @@ -29,6 +32,8 @@ from cudf._lib.types cimport ( ) from cudf.core.dtypes import ListDtype +from cudf._lib.cpp.lists.contains cimport contains + from cudf._lib.cpp.lists.extract cimport extract_list_element @@ -93,6 +98,7 @@ def extract_element(Column col, size_type index): cdef shared_ptr[lists_column_view] list_view = ( make_shared[lists_column_view](col.view()) ) + cdef unique_ptr[column] c_result with nogil: @@ -100,3 +106,21 @@ def extract_element(Column col, size_type index): result = Column.from_unique_ptr(move(c_result)) return result + + +def contains_scalar(Column col, DeviceScalar search_key): + cdef shared_ptr[lists_column_view] list_view = ( + make_shared[lists_column_view](col.view()) + ) + cdef const scalar* search_key_value = search_key.get_raw_ptr() + + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(contains( + list_view.get()[0], + search_key_value[0], + )) + + result = Column.from_unique_ptr(move(c_result)) + return result diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 0158df46cc4..d8b4fbbbe4b 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -294,7 +294,9 @@ cpdef write_parquet( cdef unique_ptr[cudf_io_types.data_sink] _data_sink cdef cudf_io_types.sink_info sink = make_sink_info(path, _data_sink) - if index is not False and not isinstance(table._index, cudf.RangeIndex): + if index is True or ( + index is None and not isinstance(table._index, cudf.RangeIndex) + ): tv = table.view() tbl_meta = make_unique[table_input_metadata](tv) for level, idx_name in enumerate(table._index.names): diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 4fe795e57a9..13eedb34c18 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -99,15 +99,31 @@ cpdef generate_pandas_metadata(Table table, index): idx = table.index if isinstance(idx, cudf.core.index.RangeIndex): - descr = { - "kind": "range", - "name": table.index.name, - "start": table.index.start, - "stop": table.index.stop, - "step": table.index.step, - } + if index is None: + descr = { + "kind": "range", + "name": table.index.name, + "start": 
table.index.start, + "stop": table.index.stop, + "step": table.index.step, + } + else: + # When `index=True`, RangeIndex needs to be materialized. + materialized_idx = cudf.Index(idx._values, name=idx.name) + descr = \ + _index_level_name( + index_name=materialized_idx.name, + level=level, + column_names=col_names + ) + index_levels.append(materialized_idx) else: - descr = _index_level_name(idx.name, level, col_names) + descr = \ + _index_level_name( + index_name=idx.name, + level=level, + column_names=col_names + ) if is_categorical_dtype(idx): raise ValueError( "'category' column dtypes are currently not " diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index a563248f4ab..0bacbe04356 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -178,6 +178,8 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: return cudf.Scalar(None, dtype=other.dtype) return cudf.Scalar(other) + elif other is None: + return cudf.Scalar(other, dtype=self.dtype) else: raise TypeError(f"cannot normalize {type(other)}") diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 7fbe602f07a..4ba675516ae 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -72,6 +72,9 @@ def binary_operator(self, op, other, reflect=False): result.dtype.precision = _binop_precision(self.dtype, other.dtype, op) return result + def _apply_scan_op(self, op: str) -> ColumnBase: + return libcudf.reduce.scan(op, self, True) + def as_decimal_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.DecimalColumn": diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 2204fbdea1f..b7f34e8c007 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -7,7 +7,12 @@ import cudf from cudf._lib.copying import segmented_gather -from cudf._lib.lists import count_elements, extract_element, sort_lists +from cudf._lib.lists import ( + contains_scalar, + count_elements, + extract_element, + sort_lists, +) from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column from cudf.core.column.methods import ColumnMethodsMixin @@ -210,6 +215,44 @@ def get(self, index): else: raise IndexError("list index out of range") + def contains(self, search_key): + """ + Creates a column of bool values indicating whether the specified scalar + is an element of each row of a list column. 
+ + Parameters + ---------- + search_key : scalar + element being searched for in each row of the list column + + Returns + ------- + Column + + Examples + -------- + >>> s = cudf.Series([[1, 2, 3], [3, 4, 5], [4, 5, 6]]) + >>> s.list.contains(4) + Series([False, True, True]) + dtype: bool + """ + try: + res = self._return_or_inplace( + contains_scalar(self._column, search_key.device_value) + ) + except RuntimeError as e: + if ( + "Type/Scale of search key does not" + "match list column element type" in str(e) + ): + raise TypeError( + "Type/Scale of search key does not" + "match list column element type" + ) from e + raise + else: + return res + @property def leaves(self): """ diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index e22b511db01..a39638106bb 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -275,6 +275,8 @@ def normalize_binop_value(self, other) -> BinaryOperand: return cudf.Scalar(other) elif np.isscalar(other): return cudf.Scalar(other) + elif other is None: + return cudf.Scalar(other, dtype=self.dtype) else: raise TypeError(f"cannot normalize {type(other)}") diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bd009a9ad84..b5f57356698 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3841,10 +3841,32 @@ def argsort(self, ascending=True, na_position="last"): - Support axis='index' only. - Not supporting: inplace, kind - Ascending can be a list of bools to control per column + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'a':[10, 0, 2], 'b':[-10, 10, 1]}) + >>> df + a b + 0 10 -10 + 1 0 10 + 2 2 1 + >>> inds = df.argsort() + >>> inds + 0 1 + 1 2 + 2 0 + dtype: int32 + >>> df.take(inds) + a b + 1 0 10 + 2 2 1 + 0 10 -10 """ - return self._get_sorted_inds( + inds_col = self._get_sorted_inds( ascending=ascending, na_position=na_position ) + return cudf.Series(inds_col) @annotate("SORT_INDEX", color="red", domain="cudf_python") def sort_index( diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9d4643da637..a664c4fb182 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4725,8 +4725,9 @@ def cumsum(self, axis=0, skipna=True, *args, **kwargs): result_col[first_index:] = None # pandas always returns int64 dtype if original dtype is int or `bool` - if np.issubdtype(result_col.dtype, np.integer) or np.issubdtype( - result_col.dtype, np.bool_ + if not is_decimal_dtype(result_col.dtype) and ( + np.issubdtype(result_col.dtype, np.integer) + or np.issubdtype(result_col.dtype, np.bool_) ): return Series( result_col.astype(np.int64)._apply_scan_op("sum"), @@ -4774,6 +4775,11 @@ def cumprod(self, axis=0, skipna=True, *args, **kwargs): if axis not in (None, 0): raise NotImplementedError("axis parameter is not implemented yet") + if is_decimal_dtype(self.dtype): + raise NotImplementedError( + "cumprod does not currently support decimal types" + ) + skipna = True if skipna is None else skipna if skipna: diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 18f2d7e474b..eb8aaaadd51 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1773,6 +1773,51 @@ def decimal_series(input, dtype): utils.assert_eq(expect, got) +@pytest.mark.parametrize( + "dtype", + [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", 
+ "int64", + "float32", + "float64", + "str", + "datetime64[ns]", + "datetime64[us]", + "datetime64[ms]", + "datetime64[s]", + "timedelta64[ns]", + "timedelta64[us]", + "timedelta64[ms]", + "timedelta64[s]", + ], +) +@pytest.mark.parametrize("null_scalar", [None, cudf.NA, np.datetime64("NaT")]) +@pytest.mark.parametrize("cmpop", _cmpops) +def test_column_null_scalar_comparison(dtype, null_scalar, cmpop): + # This test is meant to validate that comparing + # a series of any dtype with a null scalar produces + # a new series where all the elements are . + + if isinstance(null_scalar, np.datetime64): + if np.dtype(dtype).kind not in "mM": + pytest.skip() + null_scalar = null_scalar.astype(dtype) + + dtype = np.dtype(dtype) + + data = [1, 2, 3, 4, 5] + sr = cudf.Series(data, dtype=dtype) + result = cmpop(sr, null_scalar) + + assert result.isnull().all() + + @pytest.mark.parametrize("fn", ["eq", "ne", "lt", "gt", "le", "ge"]) def test_equality_ops_index_mismatch(fn): a = cudf.Series( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 76a02d5e74a..d72b88f1713 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8495,3 +8495,24 @@ def test_explode(data, labels, ignore_index, p_index, label_to_explode): got = gdf.explode(label_to_explode, ignore_index) assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize( + "df,ascending,expected", + [ + ( + cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}), + True, + cudf.Series([1, 2, 0], dtype="int32"), + ), + ( + cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}), + False, + cudf.Series([0, 2, 1], dtype="int32"), + ), + ], +) +def test_dataframe_argsort(df, ascending, expected): + actual = df.argsort(ascending=ascending) + + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 8011510d340..a96db59dee3 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -12,7 +12,13 @@ import cudf from cudf.core import DataFrame, Series from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.tests.utils import ( + DATETIME_TYPES, + SIGNED_TYPES, + TIMEDELTA_TYPES, + assert_eq, + assert_exceptions_equal, +) _now = np.datetime64("now") _tomorrow = _now + np.timedelta64(1, "D") @@ -1532,3 +1538,26 @@ def test_groupby_nonempty_no_keys(pdf): lambda: gdf.groupby([]), compare_error_message=False, ) + + +@pytest.mark.parametrize( + "by,data", + [ + # ([], []), # error? 
+ ([1, 1, 2, 2], [0, 0, 1, 1]), + ([1, 2, 3, 4], [0, 0, 0, 0]), + ([1, 2, 1, 2], [0, 1, 1, 1]), + ], +) +@pytest.mark.parametrize( + "dtype", + SIGNED_TYPES + DATETIME_TYPES + TIMEDELTA_TYPES + ["string", "category"], +) +def test_groupby_unique(by, data, dtype): + pdf = pd.DataFrame({"by": by, "data": data}) + pdf["data"] = pdf["data"].astype(dtype) + gdf = cudf.from_pandas(pdf) + + expect = pdf.groupby("by")["data"].unique() + got = gdf.groupby("by")["data"].unique() + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 2ab1382b34e..5645ce60596 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -246,3 +246,38 @@ def test_get_nulls(): with pytest.raises(IndexError, match="list index out of range"): sr = cudf.Series([[], [], []]) sr.list.get(100) + + +@pytest.mark.parametrize( + "data, scalar, expect", + [ + ([[1, 2, 3], []], 1, [True, False],), + ([[1, 2, 3], [], [3, 4, 5]], 6, [False, False, False],), + ([[1.0, 2.0, 3.0], None, []], 2.0, [True, None, False],), + ([[None, "b", "c"], [], ["b", "e", "f"]], "b", [True, False, True],), + ([[None, 2, 3], None, []], 1, [None, None, False]), + ([[None, "b", "c"], [], ["b", "e", "f"]], "d", [None, False, False],), + ], +) +def test_contains_scalar(data, scalar, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect) + got = sr.list.contains(cudf.Scalar(scalar, sr.dtype.element_type)) + assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data, expect", + [ + ([[1, 2, 3], []], [None, None],), + ([[1.0, 2.0, 3.0], None, []], [None, None, None],), + ([[None, 2, 3], [], None], [None, None, None],), + ([[1, 2, 3], [3, 4, 5]], [None, None],), + ([[], [], []], [None, None, None],), + ], +) +def test_contains_null_search_key(data, expect): + sr = cudf.Series(data) + expect = cudf.Series(expect, dtype="bool") + got = sr.list.contains(cudf.Scalar(cudf.NA, sr.dtype.element_type)) + assert_eq(expect, got) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index a7a11c95e30..fe418d1ade1 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index a7a11c95e30..fe418d1ade1 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+
 import datetime
 import math
 import os
@@ -1718,24 +1719,24 @@ def test_parquet_nullable_boolean(tmpdir, engine):
     ],
 )
 @pytest.mark.parametrize("index", [None, True, False])
-def test_parquet_index(tmpdir, pdf, index):
-    pandas_path = tmpdir.join("pandas_index.parquet")
-    cudf_path = tmpdir.join("pandas_index.parquet")
+def test_parquet_index(pdf, index):
+    pandas_buffer = BytesIO()
+    cudf_buffer = BytesIO()
 
     gdf = cudf.from_pandas(pdf)
 
-    pdf.to_parquet(pandas_path, index=index)
-    gdf.to_parquet(cudf_path, index=index)
+    pdf.to_parquet(pandas_buffer, index=index)
+    gdf.to_parquet(cudf_buffer, index=index)
 
-    expected = pd.read_parquet(cudf_path)
-    actual = cudf.read_parquet(cudf_path)
+    expected = pd.read_parquet(cudf_buffer)
+    actual = cudf.read_parquet(pandas_buffer)
 
-    assert_eq(expected, actual)
+    assert_eq(expected, actual, check_index_type=True)
 
-    expected = pd.read_parquet(pandas_path)
-    actual = cudf.read_parquet(pandas_path)
+    expected = pd.read_parquet(pandas_buffer)
+    actual = cudf.read_parquet(cudf_buffer)
 
-    assert_eq(expected, actual)
+    assert_eq(expected, actual, check_index_type=True)
 
 
 @pytest.mark.parametrize("engine", ["cudf", "pyarrow"])
diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py
index dce65947460..f7e8c5a8563 100644
--- a/python/cudf/cudf/tests/test_scan.py
+++ b/python/cudf/cudf/tests/test_scan.py
@@ -6,6 +6,7 @@
 
 import cudf
 from cudf.tests.utils import INTEGER_TYPES, NUMERIC_TYPES, assert_eq, gen_rand
+from cudf.core.dtypes import Decimal64Dtype
 
 params_sizes = [0, 1, 2, 5]
 
@@ -61,6 +62,21 @@ def test_cumsum_masked():
     assert_eq(got, expected)
 
 
+@pytest.mark.parametrize(
+    "dtype",
+    [Decimal64Dtype(8, 4), Decimal64Dtype(10, 5), Decimal64Dtype(12, 7)],
+)
+def test_cumsum_decimal(dtype):
+    data = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"]
+    gser = cudf.Series(data).astype(dtype)
+    pser = pd.Series(data, dtype="float64")
+
+    got = gser.cumsum()
+    expected = cudf.Series.from_pandas(pser.cumsum()).astype(dtype)
+
+    assert_eq(got, expected)
+
+
 @pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
 def test_cummin(dtype, nelem):
     if dtype == np.int8:
@@ -103,6 +119,21 @@ def test_cummin_masked():
     assert_eq(gs.cummin(), expected)
 
 
+@pytest.mark.parametrize(
+    "dtype",
+    [Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)],
+)
+def test_cummin_decimal(dtype):
+    data = ["8394.294", np.nan, "-9940.444", np.nan, "-23.928"]
+    gser = cudf.Series(data).astype(dtype)
+    pser = pd.Series(data, dtype="float64")
+
+    got = gser.cummin()
+    expected = cudf.Series.from_pandas(pser.cummin()).astype(dtype)
+
+    assert_eq(got, expected)
+
+
 @pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
 def test_cummax(dtype, nelem):
     if dtype == np.int8:
@@ -145,6 +176,21 @@ def test_cummax_masked():
     assert_eq(gs.cummax(), expected)
 
 
+@pytest.mark.parametrize(
+    "dtype",
+    [Decimal64Dtype(8, 4), Decimal64Dtype(11, 6), Decimal64Dtype(14, 7)],
+)
+def test_cummax_decimal(dtype):
+    data = [np.nan, "54.203", "8.222", "644.32", "-562.272"]
+    gser = cudf.Series(data).astype(dtype)
+    pser = pd.Series(data, dtype="float64")
+
+    got = gser.cummax()
+    expected = cudf.Series.from_pandas(pser.cummax()).astype(dtype)
+
+    assert_eq(got, expected)
+
+
 @pytest.mark.parametrize("dtype,nelem", list(_gen_params()))
 def test_cumprod(dtype, nelem):
     if dtype == np.int8:
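The new decimal scan tests boil down to the following usage; a minimal sketch assuming a working cudf build, with precision and scale chosen to cover the sample values (variable names are illustrative):

    import cudf
    from cudf.core.dtypes import Decimal64Dtype

    # Strings cast cleanly to fixed-point, mirroring the test setup above.
    gser = cudf.Series(["243.32", "48.245", "-7234.298"]).astype(Decimal64Dtype(10, 5))

    # cumsum/cummin/cummax now run directly on decimal columns and,
    # per the tests above, the results keep the decimal dtype.
    print(gser.cumsum())
    print(gser.cummin())
    print(gser.cummax())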
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 8875a36dba8..8af225ecb58 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -154,7 +154,15 @@ def is_numerical_dtype(obj):
 
 
 def is_string_dtype(obj):
-    return pd.api.types.is_string_dtype(obj) and not is_categorical_dtype(obj)
+    return (
+        pd.api.types.is_string_dtype(obj)
+        # Reject all cudf extension types.
+        and not is_categorical_dtype(obj)
+        and not is_decimal_dtype(obj)
+        and not is_list_dtype(obj)
+        and not is_struct_dtype(obj)
+        and not is_interval_dtype(obj)
+    )
 
 
 def is_datetime_dtype(obj):
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 5d52d6c7da4..16511627aa2 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 
 import datetime
 import os
@@ -193,7 +193,10 @@
 index : bool, default None
     If ``True``, include the dataframe's index(es) in the file output. If
     ``False``, they will not be written to the file. If ``None``, the
-    engine's default behavior will be used.
+    engine's default behavior will be used. However, instead of being saved
+    as values, the ``RangeIndex`` will be stored as a range in the metadata
+    so it doesn't require much space and is faster. Other indexes will
+    be included as columns in the file output.
 partition_cols : list, optional, default None
     Column names by which to partition the dataset
     Columns are partitioned in the order they are given
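To illustrate the documented index behaviour, a small round-trip sketch (buffer and frame names are hypothetical; assumes a working cudf build):

    from io import BytesIO

    import cudf

    gdf = cudf.DataFrame({"a": [1, 2, 3]})  # default RangeIndex

    buf = BytesIO()
    # index=None: the RangeIndex is recorded as a range in the parquet
    # metadata instead of being materialized as a data column.
    gdf.to_parquet(buf, index=None)

    buf.seek(0)
    print(cudf.read_parquet(buf).index)  # expected: RangeIndex(start=0, stop=3, step=1)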