diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index 88a48ea2e3b..29f6265ec63 100755 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -1,6 +1,7 @@ #!/bin/bash # Copyright (c) 2018-2022, NVIDIA CORPORATION. # Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh +# Copyright (c) 2020-2022, NVIDIA CORPORATION. set -e diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 5575b69c226..f07d7984cd1 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Copyright (c) 2020-2022, NVIDIA CORPORATION. ######################## # cuDF Version Updater # ######################## diff --git a/ci/utils/nbtestlog2junitxml.py b/ci/utils/nbtestlog2junitxml.py index 6a421279112..14384af3225 100644 --- a/ci/utils/nbtestlog2junitxml.py +++ b/ci/utils/nbtestlog2junitxml.py @@ -1,3 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # Generate a junit-xml file from parsing a nbtest log import re diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9936db5b2fa..d9422edaa8f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -156,6 +156,21 @@ include(cmake/Modules/JitifyPreprocessKernels.cmake) # find cuFile include(cmake/Modules/FindcuFile.cmake) +# Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved +if(NOT BUILD_SHARED_LIBS) + include("${rapids-cmake-dir}/export/find_package_file.cmake") + list(APPEND METADATA_KINDS BUILD INSTALL) + foreach(METADATA_KIND IN LISTS METADATA_KINDS) + rapids_export_find_package_file( + ${METADATA_KIND} "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports + ) + rapids_export_package(${METADATA_KIND} cuco cudf-exports) + rapids_export_package(${METADATA_KIND} ZLIB cudf-exports) + rapids_export_package(${METADATA_KIND} cuFile cudf-exports) + rapids_export_package(${METADATA_KIND} nvcomp cudf-exports) + endforeach() +endif() + # ################################################################################################## # * library targets ------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp index 21e5db8ca8f..67ea6129a74 100644 --- a/cpp/benchmarks/column/concatenate.cpp +++ b/cpp/benchmarks/column/concatenate.cpp @@ -45,7 +45,7 @@ static void BM_concatenate(benchmark::State& state) auto input_columns = input->view(); std::vector column_views(input_columns.begin(), input_columns.end()); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default); @@ -87,7 +87,7 @@ static void BM_concatenate_tables(benchmark::State& state) return table->view(); }); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default); @@ -146,7 +146,7 @@ static void BM_concatenate_strings(benchmark::State& state) return static_cast(col); }); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default); diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 3af64b0945a..b6a37453a13 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -87,6 +87,15 @@ T get_distribution_mean(distribution_params const& dist) } } +/** + * @brief Computes the average element size in a column, given the data profile. + * + * Random distribution parameters like average string length and maximum list nesting level affect + * the element size of non-fixed-width columns. For lists and structs, `avg_element_size` is called + * recursively to determine the size of nested columns. + */ +size_t avg_element_size(data_profile const& profile, cudf::data_type dtype); + // Utilities to determine the mean size of an element, given the data profile template ())> size_t non_fixed_width_size(data_profile const& profile) @@ -112,10 +121,22 @@ size_t non_fixed_width_size(data_profile const& profile) { auto const dist_params = profile.get_distribution_params(); auto const single_level_mean = get_distribution_mean(dist_params.length_params); - auto const element_size = cudf::size_of(cudf::data_type{dist_params.element_type}); + auto const element_size = avg_element_size(profile, cudf::data_type{dist_params.element_type}); return element_size * pow(single_level_mean, dist_params.max_depth); } +template <> +size_t non_fixed_width_size(data_profile const& profile) +{ + auto const dist_params = profile.get_distribution_params(); + return std::accumulate(dist_params.leaf_types.cbegin(), + dist_params.leaf_types.cend(), + 0ul, + [&](auto& sum, auto type_id) { + return sum + avg_element_size(profile, cudf::data_type{type_id}); + }); +} + struct non_fixed_width_size_fn { template size_t operator()(data_profile const& profile) @@ -527,14 +548,6 @@ std::unique_ptr create_random_column(data_prof CUDF_FAIL("not implemented yet"); } -template <> -std::unique_ptr create_random_column(data_profile const& profile, - thrust::minstd_rand& engine, - cudf::size_type num_rows) -{ - CUDF_FAIL("not implemented yet"); -} - /** * @brief Functor to dispatch create_random_column calls. */ @@ -549,6 +562,93 @@ struct create_rand_col_fn { } }; +/** + * @brief Calculates the number of direct parents needed to generate a struct column hierarchy with + * lowest maximum number of children in any nested column. + * + * Used to generate an "evenly distributed" struct column hierarchy with the given number of leaf + * columns and nesting levels. The column tree is considered evenly distributed if all columns have + * nearly the same number of child columns (difference not larger than one). + */ +int num_direct_parents(int num_lvls, int num_leaf_columns) +{ + // Estimated average number of children in the hierarchy; + auto const num_children_avg = std::pow(num_leaf_columns, 1. / num_lvls); + // Minimum number of children columns for any column in the hierarchy + int const num_children_min = std::floor(num_children_avg); + // Maximum number of children columns for any column in the hierarchy + int const num_children_max = num_children_min + 1; + + // Minimum number of columns needed so that their number of children does not exceed the maximum + int const min_for_current_nesting = std::ceil((double)num_leaf_columns / num_children_max); + // Minimum number of columns needed so that columns at the higher levels have at least the minimum + // number of children + int const min_for_upper_nesting = std::pow(num_children_min, num_lvls - 1); + // Both conditions need to be satisfied + return std::max(min_for_current_nesting, min_for_upper_nesting); +} + +template <> +std::unique_ptr create_random_column(data_profile const& profile, + thrust::minstd_rand& engine, + cudf::size_type num_rows) +{ + auto const dist_params = profile.get_distribution_params(); + + // Generate leaf columns + std::vector> children; + children.reserve(dist_params.leaf_types.size()); + std::transform(dist_params.leaf_types.cbegin(), + dist_params.leaf_types.cend(), + std::back_inserter(children), + [&](auto& type_id) { + return cudf::type_dispatcher( + cudf::data_type(type_id), create_rand_col_fn{}, profile, engine, num_rows); + }); + + auto valid_dist = + random_value_fn(distribution_params{1. - profile.get_null_frequency().value_or(0)}); + + // Generate the column bottom-up + for (int lvl = dist_params.max_depth; lvl > 0; --lvl) { + // Generating the next level + std::vector> parents; + parents.resize(num_direct_parents(lvl, children.size())); + + auto current_child = children.begin(); + for (auto current_parent = parents.begin(); current_parent != parents.end(); ++current_parent) { + auto [null_mask, null_count] = [&]() { + if (profile.get_null_frequency().has_value()) { + auto valids = valid_dist(engine, num_rows); + return cudf::detail::valid_if(valids.begin(), valids.end(), thrust::identity{}); + } + return std::pair{}; + }(); + + // Adopt remaining children as evenly as possible + auto const num_to_adopt = cudf::util::div_rounding_up_unsafe( + std::distance(current_child, children.end()), std::distance(current_parent, parents.end())); + CUDF_EXPECTS(num_to_adopt > 0, "No children columns left to adopt"); + + std::vector> children_to_adopt; + children_to_adopt.insert(children_to_adopt.end(), + std::make_move_iterator(current_child), + std::make_move_iterator(current_child + num_to_adopt)); + current_child += children_to_adopt.size(); + + *current_parent = cudf::make_structs_column( + num_rows, std::move(children_to_adopt), null_count, std::move(null_mask)); + } + + if (lvl == 1) { + CUDF_EXPECTS(parents.size() == 1, "There should be one top-level column"); + return std::move(parents.front()); + } + children = std::move(parents); + } + CUDF_FAIL("Reached unreachable code in struct column creation"); +} + template struct clamp_down : public thrust::unary_function { T max; diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index c955f60f97e..8a4e3783da5 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -171,6 +171,15 @@ struct distribution_params +struct distribution_params>> { + std::vector leaf_types; + cudf::size_type max_depth; +}; + // Present for compilation only. To be implemented once reader/writers support the fixed width type. template struct distribution_params()>> { @@ -214,6 +223,8 @@ class data_profile { distribution_params string_dist_desc{{distribution_id::NORMAL, 0, 32}}; distribution_params list_dist_desc{ cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 100}, 2}; + distribution_params struct_dist_desc{ + {cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2}; std::map> decimal_params; double bool_probability = 0.5; @@ -281,6 +292,12 @@ class data_profile { return list_dist_desc; } + template >* = nullptr> + distribution_params get_distribution_params() const + { + return struct_dist_desc; + } + template ()>* = nullptr> distribution_params get_distribution_params() const { @@ -357,8 +374,28 @@ class data_profile { void set_cardinality(cudf::size_type c) { cardinality = c; } void set_avg_run_length(cudf::size_type avg_rl) { avg_run_length = avg_rl; } - void set_list_depth(cudf::size_type max_depth) { list_dist_desc.max_depth = max_depth; } + void set_list_depth(cudf::size_type max_depth) + { + CUDF_EXPECTS(max_depth > 0, "List depth must be positive"); + list_dist_desc.max_depth = max_depth; + } + void set_list_type(cudf::type_id type) { list_dist_desc.element_type = type; } + + void set_struct_depth(cudf::size_type max_depth) + { + CUDF_EXPECTS(max_depth > 0, "Struct depth must be positive"); + struct_dist_desc.max_depth = max_depth; + } + + void set_struct_types(std::vector const& types) + { + CUDF_EXPECTS( + std::none_of( + types.cbegin(), types.cend(), [](auto& type) { return type == cudf::type_id::STRUCT; }), + "Cannot include STRUCT as its own subtype"); + struct_dist_desc.leaf_types = types; + } }; /** diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index 5f23cbbafdd..e153abee3a3 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 3743be8bd5a..afe0cc77a4c 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index c74ee191d4e..2ed534d5333 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp index 29d4860a0e5..0fc2238a272 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -166,6 +166,7 @@ RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, decimal, type_group_id: RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP); RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING); RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST); +RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, struct, cudf::type_id::STRUCT); BENCHMARK_DEFINE_F(OrcRead, column_selection) (::benchmark::State& state) { BM_orc_read_varying_options(state); } diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index e24ca7f749d..525c13af5c0 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -116,6 +116,7 @@ WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, decimal, type_group_id::F WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP); WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING); WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST); +WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT); BENCHMARK_DEFINE_F(OrcWrite, writer_options) (::benchmark::State& state) { BM_orc_write_varying_options(state); } diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index 74613e50158..8a97fd35c31 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -166,6 +166,7 @@ RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, decimal, type_group_id RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP); RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING); RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST); +RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, struct, cudf::type_id::STRUCT); BENCHMARK_DEFINE_F(ParquetRead, column_selection) (::benchmark::State& state) { BM_parq_read_varying_options(state); } diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index d203f0d27c8..d25fae42d0e 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -111,6 +111,7 @@ WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, decimal, type_group_id:: WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP); WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING); WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST); +WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT); BENCHMARK_DEFINE_F(ParquetWrite, writer_options) (::benchmark::State& state) { BM_parq_write_varying_options(state); } diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu index 69fb28d29b2..3c4208bf0fc 100644 --- a/cpp/benchmarks/join/conditional_join.cu +++ b/cpp/benchmarks/join/conditional_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index e846317f472..5df77ac4319 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -139,18 +139,18 @@ void generate_input_tables(key_type* const build_tbl, // Maximize exposed parallelism while minimizing storage for curand state int num_blocks_init_build_tbl{-1}; - CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( &num_blocks_init_build_tbl, init_build_tbl, block_size, 0)); int num_blocks_init_probe_tbl{-1}; - CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( &num_blocks_init_probe_tbl, init_probe_tbl, block_size, 0)); int dev_id{-1}; - CUDA_TRY(cudaGetDevice(&dev_id)); + CUDF_CUDA_TRY(cudaGetDevice(&dev_id)); int num_sms{-1}; - CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id)); + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id)); const int num_states = num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size; @@ -158,12 +158,12 @@ void generate_input_tables(key_type* const build_tbl, init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); init_build_tbl<<>>( build_tbl, build_tbl_size, multiplicity, devStates.data(), num_states); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); auto const rand_max = std::numeric_limits::max(); @@ -177,5 +177,5 @@ void generate_input_tables(key_type* const build_tbl, devStates.data(), num_states); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); } diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 27339248968..6ff2543cf7d 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -116,7 +116,7 @@ static void BM_join(state_type& state, Join JoinFunc) auto build_payload_column = cudf::sequence(build_table_size, *init); auto probe_payload_column = cudf::sequence(probe_table_size, *init); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); cudf::table_view build_table({build_key_column->view(), *build_payload_column}); cudf::table_view probe_table({probe_key_column->view(), *probe_payload_column}); diff --git a/cpp/benchmarks/join/left_join.cu b/cpp/benchmarks/join/left_join.cu index e332b70e30a..58a1c2d7f29 100644 --- a/cpp/benchmarks/join/left_join.cu +++ b/cpp/benchmarks/join/left_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index 22e4be9ce9d..7f6d5cc5468 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/null_mask/set_null_mask.cpp b/cpp/benchmarks/null_mask/set_null_mask.cpp index d48d49c205d..2057951ff8d 100644 --- a/cpp/benchmarks/null_mask/set_null_mask.cpp +++ b/cpp/benchmarks/null_mask/set_null_mask.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp index 5259ac29f99..dc9a1e991b2 100644 --- a/cpp/benchmarks/string/convert_durations.cpp +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/synchronization/synchronization.cpp b/cpp/benchmarks/synchronization/synchronization.cpp index bd8a4d1de76..bbf90e6f68a 100644 --- a/cpp/benchmarks/synchronization/synchronization.cpp +++ b/cpp/benchmarks/synchronization/synchronization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,32 +29,32 @@ cuda_event_timer::cuda_event_timer(benchmark::State& state, // flush all of L2$ if (flush_l2_cache) { int current_device = 0; - CUDA_TRY(cudaGetDevice(¤t_device)); + CUDF_CUDA_TRY(cudaGetDevice(¤t_device)); int l2_cache_bytes = 0; - CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); if (l2_cache_bytes > 0) { const int memset_value = 0; rmm::device_buffer l2_cache_buffer(l2_cache_bytes, stream); - CUDA_TRY( + CUDF_CUDA_TRY( cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream.value())); } } - CUDA_TRY(cudaEventCreate(&start)); - CUDA_TRY(cudaEventCreate(&stop)); - CUDA_TRY(cudaEventRecord(start, stream.value())); + CUDF_CUDA_TRY(cudaEventCreate(&start)); + CUDF_CUDA_TRY(cudaEventCreate(&stop)); + CUDF_CUDA_TRY(cudaEventRecord(start, stream.value())); } cuda_event_timer::~cuda_event_timer() { - CUDA_TRY(cudaEventRecord(stop, stream.value())); - CUDA_TRY(cudaEventSynchronize(stop)); + CUDF_CUDA_TRY(cudaEventRecord(stop, stream.value())); + CUDF_CUDA_TRY(cudaEventSynchronize(stop)); float milliseconds = 0.0f; - CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop)); + CUDF_CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop)); p_state->SetIterationTime(milliseconds / (1000.0f)); - CUDA_TRY(cudaEventDestroy(start)); - CUDA_TRY(cudaEventDestroy(stop)); + CUDF_CUDA_TRY(cudaEventDestroy(start)); + CUDF_CUDA_TRY(cudaEventDestroy(stop)); } diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 0a0e6a1667c..3fbb6054d5c 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index 2406ddd39ae..150f578a22a 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index 6ab6f9a2095..aba78dad3fe 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -196,13 +196,13 @@ void type_dispatcher_benchmark(::benchmark::State& state) rmm::device_uvector d_vec(n_cols, rmm::cuda_stream_default); if (dispatching_type == NO_DISPATCHING) { - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( d_vec.data(), h_vec_p.data(), sizeof(TypeParam*) * n_cols, cudaMemcpyHostToDevice)); } // Warm up launch_kernel(source_table, d_vec.data(), work_per_thread); - CUDA_TRY(cudaDeviceSynchronize()); + CUDF_CUDA_TRY(cudaDeviceSynchronize()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 5a20f78b798..1639655d1e9 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -18,10 +18,13 @@ function(find_and_configure_cucollections) # Find or install cuCollections rapids_cpm_find( # cuCollections doesn't have a version yet - cuco 0.0 + cuco 0.0.1 GLOBAL_TARGETS cuco::cuco + BUILD_EXPORT_SET cudf-exports + INSTALL_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 6ec8b6dcdeceea07ab4456d32461a05c18864411 + GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5 + EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) diff --git a/cpp/cmake/thirdparty/get_nvcomp.cmake b/cpp/cmake/thirdparty/get_nvcomp.cmake index c1765408d62..0356725548b 100644 --- a/cpp/cmake/thirdparty/get_nvcomp.cmake +++ b/cpp/cmake/thirdparty/get_nvcomp.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -13,14 +13,15 @@ # ============================================================================= # This function finds nvcomp and sets any additional necessary environment variables. -function(find_and_configure_nvcomp VERSION) - - # Find or install nvcomp +function(find_and_configure_nvcomp VERSION_MIN VERSION_MAX) + # Search for latest version of nvComp + rapids_find_package(nvcomp ${VERSION_MAX} QUIET) + # If latest isn't found, fall back to building oldest support from source rapids_cpm_find( - nvcomp ${VERSION} + nvcomp ${VERSION_MIN} GLOBAL_TARGETS nvcomp::nvcomp CPM_ARGS GITHUB_REPOSITORY NVIDIA/nvcomp - GIT_TAG c435afaf4ba8a8d12f379d688effcb185886cec1 + GIT_TAG v${VERSION_MIN} OPTIONS "BUILD_STATIC ON" "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) @@ -32,9 +33,8 @@ function(find_and_configure_nvcomp VERSION) if(TARGET nvcomp AND PER_THREAD_DEFAULT_STREAM) target_compile_definitions(nvcomp PRIVATE CUDA_API_PER_THREAD_DEFAULT_STREAM) endif() - endfunction() -set(CUDF_MIN_VERSION_nvCOMP 2.1.0) - -find_and_configure_nvcomp(${CUDF_MIN_VERSION_nvCOMP}) +set(CUDF_MIN_VERSION_nvCOMP 2.2.0) +set(CUDF_MAX_VERSION_nvCOMP 2.3.0) +find_and_configure_nvcomp(${CUDF_MIN_VERSION_nvCOMP} ${CUDF_MAX_VERSION_nvCOMP}) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index eeebe38d873..1599c81cbe5 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -373,7 +373,7 @@ namespace detail{ void external_function(..., rmm::cuda_stream_view stream){ // Implementation uses the stream with async APIs. rmm::device_buffer buff(...,stream); - CUDA_TRY(cudaMemcpyAsync(...,stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); kernel<<<..., stream>>>(...); thrust::algorithm(rmm::exec_policy(stream), ...); } @@ -777,7 +777,7 @@ CUDF_FAIL("This code path should not be reached."); ### CUDA Error Checking -Use the `CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This +Use the `CUDF_CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This macro throws a `cudf::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The thrown exception includes a description of the CUDA error code in its `what()` message. diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 0ada2977ead..0871778db81 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + cmake_minimum_required(VERSION 3.18) project( diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 0087dd1b173..ecaa4a30cf0 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -254,10 +254,10 @@ struct scatter_gather_functor { if (output.nullable()) { // Have to initialize the output mask to all zeros because we may update // it with atomicOr(). - CUDA_TRY(cudaMemsetAsync(static_cast(output.null_mask()), - 0, - cudf::bitmask_allocation_size_bytes(output.size()), - stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(static_cast(output.null_mask()), + 0, + cudf::bitmask_allocation_size_bytes(output.size()), + stream.value())); } auto output_device_view = cudf::mutable_column_device_view::create(output, stream); @@ -344,7 +344,7 @@ std::unique_ptr copy_if( // initialize just the first element of block_offsets to 0 since the InclusiveSum below // starts at the second element. - CUDA_TRY(cudaMemsetAsync(block_offsets.begin(), 0, sizeof(cudf::size_type), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(block_offsets.begin(), 0, sizeof(cudf::size_type), stream.value())); // 2. Find the offset for each block's output using a scan of block counts if (grid.num_blocks > 1) { @@ -370,7 +370,7 @@ std::unique_ptr
copy_if( // As it is InclusiveSum, last value in block_offsets will be output_size // unless num_blocks == 1, in which case output_size is just block_counts[0] cudf::size_type output_size{0}; - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( &output_size, grid.num_blocks > 1 ? block_offsets.begin() + grid.num_blocks : block_counts.begin(), sizeof(cudf::size_type), diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index ac59b429a2c..6703db305a1 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -185,7 +185,7 @@ void copy_range(SourceValueIterator source_value_begin, nullptr); } - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); } /** diff --git a/cpp/include/cudf/detail/get_value.cuh b/cpp/include/cudf/detail/get_value.cuh index 56c0289dc0a..49a406ab5f0 100644 --- a/cpp/include/cudf/detail/get_value.cuh +++ b/cpp/include/cudf/detail/get_value.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,11 +49,11 @@ T get_value(column_view const& col_view, size_type element_index, rmm::cuda_stre CUDF_EXPECTS(element_index >= 0 && element_index < col_view.size(), "invalid element_index value"); T result; - CUDA_TRY(cudaMemcpyAsync(&result, - col_view.data() + element_index, - sizeof(T), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(&result, + col_view.data() + element_index, + sizeof(T), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); return result; } diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index be010689847..7aec56fdc51 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -170,20 +170,20 @@ size_type inplace_bitmask_binop( rmm::device_uvector d_masks(masks.size(), stream, mr); rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice, stream.value())); - CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(), - masks_begin_bits.data(), - masks_begin_bits.size_bytes(), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(), + masks_begin_bits.data(), + masks_begin_bits.size_bytes(), + cudaMemcpyHostToDevice, + stream.value())); auto constexpr block_size = 256; cudf::detail::grid_1d config(dest_mask.size(), block_size); offset_bitmask_binop <<>>( op, dest_mask, d_masks, d_begin_bits, mask_size_bits, d_counter.data()); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return d_counter.value(stream); } @@ -298,27 +298,25 @@ rmm::device_uvector segmented_count_bits(bitmask_type const* bitmask, // Allocate temporary memory. size_t temp_storage_bytes{0}; - CUDA_TRY(cub::DeviceSegmentedReduce::Sum(nullptr, - temp_storage_bytes, - num_set_bits_in_word, - d_bit_counts.begin(), - num_ranges, - first_word_indices, - last_word_indices, - stream.value())); + CUDF_CUDA_TRY(cub::DeviceSegmentedReduce::Sum(nullptr, + temp_storage_bytes, + num_set_bits_in_word, + d_bit_counts.begin(), + num_ranges, + first_word_indices, + last_word_indices, + stream.value())); rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); // Perform segmented reduction. - CUDA_TRY(cub::DeviceSegmentedReduce::Sum(d_temp_storage.data(), - temp_storage_bytes, - num_set_bits_in_word, - d_bit_counts.begin(), - num_ranges, - first_word_indices, - last_word_indices, - stream.value())); - - CHECK_CUDA(stream.value()); + CUDF_CUDA_TRY(cub::DeviceSegmentedReduce::Sum(d_temp_storage.data(), + temp_storage_bytes, + num_set_bits_in_word, + d_bit_counts.begin(), + num_ranges, + first_word_indices, + last_word_indices, + stream.value())); // Adjust counts in segment boundaries (if segments are not word-aligned). constexpr size_type block_size{256}; @@ -350,7 +348,7 @@ rmm::device_uvector segmented_count_bits(bitmask_type const* bitmask, }); } - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return d_bit_counts; } diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index a00bd64caa3..23d0ff26e0f 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -127,12 +127,12 @@ cudf::size_type elements_per_thread(Kernel kernel, // calculate theoretical occupancy int max_blocks = 0; - CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0)); + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0)); int device = 0; - CUDA_TRY(cudaGetDevice(&device)); + CUDF_CUDA_TRY(cudaGetDevice(&device)); int num_sms = 0; - CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); int per_thread = total_size / (max_blocks * num_sms * block_size); return std::clamp(per_thread, 1, max_per_thread); } diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index e3f44ce0bee..63ac48f6060 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -53,7 +53,7 @@ rmm::device_uvector make_zeroed_device_uvector_async( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(size, stream, mr); - CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); return ret; } @@ -75,7 +75,7 @@ rmm::device_uvector make_zeroed_device_uvector_sync( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(size, stream, mr); - CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), 0, size * sizeof(T), stream.value())); stream.synchronize(); return ret; } @@ -99,11 +99,11 @@ rmm::device_uvector make_device_uvector_async( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(source_data.size(), stream, mr); - CUDA_TRY(cudaMemcpyAsync(ret.data(), - source_data.data(), - source_data.size() * sizeof(T), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), + source_data.data(), + source_data.size() * sizeof(T), + cudaMemcpyDefault, + stream.value())); return ret; } @@ -151,11 +151,11 @@ rmm::device_uvector make_device_uvector_async( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { rmm::device_uvector ret(source_data.size(), stream, mr); - CUDA_TRY(cudaMemcpyAsync(ret.data(), - source_data.data(), - source_data.size() * sizeof(T), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(ret.data(), + source_data.data(), + source_data.size() * sizeof(T), + cudaMemcpyDefault, + stream.value())); return ret; } @@ -286,7 +286,7 @@ template OutContainer make_vector_async(device_span v, rmm::cuda_stream_view stream) { OutContainer result(v.size()); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( result.data(), v.data(), v.size() * sizeof(T), cudaMemcpyDeviceToHost, stream.value())); return result; } diff --git a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp index ffe159b59dc..56db3fd6216 100644 --- a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp @@ -78,14 +78,14 @@ class istream_data_chunk_reader : public data_chunk_reader { { // create an event to track the completion of the last device-to-host copy. for (std::size_t i = 0; i < _tickets.size(); i++) { - CUDA_TRY(cudaEventCreate(&(_tickets[i].event))); + CUDF_CUDA_TRY(cudaEventCreate(&(_tickets[i].event))); } } ~istream_data_chunk_reader() { for (std::size_t i = 0; i < _tickets.size(); i++) { - CUDA_TRY(cudaEventDestroy(_tickets[i].event)); + CUDF_CUDA_TRY(cudaEventDestroy(_tickets[i].event)); } } @@ -101,7 +101,7 @@ class istream_data_chunk_reader : public data_chunk_reader { _next_ticket_idx = (_next_ticket_idx + 1) % _tickets.size(); // synchronize on the last host-to-device copy, so we don't clobber the host buffer. - CUDA_TRY(cudaEventSynchronize(h_ticket.event)); + CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event)); // resize the host buffer as necessary to contain the requested number of bytes if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); } @@ -116,7 +116,7 @@ class istream_data_chunk_reader : public data_chunk_reader { auto chunk = rmm::device_uvector(read_size, stream); // copy the host-pinned data on to device - CUDA_TRY(cudaMemcpyAsync( // + CUDF_CUDA_TRY(cudaMemcpyAsync( // chunk.data(), h_ticket.buffer.data(), read_size, @@ -124,7 +124,7 @@ class istream_data_chunk_reader : public data_chunk_reader { stream.value())); // record the host-to-device copy. - CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); + CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value())); // return the view over device memory so it can be processed. return std::make_unique(std::move(chunk)); diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index 4b036fb7f0e..b9ea2d9ecff 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -67,7 +67,7 @@ std::unique_ptr make_offsets_child_column( // we use inclusive-scan on a shifted output (d_offsets+1) and then set the first // offset values to zero manually. thrust::inclusive_scan(rmm::exec_policy(stream), begin, end, d_offsets + 1); - CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(int32_t), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(int32_t), stream.value())); return offsets_column; } diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 27ee5cf95cd..a486a5a765c 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -99,7 +99,8 @@ CUDF_HOST_DEVICE inline string_view string_view::max() #if defined(__CUDA_ARCH__) psentinel = &cudf::strings::detail::max_string_sentinel[0]; #else - CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, cudf::strings::detail::max_string_sentinel)); + CUDF_CUDA_TRY( + cudaGetSymbolAddress((void**)&psentinel, cudf::strings::detail::max_string_sentinel)); #endif return string_view(psentinel, 4); } diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh index 3ed18099463..8d08a3fd0b0 100644 --- a/cpp/include/cudf/table/table_device_view.cuh +++ b/cpp/include/cudf/table/table_device_view.cuh @@ -145,7 +145,7 @@ auto contiguous_copy_column_device_views(HostTableView source_view, rmm::cuda_st auto d_columns = detail::child_columns_to_device_array( source_view.begin(), source_view.end(), h_ptr, d_ptr); - CUDA_TRY(cudaMemcpyAsync(d_ptr, h_ptr, views_size_bytes, cudaMemcpyDefault, stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_ptr, h_ptr, views_size_bytes, cudaMemcpyDefault, stream.value())); stream.synchronize(); return std::make_tuple(std::move(descendant_storage), d_columns); } diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index 2036723a6ed..8be1a7e3a32 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #pragma once #include @@ -99,7 +115,7 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an * exception detailing the CUDA error that occurred */ -#define CUDA_TRY(call) \ +#define CUDF_CUDA_TRY(call) \ do { \ cudaError_t const status = (call); \ if (cudaSuccess != status) { \ @@ -122,12 +138,12 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l * asynchronous kernel launch. */ #ifndef NDEBUG -#define CHECK_CUDA(stream) \ - do { \ - CUDA_TRY(cudaStreamSynchronize(stream)); \ - CUDA_TRY(cudaPeekAtLastError()); \ +#define CUDF_CHECK_CUDA(stream) \ + do { \ + CUDF_CUDA_TRY(cudaStreamSynchronize(stream)); \ + CUDF_CUDA_TRY(cudaPeekAtLastError()); \ } while (0); #else -#define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); +#define CUDF_CHECK_CUDA(stream) CUDF_CUDA_TRY(cudaPeekAtLastError()); #endif /** @} */ diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index 4c2d4d429eb..b28ed4f70fa 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -183,7 +183,8 @@ template ()>* = nullptr std::pair, std::vector> to_host(column_view c) { thrust::host_vector host_data(c.size()); - CUDA_TRY(cudaMemcpy(host_data.data(), c.data(), c.size() * sizeof(T), cudaMemcpyDeviceToHost)); + CUDF_CUDA_TRY( + cudaMemcpy(host_data.data(), c.data(), c.size() * sizeof(T), cudaMemcpyDeviceToHost)); return {host_data, bitmask_to_host(c)}; } @@ -206,7 +207,7 @@ std::pair, std::vector> to_host(column_view auto host_rep_types = thrust::host_vector(c.size()); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( host_rep_types.data(), c.begin(), c.size() * sizeof(Rep), cudaMemcpyDeviceToHost)); auto to_fp = [&](Rep val) { return T{scaled_integer{val, scale_type{c.type().scale()}}}; }; diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py index 3d462d65fb8..2a732dfdc67 100755 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 30e937d7f4d..f6b24ccb4bb 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index 9b3e33f491e..ec41fbb8883 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -265,7 +265,7 @@ void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f) { int block_size; int min_grid_size; - CUDA_TRY( + CUDF_CUDA_TRY( cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, for_each_kernel)); // 2 elements per thread. const int grid_size = util::div_rounding_up_safe(size, 2 * block_size); diff --git a/cpp/src/bitmask/is_element_valid.cpp b/cpp/src/bitmask/is_element_valid.cpp index 47870e01567..4a94ec9759c 100644 --- a/cpp/src/bitmask/is_element_valid.cpp +++ b/cpp/src/bitmask/is_element_valid.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,11 +34,11 @@ bool is_element_valid_sync(column_view const& col_view, bitmask_type word; // null_mask() returns device ptr to bitmask without offset size_type index = element_index + col_view.offset(); - CUDA_TRY(cudaMemcpyAsync(&word, - col_view.null_mask() + word_index(index), - sizeof(bitmask_type), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(&word, + col_view.null_mask() + word_index(index), + sizeof(bitmask_type), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); return static_cast(word & (bitmask_type{1} << intra_word_index(index))); } diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index d1107ad3cfd..756cf3421c9 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -90,7 +90,7 @@ rmm::device_buffer create_null_mask(size_type size, if (state != mask_state::UNINITIALIZED) { uint8_t fill_value = (state == mask_state::ALL_VALID) ? 0xff : 0x00; - CUDA_TRY(cudaMemsetAsync( + CUDF_CUDA_TRY(cudaMemsetAsync( static_cast(mask.data()), fill_value, mask_size, stream.value())); } @@ -146,7 +146,7 @@ void set_null_mask(bitmask_type* bitmask, cudf::detail::grid_1d config(number_of_mask_words, 256); set_null_mask_kernel<<>>( static_cast(bitmask), begin_bit, end_bit, valid, number_of_mask_words); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); } } @@ -220,7 +220,7 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask, cudf::detail::grid_1d config(number_of_mask_words, 256); copy_offset_bitmask<<>>( static_cast(dest_mask.data()), mask, begin_bit, end_bit, number_of_mask_words); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); } return dest_mask; } diff --git a/cpp/src/column/column_device_view.cu b/cpp/src/column/column_device_view.cu index dd1803f4b90..fc244521617 100644 --- a/cpp/src/column/column_device_view.cu +++ b/cpp/src/column/column_device_view.cu @@ -77,11 +77,11 @@ create_device_view_from_view(ColumnView const& source, rmm::cuda_stream_view str new ColumnDeviceView(source, staging_buffer.data(), descendant_storage->data()), deleter}; // copy the CPU memory with all the children into device memory - CUDA_TRY(cudaMemcpyAsync(descendant_storage->data(), - staging_buffer.data(), - descendant_storage->size(), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(descendant_storage->data(), + staging_buffer.data(), + descendant_storage->size(), + cudaMemcpyDefault, + stream.value())); stream.synchronize(); diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 46470e69611..514374d450d 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1046,11 +1046,11 @@ std::vector contiguous_split(cudf::table_view const& input, setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info); // HtoD indices and source buf info to device - CUDA_TRY(cudaMemcpyAsync(d_indices, - h_indices, - indices_size + src_buf_info_size, - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_indices, + h_indices, + indices_size + src_buf_info_size, + cudaMemcpyHostToDevice, + stream.value())); // packed block of memory 2. partition buffer sizes and dst_buf_info structs std::size_t const buf_sizes_size = @@ -1180,11 +1180,11 @@ std::vector contiguous_split(cudf::table_view const& input, } // DtoH buf sizes and col info back to the host - CUDA_TRY(cudaMemcpyAsync(h_buf_sizes, - d_buf_sizes, - buf_sizes_size + dst_buf_info_size, - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_buf_sizes, + d_buf_sizes, + buf_sizes_size + dst_buf_info_size, + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); // allocate output partition buffers @@ -1224,14 +1224,14 @@ std::vector contiguous_split(cudf::table_view const& input, }); // HtoD src and dest buffers - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( d_src_bufs, h_src_bufs, src_bufs_size + dst_bufs_size, cudaMemcpyHostToDevice, stream.value())); // perform the copy. copy_data(num_bufs, num_src_bufs, d_src_bufs, d_dst_bufs, d_dst_buf_info, stream); // DtoH dst info (to retrieve null counts) - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( h_dst_buf_info, d_dst_buf_info, dst_buf_info_size, cudaMemcpyDeviceToHost, stream.value())); stream.synchronize(); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 49ed0b7fc1d..44df981f5bf 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -645,10 +645,14 @@ bool can_use_hash_groupby(table_view const& keys, host_spankind); + std::all_of(r.aggregations.begin(), r.aggregations.end(), [v_type](auto const& a) { + return cudf::has_atomic_support(cudf::detail::target_type(v_type, a->kind)) and + is_hash_aggregation(a->kind); }); }); } diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh index 0ae0baa9908..76f3fba4689 100644 --- a/cpp/src/hash/concurrent_unordered_map.cuh +++ b/cpp/src/hash/concurrent_unordered_map.cuh @@ -432,11 +432,11 @@ class concurrent_unordered_map { m_hashtbl_values = m_allocator.allocate(m_capacity, stream); } - CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, - other.m_hashtbl_values, - m_capacity * sizeof(value_type), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, + other.m_hashtbl_values, + m_capacity * sizeof(value_type), + cudaMemcpyDefault, + stream.value())); } void clear_async(rmm::cuda_stream_view stream = rmm::cuda_stream_default) @@ -460,10 +460,10 @@ class concurrent_unordered_map { cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { - CUDA_TRY(cudaMemPrefetchAsync( + CUDF_CUDA_TRY(cudaMemPrefetchAsync( m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value())); } - CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value())); + CUDF_CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value())); } /** @@ -532,8 +532,8 @@ class concurrent_unordered_map { if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { int dev_id = 0; - CUDA_TRY(cudaGetDevice(&dev_id)); - CUDA_TRY(cudaMemPrefetchAsync( + CUDF_CUDA_TRY(cudaGetDevice(&dev_id)); + CUDF_CUDA_TRY(cudaMemPrefetchAsync( m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value())); } } @@ -543,6 +543,6 @@ class concurrent_unordered_map { m_hashtbl_values, m_capacity, m_unused_key, m_unused_element); } - CUDA_TRY(cudaGetLastError()); + CUDF_CHECK_CUDA(stream.value()); } }; diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh index cdf5b6a8649..aa5b8db393f 100644 --- a/cpp/src/hash/concurrent_unordered_multimap.cuh +++ b/cpp/src/hash/concurrent_unordered_multimap.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. + * Copyright (c) 2017-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -488,11 +488,11 @@ class concurrent_unordered_multimap { m_hashtbl_values = m_allocator.allocate(m_hashtbl_capacity, stream); } - CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, - other.m_hashtbl_values, - m_hashtbl_size * sizeof(value_type), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, + other.m_hashtbl_values, + m_hashtbl_size * sizeof(value_type), + cudaMemcpyDefault, + stream.value())); } void clear_async(rmm::cuda_stream_view stream = rmm::cuda_stream_default) @@ -519,7 +519,7 @@ class concurrent_unordered_multimap { cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { - CUDA_TRY(cudaMemPrefetchAsync( + CUDF_CUDA_TRY(cudaMemPrefetchAsync( m_hashtbl_values, m_hashtbl_size * sizeof(value_type), dev_id, stream.value())); } } @@ -575,8 +575,8 @@ class concurrent_unordered_multimap { if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { int dev_id = 0; - CUDA_TRY(cudaGetDevice(&dev_id)); - CUDA_TRY(cudaMemPrefetchAsync( + CUDF_CUDA_TRY(cudaGetDevice(&dev_id)); + CUDF_CUDA_TRY(cudaMemPrefetchAsync( m_hashtbl_values, m_hashtbl_size * sizeof(value_type), dev_id, stream.value())); } } @@ -584,7 +584,7 @@ class concurrent_unordered_multimap { if (init) { init_hashtbl<<<((m_hashtbl_size - 1) / block_size) + 1, block_size, 0, stream.value()>>>( m_hashtbl_values, m_hashtbl_size, unused_key, unused_element); - CUDA_TRY(cudaGetLastError()); + CUDF_CHECK_CUDA(stream.value()); } } }; diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 01ca32e6a2f..e5da4794ca3 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -144,7 +144,7 @@ std::unique_ptr
from_dlpack(DLManagedTensor const* managed_tensor, // Make sure the current device ID matches the Tensor's device ID if (tensor.device.device_type != kDLCPU) { int device_id = 0; - CUDA_TRY(cudaGetDevice(&device_id)); + CUDF_CUDA_TRY(cudaGetDevice(&device_id)); CUDF_EXPECTS(tensor.device.device_id == device_id, "DLTensor device ID must be current device"); } @@ -184,11 +184,11 @@ std::unique_ptr
from_dlpack(DLManagedTensor const* managed_tensor, for (auto& col : columns) { col = make_numeric_column(dtype, num_rows, mask_state::UNALLOCATED, stream, mr); - CUDA_TRY(cudaMemcpyAsync(col->mutable_view().head(), - reinterpret_cast(tensor_data), - bytes, - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(col->mutable_view().head(), + reinterpret_cast(tensor_data), + bytes, + cudaMemcpyDefault, + stream.value())); tensor_data += col_stride; } @@ -234,7 +234,7 @@ DLManagedTensor* to_dlpack(table_view const& input, tensor.strides[1] = num_rows; } - CUDA_TRY(cudaGetDevice(&tensor.device.device_id)); + CUDF_CUDA_TRY(cudaGetDevice(&tensor.device.device_id)); tensor.device.device_type = kDLCUDA; // If there is only one column, then a 1D tensor can just copy the pointer @@ -254,11 +254,11 @@ DLManagedTensor* to_dlpack(table_view const& input, auto tensor_data = reinterpret_cast(tensor.data); for (auto const& col : input) { - CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(tensor_data), - get_column_data(col), - stride_bytes, - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(reinterpret_cast(tensor_data), + get_column_data(col), + stride_bytes, + cudaMemcpyDefault, + stream.value())); tensor_data += stride_bytes; } diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 99b657fb9d5..6c5cd56d2a7 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -108,11 +108,11 @@ struct dispatch_to_cudf_column { stream, mr); auto mask_buffer = array.null_bitmap(); - CUDA_TRY(cudaMemcpyAsync(mask->data(), - reinterpret_cast(mask_buffer->address()), - array.null_bitmap()->size(), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(mask->data(), + reinterpret_cast(mask_buffer->address()), + array.null_bitmap()->size(), + cudaMemcpyDefault, + stream.value())); return mask; } @@ -135,7 +135,7 @@ struct dispatch_to_cudf_column { auto const has_nulls = skip_mask ? false : array.null_bitmap_data() != nullptr; auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); auto mutable_column_view = col->mutable_view(); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( mutable_column_view.data(), reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(T), sizeof(T) * num_rows, @@ -191,7 +191,7 @@ std::unique_ptr dispatch_to_cudf_column::operator() auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); auto mutable_column_view = col->mutable_view(); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( mutable_column_view.data(), reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(DeviceType), sizeof(DeviceType) * num_rows, @@ -227,11 +227,11 @@ std::unique_ptr dispatch_to_cudf_column::operator()( { auto data_buffer = array.data()->buffers[1]; auto data = rmm::device_buffer(data_buffer->size(), stream, mr); - CUDA_TRY(cudaMemcpyAsync(data.data(), - reinterpret_cast(data_buffer->address()), - data_buffer->size(), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(data.data(), + reinterpret_cast(data_buffer->address()), + data_buffer->size(), + cudaMemcpyDefault, + stream.value())); auto out_col = mask_to_bools(static_cast(data.data()), array.offset(), array.offset() + array.length(), diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index c7409978bb2..517a83c716e 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -53,11 +53,11 @@ std::shared_ptr fetch_data_buffer(column_view input_view, auto data_buffer = allocate_arrow_buffer(data_size_in_bytes, ar_mr); - CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - input_view.data(), - data_size_in_bytes, - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), + input_view.data(), + data_size_in_bytes, + cudaMemcpyDeviceToHost, + stream.value())); return std::move(data_buffer); } @@ -73,7 +73,7 @@ std::shared_ptr fetch_mask_buffer(column_view input_view, if (input_view.has_nulls()) { auto mask_buffer = allocate_arrow_bitmap(static_cast(input_view.size()), ar_mr); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( mask_buffer->mutable_data(), (input_view.offset() > 0) ? cudf::copy_bitmask(input_view).data() : input_view.null_mask(), mask_size_in_bytes, @@ -163,11 +163,11 @@ std::shared_ptr dispatch_to_arrow::operator()( auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - buf.data(), - buf_size_in_bytes, - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), + buf.data(), + buf_size_in_bytes, + cudaMemcpyDeviceToHost, + stream.value())); auto type = arrow::decimal(18, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); @@ -197,11 +197,11 @@ std::shared_ptr dispatch_to_arrow::operator() auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - buf.data(), - buf_size_in_bytes, - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), + buf.data(), + buf_size_in_bytes, + cudaMemcpyDeviceToHost, + stream.value())); auto type = arrow::decimal(18, -input.type().scale()); auto mask = fetch_mask_buffer(input, ar_mr, stream); @@ -222,11 +222,11 @@ std::shared_ptr dispatch_to_arrow::operator()(column_view in auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); - CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - bitmask.first->data(), - bitmask.first->size(), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), + bitmask.first->data(), + bitmask.first->size(), + cudaMemcpyDeviceToHost, + stream.value())); return to_arrow_array(id, static_cast(input.size()), std::move(data_buffer), diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index b5b76c2def8..5885b61b35b 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -192,9 +192,9 @@ rmm::device_buffer decompress_data(datasource& source, for (int loop_cnt = 0; loop_cnt < 2; loop_cnt++) { inflate_in.host_to_device(stream); - CUDA_TRY( + CUDF_CUDA_TRY( cudaMemsetAsync(inflate_out.device_ptr(), 0, inflate_out.memory_size(), stream.value())); - CUDA_TRY(gpuinflate( + CUDF_CUDA_TRY(gpuinflate( inflate_in.device_ptr(), inflate_out.device_ptr(), inflate_in.size(), 0, stream)); inflate_out.device_to_host(stream, true); @@ -424,11 +424,11 @@ std::vector decode_data(metadata& meta, // Copy valid bits that are shared between columns for (size_t i = 0; i < out_buffers.size(); i++) { if (valid_alias[i] != nullptr) { - CUDA_TRY(cudaMemcpyAsync(out_buffers[i].null_mask(), - valid_alias[i], - out_buffers[i].null_mask_size(), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(out_buffers[i].null_mask(), + valid_alias[i], + out_buffers[i].null_mask_size(), + cudaMemcpyHostToDevice, + stream.value())); } } schema_desc.device_to_host(stream, true); diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index b4a42a66133..631cf19b2aa 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2048,7 +2048,7 @@ size_t __host__ get_gpu_debrotli_scratch_size(int max_num_inputs) int sm_count = 0; int dev = 0; uint32_t max_fb_size, min_fb_size, fb_size; - CUDA_TRY(cudaGetDevice(&dev)); + CUDF_CUDA_TRY(cudaGetDevice(&dev)); if (cudaSuccess == cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev)) { // printf("%d SMs on device %d\n", sm_count, dev); max_num_inputs = @@ -2092,14 +2092,14 @@ cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, scratch_size = min(scratch_size, (size_t)0xffffffffu); fb_heap_size = (uint32_t)((scratch_size - sizeof(brotli_dictionary_s)) & ~0xf); - CUDA_TRY(cudaMemsetAsync(scratch_u8, 0, 2 * sizeof(uint32_t), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(scratch_u8, 0, 2 * sizeof(uint32_t), stream.value())); // NOTE: The 128KB dictionary copy can have a relatively large overhead since source isn't // page-locked - CUDA_TRY(cudaMemcpyAsync(scratch_u8 + fb_heap_size, - get_brotli_dictionary(), - sizeof(brotli_dictionary_s), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(scratch_u8 + fb_heap_size, + get_brotli_dictionary(), + sizeof(brotli_dictionary_s), + cudaMemcpyHostToDevice, + stream.value())); gpu_debrotli_kernel<<>>( inputs, outputs, scratch_u8, fb_heap_size, count32); #if DUMP_FB_HEAP @@ -2107,7 +2107,7 @@ cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, uint32_t cur = 0; printf("heap dump (%d bytes)\n", fb_heap_size); while (cur < fb_heap_size && !(cur & 3)) { - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( &dump[0], scratch_u8 + cur, 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost, stream.value())); stream.synchronize(); printf("@%d: next = %d, size = %d\n", cur, dump[0], dump[1]); diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index ae9738164f3..cd070d28f38 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -260,11 +260,11 @@ std::pair, selected_rows_offsets> load_data_and_gather auto const previous_data_size = d_data.size(); d_data.resize(target_pos - buffer_pos, stream); - CUDA_TRY(cudaMemcpyAsync(d_data.begin() + previous_data_size, - data.begin() + buffer_pos + previous_data_size, - target_pos - buffer_pos - previous_data_size, - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.begin() + previous_data_size, + data.begin() + buffer_pos + previous_data_size, + target_pos - buffer_pos - previous_data_size, + cudaMemcpyDefault, + stream.value())); // Pass 1: Count the potential number of rows in each character block for each // possible parser state at the beginning of the block. @@ -280,11 +280,11 @@ std::pair, selected_rows_offsets> load_data_and_gather range_end, skip_rows, stream); - CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_ctx.device_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), + row_ctx.device_ptr(), + num_blocks * sizeof(uint64_t), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); // Sum up the rows in each character block, selecting the row count that @@ -300,11 +300,11 @@ std::pair, selected_rows_offsets> load_data_and_gather // At least one row in range in this batch all_row_offsets.resize(total_rows - skip_rows, stream); - CUDA_TRY(cudaMemcpyAsync(row_ctx.device_ptr(), - row_ctx.host_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.device_ptr(), + row_ctx.host_ptr(), + num_blocks * sizeof(uint64_t), + cudaMemcpyHostToDevice, + stream.value())); // Pass 2: Output row offsets cudf::io::csv::gpu::gather_row_offsets(parse_opts.view(), @@ -321,11 +321,11 @@ std::pair, selected_rows_offsets> load_data_and_gather stream); // With byte range, we want to keep only one row out of the specified range if (range_end < data.size()) { - CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_ctx.device_ptr(), - num_blocks * sizeof(uint64_t), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), + row_ctx.device_ptr(), + num_blocks * sizeof(uint64_t), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); size_t rows_out_of_range = 0; @@ -370,11 +370,11 @@ std::pair, selected_rows_offsets> load_data_and_gather // Remove header rows and extract header const size_t header_row_index = std::max(header_rows, 1) - 1; if (header_row_index + 1 < row_offsets.size()) { - CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), - row_offsets.data() + header_row_index, - 2 * sizeof(uint64_t), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(), + row_offsets.data() + header_row_index, + 2 * sizeof(uint64_t), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); const auto header_start = buffer_pos + row_ctx[0]; diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index cb2197cf755..2aa93ae4d0f 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -378,11 +378,11 @@ void write_chunked(data_sink* out_sink, } else { // copy the bytes to host to write them out thrust::host_vector h_bytes(total_num_bytes); - CUDA_TRY(cudaMemcpyAsync(h_bytes.data(), - ptr_all_bytes, - total_num_bytes * sizeof(char), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_bytes.data(), + ptr_all_bytes, + total_num_bytes * sizeof(char), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); out_sink->host_write(h_bytes.data(), total_num_bytes); diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index d26831b9112..56a00191ae4 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -689,7 +689,7 @@ void convert_json_to_columns(parse_options_view const& opts, { int block_size; int min_grid_size; - CUDA_TRY(cudaOccupancyMaxPotentialBlockSize( + CUDF_CUDA_TRY(cudaOccupancyMaxPotentialBlockSize( &min_grid_size, &block_size, convert_data_to_columns_kernel)); const int grid_size = (row_offsets.size() + block_size - 1) / block_size; @@ -703,7 +703,7 @@ void convert_json_to_columns(parse_options_view const& opts, valid_fields, num_valid_fields); - CUDA_TRY(cudaGetLastError()); + CUDF_CHECK_CUDA(stream.value()); } /** @@ -721,7 +721,7 @@ std::vector detect_data_types( { int block_size; int min_grid_size; - CUDA_TRY( + CUDF_CUDA_TRY( cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, detect_data_types_kernel)); auto d_column_infos = [&]() { @@ -763,7 +763,7 @@ void collect_keys_info(parse_options_view const& options, { int block_size; int min_grid_size; - CUDA_TRY( + CUDF_CUDA_TRY( cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, collect_keys_info_kernel)); // Calculate actual block count to use based on records count @@ -772,7 +772,7 @@ void collect_keys_info(parse_options_view const& options, collect_keys_info_kernel<<>>( options, data, row_offsets, keys_cnt, keys_info); - CUDA_TRY(cudaGetLastError()); + CUDF_CHECK_CUDA(stream.value()); } } // namespace gpu diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 5ca947f3ee5..20eeec267b1 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -274,7 +274,7 @@ rmm::device_uvector find_record_starts(json_reader_options const& read // Manually adding an extra row to account for the first row in the file if (reader_opts.get_byte_range_offset() == 0) { find_result_ptr++; - CUDA_TRY(cudaMemsetAsync(rec_starts.data(), 0ull, sizeof(uint64_t), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(rec_starts.data(), 0ull, sizeof(uint64_t), stream.value())); } std::vector chars_to_find{'\n'}; @@ -356,18 +356,18 @@ std::pair, col_map_ptr_type> get_column_names_and_map( uint64_t first_row_len = d_data.size(); if (rec_starts.size() > 1) { // Set first_row_len to the offset of the second row, if it exists - CUDA_TRY(cudaMemcpyAsync(&first_row_len, - rec_starts.data() + 1, - sizeof(uint64_t), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(&first_row_len, + rec_starts.data() + 1, + sizeof(uint64_t), + cudaMemcpyDeviceToHost, + stream.value())); } std::vector first_row(first_row_len); - CUDA_TRY(cudaMemcpyAsync(first_row.data(), - d_data.data(), - first_row_len * sizeof(char), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(first_row.data(), + d_data.data(), + first_row_len * sizeof(char), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); // Determine the row format between: diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 059df283c94..83c23774362 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -431,7 +431,7 @@ rmm::device_buffer reader::impl::decompress_stripe_data( device_span inflate_out_view(inflate_out.data(), num_compressed_blocks); switch (decompressor->GetKind()) { case orc::ZLIB: - CUDA_TRY( + CUDF_CUDA_TRY( gpuinflate(inflate_in.data(), inflate_out.data(), num_compressed_blocks, 0, stream)); break; case orc::SNAPPY: @@ -440,7 +440,7 @@ rmm::device_buffer reader::impl::decompress_stripe_data( num_compressed_blocks}; snappy_decompress(inflate_in_view, inflate_out_view, max_uncomp_block_size, stream); } else { - CUDA_TRY( + CUDF_CUDA_TRY( gpu_unsnap(inflate_in.data(), inflate_out.data(), num_compressed_blocks, stream)); } break; @@ -449,7 +449,7 @@ rmm::device_buffer reader::impl::decompress_stripe_data( decompress_check(inflate_out_view, any_block_failure.device_ptr(), stream); } if (num_uncompressed_blocks > 0) { - CUDA_TRY(gpu_copy_uncompressed_blocks( + CUDF_CUDA_TRY(gpu_copy_uncompressed_blocks( inflate_in.data() + num_compressed_blocks, num_uncompressed_blocks, stream)); } gpu::PostDecompressionReassemble(compinfo.device_ptr(), compinfo.size(), stream); @@ -1129,7 +1129,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, _metadata.per_file_metadata[stripe_source_mapping.source_idx].source->host_read( offset, len); CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( d_dst, buffer->data(), len, cudaMemcpyHostToDevice, stream.value())); stream.synchronize(); } diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 30385d395f1..d0c1cea97a8 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -706,11 +706,11 @@ std::vector> calculate_aligned_rowgroup_bounds( auto aligned_rgs = hostdevice_2dvector( segmentation.num_rowgroups(), orc_table.num_columns(), stream); - CUDA_TRY(cudaMemcpyAsync(aligned_rgs.base_device_ptr(), - segmentation.rowgroups.base_device_ptr(), - aligned_rgs.count() * sizeof(rowgroup_rows), - cudaMemcpyDefault, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(aligned_rgs.base_device_ptr(), + segmentation.rowgroups.base_device_ptr(), + aligned_rgs.count() * sizeof(rowgroup_rows), + cudaMemcpyDefault, + stream.value())); auto const d_stripes = cudf::detail::make_device_uvector_async(segmentation.stripes, stream); // One thread per column, per stripe @@ -1330,7 +1330,7 @@ std::future writer::impl::write_data_stream(gpu::StripeStream const& strm_ if (out_sink_->is_device_write_preferred(length)) { return out_sink_->device_write_async(stream_in, length, stream); } else { - CUDA_TRY( + CUDF_CUDA_TRY( cudaMemcpyAsync(stream_out, stream_in, length, cudaMemcpyDeviceToHost, stream.value())); stream.synchronize(); @@ -1419,10 +1419,10 @@ void pushdown_lists_null_mask(orc_column_view const& col, rmm::cuda_stream_view stream) { // Set all bits - correct unless there's a mismatch between offsets and null mask - CUDA_TRY(cudaMemsetAsync(static_cast(out_mask.data()), - 255, - out_mask.size() * sizeof(bitmask_type), - stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(static_cast(out_mask.data()), + 255, + out_mask.size() * sizeof(bitmask_type), + stream.value())); // Reset bits where a null list element has rows in the child column thrust::for_each_n( @@ -1946,7 +1946,7 @@ void writer::impl::write(table_view const& table) } else { return pinned_buffer{[](size_t size) { uint8_t* ptr = nullptr; - CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); return ptr; }(max_stream_size), cudaFreeHost}; diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 33151102aec..56eb34bbe2f 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1226,24 +1226,24 @@ rmm::device_buffer reader::impl::decompress_page_data( argc++; }); - CUDA_TRY(cudaMemcpyAsync(inflate_in.device_ptr(start_pos), - inflate_in.host_ptr(start_pos), - sizeof(decltype(inflate_in)::value_type) * (argc - start_pos), - cudaMemcpyHostToDevice, - stream.value())); - CUDA_TRY(cudaMemcpyAsync(inflate_out.device_ptr(start_pos), - inflate_out.host_ptr(start_pos), - sizeof(decltype(inflate_out)::value_type) * (argc - start_pos), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(inflate_in.device_ptr(start_pos), + inflate_in.host_ptr(start_pos), + sizeof(decltype(inflate_in)::value_type) * (argc - start_pos), + cudaMemcpyHostToDevice, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(inflate_out.device_ptr(start_pos), + inflate_out.host_ptr(start_pos), + sizeof(decltype(inflate_out)::value_type) * (argc - start_pos), + cudaMemcpyHostToDevice, + stream.value())); switch (codec.compression_type) { case parquet::GZIP: - CUDA_TRY(gpuinflate(inflate_in.device_ptr(start_pos), - inflate_out.device_ptr(start_pos), - argc - start_pos, - 1, - stream)) + CUDF_CUDA_TRY(gpuinflate(inflate_in.device_ptr(start_pos), + inflate_out.device_ptr(start_pos), + argc - start_pos, + 1, + stream)) break; case parquet::SNAPPY: if (nvcomp_integration::is_stable_enabled()) { @@ -1252,27 +1252,27 @@ rmm::device_buffer reader::impl::decompress_page_data( codec.max_decompressed_size, stream); } else { - CUDA_TRY(gpu_unsnap(inflate_in.device_ptr(start_pos), - inflate_out.device_ptr(start_pos), - argc - start_pos, - stream)); + CUDF_CUDA_TRY(gpu_unsnap(inflate_in.device_ptr(start_pos), + inflate_out.device_ptr(start_pos), + argc - start_pos, + stream)); } break; case parquet::BROTLI: - CUDA_TRY(gpu_debrotli(inflate_in.device_ptr(start_pos), - inflate_out.device_ptr(start_pos), - debrotli_scratch.data(), - debrotli_scratch.size(), - argc - start_pos, - stream)); + CUDF_CUDA_TRY(gpu_debrotli(inflate_in.device_ptr(start_pos), + inflate_out.device_ptr(start_pos), + debrotli_scratch.data(), + debrotli_scratch.size(), + argc - start_pos, + stream)); break; default: CUDF_FAIL("Unexpected decompression dispatch"); break; } - CUDA_TRY(cudaMemcpyAsync(inflate_out.host_ptr(start_pos), - inflate_out.device_ptr(start_pos), - sizeof(decltype(inflate_out)::value_type) * (argc - start_pos), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(inflate_out.host_ptr(start_pos), + inflate_out.device_ptr(start_pos), + sizeof(decltype(inflate_out)::value_type) * (argc - start_pos), + cudaMemcpyDeviceToHost, + stream.value())); } } diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 872ca6f6656..70a594423c9 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1123,7 +1123,7 @@ void writer::impl::encode_pages(hostdevice_2dvector& chunks if (nvcomp_integration::is_stable_enabled()) { snappy_compress(comp_in, comp_stat, max_page_uncomp_data_size, stream); } else { - CUDA_TRY(gpu_snap(comp_in.data(), comp_stat.data(), pages_in_batch, stream)); + CUDF_CUDA_TRY(gpu_snap(comp_in.data(), comp_stat.data(), pages_in_batch, stream)); } break; default: break; @@ -1136,11 +1136,11 @@ void writer::impl::encode_pages(hostdevice_2dvector& chunks GatherPages(d_chunks_in_batch.flat_view(), pages, stream); auto h_chunks_in_batch = chunks.host_view().subspan(first_rowgroup, rowgroups_in_batch); - CUDA_TRY(cudaMemcpyAsync(h_chunks_in_batch.data(), - d_chunks_in_batch.data(), - d_chunks_in_batch.flat_view().size_bytes(), - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_chunks_in_batch.data(), + d_chunks_in_batch.data(), + d_chunks_in_batch.flat_view().size_bytes(), + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); } @@ -1579,28 +1579,28 @@ void writer::impl::write(table_view const& table, std::vector co // we still need to do a (much smaller) memcpy for the statistics. if (ck.ck_stat_size != 0) { column_chunk_meta.statistics_blob.resize(ck.ck_stat_size); - CUDA_TRY(cudaMemcpyAsync(column_chunk_meta.statistics_blob.data(), - dev_bfr, - ck.ck_stat_size, - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(column_chunk_meta.statistics_blob.data(), + dev_bfr, + ck.ck_stat_size, + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); } } else { if (!host_bfr) { host_bfr = pinned_buffer{[](size_t size) { uint8_t* ptr = nullptr; - CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); return ptr; }(max_chunk_bfr_size), cudaFreeHost}; } // copy the full data - CUDA_TRY(cudaMemcpyAsync(host_bfr.get(), - dev_bfr, - ck.ck_stat_size + ck.compressed_size, - cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(host_bfr.get(), + dev_bfr, + ck.ck_stat_size + ck.compressed_size, + cudaMemcpyDeviceToHost, + stream.value())); stream.synchronize(); out_sink_[p]->host_write(host_bfr.get() + ck.ck_stat_size, ck.compressed_size); if (ck.ck_stat_size != 0) { diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index 34d8307b024..fd510466477 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -55,7 +55,7 @@ inline rmm::device_buffer create_data(data_type type, std::size_t data_size = size_of(type) * size; rmm::device_buffer data(data_size, stream, mr); - CUDA_TRY(cudaMemsetAsync(data.data(), 0, data_size, stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(data.data(), 0, data_size, stream.value())); return data; } diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp index a6bfb0d888f..ed8c3d6e1e3 100644 --- a/cpp/src/io/utilities/config_utils.cpp +++ b/cpp/src/io/utilities/config_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/utilities/config_utils.hpp b/cpp/src/io/utilities/config_utils.hpp index 4b993043dd1..80c20529687 100644 --- a/cpp/src/io/utilities/config_utils.hpp +++ b/cpp/src/io/utilities/config_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 367bbfcbdfa..5c73cf31428 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -54,7 +54,7 @@ class hostdevice_vector { : num_elements(initial_size), max_elements(max_size) { if (max_elements != 0) { - CUDA_TRY(cudaMallocHost(&h_data, sizeof(T) * max_elements)); + CUDF_CUDA_TRY(cudaMallocHost(&h_data, sizeof(T) * max_elements)); d_data.resize(sizeof(T) * max_elements, stream); } } @@ -101,14 +101,14 @@ class hostdevice_vector { void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false) { - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( d_data.data(), h_data, memory_size(), cudaMemcpyHostToDevice, stream.value())); if (synchronize) { stream.synchronize(); } } void device_to_host(rmm::cuda_stream_view stream, bool synchronize = false) { - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( h_data, d_data.data(), memory_size(), cudaMemcpyDeviceToHost, stream.value())); if (synchronize) { stream.synchronize(); } } diff --git a/cpp/src/io/utilities/parsing_utils.cu b/cpp/src/io/utilities/parsing_utils.cu index 2db87736848..a03789464cc 100644 --- a/cpp/src/io/utilities/parsing_utils.cu +++ b/cpp/src/io/utilities/parsing_utils.cu @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include #include #include @@ -106,7 +122,7 @@ cudf::size_type find_all_from_set(device_span data, { int block_size = 0; // suggested thread count to use int min_grid_size = 0; // minimum block count required - CUDA_TRY( + CUDF_CUDA_TRY( cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, count_and_set_positions)); const int grid_size = divCeil(data.size(), (size_t)block_size); @@ -131,7 +147,7 @@ cudf::size_type find_all_from_set(host_span data, int block_size = 0; // suggested thread count to use int min_grid_size = 0; // minimum block count required - CUDA_TRY( + CUDF_CUDA_TRY( cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, count_and_set_positions)); const size_t chunk_count = divCeil(data.size(), max_chunk_bytes); @@ -143,7 +159,7 @@ cudf::size_type find_all_from_set(host_span data, const int grid_size = divCeil(chunk_bits, block_size); // Copy chunk to device - CUDA_TRY( + CUDF_CUDA_TRY( cudaMemcpyAsync(d_chunk.data(), h_chunk, chunk_bytes, cudaMemcpyDefault, stream.value())); for (char key : keys) { diff --git a/cpp/src/jit/cache.cpp b/cpp/src/jit/cache.cpp index 159681eaffc..8228ff6da1f 100644 --- a/cpp/src/jit/cache.cpp +++ b/cpp/src/jit/cache.cpp @@ -77,9 +77,9 @@ std::filesystem::path get_cache_dir() int device; int cc_major; int cc_minor; - CUDA_TRY(cudaGetDevice(&device)); - CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device)); - CUDA_TRY(cudaDeviceGetAttribute(&cc_minor, cudaDevAttrComputeCapabilityMinor, device)); + CUDF_CUDA_TRY(cudaGetDevice(&device)); + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device)); + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_minor, cudaDevAttrComputeCapabilityMinor, device)); int cc = cc_major * 10 + cc_minor; kernel_cache_path /= std::to_string(cc); diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index dc62eeec539..9bf7e6a7a43 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 043c04b409e..01a94457b69 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -139,7 +139,7 @@ void materialize_bitmask(column_view const& left_col, } } - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); } struct side_index_generator { @@ -212,7 +212,7 @@ index_vector generate_merged_indices(table_view const& left_table, ineq_op); } - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return merged_indices; } diff --git a/cpp/src/quantiles/quantiles_util.hpp b/cpp/src/quantiles/quantiles_util.hpp index 171b81152ff..280a42d9e20 100644 --- a/cpp/src/quantiles/quantiles_util.hpp +++ b/cpp/src/quantiles/quantiles_util.hpp @@ -29,7 +29,7 @@ CUDF_HOST_DEVICE inline Result get_array_value(T const* devarr, size_type locati #if defined(__CUDA_ARCH__) result = devarr[location]; #else - CUDA_TRY(cudaMemcpy(&result, devarr + location, sizeof(T), cudaMemcpyDeviceToHost)); + CUDF_CUDA_TRY(cudaMemcpy(&result, devarr + location, sizeof(T), cudaMemcpyDeviceToHost)); #endif return static_cast(result); } diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 61f728447e8..454a8c9d694 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -216,7 +216,7 @@ struct minmax_functor { // copy the minmax_pair to the host; does not copy the strings using OutputType = minmax_pair; OutputType host_result; - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( &host_result, dev_result.data(), sizeof(OutputType), cudaMemcpyDeviceToHost, stream.value())); // strings are copied to create the scalars here return {std::make_unique(host_result.min_val, true, stream, mr), @@ -235,7 +235,7 @@ struct minmax_functor { // copy the minmax_pair to the host to call get_element using OutputType = minmax_pair; OutputType host_result; - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( &host_result, dev_result.data(), sizeof(OutputType), cudaMemcpyDeviceToHost, stream.value())); // get the keys for those indexes auto const keys = dictionary_column_view(col).keys(); diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index 3b8cc17c4aa..885d7e904b4 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -67,7 +67,7 @@ struct scan_dispatcher { thrust::exclusive_scan( rmm::exec_policy(stream), begin, begin + input.size(), output.data(), identity, Op{}); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return output_column; } diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 9d07f340ebf..5ffdf1f5c56 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -120,7 +120,7 @@ struct scan_functor { thrust::inclusive_scan( rmm::exec_policy(stream), begin, begin + input_view.size(), result.data(), Op{}); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return output_column; } }; diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index d704b18774f..ca07d60f426 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -1281,7 +1281,7 @@ std::unique_ptr rolling_window_udf(column_view const& input, output->set_null_count(output->size() - device_valid_count.value(stream)); // check the stream for debugging - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return output; } diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 76ec171052a..19bb60ef1a8 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -114,7 +114,7 @@ std::string string_scalar::to_string(rmm::cuda_stream_view stream) const { std::string result; result.resize(_data.size()); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( &result[0], _data.data(), _data.size(), cudaMemcpyDeviceToHost, stream.value())); stream.synchronize(); return result; diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 477666d93ae..29eddf703df 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -97,7 +97,8 @@ std::unique_ptr search_ordered(table_view const& t, // Handle empty inputs if (t.num_rows() == 0) { - CUDA_TRY(cudaMemsetAsync(result_out, 0, values.num_rows() * sizeof(size_type), stream.value())); + CUDF_CUDA_TRY( + cudaMemsetAsync(result_out, 0, values.num_rows() * sizeof(size_type), stream.value())); return result; } diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index adfd24f1ca2..6a90a605ca3 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -87,11 +87,11 @@ std::unique_ptr join_strings(strings_column_view const& strings, auto offsets_view = offsets_column->mutable_view(); // set the first entry to 0 and the last entry to bytes int32_t new_offsets[] = {0, static_cast(bytes)}; - CUDA_TRY(cudaMemcpyAsync(offsets_view.data(), - new_offsets, - sizeof(new_offsets), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(offsets_view.data(), + new_offsets, + sizeof(new_offsets), + cudaMemcpyHostToDevice, + stream.value())); // build null mask // only one entry so it is either all valid or all null diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index ac3c4df6aeb..1a423ef8eec 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -153,11 +153,11 @@ struct format_compiler { // create program in device memory d_items.resize(items.size(), stream); - CUDA_TRY(cudaMemcpyAsync(d_items.data(), - items.data(), - items.size() * sizeof(items[0]), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_items.data(), + items.data(), + items.size() * sizeof(items[0]), + cudaMemcpyHostToDevice, + stream.value())); } format_item const* compiled_format_items() { return d_items.data(); } diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 9fa033e9f9a..fedb8d38a08 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -297,7 +297,7 @@ std::unique_ptr concatenate(host_span columns, cudf::detail::get_value(offsets_child, column_size + column_offset, stream) - bytes_offset; - CUDA_TRY( + CUDF_CUDA_TRY( cudaMemcpyAsync(d_new_chars, d_chars, bytes, cudaMemcpyDeviceToDevice, stream.value())); // get ready for the next column diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cu index b286812226b..3bcf55cf069 100644 --- a/cpp/src/strings/regex/regexec.cu +++ b/cpp/src/strings/regex/regexec.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -163,7 +163,7 @@ std::unique_ptr> reprog_devic } // copy flat prog to device memory - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( d_buffer->data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice, stream.value())); // auto deleter = [d_buffer, d_relists](reprog_device* t) { diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index c0673a5e2b5..d496b46bc36 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -81,10 +81,10 @@ auto generate_empty_output(strings_column_view const& input, auto offsets_column = make_numeric_column( data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - CUDA_TRY(cudaMemsetAsync(offsets_column->mutable_view().template data(), - 0, - offsets_column->size() * sizeof(offset_type), - stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(offsets_column->mutable_view().template data(), + 0, + offsets_column->size() * sizeof(offset_type), + stream.value())); return make_strings_column(strings_count, std::move(offsets_column), @@ -264,7 +264,7 @@ auto make_strings_children(Func fn, } else { // Compute the offsets values from the provided output string sizes. auto const string_sizes = output_strings_sizes.value(); - CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(offset_type), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(offset_type), stream.value())); thrust::inclusive_scan(rmm::exec_policy(stream), string_sizes.template begin(), string_sizes.template end(), diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index 825f09c66e6..d7cc72fdfff 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -130,9 +130,9 @@ const character_flags_table_type* get_character_flags_table() { return d_character_codepoint_flags.find_or_initialize([&](void) { character_flags_table_type* table = nullptr; - CUDA_TRY(cudaMemcpyToSymbol( + CUDF_CUDA_TRY(cudaMemcpyToSymbol( character_codepoint_flags, g_character_codepoint_flags, sizeof(g_character_codepoint_flags))); - CUDA_TRY(cudaGetSymbolAddress((void**)&table, character_codepoint_flags)); + CUDF_CUDA_TRY(cudaGetSymbolAddress((void**)&table, character_codepoint_flags)); return table; }); } @@ -144,9 +144,9 @@ const character_cases_table_type* get_character_cases_table() { return d_character_cases_table.find_or_initialize([&](void) { character_cases_table_type* table = nullptr; - CUDA_TRY(cudaMemcpyToSymbol( + CUDF_CUDA_TRY(cudaMemcpyToSymbol( character_cases_table, g_character_cases_table, sizeof(g_character_cases_table))); - CUDA_TRY(cudaGetSymbolAddress((void**)&table, character_cases_table)); + CUDF_CUDA_TRY(cudaGetSymbolAddress((void**)&table, character_cases_table)); return table; }); } @@ -158,9 +158,9 @@ const special_case_mapping* get_special_case_mapping_table() { return d_special_case_mappings.find_or_initialize([&](void) { special_case_mapping* table = nullptr; - CUDA_TRY(cudaMemcpyToSymbol( + CUDF_CUDA_TRY(cudaMemcpyToSymbol( character_special_case_mappings, g_special_case_mappings, sizeof(g_special_case_mappings))); - CUDA_TRY(cudaGetSymbolAddress((void**)&table, character_special_case_mappings)); + CUDF_CUDA_TRY(cudaGetSymbolAddress((void**)&table, character_special_case_mappings)); return table; }); } diff --git a/cpp/src/text/edit_distance.cu b/cpp/src/text/edit_distance.cu index 6ec364cc048..b69d735f612 100644 --- a/cpp/src/text/edit_distance.cu +++ b/cpp/src/text/edit_distance.cu @@ -231,7 +231,7 @@ std::unique_ptr edit_distance_matrix(cudf::strings_column_view con cudf::size_type n_upper = (strings_count * (strings_count - 1)) / 2; rmm::device_uvector offsets(n_upper, stream); auto d_offsets = offsets.data(); - CUDA_TRY(cudaMemsetAsync(d_offsets, 0, n_upper * sizeof(cudf::size_type), stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(d_offsets, 0, n_upper * sizeof(cudf::size_type), stream.value())); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0), diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu index 9ab769f9edd..00094f2de71 100644 --- a/cpp/src/text/subword/load_hash_file.cu +++ b/cpp/src/text/subword/load_hash_file.cu @@ -52,12 +52,12 @@ rmm::device_uvector get_codepoint_metadata(rmm::cuda_st table + cp_section1_end, table + codepoint_metadata_size, codepoint_metadata_default_value); - CUDA_TRY(cudaMemcpyAsync(table, - codepoint_metadata, - cp_section1_end * sizeof(codepoint_metadata[0]), // 1st section - cudaMemcpyHostToDevice, - stream.value())); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync(table, + codepoint_metadata, + cp_section1_end * sizeof(codepoint_metadata[0]), // 1st section + cudaMemcpyHostToDevice, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync( table + cp_section2_begin, cp_metadata_917505_917999, (cp_section2_end - cp_section2_begin + 1) * sizeof(codepoint_metadata[0]), // 2nd section @@ -80,24 +80,24 @@ rmm::device_uvector get_aux_codepoint_data(rmm::cuda_st table + aux_section1_end, table + aux_codepoint_data_size, aux_codepoint_default_value); - CUDA_TRY(cudaMemcpyAsync(table, - aux_codepoint_data, - aux_section1_end * sizeof(aux_codepoint_data[0]), // 1st section - cudaMemcpyHostToDevice, - stream.value())); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync(table, + aux_codepoint_data, + aux_section1_end * sizeof(aux_codepoint_data[0]), // 1st section + cudaMemcpyHostToDevice, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync( table + aux_section2_begin, aux_cp_data_44032_55203, (aux_section2_end - aux_section2_begin + 1) * sizeof(aux_codepoint_data[0]), // 2nd section cudaMemcpyHostToDevice, stream.value())); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( table + aux_section3_begin, aux_cp_data_70475_71099, (aux_section3_end - aux_section3_begin + 1) * sizeof(aux_codepoint_data[0]), // 3rd section cudaMemcpyHostToDevice, stream.value())); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( table + aux_section4_begin, aux_cp_data_119134_119232, (aux_section4_end - aux_section4_begin + 1) * sizeof(aux_codepoint_data[0]), // 4th section @@ -236,33 +236,33 @@ std::unique_ptr load_vocabulary_file( cudf::mask_state::UNALLOCATED, stream, mr); - CUDA_TRY(cudaMemcpyAsync(result.table->mutable_view().data(), - table.data(), - table.size() * sizeof(uint64_t), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(result.table->mutable_view().data(), + table.data(), + table.size() * sizeof(uint64_t), + cudaMemcpyHostToDevice, + stream.value())); result.bin_coefficients = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT64}, bin_coefficients.size(), cudf::mask_state::UNALLOCATED, stream, mr); - CUDA_TRY(cudaMemcpyAsync(result.bin_coefficients->mutable_view().data(), - bin_coefficients.data(), - bin_coefficients.size() * sizeof(uint64_t), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(result.bin_coefficients->mutable_view().data(), + bin_coefficients.data(), + bin_coefficients.size() * sizeof(uint64_t), + cudaMemcpyHostToDevice, + stream.value())); result.bin_offsets = cudf::make_numeric_column(cudf::data_type{cudf::type_id::UINT16}, bin_offsets.size(), cudf::mask_state::UNALLOCATED, stream, mr); - CUDA_TRY(cudaMemcpyAsync(result.bin_offsets->mutable_view().data(), - bin_offsets.data(), - bin_offsets.size() * sizeof(uint16_t), - cudaMemcpyHostToDevice, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(result.bin_offsets->mutable_view().data(), + bin_offsets.data(), + bin_offsets.size() * sizeof(uint16_t), + cudaMemcpyHostToDevice, + stream.value())); auto cp_metadata = detail::get_codepoint_metadata(stream); auto const cp_metadata_size = static_cast(cp_metadata.size()); diff --git a/cpp/src/text/subword/wordpiece_tokenizer.cu b/cpp/src/text/subword/wordpiece_tokenizer.cu index 82bb50c6aaa..7d8df583039 100644 --- a/cpp/src/text/subword/wordpiece_tokenizer.cu +++ b/cpp/src/text/subword/wordpiece_tokenizer.cu @@ -457,7 +457,7 @@ void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stre num_code_points, device_token_ids.data(), device_tokens_per_word.data()); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); cudf::detail::grid_1d const grid_mark{static_cast(num_strings + 1), THREADS_PER_BLOCK}; @@ -469,7 +469,7 @@ void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stre device_start_word_indices, device_end_word_indices, num_strings); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); // check for special tokens and adjust indices thrust::for_each_n( @@ -512,7 +512,7 @@ void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stre num_words, device_token_ids.data(), device_tokens_per_word.data()); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); // Repurpose the input array for the token ids. In the worst case, each code point ends up being a // token so this will always have enough memory to store the contiguous tokens. diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu index bc3678380be..74433af9f05 100644 --- a/cpp/src/transform/compute_column.cu +++ b/cpp/src/transform/compute_column.cu @@ -102,9 +102,9 @@ std::unique_ptr compute_column(table_view const& table, // Configure kernel parameters auto const& device_expression_data = parser.device_expression_data; int device_id; - CUDA_TRY(cudaGetDevice(&device_id)); + CUDF_CUDA_TRY(cudaGetDevice(&device_id)); int shmem_limit_per_block; - CUDA_TRY( + CUDF_CUDA_TRY( cudaDeviceGetAttribute(&shmem_limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); auto constexpr MAX_BLOCK_SIZE = 128; auto const block_size = @@ -125,7 +125,7 @@ std::unique_ptr compute_column(table_view const& table, <<>>( *table_device, device_expression_data, *mutable_output_device); } - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return output_column; } diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 0f06be0149e..744cec90fd9 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -505,9 +505,9 @@ std::unique_ptr row_bit_count(table_view const& t, // of memory of size (# input rows * sizeof(row_span) * max_branch_depth). auto const shmem_per_thread = sizeof(row_span) * h_info.max_branch_depth; int device_id; - CUDA_TRY(cudaGetDevice(&device_id)); + CUDF_CUDA_TRY(cudaGetDevice(&device_id)); int shmem_limit_per_block; - CUDA_TRY( + CUDF_CUDA_TRY( cudaDeviceGetAttribute(&shmem_limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); constexpr int max_block_size = 256; auto const block_size = diff --git a/cpp/src/unary/unary_ops.cuh b/cpp/src/unary/unary_ops.cuh index 19d78b010ec..08b68cc0591 100644 --- a/cpp/src/unary/unary_ops.cuh +++ b/cpp/src/unary/unary_ops.cuh @@ -70,7 +70,7 @@ struct launcher { thrust::transform( rmm::exec_policy(stream), input.begin(), input.end(), output_view.begin(), F{}); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return output; } diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 3a479f0860b..6c2c0716331 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -88,10 +88,10 @@ rmm::device_uvector make_mask(cudf::size_type size, bool fil return cudf::detail::make_zeroed_device_uvector_sync(size); } else { auto ret = rmm::device_uvector(size, rmm::cuda_stream_default); - CUDA_TRY(cudaMemsetAsync(ret.data(), - ~cudf::bitmask_type{0}, - size * sizeof(cudf::bitmask_type), - rmm::cuda_stream_default.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), + ~cudf::bitmask_type{0}, + size * sizeof(cudf::bitmask_type), + rmm::cuda_stream_default.value())); return ret; } } @@ -530,10 +530,10 @@ void cleanEndWord(rmm::device_buffer& mask, int begin_bit, int end_bit) auto number_of_bits = end_bit - begin_bit; if (number_of_bits % 32 != 0) { cudf::bitmask_type end_mask = 0; - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( &end_mask, ptr + number_of_mask_words - 1, sizeof(end_mask), cudaMemcpyDeviceToHost)); end_mask = end_mask & ((1 << (number_of_bits % 32)) - 1); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( ptr + number_of_mask_words - 1, &end_mask, sizeof(end_mask), cudaMemcpyHostToDevice)); } } diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index 93e4e588e0e..4d76008fd13 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -62,9 +62,9 @@ struct TypedColumnTest : public cudf::test::BaseFixture { std::iota(h_data.begin(), h_data.end(), char{0}); std::vector h_mask(mask.size()); std::iota(h_mask.begin(), h_mask.end(), char{0}); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( typed_data, h_data.data(), data.size(), cudaMemcpyHostToDevice, stream.value())); - CUDA_TRY(cudaMemcpyAsync( + CUDF_CUDA_TRY(cudaMemcpyAsync( typed_mask, h_mask.data(), mask.size(), cudaMemcpyHostToDevice, stream.value())); stream.synchronize(); } diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index 581268f26f4..1067366d010 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -155,7 +155,7 @@ struct AtomicsTest : public cudf::test::BaseFixture { auto host_result = cudf::detail::make_host_vector_sync(dev_result); - CHECK_CUDA(rmm::cuda_stream_default.value()); + CUDF_CHECK_CUDA(rmm::cuda_stream_default.value()); if (!is_timestamp_sum()) { EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed"; @@ -302,7 +302,7 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture { auto host_result = cudf::detail::make_host_vector_sync(dev_result); - CHECK_CUDA(rmm::cuda_stream_default.value()); + CUDF_CHECK_CUDA(rmm::cuda_stream_default.value()); // print_exact(exact, "exact"); // print_exact(host_result.data(), "result"); diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index da9509e94a6..4327a8b694b 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,28 +36,28 @@ TEST(ExpectsTest, TryCatch) TEST(CudaTryTest, Error) { - CUDA_EXPECT_THROW_MESSAGE(CUDA_TRY(cudaErrorLaunchFailure), + CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure), "cudaErrorLaunchFailure unspecified launch failure"); } -TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDA_TRY(cudaSuccess)); } +TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); } TEST(CudaTryTest, TryCatch) { - CUDA_EXPECT_THROW_MESSAGE(CUDA_TRY(cudaErrorMemoryAllocation), + CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorMemoryAllocation), "cudaErrorMemoryAllocation out of memory"); } -TEST(StreamCheck, success) { EXPECT_NO_THROW(CHECK_CUDA(0)); } +TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); } namespace { // Some silly kernel that will cause an error void __global__ test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } } // namespace -// In a release build and without explicit synchronization, CHECK_CUDA may +// In a release build and without explicit synchronization, CUDF_CHECK_CUDA may // or may not fail on erroneous asynchronous CUDA calls. Invoke // cudaStreamSynchronize to guarantee failure on error. In a non-release build, -// CHECK_CUDA deterministically fails on erroneous asynchronous CUDA +// CUDF_CHECK_CUDA deterministically fails on erroneous asynchronous CUDA // calls. TEST(StreamCheck, FailedKernel) { @@ -67,7 +67,7 @@ TEST(StreamCheck, FailedKernel) #ifdef NDEBUG stream.synchronize(); #endif - EXPECT_THROW(CHECK_CUDA(stream.value()), cudf::cuda_error); + EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); } TEST(StreamCheck, CatchFailedKernel) @@ -78,7 +78,7 @@ TEST(StreamCheck, CatchFailedKernel) #ifndef NDEBUG stream.synchronize(); #endif - CUDA_EXPECT_THROW_MESSAGE(CHECK_CUDA(stream.value()), + CUDA_EXPECT_THROW_MESSAGE(CUDF_CHECK_CUDA(stream.value()), "cudaErrorInvalidConfiguration " "invalid configuration argument"); } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 1f4a8a7e508..cd0aab3caeb 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -1101,11 +1101,11 @@ class custom_test_data_sink : public cudf::io::data_sink { { return std::async(std::launch::deferred, [=] { char* ptr = nullptr; - CUDA_TRY(cudaMallocHost(&ptr, size)); - CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); stream.synchronize(); outfile_.write(ptr, size); - CUDA_TRY(cudaFreeHost(ptr)); + CUDF_CUDA_TRY(cudaFreeHost(ptr)); }); } @@ -2166,11 +2166,11 @@ class custom_test_memmap_sink : public cudf::io::data_sink { { return std::async(std::launch::deferred, [=] { char* ptr = nullptr; - CUDA_TRY(cudaMallocHost(&ptr, size)); - CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); stream.synchronize(); mm_writer->host_write(ptr, size); - CUDA_TRY(cudaFreeHost(ptr)); + CUDF_CUDA_TRY(cudaFreeHost(ptr)); }); } diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 57041e448a2..f560ce7f20c 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -1423,7 +1423,7 @@ TEST_F(JoinTest, HashJoinLargeOutputSize) // self-join a table of zeroes to generate an output row count that would overflow int32_t std::size_t col_size = 65567; rmm::device_buffer zeroes(col_size * sizeof(int32_t), rmm::cuda_stream_default); - CUDA_TRY(cudaMemsetAsync(zeroes.data(), 0, zeroes.size(), rmm::cuda_stream_default.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(zeroes.data(), 0, zeroes.size(), rmm::cuda_stream_default.value())); cudf::column_view col_zeros(cudf::data_type{cudf::type_id::INT32}, col_size, zeroes.data()); cudf::table_view tview{{col_zeros}}; cudf::hash_join hash_join(tview, cudf::null_equality::UNEQUAL); diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index f9ed22150b7..df5b1f5c14a 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index ee4c878726f..30c843a91c4 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -60,13 +60,13 @@ TYPED_TEST(TypedScalarDeviceViewTest, Value) rmm::device_scalar result{rmm::cuda_stream_default}; test_set_value<<<1, 1>>>(scalar_device_view, scalar_device_view1); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); EXPECT_EQ(s1.value(), value); EXPECT_TRUE(s1.is_valid()); test_value<<<1, 1>>>(scalar_device_view, scalar_device_view1, result.data()); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); EXPECT_TRUE(result.value(rmm::cuda_stream_default)); } @@ -85,7 +85,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, ConstructNull) rmm::device_scalar result{rmm::cuda_stream_default}; test_null<<<1, 1>>>(scalar_device_view, result.data()); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); EXPECT_FALSE(result.value(rmm::cuda_stream_default)); } @@ -105,7 +105,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, SetNull) EXPECT_TRUE(s.is_valid()); test_setnull<<<1, 1>>>(scalar_device_view); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); EXPECT_FALSE(s.is_valid()); } @@ -131,7 +131,7 @@ TEST_F(StringScalarDeviceViewTest, Value) auto value_v = cudf::detail::make_device_uvector_sync(value); test_string_value<<<1, 1>>>(scalar_device_view, value_v.data(), value.size(), result.data()); - CHECK_CUDA(0); + CUDF_CHECK_CUDA(0); EXPECT_TRUE(result.value(rmm::cuda_stream_default)); } diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp index a6e1a25ec17..b9ea7a0b078 100644 --- a/cpp/tests/sort/sort_test.cpp +++ b/cpp/tests/sort/sort_test.cpp @@ -86,7 +86,7 @@ TYPED_TEST(Sort, WithNullMax) // the rest of the values are equivalent and yields random sorted order. auto to_host = [](column_view const& col) { thrust::host_vector h_data(col.size()); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); return h_data; }; @@ -124,7 +124,7 @@ TYPED_TEST(Sort, WithNullMin) // the rest of the values are equivalent and yields random sorted order. auto to_host = [](column_view const& col) { thrust::host_vector h_data(col.size()); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); return h_data; }; @@ -160,7 +160,7 @@ TYPED_TEST(Sort, WithMixedNullOrder) // the rest of the values are equivalent and yields random sorted order. auto to_host = [](column_view const& col) { thrust::host_vector h_data(col.size()); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); return h_data; }; diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp index b6b7495136e..ee43c9e7b4b 100644 --- a/cpp/tests/sort/stable_sort_tests.cpp +++ b/cpp/tests/sort/stable_sort_tests.cpp @@ -94,7 +94,7 @@ TYPED_TEST(StableSort, WithNullMax) // the rest of the values are equivalent and yields random sorted order. auto to_host = [](column_view const& col) { thrust::host_vector h_data(col.size()); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); return h_data; }; @@ -130,7 +130,7 @@ TYPED_TEST(StableSort, WithNullMin) // the rest of the values are equivalent and yields random sorted order. auto to_host = [](column_view const& col) { thrust::host_vector h_data(col.size()); - CUDA_TRY(cudaMemcpy( + CUDF_CUDA_TRY(cudaMemcpy( h_data.data(), col.data(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault)); return h_data; }; diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index 0ba4b268c70..6861737bfb5 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -78,7 +78,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) h_offsets[idx + 1] = offset; } auto d_strings = cudf::detail::make_device_uvector_sync(strings); - CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice)); + CUDF_CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice)); auto column = cudf::make_strings_column(d_strings); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); EXPECT_EQ(column->null_count(), nulls); diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 7f8a31ef9bb..5802a1ddc0a 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -302,10 +302,10 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger) auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, (cudf::size_type)d_integers.size()); auto integers_view = integers->mutable_view(); - CUDA_TRY(cudaMemcpy(integers_view.data(), - d_integers.data(), - d_integers.size() * sizeof(TypeParam), - cudaMemcpyDeviceToDevice)); + CUDF_CUDA_TRY(cudaMemcpy(integers_view.data(), + d_integers.data(), + d_integers.size() * sizeof(TypeParam), + cudaMemcpyDeviceToDevice)); integers_view.set_null_count(0); // convert to strings diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index dca80b597c0..d8b2a736bde 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,7 +71,7 @@ TYPED_TEST(TypedDispatcherTest, DeviceDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync(1); dispatch_test_kernel<<<1, 1>>>(cudf::type_to_id(), result.data()); - CUDA_TRY(cudaDeviceSynchronize()); + CUDF_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_EQ(true, result.front_element(rmm::cuda_stream_default)); } @@ -132,7 +132,7 @@ TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch) auto result = cudf::detail::make_zeroed_device_uvector_sync(1); double_dispatch_test_kernel<<<1, 1>>>( cudf::type_to_id(), cudf::type_to_id(), result.data()); - CUDA_TRY(cudaDeviceSynchronize()); + CUDF_CUDA_TRY(cudaDeviceSynchronize()); EXPECT_EQ(true, result.front_element(rmm::cuda_stream_default)); } diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 68626c2d4d3..015178f8c7c 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -824,16 +824,16 @@ std::vector bitmask_to_host(cudf::column_view const& c) auto num_bitmasks = num_bitmask_words(c.size()); std::vector host_bitmask(num_bitmasks); if (c.offset() == 0) { - CUDA_TRY(cudaMemcpy(host_bitmask.data(), - c.null_mask(), - num_bitmasks * sizeof(bitmask_type), - cudaMemcpyDeviceToHost)); + CUDF_CUDA_TRY(cudaMemcpy(host_bitmask.data(), + c.null_mask(), + num_bitmasks * sizeof(bitmask_type), + cudaMemcpyDeviceToHost)); } else { auto mask = copy_bitmask(c.null_mask(), c.offset(), c.offset() + c.size()); - CUDA_TRY(cudaMemcpy(host_bitmask.data(), - mask.data(), - num_bitmasks * sizeof(bitmask_type), - cudaMemcpyDeviceToHost)); + CUDF_CUDA_TRY(cudaMemcpy(host_bitmask.data(), + mask.data(), + num_bitmasks * sizeof(bitmask_type), + cudaMemcpyDeviceToHost)); } return host_bitmask; diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 562501c01c6..dbdf8e59e6a 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # # cudf documentation build configuration file, created by # sphinx-quickstart on Wed May 3 10:59:22 2017. diff --git a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb index 169eec07914..b34bbd3f193 100644 --- a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb +++ b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb @@ -45,23 +45,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "158 µs ± 306 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "419 µs ± 149 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/rapids/lib/python3.7/site-packages/cudf/core/dataframe.py:3044: FutureWarning: The as_gpu_matrix method will be removed in a future cuDF release. Consider using `to_cupy` instead.\n", - " FutureWarning,\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "339 µs ± 282 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + "167 µs ± 789 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "497 µs ± 1.19 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", + "502 µs ± 1.34 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" ] } ], @@ -72,9 +58,9 @@ " 'c':range(1000, nelem + 1000)}\n", " )\n", "\n", - "%timeit arr_cupy = cp.fromDlpack(df.to_dlpack())\n", + "%timeit arr_cupy = cp.from_dlpack(df.to_dlpack())\n", "%timeit arr_cupy = df.values\n", - "%timeit arr_cupy = cp.asarray(df.as_gpu_matrix())" + "%timeit arr_cupy = df.to_cupy()" ] }, { @@ -100,7 +86,7 @@ } ], "source": [ - "arr_cupy = cp.fromDlpack(df.to_dlpack())\n", + "arr_cupy = cp.from_dlpack(df.to_dlpack())\n", "arr_cupy" ] }, @@ -131,9 +117,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "45.4 µs ± 63.9 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "127 µs ± 351 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "135 µs ± 5.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" + "75.2 µs ± 117 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "185 µs ± 630 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "169 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" ] } ], @@ -141,7 +127,7 @@ "col = 'a'\n", "\n", "%timeit cola_cupy = cp.asarray(df[col])\n", - "%timeit cola_cupy = cp.fromDlpack(df[col].to_dlpack())\n", + "%timeit cola_cupy = cp.from_dlpack(df[col].to_dlpack())\n", "%timeit cola_cupy = df[col].values" ] }, @@ -270,7 +256,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "15.5 ms ± 7.55 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "22 ms ± 26.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], @@ -524,7 +510,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "7.26 ms ± 3.32 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "8.6 ms ± 33.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -544,7 +530,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "4.87 ms ± 2.08 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "5.56 ms ± 37.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -1037,7 +1023,7 @@ } ], "source": [ - "new_arr = cp.fromDlpack(reshaped_df.to_dlpack())\n", + "new_arr = cp.from_dlpack(reshaped_df.to_dlpack())\n", "new_arr.sum(axis=1)" ] }, @@ -1075,7 +1061,7 @@ " if sparseformat == 'row':\n", " _sparse_constructor = cp.sparse.csr_matrix\n", "\n", - " return _sparse_constructor(cp.fromDlpack(data.to_dlpack()))" + " return _sparse_constructor(cp.from_dlpack(data.to_dlpack()))" ] }, { @@ -1154,134 +1140,141 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", - " \n", " \n", - " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", "
0.00.00.00.00.04.7044333.3800140.00.0000000.00.011.0301360.00.0000000.00.00.0000000.0-1.1622750.0000000.0000000.0000000.0000000.000005.7268060.00.00.00.000000
10.00.00.00.00.00.0000000.00.0000000.00.00.00.0000000.00.011.4604030.00.0000000.0000000.0000000.0000005.9178460.0000005.908860.0000000.00.00.00.000000
20.00.00.00.00.00.0000000.00.0000000.00.00.00.0000000.00.00.0000000.00.0000000.4073920.0000000.0000000.0000006.6465640.000000.0000000.00.00.03.399164
30.00.00.00.00.00.0000000.00.0000000.00.00.00.0000000.00.014.0921000.0000000.00.3787810.4209530.0000000.0000000.000000.0000000.00.00.00.000000
40.00.00.00.00.00.0000000.08.2994250.00.00.02.0964010.00.1092422.5417980.00.0715638.2233870.0000000.00.0000000.0000000.0000000.0000010.7446240.00.00.00.000000
\n", "" ], "text/plain": [ - " a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 \\\n", - "0 0.0 0.0 0.0 0.0 0.0 4.704433 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "1 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "2 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "3 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "4 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 8.299425 0.0 0.0 0.0 2.096401 \n", + " a0 a1 a2 a3 a4 a5 a6 a7 a8 \\\n", + "0 0.0 0.0 0.0 3.380014 0.0 0.000000 11.030136 0.0 0.000000 \n", + "1 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 0.000000 \n", + "2 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 0.000000 \n", + "3 0.0 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 14.092100 \n", + "4 0.0 0.0 0.0 0.000000 0.0 0.109242 2.541798 0.0 0.071563 \n", + "\n", + " a9 a10 a11 a12 a13 a14 a15 a16 \\\n", + "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 5.726806 0.0 \n", + "1 0.000000 0.000000 0.000000 5.917846 0.000000 5.90886 0.000000 0.0 \n", + "2 0.000000 0.000000 0.000000 0.000000 6.646564 0.00000 0.000000 0.0 \n", + "3 0.000000 0.378781 0.420953 0.000000 0.000000 0.00000 0.000000 0.0 \n", + "4 8.223387 0.000000 0.000000 0.000000 0.000000 0.00000 10.744624 0.0 \n", "\n", - " a12 a13 a14 a15 a16 a17 a18 a19 \n", - "0 0.0 0.0 0.000000 0.0 -1.162275 0.000000 0.0 0.0 \n", - "1 0.0 0.0 11.460403 0.0 0.000000 0.000000 0.0 0.0 \n", - "2 0.0 0.0 0.000000 0.0 0.000000 0.407392 0.0 0.0 \n", - "3 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 0.0 \n", - "4 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 0.0 " + " a17 a18 a19 \n", + "0 0.0 0.0 0.000000 \n", + "1 0.0 0.0 0.000000 \n", + "2 0.0 0.0 3.399164 \n", + "3 0.0 0.0 0.000000 \n", + "4 0.0 0.0 0.000000 " ] }, "execution_count": 20, @@ -1302,57 +1295,57 @@ "name": "stdout", "output_type": "stream", "text": [ - " (41, 0)\t8.237732918475851\n", - " (49, 0)\t-4.161219849238402\n", - " (70, 0)\t-1.646588718395583\n", - " (80, 0)\t11.607048248828713\n", - " (81, 0)\t11.387095517746493\n", - " (105, 0)\t4.059008225609349\n", - " (107, 0)\t9.299030876304984\n", - " (108, 0)\t10.652087054434446\n", - " (127, 0)\t2.442578989241219\n", - " (133, 0)\t-0.7674141633646347\n", - " (135, 0)\t-6.091151515788713\n", - " (145, 0)\t2.968949150266586\n", - " (148, 0)\t5.649147779687932\n", - " (158, 0)\t7.7809955768930745\n", - " (166, 0)\t5.801884262747882\n", - " (175, 0)\t7.3205065025042\n", - " (181, 0)\t13.704683370645277\n", - " (204, 0)\t15.915619596241733\n", - " (207, 0)\t-0.2205888963107494\n", - " (209, 0)\t3.565578265020142\n", - " (215, 0)\t4.1493767841754154\n", - " (231, 0)\t3.4286524053271803\n", - " (233, 0)\t6.021200022977307\n", - " (241, 0)\t4.247163658236771\n", - " (249, 0)\t1.8502158424149273\n", + " (896, 0)\t0.7194778152522069\n", + " (385, 0)\t5.061243119202521\n", + " (899, 0)\t8.032932656540671\n", + " (1028, 0)\t10.072155866140903\n", + " (133, 0)\t13.27741318265092\n", + " (518, 0)\t2.242099518010387\n", + " (647, 0)\t6.487369007371155\n", + " (776, 0)\t5.621989952370181\n", + " (9, 0)\t8.833796529523534\n", + " (521, 0)\t7.719749292928572\n", + " (777, 0)\t7.4610987015782975\n", + " (394, 0)\t10.09026095476732\n", + " (140, 0)\t2.974228870142501\n", + " (653, 0)\t4.520704347545524\n", + " (1037, 0)\t4.53896886415556\n", + " (400, 0)\t4.0198547103826705\n", + " (401, 0)\t-0.2557920447399875\n", + " (1041, 0)\t1.8627471984893114\n", + " (146, 0)\t9.834516073722536\n", + " (1042, 0)\t7.850006814937681\n", + " (275, 0)\t1.5747512513374389\n", + " (662, 0)\t6.717038670488377\n", + " (25, 0)\t7.311464380885098\n", + " (281, 0)\t3.5147599499072024\n", + " (409, 0)\t1.121874214291239\n", " :\t:\n", - " (9729, 19)\t7.226429647432215\n", - " (9762, 19)\t-0.6042314722021014\n", - " (9764, 19)\t-1.4827372788735615\n", - " (9769, 19)\t4.140245505599609\n", - " (9776, 19)\t-0.3441145182655059\n", - " (9781, 19)\t-0.235562982602191\n", - " (9782, 19)\t2.1458765970993223\n", - " (9791, 19)\t7.219427633840467\n", - " (9803, 19)\t6.6874487362355115\n", - " (9807, 19)\t5.1769501512294465\n", - " (9823, 19)\t-1.1040045399744103\n", - " (9828, 19)\t3.074156937033751\n", - " (9849, 19)\t0.4663962936122451\n", - " (9851, 19)\t10.302861735090476\n", - " (9862, 19)\t1.9377857550195872\n", - " (9893, 19)\t8.991541850619656\n", - " (9896, 19)\t-0.9003118390325282\n", - " (9919, 19)\t2.4984693551284587\n", - " (9934, 19)\t1.6161057487404191\n", - " (9944, 19)\t6.063387997554039\n", - " (9945, 19)\t11.038782286791717\n", - " (9954, 19)\t13.750186699958661\n", - " (9979, 19)\t0.9225731640357893\n", - " (9995, 19)\t-1.775155437069923\n", - " (9998, 19)\t12.265785237649636\n" + " (8290, 19)\t19.23532976720017\n", + " (8679, 19)\t3.9092712623274224\n", + " (8935, 19)\t0.8411008847310036\n", + " (9063, 19)\t12.010953214709328\n", + " (9319, 19)\t3.470064419440258\n", + " (8683, 19)\t14.397876149427695\n", + " (8300, 19)\t10.524275022546979\n", + " (8301, 19)\t0.6266917401191829\n", + " (8557, 19)\t-0.4554588974911311\n", + " (9197, 19)\t12.379896820812874\n", + " (8304, 19)\t1.3276250981825033\n", + " (8563, 19)\t-1.579631321204169\n", + " (8442, 19)\t6.881252269650868\n", + " (8315, 19)\t0.5811637925849389\n", + " (8575, 19)\t15.52855242553137\n", + " (9343, 19)\t-0.12679091919544638\n", + " (9569, 19)\t9.316119794827424\n", + " (9570, 19)\t10.791371431930969\n", + " (9443, 19)\t4.7035396189880645\n", + " (9452, 19)\t-0.9924476181662789\n", + " (9713, 19)\t-3.2038209275781346\n", + " (9719, 19)\t0.6578276176100656\n", + " (9847, 19)\t9.57555910183088\n", + " (9724, 19)\t0.990362915454171\n", + " (9855, 19)\t1.153449284622368\n" ] } ], @@ -1373,7 +1366,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1387,7 +1380,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index 17ba1c6d53f..2382abe38a9 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/LZ4Compressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/LZ4Compressor.java deleted file mode 100644 index 67a770f1346..00000000000 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/LZ4Compressor.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ai.rapids.cudf.nvcomp; - -import ai.rapids.cudf.Cuda; -import ai.rapids.cudf.BaseDeviceMemoryBuffer; -import ai.rapids.cudf.DeviceMemoryBuffer; -import ai.rapids.cudf.HostMemoryBuffer; - -/** Single-buffer compressor implementing LZ4 */ -public class LZ4Compressor { - - /** LZ4 compression settings corresponding to a chunk size */ - public static final class Configuration { - private final long metadataBytes; - private final long tempBytes; - private final long maxCompressedBytes; - - Configuration(long metadataBytes, long tempBytes, long maxCompressedBytes) { - this.metadataBytes = metadataBytes; - this.tempBytes = tempBytes; - this.maxCompressedBytes = maxCompressedBytes; - } - - /** Get the size of the metadata information in bytes */ - public long getMetadataBytes() { - return metadataBytes; - } - - /** Get the size of the temporary storage in bytes needed to compress */ - public long getTempBytes() { - return tempBytes; - } - - /** Get the maximum compressed output size in bytes */ - public long getMaxCompressedBytes() { - return maxCompressedBytes; - } - } - - /** - * Get the compression configuration necessary for a particular chunk size. - * @param chunkSize size of an LZ4 chunk in bytes - * @param uncompressedSize total size of the uncompressed data - * @return compression configuration for the specified chunk size - */ - public static Configuration configure(long chunkSize, long uncompressedSize) { - long[] configs = NvcompJni.lz4CompressConfigure(chunkSize, uncompressedSize); - assert configs.length == 3; - return new Configuration(configs[0], configs[1], configs[2]); - } - - /** - * Synchronously compress a buffer with LZ4. - * @param input buffer to compress - * @param inputType type of data within the buffer - * @param chunkSize compression chunk size to use - * @param tempBuffer temporary storage space - * @param output buffer that will contain the compressed result - * @param stream CUDA stream to use - * @return size of the resulting compressed data stored to the output buffer - */ - public static long compress(BaseDeviceMemoryBuffer input, CompressionType inputType, - long chunkSize, BaseDeviceMemoryBuffer tempBuffer, - BaseDeviceMemoryBuffer output, Cuda.Stream stream) { - if (chunkSize <= 0) { - throw new IllegalArgumentException("Illegal chunk size: " + chunkSize); - } - try (DeviceMemoryBuffer devOutputSizeBuffer = DeviceMemoryBuffer.allocate(Long.BYTES); - HostMemoryBuffer hostOutputSizeBuffer = HostMemoryBuffer.allocate(Long.BYTES)) { - compressAsync(devOutputSizeBuffer, input, inputType, chunkSize, tempBuffer, output, stream); - hostOutputSizeBuffer.copyFromDeviceBuffer(devOutputSizeBuffer, stream); - return hostOutputSizeBuffer.getLong(0); - } - } - - /** - * Asynchronously compress a buffer with LZ4. The compressed size output buffer must be pinned - * memory for this operation to be truly asynchronous. Note that the caller must synchronize - * on the specified CUDA stream in order to safely examine the compressed output size! - * @param compressedSizeOutputBuffer device memory where the compressed output size will be stored - * @param input buffer to compress - * @param inputType type of data within the buffer - * @param chunkSize compression chunk size to use - * @param tempBuffer temporary storage space - * @param output buffer that will contain the compressed result - * @param stream CUDA stream to use - */ - public static void compressAsync(DeviceMemoryBuffer compressedSizeOutputBuffer, - BaseDeviceMemoryBuffer input, CompressionType inputType, - long chunkSize, BaseDeviceMemoryBuffer tempBuffer, - BaseDeviceMemoryBuffer output, Cuda.Stream stream) { - if (chunkSize <= 0) { - throw new IllegalArgumentException("Illegal chunk size: " + chunkSize); - } - if (compressedSizeOutputBuffer.getLength() < 8) { - throw new IllegalArgumentException("compressed output size buffer must be able to hold " + - "at least 8 bytes, size is only " + compressedSizeOutputBuffer.getLength()); - } - NvcompJni.lz4CompressAsync( - compressedSizeOutputBuffer.getAddress(), - input.getAddress(), - input.getLength(), - inputType.nativeId, - chunkSize, - tempBuffer.getAddress(), - tempBuffer.getLength(), - output.getAddress(), - output.getLength(), - stream.getStream()); - } -} diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/LZ4Decompressor.java b/java/src/main/java/ai/rapids/cudf/nvcomp/LZ4Decompressor.java deleted file mode 100644 index 46b3127581b..00000000000 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/LZ4Decompressor.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ai.rapids.cudf.nvcomp; - -import ai.rapids.cudf.BaseDeviceMemoryBuffer; -import ai.rapids.cudf.Cuda; - -/** Single-buffer decompression using LZ4 */ -public class LZ4Decompressor { - - /** - * LZ4 decompression settings corresponding to an LZ4 compressed input. - * NOTE: Each instance must be closed to avoid a native memory leak. - */ - public static final class Configuration implements AutoCloseable { - private final long metadataPtr; - private final long metadataSize; - private final long tempBytes; - private final long uncompressedBytes; - - Configuration(long metadataPtr, long metadataSize, long tempBytes, - long uncompressedBytes) { - this.metadataPtr = metadataPtr; - this.metadataSize = metadataSize; - this.tempBytes = tempBytes; - this.uncompressedBytes = uncompressedBytes; - } - - /** Get the host address of the metadata */ - public long getMetadataPtr() { - return metadataPtr; - } - - /** Get the size of the metadata in bytes */ - public long getMetadataSize() { - return metadataSize; - } - - /** Get the size of the temporary buffer in bytes needed to decompress */ - public long getTempBytes() { - return tempBytes; - } - - /** Get the size of the uncompressed data in bytes */ - public long getUncompressedBytes() { - return uncompressedBytes; - } - - @Override - public void close() { - NvcompJni.lz4DestroyMetadata(metadataPtr); - } - } - - /** - * Determine if a buffer is data compressed with LZ4. - * @param buffer data to examine - * @param stream CUDA stream to use - * @return true if the data is LZ4 compressed - */ - public static boolean isLZ4Data(BaseDeviceMemoryBuffer buffer, Cuda.Stream stream) { - return NvcompJni.isLZ4Data(buffer.getAddress(), buffer.getLength(), stream.getStream()); - } - - /** - * Get the decompression configuration from compressed data. - * NOTE: The resulting configuration object must be closed to avoid a native memory leak. - * @param compressed data that has been compressed by the LZ4 compressor - * @param stream CUDA stream to use - * @return decompression configuration for the specified input - */ - public static Configuration configure(BaseDeviceMemoryBuffer compressed, Cuda.Stream stream) { - long[] configs = NvcompJni.lz4DecompressConfigure(compressed.getAddress(), - compressed.getLength(), stream.getStream()); - assert configs.length == 4; - return new Configuration(configs[0], configs[1], configs[2], configs[3]); - } - - /** - * Asynchronously decompress data compressed with the LZ4 compressor. - * @param compressed buffer containing LZ4-compressed data - * @param config decompression configuration - * @param temp temporary storage buffer - * @param outputBuffer buffer that will be written with the uncompressed output - * @param stream CUDA stream to use - */ - public static void decompressAsync( - BaseDeviceMemoryBuffer compressed, - Configuration config, - BaseDeviceMemoryBuffer temp, - BaseDeviceMemoryBuffer outputBuffer, - Cuda.Stream stream) { - NvcompJni.lz4DecompressAsync( - compressed.getAddress(), - compressed.getLength(), - config.getMetadataPtr(), - config.getMetadataSize(), - temp.getAddress(), - temp.getLength(), - outputBuffer.getAddress(), - outputBuffer.getLength(), - stream.getStream()); - } -} diff --git a/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java b/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java index 58f8390d0eb..57094008c08 100644 --- a/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java +++ b/java/src/main/java/ai/rapids/cudf/nvcomp/NvcompJni.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,100 +24,6 @@ class NvcompJni { NativeDepsLoader.loadNativeDeps(); } - /** - * Determine if data is compressed with the nvcomp LZ4 compressor. - * @param inPtr device address of the compressed data - * @param inSize size of the compressed data in bytes - * @param stream CUDA stream to use - * @return true if the data is compressed with the nvcomp LZ4 compressor - */ - static native boolean isLZ4Data(long inPtr, long inSize, long stream); - - /** - * Determine if the metadata corresponds to data compressed with the nvcomp LZ4 compressor. - * @param metadataPtr address of the metadata object - * @return true if the metadata describes data compressed with the nvcomp LZ4 compressor. - */ - static native boolean isLZ4Metadata(long metadataPtr); - - /** - * Return the LZ4 compression configuration necessary for a particular chunk size. - * @param chunkSize maximum size of an uncompressed chunk in bytes - * @param uncompressedSize total size of the uncompressed data - * @return array of three longs containing metadata size, temp storage size, - * and output buffer size - */ - static native long[] lz4CompressConfigure(long chunkSize, long uncompressedSize); - - /** - * Perform LZ4 compression asynchronously using the specified CUDA stream. - * @param compressedSizeOutputPtr host address of a 64-bit integer to update - * with the resulting compressed size of the - * data. For the operation to be truly - * asynchronous this should point to pinned - * host memory. - * @param inPtr device address of the uncompressed data - * @param inSize size of the uncompressed data in bytes - * @param inputType type of uncompressed data - * @param chunkSize size of an LZ4 chunk in bytes - * @param tempPtr device address of the temporary compression storage buffer - * @param tempSize size of the temporary storage buffer in bytes - * @param outPtr device address of the output buffer - * @param outSize size of the output buffer in bytes - * @param stream CUDA stream to use - */ - static native void lz4CompressAsync( - long compressedSizeOutputPtr, - long inPtr, - long inSize, - int inputType, - long chunkSize, - long tempPtr, - long tempSize, - long outPtr, - long outSize, - long stream); - - /** - * Return the decompression configuration for a compressed input. - * NOTE: The resulting configuration object must be closed to destroy the corresponding - * host-side metadata created by this method to avoid a native memory leak. - * @param inPtr device address of the compressed data - * @param inSize size of the compressed data - * @return array of four longs containing metadata address, metadata size, temp storage size, - * and output buffer size - */ - static native long[] lz4DecompressConfigure(long inPtr, long inSize, long stream); - - /** - * Perform LZ4 decompression asynchronously using the specified CUDA stream. - * @param inPtr device address of the uncompressed data - * @param inSize size of the uncompressed data in bytes - * @param metadataPtr host address of the metadata - * @param metadataSize size of the metadata in bytes - * @param tempPtr device address of the temporary compression storage buffer - * @param tempSize size of the temporary storage buffer in bytes - * @param outPtr device address of the output buffer - * @param outSize size of the output buffer in bytes - * @param stream CUDA stream to use - */ - static native void lz4DecompressAsync( - long inPtr, - long inSize, - long metadataPtr, - long metadataSize, - long tempPtr, - long tempSize, - long outPtr, - long outSize, - long stream); - - /** - * Destroy host-side metadata created by {@link NvcompJni#lz4DecompressConfigure(long, long, long)} - * @param metadataPtr host address of metadata - */ - static native void lz4DestroyMetadata(long metadataPtr); - /** * Get the temporary workspace size required to perform compression of entire LZ4 batch. * @param batchSize number of chunks in the batch diff --git a/java/src/main/native/src/NvcompJni.cpp b/java/src/main/native/src/NvcompJni.cpp index 533654baee1..e616b7f66be 100644 --- a/java/src/main/native/src/NvcompJni.cpp +++ b/java/src/main/native/src/NvcompJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,131 +56,6 @@ void check_nvcomp_status(JNIEnv *env, nvcompStatus_t status) { extern "C" { -JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_isLZ4Data(JNIEnv *env, jclass, - jlong j_in_ptr, - jlong j_in_size, - jlong j_stream) { - try { - cudf::jni::auto_set_device(env); - auto in_ptr = reinterpret_cast(j_in_ptr); - auto in_size = static_cast(j_in_size); - auto stream = reinterpret_cast(j_stream); - return nvcompLZ4IsData(in_ptr, in_size, stream); - } - CATCH_STD(env, 0) -} - -JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_isLZ4Metadata(JNIEnv *env, jclass, - jlong metadata_ptr) { - try { - cudf::jni::auto_set_device(env); - return nvcompLZ4IsMetadata(reinterpret_cast(metadata_ptr)); - } - CATCH_STD(env, 0) -} - -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_lz4CompressConfigure( - JNIEnv *env, jclass, jlong j_chunk_size, jlong j_uncompressed_size) { - try { - cudf::jni::auto_set_device(env); - nvcompLZ4FormatOpts opts{}; - opts.chunk_size = static_cast(j_chunk_size); - auto uncompressed_size = static_cast(j_uncompressed_size); - std::size_t metadata_bytes = 0; - std::size_t temp_bytes = 0; - std::size_t out_bytes = 0; - auto status = nvcompLZ4CompressConfigure(&opts, NVCOMP_TYPE_CHAR, uncompressed_size, - &metadata_bytes, &temp_bytes, &out_bytes); - check_nvcomp_status(env, status); - cudf::jni::native_jlongArray result(env, 3); - result[0] = static_cast(metadata_bytes); - result[1] = static_cast(temp_bytes); - result[2] = static_cast(out_bytes); - return result.get_jArray(); - } - CATCH_STD(env, 0); -} - -JNIEXPORT void JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_lz4CompressAsync( - JNIEnv *env, jclass, jlong j_compressed_size_ptr, jlong j_in_ptr, jlong j_in_size, - jint j_input_type, jlong j_chunk_size, jlong j_temp_ptr, jlong j_temp_size, jlong j_out_ptr, - jlong j_out_size, jlong j_stream) { - try { - cudf::jni::auto_set_device(env); - auto in_ptr = reinterpret_cast(j_in_ptr); - auto in_size = static_cast(j_in_size); - auto comp_type = static_cast(j_input_type); - nvcompLZ4FormatOpts opts{}; - opts.chunk_size = static_cast(j_chunk_size); - auto temp_ptr = reinterpret_cast(j_temp_ptr); - auto temp_size = static_cast(j_temp_size); - auto out_ptr = reinterpret_cast(j_out_ptr); - auto compressed_size_ptr = reinterpret_cast(j_compressed_size_ptr); - auto stream = reinterpret_cast(j_stream); - auto status = nvcompLZ4CompressAsync(&opts, comp_type, in_ptr, in_size, temp_ptr, temp_size, - out_ptr, compressed_size_ptr, stream); - check_nvcomp_status(env, status); - } - CATCH_STD(env, ); -} - -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_lz4DecompressConfigure( - JNIEnv *env, jclass, jlong j_input_ptr, jlong j_input_size, jlong j_stream) { - try { - cudf::jni::auto_set_device(env); - auto compressed_ptr = reinterpret_cast(j_input_ptr); - auto compressed_bytes = static_cast(j_input_size); - void *metadata_ptr = nullptr; - std::size_t metadata_bytes = 0; - std::size_t temp_bytes = 0; - std::size_t uncompressed_bytes = 0; - auto stream = reinterpret_cast(j_stream); - auto status = - nvcompLZ4DecompressConfigure(compressed_ptr, compressed_bytes, &metadata_ptr, - &metadata_bytes, &temp_bytes, &uncompressed_bytes, stream); - check_nvcomp_status(env, status); - cudf::jni::native_jlongArray result(env, 4); - result[0] = reinterpret_cast(metadata_ptr); - result[1] = static_cast(metadata_bytes); - result[2] = static_cast(temp_bytes); - result[3] = static_cast(uncompressed_bytes); - return result.get_jArray(); - } - CATCH_STD(env, 0); -} - -JNIEXPORT void JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_lz4DecompressAsync( - JNIEnv *env, jclass, jlong j_in_ptr, jlong j_in_size, jlong j_metadata_ptr, - jlong j_metadata_size, jlong j_temp_ptr, jlong j_temp_size, jlong j_out_ptr, jlong j_out_size, - jlong j_stream) { - try { - cudf::jni::auto_set_device(env); - auto compressed_ptr = reinterpret_cast(j_in_ptr); - auto compressed_bytes = static_cast(j_in_size); - auto metadata_ptr = reinterpret_cast(j_metadata_ptr); - auto metadata_bytes = static_cast(j_metadata_size); - auto temp_ptr = reinterpret_cast(j_temp_ptr); - auto temp_bytes = static_cast(j_temp_size); - auto uncompressed_ptr = reinterpret_cast(j_out_ptr); - auto uncompressed_bytes = static_cast(j_out_size); - auto stream = reinterpret_cast(j_stream); - auto status = nvcompLZ4DecompressAsync(compressed_ptr, compressed_bytes, metadata_ptr, - metadata_bytes, temp_ptr, temp_bytes, uncompressed_ptr, - uncompressed_bytes, stream); - check_nvcomp_status(env, status); - } - CATCH_STD(env, ); -} - -JNIEXPORT void JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_lz4DestroyMetadata(JNIEnv *env, jclass, - jlong metadata_ptr) { - try { - cudf::jni::auto_set_device(env); - nvcompLZ4DestroyMetadata(reinterpret_cast(metadata_ptr)); - } - CATCH_STD(env, ); -} - JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_nvcomp_NvcompJni_batchedLZ4CompressGetTempSize( JNIEnv *env, jclass, jlong j_batch_size, jlong j_max_chunk_size) { try { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 78ac8a18107..cebe476dd87 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -137,8 +137,8 @@ class jni_writer_data_sink final : public cudf::io::data_sink { left_to_copy < buffer_amount_available ? left_to_copy : buffer_amount_available; char *copy_to = current_buffer_data + current_buffer_written; - CUDA_TRY(cudaMemcpyAsync(copy_to, copy_from, amount_to_copy, cudaMemcpyDeviceToHost, - stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(copy_to, copy_from, amount_to_copy, cudaMemcpyDeviceToHost, + stream.value())); copy_from = copy_from + amount_to_copy; current_buffer_written += amount_to_copy; diff --git a/java/src/main/native/src/cudf_jni_apis.hpp b/java/src/main/native/src/cudf_jni_apis.hpp index 12fd45b831a..2ac535bbf2f 100644 --- a/java/src/main/native/src/cudf_jni_apis.hpp +++ b/java/src/main/native/src/cudf_jni_apis.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu index 683651799e7..13d1a5a94a9 100644 --- a/java/src/main/native/src/map_lookup.cu +++ b/java/src/main/native/src/map_lookup.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -123,7 +123,7 @@ get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_ke gpu_find_first<<>>( *input_device_view, *output_view, lookup_key_device_view); - CHECK_CUDA(stream.value()); + CUDF_CHECK_CUDA(stream.value()); return gather_map; } diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu index 4d78f416134..96ee95c476d 100644 --- a/java/src/main/native/src/row_conversion.cu +++ b/java/src/main/native/src/row_conversion.cu @@ -1766,9 +1766,9 @@ std::vector> convert_to_rows( std::optional> variable_width_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { int device_id; - CUDA_TRY(cudaGetDevice(&device_id)); + CUDF_CUDA_TRY(cudaGetDevice(&device_id)); int total_shmem_in_bytes; - CUDA_TRY( + CUDF_CUDA_TRY( cudaDeviceGetAttribute(&total_shmem_in_bytes, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); #ifndef __CUDA_ARCH__ // __host__ code. @@ -2097,9 +2097,9 @@ std::unique_ptr convert_from_rows(lists_column_view const &input, auto const num_rows = input.parent().size(); int device_id; - CUDA_TRY(cudaGetDevice(&device_id)); + CUDF_CUDA_TRY(cudaGetDevice(&device_id)); int total_shmem_in_bytes; - CUDA_TRY( + CUDF_CUDA_TRY( cudaDeviceGetAttribute(&total_shmem_in_bytes, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); #ifndef __CUDA_ARCH__ // __host__ code. diff --git a/java/src/test/java/ai/rapids/cudf/nvcomp/NvcompTest.java b/java/src/test/java/ai/rapids/cudf/nvcomp/NvcompTest.java index c36d241500a..ec14a1cfee6 100644 --- a/java/src/test/java/ai/rapids/cudf/nvcomp/NvcompTest.java +++ b/java/src/test/java/ai/rapids/cudf/nvcomp/NvcompTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,16 +28,6 @@ public class NvcompTest { private static final Logger log = LoggerFactory.getLogger(ColumnVector.class); - @Test - void testLZ4RoundTripViaLZ4DecompressorSync() { - lz4RoundTrip(false); - } - - @Test - void testLZ4RoundTripViaLZ4DecompressorAsync() { - lz4RoundTrip(true); - } - @Test void testBatchedLZ4RoundTripAsync() { final Cuda.Stream stream = Cuda.DEFAULT_STREAM; @@ -134,90 +124,4 @@ private DeviceMemoryBuffer initBatchBuffer(long[] data, int bufferId) { throw new RuntimeException(t); } } - - private void lz4RoundTrip(boolean useAsync) { - final Cuda.Stream stream = Cuda.DEFAULT_STREAM; - final long chunkSize = 64 * 1024; - final int numElements = 10 * 1024 * 1024 + 1; - long[] data = new long[numElements]; - for (int i = 0; i < numElements; ++i) { - data[i] = i; - } - - DeviceMemoryBuffer tempBuffer = null; - DeviceMemoryBuffer compressedBuffer = null; - DeviceMemoryBuffer uncompressedBuffer = null; - try (ColumnVector v = ColumnVector.fromLongs(data)) { - BaseDeviceMemoryBuffer inputBuffer = v.getDeviceBufferFor(BufferType.DATA); - final long uncompressedSize = inputBuffer.getLength(); - log.debug("Uncompressed size is {}", uncompressedSize); - - LZ4Compressor.Configuration compressConf = - LZ4Compressor.configure(chunkSize, uncompressedSize); - Assertions.assertTrue(compressConf.getMetadataBytes() > 0); - log.debug("Using {} temporary space for lz4 compression", compressConf.getTempBytes()); - tempBuffer = DeviceMemoryBuffer.allocate(compressConf.getTempBytes()); - log.debug("lz4 compressed size estimate is {}", compressConf.getMaxCompressedBytes()); - - compressedBuffer = DeviceMemoryBuffer.allocate(compressConf.getMaxCompressedBytes()); - - long startTime = System.nanoTime(); - long compressedSize; - if (useAsync) { - try (DeviceMemoryBuffer devCompressedSizeBuffer = DeviceMemoryBuffer.allocate(8); - HostMemoryBuffer hostCompressedSizeBuffer = HostMemoryBuffer.allocate(8)) { - LZ4Compressor.compressAsync(devCompressedSizeBuffer, inputBuffer, CompressionType.CHAR, - chunkSize, tempBuffer, compressedBuffer, stream); - hostCompressedSizeBuffer.copyFromDeviceBufferAsync(devCompressedSizeBuffer, stream); - stream.sync(); - compressedSize = hostCompressedSizeBuffer.getLong(0); - } - } else { - compressedSize = LZ4Compressor.compress(inputBuffer, CompressionType.CHAR, chunkSize, - tempBuffer, compressedBuffer, stream); - } - double duration = (System.nanoTime() - startTime) / 1000.0; - log.info("Compressed with lz4 to {} in {} us", compressedSize, duration); - - tempBuffer.close(); - tempBuffer = null; - - try (LZ4Decompressor.Configuration decompressConf = - LZ4Decompressor.configure(compressedBuffer, stream)) { - final long tempSize = decompressConf.getTempBytes(); - - log.debug("Using {} temporary space for lz4 compression", tempSize); - tempBuffer = DeviceMemoryBuffer.allocate(tempSize); - - final long outSize = decompressConf.getUncompressedBytes(); - Assertions.assertEquals(inputBuffer.getLength(), outSize); - - uncompressedBuffer = DeviceMemoryBuffer.allocate(outSize); - - LZ4Decompressor.decompressAsync(compressedBuffer, decompressConf, tempBuffer, - uncompressedBuffer, stream); - - try (ColumnVector v2 = new ColumnVector( - DType.INT64, - numElements, - Optional.empty(), - uncompressedBuffer, - null, - null); - HostColumnVector hv2 = v2.copyToHost()) { - uncompressedBuffer = null; - for (int i = 0; i < numElements; ++i) { - long val = hv2.getLong(i); - if (val != i) { - Assertions.fail("Expected " + i + " at " + i + " found " + val); - } - } - } - } - } finally { - closeBuffer(tempBuffer); - closeBuffer(compressedBuffer); - closeBuffer(uncompressedBuffer); - } - } } diff --git a/python/cudf/cudf/_fuzz_testing/io.py b/python/cudf/cudf/_fuzz_testing/io.py index dfc59a1f18d..a6f27691f56 100644 --- a/python/cudf/cudf/_fuzz_testing/io.py +++ b/python/cudf/cudf/_fuzz_testing/io.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import copy import json diff --git a/python/cudf/cudf/_fuzz_testing/main.py b/python/cudf/cudf/_fuzz_testing/main.py index 6b536fc3e2e..54e49b63e41 100644 --- a/python/cudf/cudf/_fuzz_testing/main.py +++ b/python/cudf/cudf/_fuzz_testing/main.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from cudf._fuzz_testing import fuzzer diff --git a/python/cudf/cudf/_version.py b/python/cudf/cudf/_version.py index c6281349c50..60a2afed39b 100644 --- a/python/cudf/cudf/_version.py +++ b/python/cudf/cudf/_version.py @@ -1,3 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py index 7879261139d..f21eb4e4d8c 100644 --- a/python/cudf/cudf/comm/gpuarrow.py +++ b/python/cudf/cudf/comm/gpuarrow.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. from collections import OrderedDict from collections.abc import Sequence diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 60d13150b39..3f8c8997803 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -2,7 +2,7 @@ import pickle from functools import cached_property -from typing import List, Sequence +from typing import List, Optional, Sequence import numpy as np import pyarrow as pa @@ -337,16 +337,20 @@ def __init__(self, parent: ParentType): ) super().__init__(parent=parent) - def get(self, index: int) -> ParentType: + def get( + self, index: int, default: Optional[ScalarLike] = None + ) -> ParentType: """ - Extract element at the given index from each component + Extract element at the given index from each list. - Extract element from lists, tuples, or strings in - each element in the Series/Index. + If the index is out of bounds for any list, + return or, if provided, ``default``. + Thus, this method never raises an ``IndexError``. Parameters ---------- index : int + default : scalar, optional Returns ------- @@ -360,14 +364,37 @@ def get(self, index: int) -> ParentType: 1 5 2 6 dtype: int64 + + >>> s = cudf.Series([[1, 2], [3, 4, 5], [4, 5, 6]]) + >>> s.list.get(2) + 0 + 1 5 + 2 6 + dtype: int64 + + >>> s = cudf.Series([[1, 2], [3, 4, 5], [4, 5, 6]]) + >>> s.list.get(2, default=0) + 0 0 + 1 5 + 2 6 + dtype: int64 """ - min_col_list_len = self.len().min() - if -min_col_list_len <= index < min_col_list_len: - return self._return_or_inplace( - extract_element(self._column, index) + out = extract_element(self._column, index) + + if not (default is None or default is cudf.NA): + # determine rows for which `index` is out-of-bounds + lengths = count_elements(self._column) + out_of_bounds_mask = (np.negative(index) > lengths) | ( + index >= lengths ) - else: - raise IndexError("list index out of range") + + # replace the value in those rows (should be NA) with `default` + if out_of_bounds_mask.any(): + out = out._scatter_by_column( + out_of_bounds_mask, cudf.Scalar(default) + ) + + return self._return_or_inplace(out) def contains(self, search_key: ScalarLike) -> ParentType: """ diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index ef8e9c4dffc..d5d45c341d5 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -2006,7 +2006,9 @@ def filter_alphanum( repl = "" return self._return_or_inplace( - libstrings.filter_alphanum(self._column, cudf.Scalar(repl), keep), + libstrings.filter_alphanum( + self._column, cudf.Scalar(repl, "str"), keep + ), ) def slice_from( @@ -2141,7 +2143,7 @@ def slice_replace( return self._return_or_inplace( libstrings.slice_replace( - self._column, start, stop, cudf.Scalar(repl) + self._column, start, stop, cudf.Scalar(repl, "str") ), ) @@ -2192,7 +2194,7 @@ def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex: repl = "" return self._return_or_inplace( - libstrings.insert(self._column, start, cudf.Scalar(repl)), + libstrings.insert(self._column, start, cudf.Scalar(repl, "str")), ) def get(self, i: int = 0) -> SeriesOrIndex: @@ -2643,7 +2645,7 @@ def rsplit( ) else: result_table = libstrings.rsplit_record( - self._column, cudf.Scalar(pat), n + self._column, cudf.Scalar(pat, "str"), n ) return self._return_or_inplace(result_table, expand=expand) @@ -2726,7 +2728,7 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: return self._return_or_inplace( cudf.core.frame.Frame( - *libstrings.partition(self._column, cudf.Scalar(sep)) + *libstrings.partition(self._column, cudf.Scalar(sep, "str")) ), expand=expand, ) @@ -2793,7 +2795,7 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: return self._return_or_inplace( cudf.core.frame.Frame( - *libstrings.rpartition(self._column, cudf.Scalar(sep)) + *libstrings.rpartition(self._column, cudf.Scalar(sep, "str")) ), expand=expand, ) @@ -3194,7 +3196,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: to_strip = "" return self._return_or_inplace( - libstrings.strip(self._column, cudf.Scalar(to_strip)) + libstrings.strip(self._column, cudf.Scalar(to_strip, "str")) ) def lstrip(self, to_strip: str = None) -> SeriesOrIndex: @@ -3241,7 +3243,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: to_strip = "" return self._return_or_inplace( - libstrings.lstrip(self._column, cudf.Scalar(to_strip)) + libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str")) ) def rstrip(self, to_strip: str = None) -> SeriesOrIndex: @@ -3296,7 +3298,7 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex: to_strip = "" return self._return_or_inplace( - libstrings.rstrip(self._column, cudf.Scalar(to_strip)) + libstrings.rstrip(self._column, cudf.Scalar(to_strip, "str")) ) def wrap(self, width: int, **kwargs) -> SeriesOrIndex: @@ -4245,7 +4247,7 @@ def filter_characters( table = str.maketrans(table) return self._return_or_inplace( libstrings.filter_characters( - self._column, table, keep, cudf.Scalar(repl) + self._column, table, keep, cudf.Scalar(repl, "str") ), ) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 08a30729e7c..1b85769b84d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6126,7 +6126,7 @@ def make_binop_func(op, postprocess=None): # def postprocess(left, right, output) # where left and right are the inputs to the binop and output is the result # of calling the wrapped Frame binop. - wrapped_func = getattr(Frame, op) + wrapped_func = getattr(IndexedFrame, op) @functools.wraps(wrapped_func) def wrapper(self, other, axis="columns", level=None, fill_value=None): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 75c6e4d0964..1382ebfd8ee 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3649,1623 +3649,6 @@ def __invert__(self): self._index, ) - @_cudf_nvtx_annotate - def add(self, other, axis, level=None, fill_value=None): - """ - Get Addition of dataframe or series and other, element-wise (binary - operator `add`). - - Equivalent to ``frame + other``, but with support to substitute a - ``fill_value`` for missing data in one of the inputs. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - **Series** - - >>> a = cudf.Series([1, 1, 1, None], index=['a', 'b', 'c', 'd']) - >>> b = cudf.Series([1, None, 1, None], index=['a', 'b', 'd', 'e']) - >>> a.add(b) - a 2 - b - c - d - e - dtype: int64 - >>> a.add(b, fill_value=0) - a 2 - b 1 - c 1 - d 1 - e - dtype: int64 - """ - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__add__", fill_value) - - @_cudf_nvtx_annotate - def radd(self, other, axis, level=None, fill_value=None): - """ - Get Addition of dataframe or series and other, element-wise (binary - operator `radd`). - - Equivalent to ``other + frame``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `add`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - >>> df.radd(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - **Series** - - >>> a = cudf.Series([1, 2, 3, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 2 - c 3 - d - dtype: int64 - >>> b = cudf.Series([1, None, 1, None], index=['a', 'b', 'd', 'e']) - >>> b - a 1 - b - d 1 - e - dtype: int64 - >>> a.add(b, fill_value=0) - a 2 - b 2 - c 3 - d 1 - e - dtype: int64 - - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__radd__", fill_value) - - @_cudf_nvtx_annotate - def subtract(self, other, axis, level=None, fill_value=None): - """ - Get Subtraction of dataframe or series and other, element-wise (binary - operator `sub`). - - Equivalent to ``frame - other``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `rsub`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df.sub(1) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - >>> df.sub([1, 2]) - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - **Series** - - >>> a = cudf.Series([10, 20, None, 30, None], index=['a', 'b', 'c', 'd', 'e']) - >>> a - a 10 - b 20 - c - d 30 - e - dtype: int64 - >>> b = cudf.Series([1, None, 2, 30], index=['a', 'c', 'b', 'd']) - >>> b - a 1 - c - b 2 - d 30 - dtype: int64 - >>> a.subtract(b, fill_value=2) - a 9 - b 18 - c - d 0 - e - dtype: int64 - - """ # noqa: E501 - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__sub__", fill_value) - - sub = subtract - - @_cudf_nvtx_annotate - def rsub(self, other, axis, level=None, fill_value=None): - """ - Get Subtraction of dataframe or series and other, element-wise (binary - operator `rsub`). - - Equivalent to ``other - frame``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `sub`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - >>> df.rsub(1) - angles degrees - circle 1 -359 - triangle -2 -179 - rectangle -3 -359 - >>> df.rsub([1, 2]) - angles degrees - circle 1 -358 - triangle -2 -178 - rectangle -3 -358 - - **Series** - - >>> import cudf - >>> a = cudf.Series([1, 2, 3, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 2 - c 3 - d - dtype: int64 - >>> b = cudf.Series([1, None, 2, None], index=['a', 'b', 'd', 'e']) - >>> b - a 1 - b - d 2 - e - dtype: int64 - >>> a.rsub(b, fill_value=10) - a 0 - b 8 - c 7 - d -8 - e - dtype: int64 - """ - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__rsub__", fill_value) - - @_cudf_nvtx_annotate - def multiply(self, other, axis, level=None, fill_value=None): - """ - Get Multiplication of dataframe or series and other, element-wise - (binary operator `mul`). - - Equivalent to ``frame * other``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `rmul`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> other = cudf.DataFrame({'angles': [0, 3, 4]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df * other - angles degrees - circle 0 - triangle 9 - rectangle 16 - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0 - triangle 9 0 - rectangle 16 0 - - **Series** - - >>> import cudf - >>> a = cudf.Series([1, 2, 3, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 2 - c 3 - d - dtype: int64 - >>> b = cudf.Series([1, None, 2, None], index=['a', 'b', 'd', 'e']) - >>> b - a 1 - b - d 2 - e - dtype: int64 - >>> a.multiply(b, fill_value=0) - a 1 - b 0 - c 0 - d 0 - e - dtype: int64 - - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__mul__", fill_value) - - mul = multiply - - @_cudf_nvtx_annotate - def rmul(self, other, axis, level=None, fill_value=None): - """ - Get Multiplication of dataframe or series and other, element-wise - (binary operator `rmul`). - - Equivalent to ``other * frame``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `mul`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> other = cudf.DataFrame({'angles': [0, 3, 4]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> other * df - angles degrees - circle 0 - triangle 9 - rectangle 16 - >>> df.rmul(other, fill_value=0) - angles degrees - circle 0 0 - triangle 9 0 - rectangle 16 0 - - **Series** - - >>> import cudf - >>> a = cudf.Series([10, 20, None, 30, 40], index=['a', 'b', 'c', 'd', 'e']) - >>> a - a 10 - b 20 - c - d 30 - e 40 - dtype: int64 - >>> b = cudf.Series([None, 1, 20, 5, 4], index=['a', 'b', 'd', 'e', 'f']) - >>> b - a - b 1 - d 20 - e 5 - f 4 - dtype: int64 - >>> a.rmul(b, fill_value=2) - a 20 - b 20 - c - d 600 - e 200 - f 8 - dtype: int64 - """ # noqa E501 - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__rmul__", fill_value) - - @_cudf_nvtx_annotate - def mod(self, other, axis, level=None, fill_value=None): - """ - Get Modulo division of dataframe or series and other, element-wise - (binary operator `mod`). - - Equivalent to ``frame % other``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `rmod`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df % 100 - angles degrees - circle 0 60 - triangle 3 80 - rectangle 4 60 - >>> df.mod(100) - angles degrees - circle 0 60 - triangle 3 80 - rectangle 4 60 - - **Series** - - >>> import cudf - >>> series = cudf.Series([10, 20, 30]) - >>> series - 0 10 - 1 20 - 2 30 - dtype: int64 - >>> series.mod(4) - 0 2 - 1 0 - 2 2 - dtype: int64 - - - """ - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__mod__", fill_value) - - @_cudf_nvtx_annotate - def rmod(self, other, axis, level=None, fill_value=None): - """ - Get Modulo division of dataframe or series and other, element-wise - (binary operator `rmod`). - - Equivalent to ``other % frame``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `mod`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [1, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> 100 % df - angles degrees - circle 0 100 - triangle 1 100 - rectangle 0 100 - >>> df.rmod(100) - angles degrees - circle 0 100 - triangle 1 100 - rectangle 0 100 - - **Series** - - >>> import cudf - >>> a = cudf.Series([10, 20, None, 30, 40], index=['a', 'b', 'c', 'd', 'e']) - >>> a - a 10 - b 20 - c - d 30 - e 40 - dtype: int64 - >>> b = cudf.Series([None, 1, 20, 5, 4], index=['a', 'b', 'd', 'e', 'f']) - >>> b - a - b 1 - d 20 - e 5 - f 4 - dtype: int64 - >>> a.rmod(b, fill_value=10) - a 0 - b 1 - c - d 20 - e 5 - f 4 - dtype: int64 - """ # noqa E501 - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__rmod__", fill_value) - - @_cudf_nvtx_annotate - def pow(self, other, axis, level=None, fill_value=None): - """ - Get Exponential power of dataframe series and other, element-wise - (binary operator `pow`). - - Equivalent to ``frame ** other``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `rpow`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [1, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df ** 2 - angles degrees - circle 0 129600 - triangle 9 32400 - rectangle 16 129600 - >>> df.pow(2) - angles degrees - circle 0 129600 - triangle 9 32400 - rectangle 16 129600 - - **Series** - - >>> import cudf - >>> a = cudf.Series([1, 2, 3, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 2 - c 3 - d - dtype: int64 - >>> b = cudf.Series([10, None, 12, None], index=['a', 'b', 'd', 'e']) - >>> b - a 10 - b - d 12 - e - dtype: int64 - >>> a.pow(b, fill_value=0) - a 1 - b 1 - c 1 - d 0 - e - dtype: int64 - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__pow__", fill_value) - - @_cudf_nvtx_annotate - def rpow(self, other, axis, level=None, fill_value=None): - """ - Get Exponential power of dataframe or series and other, element-wise - (binary operator `pow`). - - Equivalent to ``other ** frame``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `pow`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [1, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> 1 ** df - angles degrees - circle 1 1 - triangle 1 1 - rectangle 1 1 - >>> df.rpow(1) - angles degrees - circle 1 1 - triangle 1 1 - rectangle 1 1 - - **Series** - - >>> import cudf - >>> a = cudf.Series([1, 2, 3, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 2 - c 3 - d - dtype: int64 - >>> b = cudf.Series([10, None, 12, None], index=['a', 'b', 'd', 'e']) - >>> b - a 10 - b - d 12 - e - dtype: int64 - >>> a.rpow(b, fill_value=0) - a 10 - b 0 - c 0 - d 1 - e - dtype: int64 - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__rpow__", fill_value) - - @_cudf_nvtx_annotate - def floordiv(self, other, axis, level=None, fill_value=None): - """ - Get Integer division of dataframe or series and other, element-wise - (binary operator `floordiv`). - - Equivalent to ``frame // other``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `rfloordiv`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [1, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df.floordiv(2) - angles degrees - circle 0 180 - triangle 1 90 - rectangle 2 180 - >>> df // 2 - angles degrees - circle 0 180 - triangle 1 90 - rectangle 2 180 - - **Series** - - >>> import cudf - >>> a = cudf.Series([1, 1, 1, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 1 - c 1 - d - dtype: int64 - >>> b = cudf.Series([1, None, 1, None], index=['a', 'b', 'd', 'e']) - >>> b - a 1 - b - d 1 - e - dtype: int64 - >>> a.floordiv(b) - a 1 - b - c - d - e - dtype: int64 - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__floordiv__", fill_value) - - @_cudf_nvtx_annotate - def rfloordiv(self, other, axis, level=None, fill_value=None): - """ - Get Integer division of dataframe or series and other, element-wise - (binary operator `rfloordiv`). - - Equivalent to ``other // dataframe``, but with support to substitute - a fill_value for missing data in one of the inputs. With reverse - version, `floordiv`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'col1': [10, 11, 23], - ... 'col2': [101, 122, 321]}) - >>> df - col1 col2 - 0 10 101 - 1 11 122 - 2 23 321 - >>> df.rfloordiv(df) - col1 col2 - 0 1 1 - 1 1 1 - 2 1 1 - >>> df.rfloordiv(200) - col1 col2 - 0 20 1 - 1 18 1 - 2 8 0 - >>> df.rfloordiv(100) - col1 col2 - 0 10 0 - 1 9 0 - 2 4 0 - - **Series** - - >>> import cudf - >>> s = cudf.Series([1, 2, 10, 17]) - >>> s - 0 1 - 1 2 - 2 10 - 3 17 - dtype: int64 - >>> s.rfloordiv(100) - 0 100 - 1 50 - 2 10 - 3 5 - dtype: int64 - >>> s = cudf.Series([10, 20, None]) - >>> s - 0 10 - 1 20 - 2 - dtype: int64 - >>> s.rfloordiv(200) - 0 20 - 1 10 - 2 - dtype: int64 - >>> s.rfloordiv(200, fill_value=2) - 0 20 - 1 10 - 2 100 - dtype: int64 - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__rfloordiv__", fill_value) - - @_cudf_nvtx_annotate - def truediv(self, other, axis, level=None, fill_value=None): - """ - Get Floating division of dataframe or series and other, element-wise - (binary operator `truediv`). - - Equivalent to ``frame / other``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `rtruediv`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df.truediv(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - >>> df / 10 - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - **Series** - - >>> import cudf - >>> a = cudf.Series([1, 10, 20, None], index=['a', 'b', 'c', 'd']) - >>> a - a 1 - b 10 - c 20 - d - dtype: int64 - >>> b = cudf.Series([1, None, 2, None], index=['a', 'b', 'd', 'e']) - >>> b - a 1 - b - d 2 - e - dtype: int64 - >>> a.truediv(b, fill_value=0) - a 1.0 - b Inf - c Inf - d 0.0 - e - dtype: float64 - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__truediv__", fill_value) - - # Alias for truediv - div = truediv - divide = truediv - - @_cudf_nvtx_annotate - def rtruediv(self, other, axis, level=None, fill_value=None): - """ - Get Floating division of dataframe or series and other, element-wise - (binary operator `rtruediv`). - - Equivalent to ``other / frame``, but with support to substitute a - fill_value for missing data in one of the inputs. With reverse - version, `truediv`. - - Parameters - ---------- - - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : int or string - Only ``0`` is supported for series, ``1`` or ``columns`` supported - for dataframe - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed - for successful DataFrame alignment, with this value before - computation. If data in both corresponding DataFrame locations - is missing the result will be missing. - - Returns - ------- - DataFrame or Series - Result of the arithmetic operation. - - Examples - -------- - - **DataFrame** - - >>> import cudf - >>> df = cudf.DataFrame({'angles': [0, 3, 4], - ... 'degrees': [360, 180, 360]}, - ... index=['circle', 'triangle', 'rectangle']) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - >>> df.rtruediv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - >>> 10 / df - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - **Series** - - >>> import cudf - >>> a = cudf.Series([10, 20, None, 30], index=['a', 'b', 'c', 'd']) - >>> a - a 10 - b 20 - c - d 30 - dtype: int64 - >>> b = cudf.Series([1, None, 2, 3], index=['a', 'b', 'd', 'e']) - >>> b - a 1 - b - d 2 - e 3 - dtype: int64 - >>> a.rtruediv(b, fill_value=0) - a 0.1 - b 0.0 - c - d 0.066666667 - e Inf - dtype: float64 - """ - - if level is not None: - raise NotImplementedError("level parameter is not supported yet.") - - return self._binaryop(other, "__rtruediv__", fill_value) - - # Alias for rtruediv - rdiv = rtruediv - - @_cudf_nvtx_annotate - def eq(self, other, axis="columns", level=None, fill_value=None): - """Equal to, element-wise (binary operator eq). - - Parameters - ---------- - other : Series or scalar value - fill_value : None or value - Value to fill nulls with before computation. If data in both - corresponding Series locations is null the result will be null - - Returns - ------- - Frame - The result of the operation. - - Examples - -------- - **DataFrame** - - >>> left = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'c': [7, 8, 9]} - ... ) - >>> right = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'd': [10, 12, 12]} - ... ) - >>> left.eq(right) - a b c d - 0 True True - 1 True True - 2 True True - >>> left.eq(right, fill_value=7) - a b c d - 0 True True True False - 1 True True False False - 2 True True False False - - **Series** - - >>> a = cudf.Series([1, 2, 3, None, 10, 20], - ... index=['a', 'c', 'd', 'e', 'f', 'g']) - >>> a - a 1 - c 2 - d 3 - e - f 10 - g 20 - dtype: int64 - >>> b = cudf.Series([-10, 23, -1, None, None], - ... index=['a', 'b', 'c', 'd', 'e']) - >>> b - a -10 - b 23 - c -1 - d - e - dtype: int64 - >>> a.eq(b, fill_value=2) - a False - b False - c False - d False - e - f False - g False - dtype: bool - """ - return self._binaryop( - other=other, op="__eq__", fill_value=fill_value, can_reindex=True - ) - - @_cudf_nvtx_annotate - def ne(self, other, axis="columns", level=None, fill_value=None): - """Not equal to, element-wise (binary operator ne). - - Parameters - ---------- - other : Series or scalar value - fill_value : None or value - Value to fill nulls with before computation. If data in both - corresponding Series locations is null the result will be null - - Returns - ------- - Frame - The result of the operation. - - Examples - -------- - **DataFrame** - - >>> left = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'c': [7, 8, 9]} - ... ) - >>> right = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'd': [10, 12, 12]} - ... ) - >>> left.ne(right) - a b c d - 0 False False - 1 False False - 2 False False - >>> left.ne(right, fill_value=7) - a b c d - 0 False False False True - 1 False False True True - 2 False False True True - - **Series** - - >>> a = cudf.Series([1, 2, 3, None, 10, 20], - ... index=['a', 'c', 'd', 'e', 'f', 'g']) - >>> a - a 1 - c 2 - d 3 - e - f 10 - g 20 - dtype: int64 - >>> b = cudf.Series([-10, 23, -1, None, None], - ... index=['a', 'b', 'c', 'd', 'e']) - >>> b - a -10 - b 23 - c -1 - d - e - dtype: int64 - >>> a.ne(b, fill_value=2) - a True - b True - c True - d True - e - f True - g True - dtype: bool - """ # noqa: E501 - return self._binaryop( - other=other, op="__ne__", fill_value=fill_value, can_reindex=True - ) - - @_cudf_nvtx_annotate - def lt(self, other, axis="columns", level=None, fill_value=None): - """Less than, element-wise (binary operator lt). - - Parameters - ---------- - other : Series or scalar value - fill_value : None or value - Value to fill nulls with before computation. If data in both - corresponding Series locations is null the result will be null - - Returns - ------- - Frame - The result of the operation. - - Examples - -------- - **DataFrame** - - >>> left = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'c': [7, 8, 9]} - ... ) - >>> right = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'd': [10, 12, 12]} - ... ) - >>> left.lt(right) - a b c d - 0 False False - 1 False False - 2 False False - >>> left.lt(right, fill_value=7) - a b c d - 0 False False False True - 1 False False False True - 2 False False False True - - **Series** - - >>> a = cudf.Series([1, 2, 3, None, 10, 20], - ... index=['a', 'c', 'd', 'e', 'f', 'g']) - >>> a - a 1 - c 2 - d 3 - e - f 10 - g 20 - dtype: int64 - >>> b = cudf.Series([-10, 23, -1, None, None], - ... index=['a', 'b', 'c', 'd', 'e']) - >>> b - a -10 - b 23 - c -1 - d - e - dtype: int64 - >>> a.lt(b, fill_value=-10) - a False - b True - c False - d False - e - f False - g False - dtype: bool - """ # noqa: E501 - return self._binaryop( - other=other, op="__lt__", fill_value=fill_value, can_reindex=True - ) - - @_cudf_nvtx_annotate - def le(self, other, axis="columns", level=None, fill_value=None): - """Less than or equal, element-wise (binary operator le). - - Parameters - ---------- - other : Series or scalar value - fill_value : None or value - Value to fill nulls with before computation. If data in both - corresponding Series locations is null the result will be null - - Returns - ------- - Frame - The result of the operation. - - Examples - -------- - **DataFrame** - - >>> left = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'c': [7, 8, 9]} - ... ) - >>> right = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'd': [10, 12, 12]} - ... ) - >>> left.le(right) - a b c d - 0 True True - 1 True True - 2 True True - >>> left.le(right, fill_value=7) - a b c d - 0 True True True True - 1 True True False True - 2 True True False True - - **Series** - - >>> a = cudf.Series([1, 2, 3, None, 10, 20], - ... index=['a', 'c', 'd', 'e', 'f', 'g']) - >>> a - a 1 - c 2 - d 3 - e - f 10 - g 20 - dtype: int64 - >>> b = cudf.Series([-10, 23, -1, None, None], - ... index=['a', 'b', 'c', 'd', 'e']) - >>> b - a -10 - b 23 - c -1 - d - e - dtype: int64 - >>> a.le(b, fill_value=-10) - a False - b True - c False - d False - e - f False - g False - dtype: bool - """ # noqa: E501 - return self._binaryop( - other=other, op="__le__", fill_value=fill_value, can_reindex=True - ) - - @_cudf_nvtx_annotate - def gt(self, other, axis="columns", level=None, fill_value=None): - """Greater than, element-wise (binary operator gt). - - Parameters - ---------- - other : Series or scalar value - fill_value : None or value - Value to fill nulls with before computation. If data in both - corresponding Series locations is null the result will be null - - Returns - ------- - Frame - The result of the operation. - - Examples - -------- - **DataFrame** - - >>> left = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'c': [7, 8, 9]} - ... ) - >>> right = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'd': [10, 12, 12]} - ... ) - >>> left.gt(right) - a b c d - 0 False False - 1 False False - 2 False False - >>> left.gt(right, fill_value=7) - a b c d - 0 False False False False - 1 False False True False - 2 False False True False - - **Series** - - >>> a = cudf.Series([1, 2, 3, None, 10, 20], - ... index=['a', 'c', 'd', 'e', 'f', 'g']) - >>> a - a 1 - c 2 - d 3 - e - f 10 - g 20 - dtype: int64 - >>> b = cudf.Series([-10, 23, -1, None, None], - ... index=['a', 'b', 'c', 'd', 'e']) - >>> b - a -10 - b 23 - c -1 - d - e - dtype: int64 - >>> a.gt(b) - a True - b False - c True - d False - e False - f False - g False - dtype: bool - """ # noqa: E501 - return self._binaryop( - other=other, op="__gt__", fill_value=fill_value, can_reindex=True - ) - - @_cudf_nvtx_annotate - def ge(self, other, axis="columns", level=None, fill_value=None): - """Greater than or equal, element-wise (binary operator ge). - - Parameters - ---------- - other : Series or scalar value - fill_value : None or value - Value to fill nulls with before computation. If data in both - corresponding Series locations is null the result will be null - - Returns - ------- - Frame - The result of the operation. - - Examples - -------- - **DataFrame** - - >>> left = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'c': [7, 8, 9]} - ... ) - >>> right = cudf.DataFrame({ - ... 'a': [1, 2, 3], - ... 'b': [4, 5, 6], - ... 'd': [10, 12, 12]} - ... ) - >>> left.ge(right) - a b c d - 0 True True - 1 True True - 2 True True - >>> left.ge(right, fill_value=7) - a b c d - 0 True True True False - 1 True True True False - 2 True True True False - - **Series** - - >>> a = cudf.Series([1, 2, 3, None, 10, 20], - ... index=['a', 'c', 'd', 'e', 'f', 'g']) - >>> a - a 1 - c 2 - d 3 - e - f 10 - g 20 - dtype: int64 - >>> b = cudf.Series([-10, 23, -1, None, None], - ... index=['a', 'b', 'c', 'd', 'e']) - >>> b - a -10 - b 23 - c -1 - d - e - dtype: int64 - >>> a.ge(b) - a True - b False - c True - d False - e False - f False - g False - dtype: bool - """ # noqa: E501 - return self._binaryop( - other=other, op="__ge__", fill_value=fill_value, can_reindex=True - ) - def nunique(self, dropna: bool = True): """ Returns a per column mapping with counts of unique values for diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 3d025738974..10736948b57 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -5,6 +5,7 @@ import numbers import operator +import textwrap import warnings from collections import Counter, abc from functools import cached_property @@ -44,6 +45,7 @@ from cudf.core.index import Index, RangeIndex, _index_from_columns from cudf.core.multiindex import MultiIndex from cudf.core.udf.utils import _compile_or_get, _supported_cols_from_frame +from cudf.utils import docutils from cudf.utils.utils import _cudf_nvtx_annotate doc_reset_index_template = """ @@ -72,6 +74,55 @@ """ +doc_binop_template = textwrap.dedent( + """ + Get {operation} of DataFrame or Series and other, element-wise (binary + operator `{op_name}`). + + Equivalent to ``frame + other``, but with support to substitute a + ``fill_value`` for missing data in one of the inputs. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : int or string + Only ``0`` is supported for series, ``1`` or ``columns`` supported + for dataframe + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. Not yet supported. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed + for successful DataFrame alignment, with this value before + computation. If data in both corresponding DataFrame locations + is missing the result will be missing. + + Returns + ------- + DataFrame or Series + Result of the arithmetic operation. + + Examples + -------- + + **DataFrame** + + >>> df = cudf.DataFrame( + ... {{'angles': [0, 3, 4], 'degrees': [360, 180, 360]}}, + ... index=['circle', 'triangle', 'rectangle'] + ... ) + {df_op_example} + + **Series** + + >>> a = cudf.Series([1, 1, 1, None], index=['a', 'b', 'c', 'd']) + >>> b = cudf.Series([1, None, 1, None], index=['a', 'b', 'd', 'e']) + {ser_op_example} + """ +) + + def _get_host_unique(array): if isinstance(array, (cudf.Series, cudf.Index, ColumnBase)): return array.unique.to_pandas() @@ -2653,6 +2704,845 @@ def _explode(self, explode_column: Any, ignore_index: bool): res.index.names = self._index.names return res + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Addition", + op_name="add", + equivalent_op="frame + other", + df_op_example=textwrap.dedent( + """ + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + """, + ), + ser_op_example=textwrap.dedent( + """ + >>> a.add(b) + a 2 + b + c + d + e + dtype: int64 + >>> a.add(b, fill_value=0) + a 2 + b 1 + c 1 + d 1 + e + dtype: int64 + """ + ), + ) + ) + def add(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__add__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Addition", + op_name="radd", + equivalent_op="other + frame", + df_op_example=textwrap.dedent( + """ + >>> df.radd(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.radd(b) + a 2 + b + c + d + e + dtype: int64 + >>> a.radd(b, fill_value=0) + a 2 + b 1 + c 1 + d 1 + e + dtype: int64 + """ + ), + ) + ) + def radd(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__radd__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Subtraction", + op_name="sub", + equivalent_op="frame - other", + df_op_example=textwrap.dedent( + """ + >>> df.sub(1) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.sub(b) + a 0 + b + c + d + e + dtype: int64 + >>> a.sub(b, fill_value=0) + a 2 + b 1 + c 1 + d -1 + e + dtype: int64 + """ + ), + ) + ) + def subtract(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__sub__", fill_value) + + sub = subtract + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Subtraction", + op_name="rsub", + equivalent_op="other - frame", + df_op_example=textwrap.dedent( + """ + >>> df.rsub(1) + angles degrees + circle 1 -359 + triangle -2 -179 + rectangle -3 -359 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.rsub(b) + a 0 + b + c + d + e + dtype: int64 + >>> a.rsub(b, fill_value=0) + a 0 + b -1 + c -1 + d 1 + e + dtype: int64 + """ + ), + ) + ) + def rsub(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__rsub__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Multiplication", + op_name="mul", + equivalent_op="frame * other", + df_op_example=textwrap.dedent( + """ + >>> df.multiply(1) + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.multiply(b) + a 1 + b + c + d + e + dtype: int64 + >>> a.multiply(b, fill_value=0) + a 1 + b 0 + c 0 + d 0 + e + dtype: int64 + """ + ), + ) + ) + def multiply(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__mul__", fill_value) + + mul = multiply + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Multiplication", + op_name="rmul", + equivalent_op="other * frame", + df_op_example=textwrap.dedent( + """ + >>> df.rmul(1) + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.rmul(b) + a 1 + b + c + d + e + dtype: int64 + >>> a.rmul(b, fill_value=0) + a 1 + b 0 + c 0 + d 0 + e + dtype: int64 + """ + ), + ) + ) + def rmul(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__rmul__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Modulo", + op_name="mod", + equivalent_op="frame % other", + df_op_example=textwrap.dedent( + """ + >>> df.mod(1) + angles degrees + circle 0 0 + triangle 0 0 + rectangle 0 0 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.mod(b) + a 0 + b + c + d + e + dtype: int64 + >>> a.mod(b, fill_value=0) + a 0 + b 4294967295 + c 4294967295 + d 0 + e + dtype: int64 + """ + ), + ) + ) + def mod(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__mod__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Modulo", + op_name="rmod", + equivalent_op="other % frame", + df_op_example=textwrap.dedent( + """ + >>> df.rmod(1) + angles degrees + circle 4294967295 1 + triangle 1 1 + rectangle 1 1 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.rmod(b) + a 0 + b + c + d + e + dtype: int64 + >>> a.rmod(b, fill_value=0) + a 0 + b 0 + c 0 + d 4294967295 + e + dtype: int64 + """ + ), + ) + ) + def rmod(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__rmod__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Exponential", + op_name="pow", + equivalent_op="frame ** other", + df_op_example=textwrap.dedent( + """ + >>> df.pow(1) + angles degrees + circle 0 360 + triangle 2 180 + rectangle 4 360 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.pow(b) + a 1 + b + c + d + e + dtype: int64 + >>> a.pow(b, fill_value=0) + a 1 + b 1 + c 1 + d 0 + e + dtype: int64 + """ + ), + ) + ) + def pow(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__pow__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Exponential", + op_name="rpow", + equivalent_op="other ** frame", + df_op_example=textwrap.dedent( + """ + >>> df.rpow(1) + angles degrees + circle 1 1 + triangle 1 1 + rectangle 1 1 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.rpow(b) + a 1 + b + c + d + e + dtype: int64 + >>> a.rpow(b, fill_value=0) + a 1 + b 0 + c 0 + d 1 + e + dtype: int64 + """ + ), + ) + ) + def rpow(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__rpow__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Integer division", + op_name="floordiv", + equivalent_op="frame // other", + df_op_example=textwrap.dedent( + """ + >>> df.floordiv(1) + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.floordiv(b) + a 1 + b + c + d + e + dtype: int64 + >>> a.floordiv(b, fill_value=0) + a 1 + b 9223372036854775807 + c 9223372036854775807 + d 0 + e + dtype: int64 + """ + ), + ) + ) + def floordiv(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__floordiv__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Integer division", + op_name="rfloordiv", + equivalent_op="other // frame", + df_op_example=textwrap.dedent( + """ + >>> df.rfloordiv(1) + angles degrees + circle 9223372036854775807 0 + triangle 0 0 + rectangle 0 0 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.rfloordiv(b) + a 1 + b + c + d + e + dtype: int64 + >>> a.rfloordiv(b, fill_value=0) + a 1 + b 0 + c 0 + d 9223372036854775807 + e + dtype: int64 + """ + ), + ) + ) + def rfloordiv( + self, other, axis, level=None, fill_value=None + ): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__rfloordiv__", fill_value) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Floating division", + op_name="truediv", + equivalent_op="frame / other", + df_op_example=textwrap.dedent( + """ + >>> df.truediv(1) + angles degrees + circle 0.0 360.0 + triangle 3.0 180.0 + rectangle 4.0 360.0 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.truediv(b) + a 1.0 + b + c + d + e + dtype: float64 + >>> a.truediv(b, fill_value=0) + a 1.0 + b Inf + c Inf + d 0.0 + e + dtype: float64 + """ + ), + ) + ) + def truediv(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__truediv__", fill_value) + + # Alias for truediv + div = truediv + divide = truediv + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Floating division", + op_name="rtruediv", + equivalent_op="other / frame", + df_op_example=textwrap.dedent( + """ + >>> df.rtruediv(1) + angles degrees + circle inf 0.002778 + triangle 0.333333 0.005556 + rectangle 0.250000 0.002778 + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.rtruediv(b) + a 1.0 + b + c + d + e + dtype: float64 + >>> a.rtruediv(b, fill_value=0) + a 1.0 + b 0.0 + c 0.0 + d Inf + e + dtype: float64 + """ + ), + ) + ) + def rtruediv(self, other, axis, level=None, fill_value=None): # noqa: D102 + if level is not None: + raise NotImplementedError("level parameter is not supported yet.") + + return self._binaryop(other, "__rtruediv__", fill_value) + + # Alias for rtruediv + rdiv = rtruediv + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Equal to", + op_name="eq", + equivalent_op="frame == other", + df_op_example=textwrap.dedent( + """ + >>> df.eq(1) + angles degrees + circle False False + triangle False False + rectangle False False + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.eq(b) + a True + b + c + d + e + dtype: bool + >>> a.eq(b, fill_value=0) + a True + b False + c False + d False + e + dtype: bool + """ + ), + ) + ) + def eq( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: D102 + return self._binaryop( + other=other, op="__eq__", fill_value=fill_value, can_reindex=True + ) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Not equal to", + op_name="ne", + equivalent_op="frame != other", + df_op_example=textwrap.dedent( + """ + >>> df.ne(1) + angles degrees + circle True True + triangle True True + rectangle True True + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.ne(b) + a False + b + c + d + e + dtype: bool + >>> a.ne(b, fill_value=0) + a False + b True + c True + d True + e + dtype: bool + """ + ), + ) + ) + def ne( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: D102 + return self._binaryop( + other=other, op="__ne__", fill_value=fill_value, can_reindex=True + ) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Less than", + op_name="lt", + equivalent_op="frame < other", + df_op_example=textwrap.dedent( + """ + >>> df.lt(1) + angles degrees + circle True False + triangle False False + rectangle False False + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.lt(b) + a False + b + c + d + e + dtype: bool + >>> a.lt(b, fill_value=0) + a False + b False + c False + d True + e + dtype: bool + """ + ), + ) + ) + def lt( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: D102 + return self._binaryop( + other=other, op="__lt__", fill_value=fill_value, can_reindex=True + ) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Less than or equal to", + op_name="le", + equivalent_op="frame <= other", + df_op_example=textwrap.dedent( + """ + >>> df.le(1) + angles degrees + circle True False + triangle False False + rectangle False False + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.le(b) + a True + b + c + d + e + dtype: bool + >>> a.le(b, fill_value=0) + a True + b False + c False + d True + e + dtype: bool + """ + ), + ) + ) + def le( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: D102 + return self._binaryop( + other=other, op="__le__", fill_value=fill_value, can_reindex=True + ) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Greater than", + op_name="gt", + equivalent_op="frame > other", + df_op_example=textwrap.dedent( + """ + >>> df.gt(1) + angles degrees + circle False True + triangle True True + rectangle True True + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.gt(b) + a False + b + c + d + e + dtype: bool + >>> a.gt(b, fill_value=0) + a False + b True + c True + d False + e + dtype: bool + """ + ), + ) + ) + def gt( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: D102 + return self._binaryop( + other=other, op="__gt__", fill_value=fill_value, can_reindex=True + ) + + @_cudf_nvtx_annotate + @docutils.doc_apply( + doc_binop_template.format( + operation="Greater than or equal to", + op_name="ge", + equivalent_op="frame >= other", + df_op_example=textwrap.dedent( + """ + >>> df.ge(1) + angles degrees + circle False True + triangle True True + rectangle True True + """ + ), + ser_op_example=textwrap.dedent( + """ + >>> a.ge(b) + a True + b + c + d + e + dtype: bool + >>> a.ge(b, fill_value=0) + a True + b True + c True + d False + e + dtype: bool + """ + ), + ) + ) + def ge( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: D102 + return self._binaryop( + other=other, op="__ge__", fill_value=fill_value, can_reindex=True + ) + def _check_duplicate_level_names(specified, level_names): """Raise if any of `specified` has duplicates in `level_names`.""" diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d14942cd3ce..965810a19e6 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -53,7 +53,6 @@ from cudf.core.column.string import StringMethods from cudf.core.column.struct import StructMethods from cudf.core.column_accessor import ColumnAccessor -from cudf.core.frame import Frame from cudf.core.groupby.groupby import SeriesGroupBy from cudf.core.index import BaseIndex, RangeIndex, as_index from cudf.core.indexed_frame import ( @@ -3284,7 +3283,7 @@ def make_binop_func(op): # appropriate API for Series as required for pandas compatibility. The # main effect is reordering and error-checking parameters in # Series-specific ways. - wrapped_func = getattr(Frame, op) + wrapped_func = getattr(IndexedFrame, op) @functools.wraps(wrapped_func) def wrapper(self, other, level=None, fill_value=None, axis=0): diff --git a/python/cudf/cudf/core/udf/row_function.py b/python/cudf/cudf/core/udf/row_function.py index 5cda9fb8218..1d0bd5ac99d 100644 --- a/python/cudf/cudf/core/udf/row_function.py +++ b/python/cudf/cudf/core/udf/row_function.py @@ -1,3 +1,4 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. import math import numpy as np diff --git a/python/cudf/cudf/core/udf/scalar_function.py b/python/cudf/cudf/core/udf/scalar_function.py index 7f3b461a1f0..a7b887dd2d5 100644 --- a/python/cudf/cudf/core/udf/scalar_function.py +++ b/python/cudf/cudf/core/udf/scalar_function.py @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + from numba import cuda from numba.np import numpy_support diff --git a/python/cudf/cudf/core/udf/templates.py b/python/cudf/cudf/core/udf/templates.py index 8cb11133323..3ac7083582f 100644 --- a/python/cudf/cudf/core/udf/templates.py +++ b/python/cudf/cudf/core/udf/templates.py @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + unmasked_input_initializer_template = """\ d_{idx} = input_col_{idx} masked_{idx} = Masked(d_{idx}[i], True) diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 2be1691a1a6..cdb9492c695 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import operator diff --git a/python/cudf/cudf/datasets.py b/python/cudf/cudf/datasets.py index d7a2fedef59..c6091ab60fc 100644 --- a/python/cudf/cudf/datasets.py +++ b/python/cudf/cudf/datasets.py @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + import numpy as np import pandas as pd diff --git a/python/cudf/cudf/errors.py b/python/cudf/cudf/errors.py index 5d6f52c0307..bd264940081 100644 --- a/python/cudf/cudf/errors.py +++ b/python/cudf/cudf/errors.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. class UnsupportedCUDAError(Exception): diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py index 0d0ba579f22..1d3d9e91ae2 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -1,3 +1,5 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. + import numpy as np import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py index 3081b7c4a6e..90cf11d7dde 100644 --- a/python/cudf/cudf/tests/test_factorize.py +++ b/python/cudf/cudf/tests/test_factorize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import cupy as cp import numpy as np diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index 307232b1305..f15d705c4e2 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import io import os diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index 2d61d6693cb..de4303a34a8 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ b/python/cudf/cudf/tests/test_hdfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import os from io import BytesIO diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 6a665a2b43c..dc624ebe2b5 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -292,10 +292,32 @@ def test_get_nested_lists(): assert_eq(expect, got) -def test_get_nulls(): - with pytest.raises(IndexError, match="list index out of range"): - sr = cudf.Series([[], [], []]) - sr.list.get(100) +def test_get_default(): + sr = cudf.Series([[1, 2], [3, 4, 5], [6, 7, 8, 9]]) + + assert_eq(cudf.Series([cudf.NA, 5, 8]), sr.list.get(2)) + assert_eq(cudf.Series([cudf.NA, 5, 8]), sr.list.get(2, default=cudf.NA)) + assert_eq(cudf.Series([0, 5, 8]), sr.list.get(2, default=0)) + assert_eq(cudf.Series([0, 3, 7]), sr.list.get(-3, default=0)) + assert_eq(cudf.Series([2, 5, 9]), sr.list.get(-1)) + + string_sr = cudf.Series( + [["apple", "banana"], ["carrot", "daffodil", "elephant"]] + ) + assert_eq( + cudf.Series(["default", "elephant"]), + string_sr.list.get(2, default="default"), + ) + + sr_with_null = cudf.Series([[0, cudf.NA], [1]]) + assert_eq(cudf.Series([cudf.NA, 0]), sr_with_null.list.get(1, default=0)) + + sr_nested = cudf.Series([[[1, 2], [3, 4], [5, 6]], [[5, 6], [7, 8]]]) + assert_eq(cudf.Series([[3, 4], [7, 8]]), sr_nested.list.get(1)) + assert_eq(cudf.Series([[5, 6], cudf.NA]), sr_nested.list.get(2)) + assert_eq( + cudf.Series([[5, 6], [0, 0]]), sr_nested.list.get(2, default=[0, 0]) + ) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_pack.py b/python/cudf/cudf/tests/test_pack.py index 942cba2842a..67435c71e6b 100644 --- a/python/cudf/cudf/tests/test_pack.py +++ b/python/cudf/cudf/tests/test_pack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index 10c3689fcd7..a182a5e7d24 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import string from itertools import product diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 493098cd494..d600fdeee27 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1299,6 +1299,12 @@ def test_string_slice_replace(string, number, diff, repr): ) +def test_string_slice_replace_fail(): + gs = cudf.Series(["abc", "xyz", ""]) + with pytest.raises(TypeError): + gs.str.slice_replace(0, 1, ["_"]) + + def test_string_insert(): gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) @@ -1312,6 +1318,9 @@ def test_string_insert(): ps.str.slice(stop=5) + "---" + ps.str.slice(start=5), ) + with pytest.raises(TypeError): + gs.str.insert(0, ["+"]) + _string_char_types_data = [ ["abc", "xyz", "a", "ab", "123", "097"], @@ -1404,6 +1413,9 @@ def test_string_filter_alphanum(): expected.append(rs) assert_eq(gs.str.filter_alphanum("*", keep=False), cudf.Series(expected)) + with pytest.raises(TypeError): + gs.str.filter_alphanum(["a"]) + @pytest.mark.parametrize( "case_op", ["title", "capitalize", "lower", "upper", "swapcase"] @@ -1504,6 +1516,14 @@ def test_strings_partition(data): assert_eq(pi.str.partition("-"), gi.str.partition("-")) +def test_string_partition_fail(): + gs = cudf.Series(["abc", "aa", "cba"]) + with pytest.raises(TypeError): + gs.str.partition(["a"]) + with pytest.raises(TypeError): + gs.str.rpartition(["a"]) + + @pytest.mark.parametrize( "data", [ @@ -1640,6 +1660,16 @@ def test_strings_strip_tests(data, to_strip): ) +def test_string_strip_fail(): + gs = cudf.Series(["a", "aa", ""]) + with pytest.raises(TypeError): + gs.str.strip(["a"]) + with pytest.raises(TypeError): + gs.str.lstrip(["a"]) + with pytest.raises(TypeError): + gs.str.rstrip(["a"]) + + @pytest.mark.parametrize( "data", [ @@ -2364,6 +2394,9 @@ def test_string_str_filter_characters(): ) assert_eq(expected, gs.str.filter_characters(filter, True, " ")) + with pytest.raises(TypeError): + gs.str.filter_characters(filter, True, ["a"]) + def test_string_str_code_points(): diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index 167f171fa26..8d1056ca9cc 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np import pandas as pd diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 742c747ab69..96b124c27ec 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -1,4 +1,5 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. + from pickle import dumps import cachetools diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index bd3da4ea2ba..a722d350ef4 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. def validate_setup(): diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index 64218ddf46a..cdaaff6b2af 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import ast import datetime as dt diff --git a/python/cudf_kafka/cudf_kafka/_version.py b/python/cudf_kafka/cudf_kafka/_version.py index 6cd10cc10bf..3c1d113fd47 100644 --- a/python/cudf_kafka/cudf_kafka/_version.py +++ b/python/cudf_kafka/cudf_kafka/_version.py @@ -1,3 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build diff --git a/python/cudf_kafka/versioneer.py b/python/cudf_kafka/versioneer.py index a3b0246e785..dbddb6e0fd0 100644 --- a/python/cudf_kafka/versioneer.py +++ b/python/cudf_kafka/versioneer.py @@ -1,3 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # Version: 0.18 """The Versioneer - like a rocketeer, but for versions. diff --git a/python/custreamz/custreamz/_version.py b/python/custreamz/custreamz/_version.py index 106fc3524f9..a017486df32 100644 --- a/python/custreamz/custreamz/_version.py +++ b/python/custreamz/custreamz/_version.py @@ -1,3 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py index a7378408c24..51f55684500 100644 --- a/python/custreamz/custreamz/tests/test_dataframes.py +++ b/python/custreamz/custreamz/tests/test_dataframes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. """ Tests for Streamz Dataframes (SDFs) built on top of cuDF DataFrames. diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py index 104879fce36..f0dbcac0017 100644 --- a/python/dask_cudf/dask_cudf/_version.py +++ b/python/dask_cudf/dask_cudf/_version.py @@ -1,3 +1,4 @@ +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build diff --git a/python/dask_cudf/dask_cudf/io/orc.py b/python/dask_cudf/dask_cudf/io/orc.py index 2d326e41c3e..3c11fe3ffbb 100644 --- a/python/dask_cudf/dask_cudf/io/orc.py +++ b/python/dask_cudf/dask_cudf/io/orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from io import BufferedWriter, IOBase diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index 84c0e0e9b39..95cf0c8d56d 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -384,7 +384,7 @@ def test_contains(data, search_key): "data, index, expectation", [ (data_test_1(), 1, does_not_raise()), - (data_test_2(), 2, pytest.raises(IndexError)), + (data_test_2(), 2, does_not_raise()), ], ) def test_get(data, index, expectation):