diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4cdcac88091..cc08b832e69 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,12 +56,20 @@ repos: - id: clang-format types_or: [c, c++, cuda] args: ["-fallback-style=none", "-style=file", "-i"] + exclude: | + (?x)^( + ^cpp/src/io/parquet/ipc/Schema_generated.h| + ^cpp/src/io/parquet/ipc/Message_generated.h| + ^cpp/include/cudf_test/cxxopts.hpp + ) - repo: https://github.com/sirosen/texthooks rev: 0.6.6 hooks: - id: fix-smartquotes exclude: | (?x)^( + ^cpp/src/io/parquet/ipc/Schema_generated.h| + ^cpp/src/io/parquet/ipc/Message_generated.h| ^cpp/include/cudf_test/cxxopts.hpp| ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*| ^python/cudf/cudf/tests/text/test_text_methods.py diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 740a6409ccd..e5fcef17a83 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -13,12 +13,10 @@ export CMAKE_GENERATOR=Ninja rapids-print-env -version=$(rapids-generate-version) - rapids-logger "Begin cpp build" # With boa installed conda build forward to boa -RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild \ +RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \ conda/recipes/libcudf rapids-upload-conda-to-s3 cpp diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 67a5415f353..14dc7a59048 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -14,7 +14,7 @@ ENV_YAML_DIR="$(mktemp -d)" rapids-dependency-file-generator \ --output conda \ - --file_key docs \ + --file-key docs \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n docs diff --git a/ci/check_style.sh b/ci/check_style.sh index 029cd305f1d..634d8b0d702 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -10,7 +10,7 @@ ENV_YAML_DIR="$(mktemp -d)" rapids-dependency-file-generator \ --output conda \ - 
--file_key checks \ + --file-key checks \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n checks diff --git a/ci/configure_cpp_static.sh b/ci/configure_cpp_static.sh index 11d5585d98f..51e41b065fb 100755 --- a/ci/configure_cpp_static.sh +++ b/ci/configure_cpp_static.sh @@ -12,7 +12,7 @@ REQUIREMENTS_FILE="${ENV_YAML_DIR}/requirements.txt" rapids-dependency-file-generator \ --output requirements \ - --file_key test_static_build \ + --file-key test_static_build \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${REQUIREMENTS_FILE}" python -m pip install -r "${REQUIREMENTS_FILE}" diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh index da847137a2b..f5a8de543f6 100755 --- a/ci/test_cpp_common.sh +++ b/ci/test_cpp_common.sh @@ -11,7 +11,7 @@ ENV_YAML_DIR="$(mktemp -d)" rapids-dependency-file-generator \ --output conda \ - --file_key test_cpp \ + --file-key test_cpp \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test diff --git a/ci/test_java.sh b/ci/test_java.sh index c93079742f0..9713eb192d2 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -11,7 +11,7 @@ ENV_YAML_DIR="$(mktemp -d)" rapids-dependency-file-generator \ --output conda \ - --file_key test_java \ + --file-key test_java \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index 8be2d374bed..da9478ce25d 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -11,7 +11,7 @@ ENV_YAML_DIR="$(mktemp -d)" rapids-dependency-file-generator \ --output conda \ - --file_key test_notebooks \ + --file-key test_notebooks \ --matrix 
"cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index 7559d970f6d..e8849588aa5 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -13,7 +13,7 @@ ENV_YAML_DIR="$(mktemp -d)" rapids-dependency-file-generator \ --output conda \ - --file_key test_python \ + --file-key test_python \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ca85996b990..aab0a9b2d49 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -664,6 +664,7 @@ add_library( src/utilities/default_stream.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp + src/utilities/pinned_memory.cpp src/utilities/stacktrace.cpp src/utilities/stream_pool.cpp src/utilities/traits.cpp diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index ebcbcb17e98..df1492690bb 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include @@ -81,17 +81,18 @@ struct nvbench_base_fixture { "\nExpecting: cuda, pool, async, arena, managed, or managed_pool"); } - inline rmm::host_async_resource_ref make_cuio_host_pinned() + inline rmm::host_device_async_resource_ref make_cuio_host_pinned() { static std::shared_ptr mr = std::make_shared(); return *mr; } - inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode) + inline rmm::host_device_async_resource_ref create_cuio_host_memory_resource( + std::string const& mode) { if (mode == "pinned") return make_cuio_host_pinned(); - if (mode == "pinned_pool") return 
cudf::io::get_host_memory_resource(); + if (mode == "pinned_pool") return cudf::get_pinned_memory_resource(); CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool"); } @@ -112,14 +113,14 @@ struct nvbench_base_fixture { rmm::mr::set_current_device_resource(mr.get()); std::cout << "RMM memory resource = " << rmm_mode << "\n"; - cudf::io::set_host_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); + cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n"; } ~nvbench_base_fixture() { // Ensure the the pool is freed before the CUDA context is destroyed: - cudf::io::set_host_memory_resource(this->make_cuio_host_pinned()); + cudf::set_pinned_memory_resource(this->make_cuio_host_pinned()); } std::shared_ptr mr; diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 37ced8ea703..645994f3f0d 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -19,6 +19,9 @@ #include #include +#include +#include + #include #include @@ -28,6 +31,14 @@ temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; +// Don't use cudf's pinned pool for the source data +rmm::host_async_resource_ref pinned_memory_resource() +{ + static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{}; + + return mr; +} + std::string random_file_in_dir(std::string const& dir_path) { // `mkstemp` modifies the template in place @@ -41,6 +52,7 @@ std::string random_file_in_dir(std::string const& dir_path) cuio_source_sink_pair::cuio_source_sink_pair(io_type type) : type{type}, + pinned_buffer({pinned_memory_resource(), cudf::get_default_stream()}), d_buffer{0, cudf::get_default_stream()}, file_name{random_file_in_dir(tmpdir.path())}, void_sink{cudf::io::data_sink::create()} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 
d4f39a5f243..64d6021cf50 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -79,7 +79,7 @@ class cuio_source_sink_pair { io_type const type; std::vector h_buffer; - cudf::detail::pinned_host_vector pinned_buffer; + cudf::detail::host_vector pinned_buffer; rmm::device_uvector d_buffer; std::string const file_name; std::unique_ptr void_sink; diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index a67d1932951..b4c8ed78ed8 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -20,9 +20,9 @@ #include #include -#include #include #include +#include #include #include diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index b5d855d8881..67705863d41 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -132,9 +131,10 @@ static void bench_multibyte_split(nvbench::state& state, auto const delim_factor = static_cast(delim_percent) / 100; std::unique_ptr datasource; - auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); - auto host_input = std::vector{}; - auto host_pinned_input = cudf::detail::pinned_host_vector{}; + auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim); + auto host_input = std::vector{}; + auto host_pinned_input = + cudf::detail::make_pinned_vector_async(0, cudf::get_default_stream()); if (source_type != data_chunk_source_type::device && source_type != data_chunk_source_type::host_pinned) { diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 26916e49012..918271e3e4f 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -478,7 +478,10 @@ class operation : public expression { * * @return Vector of operands */ - std::vector> get_operands() const { return operands; } + [[nodiscard]] std::vector> get_operands() const + { + return operands; + } /** * @copydoc expression::accept diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 19722d127cb..787e9c2c479 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -442,7 +442,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return string_view instance representing this element at this index */ template )> - __device__ T element(size_type element_index) const noexcept + __device__ [[nodiscard]] T element(size_type element_index) const noexcept { size_type index = element_index + offset(); // account for this view's _offset char const* d_strings = static_cast(_data); @@ -501,7 +501,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return dictionary32 instance representing this element at this index */ template )> - __device__ T element(size_type element_index) const noexcept + __device__ [[nodiscard]] T element(size_type element_index) const noexcept { size_type index = element_index + offset(); // account for this view's _offset auto const indices = d_children[0]; @@ -519,7 +519,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return numeric::fixed_point representing the element at this index */ template ())> - __device__ T element(size_type element_index) const noexcept + __device__ [[nodiscard]] T element(size_type element_index) const noexcept { using namespace numeric; using rep = typename T::rep; @@ -858,7 +858,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { */ [[nodiscard]] __device__ device_span children() const noexcept { - return device_span(d_children, 
_num_children); + return {d_children, static_cast(_num_children)}; } /** @@ -1032,7 +1032,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @return Reference to the element at the specified index */ template ())> - __device__ T& element(size_type element_index) const noexcept + __device__ [[nodiscard]] T& element(size_type element_index) const noexcept { return data()[element_index]; } diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 87c0f8ec7f1..edee83783b8 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -24,6 +24,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -510,7 +511,7 @@ class quantile_aggregation final : public groupby_aggregation, public reduce_agg void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } private: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(static_cast(_interpolation)) ^ std::accumulate( @@ -596,7 +597,10 @@ class nunique_aggregation final : public groupby_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } private: - size_t hash_impl() const { return std::hash{}(static_cast(_null_handling)); } + [[nodiscard]] size_t hash_impl() const + { + return std::hash{}(static_cast(_null_handling)); + } }; /** @@ -638,7 +642,7 @@ class nth_element_aggregation final : public groupby_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } private: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(_n) ^ std::hash{}(static_cast(_null_handling)); } @@ -763,7 +767,10 @@ class collect_list_aggregation final : public rolling_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } 
private: - size_t hash_impl() const { return std::hash{}(static_cast(_null_handling)); } + [[nodiscard]] size_t hash_impl() const + { + return std::hash{}(static_cast(_null_handling)); + } }; /** @@ -813,7 +820,7 @@ class collect_set_aggregation final : public rolling_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } protected: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(static_cast(_null_handling) ^ static_cast(_nulls_equal) ^ static_cast(_nans_equal)); @@ -866,10 +873,10 @@ class lead_lag_aggregation final : public rolling_aggregation { class udf_aggregation final : public rolling_aggregation { public: udf_aggregation(aggregation::Kind type, - std::string const& user_defined_aggregator, + std::string user_defined_aggregator, data_type output_type) : aggregation{type}, - _source{user_defined_aggregator}, + _source{std::move(user_defined_aggregator)}, _operator_name{(type == aggregation::PTX) ? 
"rolling_udf_ptx" : "rolling_udf_cuda"}, _function_name{"rolling_udf"}, _output_type{output_type} @@ -973,7 +980,7 @@ class merge_sets_aggregation final : public groupby_aggregation, public reduce_a void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } protected: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(static_cast(_nulls_equal) ^ static_cast(_nans_equal)); } @@ -1046,7 +1053,7 @@ class covariance_aggregation final : public groupby_aggregation { void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } protected: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(_min_periods) ^ std::hash{}(_ddof); } @@ -1088,7 +1095,7 @@ class correlation_aggregation final : public groupby_aggregation { void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } protected: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(static_cast(_type)) ^ std::hash{}(_min_periods); } diff --git a/cpp/include/cudf/detail/contiguous_split.hpp b/cpp/include/cudf/detail/contiguous_split.hpp index de00b61cdca..1467ed1aa67 100644 --- a/cpp/include/cudf/detail/contiguous_split.hpp +++ b/cpp/include/cudf/detail/contiguous_split.hpp @@ -104,7 +104,7 @@ class metadata_builder { * * @returns A vector containing the serialized column metadata */ - std::vector build() const; + [[nodiscard]] std::vector build() const; /** * @brief Clear the internal buffer containing all added metadata. 
diff --git a/cpp/include/cudf/detail/normalizing_iterator.cuh b/cpp/include/cudf/detail/normalizing_iterator.cuh index 32df13104e0..308fd188b09 100644 --- a/cpp/include/cudf/detail/normalizing_iterator.cuh +++ b/cpp/include/cudf/detail/normalizing_iterator.cuh @@ -51,7 +51,7 @@ struct alignas(16) base_normalator { */ CUDF_HOST_DEVICE inline Derived& operator++() { - Derived& derived = static_cast(*this); + auto& derived = static_cast(*this); derived.p_ += width_; return derived; } @@ -71,7 +71,7 @@ struct alignas(16) base_normalator { */ CUDF_HOST_DEVICE inline Derived& operator--() { - Derived& derived = static_cast(*this); + auto& derived = static_cast(*this); derived.p_ -= width_; return derived; } @@ -91,7 +91,7 @@ struct alignas(16) base_normalator { */ CUDF_HOST_DEVICE inline Derived& operator+=(difference_type offset) { - Derived& derived = static_cast(*this); + auto& derived = static_cast(*this); derived.p_ += offset * width_; return derived; } @@ -121,7 +121,7 @@ struct alignas(16) base_normalator { */ CUDF_HOST_DEVICE inline Derived& operator-=(difference_type offset) { - Derived& derived = static_cast(*this); + auto& derived = static_cast(*this); derived.p_ -= offset * width_; return derived; } diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index e736514ac29..beedc009c84 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -25,6 +25,8 @@ #include #include +#include + namespace cudf::structs::detail { enum class column_nullability { @@ -112,12 +114,12 @@ class flattened_table { * @param columns_ Newly allocated columns to back the table_view * @param nullable_data_ Newly generated temporary data that needs to be kept alive */ - flattened_table(table_view const& flattened_columns_, + flattened_table(table_view flattened_columns_, std::vector const& orders_, std::vector const& null_orders_, std::vector>&& columns_, 
temporary_nullable_data&& nullable_data_) - : _flattened_columns{flattened_columns_}, + : _flattened_columns{std::move(flattened_columns_)}, _orders{orders_}, _null_orders{null_orders_}, _columns{std::move(columns_)}, @@ -170,11 +172,11 @@ class flattened_table { * orders, flattened null precedence, alongside the supporting columns and device_buffers * for the flattened table. */ -[[nodiscard]] std::unique_ptr flatten_nested_columns( +[[nodiscard]] std::unique_ptr flatten_nested_columns( table_view const& input, - std::vector const& column_order, - std::vector const& null_precedence, - column_nullability nullability, + std::vector const& column_order, + std::vector const& null_precedence, + cudf::structs::detail::column_nullability nullability, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -194,11 +196,11 @@ class flattened_table { * @param mr Device memory resource used to allocate new device memory * @return A new column with potentially new null mask */ -[[nodiscard]] std::unique_ptr superimpose_nulls(bitmask_type const* null_mask, - size_type null_count, - std::unique_ptr&& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); +[[nodiscard]] std::unique_ptr superimpose_nulls(bitmask_type const* null_mask, + cudf::size_type null_count, + std::unique_ptr&& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); /** * @brief Push down nulls from the given input column into its children columns, using bitwise AND. 
diff --git a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp similarity index 91% rename from cpp/include/cudf/detail/utilities/rmm_host_vector.hpp rename to cpp/include/cudf/detail/utilities/host_vector.hpp index 6901a19473e..2d14d0306cd 100644 --- a/cpp/include/cudf/detail/utilities/rmm_host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -32,8 +33,6 @@ namespace cudf::detail { /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c a `rmm::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template @@ -42,8 +41,6 @@ class rmm_host_allocator; /*! \p rmm_host_allocator is a CUDA-specific host memory allocator * that employs \c an `cudf::host_async_resource_ref` for allocation. * - * This implementation is ported from pinned_host_vector in cudf. - * * \see https://en.cppreference.com/w/cpp/memory/allocator */ template <> @@ -70,8 +67,7 @@ class rmm_host_allocator { * The \p rmm_host_allocator provides an interface for host memory allocation through the user * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of * this reference and therefore it is the user's responsibility to ensure its lifetime for the - * duration of the lifetime of the \p rmm_host_allocator. This implementation is ported from - * pinned_host_vector in cudf. + * duration of the lifetime of the \p rmm_host_allocator. 
* * \see https://en.cppreference.com/w/cpp/memory/allocator */ @@ -86,7 +82,7 @@ class rmm_host_allocator { using size_type = std::size_t; ///< The type used for the size of the allocation using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - typedef cuda::std::true_type propagate_on_container_move_assignment; + using propagate_on_container_move_assignment = cuda::std::true_type; /** * @brief converts a `rmm_host_allocator` to `rmm_host_allocator` @@ -121,8 +117,12 @@ class rmm_host_allocator { inline pointer allocate(size_type cnt) { if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - return static_cast( - mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream)); + auto const result = + mr.allocate_async(cnt * sizeof(value_type), rmm::RMM_DEFAULT_HOST_ALIGNMENT, stream); + // Synchronize to ensure the memory is allocated before thrust::host_vector initialization + // TODO: replace thrust::host_vector with a type that does not require synchronization + stream.synchronize(); + return static_cast(result); } /** @@ -147,7 +147,7 @@ class rmm_host_allocator { * @return The maximum number of objects that may be allocated * by a single call to \p allocate(). 
*/ - constexpr inline size_type max_size() const + [[nodiscard]] constexpr inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } @@ -182,6 +182,6 @@ class rmm_host_allocator { * @brief A vector class with rmm host memory allocator */ template -using rmm_host_vector = thrust::host_vector>; +using host_vector = thrust::host_vector>; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp b/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp deleted file mode 100644 index c22b6a6ba15..00000000000 --- a/cpp/include/cudf/detail/utilities/pinned_host_vector.hpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2008-2024, NVIDIA CORPORATION - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include // for bad_alloc - -namespace cudf::detail { - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. 
- * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template <> -class pinned_allocator { - public: - using value_type = void; ///< The type of the elements in the allocator - using pointer = void*; ///< The type returned by address() / allocate() - using const_pointer = void const*; ///< The type returned by address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; -}; - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * This implementation is ported from the experimental/pinned_allocator - * that Thrust used to provide. - * - * \see https://en.cppreference.com/w/cpp/memory/allocator - */ -template -class pinned_allocator { - public: - using value_type = T; ///< The type of the elements in the allocator - using pointer = T*; ///< The type returned by address() / allocate() - using const_pointer = T const*; ///< The type returned by address() - using reference = T&; ///< The parameter type for address() - using const_reference = T const&; ///< The parameter type for address() - using size_type = std::size_t; ///< The type used for the size of the allocation - using difference_type = std::ptrdiff_t; ///< The type of the distance between two pointers - - /** - * @brief converts a `pinned_allocator` to `pinned_allocator` - */ - template - struct rebind { - using other = pinned_allocator; ///< The rebound type - }; - - /** - * @brief pinned_allocator's null constructor does nothing. - */ - __host__ __device__ inline pinned_allocator() {} - - /** - * @brief pinned_allocator's null destructor does nothing. 
- */ - __host__ __device__ inline ~pinned_allocator() {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - */ - __host__ __device__ inline pinned_allocator(pinned_allocator const&) {} - - /** - * @brief pinned_allocator's copy constructor does nothing. - * - * This version of pinned_allocator's copy constructor - * is templated on the \c value_type of the pinned_allocator - * to copy from. It is provided merely for convenience; it - * does nothing. - */ - template - __host__ __device__ inline pinned_allocator(pinned_allocator const&) - { - } - - /** - * @brief This method returns the address of a \c reference of - * interest. - * - * @param r The \c reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline pointer address(reference r) { return &r; } - - /** - * @brief This method returns the address of a \c const_reference - * of interest. - * - * @param r The \c const_reference of interest. - * @return \c r's address. - */ - __host__ __device__ inline const_pointer address(const_reference r) { return &r; } - - /** - * @brief This method allocates storage for objects in pinned host - * memory. - * - * @param cnt The number of objects to allocate. - * @return a \c pointer to the newly allocated objects. - * @note The second parameter to this function is meant as a - * hint pointer to a nearby memory location, but is - * not used by this allocator. - * @note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. - */ - __host__ inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0) - { - if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if - - pointer result(0); - CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); - return result; - } - - /** - * @brief This method deallocates pinned host memory previously allocated - * with this \c pinned_allocator. 
- * - * @param p A \c pointer to the previously allocated memory. - * @note The second parameter is the number of objects previously allocated - * but is ignored by this allocator. - * @note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. - */ - __host__ inline void deallocate(pointer p, size_type /*cnt*/) - { - auto dealloc_worked = cudaFreeHost(p); - (void)dealloc_worked; - assert(dealloc_worked == cudaSuccess); - } - - /** - * @brief This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * @return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } - - /** - * @brief This method tests this \p pinned_allocator for equality to - * another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c true. - */ - __host__ __device__ inline bool operator==(pinned_allocator const& x) const { return true; } - - /** - * @brief This method tests this \p pinned_allocator for inequality - * to another. - * - * @param x The other \p pinned_allocator of interest. - * @return This method always returns \c false. 
- */ - __host__ __device__ inline bool operator!=(pinned_allocator const& x) const - { - return !operator==(x); - } -}; - -/** - * @brief A vector class with pinned host memory allocator - */ -template -using pinned_host_vector = thrust::host_vector>; - -} // namespace cudf::detail diff --git a/cpp/include/cudf/detail/utilities/stream_pool.hpp b/cpp/include/cudf/detail/utilities/stream_pool.hpp index e19cc3ec2f7..64c1d4ae514 100644 --- a/cpp/include/cudf/detail/utilities/stream_pool.hpp +++ b/cpp/include/cudf/detail/utilities/stream_pool.hpp @@ -73,7 +73,7 @@ class cuda_stream_pool { * * @return the number of stream objects in the pool */ - virtual std::size_t get_stream_pool_size() const = 0; + [[nodiscard]] virtual std::size_t get_stream_pool_size() const = 0; }; /** diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index 293a4096c57..20cb55bb1c7 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -21,8 +21,10 @@ * @file vector_factories.hpp */ +#include #include #include +#include #include #include @@ -380,7 +382,7 @@ thrust::host_vector make_host_vector_async(device_span v, rmm::cuda_ * @brief Asynchronously construct a `std::vector` containing a copy of data from a device * container * - * @note This function synchronizes `stream`. + * @note This function does not synchronize `stream`. * * @tparam Container The type of the container to copy from * @tparam T The type of the data to copy @@ -439,6 +441,40 @@ thrust::host_vector make_host_vector_sync( return make_host_vector_sync(device_span{c}, stream); } +/** + * @brief Asynchronously construct a pinned `cudf::detail::host_vector` of the given size + * + * @note This function may not synchronize `stream`. 
+ * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A host_vector of the given size + */ +template +host_vector make_pinned_vector_async(size_t size, rmm::cuda_stream_view stream) +{ + return host_vector(size, {cudf::get_pinned_memory_resource(), stream}); +} + +/** + * @brief Synchronously construct a pinned `cudf::detail::host_vector` of the given size + * + * @note This function synchronizes `stream`. + * + * @tparam T The type of the vector data + * @param size The number of elements in the created vector + * @param stream The stream on which to allocate memory + * @return A host_vector of the given size + */ +template +host_vector make_pinned_vector_sync(size_t size, rmm::cuda_stream_view stream) +{ + auto result = make_pinned_vector_async(size, stream); + stream.synchronize(); + return result; +} + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index e39d75757e8..6c3c3b4da07 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -291,14 +291,14 @@ class fixed_point { * * @return The underlying value of the `fixed_point` number */ - CUDF_HOST_DEVICE inline rep value() const { return _value; } + CUDF_HOST_DEVICE [[nodiscard]] inline rep value() const { return _value; } /** * @brief Method that returns the scale of the `fixed_point` number * * @return The scale of the `fixed_point` number */ - CUDF_HOST_DEVICE inline scale_type scale() const { return _scale; } + CUDF_HOST_DEVICE [[nodiscard]] inline scale_type scale() const { return _scale; } /** * @brief Explicit conversion operator to `bool` @@ -573,7 +573,7 @@ class fixed_point { * @param scale The `scale` of the returned `fixed_point` number * @return `fixed_point` number with a new `scale` */ - CUDF_HOST_DEVICE inline fixed_point 
rescaled(scale_type scale) const + CUDF_HOST_DEVICE [[nodiscard]] inline fixed_point rescaled(scale_type scale) const { if (scale == _scale) { return *this; } Rep const value = detail::shift(_value, scale_type{scale - _scale}); diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index f3ff0009d5c..56ec62fa6e1 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -40,6 +40,8 @@ #include +#include + struct DLManagedTensor; struct ArrowDeviceArray; @@ -121,7 +123,7 @@ struct column_metadata { * * @param _name Name of the column */ - column_metadata(std::string const& _name) : name(_name) {} + column_metadata(std::string _name) : name(std::move(_name)) {} column_metadata() = default; }; diff --git a/cpp/include/cudf/interop/detail/arrow.hpp b/cpp/include/cudf/interop/detail/arrow.hpp index 8043ecf5422..906d48f636b 100644 --- a/cpp/include/cudf/interop/detail/arrow.hpp +++ b/cpp/include/cudf/interop/detail/arrow.hpp @@ -24,8 +24,12 @@ #define ARROW_C_DEVICE_DATA_INTERFACE // Device type for the allocated memory -typedef int32_t ArrowDeviceType; +using ArrowDeviceType = int32_t; +// The Arrow spec specifies using macros rather than enums here to avoid being +// susceptible to changes in the underlying type chosen by the compiler, but +// clang-tidy doesn't like this. +// NOLINTBEGIN // CPU device, same as using ArrowArray directly #define ARROW_DEVICE_CPU 1 // CUDA GPU Device @@ -34,6 +38,7 @@ typedef int32_t ArrowDeviceType; #define ARROW_DEVICE_CUDA_HOST 3 // CUDA managed/unified memory allocated by cudaMallocManaged #define ARROW_DEVICE_CUDA_MANAGED 13 +// NOLINTEND struct ArrowDeviceArray { struct ArrowArray array; diff --git a/cpp/include/cudf/io/arrow_io_source.hpp b/cpp/include/cudf/io/arrow_io_source.hpp index 5f79f05c5a1..d7a48c34e12 100644 --- a/cpp/include/cudf/io/arrow_io_source.hpp +++ b/cpp/include/cudf/io/arrow_io_source.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include +#include namespace cudf::io { /** @@ -49,7 +50,10 @@ class arrow_io_source : public datasource { * * @param file The `arrow` object from which the data is read */ - explicit arrow_io_source(std::shared_ptr file) : arrow_file(file) {} + explicit arrow_io_source(std::shared_ptr file) + : arrow_file(std::move(file)) + { + } /** * @brief Returns a buffer with a subset of data from the `arrow` source. diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index a20f75cecd7..68bb7fba00e 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -431,7 +432,8 @@ class csv_reader_options { * * @return Per-column types */ - std::variant, std::map> const& get_dtypes() const + [[nodiscard]] std::variant, std::map> const& + get_dtypes() const { return _dtypes; } @@ -441,49 +443,49 @@ class csv_reader_options { * * @return Additional values to recognize as boolean true values */ - std::vector const& get_true_values() const { return _true_values; } + [[nodiscard]] std::vector const& get_true_values() const { return _true_values; } /** * @brief Returns additional values to recognize as boolean false values. * * @return Additional values to recognize as boolean false values */ - std::vector const& get_false_values() const { return _false_values; } + [[nodiscard]] std::vector const& get_false_values() const { return _false_values; } /** * @brief Returns additional values to recognize as null values. 
* * @return Additional values to recognize as null values */ - std::vector const& get_na_values() const { return _na_values; } + [[nodiscard]] std::vector const& get_na_values() const { return _na_values; } /** * @brief Whether to keep the built-in default NA values. * * @return `true` if the built-in default NA values are kept */ - bool is_enabled_keep_default_na() const { return _keep_default_na; } + [[nodiscard]] bool is_enabled_keep_default_na() const { return _keep_default_na; } /** * @brief Whether to disable null filter. * * @return `true` if null filter is enabled */ - bool is_enabled_na_filter() const { return _na_filter; } + [[nodiscard]] bool is_enabled_na_filter() const { return _na_filter; } /** * @brief Whether to parse dates as DD/MM versus MM/DD. * * @return True if dates are parsed as DD/MM, false if MM/DD */ - bool is_enabled_dayfirst() const { return _dayfirst; } + [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; } /** * @brief Returns timestamp_type to which all timestamp columns will be cast. * * @return timestamp_type to which all timestamp columns will be cast */ - data_type get_timestamp_type() const { return _timestamp_type; } + [[nodiscard]] data_type get_timestamp_type() const { return _timestamp_type; } /** * @brief Sets compression format of the source. 
@@ -1399,8 +1401,8 @@ class csv_writer_options { * @param sink The sink used for writer output * @param table Table to be written to output */ - explicit csv_writer_options(sink_info const& sink, table_view const& table) - : _sink(sink), _table(table), _rows_per_chunk(table.num_rows()) + explicit csv_writer_options(sink_info sink, table_view const& table) + : _sink(std::move(sink)), _table(table), _rows_per_chunk(table.num_rows()) { } diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 978216d971e..21c870cb75e 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -160,7 +160,7 @@ class chunked_reader : private reader { * destructor needs to be defined in a separate source file which can access to that object's * declaration. */ - ~chunked_reader(); + ~chunked_reader() override; /** * @copydoc cudf::io::chunked_parquet_reader::has_next diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 65ba8f25577..8de690482f9 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -166,9 +167,9 @@ class json_reader_options { * * @returns Data types of the columns */ - std::variant, - std::map, - std::map> const& + [[nodiscard]] std::variant, + std::map, + std::map> const& get_dtypes() const { return _dtypes; @@ -179,28 +180,28 @@ class json_reader_options { * * @return Compression format of the source */ - compression_type get_compression() const { return _compression; } + [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns number of bytes to skip from source start. * * @return Number of bytes to skip from source start */ - size_t get_byte_range_offset() const { return _byte_range_offset; } + [[nodiscard]] size_t get_byte_range_offset() const { return _byte_range_offset; } /** * @brief Returns number of bytes to read. 
* * @return Number of bytes to read */ - size_t get_byte_range_size() const { return _byte_range_size; } + [[nodiscard]] size_t get_byte_range_size() const { return _byte_range_size; } /** * @brief Returns number of bytes to read with padding. * * @return Number of bytes to read with padding */ - size_t get_byte_range_size_with_padding() const + [[nodiscard]] size_t get_byte_range_size_with_padding() const { if (_byte_range_size == 0) { return 0; @@ -214,7 +215,7 @@ class json_reader_options { * * @return Number of bytes to pad */ - size_t get_byte_range_padding() const + [[nodiscard]] size_t get_byte_range_padding() const { auto const num_columns = std::visit([](auto const& dtypes) { return dtypes.size(); }, _dtypes); @@ -236,67 +237,68 @@ class json_reader_options { * * @return Delimiter separating records in JSON lines */ - char get_delimiter() const { return _delimiter; } + [[nodiscard]] char get_delimiter() const { return _delimiter; } /** * @brief Whether to read the file as a json object per line. * * @return `true` if reading the file as a json object per line */ - bool is_enabled_lines() const { return _lines; } + [[nodiscard]] bool is_enabled_lines() const { return _lines; } /** * @brief Whether to parse mixed types as a string column. * * @return `true` if mixed types are parsed as a string column */ - bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; } + [[nodiscard]] bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; } /** * @brief Whether to prune columns on read, selected based on the @ref set_dtypes option. * * When set as true, if the reader options include @ref set_dtypes, then * the reader will only return those columns which are mentioned in @ref set_dtypes. - * If false, then all columns are returned, independent of the @ref set_dtypes setting. + * If false, then all columns are returned, independent of the @ref set_dtypes + * setting. 
* * @return True if column pruning is enabled */ - bool is_enabled_prune_columns() const { return _prune_columns; } + [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; } /** * @brief Whether to parse dates as DD/MM versus MM/DD. * * @returns true if dates are parsed as DD/MM, false if MM/DD */ - bool is_enabled_dayfirst() const { return _dayfirst; } + [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; } /** * @brief Whether the reader should keep quotes of string values. * * @returns true if the reader should keep quotes, false otherwise */ - bool is_enabled_keep_quotes() const { return _keep_quotes; } + [[nodiscard]] bool is_enabled_keep_quotes() const { return _keep_quotes; } /** * @brief Whether the reader should normalize single quotes around strings * * @returns true if the reader should normalize single quotes, false otherwise */ - bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; } + [[nodiscard]] bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; } /** * @brief Whether the reader should normalize unquoted whitespace characters * * @returns true if the reader should normalize whitespace, false otherwise */ - bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; } + [[nodiscard]] bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; } /** * @brief Queries the JSON reader's behavior on invalid JSON lines. * * @returns An enum that specifies the JSON reader's behavior on invalid JSON lines. */ - json_recovery_mode_t recovery_mode() const { return _recovery_mode; } + [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; } /** * @brief Set data types for columns to be read. 
@@ -717,8 +719,8 @@ class json_writer_options { * @param sink The sink used for writer output * @param table Table to be written to output */ - explicit json_writer_options(sink_info const& sink, table_view const& table) - : _sink(sink), _table(table), _rows_per_chunk(table.num_rows()) + explicit json_writer_options(sink_info sink, table_view table) + : _sink(std::move(sink)), _table(std::move(table)), _rows_per_chunk(table.num_rows()) { } diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp deleted file mode 100644 index a36e220ae7b..00000000000 --- a/cpp/include/cudf/io/memory_resource.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -namespace cudf::io { - -/** - * @brief Set the rmm resource to be used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * hostdevice_vector is a utility class that uses a pair of host and device-side buffers for - * bouncing state between the cpu and the gpu. The resource set with this function (typically a - * pinned memory allocator) is what it uses to allocate space for it's host-side buffer. 
- * - * @param mr The rmm resource to be used for host-side allocations - * @return The previous resource that was in use - */ -rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_ref mr); - -/** - * @brief Get the rmm resource being used for host memory allocations by - * cudf::detail::hostdevice_vector - * - * @return The rmm resource used for host-side allocations - */ -rmm::host_async_resource_ref get_host_memory_resource(); - -/** - * @brief Options to configure the default host memory resource - */ -struct host_mr_options { - std::optional pool_size; ///< The size of the pool to use for the default host memory - ///< resource. If not set, the default pool size is used. -}; - -/** - * @brief Configure the size of the default host memory resource. - * - * @throws cudf::logic_error if called after the default host memory resource has been created - * - * @param opts Options to configure the default host memory resource - * @return True if this call successfully configured the host memory resource, false if a - * a resource was already configured. - */ -bool config_default_host_memory_resource(host_mr_options const& opts); - -} // namespace cudf::io diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 8140f8897b7..623c1d9fc72 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include namespace cudf { @@ -125,7 +126,7 @@ class orc_reader_options { * * @return Number of rows to skip from the start */ - int64_t get_skip_rows() const { return _skip_rows; } + [[nodiscard]] int64_t get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of row to read. 
@@ -133,35 +134,38 @@ class orc_reader_options { * @return Number of rows to read; `nullopt` if the option hasn't been set (in which case the file * is read until the end) */ - std::optional const& get_num_rows() const { return _num_rows; } + [[nodiscard]] std::optional const& get_num_rows() const { return _num_rows; } /** * @brief Whether to use row index to speed-up reading. * * @return `true` if row index is used to speed-up reading */ - bool is_enabled_use_index() const { return _use_index; } + [[nodiscard]] bool is_enabled_use_index() const { return _use_index; } /** * @brief Whether to use numpy-compatible dtypes. * * @return `true` if numpy-compatible dtypes are used */ - bool is_enabled_use_np_dtypes() const { return _use_np_dtypes; } + [[nodiscard]] bool is_enabled_use_np_dtypes() const { return _use_np_dtypes; } /** * @brief Returns timestamp type to which timestamp column will be cast. * * @return Timestamp type to which timestamp column will be cast */ - data_type get_timestamp_type() const { return _timestamp_type; } + [[nodiscard]] data_type get_timestamp_type() const { return _timestamp_type; } /** * @brief Returns fully qualified names of columns that should be read as 128-bit Decimal. 
* * @return Fully qualified names of columns that should be read as 128-bit Decimal */ - std::vector const& get_decimal128_columns() const { return _decimal128_columns; } + [[nodiscard]] std::vector const& get_decimal128_columns() const + { + return _decimal128_columns; + } // Setters @@ -603,8 +607,8 @@ class orc_writer_options { * @param sink The sink used for writer output * @param table Table to be written to output */ - explicit orc_writer_options(sink_info const& sink, table_view const& table) - : _sink(sink), _table(table) + explicit orc_writer_options(sink_info sink, table_view table) + : _sink(std::move(sink)), _table(std::move(table)) { } @@ -676,7 +680,7 @@ class orc_writer_options { * * @return Row index stride */ - auto get_row_index_stride() const + [[nodiscard]] auto get_row_index_stride() const { auto const unaligned_stride = std::min(_row_index_stride, get_stripe_size_rows()); return unaligned_stride - unaligned_stride % 8; @@ -1048,7 +1052,7 @@ class chunked_orc_writer_options { * * @param sink The sink used for writer output */ - chunked_orc_writer_options(sink_info const& sink) : _sink(sink) {} + chunked_orc_writer_options(sink_info sink) : _sink(std::move(sink)) {} public: /** @@ -1107,7 +1111,7 @@ class chunked_orc_writer_options { * * @return Row index stride */ - auto get_row_index_stride() const + [[nodiscard]] auto get_row_index_stride() const { auto const unaligned_stride = std::min(_row_index_stride, get_stripe_size_rows()); return unaligned_stride - unaligned_stride % 8; diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 51eeed5b721..431f14af522 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -187,7 +187,7 @@ class parquet_reader_options { * * @return Timestamp type used to cast timestamp columns */ - data_type get_timestamp_type() const { return _timestamp_type; } + [[nodiscard]] data_type get_timestamp_type() const { return _timestamp_type; } /** * @brief Sets 
names of the columns to be read. @@ -626,7 +626,7 @@ class parquet_writer_options_base { * * @param sink The sink used for writer output */ - explicit parquet_writer_options_base(sink_info const& sink) : _sink(sink) {} + explicit parquet_writer_options_base(sink_info sink) : _sink(std::move(sink)) {} public: /** @@ -1287,7 +1287,7 @@ class chunked_parquet_writer_options : public parquet_writer_options_base { * * @param sink Sink used for writer output */ - explicit chunked_parquet_writer_options(sink_info const& sink); + explicit chunked_parquet_writer_options(sink_info sink); friend chunked_parquet_writer_options_builder; diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 150e997f533..0dab1c606de 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include namespace cudf { @@ -247,10 +248,10 @@ struct column_name_info { * @param _is_nullable True if column is nullable * @param _is_binary True if column is binary data */ - column_name_info(std::string const& _name, + column_name_info(std::string _name, std::optional _is_nullable = std::nullopt, std::optional _is_binary = std::nullopt) - : name(_name), is_nullable(_is_nullable), is_binary(_is_binary) + : name(std::move(_name)), is_nullable(_is_nullable), is_binary(_is_binary) { } diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 825f758adbd..ba485bd6372 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -336,8 +336,8 @@ class hash_join { * the result of performing an inner join between two tables with `build` and `probe` * as the join keys . 
*/ - std::pair>, - std::unique_ptr>> + [[nodiscard]] std::pair>, + std::unique_ptr>> inner_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -359,10 +359,10 @@ class hash_join { * * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct * the result of performing a left join between two tables with `build` and `probe` - * as the join keys . + * as the join keys. */ - std::pair>, - std::unique_ptr>> + [[nodiscard]] std::pair>, + std::unique_ptr>> left_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -386,8 +386,8 @@ class hash_join { * the result of performing a full join between two tables with `build` and `probe` * as the join keys . */ - std::pair>, - std::unique_ptr>> + [[nodiscard]] std::pair>, + std::unique_ptr>> full_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), @@ -440,7 +440,7 @@ class hash_join { * @return The exact number of output when performing a full join between two tables with `build` * and `probe` as the join keys . */ - std::size_t full_join_size( + [[nodiscard]] std::size_t full_join_size( cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; @@ -492,12 +492,12 @@ class distinct_hash_join { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned indices' device memory. * - * @return A pair of columns [`build_indices`, `probe_indices`] that can be used to construct - * the result of performing an inner join between two tables with `build` and `probe` - * as the join keys. 
+ * @return A pair of columns [`build_indices`, `probe_indices`] that can be used to + * construct the result of performing an inner join between two tables + * with `build` and `probe` as the join keys. */ - std::pair>, - std::unique_ptr>> + [[nodiscard]] std::pair>, + std::unique_ptr>> inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; @@ -512,10 +512,11 @@ class distinct_hash_join { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. - * @return A `build_indices` column that can be used to construct the result of performing a left - * join between two tables with `build` and `probe` as the join keys. + * @return A `build_indices` column that can be used to construct the result of + * performing a left join between two tables with `build` and `probe` as the join + * keys. */ - std::unique_ptr> left_join( + [[nodiscard]] std::unique_ptr> left_join( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index da1d0d743a7..d78907b473a 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -187,7 +187,7 @@ class fixed_width_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @return Value of the scalar */ - T value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; + [[nodiscard]] T value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Returns a raw pointer to the value in device memory. @@ -199,7 +199,7 @@ class fixed_width_scalar : public scalar { * @brief Returns a const raw pointer to the value in device memory. 
* @return A const raw pointer to the value in device memory */ - T const* data() const; + [[nodiscard]] T const* data() const; protected: rmm::device_scalar _data; ///< device memory containing the value @@ -245,8 +245,8 @@ class numeric_scalar : public detail::fixed_width_scalar { static_assert(is_numeric(), "Unexpected non-numeric type."); public: - numeric_scalar() = delete; - ~numeric_scalar() = default; + numeric_scalar() = delete; + ~numeric_scalar() override = default; /** * @brief Move constructor for numeric_scalar. @@ -393,7 +393,7 @@ class fixed_point_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @return The value of the scalar */ - rep_type value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; + [[nodiscard]] rep_type value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Get the decimal32, decimal64 or decimal128. @@ -401,7 +401,8 @@ class fixed_point_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @return The decimal32, decimal64 or decimal128 value */ - T fixed_point_value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const; + [[nodiscard]] T fixed_point_value( + rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** * @brief Explicit conversion operator to get the value of the scalar on the host. @@ -418,7 +419,7 @@ class fixed_point_scalar : public scalar { * @brief Returns a const raw pointer to the value in device memory. 
* @return a const raw pointer to the value in device memory */ - rep_type const* data() const; + [[nodiscard]] rep_type const* data() const; protected: rmm::device_scalar _data; ///< device memory containing the value @@ -565,8 +566,8 @@ class chrono_scalar : public detail::fixed_width_scalar { static_assert(is_chrono(), "Unexpected non-chrono type"); public: - chrono_scalar() = delete; - ~chrono_scalar() = default; + chrono_scalar() = delete; + ~chrono_scalar() override = default; /** * @brief Move constructor for chrono_scalar. diff --git a/cpp/include/cudf/strings/regex/regex_program.hpp b/cpp/include/cudf/strings/regex/regex_program.hpp index bdf541f455f..95c86ae0f8a 100644 --- a/cpp/include/cudf/strings/regex/regex_program.hpp +++ b/cpp/include/cudf/strings/regex/regex_program.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -74,35 +74,35 @@ struct regex_program { * * @return regex pattern as a string */ - std::string pattern() const; + [[nodiscard]] std::string pattern() const; /** * @brief Return the regex_flags used to create this instance * * @return regex flags setting */ - regex_flags flags() const; + [[nodiscard]] regex_flags flags() const; /** * @brief Return the capture_groups used to create this instance * * @return capture groups setting */ - capture_groups capture() const; + [[nodiscard]] capture_groups capture() const; /** * @brief Return the number of instructions in this instance * * @return Number of instructions */ - int32_t instructions_count() const; + [[nodiscard]] int32_t instructions_count() const; /** * @brief Return the number of capture groups in this instance * * @return Number of groups */ - int32_t groups_count() const; + [[nodiscard]] int32_t groups_count() const; /** * @brief Return the size of the working memory for the regex execution @@ -110,7 +110,7 @@ struct regex_program { * @param num_strings Number of strings for computation * @return Size of the working memory in bytes */ - std::size_t compute_working_memory_size(int32_t num_strings) const; + [[nodiscard]] std::size_t compute_working_memory_size(int32_t num_strings) const; ~regex_program(); diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 74df1ea1887..93cc787683b 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -110,7 +110,7 @@ static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"}; * * @return An empty string */ -CUDF_HOST_DEVICE inline string_view string_view::min() { return string_view(); } +CUDF_HOST_DEVICE inline string_view string_view::min() { return {}; } /** * @brief Return maximum value associated with the string type @@ -130,7 +130,7 @@ CUDF_HOST_DEVICE inline string_view string_view::max() CUDF_CUDA_TRY( cudaGetSymbolAddress((void**)&psentinel, cudf::strings::detail::max_string_sentinel)); #endif - return string_view(psentinel, 4); + return {psentinel, 4}; } __device__ inline size_type string_view::length() const @@ -439,7 +439,7 @@ __device__ inline string_view string_view::substr(size_type pos, size_type count auto const itr = begin() + pos; auto const spos = itr.byte_offset(); auto const epos = count >= 0 ? (itr + count).byte_offset() : size_bytes(); - return string_view(data() + spos, epos - spos); + return {data() + spos, epos - spos}; } __device__ inline size_type string_view::character_offset(size_type bytepos) const diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 8efe6eb8c72..c4f14af53fb 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -144,7 +144,7 @@ class table { */ template - table_view select(InputIterator begin, InputIterator end) const + [[nodiscard]] table_view select(InputIterator begin, InputIterator end) const { std::vector columns(std::distance(begin, end)); std::transform( diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index ad12b1eef4e..a71e0558dec 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -123,7 +123,7 @@ class table_view_base { * @param column_index The index of the desired column * @return A reference to the desired column */ - ColumnView const& column(size_type column_index) const; + [[nodiscard]] ColumnView const& column(size_type 
column_index) const; /** * @brief Returns the number of columns @@ -224,7 +224,7 @@ class table_view : public detail::table_view_base { * specified by the elements of `column_indices` */ template - table_view select(InputIterator begin, InputIterator end) const + [[nodiscard]] table_view select(InputIterator begin, InputIterator end) const { std::vector columns(std::distance(begin, end)); std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); }); diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index 719d44a9ab3..f019f516b84 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -48,7 +48,7 @@ struct stacktrace_recorder { * * @return The pointer to a null-terminated string storing the output stacktrace */ - char const* stacktrace() const { return _stacktrace.c_str(); } + [[nodiscard]] char const* stacktrace() const { return _stacktrace.c_str(); } protected: std::string const _stacktrace; //!< The whole stacktrace stored as one string. @@ -78,7 +78,7 @@ struct logic_error : public std::logic_error, public stacktrace_recorder { // TODO Add an error code member? This would be useful for translating an // exception to an error code in a pure-C API - ~logic_error() + ~logic_error() override { // Needed so that the first instance of the implicit destructor for any TU isn't 'constructed' // from a host+device function marking the implicit version also as host+device @@ -106,7 +106,7 @@ struct cuda_error : public std::runtime_error, public stacktrace_recorder { * * @return CUDA error code */ - cudaError_t error_code() const { return _cudaError; } + [[nodiscard]] cudaError_t error_code() const { return _cudaError; } protected: cudaError_t _cudaError; //!< CUDA error code @@ -237,7 +237,7 @@ inline void throw_cuda_error(cudaError_t error, char const* file, unsigned int l // Calls cudaGetLastError to clear the error status. 
It is nearly certain that a fatal error // occurred if it still returns the same error after a cleanup. cudaGetLastError(); - auto const last = cudaFree(0); + auto const last = cudaFree(nullptr); auto const msg = std::string{"CUDA error encountered at: " + std::string{file} + ":" + std::to_string(line) + ": " + std::to_string(error) + " " + cudaGetErrorName(error) + " " + cudaGetErrorString(error)}; diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp new file mode 100644 index 00000000000..b423eab6d38 --- /dev/null +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { + +/** + * @brief Set the rmm resource to be used for pinned memory allocations. + * + * @param mr The rmm resource to be used for pinned allocations + * @return The previous resource that was in use + */ +rmm::host_device_async_resource_ref set_pinned_memory_resource( + rmm::host_device_async_resource_ref mr); + +/** + * @brief Get the rmm resource being used for pinned memory allocations. 
+ * + * @return The rmm resource used for pinned allocations + */ +rmm::host_device_async_resource_ref get_pinned_memory_resource(); + +/** + * @brief Options to configure the default pinned memory resource + */ +struct pinned_mr_options { + std::optional pool_size; ///< The size of the pool to use for the default pinned memory + ///< resource. If not set, the default pool size is used. +}; + +/** + * @brief Configure the size of the default pinned memory resource. + * + * @param opts Options to configure the default pinned memory resource + * @return True if this call successfully configured the pinned memory resource, false if a + * a resource was already configured. + */ +bool config_default_pinned_memory_resource(pinned_mr_options const& opts); + +} // namespace cudf diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 47e92d61a9f..3b35e60e034 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ #include #include #include +#include namespace cudf { /** @@ -90,7 +91,7 @@ class span_base { * * @return Reference to the first element in the span */ - constexpr reference front() const { return _data[0]; } + [[nodiscard]] constexpr reference front() const { return _data[0]; } // not noexcept due to undefined behavior when size = 0 /** * @brief Returns a reference to the last element in the span. 
@@ -99,7 +100,7 @@ class span_base { * * @return Reference to the last element in the span */ - constexpr reference back() const { return _data[_size - 1]; } + [[nodiscard]] constexpr reference back() const { return _data[_size - 1]; } // not noexcept due to undefined behavior when idx < 0 || idx >= size /** * @brief Returns a reference to the idx-th element of the sequence. @@ -119,7 +120,7 @@ class span_base { * * @return An iterator to the first element of the span */ - constexpr iterator begin() const noexcept { return _data; } + [[nodiscard]] constexpr iterator begin() const noexcept { return _data; } /** * @brief Returns an iterator to the element following the last element of the span. * @@ -127,13 +128,13 @@ class span_base { * * @return An iterator to the element following the last element of the span */ - constexpr iterator end() const noexcept { return _data + _size; } + [[nodiscard]] constexpr iterator end() const noexcept { return _data + _size; } /** * @brief Returns a pointer to the beginning of the sequence. * * @return A pointer to the first element of the span */ - constexpr pointer data() const noexcept { return _data; } + [[nodiscard]] constexpr pointer data() const noexcept { return _data; } /** * @brief Returns the number of elements in the span. @@ -160,7 +161,10 @@ class span_base { * @param count Number of elements from the beginning of this span to put in the subspan. 
* @return A subspan of the first N elements of the sequence */ - constexpr Derived first(size_type count) const noexcept { return Derived(_data, count); } + [[nodiscard]] constexpr Derived first(size_type count) const noexcept + { + return Derived(_data, count); + } /** * @brief Obtains a subspan consisting of the last N elements of the sequence @@ -168,7 +172,7 @@ class span_base { * @param count Number of elements from the end of this span to put in the subspan * @return A subspan of the last N elements of the sequence */ - constexpr Derived last(size_type count) const noexcept + [[nodiscard]] constexpr Derived last(size_type count) const noexcept { return Derived(_data + _size - count, count); } @@ -180,7 +184,7 @@ class span_base { * @param count The number of elements in the subspan * @return A subspan of the sequence, of requested count and offset */ - constexpr Derived subspan(size_type offset, size_type count) const noexcept + [[nodiscard]] constexpr Derived subspan(size_type offset, size_type count) const noexcept { return Derived(_data + offset, count); } @@ -365,7 +369,7 @@ class base_2dspan { * @param data Pointer to the data * @param size Size of the 2D span as pair */ - base_2dspan(T* data, size_type size) noexcept : _data{data}, _size{size} {} + base_2dspan(T* data, size_type size) noexcept : _data{data}, _size{std::move(size)} {} /** * @brief Returns a pointer to the beginning of the sequence. diff --git a/cpp/include/cudf/utilities/thread_pool.hpp b/cpp/include/cudf/utilities/thread_pool.hpp index 74a2531710b..c8c3eb097c4 100644 --- a/cpp/include/cudf/utilities/thread_pool.hpp +++ b/cpp/include/cudf/utilities/thread_pool.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -201,8 +201,8 @@ class thread_pool { running = false; destroy_threads(); thread_count = _thread_count ? _thread_count : std::thread::hardware_concurrency(); - threads.reset(new std::thread[thread_count]); - paused = was_paused; + threads = std::make_unique(thread_count); + paused = was_paused; create_threads(); running = true; } diff --git a/cpp/include/cudf/wrappers/dictionary.hpp b/cpp/include/cudf/wrappers/dictionary.hpp index 37264c5a33c..95f4ac00a53 100644 --- a/cpp/include/cudf/wrappers/dictionary.hpp +++ b/cpp/include/cudf/wrappers/dictionary.hpp @@ -87,7 +87,7 @@ struct dictionary_wrapper { * * @return The value of this dictionary wrapper */ - CUDF_HOST_DEVICE inline value_type value() const { return _value; } + CUDF_HOST_DEVICE [[nodiscard]] inline value_type value() const { return _value; } /** * @brief Returns the maximum value of the value type. diff --git a/cpp/include/cudf/wrappers/durations.hpp b/cpp/include/cudf/wrappers/durations.hpp index 62aa22c2788..840dba4f4ba 100644 --- a/cpp/include/cudf/wrappers/durations.hpp +++ b/cpp/include/cudf/wrappers/durations.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,13 +56,13 @@ using duration_us = cuda::std::chrono::duration; -static_assert(sizeof(duration_D) == sizeof(typename duration_D::rep), ""); -static_assert(sizeof(duration_h) == sizeof(typename duration_h::rep), ""); -static_assert(sizeof(duration_m) == sizeof(typename duration_m::rep), ""); -static_assert(sizeof(duration_s) == sizeof(typename duration_s::rep), ""); -static_assert(sizeof(duration_ms) == sizeof(typename duration_ms::rep), ""); -static_assert(sizeof(duration_us) == sizeof(typename duration_us::rep), ""); -static_assert(sizeof(duration_ns) == sizeof(typename duration_ns::rep), ""); +static_assert(sizeof(duration_D) == sizeof(typename duration_D::rep)); +static_assert(sizeof(duration_h) == sizeof(typename duration_h::rep)); +static_assert(sizeof(duration_m) == sizeof(typename duration_m::rep)); +static_assert(sizeof(duration_s) == sizeof(typename duration_s::rep)); +static_assert(sizeof(duration_ms) == sizeof(typename duration_ms::rep)); +static_assert(sizeof(duration_us) == sizeof(typename duration_us::rep)); +static_assert(sizeof(duration_ns) == sizeof(typename duration_ns::rep)); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/wrappers/timestamps.hpp b/cpp/include/cudf/wrappers/timestamps.hpp index 0341ac6ede4..5194a3e8f96 100644 --- a/cpp/include/cudf/wrappers/timestamps.hpp +++ b/cpp/include/cudf/wrappers/timestamps.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -73,13 +73,13 @@ using timestamp_us = detail::timestamp; */ using timestamp_ns = detail::timestamp; -static_assert(sizeof(timestamp_D) == sizeof(typename timestamp_D::rep), ""); -static_assert(sizeof(timestamp_h) == sizeof(typename timestamp_h::rep), ""); -static_assert(sizeof(timestamp_m) == sizeof(typename timestamp_m::rep), ""); -static_assert(sizeof(timestamp_s) == sizeof(typename timestamp_s::rep), ""); -static_assert(sizeof(timestamp_ms) == sizeof(typename timestamp_ms::rep), ""); -static_assert(sizeof(timestamp_us) == sizeof(typename timestamp_us::rep), ""); -static_assert(sizeof(timestamp_ns) == sizeof(typename timestamp_ns::rep), ""); +static_assert(sizeof(timestamp_D) == sizeof(typename timestamp_D::rep)); +static_assert(sizeof(timestamp_h) == sizeof(typename timestamp_h::rep)); +static_assert(sizeof(timestamp_m) == sizeof(typename timestamp_m::rep)); +static_assert(sizeof(timestamp_s) == sizeof(typename timestamp_s::rep)); +static_assert(sizeof(timestamp_ms) == sizeof(typename timestamp_ms::rep)); +static_assert(sizeof(timestamp_us) == sizeof(typename timestamp_us::rep)); +static_assert(sizeof(timestamp_ns) == sizeof(typename timestamp_ns::rep)); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 18f75bbc842..0e35ff64af4 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -66,7 +66,7 @@ class BaseFixtureWithParam : public ::testing::TestWithParam { * all tests inheriting from this fixture * @return pointer to memory resource */ - rmm::device_async_resource_ref mr() const { return _mr; } + [[nodiscard]] rmm::device_async_resource_ref mr() const { return _mr; } }; /** diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index dc873658abf..47d17988775 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -1121,14 +1121,20 
@@ class dictionary_column_wrapper : public detail::column_wrapper { * * @return column_view to keys column */ - column_view keys() const { return cudf::dictionary_column_view{wrapped->view()}.keys(); } + [[nodiscard]] column_view keys() const + { + return cudf::dictionary_column_view{wrapped->view()}.keys(); + } /** * @brief Access indices column view * * @return column_view to indices column */ - column_view indices() const { return cudf::dictionary_column_view{wrapped->view()}.indices(); } + [[nodiscard]] column_view indices() const + { + return cudf::dictionary_column_view{wrapped->view()}.indices(); + } /** * @brief Default constructor initializes an empty dictionary column of strings @@ -1792,7 +1798,10 @@ class lists_column_wrapper : public detail::column_wrapper { return {std::move(cols), std::move(stubs)}; } - column_view get_view() const { return root ? lists_column_view(*wrapped).child() : *wrapped; } + [[nodiscard]] column_view get_view() const + { + return root ? lists_column_view(*wrapped).child() : *wrapped; + } int depth = 0; bool root = false; diff --git a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp index cafde6ca7d5..5a077e86a0f 100644 --- a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp +++ b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp @@ -110,7 +110,7 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res * @param other The other resource to compare to * @return Whether or not the two resources are equivalent */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { if (this == &other) { return true; } auto cast = dynamic_cast const*>(&other); diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index ac31f9045fe..8ac1491547d 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ 
b/cpp/src/binaryop/binaryop.cpp @@ -153,7 +153,7 @@ void binary_operation(mutable_column_view& out, cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) .get_kernel(kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) + ->configure_1d_max_occupancy(0, 0, nullptr, stream.value()) ->launch(out.size(), cudf::jit::get_data_ptr(out), cudf::jit::get_data_ptr(lhs), diff --git a/cpp/src/binaryop/compiled/operation.cuh b/cpp/src/binaryop/compiled/operation.cuh index 43b4bd232c4..57113785a29 100644 --- a/cpp/src/binaryop/compiled/operation.cuh +++ b/cpp/src/binaryop/compiled/operation.cuh @@ -173,8 +173,8 @@ struct PMod { __device__ inline auto operator()(TypeLhs x, TypeRhs y) { using common_t = std::common_type_t; - common_t xconv = static_cast(x); - common_t yconv = static_cast(y); + auto xconv = static_cast(x); + auto yconv = static_cast(y); auto rem = xconv % yconv; if constexpr (std::is_signed_v) if (rem < 0) rem = (rem + yconv) % yconv; @@ -188,8 +188,8 @@ struct PMod { __device__ inline auto operator()(TypeLhs x, TypeRhs y) { using common_t = std::common_type_t; - common_t xconv = static_cast(x); - common_t yconv = static_cast(y); + auto xconv = static_cast(x); + auto yconv = static_cast(y); auto rem = std::fmod(xconv, yconv); if (rem < 0) rem = std::fmod(rem + yconv, yconv); return rem; diff --git a/cpp/src/binaryop/compiled/util.cpp b/cpp/src/binaryop/compiled/util.cpp index 02f4e480ecb..2b6a4f58895 100644 --- a/cpp/src/binaryop/compiled/util.cpp +++ b/cpp/src/binaryop/compiled/util.cpp @@ -123,7 +123,7 @@ struct is_supported_operation_functor { template struct nested_support_functor { template - inline constexpr bool call(data_type out_type) const + [[nodiscard]] inline constexpr bool call(data_type out_type) const { return is_binary_operation_supported{}.template operator()( out_type); @@ -163,7 +163,7 @@ struct is_supported_operation_functor { }; template - inline 
constexpr bool bool_op(data_type out) const + [[nodiscard]] inline constexpr bool bool_op(data_type out) const { return out.id() == type_id::BOOL8 and is_binary_operation_supported{}.template operator()(); diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index b0208a58896..819ad593c0a 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -181,7 +181,7 @@ class metadata_builder_impl { col_type, col_size, col_null_count, data_offset, null_mask_offset, num_children); } - std::vector build() const + [[nodiscard]] std::vector build() const { auto output = std::vector(metadata.size() * sizeof(detail::serialized_column)); std::memcpy(output.data(), metadata.data(), output.size()); diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp index a3471485293..1b0d201501b 100644 --- a/cpp/src/datetime/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -221,7 +221,7 @@ class posix_parser { /** * @brief Returns the remaining number of characters in the input. */ - auto remaining_char_cnt() const { return end - cur; } + [[nodiscard]] auto remaining_char_cnt() const { return end - cur; } /** * @brief Returns the next character in the input. 
diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index 05beecfbf9b..dd9e9600a87 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -23,7 +23,7 @@ namespace cudf { namespace detail { -data_type arrow_to_cudf_type(const ArrowSchemaView* arrow_view) +data_type arrow_to_cudf_type(ArrowSchemaView const* arrow_view) { switch (arrow_view->type) { case NANOARROW_TYPE_NA: return data_type(type_id::EMPTY); diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp index defddb4dc42..4e2628ab689 100644 --- a/cpp/src/interop/arrow_utilities.hpp +++ b/cpp/src/interop/arrow_utilities.hpp @@ -37,7 +37,7 @@ static constexpr int fixed_width_data_buffer_idx = 1; * @param arrow_view SchemaView to pull the logical and storage types from * @return Column type id */ -data_type arrow_to_cudf_type(const ArrowSchemaView* arrow_view); +data_type arrow_to_cudf_type(ArrowSchemaView const* arrow_view); /** * @brief Map cudf column type id to ArrowType id diff --git a/cpp/src/interop/detail/arrow_allocator.cpp b/cpp/src/interop/detail/arrow_allocator.cpp index 3e6a337457a..2a19a5360fe 100644 --- a/cpp/src/interop/detail/arrow_allocator.cpp +++ b/cpp/src/interop/detail/arrow_allocator.cpp @@ -38,7 +38,7 @@ T enable_hugepage(T&& buf) } #ifdef MADV_HUGEPAGE - const auto pagesize = sysconf(_SC_PAGESIZE); + auto const pagesize = sysconf(_SC_PAGESIZE); void* addr = const_cast(buf->data()); if (addr == nullptr) { return std::move(buf); } auto length{static_cast(buf->size())}; diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index f100ca0cc2b..579820cbae3 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -78,6 +78,7 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type) } } case arrow::Type::STRING: return data_type(type_id::STRING); + case arrow::Type::LARGE_STRING: return data_type(type_id::STRING); case 
arrow::Type::DICTIONARY: return data_type(type_id::DICTIONARY32); case arrow::Type::LIST: return data_type(type_id::LIST); case arrow::Type::DECIMAL: { @@ -276,21 +277,42 @@ std::unique_ptr dispatch_to_cudf_column::operator()( rmm::device_async_resource_ref mr) { if (array.length() == 0) { return make_empty_column(type_id::STRING); } - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int32_t), str_array->value_offsets(), nullptr); - auto char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - auto offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - auto chars_column = dispatch_to_cudf_column{}.operator()( - *char_array, data_type(type_id::INT8), true, stream, mr); + std::unique_ptr offsets_column; + std::unique_ptr char_array; + + if (array.type_id() == arrow::Type::LARGE_STRING) { + auto str_array = static_cast(&array); + auto offset_array = std::make_unique( + str_array->value_offsets()->size() / sizeof(int64_t), str_array->value_offsets(), nullptr); + offsets_column = dispatch_to_cudf_column{}.operator()( + *offset_array, data_type(type_id::INT64), true, stream, mr); + char_array = std::make_unique( + str_array->value_data()->size(), str_array->value_data(), nullptr); + } else if (array.type_id() == arrow::Type::STRING) { + auto str_array = static_cast(&array); + auto offset_array = std::make_unique( + str_array->value_offsets()->size() / sizeof(int32_t), str_array->value_offsets(), nullptr); + offsets_column = dispatch_to_cudf_column{}.operator()( + *offset_array, data_type(type_id::INT32), true, stream, mr); + char_array = std::make_unique( + str_array->value_data()->size(), str_array->value_data(), nullptr); + } else { + throw std::runtime_error("Unsupported array type"); + } + + rmm::device_buffer chars(char_array->length(), stream, mr); + auto data_buffer = 
char_array->data()->buffers[1]; + CUDF_CUDA_TRY(cudaMemcpyAsync(chars.data(), + reinterpret_cast(data_buffer->address()), + chars.size(), + cudaMemcpyDefault, + stream.value())); auto const num_rows = offsets_column->size() - 1; auto out_col = make_strings_column(num_rows, std::move(offsets_column), - std::move(chars_column->release().data.release()[0]), + std::move(chars), array.null_count(), std::move(*get_mask_buffer(array, stream, mr))); diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index 36bb35d9419..854a1d68fdc 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -140,7 +140,7 @@ std::unique_ptr dispatch_copy_from_arrow_host::operator()(ArrowSch bool skip_mask) { auto data_buffer = input->buffers[fixed_width_data_buffer_idx]; - const auto buffer_length = bitmask_allocation_size_bytes(input->length + input->offset); + auto const buffer_length = bitmask_allocation_size_bytes(input->length + input->offset); auto data = rmm::device_buffer(buffer_length, stream, mr); CUDF_CUDA_TRY(cudaMemcpyAsync(data.data(), @@ -322,7 +322,7 @@ template <> std::unique_ptr dispatch_copy_from_arrow_host::operator()( ArrowSchemaView* schema, ArrowArray const* input, data_type type, bool skip_mask) { - const void* offset_buffers[2] = {nullptr, input->buffers[fixed_width_data_buffer_idx]}; + void const* offset_buffers[2] = {nullptr, input->buffers[fixed_width_data_buffer_idx]}; ArrowArray offsets_array = { .length = input->offset + input->length + 1, .null_count = 0, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index e871e656c48..47aee982c32 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -306,11 +306,19 @@ std::shared_ptr dispatch_to_arrow::operator()( static_cast(sview.chars_size(stream))}, ar_mr, stream); - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - 
static_cast(input_view.null_count())); + if (sview.offsets().type().id() == cudf::type_id::INT64) { + return std::make_shared(static_cast(input_view.size()), + offset_buffer, + data_buffer, + fetch_mask_buffer(input_view, ar_mr, stream), + static_cast(input_view.null_count())); + } else { + return std::make_shared(static_cast(input_view.size()), + offset_buffer, + data_buffer, + fetch_mask_buffer(input_view, ar_mr, stream), + static_cast(input_view.null_count())); + } } template <> diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 221cdf93042..2041f03cd81 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -485,8 +485,8 @@ std::string schema_parser::get_str() char const* cur = start; while (cur < m_end && *cur++ != '"') ; - int32_t len = static_cast(cur - start - 1); - m_cur = cur; + auto len = static_cast(cur - start - 1); + m_cur = cur; return s.assign(start, std::max(len, 0)); } diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 3e5d966282d..ab516dd585d 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -305,7 +305,7 @@ std::vector decompress(compression_type compression, host_spannum_entries; i++) { - zip_cdfh_s const* cdfh = reinterpret_cast( + auto const* cdfh = reinterpret_cast( reinterpret_cast(za.cdfh) + cdfh_ofs); int cdfh_len = sizeof(zip_cdfh_s) + cdfh->fname_len + cdfh->extra_len + cdfh->comment_len; if (cdfh_ofs + cdfh_len > za.eocd->cdir_size || cdfh->sig != 0x0201'4b50) { @@ -314,8 +314,8 @@ std::vector decompress(compression_type compression, host_spancomp_method == 8 && cdfh->comp_size > 0 && cdfh->uncomp_size > 0) { - size_t lfh_ofs = cdfh->hdr_ofs; - zip_lfh_s const* lfh = reinterpret_cast(raw + lfh_ofs); + 
size_t lfh_ofs = cdfh->hdr_ofs; + auto const* lfh = reinterpret_cast(raw + lfh_ofs); if (lfh_ofs + sizeof(zip_lfh_s) <= src.size() && lfh->sig == 0x0403'4b50 && lfh_ofs + sizeof(zip_lfh_s) + lfh->fname_len + lfh->extra_len <= src.size()) { if (lfh->comp_method == 8 && lfh->comp_size > 0 && lfh->uncomp_size > 0) { @@ -340,7 +340,7 @@ std::vector decompress(compression_type compression, host_span 4) { - bz2_file_header_s const* fhdr = reinterpret_cast(raw); + auto const* fhdr = reinterpret_cast(raw); // Check for BZIP2 file signature "BZh1" to "BZh9" if (fhdr->sig[0] == 'B' && fhdr->sig[1] == 'Z' && fhdr->sig[2] == 'h' && fhdr->blksz >= '1' && fhdr->blksz <= '9') { diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 5dee0c17a33..05faded651d 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -27,6 +27,7 @@ #include "io/utilities/parsing_utils.cuh" #include +#include #include #include #include diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 1ed8ee5ce06..5daa55d4552 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -306,14 +306,14 @@ raw_orc_statistics read_raw_orc_statistics(source_info const& src_info, // Get file-level statistics, statistics of each column of file for (auto const& stats : metadata.ff.statistics) { - result.file_stats.push_back(std::string(stats.cbegin(), stats.cend())); + result.file_stats.emplace_back(stats.cbegin(), stats.cend()); } // Get stripe-level statistics for (auto const& stripes_stats : metadata.md.stripeStats) { result.stripes_stats.emplace_back(); for (auto const& stats : stripes_stats.colStats) { - result.stripes_stats.back().push_back(std::string(stats.cbegin(), stats.cend())); + result.stripes_stats.back().emplace_back(stats.cbegin(), stats.cend()); } } @@ -1026,8 +1026,8 @@ parquet_writer_options_builder& parquet_writer_options_builder::column_chunks_fi return *this; } 
-chunked_parquet_writer_options::chunked_parquet_writer_options(sink_info const& sink) - : parquet_writer_options_base(sink) +chunked_parquet_writer_options::chunked_parquet_writer_options(sink_info sink) + : parquet_writer_options_base(std::move(sink)) { } diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index b243e4ba006..031edfde4f6 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -245,7 +245,7 @@ struct TransduceToken { RelativeOffsetT const relative_offset, SymbolT const read_symbol) const { - const bool is_end_of_invalid_line = + bool const is_end_of_invalid_line = (state_id == static_cast(TT_INV) && match_id == static_cast(dfa_symbol_group_id::DELIMITER)); @@ -265,15 +265,15 @@ struct TransduceToken { // Number of tokens emitted on invalid lines constexpr int32_t num_inv_tokens = 2; - const bool is_delimiter = match_id == static_cast(dfa_symbol_group_id::DELIMITER); + bool const is_delimiter = match_id == static_cast(dfa_symbol_group_id::DELIMITER); // If state is either invalid or we're entering an invalid state, we discard tokens - const bool is_part_of_invalid_line = + bool const is_part_of_invalid_line = (match_id != static_cast(dfa_symbol_group_id::ERROR) && state_id == static_cast(TT_VLD)); // Indicates whether we transition from an invalid line to a potentially valid line - const bool is_end_of_invalid_line = (state_id == static_cast(TT_INV) && is_delimiter); + bool const is_end_of_invalid_line = (state_id == static_cast(TT_INV) && is_delimiter); int32_t const emit_count = is_end_of_invalid_line ? num_inv_tokens : (is_part_of_invalid_line && !is_delimiter ? 
1 : 0); diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index df5c7bc21e1..e999be8f83a 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -85,7 +85,7 @@ device_span ingest_raw_input(device_span buffer, sources.end(), prefsum_source_sizes.begin(), std::plus{}, - [](const std::unique_ptr& s) { return s->size(); }); + [](std::unique_ptr const& s) { return s->size(); }); auto upper = std::upper_bound(prefsum_source_sizes.begin(), prefsum_source_sizes.end(), range_offset); size_t start_source = std::distance(prefsum_source_sizes.begin(), upper); diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index fd55cbb6846..e1403acd455 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -511,7 +511,7 @@ class ProtobufWriter { TypeKind kind, ColStatsBlob const* stats); - std::size_t size() const { return m_buff.size(); } + [[nodiscard]] std::size_t size() const { return m_buff.size(); } uint8_t const* data() { return m_buff.data(); } std::vector& buffer() { return m_buff; } diff --git a/cpp/src/io/orc/orc_field_writer.hpp b/cpp/src/io/orc/orc_field_writer.hpp index 4862562d526..731e9d7687e 100644 --- a/cpp/src/io/orc/orc_field_writer.hpp +++ b/cpp/src/io/orc/orc_field_writer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,10 +31,10 @@ namespace io { namespace orc { struct ProtobufWriter::ProtobufFieldWriter { - int struct_size; + int struct_size{0}; ProtobufWriter* p; - ProtobufFieldWriter(ProtobufWriter* pbw) : struct_size(0), p(pbw) {} + ProtobufFieldWriter(ProtobufWriter* pbw) : p(pbw) {} /** * @brief Function to write a unsigned integer to the internal buffer diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 5034aa14a95..01ee5ad177d 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -536,7 +537,7 @@ void reader_impl::load_next_stripe_data(read_mode mode) _file_itm_data.selected_stripes.begin() + stripe_start, _file_itm_data.selected_stripes.begin() + stripe_start + stripe_count, std::size_t{0}, - [](std::size_t count, const auto& stripe) { return count + stripe.stripe_info->numberOfRows; }); + [](std::size_t count, auto const& stripe) { return count + stripe.stripe_info->numberOfRows; }); // Decoding range needs to be reset to start from the first position in `decode_stripe_ranges`. 
_chunk_read_data.curr_decode_stripe_range = 0; diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index da9fb802a0a..72eb41b1360 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -810,7 +810,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode) cudf::detail::hostdevice_2dvector(stripe_count, num_lvl_columns, _stream); memset(chunks.base_host_ptr(), 0, chunks.size_bytes()); - const bool use_index = + bool const use_index = _options.use_index && // Do stripes have row group index _metadata.is_row_grp_idx_present() && diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 344e216cdc8..e9e031a407a 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -2339,7 +2338,7 @@ auto convert_table_to_orc_data(table_view const& input, std::move(streams), std::move(stripes), std::move(stripe_dicts.views), - cudf::detail::pinned_host_vector()}; + cudf::detail::make_pinned_vector_async(0, stream)}; } // Allocate intermediate output stream buffer @@ -2407,7 +2406,7 @@ auto convert_table_to_orc_data(table_view const& input, return max_stream_size; }(); - cudf::detail::pinned_host_vector bounce_buffer(max_out_stream_size); + auto bounce_buffer = cudf::detail::make_pinned_vector_async(max_out_stream_size, stream); auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream); diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index c9212334a96..192833507b0 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -42,7 +42,7 @@ class parquet_field { public: virtual ~parquet_field() = default; - int field() const { return _field_val; } + [[nodiscard]] int field() const { return _field_val; } }; std::string 
field_type_string(FieldType type) diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index c2e6178acbf..d4778b1ea15 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -64,11 +64,11 @@ class CompactProtocolWriter { class CompactProtocolFieldWriter { CompactProtocolWriter& writer; size_t struct_start_pos; - int current_field_value; + int current_field_value{0}; public: CompactProtocolFieldWriter(CompactProtocolWriter& caller) - : writer(caller), struct_start_pos(writer.m_buf.size()), current_field_value(0) + : writer(caller), struct_start_pos(writer.m_buf.size()) { } diff --git a/cpp/src/io/parquet/ipc/Schema_generated.h b/cpp/src/io/parquet/ipc/Schema_generated.h index 27141b4af31..c091204417a 100644 --- a/cpp/src/io/parquet/ipc/Schema_generated.h +++ b/cpp/src/io/parquet/ipc/Schema_generated.h @@ -139,13 +139,13 @@ inline const MetadataVersion (&EnumValuesMetadataVersion())[5] return values; } -inline const char* const* EnumNamesMetadataVersion() +inline char const* const* EnumNamesMetadataVersion() { - static const char* const names[6] = {"V1", "V2", "V3", "V4", "V5", nullptr}; + static char const* const names[6] = {"V1", "V2", "V3", "V4", "V5", nullptr}; return names; } -inline const char* EnumNameMetadataVersion(MetadataVersion e) +inline char const* EnumNameMetadataVersion(MetadataVersion e) { if (::flatbuffers::IsOutRange(e, MetadataVersion_V1, MetadataVersion_V5)) return ""; const size_t index = static_cast(e); @@ -190,14 +190,14 @@ inline const Feature (&EnumValuesFeature())[3] return values; } -inline const char* const* EnumNamesFeature() +inline char const* const* EnumNamesFeature() { - static const char* const names[4] = { + static char const* const names[4] = { "UNUSED", "DICTIONARY_REPLACEMENT", "COMPRESSED_BODY", nullptr}; return names; } -inline const char* EnumNameFeature(Feature e) +inline char const* EnumNameFeature(Feature e) 
{ if (::flatbuffers::IsOutRange(e, Feature_UNUSED, Feature_COMPRESSED_BODY)) return ""; const size_t index = static_cast(e); @@ -217,13 +217,13 @@ inline const UnionMode (&EnumValuesUnionMode())[2] return values; } -inline const char* const* EnumNamesUnionMode() +inline char const* const* EnumNamesUnionMode() { - static const char* const names[3] = {"Sparse", "Dense", nullptr}; + static char const* const names[3] = {"Sparse", "Dense", nullptr}; return names; } -inline const char* EnumNameUnionMode(UnionMode e) +inline char const* EnumNameUnionMode(UnionMode e) { if (::flatbuffers::IsOutRange(e, UnionMode_Sparse, UnionMode_Dense)) return ""; const size_t index = static_cast(e); @@ -244,13 +244,13 @@ inline const Precision (&EnumValuesPrecision())[3] return values; } -inline const char* const* EnumNamesPrecision() +inline char const* const* EnumNamesPrecision() { - static const char* const names[4] = {"HALF", "SINGLE", "DOUBLE", nullptr}; + static char const* const names[4] = {"HALF", "SINGLE", "DOUBLE", nullptr}; return names; } -inline const char* EnumNamePrecision(Precision e) +inline char const* EnumNamePrecision(Precision e) { if (::flatbuffers::IsOutRange(e, Precision_HALF, Precision_DOUBLE)) return ""; const size_t index = static_cast(e); @@ -270,13 +270,13 @@ inline const DateUnit (&EnumValuesDateUnit())[2] return values; } -inline const char* const* EnumNamesDateUnit() +inline char const* const* EnumNamesDateUnit() { - static const char* const names[3] = {"DAY", "MILLISECOND", nullptr}; + static char const* const names[3] = {"DAY", "MILLISECOND", nullptr}; return names; } -inline const char* EnumNameDateUnit(DateUnit e) +inline char const* EnumNameDateUnit(DateUnit e) { if (::flatbuffers::IsOutRange(e, DateUnit_DAY, DateUnit_MILLISECOND)) return ""; const size_t index = static_cast(e); @@ -299,14 +299,14 @@ inline const TimeUnit (&EnumValuesTimeUnit())[4] return values; } -inline const char* const* EnumNamesTimeUnit() +inline char const* const* 
EnumNamesTimeUnit() { - static const char* const names[5] = { + static char const* const names[5] = { "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND", nullptr}; return names; } -inline const char* EnumNameTimeUnit(TimeUnit e) +inline char const* EnumNameTimeUnit(TimeUnit e) { if (::flatbuffers::IsOutRange(e, TimeUnit_SECOND, TimeUnit_NANOSECOND)) return ""; const size_t index = static_cast(e); @@ -328,13 +328,13 @@ inline const IntervalUnit (&EnumValuesIntervalUnit())[3] return values; } -inline const char* const* EnumNamesIntervalUnit() +inline char const* const* EnumNamesIntervalUnit() { - static const char* const names[4] = {"YEAR_MONTH", "DAY_TIME", "MONTH_DAY_NANO", nullptr}; + static char const* const names[4] = {"YEAR_MONTH", "DAY_TIME", "MONTH_DAY_NANO", nullptr}; return names; } -inline const char* EnumNameIntervalUnit(IntervalUnit e) +inline char const* EnumNameIntervalUnit(IntervalUnit e) { if (::flatbuffers::IsOutRange(e, IntervalUnit_YEAR_MONTH, IntervalUnit_MONTH_DAY_NANO)) return ""; const size_t index = static_cast(e); @@ -389,9 +389,9 @@ inline const Type (&EnumValuesType())[27] return values; } -inline const char* const* EnumNamesType() +inline char const* const* EnumNamesType() { - static const char* const names[28] = { + static char const* const names[28] = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", @@ -402,7 +402,7 @@ inline const char* const* EnumNamesType() return names; } -inline const char* EnumNameType(Type e) +inline char const* EnumNameType(Type e) { if (::flatbuffers::IsOutRange(e, Type_NONE, Type_LargeListView)) return ""; const size_t index = static_cast(e); @@ -544,10 +544,10 @@ struct TypeTraits { static const Type enum_value = Type_LargeListView; }; -bool VerifyType(::flatbuffers::Verifier& verifier, const void* obj, Type type); +bool VerifyType(::flatbuffers::Verifier& verifier, void const* obj, Type type); bool 
VerifyTypeVector(::flatbuffers::Verifier& verifier, - const ::flatbuffers::Vector<::flatbuffers::Offset>* values, - const ::flatbuffers::Vector* types); + ::flatbuffers::Vector<::flatbuffers::Offset> const* values, + ::flatbuffers::Vector const* types); /// ---------------------------------------------------------------------- /// Dictionary encoding metadata @@ -566,13 +566,13 @@ inline const DictionaryKind (&EnumValuesDictionaryKind())[1] return values; } -inline const char* const* EnumNamesDictionaryKind() +inline char const* const* EnumNamesDictionaryKind() { - static const char* const names[2] = {"DenseArray", nullptr}; + static char const* const names[2] = {"DenseArray", nullptr}; return names; } -inline const char* EnumNameDictionaryKind(DictionaryKind e) +inline char const* EnumNameDictionaryKind(DictionaryKind e) { if (::flatbuffers::IsOutRange(e, DictionaryKind_DenseArray, DictionaryKind_DenseArray)) return ""; const size_t index = static_cast(e); @@ -594,13 +594,13 @@ inline const Endianness (&EnumValuesEndianness())[2] return values; } -inline const char* const* EnumNamesEndianness() +inline char const* const* EnumNamesEndianness() { - static const char* const names[3] = {"Little", "Big", nullptr}; + static char const* const names[3] = {"Little", "Big", nullptr}; return names; } -inline const char* EnumNameEndianness(Endianness e) +inline char const* EnumNameEndianness(Endianness e) { if (::flatbuffers::IsOutRange(e, Endianness_Little, Endianness_Big)) return ""; const size_t index = static_cast(e); @@ -652,7 +652,7 @@ struct NullBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -685,7 +685,7 @@ struct Struct_Builder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -715,7 +715,7 @@ struct ListBuilder { 
} ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -747,7 +747,7 @@ struct LargeListBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -780,7 +780,7 @@ struct ListViewBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -812,7 +812,7 @@ struct LargeListViewBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -851,7 +851,7 @@ struct FixedSizeListBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -916,7 +916,7 @@ struct MapBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -941,9 +941,9 @@ struct Union FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { { return static_cast(GetField(VT_MODE, 0)); } - const ::flatbuffers::Vector* typeIds() const + ::flatbuffers::Vector const* typeIds() const { - return GetPointer*>(VT_TYPEIDS); + return GetPointer<::flatbuffers::Vector const*>(VT_TYPEIDS); } bool Verify(::flatbuffers::Verifier& verifier) const { @@ -971,7 +971,7 @@ struct UnionBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -991,7 +991,7 @@ inline ::flatbuffers::Offset CreateUnion( inline ::flatbuffers::Offset CreateUnionDirect( ::flatbuffers::FlatBufferBuilder& _fbb, 
cudf::io::parquet::flatbuf::UnionMode mode = cudf::io::parquet::flatbuf::UnionMode_Sparse, - const std::vector* typeIds = nullptr) + std::vector const* typeIds = nullptr) { auto typeIds__ = typeIds ? _fbb.CreateVector(*typeIds) : 0; return cudf::io::parquet::flatbuf::CreateUnion(_fbb, mode, typeIds__); @@ -1027,7 +1027,7 @@ struct IntBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1071,7 +1071,7 @@ struct FloatingPointBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1105,7 +1105,7 @@ struct Utf8Builder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1136,7 +1136,7 @@ struct BinaryBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1168,7 +1168,7 @@ struct LargeUtf8Builder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1200,7 +1200,7 @@ struct LargeBinaryBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1237,7 +1237,7 @@ struct Utf8ViewBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1274,7 +1274,7 @@ struct BinaryViewBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = 
::flatbuffers::Offset(end); return o; } @@ -1312,7 +1312,7 @@ struct FixedSizeBinaryBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1344,7 +1344,7 @@ struct BoolBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1379,7 +1379,7 @@ struct RunEndEncodedBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1437,7 +1437,7 @@ struct DecimalBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1489,7 +1489,7 @@ struct DateBuilder { } ::flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); + auto const end = fbb_.EndTable(start_); auto o = ::flatbuffers::Offset(end); return o; } @@ -1548,7 +1548,7 @@ struct TimeBuilder { } ::flatbuffers::Offset