From c7fc01794a78ccb6b4dc173d4ed20db156ebb39a Mon Sep 17 00:00:00 2001 From: Mike Sarahan Date: Thu, 14 Nov 2024 15:11:20 -0600 Subject: [PATCH 1/9] adding telemetry (#1692) close #1691 Authors: - Mike Sarahan (https://github.com/msarahan) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1692 --- .github/workflows/pr.yaml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4dfcaf1ae..6780298c3 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -19,6 +19,7 @@ jobs: - conda-python-build - conda-python-tests - docs-build + - telemetry-setup - wheel-build-cpp - wheel-build-python - wheel-tests @@ -28,7 +29,17 @@ jobs: if: always() with: needs: ${{ toJSON(needs) }} + telemetry-setup: + runs-on: ubuntu-latest + continue-on-error: true + env: + OTEL_SERVICE_NAME: "pr-rmm" + steps: + - name: Telemetry setup + uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main changed-files: + needs: + - telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 with: @@ -50,9 +61,12 @@ jobs: - '!img/**' checks: secrets: inherit + needs: + - telemetry-setup uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 with: enable_check_generated_files: false + ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit @@ -114,6 +128,8 @@ jobs: script: ci/test_wheel.sh devcontainer: secrets: inherit + needs: + - telemetry-setup uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 with: arch: '["amd64"]' @@ -122,3 +138,18 @@ jobs: sccache -z; build-all -DBUILD_BENCHMARKS=ON --verbose; sccache -s; + + telemetry-summarize: + runs-on: ubuntu-latest + needs: pr-builder + if: always() + continue-on-error: true + steps: + - name: Load stashed telemetry env vars + uses: 
rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main + with: + load_service_name: true + - name: Telemetry summarize + uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main + with: + cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}" From 96418d36eed56b63ae02fce0cd7a6880fba02030 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Fri, 15 Nov 2024 09:35:22 -0500 Subject: [PATCH 2/9] DOC v25.02 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 6 ++--- .devcontainer/cuda11.8-pip/devcontainer.json | 6 ++--- .../cuda12.5-conda/devcontainer.json | 6 ++--- .devcontainer/cuda12.5-pip/devcontainer.json | 6 ++--- .github/workflows/build.yaml | 16 ++++++------- .github/workflows/pr.yaml | 24 +++++++++---------- .github/workflows/test.yaml | 6 ++--- VERSION | 2 +- dependencies.yaml | 6 ++--- python/rmm/pyproject.toml | 2 +- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 549ffa67b..cf93b101a 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index d6dd7b6ce..bd3aa8665 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index 17e8d5cd0..eba61f0b9 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git 
a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 54964d880..48aa3f083 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 6fa11225e..f7abc2d58 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -56,7 +56,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -68,7 +68,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build-cpp: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) build_type: ${{ inputs.build_type || 'branch' }} @@ -79,7 +79,7 @@ jobs: wheel-build-python: needs: wheel-build-cpp secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-publish-cpp: needs: wheel-build-cpp secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -100,7 +100,7 @@ jobs: wheel-publish-python: needs: wheel-build-python secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml 
b/.github/workflows/pr.yaml index 6780298c3..d0287319f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,7 +25,7 @@ jobs: - wheel-tests - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} @@ -41,7 +41,7 @@ jobs: needs: - telemetry-setup secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -63,40 +63,40 @@ jobs: secrets: inherit needs: - telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -106,7 +106,7 @@ jobs: wheel-build-cpp: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) build_type: pull-request @@ -114,14 +114,14 @@ jobs: wheel-build-python: needs: wheel-build-cpp secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_python.sh wheel-tests: needs: [wheel-build-python, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request @@ -130,7 +130,7 @@ jobs: secrets: inherit needs: - telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 34a0f746d..b38c1e3cb 
100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -24,7 +24,7 @@ jobs: sha: ${{ inputs.sha }} python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/VERSION b/VERSION index af28c42b5..72eefaf7c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/dependencies.yaml b/dependencies.yaml index 3e2c2eb29..070248edb 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -163,15 +163,15 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==24.12.*,>=0.0.0a0 + - librmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - librmm-cu11==24.12.*,>=0.0.0a0 + - librmm-cu11==25.2.*,>=0.0.0a0 - matrix: null packages: - - librmm==24.12.*,>=0.0.0a0 + - librmm==25.2.*,>=0.0.0a0 checks: common: - output_types: [conda, requirements] diff --git a/python/rmm/pyproject.toml b/python/rmm/pyproject.toml index aaaa15482..dfa680a98 100644 --- a/python/rmm/pyproject.toml +++ b/python/rmm/pyproject.toml @@ -130,7 +130,7 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "cython>=3.0.0", - "librmm==24.12.*,>=0.0.0a0", + "librmm==25.2.*,>=0.0.0a0", "ninja", ] # This list 
was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From 929a595e639b61b583cc584b1c291f9559cef673 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 19 Nov 2024 09:51:53 -0500 Subject: [PATCH 3/9] Make `cudaMallocAsync` logic non-optional as we require CUDA 11.2+ (#1667) We can remove the optimizations around `CUDA_STATIC_RUNTIME` and instead see if the function is already in the process space so that RMM doesn't need to have any build context to run properly Fixes #1679 Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Mark Harris (https://github.com/harrism) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/rmm/pull/1667 --- CMakeLists.txt | 1 - .../multi_stream_allocations_bench.cu | 6 - .../random_allocations/random_allocations.cpp | 6 - include/rmm/detail/dynamic_load_runtime.hpp | 191 ------------------ include/rmm/detail/runtime_async_alloc.hpp | 79 ++++++++ .../mr/device/cuda_async_memory_resource.hpp | 54 ++--- .../cuda_async_view_memory_resource.hpp | 32 +-- python/rmm/rmm/tests/test_rmm.py | 34 +--- tests/CMakeLists.txt | 17 +- tests/mr/device/cuda_async_mr_tests.cpp | 17 +- tests/mr/device/cuda_async_view_mr_tests.cpp | 13 +- .../mr/device/mr_ref_multithreaded_tests.cpp | 16 +- tests/mr/device/mr_ref_test.hpp | 2 +- tests/mr/device/mr_ref_tests.cpp | 4 - tests/mr/device/thrust_allocator_tests.cu | 16 +- tests/prefetch_tests.cpp | 2 +- 16 files changed, 131 insertions(+), 359 deletions(-) delete mode 100644 include/rmm/detail/dynamic_load_runtime.hpp create mode 100644 include/rmm/detail/runtime_async_alloc.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 44d7fbb79..07bd368ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,7 +89,6 @@ target_include_directories(rmm INTERFACE "$Apply(benchmark_range); return; } -#endif if (name == "pool") { BENCHMARK_CAPTURE(BM_MultiStreamAllocations, pool_mr, 
&make_pool) // @@ -248,9 +244,7 @@ int main(int argc, char** argv) resource_names.emplace_back(args["resource"].as()); } else { resource_names.emplace_back("cuda"); -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT resource_names.emplace_back("cuda_async"); -#endif resource_names.emplace_back("pool"); resource_names.emplace_back("arena"); resource_names.emplace_back("binning"); diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 57116743b..2971f7e40 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -316,9 +316,7 @@ int main(int argc, char** argv) std::map const funcs({{"arena", &make_arena}, {"binning", &make_binning}, {"cuda", &make_cuda}, -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT {"cuda_async", &make_cuda_async}, -#endif {"pool", &make_pool}}); auto resource = args["resource"].as(); @@ -340,11 +338,7 @@ int main(int argc, char** argv) std::string mr_name = args["resource"].as(); declare_benchmark(mr_name); } else { -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT std::vector mrs{"pool", "binning", "arena", "cuda_async", "cuda"}; -#else - std::vector mrs{"pool", "binning", "arena", "cuda"}; -#endif std::for_each( std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); }); } diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp deleted file mode 100644 index 214228752..000000000 --- a/include/rmm/detail/dynamic_load_runtime.hpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include - -#include - -#include -#include - -namespace RMM_NAMESPACE { -namespace detail { - -/** - * @brief `dynamic_load_runtime` loads the cuda runtime library at runtime - * - * By loading the cudart library at runtime we can use functions that - * are added in newer minor versions of the cuda runtime. - */ -struct dynamic_load_runtime { - static void* get_cuda_runtime_handle() - { - auto close_cudart = [](void* handle) { ::dlclose(handle); }; - auto open_cudart = []() { - ::dlerror(); - const int major = CUDART_VERSION / 1000; - - // In CUDA 12 the SONAME is correctly defined as libcudart.12, but for - // CUDA<=11 it includes an extra 0 minor version e.g. libcudart.11.0. We - // also allow finding the linker name. - const std::string libname_ver_cuda_11 = "libcudart.so." + std::to_string(major) + ".0"; - const std::string libname_ver_cuda_12 = "libcudart.so." 
+ std::to_string(major); - const std::string libname = "libcudart.so"; - - void* ptr = nullptr; - for (auto&& name : {libname_ver_cuda_12, libname_ver_cuda_11, libname}) { - ptr = dlopen(name.c_str(), RTLD_LAZY); - if (ptr != nullptr) break; - } - - if (ptr != nullptr) { return ptr; } - - RMM_FAIL("Unable to dlopen cudart"); - }; - static std::unique_ptr cudart_handle{open_cudart(), close_cudart}; - return cudart_handle.get(); - } - - template - using function_sig = std::add_pointer_t; - - template - static std::optional function(const char* func_name) - { - auto* runtime = get_cuda_runtime_handle(); - auto* handle = ::dlsym(runtime, func_name); - if (!handle) { return std::nullopt; } - auto* function_ptr = reinterpret_cast(handle); - return std::optional(function_ptr); - } -}; - -#if defined(RMM_STATIC_CUDART) -// clang-format off -#define RMM_CUDART_API_WRAPPER(name, signature) \ - template \ - static cudaError_t name(Args... args) \ - { \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Waddress\"") \ - static_assert(static_cast(::name), \ - "Failed to find #name function with arguments #signature"); \ - _Pragma("GCC diagnostic pop") \ - return ::name(args...); \ - } -// clang-format on -#else -#define RMM_CUDART_API_WRAPPER(name, signature) \ - template \ - static cudaError_t name(Args... args) \ - { \ - static const auto func = dynamic_load_runtime::function(#name); \ - if (func) { return (*func)(args...); } \ - RMM_FAIL("Failed to find #name function in libcudart.so"); \ - } -#endif - -#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync -/** - * @brief Bind to the stream-ordered memory allocator functions - * at runtime. - * - * This allows RMM users to compile/link against CUDA 11.2+ and run with - * < CUDA 11.2 runtime as these functions are found at call time. 
- */ -struct async_alloc { - static bool is_supported() - { -#if defined(RMM_STATIC_CUDART) - static bool runtime_supports_pool = (CUDART_VERSION >= 11020); -#else - static bool runtime_supports_pool = - dynamic_load_runtime::function>( - "cudaFreeAsync") - .has_value(); -#endif - - static auto driver_supports_pool{[] { - int cuda_pool_supported{}; - auto result = cudaDeviceGetAttribute(&cuda_pool_supported, - cudaDevAttrMemoryPoolsSupported, - rmm::get_current_cuda_device().value()); - return result == cudaSuccess and cuda_pool_supported == 1; - }()}; - return runtime_supports_pool and driver_supports_pool; - } - - /** - * @brief Check whether the specified `cudaMemAllocationHandleType` is supported on the present - * CUDA driver/runtime version. - * - * @note This query was introduced in CUDA 11.3 so on CUDA 11.2 this function will only return - * true for `cudaMemHandleTypeNone`. - * - * @param handle_type An IPC export handle type to check for support. - * @return true if supported - * @return false if unsupported - */ - static bool is_export_handle_type_supported(cudaMemAllocationHandleType handle_type) - { - int supported_handle_types_bitmask{}; -#if CUDART_VERSION >= 11030 // 11.3 introduced cudaDevAttrMemoryPoolSupportedHandleTypes - if (cudaMemHandleTypeNone != handle_type) { - auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask, - cudaDevAttrMemoryPoolSupportedHandleTypes, - rmm::get_current_cuda_device().value()); - - // Don't throw on cudaErrorInvalidValue - auto const unsupported_runtime = (result == cudaErrorInvalidValue); - if (unsupported_runtime) return false; - // throw any other error that may have occurred - RMM_CUDA_TRY(result); - } - -#endif - return (supported_handle_types_bitmask & handle_type) == handle_type; - } - - template - using cudart_sig = dynamic_load_runtime::function_sig; - - using cudaMemPoolCreate_sig = cudart_sig; - RMM_CUDART_API_WRAPPER(cudaMemPoolCreate, cudaMemPoolCreate_sig); - - using 
cudaMemPoolSetAttribute_sig = cudart_sig; - RMM_CUDART_API_WRAPPER(cudaMemPoolSetAttribute, cudaMemPoolSetAttribute_sig); - - using cudaMemPoolDestroy_sig = cudart_sig; - RMM_CUDART_API_WRAPPER(cudaMemPoolDestroy, cudaMemPoolDestroy_sig); - - using cudaMallocFromPoolAsync_sig = cudart_sig; - RMM_CUDART_API_WRAPPER(cudaMallocFromPoolAsync, cudaMallocFromPoolAsync_sig); - - using cudaFreeAsync_sig = cudart_sig; - RMM_CUDART_API_WRAPPER(cudaFreeAsync, cudaFreeAsync_sig); - - using cudaDeviceGetDefaultMemPool_sig = cudart_sig; - RMM_CUDART_API_WRAPPER(cudaDeviceGetDefaultMemPool, cudaDeviceGetDefaultMemPool_sig); -}; -#endif - -#undef RMM_CUDART_API_WRAPPER -} // namespace detail -} // namespace RMM_NAMESPACE diff --git a/include/rmm/detail/runtime_async_alloc.hpp b/include/rmm/detail/runtime_async_alloc.hpp new file mode 100644 index 000000000..6ddb2228b --- /dev/null +++ b/include/rmm/detail/runtime_async_alloc.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include + +#include +#include + +namespace RMM_NAMESPACE { +namespace detail { + +/** + * @brief Determine at runtime if the CUDA driver supports the stream-ordered + * memory allocator functions. + * + * This allows RMM users to compile/link against CUDA 11.2+ and run with + * older drivers. 
+ */ + +struct runtime_async_alloc { + static bool is_supported() + { + static auto driver_supports_pool{[] { + int cuda_pool_supported{}; + auto result = cudaDeviceGetAttribute(&cuda_pool_supported, + cudaDevAttrMemoryPoolsSupported, + rmm::get_current_cuda_device().value()); + return result == cudaSuccess and cuda_pool_supported == 1; + }()}; + return driver_supports_pool; + } + + /** + * @brief Check whether the specified `cudaMemAllocationHandleType` is supported on the present + * CUDA driver/runtime version. + * + * @param handle_type An IPC export handle type to check for support. + * @return true if supported + * @return false if unsupported + */ + static bool is_export_handle_type_supported(cudaMemAllocationHandleType handle_type) + { + int supported_handle_types_bitmask{}; + if (cudaMemHandleTypeNone != handle_type) { + auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask, + cudaDevAttrMemoryPoolSupportedHandleTypes, + rmm::get_current_cuda_device().value()); + + // Don't throw on cudaErrorInvalidValue + auto const unsupported_runtime = (result == cudaErrorInvalidValue); + if (unsupported_runtime) return false; + // throw any other error that may have occurred + RMM_CUDA_TRY(result); + } + return (supported_handle_types_bitmask & handle_type) == handle_type; + } +}; + +} // namespace detail +} // namespace RMM_NAMESPACE diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 52fd2fe4e..b1fc0b112 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -17,9 +17,9 @@ #include #include -#include #include #include +#include #include #include #include @@ -31,12 +31,6 @@ #include #include -#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync -#ifndef RMM_DISABLE_CUDA_MALLOC_ASYNC -#define RMM_CUDA_MALLOC_ASYNC_SUPPORT -#endif -#endif - namespace RMM_NAMESPACE { namespace mr { /** @@ -91,9 
+85,8 @@ class cuda_async_memory_resource final : public device_memory_resource { std::optional release_threshold = {}, std::optional export_handle_type = {}) { -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT // Check if cudaMallocAsync Memory pool supported - RMM_EXPECTS(rmm::detail::async_alloc::is_supported(), + RMM_EXPECTS(rmm::detail::runtime_async_alloc::is_supported(), "cudaMallocAsync not supported with this CUDA driver/runtime version"); // Construct explicit pool @@ -101,12 +94,13 @@ class cuda_async_memory_resource final : public device_memory_resource { pool_props.allocType = cudaMemAllocationTypePinned; pool_props.handleTypes = static_cast( export_handle_type.value_or(allocation_handle_type::none)); - RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes), - "Requested IPC memory handle type not supported"); + RMM_EXPECTS( + rmm::detail::runtime_async_alloc::is_export_handle_type_supported(pool_props.handleTypes), + "Requested IPC memory handle type not supported"); pool_props.location.type = cudaMemLocationTypeDevice; pool_props.location.id = rmm::get_current_cuda_device().value(); cudaMemPool_t cuda_pool_handle{}; - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props)); + RMM_CUDA_TRY(cudaMemPoolCreate(&cuda_pool_handle, &pool_props)); pool_ = cuda_async_view_memory_resource{cuda_pool_handle}; // CUDA drivers before 11.5 have known incompatibilities with the async allocator. 
@@ -117,41 +111,34 @@ class cuda_async_memory_resource final : public device_memory_resource { constexpr auto min_async_version{11050}; if (driver_version < min_async_version) { int disabled{0}; - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute( - pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); + RMM_CUDA_TRY( + cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); } auto const [free, total] = rmm::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute uint64_t threshold = release_threshold.value_or(total); - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolSetAttribute( - pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold)); + RMM_CUDA_TRY( + cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolAttrReleaseThreshold, &threshold)); // Allocate and immediately deallocate the initial_pool_size to prime the pool with the // specified size auto const pool_size = initial_pool_size.value_or(free / 2); auto* ptr = do_allocate(pool_size, cuda_stream_default); do_deallocate(ptr, pool_size, cuda_stream_default); -#else - RMM_FAIL( - "cudaMallocAsync not supported by the version of the CUDA Toolkit used for this build"); -#endif } -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT /** * @brief Returns the underlying native handle to the CUDA pool * + * @return cudaMemPool_t Handle to the underlying CUDA pool */ [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); } -#endif ~cuda_async_memory_resource() override { -#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) - RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaMemPoolDestroy(pool_handle())); -#endif + RMM_ASSERT_CUDA_SUCCESS(cudaMemPoolDestroy(pool_handle())); } cuda_async_memory_resource(cuda_async_memory_resource const&) = delete; cuda_async_memory_resource(cuda_async_memory_resource&&) = delete; @@ -159,9 +146,7 @@ class cuda_async_memory_resource final : public device_memory_resource { 
cuda_async_memory_resource& operator=(cuda_async_memory_resource&&) = delete; private: -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT cuda_async_view_memory_resource pool_{}; -#endif /** * @brief Allocates memory of size at least \p bytes. @@ -175,12 +160,7 @@ class cuda_async_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { void* ptr{nullptr}; -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT ptr = pool_.allocate(bytes, stream); -#else - (void)bytes; - (void)stream; -#endif return ptr; } @@ -194,13 +174,7 @@ class cuda_async_memory_resource final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override { -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT pool_.deallocate(ptr, bytes, stream); -#else - (void)ptr; - (void)bytes; - (void)stream; -#endif } /** @@ -213,11 +187,7 @@ class cuda_async_memory_resource final : public device_memory_resource { [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { auto const* async_mr = dynamic_cast(&other); -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT return (async_mr != nullptr) && (this->pool_handle() == async_mr->pool_handle()); -#else - return async_mr != nullptr; -#endif } }; diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 3e1900e72..180c412ee 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -28,10 +27,6 @@ #include #include -#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync -#define RMM_CUDA_MALLOC_ASYNC_SUPPORT -#endif - namespace RMM_NAMESPACE { namespace mr { /** @@ -46,13 +41,12 @@ namespace mr { */ class cuda_async_view_memory_resource final : public device_memory_resource { public: -#ifdef 
RMM_CUDA_MALLOC_ASYNC_SUPPORT /** * @brief Constructs a cuda_async_view_memory_resource which uses an existing CUDA memory pool. * The provided pool is not owned by cuda_async_view_memory_resource and must remain valid * during the lifetime of the memory resource. * - * @throws rmm::runtime_error if the CUDA version does not support `cudaMallocAsync` + * @throws rmm::logic_error if the CUDA version does not support `cudaMallocAsync` * * @param valid_pool_handle Handle to a CUDA memory pool which will be used to * serve allocation requests. @@ -71,15 +65,13 @@ class cuda_async_view_memory_resource final : public device_memory_resource { RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported, "cudaMallocAsync not supported with this CUDA driver/runtime version"); } -#endif -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT /** * @brief Returns the underlying native handle to the CUDA pool * + * @return cudaMemPool_t Handle to the underlying CUDA pool */ [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } -#endif cuda_async_view_memory_resource() = default; cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) = @@ -92,9 +84,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource { default; ///< @default_move_assignment{cuda_async_view_memory_resource} private: -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT cudaMemPool_t cuda_pool_handle_{}; -#endif /** * @brief Allocates memory of size at least \p bytes. 
@@ -108,15 +98,9 @@ class cuda_async_view_memory_resource final : public device_memory_resource { void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { void* ptr{nullptr}; -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (bytes > 0) { - RMM_CUDA_TRY_ALLOC(rmm::detail::async_alloc::cudaMallocFromPoolAsync( - &ptr, bytes, pool_handle(), stream.value())); + RMM_CUDA_TRY_ALLOC(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value())); } -#else - (void)bytes; - (void)stream; -#endif return ptr; } @@ -132,15 +116,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource { [[maybe_unused]] std::size_t bytes, rmm::cuda_stream_view stream) override { -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - if (ptr != nullptr) { - RMM_ASSERT_CUDA_SUCCESS(rmm::detail::async_alloc::cudaFreeAsync(ptr, stream.value())); - } -#else - (void)ptr; - (void)bytes; - (void)stream; -#endif + if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } } /** diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index b52ea0179..d7d692287 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -32,12 +32,6 @@ cuda.set_memory_manager(RMMNumbaManager) -_driver_version = rmm._cuda.gpu.driverGetVersion() -_runtime_version = rmm._cuda.gpu.runtimeGetVersion() -_CUDAMALLOC_ASYNC_SUPPORTED = (_driver_version >= 11020) and ( - _runtime_version >= 11020 -) - _SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( cudart.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess, rmm._cuda.gpu.getDevice(), @@ -657,10 +651,6 @@ def test_mr_upstream_lifetime(): del pool_mr -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) @@ -671,15 +661,11 @@ def test_cuda_async_memory_resource(dtype, nelem, alloc): 
array_tester(dtype, nelem, alloc) -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) def test_cuda_async_memory_resource_ipc(): # TODO: We don't have a great way to check if IPC is supported in Python, # without using the C++ function - # rmm::detail::async_alloc::is_export_handle_type_supported. We can't - # accurately test driver and runtime versions for this via Python because + # rmm::detail::runtime_async_alloc::is_export_handle_type_supported. + # We can't accurately test this via Python because # cuda-python always has the IPC handle enum defined (which normally # requires a CUDA 11.3 runtime) and the cuda-compat package in Docker # containers prevents us from assuming that the driver we see actually @@ -702,10 +688,6 @@ def test_cuda_async_memory_resource_ipc(): assert rmm.mr.get_current_device_resource_type() is type(mr) -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) @pytest.mark.parametrize("nelems", _nelems) def test_cuda_async_memory_resource_stream(nelems): # test that using CudaAsyncMemoryResource @@ -719,10 +701,6 @@ def test_cuda_async_memory_resource_stream(nelems): np.testing.assert_equal(expected, result) -@pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", -) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) def test_cuda_async_memory_resource_threshold(nelem, alloc): @@ -739,13 +717,7 @@ def test_cuda_async_memory_resource_threshold(nelem, alloc): "mr", [ rmm.mr.CudaMemoryResource, - pytest.param( - rmm.mr.CudaAsyncMemoryResource, - marks=pytest.mark.skipif( - not _CUDAMALLOC_ASYNC_SUPPORTED, - reason="cudaMallocAsync not supported", - ), - ), + pytest.param(rmm.mr.CudaAsyncMemoryResource), ], ) def test_limiting_resource_adaptor(mr): diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a482c8cc1..476028af0 100644 --- a/tests/CMakeLists.txt +++ 
b/tests/CMakeLists.txt @@ -84,7 +84,7 @@ endfunction() function(ConfigureTest TEST_NAME) set(options) - set(one_value GPUS PERCENT) + set(one_value CUDART GPUS PERCENT) set(multi_value) cmake_parse_arguments(_RMM_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN}) if(NOT DEFINED _RMM_TEST_GPUS AND NOT DEFINED _RMM_TEST_PERCENT) @@ -98,13 +98,23 @@ function(ConfigureTest TEST_NAME) set(_RMM_TEST_PERCENT 100) endif() + if(_RMM_TEST_CUDART STREQUAL SHARED) + set(cudart_link_libs $ CUDA::cudart) + elseif(_RMM_TEST_CUDART STREQUAL STATIC) + set(cudart_link_libs $ CUDA::cudart_static) + else() + set(cudart_link_libs rmm) + endif() + # Test with legacy default stream. ConfigureTestInternal(${TEST_NAME} ${_RMM_TEST_UNPARSED_ARGUMENTS}) + target_link_libraries(${TEST_NAME} ${cudart_link_libs}) # Test with per-thread default stream. string(REGEX REPLACE "_TEST$" "_PTDS_TEST" PTDS_TEST_NAME "${TEST_NAME}") ConfigureTestInternal("${PTDS_TEST_NAME}" ${_RMM_TEST_UNPARSED_ARGUMENTS}) target_compile_definitions("${PTDS_TEST_NAME}" PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) + target_link_libraries(${PTDS_TEST_NAME} ${cudart_link_libs}) foreach(name ${TEST_NAME} ${PTDS_TEST_NAME} ${NS_TEST_NAME}) rapids_test_add( @@ -130,7 +140,10 @@ ConfigureTest(ADAPTOR_TEST mr/device/adaptor_tests.cpp) ConfigureTest(POOL_MR_TEST mr/device/pool_mr_tests.cpp GPUS 1 PERCENT 100) # cuda_async mr tests -ConfigureTest(CUDA_ASYNC_MR_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60) +ConfigureTest(CUDA_ASYNC_MR_STATIC_CUDART_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60 + CUDART STATIC) +ConfigureTest(CUDA_ASYNC_MR_SHARED_CUDART_TEST mr/device/cuda_async_mr_tests.cpp GPUS 1 PERCENT 60 + CUDART SHARED) # thrust allocator tests ConfigureTest(THRUST_ALLOCATOR_TEST mr/device/thrust_allocator_tests.cu GPUS 1 PERCENT 60) diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index 90c7b0ff9..a39188548 100644 --- 
a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,24 +31,13 @@ class AsyncMRTest : public ::testing::Test { protected: void SetUp() override { - if (!rmm::detail::async_alloc::is_supported()) { + if (!rmm::detail::runtime_async_alloc::is_supported()) { GTEST_SKIP() << "Skipping tests since cudaMallocAsync not supported with this CUDA " << "driver/runtime version"; } } }; -TEST_F(AsyncMRTest, ThrowIfNotSupported) -{ - auto construct_mr = []() { cuda_async_mr mr; }; -#ifndef RMM_CUDA_MALLOC_ASYNC_SUPPORT - EXPECT_THROW(construct_mr(), rmm::logic_error); -#else - EXPECT_NO_THROW(construct_mr()); -#endif -} - -#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) TEST_F(AsyncMRTest, ExplicitInitialPoolSize) { const auto pool_init_size{100}; @@ -77,7 +66,5 @@ TEST_F(AsyncMRTest, DifferentPoolsUnequal) EXPECT_FALSE(mr1.is_equal(mr2)); } -#endif - } // namespace } // namespace rmm::test diff --git a/tests/mr/device/cuda_async_view_mr_tests.cpp b/tests/mr/device/cuda_async_view_mr_tests.cpp index fe82431a9..f3a02cbf0 100644 --- a/tests/mr/device/cuda_async_view_mr_tests.cpp +++ b/tests/mr/device/cuda_async_view_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,13 +29,10 @@ using cuda_async_view_mr = rmm::mr::cuda_async_view_memory_resource; static_assert(cuda::mr::resource_with); static_assert(cuda::mr::async_resource_with); -#if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) - TEST(PoolTest, UsePool) { cudaMemPool_t memPool{}; - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool( - &memPool, rmm::get_current_cuda_device().value())); + RMM_CUDA_TRY(cudaDeviceGetDefaultMemPool(&memPool, rmm::get_current_cuda_device().value())); const auto pool_init_size{100}; cuda_async_view_mr mr{memPool}; @@ -53,7 +50,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool) cudaMemPool_t memPool{}; - RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&memPool, &poolProps)); + RMM_CUDA_TRY(cudaMemPoolCreate(&memPool, &poolProps)); { const auto pool_init_size{100}; @@ -64,7 +61,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool) } auto destroy_valid_pool = [&]() { - auto result = rmm::detail::async_alloc::cudaMemPoolDestroy(memPool); + auto result = cudaMemPoolDestroy(memPool); RMM_EXPECTS(result == cudaSuccess, "Pool wrapper did destroy pool"); }; @@ -81,7 +78,5 @@ TEST(PoolTest, ThrowIfNullptrPool) EXPECT_THROW(construct_mr(), rmm::logic_error); } -#endif - } // namespace } // namespace rmm::test diff --git a/tests/mr/device/mr_ref_multithreaded_tests.cpp b/tests/mr/device/mr_ref_multithreaded_tests.cpp index 944ba1807..9e7c8c2e8 100644 --- a/tests/mr/device/mr_ref_multithreaded_tests.cpp +++ b/tests/mr/device/mr_ref_multithreaded_tests.cpp @@ -36,17 +36,11 @@ namespace { struct mr_ref_test_mt : public mr_ref_test {}; -INSTANTIATE_TEST_CASE_P(MultiThreadResourceTests, - mr_ref_test_mt, - ::testing::Values("CUDA", -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - "CUDA_Async", -#endif - "Managed", - "Pool", - "Arena", - "Binning"), - [](auto const& info) { return info.param; }); +INSTANTIATE_TEST_CASE_P( + MultiThreadResourceTests, + mr_ref_test_mt, + ::testing::Values("CUDA", "CUDA_Async", "Managed", "Pool", "Arena", "Binning"), + [](auto 
const& info) { return info.param; }); template void spawn_n(std::size_t num_threads, Task task, Arguments&&... args) diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 6e63b3838..2af0eff44 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -347,7 +347,7 @@ inline auto make_host_pinned() { return std::make_shared(); } return std::shared_ptr{nullptr}; diff --git a/tests/mr/device/mr_ref_tests.cpp b/tests/mr/device/mr_ref_tests.cpp index 55e91d765..41af050a0 100644 --- a/tests/mr/device/mr_ref_tests.cpp +++ b/tests/mr/device/mr_ref_tests.cpp @@ -30,9 +30,7 @@ namespace { INSTANTIATE_TEST_SUITE_P(ResourceTests, mr_ref_test, ::testing::Values("CUDA", -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT "CUDA_Async", -#endif "Managed", "System", "Pool", @@ -46,9 +44,7 @@ INSTANTIATE_TEST_SUITE_P(ResourceTests, INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests, mr_ref_allocation_test, ::testing::Values("CUDA", -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT "CUDA_Async", -#endif "Managed", "System" "Pool", diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu index 84f599957..46447aa09 100644 --- a/tests/mr/device/thrust_allocator_tests.cu +++ b/tests/mr/device/thrust_allocator_tests.cu @@ -69,17 +69,11 @@ TEST_P(allocator_test, multi_device) }()); } -INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests, - allocator_test, - ::testing::Values("CUDA", -#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - "CUDA_Async", -#endif - "Managed", - "Pool", - "Arena", - "Binning"), - [](auto const& info) { return info.param; }); +INSTANTIATE_TEST_CASE_P( + ThrustAllocatorTests, + allocator_test, + ::testing::Values("CUDA", "CUDA_Async", "Managed", "Pool", "Arena", "Binning"), + [](auto const& info) { return info.param; }); } // namespace } // namespace rmm::test diff --git a/tests/prefetch_tests.cpp b/tests/prefetch_tests.cpp index 6c7bb2dd3..4a2c41a2b 100644 --- a/tests/prefetch_tests.cpp +++ 
b/tests/prefetch_tests.cpp @@ -53,8 +53,8 @@ struct PrefetchTest : public ::testing::Test { // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g8048f6ea5ad77917444567656c140c5a // specifically for when cudaMemRangeAttribute::cudaMemRangeAttributeLastPrefetchLocation is // used. - constexpr size_t prefetch_data_size = 4; if constexpr (std::is_same_v) { + constexpr size_t prefetch_data_size = 4; int prefetch_location{0}; RMM_CUDA_TRY( cudaMemRangeGetAttribute(&prefetch_location, From 3b5f6af2eaa0519643ccc2a4c1395307bfd3ad7e Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:22:49 +1100 Subject: [PATCH 4/9] Query total memory in failure_callback_resource_adaptor tests (#1734) Fixes #1733 by querying total device memory and using twice as much in tests that are expected to fail allocation. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1734 --- python/rmm/rmm/tests/test_rmm.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index d7d692287..182434dc5 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -795,10 +795,28 @@ def callback(nbytes: int) -> bool: rmm.mr.set_current_device_resource(mr) with pytest.raises(MemoryError): - rmm.DeviceBuffer(size=int(1e11)) + from rmm.mr import available_device_memory + + total_memory = available_device_memory()[1] + rmm.DeviceBuffer(size=total_memory * 2) assert retried[0] +def test_failure_callback_resource_adaptor_error(): + def callback(nbytes: int) -> bool: + raise RuntimeError("MyError") + + cuda_mr = rmm.mr.CudaMemoryResource() + mr = rmm.mr.FailureCallbackResourceAdaptor(cuda_mr, callback) + rmm.mr.set_current_device_resource(mr) + + with pytest.raises(RuntimeError, 
match="MyError"): + from rmm.mr import available_device_memory + + total_memory = available_device_memory()[1] + rmm.DeviceBuffer(size=total_memory * 2) + + @pytest.mark.parametrize("managed", [True, False]) def test_prefetch_resource_adaptor(managed): if managed: @@ -823,18 +841,6 @@ def test_prefetch_resource_adaptor(managed): assert_prefetched(db, device) -def test_failure_callback_resource_adaptor_error(): - def callback(nbytes: int) -> bool: - raise RuntimeError("MyError") - - cuda_mr = rmm.mr.CudaMemoryResource() - mr = rmm.mr.FailureCallbackResourceAdaptor(cuda_mr, callback) - rmm.mr.set_current_device_resource(mr) - - with pytest.raises(RuntimeError, match="MyError"): - rmm.DeviceBuffer(size=int(1e11)) - - def test_dev_buf_circle_ref_dealloc(): # This test creates a reference cycle containing a `DeviceBuffer` # and ensures that the garbage collector does not clear it, i.e., From 4cfa31fae4cac9294f34f636d37414b724fdbd14 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 26 Nov 2024 12:23:40 -0800 Subject: [PATCH 5/9] Create logger wrapper around spdlog that can be easily reused in other libraries (#1722) This PR defines a new way to produce a logger wrapping spdlog. The logger's interface is declared in a template header file that can be processed by CMake to produce an interface that may be customized for placement into any project. The new implementation uses the PImpl idiom to isolate the spdlog (and transitively, fmt) dependency from the public API of the logger. The implementation is defined in an impl header. A corresponding source template file is provided that simply includes this header. All of these files are wrapped in some CMake logic for producing a custom target for a given project. rmm leverages this new logger by requesting the creation of a logger target and a corresponding implementation. 
This is a breaking change because consumers of rmm will need to link the new `rmm_logger_impl` target into their own libraries to get logging support. Once this gets merged, the plan is to move this implementation out of rmm into its own repository. At that point, the logger may also be used to completely replace logger implementations in cudf, raft, and cuml (as well as any other RAPIDS libraries that are aiming to provide their own logging implementation). Once everything in RAPIDS is migrated to using the new logger, we will update the way that it uses spdlog to completely hide all spdlog symbols, which solves a half dozen different problems for us when it comes to packaging (symbol collision issues, ABI compatibility, conda environment conflicts, bundling of headers into conda packages, etc). Resolves #1709 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Mark Harris (https://github.com/harrism) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/rmm/pull/1722 --- CMakeLists.txt | 6 +- README.md | 4 +- benchmarks/CMakeLists.txt | 11 +- benchmarks/replay/replay.cpp | 4 +- ci/check_symbols.sh | 23 +- cmake/thirdparty/get_spdlog.cmake | 3 +- include/rmm/detail/format.hpp | 48 -- include/rmm/detail/logging_assert.hpp | 2 +- include/rmm/logger.hpp | 117 ---- .../rmm/mr/device/arena_memory_resource.hpp | 26 +- include/rmm/mr/device/detail/arena.hpp | 28 +- .../detail/stream_ordered_memory_resource.hpp | 2 +- .../mr/device/logging_resource_adaptor.hpp | 64 +-- .../mr/device/tracking_resource_adaptor.hpp | 4 +- python/rmm/CMakeLists.txt | 6 +- python/rmm/docs/conf.py | 2 + python/rmm/rmm/__init__.py | 4 +- python/rmm/rmm/_cuda/CMakeLists.txt | 2 +- python/rmm/rmm/librmm/CMakeLists.txt | 2 +- python/rmm/rmm/librmm/_logger.pxd | 71 +-- python/rmm/rmm/librmm/_logger.pyx | 2 - python/rmm/rmm/pylibrmm/CMakeLists.txt | 2 +- python/rmm/rmm/pylibrmm/logger.pyx | 24 +- 
python/rmm/rmm/tests/test_rmm.py | 12 +- rapids_logger/CMakeLists.txt | 177 ++++++ rapids_logger/README.md | 22 + rapids_logger/logger.cpp.in | 17 + rapids_logger/logger.hpp.in | 504 ++++++++++++++++++ rapids_logger/logger_impl.hpp.in | 205 +++++++ tests/CMakeLists.txt | 19 +- tests/logger_tests.cpp | 4 +- tests/mr/device/tracking_mr_tests.cpp | 15 +- 32 files changed, 1068 insertions(+), 364 deletions(-) delete mode 100644 include/rmm/logger.hpp create mode 100644 rapids_logger/CMakeLists.txt create mode 100644 rapids_logger/README.md create mode 100644 rapids_logger/logger.cpp.in create mode 100644 rapids_logger/logger.hpp.in create mode 100644 rapids_logger/logger_impl.hpp.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 07bd368ee..7a7ba3525 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,9 @@ rapids_find_package( # add third party dependencies using CPM rapids_cpm_init() -include(cmake/thirdparty/get_spdlog.cmake) +add_subdirectory(rapids_logger) +rapids_make_logger(rmm EXPORT_SET rmm-exports) + include(cmake/thirdparty/get_cccl.cmake) include(cmake/thirdparty/get_nvtx.cmake) @@ -93,8 +95,8 @@ else() target_link_libraries(rmm INTERFACE CUDA::cudart) endif() +target_link_libraries(rmm INTERFACE rmm_logger) target_link_libraries(rmm INTERFACE CCCL::CCCL) -target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only) target_link_libraries(rmm INTERFACE dl) target_link_libraries(rmm INTERFACE nvtx3::nvtx3-cpp) target_compile_features(rmm INTERFACE cxx_std_17 $) diff --git a/README.md b/README.md index 0a25cd348..d72d411f5 100644 --- a/README.md +++ b/README.md @@ -658,8 +658,8 @@ of more detailed logging. The default is `INFO`. Available levels are `TRACE`, ` The log relies on the [spdlog](https://github.com/gabime/spdlog.git) library. Note that to see logging below the `INFO` level, the application must also set the logging level at -run time. 
C++ applications must must call `rmm::logger().set_level()`, for example to enable all -levels of logging down to `TRACE`, call `rmm::logger().set_level(spdlog::level::trace)` (and compile +run time. C++ applications must must call `rmm::default_logger().set_level()`, for example to enable all +levels of logging down to `TRACE`, call `rmm::default_logger().set_level(spdlog::level::trace)` (and compile librmm with `-DRMM_LOGGING_LEVEL=TRACE`). Python applications must call `rmm.set_logging_level()`, for example to enable all levels of logging down to `TRACE`, call `rmm.set_logging_level("trace")` (and compile the RMM Python module with `-DRMM_LOGGING_LEVEL=TRACE`). diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 0487a2dfa..b3b60cfcb 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -34,9 +34,9 @@ function(ConfigureBench BENCH_NAME) RUNTIME_OUTPUT_DIRECTORY "$" CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" INSTALL_RPATH "\$ORIGIN/../../../lib") - target_link_libraries(${BENCH_NAME} benchmark::benchmark pthread rmm) + target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark pthread rmm) target_compile_definitions(${BENCH_NAME} - PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") + PUBLIC "RMM_LOG_ACTIVE_LEVEL=RMM_LOG_LEVEL_${RMM_LOGGING_LEVEL}") if(PER_THREAD_DEFAULT_STREAM) target_compile_definitions(${BENCH_NAME} PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) @@ -44,6 +44,9 @@ function(ConfigureBench BENCH_NAME) target_compile_options(${BENCH_NAME} PUBLIC $<$:-Wall -Werror -Wno-unknown-pragmas>) + + target_link_libraries(${BENCH_NAME} PRIVATE rmm_bench_logger) + if(DISABLE_DEPRECATION_WARNING) target_compile_options( ${BENCH_NAME} PUBLIC $<$:-Xcompiler=-Wno-deprecated-declarations>) @@ -58,6 +61,10 @@ function(ConfigureBench BENCH_NAME) EXCLUDE_FROM_ALL) endfunction(ConfigureBench) +# Create an object library for the logger so that we don't have to recompile it. 
+add_library(rmm_bench_logger OBJECT) +target_link_libraries(rmm_bench_logger PRIVATE rmm_logger_impl) + # random allocations benchmark ConfigureBench(RANDOM_ALLOCATIONS_BENCH random_allocations/random_allocations.cpp) diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 7f45b7691..8edbf11f9 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -33,10 +33,10 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -403,7 +403,7 @@ int main(int argc, char** argv) auto const num_threads = per_thread_events.size(); // Uncomment to enable / change default log level - // rmm::detail::logger().set_level(spdlog::level::trace); + // rmm::logger().set_level(rmm::level_enum::trace); if (args.count("resource") > 0) { std::string mr_name = args["resource"].as(); diff --git a/ci/check_symbols.sh b/ci/check_symbols.sh index 1d73a082b..155e509da 100755 --- a/ci/check_symbols.sh +++ b/ci/check_symbols.sh @@ -47,32 +47,13 @@ for dso_file in ${dso_files}; do echo " * WEAK: $(grep --count -E ' WEAK ' < ${symbol_file})" echo " * LOCAL: $(grep --count -E ' LOCAL ' < ${symbol_file})" - # Explanation for '-v' uses here: - # - # * 'format_error' symbols are intentionally exported, that type of error - # can be thrown across library boundaries. See "Problems with C++ exceptions" - # at https://gcc.gnu.org/wiki/Visibility. echo "checking for 'fmt::' symbols..." - if grep -E 'fmt\:\:' < "${symbol_file}" \ - | grep -v 'format_error' - then + if grep -E 'fmt\:\:' < "${symbol_file}"; then raise-symbols-found-error 'fmt::' fi - # Explanation for '-v' uses here: - # - # * trivially-destructible objects sometimes get an entry in the symbol table - # for a specialization of `std::_Destroy_aux()` called to destroy them. - # There is one for `spdlog::details::log_msg_buffer like that: - # - # 'std::_Destroy_aux::__destroy' - # - # That should be safe to export. - # echo "checking for 'spdlog::' symbols..." 
- if grep -E 'spdlog\:\:' < "${symbol_file}" \ - | grep -v 'std\:\:_Destroy_aux' - then + if grep -E 'spdlog\:\:' < "${symbol_file}"; then raise-symbols-found-error 'spdlog::' fi echo "No symbol visibility issues found" diff --git a/cmake/thirdparty/get_spdlog.cmake b/cmake/thirdparty/get_spdlog.cmake index 7f80b3726..212f604c3 100644 --- a/cmake/thirdparty/get_spdlog.cmake +++ b/cmake/thirdparty/get_spdlog.cmake @@ -12,7 +12,7 @@ # the License. # ============================================================================= -# Use CPM to find or clone speedlog +# Use CPM to find or clone speedlog. function(find_and_configure_spdlog) include(${rapids-cmake-dir}/cpm/spdlog.cmake) @@ -20,7 +20,6 @@ function(find_and_configure_spdlog) FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports BUILD_EXPORT_SET rmm-exports) - endfunction() find_and_configure_spdlog() diff --git a/include/rmm/detail/format.hpp b/include/rmm/detail/format.hpp index 21acac032..6cd1dd9d2 100644 --- a/include/rmm/detail/format.hpp +++ b/include/rmm/detail/format.hpp @@ -20,60 +20,12 @@ #include #include -#include -#include -#include #include -#include #include namespace RMM_NAMESPACE { namespace detail { -/** - * @brief Format a message string with printf-style formatting - * - * This function performs printf-style formatting to avoid the need for fmt - * or spdlog's own templated APIs (which would require exposing spdlog - * symbols publicly) and returns the formatted message as a `std::string`. - * - * @param format The format string - * @param args The format arguments - * @return The formatted message - * @throw rmm::logic_error if an error occurs during formatting - */ -template -std::string formatted_log(std::string const& format, Args&&... 
args) -{ - auto convert_to_c_string = [](auto&& arg) -> decltype(auto) { - using ArgType = std::decay_t; - if constexpr (std::is_same_v) { - return arg.c_str(); - } else { - return std::forward(arg); - } - }; - - // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg) - auto retsize = - std::snprintf(nullptr, 0, format.c_str(), convert_to_c_string(std::forward(args))...); - RMM_EXPECTS(retsize >= 0, "Error during formatting."); - if (retsize == 0) { return {}; } - auto size = static_cast(retsize) + 1; // for null terminator - // NOLINTNEXTLINE(modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays) - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), convert_to_c_string(std::forward(args))...); - // NOLINTEND(cppcoreguidelines-pro-type-vararg) - return {buf.get(), buf.get() + size - 1}; // drop '\0' -} - -// specialization for no arguments -template <> -inline std::string formatted_log(std::string const& format) -{ - return format; -} - // Stringify a size in bytes to a human-readable value inline std::string format_bytes(std::size_t value) { diff --git a/include/rmm/detail/logging_assert.hpp b/include/rmm/detail/logging_assert.hpp index 4d702ee2b..c3b12ffe3 100644 --- a/include/rmm/detail/logging_assert.hpp +++ b/include/rmm/detail/logging_assert.hpp @@ -31,7 +31,7 @@ */ #ifdef NDEBUG #define RMM_LOGGING_ASSERT(_expr) (void)0 -#elif SPDLOG_ACTIVE_LEVEL < SPDLOG_LEVEL_OFF +#elif RMM_LOG_ACTIVE_LEVEL < RMM_LOG_LEVEL_OFF #define RMM_LOGGING_ASSERT(_expr) \ do { \ bool const success = (_expr); \ diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp deleted file mode 100644 index 2cfd921b1..000000000 --- a/include/rmm/logger.hpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -#include -#include - -#include - -namespace RMM_NAMESPACE { - -namespace detail { - -/** - * @brief Returns the default log filename for the RMM global logger. - * - * If the environment variable `RMM_DEBUG_LOG_FILE` is defined, its value is used as the path and - * name of the log file. Otherwise, the file `rmm_log.txt` in the current working directory is used. - * - * @return std::string The default log file name. - */ -inline std::string default_log_filename() -{ - auto* filename = std::getenv("RMM_DEBUG_LOG_FILE"); - return (filename == nullptr) ? std::string{"rmm_log.txt"} : std::string{filename}; -} - -/** - * @brief Simple wrapper around a spdlog::logger that performs RMM-specific initialization - */ -struct logger_wrapper { - spdlog::logger logger_; ///< The underlying logger - - logger_wrapper() - : logger_{"RMM", - std::make_shared( - default_log_filename(), true // truncate file - )} - { - logger_.set_pattern("[%6t][%H:%M:%S:%f][%-6l] %v"); - logger_.flush_on(spdlog::level::warn); -#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_INFO -#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM - logger_.info("----- RMM LOG BEGIN [PTDS ENABLED] -----"); -#else - logger_.info("----- RMM LOG BEGIN [PTDS DISABLED] -----"); -#endif - logger_.flush(); -#endif - } -}; - -inline spdlog::logger& logger() -{ - static detail::logger_wrapper wrapped{}; - return wrapped.logger_; -} -} // namespace detail - -/** - * @brief Returns the global RMM logger - * - * @ingroup logging - * - * This is a spdlog logger. 
The easiest way to log messages is to use the `RMM_LOG_*` macros. - * - * @return spdlog::logger& The logger. - */ -[[deprecated( - "Support for direct access to spdlog loggers in rmm is planned for " - "removal")]] RMM_EXPORT inline spdlog::logger& -logger() -{ - return detail::logger(); -} - -//! @cond Doxygen_Suppress -// -// The default is INFO, but it should be used sparingly, so that by default a log file is only -// output if there is important information, warnings, errors, and critical failures -// Log messages that require computation should only be used at level TRACE and DEBUG -#define RMM_LOG_TRACE(...) \ - SPDLOG_LOGGER_TRACE(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__)) -#define RMM_LOG_DEBUG(...) \ - SPDLOG_LOGGER_DEBUG(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__)) -#define RMM_LOG_INFO(...) \ - SPDLOG_LOGGER_INFO(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__)) -#define RMM_LOG_WARN(...) \ - SPDLOG_LOGGER_WARN(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__)) -#define RMM_LOG_ERROR(...) \ - SPDLOG_LOGGER_ERROR(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__)) -#define RMM_LOG_CRITICAL(...) \ - SPDLOG_LOGGER_CRITICAL(&rmm::detail::logger(), rmm::detail::formatted_log(__VA_ARGS__)) - -//! @endcond - -} // namespace RMM_NAMESPACE - -//! 
@endcond diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index d3a4bb09d..fe07aab04 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -27,8 +27,6 @@ #include -#include - #include #include #include @@ -98,12 +96,9 @@ class arena_memory_resource final : public device_memory_resource { : global_arena_{upstream_mr, arena_size}, dump_log_on_failure_{dump_log_on_failure} { if (dump_log_on_failure_) { - logger_ = - std::make_shared("arena_memory_dump", - std::make_shared( - "rmm_arena_memory_dump.log", true /*truncate file*/)); + logger_ = std::make_shared("arena_memory_dump", "rmm_arena_memory_dump.log"); // Set the level to `debug` for more detailed output. - logger_->set_level(spdlog::level::info); + logger_->set_level(level_enum::info); } } @@ -120,17 +115,9 @@ class arena_memory_resource final : public device_memory_resource { explicit arena_memory_resource(Upstream* upstream_mr, std::optional arena_size = std::nullopt, bool dump_log_on_failure = false) - : global_arena_{to_device_async_resource_ref_checked(upstream_mr), arena_size}, - dump_log_on_failure_{dump_log_on_failure} + : arena_memory_resource{ + to_device_async_resource_ref_checked(upstream_mr), arena_size, dump_log_on_failure} { - if (dump_log_on_failure_) { - logger_ = - std::make_shared("arena_memory_dump", - std::make_shared( - "rmm_arena_memory_dump.log", true /*truncate file*/)); - // Set the level to `debug` for more detailed output. 
- logger_->set_level(spdlog::level::info); - } } ~arena_memory_resource() override = default; @@ -336,8 +323,7 @@ class arena_memory_resource final : public device_memory_resource { void dump_memory_log(size_t bytes) { logger_->info("**************************************************"); - logger_->info(rmm::detail::formatted_log("Ran out of memory trying to allocate %s.", - rmm::detail::format_bytes(bytes))); + logger_->info("Ran out of memory trying to allocate %s.", rmm::detail::format_bytes(bytes)); logger_->info("**************************************************"); logger_->info("Global arena:"); global_arena_.dump_memory_log(logger_); @@ -366,7 +352,7 @@ class arena_memory_resource final : public device_memory_resource { /// If true, dump memory information to log on allocation failure. bool dump_log_on_failure_{}; /// The logger for memory dump. - std::shared_ptr logger_{}; + std::shared_ptr logger_{}; /// Mutex for read and write locks on arena maps. mutable std::shared_mutex map_mtx_; /// Mutex for shared and unique locks on the mr. diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index 419c4fcf4..20095d504 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -28,8 +28,6 @@ #include -#include - #include #include #include @@ -645,33 +643,29 @@ class global_arena final { /** * @brief Dump memory to log. 
* - * @param logger the spdlog logger to use + * @param logger the logger to use */ - RMM_HIDDEN void dump_memory_log(std::shared_ptr const& logger) const + void dump_memory_log(std::shared_ptr const& logger) const { std::lock_guard lock(mtx_); - logger->info(rmm::detail::formatted_log(" Arena size: %s", - rmm::detail::format_bytes(upstream_block_.size()))); - logger->info(rmm::detail::formatted_log(" # superblocks: %zu", superblocks_.size())); + logger->info(" Arena size: %s", rmm::detail::format_bytes(upstream_block_.size())); + logger->info(" # superblocks: %zu", superblocks_.size()); if (!superblocks_.empty()) { - logger->debug( - rmm::detail::formatted_log(" Total size of superblocks: %s", - rmm::detail::format_bytes(total_memory_size(superblocks_)))); + logger->debug(" Total size of superblocks: %s", + rmm::detail::format_bytes(total_memory_size(superblocks_))); auto const total_free = total_free_size(superblocks_); auto const max_free = max_free_size(superblocks_); auto const fragmentation = (1 - max_free / static_cast(total_free)) * 100; - logger->info(rmm::detail::formatted_log(" Total free memory: %s", - rmm::detail::format_bytes(total_free))); - logger->info(rmm::detail::formatted_log(" Largest block of free memory: %s", - rmm::detail::format_bytes(max_free))); - logger->info(rmm::detail::formatted_log(" Fragmentation: %0.2f", fragmentation)); + logger->info(" Total free memory: %s", rmm::detail::format_bytes(total_free)); + logger->info(" Largest block of free memory: %s", rmm::detail::format_bytes(max_free)); + logger->info(" Fragmentation: %0.2f", fragmentation); auto index = decltype(superblocks_.size()){0}; char* prev_end{}; for (auto const& sblk : superblocks_) { if (prev_end == nullptr) { prev_end = sblk.pointer(); } - logger->debug(rmm::detail::formatted_log( + logger->debug( " Superblock %zu: start=%p, end=%p, size=%s, empty=%s, # free blocks=%zu, max " "free=%s, " "gap=%s", @@ -682,7 +676,7 @@ class global_arena final { sblk.empty() ? 
"T" : "F", sblk.free_blocks(), rmm::detail::format_bytes(sblk.max_free_size()), - rmm::detail::format_bytes(static_cast(sblk.pointer() - prev_end)))); + rmm::detail::format_bytes(static_cast(sblk.pointer() - prev_end))); prev_end = sblk.end(); index++; } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index f177504f2..0900d44b2 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -458,7 +458,7 @@ class stream_ordered_memory_resource : public crtp, public device_ void log_summary_trace() { -#if (SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_TRACE) +#if (RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_TRACE) std::size_t num_blocks{0}; std::size_t max_block{0}; std::size_t free_mem{0}; diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 578543852..3e6e5babc 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -19,14 +19,10 @@ #include #include #include +#include #include #include -#include -#include -#include -#include - #include #include #include @@ -35,6 +31,7 @@ namespace RMM_NAMESPACE { namespace mr { + /** * @addtogroup device_resource_adaptors * @{ @@ -78,9 +75,8 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(Upstream* upstream, std::string const& filename = get_default_filename(), bool auto_flush = false) - : logger_{make_logger(filename)}, upstream_{to_device_async_resource_ref_checked(upstream)} + : logging_resource_adaptor(to_device_async_resource_ref_checked(upstream), filename, auto_flush) { - init_logger(auto_flush); } /** @@ -98,9 +94,8 @@ class logging_resource_adaptor final : public device_memory_resource { * performance. 
*/ logging_resource_adaptor(Upstream* upstream, std::ostream& stream, bool auto_flush = false) - : logger_{make_logger(stream)}, upstream_{to_device_async_resource_ref_checked(upstream)} + : logging_resource_adaptor(to_device_async_resource_ref_checked(upstream), stream, auto_flush) { - init_logger(auto_flush); } /** @@ -118,11 +113,10 @@ class logging_resource_adaptor final : public device_memory_resource { * performance. */ logging_resource_adaptor(Upstream* upstream, - spdlog::sinks_init_list sinks, + std::initializer_list sinks, bool auto_flush = false) - : logger_{make_logger(sinks)}, upstream_{to_device_async_resource_ref_checked(upstream)} + : logging_resource_adaptor{to_device_async_resource_ref_checked(upstream), sinks, auto_flush} { - init_logger(auto_flush); } /** @@ -148,9 +142,8 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(device_async_resource_ref upstream, std::string const& filename = get_default_filename(), bool auto_flush = false) - : logger_{make_logger(filename)}, upstream_{upstream} + : logging_resource_adaptor{make_logger(filename), upstream, auto_flush} { - init_logger(auto_flush); } /** @@ -168,9 +161,8 @@ class logging_resource_adaptor final : public device_memory_resource { logging_resource_adaptor(device_async_resource_ref upstream, std::ostream& stream, bool auto_flush = false) - : logger_{make_logger(stream)}, upstream_{upstream} + : logging_resource_adaptor{make_logger(stream), upstream, auto_flush} { - init_logger(auto_flush); } /** @@ -186,11 +178,10 @@ class logging_resource_adaptor final : public device_memory_resource { * performance. 
*/ logging_resource_adaptor(device_async_resource_ref upstream, - spdlog::sinks_init_list sinks, + std::initializer_list sinks, bool auto_flush = false) - : logger_{make_logger(sinks)}, upstream_{upstream} + : logging_resource_adaptor{make_logger(sinks), upstream, auto_flush} { - init_logger(auto_flush); } logging_resource_adaptor() = delete; @@ -241,29 +232,24 @@ class logging_resource_adaptor final : public device_memory_resource { } private: - static auto make_logger(std::ostream& stream) - { - return std::make_shared( - "RMM", std::make_shared(stream)); - } + static auto make_logger(std::ostream& stream) { return std::make_shared("RMM", stream); } static auto make_logger(std::string const& filename) { - return std::make_shared( - "RMM", std::make_shared(filename, true /*truncate file*/)); + return std::make_shared("RMM", filename); } - static auto make_logger(spdlog::sinks_init_list sinks) + static auto make_logger(std::initializer_list sinks) { - return std::make_shared("RMM", sinks); + return std::make_shared("RMM", sinks); } - /** - * @brief Initialize the logger. - */ - void init_logger(bool auto_flush) + logging_resource_adaptor(std::shared_ptr logger, + device_async_resource_ref upstream, + bool auto_flush) + : logger_{logger}, upstream_{upstream} { - if (auto_flush) { logger_->flush_on(spdlog::level::info); } + if (auto_flush) { logger_->flush_on(level_enum::info); } logger_->set_pattern("%v"); logger_->info(header()); logger_->set_pattern("%t,%H:%M:%S.%f,%v"); @@ -298,12 +284,11 @@ class logging_resource_adaptor final : public device_memory_resource { { try { auto const ptr = get_upstream_resource().allocate_async(bytes, stream); - logger_->info(rmm::detail::formatted_log( - "allocate,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream))); + logger_->info("allocate,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream)); return ptr; } catch (...) 
{ - logger_->info(rmm::detail::formatted_log( - "allocate failure,%p,%zu,%s", nullptr, bytes, rmm::detail::format_stream(stream))); + logger_->info( + "allocate failure,%p,%zu,%s", nullptr, bytes, rmm::detail::format_stream(stream)); throw; } } @@ -324,8 +309,7 @@ class logging_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - logger_->info( - rmm::detail::formatted_log("free,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream))); + logger_->info("free,%p,%zu,%s", ptr, bytes, rmm::detail::format_stream(stream)); get_upstream_resource().deallocate_async(ptr, bytes, stream); } @@ -344,7 +328,7 @@ class logging_resource_adaptor final : public device_memory_resource { return get_upstream_resource() == cast->get_upstream_resource(); } - std::shared_ptr logger_; ///< spdlog logger object + std::shared_ptr logger_{}; device_async_resource_ref upstream_; ///< The upstream resource used for satisfying ///< allocation requests diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 8131eef4d..09631960e 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -185,9 +185,9 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void log_outstanding_allocations() const { -#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG +#if RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG RMM_LOG_DEBUG("Outstanding Allocations: %s", get_outstanding_allocations_str()); -#endif // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG +#endif // RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG } private: diff --git a/python/rmm/CMakeLists.txt b/python/rmm/CMakeLists.txt index ac8495e14..a39ac8868 100644 --- a/python/rmm/CMakeLists.txt +++ b/python/rmm/CMakeLists.txt @@ -27,7 +27,11 @@ include(rapids-cython-core) rapids_cython_init() # pass through logging level 
to spdlog -add_compile_definitions("SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") +add_compile_definitions("RMM_LOG_ACTIVE_LEVEL=RMM_LOG_LEVEL_${RMM_LOGGING_LEVEL}") + +# Create an object library for the logger so that we don't have to recompile it. +add_library(cpp_logger OBJECT) +target_link_libraries(cpp_logger PRIVATE rmm::rmm_logger_impl) add_subdirectory(rmm/_cuda) add_subdirectory(rmm/librmm) diff --git a/python/rmm/docs/conf.py b/python/rmm/docs/conf.py index 2aad3a82c..99242daa5 100644 --- a/python/rmm/docs/conf.py +++ b/python/rmm/docs/conf.py @@ -238,6 +238,8 @@ def on_missing_reference(app, env, node, contnode): "thrust", "spdlog", "stream_ref", + # logger names (we may eventually want to link out for those) + "sink_ptr", # libcu++ names "cuda", "cuda::mr", diff --git a/python/rmm/rmm/__init__.py b/python/rmm/rmm/__init__.py index 832fec095..5c865eba8 100644 --- a/python/rmm/rmm/__init__.py +++ b/python/rmm/rmm/__init__.py @@ -22,7 +22,7 @@ flush_logger, get_flush_level, get_logging_level, - logging_level, + level_enum, set_flush_level, set_logging_level, should_log, @@ -45,7 +45,7 @@ "get_log_filenames", "get_logging_level", "is_initialized", - "logging_level", + "level_enum", "mr", "register_reinitialize_hook", "reinitialize", diff --git a/python/rmm/rmm/_cuda/CMakeLists.txt b/python/rmm/rmm/_cuda/CMakeLists.txt index 7fd27d110..7759432d3 100644 --- a/python/rmm/rmm/_cuda/CMakeLists.txt +++ b/python/rmm/rmm/_cuda/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources stream.pyx) -set(linked_libraries rmm::rmm) +set(linked_libraries rmm::rmm cpp_logger) rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" CXX) diff --git a/python/rmm/rmm/librmm/CMakeLists.txt b/python/rmm/rmm/librmm/CMakeLists.txt index 5da2a1a01..dc807fdba 100644 --- a/python/rmm/rmm/librmm/CMakeLists.txt +++ b/python/rmm/rmm/librmm/CMakeLists.txt @@ 
-13,7 +13,7 @@ # ============================================================================= set(cython_sources _logger.pyx) -set(linked_libraries rmm::rmm) +set(linked_libraries rmm::rmm cpp_logger) # Build all of the Cython targets rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" diff --git a/python/rmm/rmm/librmm/_logger.pxd b/python/rmm/rmm/librmm/_logger.pxd index fb2126b2f..bd0728bc1 100644 --- a/python/rmm/rmm/librmm/_logger.pxd +++ b/python/rmm/rmm/librmm/_logger.pxd @@ -13,54 +13,27 @@ # limitations under the License. from libcpp cimport bool - - -cdef extern from "spdlog/common.h" namespace "spdlog::level" nogil: - cpdef enum logging_level "spdlog::level::level_enum": - """ - The debug logging level for RMM. - - Debug logging prints messages to a log file. See - `Debug Logging `_ - for more information. - - Valid levels, in decreasing order of verbosity, are TRACE, DEBUG, - INFO, WARN, ERR, CRITICAL, and OFF. Default is INFO. 
- - Examples - -------- - >>> import rmm - >>> rmm.logging_level.DEBUG - - >>> rmm.logging_level.DEBUG.value - 1 - >>> rmm.logging_level.DEBUG.name - 'DEBUG' - - See Also - -------- - set_logging_level : Set the debug logging level - get_logging_level : Get the current debug logging level - """ - TRACE "spdlog::level::trace" - DEBUG "spdlog::level::debug" - INFO "spdlog::level::info" - WARN "spdlog::level::warn" - ERR "spdlog::level::err" - CRITICAL "spdlog::level::critical" - OFF "spdlog::level::off" - - -cdef extern from "spdlog/spdlog.h" namespace "spdlog" nogil: - cdef cppclass spdlog_logger "spdlog::logger": - spdlog_logger() except + - void set_level(logging_level level) - logging_level level() +from libcpp.string cimport string + + +cdef extern from "rmm/logger.hpp" namespace "rmm" nogil: + cpdef enum class level_enum: + trace + debug + info + warn + error + critical + off + n_levels + + cdef cppclass logger: + logger(string name, string filename) except + + void set_level(level_enum log_level) except + + level_enum level() except + void flush() except + - void flush_on(logging_level level) - logging_level flush_level() - bool should_log(logging_level msg_level) - + void flush_on(level_enum level) except + + level_enum flush_level() except + + bool should_log(level_enum msg_level) except + -cdef extern from "rmm/logger.hpp" namespace "rmm::detail" nogil: - cdef spdlog_logger& logger() except + + cdef logger& default_logger() except + diff --git a/python/rmm/rmm/librmm/_logger.pyx b/python/rmm/rmm/librmm/_logger.pyx index 4392cb106..57bbf5c62 100644 --- a/python/rmm/rmm/librmm/_logger.pyx +++ b/python/rmm/rmm/librmm/_logger.pyx @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -from rmm.librmm._logger cimport logging_level # no-cython-lint diff --git a/python/rmm/rmm/pylibrmm/CMakeLists.txt b/python/rmm/rmm/pylibrmm/CMakeLists.txt index 0e88f01bb..0012cb93d 100644 --- a/python/rmm/rmm/pylibrmm/CMakeLists.txt +++ b/python/rmm/rmm/pylibrmm/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources device_buffer.pyx logger.pyx memory_resource.pyx cuda_stream.pyx helper.pyx) -set(linked_libraries rmm::rmm) +set(linked_libraries rmm::rmm cpp_logger) # Build all of the Cython targets rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" diff --git a/python/rmm/rmm/pylibrmm/logger.pyx b/python/rmm/rmm/pylibrmm/logger.pyx index 119e1c92f..9d5877fae 100644 --- a/python/rmm/rmm/pylibrmm/logger.pyx +++ b/python/rmm/rmm/pylibrmm/logger.pyx @@ -14,14 +14,14 @@ import warnings -from rmm.librmm._logger cimport logger +from rmm.librmm._logger cimport default_logger -from rmm.librmm._logger import logging_level +from rmm.librmm._logger import level_enum def _validate_level_type(level): - if not isinstance(level, logging_level): - raise TypeError("level must be an instance of the logging_level enum") + if not isinstance(level, level_enum): + raise TypeError("level must be an instance of the level_enum enum") def should_log(level): @@ -54,7 +54,7 @@ def should_log(level): If the logging level is not an instance of the ``logging_level`` enum. """ _validate_level_type(level) - return logger().should_log(level) + return default_logger().should_log(level) def set_logging_level(level): @@ -86,10 +86,10 @@ def set_logging_level(level): >>> rmm.set_logging_level(rmm.logging_level.WARN) # set logging level to warn """ _validate_level_type(level) - logger().set_level(level) + default_logger().set_level(level) if not should_log(level): - warnings.warn(f"RMM will not log logging_level.{level.name}. 
This " + warnings.warn(f"RMM will not log level_enum.{level.name}. This " "may be because the C++ library is compiled for a " "less-verbose logging level.") @@ -118,7 +118,7 @@ def get_logging_level(): >>> rmm.get_logging_level() # get current logging level """ - return logging_level(logger().level()) + return default_logger().level() def flush_logger(): @@ -140,7 +140,7 @@ def flush_logger(): >>> import rmm >>> rmm.flush_logger() # flush the logger """ - logger().flush() + default_logger().flush() def set_flush_level(level): @@ -174,10 +174,10 @@ def set_flush_level(level): >>> rmm.flush_on(rmm.logging_level.WARN) # set flush level to warn """ _validate_level_type(level) - logger().flush_on(level) + default_logger().flush_on(level) if not should_log(level): - warnings.warn(f"RMM will not log logging_level.{level.name}. This " + warnings.warn(f"RMM will not log level_enum.{level.name}. This " "may be because the C++ library is compiled for a " "less-verbose logging level.") @@ -208,4 +208,4 @@ def get_flush_level(): >>> rmm.flush_level() # get current flush level """ - return logging_level(logger().flush_level()) + return default_logger().flush_level() diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 182434dc5..7d13b5cac 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -29,6 +29,7 @@ import rmm._cuda.stream from rmm.allocators.cupy import rmm_cupy_allocator from rmm.allocators.numba import RMMNumbaManager +from rmm.pylibrmm.logger import level_enum cuda.set_memory_manager(RMMNumbaManager) @@ -1033,22 +1034,23 @@ def test_rmm_device_buffer_copy(cuda_ary, make_copy): np.testing.assert_equal(expected, result) -@pytest.mark.parametrize("level", rmm.logging_level) +@pytest.mark.parametrize("level", level_enum) def test_valid_logging_level(level): + default_level = level_enum.info with warnings.catch_warnings(): warnings.filterwarnings( - "ignore", message="RMM will not log logging_level.TRACE." 
+ "ignore", message="RMM will not log level_enum.trace." ) warnings.filterwarnings( - "ignore", message="RMM will not log logging_level.DEBUG." + "ignore", message="RMM will not log level_enum.debug." ) rmm.set_logging_level(level) assert rmm.get_logging_level() == level - rmm.set_logging_level(rmm.logging_level.INFO) # reset to default + rmm.set_logging_level(default_level) # reset to default rmm.set_flush_level(level) assert rmm.get_flush_level() == level - rmm.set_flush_level(rmm.logging_level.INFO) # reset to default + rmm.set_flush_level(default_level) # reset to default rmm.should_log(level) diff --git a/rapids_logger/CMakeLists.txt b/rapids_logger/CMakeLists.txt new file mode 100644 index 000000000..fd50276ca --- /dev/null +++ b/rapids_logger/CMakeLists.txt @@ -0,0 +1,177 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# cmake-lint: disable=R0915 + +include_guard(GLOBAL) + +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + +include(../rapids_config.cmake) + +include(rapids-cmake) +include(rapids-cpm) + +project( + RAPIDS_LOGGER + VERSION 0.0.1 + LANGUAGES CXX) + +rapids_cmake_build_type(Release) + +rapids_cpm_init() + +include(../cmake/thirdparty/get_spdlog.cmake) + +#[=======================================================================[.rst: +rapids_make_logger +------------------ + +Generate a logger implementation customized for the specified namespace. + +.. code-block:: cmake + + rapids_make_logger(<logger_namespace> + [EXPORT_SET <export-set-name>] + [LOGGER_TARGET <logger-target-name>] + [LOGGER_HEADER_DIR <header-dir>] + [LOGGER_MACRO_PREFIX <logger-macro-prefix>] + ) + +This function produces an interface target named <LOGGER_TARGET> that, when linked to by other targets, provides them a header file that defines a logger interface for the specified namespace. The logger is generated from the provided template files and is configured to use the specified namespace and macro prefix. The generated logger is placed in the specified header directory. + +The logger implementation lives in a separate header file that is not included in the declaration header. The logger implementation is compiled into a separate target that must be linked to by any target that uses the logger. + + +``logger_namespace`` + The namespace for which to generate the logger implementation. + +``EXPORT_SET`` + The name of the export set to which the logger target should be added. If not specified, the logger target is not added to any export set. + +``LOGGER_TARGET`` + The name of the logger (and logger impl) target to create. If not specified, defaults to <logger_namespace>_logger. + +``LOGGER_HEADER_DIR`` + The directory in which to place the generated logger header file. If not specified, the logger header file is placed in include/<logger_namespace>. + +``LOGGER_MACRO_PREFIX`` + The prefix to use for the logger macros.
If not specified, the macro prefix is the uppercase version of the logger namespace. + +Result Targets +^^^^^^^^^^^^^^^^ + <LOGGER_TARGET> is an interface target that provides the logger interface for the specified namespace. + + <LOGGER_TARGET>_impl is an interface target that provides the logger implementation for the specified namespace. This target must be linked to by any target that uses the logger. Targets linking to this target will have the logger implementation compiled into them. + +Examples +^^^^^^^^ + +Example on how to use :cmake:command:`rapids_make_logger`. + + +.. code-block:: cmake + + # Generate a logger for the namespace "rapids" and associate it with the + # export set "rapids-exports". + rapids_make_logger(rapids + EXPORT_SET rapids-exports + ) + + # Generate a logger for the namespace "rmm" using only the default settings (no export set). + rapids_make_logger(rmm) + + +#]=======================================================================] +function(rapids_make_logger logger_namespace) + list(APPEND CMAKE_MESSAGE_CONTEXT "rapids_make_logger") + + set(_rapids_options) + set(_rapids_one_value EXPORT_SET LOGGER_TARGET LOGGER_HEADER_DIR LOGGER_MACRO_PREFIX) + set(_rapids_multi_value) + cmake_parse_arguments(_RAPIDS "${_rapids_options}" "${_rapids_one_value}" + "${_rapids_multi_value}" ${ARGN}) + + # Most arguments are optional and can be inferred from the namespace by default. + set(_RAPIDS_LOGGER_NAMESPACE ${logger_namespace}) + if(NOT _RAPIDS_LOGGER_TARGET) + set(_RAPIDS_LOGGER_TARGET "${logger_namespace}_logger") + endif() + if(NOT _RAPIDS_LOGGER_HEADER_DIR) + set(_RAPIDS_LOGGER_HEADER_DIR "include/${logger_namespace}") + endif() + if(NOT _RAPIDS_LOGGER_MACRO_PREFIX) + string(TOUPPER ${logger_namespace} _RAPIDS_LOGGER_MACRO_PREFIX) + endif() + + # All paths are computed relative to the current source/binary dir of the file from which the + # function is invoked. As a result we cannot use relative paths here because CMake will root these + # paths incorrectly for configure_file/install.
+ set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/${_RAPIDS_LOGGER_HEADER_DIR}) + # TODO: Verify that installation works correctly with prefix removed. + set(INSTALL_DIR ${_RAPIDS_LOGGER_HEADER_DIR}) + + set(LOGGER_OUTPUT_FILE ${BUILD_DIR}/logger.hpp) + configure_file(${CMAKE_CURRENT_FUNCTION_LIST_DIR}/logger.hpp.in ${LOGGER_OUTPUT_FILE}) + install(FILES ${LOGGER_OUTPUT_FILE} DESTINATION ${INSTALL_DIR}) + + set(LOGGER_IMPL_OUTPUT_FILE ${BUILD_DIR}/logger_impl/logger_impl.hpp) + configure_file(${CMAKE_CURRENT_FUNCTION_LIST_DIR}/logger_impl.hpp.in ${LOGGER_IMPL_OUTPUT_FILE}) + install(FILES ${LOGGER_IMPL_OUTPUT_FILE} DESTINATION ${INSTALL_DIR}/logger_impl) + + add_library(${_RAPIDS_LOGGER_TARGET} INTERFACE) + include(GNUInstallDirs) + # Note: The BUILD_INTERFACE setting assumes that LOGGER_HEADER_DIR is the subdirectory of + # CMAKE_INSTALL_INCLUDEDIR relative to which all includes are rooted in the C++ code files. I + # think that is a safe assumption though since if it were violated then the INSTALL_INTERFACE + # would not only be incorrect (if computed using LOGGER_HEADER_DIR), but it would also break + # consumers of the installed package who expect to be able to write `#include + # <${LOGGER_HEADER_DIR/include\//}/logger.hpp>` and have it work. + target_include_directories( + ${_RAPIDS_LOGGER_TARGET} + INTERFACE "$" + "$") + target_compile_features(${_RAPIDS_LOGGER_TARGET} INTERFACE cxx_std_17) + + # Create an interface target that will trigger compilation of the logger implementation in any + # target that is linked to it. + set(LOGGER_IMPL_SRC_OUTPUT_FILE ${BUILD_DIR}/logger_impl/logger.cpp) + configure_file(${CMAKE_CURRENT_FUNCTION_LIST_DIR}/logger.cpp.in ${LOGGER_IMPL_SRC_OUTPUT_FILE}) + install(FILES ${LOGGER_IMPL_SRC_OUTPUT_FILE} DESTINATION ${INSTALL_DIR}/logger_impl) + + # Note that we cannot specify the source files directly in add_library, see the CMake + # documentation explaining that these do not populate INTERFACE_SOURCES. 
+ # https://cmake.org/cmake/help/latest/command/add_library.html#interface-with-sources + set(impl_target ${_RAPIDS_LOGGER_TARGET}_impl) + add_library(${impl_target} INTERFACE) + target_sources( + ${impl_target} + INTERFACE $ + $) + target_link_libraries(${impl_target} INTERFACE ${_RAPIDS_LOGGER_TARGET} + spdlog::spdlog_header_only) + set_target_properties(${impl_target} PROPERTIES POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON) + + set(_install_export) + if(_RAPIDS_EXPORT_SET) + set(_install_export EXPORT ${_RAPIDS_EXPORT_SET}) + endif() + + install(TARGETS ${_RAPIDS_LOGGER_TARGET} ${_install_export}) + if(TARGET ${impl_target}) + install(TARGETS ${impl_target} ${_install_export}) + endif() + +endfunction() diff --git a/rapids_logger/README.md b/rapids_logger/README.md new file mode 100644 index 000000000..00e505fc7 --- /dev/null +++ b/rapids_logger/README.md @@ -0,0 +1,22 @@ +# About + +The `rapids-logger` project defines an easy way to produce a project-specific logger using the excellent [spdlog](https://github.com/gabime/spdlog) package. +The goal of this project is to ensure that projects wishing to provide their own logger may do so easily without needing to reimplement their own custom wrappers around spdlog. +A core goal of the project is to ensure that the custom logger implementation does not leak any spdlog symbols, allowing the safe coexistence of different projects in the same environment even if they use different versions of spdlog. +That goal is the primary reason to prefer using this project rather than directly exposing a specialized instance of a spdlog logger in your own project. + +`rapids-logger` is designed to be used via CMake. +Its CMake defines a function `rapids_make_logger` that can be used to produce a project-specific logger class in a provided namespace. 
+The resulting logger exposes spdlog-like functionality via the [PImpl idiom](https://en.cppreference.com/w/cpp/language/pimpl) to avoid exposing spdlog symbols publicly. +It uses CMake and template C++ files to generate a public header file to describe the user interface and an inline header that should be placed in a single TU by consumers to compile the implementation. +To simplify usage, each invocation of the function produces two CMake targets, one representing the public header and one representing a trivial source file including the inline header. +Projects using `rapids-logger` should make the first target part of their public link interface while the latter should be linked to privately so that it is compiled into the project's library without public exposure. + +To mirror spdlog, each generated logger also ships with a set of logging macros `<PROJECT>_LOG_<LEVEL>` that may be used to control logging at compile-time as well as runtime using a compile-time variable `<PROJECT>_LOG_ACTIVE_LEVEL`. +For example, a project called "rapids" will be able to write code like this: +``` +RAPIDS_LOG_WARN("Some message to be shown when the warning level is enabled"); +``` +and control whether that warning is shown by compiling the code with `RAPIDS_LOG_ACTIVE_LEVEL=RAPIDS_LOG_LEVEL_WARN`. +Each project is endowed with its own definition of levels, so different projects in the same environment may be safely configured independently of each other and of spdlog. +Each project is also given a `default_logger` function that produces a global logger that may be used anywhere, but projects may also freely instantiate additional loggers as needed. diff --git a/rapids_logger/logger.cpp.in b/rapids_logger/logger.cpp.in new file mode 100644 index 000000000..36bb2ce02 --- /dev/null +++ b/rapids_logger/logger.cpp.in @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "logger_impl.hpp" diff --git a/rapids_logger/logger.hpp.in b/rapids_logger/logger.hpp.in new file mode 100644 index 000000000..cd2bb2c79 --- /dev/null +++ b/rapids_logger/logger.hpp.in @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace __attribute__((visibility("default"))) @_RAPIDS_LOGGER_NAMESPACE@ { + +// These values must be kept in sync with spdlog! 
+#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_TRACE 0 +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_DEBUG 1 +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_INFO 2 +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_WARN 3 +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_ERROR 4 +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_CRITICAL 5 +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_OFF 6 + +/** + * @brief The log levels supported by the logger. + * + * These levels correspond to the levels defined by spdlog. + */ +enum class level_enum : int32_t { + trace = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_TRACE, + debug = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_DEBUG, + info = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_INFO, + warn = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_WARN, + error = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_ERROR, + critical = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_CRITICAL, + off = @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_OFF, + n_levels +}; + +namespace detail { +// Forward declare the implementation classes. +class logger_impl; +class sink_impl; +} + +// Forward declare for the sink for the logger to use. +class sink; +using sink_ptr = std::shared_ptr; + +/** + * @class logger + * @brief A logger class that either uses the real implementation (via spdlog) or performs no-ops if + * not supported. + */ +class logger { + public: + logger() = delete; ///< Not default constructible + logger(logger const&) = delete; ///< Not copy constructible + logger& operator=(logger const&) = delete; ///< Not copy assignable + + logger(logger&& other); ///< @default_move_constructor + logger& operator=(logger&& other); ///< @default_move_assignment{logger} + + /** + * @brief A class to manage a vector of sinks. + * + * This class is used internally by the logger class to manage its sinks. 
It handles synchronization of the sinks with the sinks in the underlying spdlog logger such that all vector-like operations performed on this class are reflected in the underlying spdlog logger's set of sinks. + */ + class sink_vector { + public: + using Iterator = std::vector::iterator; ///< The iterator type + using ConstIterator = std::vector::const_iterator; ///< The const iterator type + + /** + * @brief Construct a new sink_vector object + * + * @param parent The logger whose sinks are being managed + * @param sinks The sinks to manage + */ + explicit sink_vector(logger& parent, std::vector sinks={}) : parent{parent}, sinks_{sinks} {} + + /** + * @brief Add a sink to the vector. + * + * @param sink The sink to add + */ + void push_back(sink_ptr const& sink); + + /** + * @brief Add a sink to the vector. + * + * @param sink The sink to add + */ + void push_back(sink_ptr&& sink); + + /** + * @brief Remove the last sink from the vector. + */ + void pop_back(); + + /** + * @brief Remove all sinks from the vector. + */ + void clear(); + + /** + * @brief Get an iterator to the beginning of the vector. + * + * @return Iterator The iterator + */ + Iterator begin() { return sinks_.begin(); } + + /** + * @brief Get an iterator to the end of the vector. + * + * @return Iterator The iterator + */ + Iterator end() { return sinks_.end(); } + + /** + * @brief Get a const iterator to the beginning of the vector. + * + * @return ConstIterator The const iterator + */ + ConstIterator begin() const { return sinks_.begin(); } + + /** + * @brief Get a const iterator to the end of the vector. + * + * @return ConstIterator The const iterator + */ + ConstIterator end() const { return sinks_.end(); } + + /** + * @brief Get a const iterator to the beginning of the vector. + * + * @return ConstIterator The const iterator + */ + ConstIterator cbegin() const { return sinks_.cbegin(); } + + /** + * @brief Get a const iterator to the end of the vector. 
+ * + * @return ConstIterator The const iterator + */ + ConstIterator cend() const { return sinks_.cend(); } + private: + logger& parent; ///< The logger this vector belongs to + std::vector sinks_; ///< The sinks + }; + + // TODO: When we migrate to C++20 we can use std::format and format strings + // instead of the printf-style printing used here. + /** + * @brief Format and log a message at the specified level. + * + * This function performs printf-style formatting to avoid the need for fmt + * or spdlog's own templated APIs (which would require exposing spdlog + * symbols publicly) and then invokes the base implementation with the + * preformatted string. + * + * @param lvl The log level + * @param format The format string + * @param args The format arguments + */ + template + void log(level_enum lvl, std::string const& format, Args&&... args) { + auto convert_to_c_string = [](auto&& arg) -> decltype(auto) { + using ArgType = std::decay_t; + if constexpr (std::is_same_v) { + return arg.c_str(); + } else { + return std::forward(arg); + } + }; + + // NOLINTBEGIN(cppcoreguidelines-pro-type-vararg) + auto formatted_size = + std::snprintf(nullptr, 0, format.c_str(), convert_to_c_string(std::forward(args))...); + if (formatted_size < 0) { throw std::runtime_error("Error during formatting."); } + if (formatted_size == 0) { log(lvl, {}); } + auto size = static_cast(formatted_size) + 1; // for null terminator + // NOLINTNEXTLINE(modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays) + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), convert_to_c_string(std::forward(args))...); + // NOLINTEND(cppcoreguidelines-pro-type-vararg) + log(lvl, {buf.get(), buf.get() + size - 1}); // drop '\0' + }; + + /** + * @brief Log a message at the TRACE level. + * + * @param format The format string + * @param args The format arguments + */ + template + void trace(std::string const& format, Args&&... 
args) + { + log(level_enum::trace, format, std::forward(args)...); + } + + /** + * @brief Log a message at the DEBUG level. + * + * @param format The format string + * @param args The format arguments + */ + template + void debug(std::string const& format, Args&&... args) + { + log(level_enum::debug, format, std::forward(args)...); + } + + /** + * @brief Log a message at the INFO level. + * + * @param format The format string + * @param args The format arguments + */ + template + void info(std::string const& format, Args&&... args) + { + log(level_enum::info, format, std::forward(args)...); + } + + /** + * @brief Log a message at the WARN level. + * + * @param format The format string + * @param args The format arguments + */ + template + void warn(std::string const& format, Args&&... args) + { + log(level_enum::warn, format, std::forward(args)...); + } + + /** + * @brief Log a message at the ERROR level. + * + * @param format The format string + * @param args The format arguments + */ + template + void error(std::string const& format, Args&&... args) + { + log(level_enum::error, format, std::forward(args)...); + } + + /** + * @brief Log a message at the CRITICAL level. + * + * @param format The format string + * @param args The format arguments + */ + template + void critical(std::string const& format, Args&&... args) + { + log(level_enum::critical, format, std::forward(args)...); + } + + // Everything below here is conditionally compiled based on whether logging is supported. 
+ /** + * @brief Construct a new logger object + * + * @param name The name of the logger + * @param filename The name of the log file + */ + logger(std::string name, std::string filename); + + /** + * @brief Construct a new logger object + * + * @param name The name of the logger + * @param stream The stream to log to + */ + logger(std::string name, std::ostream& stream); + + /** + * @brief Construct a new logger object + * + * @param name The name of the logger + * @param sinks The sinks to log to + * + * Note that we must use a vector because initializer_lists are not flexible + * enough to support programmatic construction in callers, and an + * iterator-based API would require templating and thus exposing spdlog + * types. + */ + logger(std::string name, std::vector sinks); + + /** + * @brief Destroy the logger object + */ + ~logger(); + + /** + * @brief Log a message at the specified level. + * + * This is the core logging routine that dispatches to spdlog. + * + * @param lvl The log level + * @param message The message to log + */ + void log(level_enum lvl, std::string const& message); + + /** + * @brief Get the sinks for the logger. + * + * @return The sinks + */ + const sink_vector& sinks() const; + + /** + * @brief Get the sinks for the logger. + * + * @return The sinks + */ + sink_vector& sinks(); + + /** + * @brief Get the current log level. + * + * @return The current log level + */ + level_enum level() const; + + /** + * @brief Set the log level. + * + * @param log_level The new log level + */ + void set_level(level_enum log_level); + + /** + * @brief Flush the logger. + */ + void flush(); + + /** + * @brief Flush all writes on the specified level or above. + */ + void flush_on(level_enum log_level); + + /** + * @brief Get the current flush level. + */ + level_enum flush_level() const; + + /** + * @brief Check if the logger should log a message at the specified level. 
+ * + * @param msg_level The level of the message + * @return true if the message should be logged, false otherwise + */ + bool should_log(level_enum msg_level) const; + + /** + * @brief Set the pattern for the logger. + * + * @param pattern The pattern to use + */ + void set_pattern(std::string pattern); + + private: + std::unique_ptr impl; ///< The logger implementation + sink_vector sinks_; ///< The sinks for the logger +}; + +/** + * @brief A sink for the logger. + * + * These sinks are wrappers around the spdlog sinks that allow us to keep the + * spdlog types private and avoid exposing them in the public API. + */ +class sink { + public: + ~sink(); + protected: + explicit sink(std::unique_ptr impl); + std::unique_ptr impl; + // The sink vector needs to be able to pass the underlying sink to the spdlog logger. + friend class logger::sink_vector; +}; + +/** + * @brief A sink that writes to a file. + * + * See spdlog::sinks::basic_file_sink_mt for more information. + */ +class basic_file_sink_mt : public sink { + public: + basic_file_sink_mt(std::string const& filename, bool truncate = false); +}; + +/** + * @brief A sink that writes to an ostream. + * + * See spdlog::sinks::ostream_sink_mt for more information. + */ +class ostream_sink_mt : public sink { + public: + ostream_sink_mt(std::ostream& stream, bool force_flush = false); +}; + + +/** + * @brief Returns the default log filename for the global logger. + * + * If the environment variable `@_RAPIDS_LOGGER_MACRO_PREFIX@_DEBUG_LOG_FILE` is defined, its value is used as the path and + * name of the log file. Otherwise, the file `@_RAPIDS_LOGGER_NAMESPACE@_log.txt` in the current working directory is used. + * + * @return std::string The default log file name. + */ +inline std::string default_log_filename() +{ + auto* filename = std::getenv("@_RAPIDS_LOGGER_MACRO_PREFIX@_DEBUG_LOG_FILE"); + // TODO: Do we prefer rmm's default (a file rmm_log.txt) or cudf's default (a + // stderr sink)?
I think the latter is better. + return (filename == nullptr) ? std::string{"@_RAPIDS_LOGGER_NAMESPACE@_log.txt"} : std::string{filename}; +} + +/** + * @brief Get the default logger. + * + * @return logger& The default logger + */ +inline logger& default_logger() +{ + static logger logger_ = [] { + logger logger_ { + "@_RAPIDS_LOGGER_MACRO_PREFIX@", default_log_filename() + }; +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_INFO +#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM + logger_.info("----- @_RAPIDS_LOGGER_MACRO_PREFIX@ LOG BEGIN [PTDS ENABLED] -----"); +#else + logger_.info("----- @_RAPIDS_LOGGER_MACRO_PREFIX@ LOG BEGIN [PTDS DISABLED] -----"); +#endif +#endif + return logger_; + }(); + return logger_; +} + +// Macros for easier logging, similar to spdlog. +#if !defined(@_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL) +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_INFO +#endif + +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(logger, level, ...) (logger).log(level, __VA_ARGS__) + +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_TRACE +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_TRACE(...) \ + @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(@_RAPIDS_LOGGER_NAMESPACE@::default_logger(), @_RAPIDS_LOGGER_NAMESPACE@::level_enum::trace, __VA_ARGS__) +#else +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_TRACE(...) (void)0 +#endif + +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_DEBUG +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_DEBUG(...) \ + @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(@_RAPIDS_LOGGER_NAMESPACE@::default_logger(), @_RAPIDS_LOGGER_NAMESPACE@::level_enum::debug, __VA_ARGS__) +#else +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_DEBUG(...) 
(void)0 +#endif + +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_INFO +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_INFO(...) @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(@_RAPIDS_LOGGER_NAMESPACE@::default_logger(), @_RAPIDS_LOGGER_NAMESPACE@::level_enum::info, __VA_ARGS__) +#else +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_INFO(...) (void)0 +#endif + +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_WARN +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_WARN(...) @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(@_RAPIDS_LOGGER_NAMESPACE@::default_logger(), @_RAPIDS_LOGGER_NAMESPACE@::level_enum::warn, __VA_ARGS__) +#else +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_WARN(...) (void)0 +#endif + +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_ERROR +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ERROR(...) \ + @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(@_RAPIDS_LOGGER_NAMESPACE@::default_logger(), @_RAPIDS_LOGGER_NAMESPACE@::level_enum::error, __VA_ARGS__) +#else +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ERROR(...) (void)0 +#endif + +#if @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_ACTIVE_LEVEL <= @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_LEVEL_CRITICAL +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_CRITICAL(...) \ + @_RAPIDS_LOGGER_MACRO_PREFIX@_LOGGER_CALL(@_RAPIDS_LOGGER_NAMESPACE@::default_logger(), @_RAPIDS_LOGGER_NAMESPACE@::level_enum::critical, __VA_ARGS__) +#else +#define @_RAPIDS_LOGGER_MACRO_PREFIX@_LOG_CRITICAL(...) (void)0 +#endif + +} // namespace @_RAPIDS_LOGGER_NAMESPACE@ diff --git a/rapids_logger/logger_impl.hpp.in b/rapids_logger/logger_impl.hpp.in new file mode 100644 index 000000000..717a00ac9 --- /dev/null +++ b/rapids_logger/logger_impl.hpp.in @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../logger.hpp" + +// Include C headers for which we need symbols to be made public and don't want +// the below symbol hiding pattern to apply. +#include +#include +#include + +// Start hiding before including spdlog headers. +#pragma GCC visibility push(hidden) + +#include +#include +#include + +#include +#include +#include + +namespace @_RAPIDS_LOGGER_NAMESPACE@ { + +namespace detail { +namespace { + +/** + * @brief Convert a string to a log level. + * + * This function is used to process env-var specifications of log levels. + * @param env_lvl_str The string to convert. + * @return The log level. + */ +level_enum string_to_level(std::string_view const env_lvl_str) +{ + if (env_lvl_str == "TRACE") return level_enum::trace; + if (env_lvl_str == "DEBUG") return level_enum::debug; + if (env_lvl_str == "INFO") return level_enum::info; + if (env_lvl_str == "WARN") return level_enum::warn; + if (env_lvl_str == "ERROR") return level_enum::error; + if (env_lvl_str == "CRITICAL") return level_enum::critical; + if (env_lvl_str == "OFF") return level_enum::off; + std::ostringstream os{}; + os << "Invalid logging level: " << env_lvl_str; + throw std::invalid_argument(os.str()); +} + +/** + * @brief Convert a log level to an spdlog log level. + * + * @param lvl The log level to convert. + * @return The spdlog log level. + */ +spdlog::level::level_enum to_spdlog_level(level_enum lvl) +{ + return static_cast(static_cast(lvl)); +} + +/** + * @brief Convert an spdlog log level to a log level. 
+ * + * @param lvl The spdlog log level to convert. + * @return The log level. + */ +level_enum from_spdlog_level(spdlog::level::level_enum lvl) +{ + return static_cast(static_cast(lvl)); +} +} + +/** + * @brief The sink_impl class is a wrapper around an spdlog sink. + * + * This class is the impl part of the PImpl for the sink. + */ +class sink_impl { +public: + sink_impl(std::shared_ptr sink) : underlying{sink} {} +private: + std::shared_ptr underlying; + // The sink_vector needs to be able to pass the underlying sink to the spdlog logger. + friend class logger::sink_vector; +}; + +/** + * @brief The logger_impl class is a wrapper around an spdlog logger. + * + * This class is the impl part of the PImpl for the logger. + */ +class logger_impl { + public: + logger_impl(std::string name) : underlying{spdlog::logger{name}} { + underlying.set_pattern("[%6t][%H:%M:%S:%f][%-6l] %v"); + auto const env_logging_level = + std::getenv("@_RAPIDS_LOGGER_MACRO_PREFIX@_DEFAULT_LOGGING_LEVEL"); + if (env_logging_level != nullptr) { set_level(detail::string_to_level(env_logging_level)); } + auto const env_flush_level = std::getenv("@_RAPIDS_LOGGER_MACRO_PREFIX@_DEFAULT_FLUSH_LEVEL"); + if (env_flush_level != nullptr) { flush_on(detail::string_to_level(env_flush_level)); } + } + + void log(level_enum lvl, std::string const& message) { underlying.log(to_spdlog_level(lvl), message); } + void set_level(level_enum log_level) { underlying.set_level(to_spdlog_level(log_level)); } + void flush() { underlying.flush(); } + void flush_on(level_enum log_level) { underlying.flush_on(to_spdlog_level(log_level)); } + level_enum flush_level() const { return from_spdlog_level(underlying.flush_level()); } + bool should_log(level_enum lvl) const { return underlying.should_log(to_spdlog_level(lvl)); } + level_enum level() const { return from_spdlog_level(underlying.level()); } + void set_pattern(std::string pattern) { underlying.set_pattern(pattern); } + const std::vector &sinks() const { return 
underlying.sinks(); } + std::vector &sinks() { return underlying.sinks(); } + +private: + spdlog::logger underlying; ///< The spdlog logger +}; + +} // namespace detail + +// Sink vector functions +void logger::sink_vector::push_back(sink_ptr const& sink) { + sinks_.push_back(sink); + parent.impl->sinks().push_back(sink->impl->underlying); +} +void logger::sink_vector::push_back(sink_ptr&& sink) { + sinks_.push_back(sink); + parent.impl->sinks().push_back(sink->impl->underlying); +} +void logger::sink_vector::pop_back() { + sinks_.pop_back(); + parent.impl->sinks().pop_back(); +} +void logger::sink_vector::clear() { + sinks_.clear(); + parent.impl->sinks().clear(); +} + +// Sink methods +sink::sink(std::unique_ptr impl) : impl{std::move(impl)} {} + +sink::~sink() = default; + +basic_file_sink_mt::basic_file_sink_mt(std::string const& filename, bool truncate) + : sink{std::make_unique(std::make_shared(filename, truncate))} {} + +ostream_sink_mt::ostream_sink_mt(std::ostream& stream, bool force_flush) + : sink{std::make_unique(std::make_shared(stream, force_flush))} {} + +// Logger methods +logger::logger(std::string name, std::string filename) + : impl{std::make_unique(name)}, sinks_{*this} { + sinks_.push_back(std::make_shared(filename, true)); +} + +logger::logger(std::string name, std::ostream& stream) + : impl{std::make_unique(name)}, sinks_{*this} { + sinks_.push_back(std::make_shared(stream)); +} + +logger::logger(std::string name, std::vector sinks) + : impl{std::make_unique(name)}, sinks_{*this} { + for (auto const& s : sinks) { + sinks_.push_back(s); + } +} + +logger::~logger() = default; +logger::logger(logger&& other) = default; +logger& logger::operator=(logger&& other) { + impl = std::move(other.impl); + sinks_.clear(); + for (auto const& s : other.sinks_) { + sinks_.push_back(s); + } + return *this; +} + +void logger::log(level_enum lvl, std::string const& message) { impl->log(lvl, message); } +void logger::set_level(level_enum log_level) { 
impl->set_level(log_level); } +void logger::flush() { impl->flush(); } +void logger::flush_on(level_enum log_level) { impl->flush_on(log_level); } +level_enum logger::flush_level() const { return impl->flush_level(); } +bool logger::should_log(level_enum lvl) const { return impl->should_log(lvl); } +level_enum logger::level() const { return impl->level(); } +void logger::set_pattern(std::string pattern) { impl->set_pattern(pattern); } +const logger::sink_vector& logger::sinks() const { return sinks_; } +logger::sink_vector& logger::sinks() { return sinks_; } + +} // namespace @_RAPIDS_LOGGER_NAMESPACE@ +// This visibility pragma must be here so that both our logger types and those coming from includes are hidden. +#pragma GCC visibility pop diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 476028af0..b718691ca 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,8 +27,9 @@ rapids_cmake_support_conda_env(conda_env) function(ConfigureTestInternal TEST_NAME) add_executable(${TEST_NAME} ${ARGN}) target_include_directories(${TEST_NAME} PRIVATE "$") - target_link_libraries(${TEST_NAME} GTest::gmock GTest::gtest GTest::gmock_main GTest::gtest_main - pthread rmm $) + target_link_libraries( + ${TEST_NAME} PRIVATE GTest::gmock GTest::gtest GTest::gmock_main GTest::gtest_main pthread rmm + $) set_target_properties( ${TEST_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON @@ -40,9 +41,11 @@ function(ConfigureTestInternal TEST_NAME) CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON) target_compile_definitions(${TEST_NAME} - PUBLIC "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}") + PUBLIC "RMM_LOG_ACTIVE_LEVEL=RMM_LOG_LEVEL_${RMM_LOGGING_LEVEL}") target_compile_options(${TEST_NAME} PUBLIC $<$:-Wall -Werror>) + target_link_libraries(${TEST_NAME} PRIVATE rmm_test_logger) + if(DISABLE_DEPRECATION_WARNING) target_compile_options( ${TEST_NAME} PUBLIC $<$:-Xcompiler=-Wno-deprecated-declarations>) @@ -127,6 +130,12 @@ function(ConfigureTest TEST_NAME) 
endfunction() +# Create an object library for the logger so that we don't have to recompile it. +add_library(rmm_test_logger OBJECT) +target_link_libraries(rmm_test_logger PRIVATE rmm_logger_impl) + +include(../cmake/thirdparty/get_spdlog.cmake) + # test sources # device mr_ref tests @@ -159,6 +168,8 @@ ConfigureTest(STATISTICS_TEST mr/device/statistics_mr_tests.cpp) # tracking adaptor tests ConfigureTest(TRACKING_TEST mr/device/tracking_mr_tests.cpp) +target_link_libraries(TRACKING_TEST PRIVATE spdlog::spdlog_header_only) +target_link_libraries(TRACKING_PTDS_TEST PRIVATE spdlog::spdlog_header_only) # out-of-memory callback adaptor tests ConfigureTest(FAILURE_CALLBACK_TEST mr/device/failure_callback_mr_tests.cpp) @@ -195,6 +206,8 @@ ConfigureTest(PREFETCH_TEST prefetch_tests.cpp) # logger tests ConfigureTest(LOGGER_TEST logger_tests.cpp) +target_link_libraries(LOGGER_TEST PRIVATE spdlog::spdlog_header_only) +target_link_libraries(LOGGER_PTDS_TEST PRIVATE spdlog::spdlog_header_only) # arena MR tests ConfigureTest(ARENA_MR_TEST mr/device/arena_mr_tests.cpp GPUS 1 PERCENT 100) diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index 8a5d37be2..619143294 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -152,8 +152,8 @@ TEST(Adaptor, MultiSinkConstructor) std::string filename2{temp_dir.generate_path("test_multi_2.txt")}; rmm::mr::cuda_memory_resource upstream; - auto file_sink1 = std::make_shared(filename1, true); - auto file_sink2 = std::make_shared(filename2, true); + auto file_sink1 = std::make_shared(filename1, true); + auto file_sink2 = std::make_shared(filename2, true); rmm::mr::logging_resource_adaptor log_mr{&upstream, {file_sink1, file_sink2}}; diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp index 3fce55fb8..c40a9127d 100644 --- a/tests/mr/device/tracking_mr_tests.cpp +++ b/tests/mr/device/tracking_mr_tests.cpp @@ -23,7 +23,6 @@ #include #include -#include namespace rmm::test { namespace { 
@@ -203,9 +202,9 @@ TEST(TrackingTest, DeallocWrongBytes) TEST(TrackingTest, LogOutstandingAllocations) { std::ostringstream oss; - auto oss_sink = std::make_shared(oss); - rmm::detail::logger().sinks().push_back(oss_sink); - auto old_level = rmm::detail::logger().level(); + auto oss_sink = std::make_shared(oss); + auto old_level = rmm::default_logger().level(); + rmm::default_logger().sinks().push_back(oss_sink); tracking_adaptor mr{rmm::mr::get_current_device_resource_ref()}; std::vector allocations; @@ -213,10 +212,10 @@ TEST(TrackingTest, LogOutstandingAllocations) allocations.push_back(mr.allocate(ten_MiB)); } - rmm::detail::logger().set_level(spdlog::level::debug); + rmm::default_logger().set_level(rmm::level_enum::debug); EXPECT_NO_THROW(mr.log_outstanding_allocations()); -#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG +#if RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG EXPECT_NE(oss.str().find("Outstanding Allocations"), std::string::npos); #endif @@ -224,8 +223,8 @@ TEST(TrackingTest, LogOutstandingAllocations) mr.deallocate(allocation, ten_MiB); } - rmm::detail::logger().set_level(old_level); - rmm::detail::logger().sinks().pop_back(); + rmm::default_logger().set_level(old_level); + rmm::default_logger().sinks().pop_back(); } } // namespace From c9c6039ab71f91fb41376abea7ec36b8a2563de1 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:27:22 -0600 Subject: [PATCH 6/9] Add breaking change workflow trigger (#1719) * add breaking change notifier [skip ci] * test commit * use target --- .../trigger-breaking-change-alert.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/trigger-breaking-change-alert.yaml diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml new file mode 100644 index 000000000..3b972f31c --- /dev/null +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -0,0 +1,26 
@@ +name: Trigger Breaking Change Notifications + +on: + pull_request_target: + types: + - closed + - reopened + - labeled + - unlabeled + +jobs: + trigger-notifier: + if: contains(github.event.pull_request.labels.*.name, 'breaking') + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12 + with: + sender_login: ${{ github.event.sender.login }} + sender_avatar: ${{ github.event.sender.avatar_url }} + repo: ${{ github.repository }} + pr_number: ${{ github.event.pull_request.number }} + pr_title: "${{ github.event.pull_request.title }}" + pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" + pr_base_ref: ${{ github.event.pull_request.base.ref }} + pr_author: ${{ github.event.pull_request.user.login }} + event_action: ${{ github.event.action }} + pr_merged: ${{ github.event.pull_request.merged }} From a6a455dc23404b531db8271aaf84b688b2ffabc9 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 26 Nov 2024 16:38:51 -0600 Subject: [PATCH 7/9] Require approval to run CI on draft PRs (#1737) By default, CI runs on draft PRs. This leads to many CI runs that may be unnecessary. With this PR's change to `.github/copy-pr-bot.yaml`, an `/ok to test` comment from a trusted user is required to trigger CI on draft PRs. Non-draft PRs will run CI by default, assuming that all commits are signed by trusted users. Otherwise an `/ok to test` is required (as before) -- see the `copy-pr-bot` docs at https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ for more information. Part of https://github.com/rapidsai/build-planning/issues/123. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/rmm/pull/1737 --- .github/copy-pr-bot.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 895ba83ee..e0ea775aa 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -2,3 +2,4 @@ # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ enabled: true +auto_sync_draft: false From f9b9f843466b2dcd8872f1707d274ad15be37324 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 26 Nov 2024 15:45:55 -0800 Subject: [PATCH 8/9] Use consistent signature for target_link_libraries (#1738) It looks like while #1722 introduced usage of the modern target_link_libraries syntax it did not adjust all other calls because I wasn't setting up coverage usage locally or anywhere else in CI. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/rmm/pull/1738 --- tests/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b718691ca..413f27f26 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -69,7 +69,7 @@ function(ConfigureTestInternal TEST_NAME) ${TEST_NAME} PUBLIC $<$:-O0 --coverage -fprofile-abs-path -fkeep-inline-functions -fno-elide-constructors>) target_link_options(${TEST_NAME} PRIVATE --coverage) - target_link_libraries(${TEST_NAME} gcov) + target_link_libraries(${TEST_NAME} PRIVATE gcov) endif() # Add coverage-generated files to clean target @@ -111,13 +111,13 @@ function(ConfigureTest TEST_NAME) # Test with legacy default stream. 
ConfigureTestInternal(${TEST_NAME} ${_RMM_TEST_UNPARSED_ARGUMENTS}) - target_link_libraries(${TEST_NAME} ${cudart_link_libs}) + target_link_libraries(${TEST_NAME} PRIVATE ${cudart_link_libs}) # Test with per-thread default stream. string(REGEX REPLACE "_TEST$" "_PTDS_TEST" PTDS_TEST_NAME "${TEST_NAME}") ConfigureTestInternal("${PTDS_TEST_NAME}" ${_RMM_TEST_UNPARSED_ARGUMENTS}) target_compile_definitions("${PTDS_TEST_NAME}" PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) - target_link_libraries(${PTDS_TEST_NAME} ${cudart_link_libs}) + target_link_libraries(${PTDS_TEST_NAME} PRIVATE ${cudart_link_libs}) foreach(name ${TEST_NAME} ${PTDS_TEST_NAME} ${NS_TEST_NAME}) rapids_test_add( From d4066fa611c803430c9bc5dbe8e243f89bb9a25c Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Nov 2024 06:42:20 -0800 Subject: [PATCH 9/9] Fix some logger issues (#1739) This fixes a handful of issues uncovered in downstream CI after #1722. The following are bugs introduced in #1722: - When using rmm from the build directory rather than an installation, the namespaced targets are not present so we must generate aliases. - The `RMM_LOGGING_ASSERT` macro is never used in rmm itself, so we didn't catch that it was still using the old version of the logger. While fixing the above, I also uncovered that building fmt in this environment unearths a gcc bug. The following are underlying issues uncovered by #1722: - spdlog's fmt CMake linkage is determined at build time. As a result, the conda package for spdlog is hardcoded to use fmt as a library (static or shared depends on what the `fmt::fmt` target winds up being when a consumer using spdlog finds fmt in CMake), which means that is propagated to all consumers of the librmm package via its CMake. This means that we often wind up with both fmt_header_only and fmt as link targets for many RAPIDS libraries. 
For now, this PR makes it so that if `rapids_cpm_find(spdlog)` does not find a copy of spdlog locally, the cloned version will use an external header-only fmt via rapids-cmake's logic, which ensures that packages like wheels do not export a libfmt or libspdlog dependency. However, in environments where `rapids_cpm_find(spdlog)` does find a preexisting package, we allow that package's fmt linkage to propagate. In conda environments, we know that this fmt linkage is to the library, so we keep fmt as part of rmm's runtime dependencies (by placing it in host and relying on the run export) so that libfmt is always available in environments using rmm. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/rmm/pull/1739 --- CMakeLists.txt | 2 ++ ci/check_symbols.sh | 4 +++- cmake/thirdparty/get_spdlog.cmake | 8 ++++++++ conda/environments/all_cuda-118_arch-x86_64.yaml | 1 - conda/environments/all_cuda-125_arch-x86_64.yaml | 1 - conda/recipes/librmm/meta.yaml | 14 +++++++------- dependencies.yaml | 1 - include/rmm/detail/logging_assert.hpp | 2 +- rapids_logger/logger_impl.hpp.in | 6 ++++++ 9 files changed, 27 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a7ba3525..8c5bd5428 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,6 +75,8 @@ rapids_cpm_init() add_subdirectory(rapids_logger) rapids_make_logger(rmm EXPORT_SET rmm-exports) +add_library(rmm::rmm_logger ALIAS rmm_logger) +add_library(rmm::rmm_logger_impl ALIAS rmm_logger_impl) include(cmake/thirdparty/get_cccl.cmake) include(cmake/thirdparty/get_nvtx.cmake) diff --git a/ci/check_symbols.sh b/ci/check_symbols.sh index 155e509da..377a93cac 100755 --- a/ci/check_symbols.sh +++ b/ci/check_symbols.sh @@ -53,7 +53,9 @@ for dso_file in ${dso_files}; do fi echo "checking for 'spdlog::' symbols..." 
- if grep -E 'spdlog\:\:' < "${symbol_file}"; then + if grep -E 'spdlog\:\:' < "${symbol_file}" \ + | grep -v 'std\:\:_Destroy_aux' + then raise-symbols-found-error 'spdlog::' fi echo "No symbol visibility issues found" diff --git a/cmake/thirdparty/get_spdlog.cmake b/cmake/thirdparty/get_spdlog.cmake index 212f604c3..febdf4c5c 100644 --- a/cmake/thirdparty/get_spdlog.cmake +++ b/cmake/thirdparty/get_spdlog.cmake @@ -17,6 +17,14 @@ function(find_and_configure_spdlog) include(${rapids-cmake-dir}/cpm/spdlog.cmake) rapids_cpm_spdlog( + # The conda package for spdlog is hard-coded to assume that we use a preexisting fmt library. This + # is why we have always had a libfmt linkage despite choosing to specify the header-only version + # of fmt. We need a more robust way of modifying this to support fully self-contained build and + # usage even in environments where fmt and/or spdlog are already present. The crudest solution + # would be to modify the interface compile definitions and link libraries of the spdlog target, + # if necessary. For now I'm specifying EXTERNAL_FMT_HO here so that in environments where spdlog + # is cloned and built from source we wind up with the behavior that we expect, but we'll have to + # resolve this properly eventually.
FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports BUILD_EXPORT_SET rmm-exports) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 519c056b5..ad2cbf9e6 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -35,7 +35,6 @@ dependencies: - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0.dev0 - scikit-build-core >=0.10.0 -- spdlog>=1.14.1,<1.15 - sphinx - sphinx-copybutton - sphinx-markdown-tables diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 86e887c21..520c7d743 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -34,7 +34,6 @@ dependencies: - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0.dev0 - scikit-build-core >=0.10.0 -- spdlog>=1.14.1,<1.15 - sphinx - sphinx-copybutton - sphinx-markdown-tables diff --git a/conda/recipes/librmm/meta.yaml b/conda/recipes/librmm/meta.yaml index 53e16ebdc..31aaf0e63 100644 --- a/conda/recipes/librmm/meta.yaml +++ b/conda/recipes/librmm/meta.yaml @@ -26,13 +26,13 @@ requirements: - {{ stdlib("c") }} host: - cuda-version ={{ cuda_version }} - # We require spdlog and fmt (which was de-vendored from spdlog - # conda-forge packages in 1.11.0) so that the spdlog headers are not - # pulled by CPM and installed as a part of the rmm packages. However, - # building against librmm still requires these headers. They are also - # added as a run requirement via the packages' run_exports. 
+ # We need fmt here for now because the conda spdlog package is hard-coded + # to use fmt as a compiled library, not header-only. We must therefore + # ensure that the library is present, so that when a downstream library + # builds against rmm and some other package in its build environment uses + # fmt (or spdlog), the default rmm build stays consistent with such + # environments. - fmt {{ fmt_version }} - - spdlog {{ spdlog_version }} build: script_env: @@ -77,8 +77,8 @@ outputs: {% if cuda_major == "11" %} - cudatoolkit {% endif %} + # See the comment about fmt in the top-level host requirements above. - fmt {{ fmt_version }} - - spdlog {{ spdlog_version }} test: commands: - test -d "${PREFIX}/include/rmm" diff --git a/dependencies.yaml b/dependencies.yaml index 070248edb..f92268639 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -112,7 +112,6 @@ dependencies: - c-compiler - cxx-compiler - fmt>=11.0.2,<12 - - spdlog>=1.14.1,<1.15 specific: - output_types: conda matrices: diff --git a/include/rmm/detail/logging_assert.hpp b/include/rmm/detail/logging_assert.hpp index c3b12ffe3..d5b2ca10a 100644 --- a/include/rmm/detail/logging_assert.hpp +++ b/include/rmm/detail/logging_assert.hpp @@ -38,7 +38,7 @@ if (!success) { \ RMM_LOG_CRITICAL( \ "[" __FILE__ ":" RMM_STRINGIFY(__LINE__) "] Assertion " RMM_STRINGIFY(_expr) " failed."); \ - rmm::detail::logger().flush(); \ + rmm::default_logger().flush(); \ /* NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) */ \ assert(success); \ } \ diff --git a/rapids_logger/logger_impl.hpp.in b/rapids_logger/logger_impl.hpp.in index 717a00ac9..d5b467571 100644 --- a/rapids_logger/logger_impl.hpp.in +++ b/rapids_logger/logger_impl.hpp.in @@ -26,10 +26,16 @@ // Start hiding before including spdlog headers. #pragma GCC visibility push(hidden) +// This issue claims to have been resolved in gcc 8, but we still seem to encounter it here.
+// The code compiles and links and all tests pass, and nm shows symbols resolved as expected. +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80947 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" #include #include #include +#pragma GCC diagnostic pop #include #include