diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 000000000..04689c330
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,67 @@
+---
+Checks: 'clang-diagnostic-*,
+        clang-analyzer-*,
+        cppcoreguidelines-*,
+        modernize-*,
+        bugprone-*,
+        performance-*,
+        readability-*,
+        llvm-*,
+        -cppcoreguidelines-macro-usage,
+        -llvm-header-guard,
+        -modernize-use-trailing-return-type,
+        -readability-named-parameter'
+WarningsAsErrors: ''
+HeaderFilterRegex: ''
+AnalyzeTemporaryDtors: false
+FormatStyle: none
+CheckOptions:
+  - key: cert-dcl16-c.NewSuffixes
+    value: 'L;LL;LU;LLU'
+  - key: cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField
+    value: '0'
+  - key: cert-str34-c.DiagnoseSignedUnsignedCharComparisons
+    value: '0'
+  - key: cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors
+    value: '1'
+  - key: cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic
+    value: '1'
+  - key: google-readability-braces-around-statements.ShortStatementLines
+    value: '1'
+  - key: google-readability-function-size.StatementThreshold
+    value: '800'
+  - key: google-readability-namespace-comments.ShortNamespaceLines
+    value: '10'
+  - key: google-readability-namespace-comments.SpacesBeforeComments
+    value: '2'
+  - key: llvm-else-after-return.WarnOnConditionVariables
+    value: '0'
+  - key: llvm-else-after-return.WarnOnUnfixable
+    value: '0'
+  - key: llvm-qualified-auto.AddConstToQualified
+    value: '0'
+  - key: modernize-loop-convert.MaxCopySize
+    value: '16'
+  - key: modernize-loop-convert.MinConfidence
+    value: reasonable
+  - key: modernize-loop-convert.NamingStyle
+    value: CamelCase
+  - key: modernize-pass-by-value.IncludeStyle
+    value: llvm
+  - key: modernize-replace-auto-ptr.IncludeStyle
+    value: llvm
+  - key: modernize-use-nullptr.NullMacros
+    value: 'NULL'
+  - key: readability-identifier-length.IgnoredParameterNames
+    value: 'mr|os'
+  - key: readability-identifier-length.IgnoredVariableNames
+    value: 'mr|_'
+  #- key: readability-function-cognitive-complexity.IgnoreMacros
+  #  value: '1'
+  - key: bugprone-easily-swappable-parameters.IgnoredParameterNames
+    value: 'alignment'
+  - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues
+    value: '1'
+  - key: readability-magic-numbers.IgnorePowersOf2IntegerValues
+    value: '1'
+...
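A quick illustration of how two of the CheckOptions above behave in practice (this snippet is mine, not part of the patch): with IgnorePowersOf2IntegerValues set to '1', power-of-two literals pass the magic-numbers checks while other literals still want named constants, and IgnoredVariableNames 'mr|_' keeps RMM's idiomatic short names warning-free.

    #include <cstddef>

    constexpr std::size_t size_mb{1 << 20};    // OK: power of two is exempted
    constexpr int allocation_probability{73};  // named constant instead of a bare 73

    // 'mr' and '_' are exempt from readability-identifier-length, so the
    // common Google Benchmark idiom stays clean:
    //   for (auto _ : state) { ... }
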
diff --git a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp
index a536077f9..6710ffe50 100644
--- a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp
+++ b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,36 +14,36 @@
  * limitations under the License.
 */

-#include <benchmark/benchmark.h>
-
 #include <rmm/cuda_stream.hpp>
 #include <rmm/cuda_stream_pool.hpp>
 #include <rmm/detail/error.hpp>

+#include <benchmark/benchmark.h>
+
 #include <cuda_runtime_api.h>

 static void BM_StreamPoolGetStream(benchmark::State& state)
 {
   rmm::cuda_stream_pool stream_pool{};

-  for (auto _ : state) {
-    auto s = stream_pool.get_stream();
-    cudaStreamQuery(s.value());
+  for (auto _ : state) {  // NOLINT(clang-analyzer-deadcode.DeadStores)
+    auto stream = stream_pool.get_stream();
+    cudaStreamQuery(stream.value());
   }

-  state.SetItemsProcessed(state.iterations());
+  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()));
 }
 BENCHMARK(BM_StreamPoolGetStream)->Unit(benchmark::kMicrosecond);

 static void BM_CudaStreamClass(benchmark::State& state)
 {
-  for (auto _ : state) {
-    auto s = rmm::cuda_stream{};
-    cudaStreamQuery(s.view().value());
+  for (auto _ : state) {  // NOLINT(clang-analyzer-deadcode.DeadStores)
+    auto stream = rmm::cuda_stream{};
+    cudaStreamQuery(stream.view().value());
   }

-  state.SetItemsProcessed(state.iterations());
+  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()));
 }
 BENCHMARK(BM_CudaStreamClass)->Unit(benchmark::kMicrosecond);
diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu
index 01d81c55d..7e73451e6 100644
--- a/benchmarks/device_uvector/device_uvector_bench.cu
+++ b/benchmarks/device_uvector/device_uvector_bench.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,33 +14,35 @@
  * limitations under the License.
 */

-#include <benchmark/benchmark.h>
-
-#include <cuda_runtime_api.h>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/device_vector.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>

+#include <benchmark/benchmark.h>
+
+#include <cuda_runtime_api.h>
+
 static void BM_UvectorSizeConstruction(benchmark::State& state)
 {
   rmm::mr::cuda_memory_resource cuda_mr{};
   rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
   rmm::mr::set_current_device_resource(&mr);

-  for (auto _ : state) {
+  for (auto _ : state) {  // NOLINT(clang-analyzer-deadcode.DeadStores)
     rmm::device_uvector<int32_t> vec(state.range(0), rmm::cuda_stream_view{});
     cudaDeviceSynchronize();
   }

-  state.SetItemsProcessed(state.iterations());
+  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()));

   rmm::mr::set_current_device_resource(nullptr);
 }
+
 BENCHMARK(BM_UvectorSizeConstruction)
-  ->RangeMultiplier(10)
-  ->Range(10'000, 1'000'000'000)
+  ->RangeMultiplier(10)           // NOLINT
+  ->Range(10'000, 1'000'000'000)  // NOLINT
   ->Unit(benchmark::kMicrosecond);

 static void BM_ThrustVectorSizeConstruction(benchmark::State& state)
@@ -49,19 +51,19 @@ static void BM_ThrustVectorSizeConstruction(benchmark::State& state)
   rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
   rmm::mr::set_current_device_resource(&mr);

-  for (auto _ : state) {
+  for (auto _ : state) {  // NOLINT(clang-analyzer-deadcode.DeadStores)
     rmm::device_vector<int32_t> vec(state.range(0));
     cudaDeviceSynchronize();
   }

-  state.SetItemsProcessed(state.iterations());
+  state.SetItemsProcessed(static_cast<int64_t>(state.iterations()));

   rmm::mr::set_current_device_resource(nullptr);
 }

 BENCHMARK(BM_ThrustVectorSizeConstruction)
-  ->RangeMultiplier(10)
-  ->Range(10'000, 1'000'000'000)
+  ->RangeMultiplier(10)           // NOLINT
+  ->Range(10'000, 1'000'000'000)  // NOLINT
   ->Unit(benchmark::kMicrosecond);

 BENCHMARK_MAIN();
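The two benchmarks above repeat one setup pattern; here is a minimal sketch of it (mine, not part of the patch), using only constructs that appear in the diff: stack a pool on top of the CUDA resource, install it as the current device resource, and restore the default afterwards.

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/per_device_resource.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>

    void with_pool()
    {
      rmm::mr::cuda_memory_resource cuda_mr{};
      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};
      rmm::mr::set_current_device_resource(&pool_mr);
      // ... allocations in this scope are served from the pool ...
      rmm::mr::set_current_device_resource(nullptr);  // back to the default resource
    }
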
diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
index 7d0a8a17a..5ed1b31f9 100644
--- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
+++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
@@ -16,8 +16,6 @@

 #include <benchmarks/utilities/cxxopts.hpp>

-#include <benchmark/benchmark.h>
-
 #include <rmm/cuda_stream.hpp>
 #include <rmm/cuda_stream_pool.hpp>
 #include <rmm/cuda_stream_view.hpp>
@@ -31,15 +29,18 @@

 #include <rmm/mr/device/pool_memory_resource.hpp>

+#include <benchmark/benchmark.h>
+
 #include <cuda_runtime_api.h>

 __global__ void compute_bound_kernel(int64_t* out)
 {
   clock_t clock_begin   = clock64();
   clock_t clock_current = clock_begin;
+  auto const million{1'000'000};

-  if (threadIdx.x == 0) {
-    while (clock_current - clock_begin < 1000000) {
+  if (threadIdx.x == 0) {  // NOLINT(readability-static-accessed-through-instance)
+    while (clock_current - clock_begin < million) {
       clock_current = clock64();
     }
   }
@@ -69,7 +70,7 @@ static void run_test(std::size_t num_kernels,
   }
 }

-static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc factory)
+static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc const& factory)
 {
   auto mr = factory();

@@ -77,18 +78,18 @@ static void BM_MultiStreamAllocations(benchmark::State& state, MRFactoryFunc fac

   auto num_streams = state.range(0);
   auto num_kernels = state.range(1);
-  auto do_prewarm  = state.range(2);
+  bool do_prewarm  = state.range(2) != 0;

   auto stream_pool = rmm::cuda_stream_pool(num_streams);

   if (do_prewarm) { run_prewarm(stream_pool, mr.get()); }

-  for (auto _ : state) {
+  for (auto _ : state) {  // NOLINT(clang-analyzer-deadcode.DeadStores)
     run_test(num_kernels, stream_pool, mr.get());
     cudaDeviceSynchronize();
   }

-  state.SetItemsProcessed(state.iterations() * num_kernels);
+  state.SetItemsProcessed(static_cast<int64_t>(state.iterations() * num_kernels));

   rmm::mr::set_current_device_resource(nullptr);
 }
@@ -112,19 +113,22 @@ inline auto make_binning()
   auto pool = make_pool();
   // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB
   // Larger allocations will use the pool resource
-  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool, 18, 22);
+  constexpr auto min_bin_pow2{18};
+  constexpr auto max_bin_pow2{22};
+  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
+    pool, min_bin_pow2, max_bin_pow2);
   return mr;
 }

-static void benchmark_range(benchmark::internal::Benchmark* b)
+static void benchmark_range(benchmark::internal::Benchmark* bench)
 {
-  b  //
+  bench  //
     ->RangeMultiplier(2)
     ->Ranges({{1, 4}, {4, 4}, {false, true}})
     ->Unit(benchmark::kMicrosecond);
 }

-MRFactoryFunc get_mr_factory(std::string resource_name)
+MRFactoryFunc get_mr_factory(std::string const& resource_name)
 {
   if (resource_name == "cuda") { return &make_cuda; }
 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
@@ -139,7 +143,7 @@ MRFactoryFunc get_mr_factory(std::string resource_name)
   RMM_FAIL();
 }

-void declare_benchmark(std::string name)
+void declare_benchmark(std::string const& name)
 {
   if (name == "cuda") {
     BENCHMARK_CAPTURE(BM_MultiStreamAllocations, cuda, &make_cuda)  //
@@ -176,7 +180,8 @@ void declare_benchmark(std::string name)
   std::cout << "Error: invalid memory_resource name: " << name << std::endl;
 }

-void run_profile(std::string resource_name, int kernel_count, int stream_count, bool prewarm)
+// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
+void run_profile(std::string const& resource_name, int kernel_count, int stream_count, bool prewarm)
 {
   auto mr_factory = get_mr_factory(resource_name);
   auto mr         = mr_factory();
@@ -189,65 +194,75 @@ void run_profile(std::string resource_name, int kernel_count, int stream_count,

 int main(int argc, char** argv)
 {
-  ::benchmark::Initialize(&argc, argv);
-
-  // Parse for replay arguments:
-  cxxopts::Options options(
-    "RMM Multi Stream Allocations Benchmark",
-    "Benchmarks interleaving temporary allocations with compute-bound kernels.");
-
-  options.add_options()(  //
-    "p,profile",
-    "Profiling mode: run once",
-    cxxopts::value<bool>()->default_value("false"));
-
-  options.add_options()(  //
-    "r,resource",
-    "Type of device_memory_resource",
-    cxxopts::value<std::string>()->default_value("pool"));
-
-  options.add_options()(  //
-    "k,kernels",
-    "Number of kernels to run: (default: 8)",
-    cxxopts::value<int>()->default_value("8"));
-
-  options.add_options()(  //
-    "s,streams",
-    "Number of streams in stream pool (default: 8)",
-    cxxopts::value<int>()->default_value("8"));
-
-  options.add_options()(  //
-    "w,warm",
-    "Ensure each stream has enough memory to satisfy allocations.",
-    cxxopts::value<bool>()->default_value("false"));
-
-  auto args = options.parse(argc, argv);
-
-  if (args.count("profile") > 0) {
-    auto resource_name = args["resource"].as<std::string>();
-    auto num_kernels   = args["kernels"].as<int>();
-    auto num_streams   = args["streams"].as<int>();
-    auto prewarm       = args["warm"].as<bool>();
-    run_profile(resource_name, num_kernels, num_streams, prewarm);
-  } else {
-    auto resource_names = std::vector<std::string>();
-
-    if (args.count("resource") > 0) {
-      resource_names.emplace_back(args["resource"].as<std::string>());
+  try {
+    ::benchmark::Initialize(&argc, argv);
+
+    // Parse for replay arguments:
+    cxxopts::Options options(
+      "RMM Multi Stream Allocations Benchmark",
+      "Benchmarks interleaving temporary allocations with compute-bound kernels.");
+
+    options.add_options()(  //
+      "p,profile",
+      "Profiling mode: run once",
+      cxxopts::value<bool>()->default_value("false"));
+
+    options.add_options()(  //
+      "r,resource",
+      "Type of device_memory_resource",
+      cxxopts::value<std::string>()->default_value("pool"));
+
+    options.add_options()(  //
+      "k,kernels",
+      "Number of kernels to run: (default: 8)",
+      cxxopts::value<int>()->default_value("8"));
+
+    options.add_options()(  //
+      "s,streams",
+      "Number of streams in stream pool (default: 8)",
+      cxxopts::value<int>()->default_value("8"));
+
+    options.add_options()(  //
+      "w,warm",
+      "Ensure each stream has enough memory to satisfy allocations.",
+      cxxopts::value<bool>()->default_value("false"));
+
+    auto args = options.parse(argc, argv);
+
+    if (args.count("profile") > 0) {
+      auto resource_name = args["resource"].as<std::string>();
+      auto num_kernels   = args["kernels"].as<int>();
+      auto num_streams   = args["streams"].as<int>();
+      auto prewarm       = args["warm"].as<bool>();
+      try {
+        run_profile(resource_name, num_kernels, num_streams, prewarm);
+      } catch (std::exception const& e) {
+        std::cout << "Exception caught: " << e.what() << std::endl;
+      }
     } else {
-      resource_names.emplace_back("cuda");
+      auto resource_names = std::vector<std::string>();
+
+      if (args.count("resource") > 0) {
+        resource_names.emplace_back(args["resource"].as<std::string>());
+      } else {
+        resource_names.emplace_back("cuda");
 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-      resource_names.emplace_back("cuda_async");
+        resource_names.emplace_back("cuda_async");
 #endif
-      resource_names.emplace_back("pool");
-      resource_names.emplace_back("arena");
-      resource_names.emplace_back("binning");
-    }
+        resource_names.emplace_back("pool");
+        resource_names.emplace_back("arena");
+        resource_names.emplace_back("binning");
+      }

-    for (auto& resource_name : resource_names) {
-      declare_benchmark(resource_name);
-    }
+      for (auto& resource_name : resource_names) {
+        declare_benchmark(resource_name);
+      }

-    ::benchmark::RunSpecifiedBenchmarks();
+      ::benchmark::RunSpecifiedBenchmarks();
+    }
+  } catch (std::exception const& e) {
+    std::cout << "Exception caught: " << e.what() << std::endl;
   }
+
+  return 0;
 }
diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
index a69b26b91..828561dd1 100644
--- a/benchmarks/random_allocations/random_allocations.cpp
+++ b/benchmarks/random_allocations/random_allocations.cpp
@@ -38,9 +38,9 @@ namespace {
 constexpr std::size_t size_mb{1 << 20};

 struct allocation {
-  void* p{nullptr};
+  void* ptr{nullptr};
   std::size_t size{0};
-  allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {}
+  allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {}
   allocation() = default;
 };

@@ -70,9 +70,10 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,

   max_usage *= size_mb;  // convert to bytes

-  constexpr int allocation_probability = 73;  // percent
-  std::uniform_int_distribution<int> op_distribution(0, 99);
-  std::uniform_int_distribution<int> index_distribution(0, num_allocations - 1);
+  constexpr int allocation_probability{73};  // percent
+  constexpr int max_op_chance{99};
+  std::uniform_int_distribution<int> op_distribution(0, max_op_chance);
+  std::uniform_int_distribution<int> index_distribution(0, static_cast<int>(num_allocations) - 1);

   int active_allocations{0};
   std::size_t allocation_count{0};
@@ -117,7 +118,7 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,
       std::size_t index = index_distribution(generator) % active_allocations;
       active_allocations--;
       allocation to_free = remove_at(allocations, index);
-      mr.deallocate(to_free.p, to_free.size, stream);
+      mr.deallocate(to_free.ptr, to_free.size, stream);
       allocation_size -= to_free.size;

 #if VERBOSE
@@ -136,11 +137,12 @@ void random_allocation_free(rmm::mr::device_memory_resource& mr,
   }
 }
 }  // namespace

-void uniform_random_allocations(rmm::mr::device_memory_resource& mr,
-                                std::size_t num_allocations,
-                                std::size_t max_allocation_size,  // in MiB
-                                std::size_t max_usage,
-                                rmm::cuda_stream_view stream = {})
+void uniform_random_allocations(
+  rmm::mr::device_memory_resource& mr,
+  std::size_t num_allocations,      // NOLINT(bugprone-easily-swappable-parameters)
+  std::size_t max_allocation_size,  // size in MiB
+  std::size_t max_usage,
+  rmm::cuda_stream_view stream = {})
 {
   std::uniform_int_distribution<std::size_t> size_distribution(1, max_allocation_size * size_mb);
   random_allocation_free(mr, size_distribution, num_allocations, max_usage, stream);
@@ -176,7 +178,10 @@ inline auto make_binning()
   auto pool = make_pool();
   // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB
   // Larger allocations will use the pool resource
-  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool, 18, 22);
+  constexpr auto min_bin_pow2{18};
+  constexpr auto max_bin_pow2{22};
+  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
+    pool, min_bin_pow2, max_bin_pow2);
   return mr;
 }

@@ -184,7 +189,7 @@
 using MRFactoryFunc = std::function<std::shared_ptr<rmm::mr::device_memory_resource>()>;

-static void num_range(benchmark::internal::Benchmark* b, int size)
+static void num_range(benchmark::internal::Benchmark* bench, int size)
 {
-  for (int num_allocations : std::vector<int>{1000, 10000, 100000})
-    b->Args({num_allocations, size})->Unit(benchmark::kMillisecond);
+  for (int num_allocations : std::vector<int>{1000, 10000, 100000}) {
+    bench->Args({num_allocations, size})->Unit(benchmark::kMillisecond);
+  }
 }

-static void size_range(benchmark::internal::Benchmark* b, int num)
+static void size_range(benchmark::internal::Benchmark* bench, int num)
 {
-  for (int max_size : std::vector<int>{1, 4, 64, 256, 1024, 4096})
-    b->Args({num, max_size})->Unit(benchmark::kMillisecond);
+  for (int max_size : std::vector<int>{1, 4, 64, 256, 1024, 4096}) {
+    bench->Args({num, max_size})->Unit(benchmark::kMillisecond);
+  }
 }

-static void num_size_range(benchmark::internal::Benchmark* b)
+static void num_size_range(benchmark::internal::Benchmark* bench)
 {
-  for (int num_allocations : std::vector<int>{1000, 10000, 100000})
-    size_range(b, num_allocations);
+  for (int num_allocations : std::vector<int>{1000, 10000, 100000}) {
+    size_range(bench, num_allocations);
+  }
 }

-int num_allocations = -1;
-int max_size        = -1;
+int num_allocations = -1;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
+int max_size        = -1;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)

-static void benchmark_range(benchmark::internal::Benchmark* b)
+void benchmark_range(benchmark::internal::Benchmark* bench)
 {
   if (num_allocations > 0) {
-    if (max_size > 0)
-      b->Args({num_allocations, max_size})->Unit(benchmark::kMillisecond);
-    else
-      size_range(b, num_allocations);
+    if (max_size > 0) {
+      bench->Args({num_allocations, max_size})->Unit(benchmark::kMillisecond);
+    } else {
+      size_range(bench, num_allocations);
+    }
   } else {
-    if (max_size > 0)
-      num_range(b, max_size);
-    else
-      num_size_range(b);
+    if (max_size > 0) {
+      num_range(bench, max_size);
+    } else {
+      num_size_range(bench);
+    }
   }
 }

-void declare_benchmark(std::string name)
+void declare_benchmark(std::string const& name)
 {
-  if (name == "cuda")
-    BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda)->Apply(benchmark_range);
-  if (name == "cuda_async")
-    BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async)
+  if (name == "cuda") {
+    BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda)  // NOLINT
+      ->Apply(benchmark_range);
+  }
+  if (name == "cuda_async") {
+    BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async)  // NOLINT
+      ->Apply(benchmark_range);
+  } else if (name == "binning") {
+    BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)  // NOLINT
       ->Apply(benchmark_range);
-  else if (name == "binning")
-    BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)->Apply(benchmark_range);
-  else if (name == "pool")
-    BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool)->Apply(benchmark_range);
-  else if (name == "arena")
-    BENCHMARK_CAPTURE(BM_RandomAllocations, arena_mr, &make_arena)->Apply(benchmark_range);
-  else
+  } else if (name == "pool") {
+    BENCHMARK_CAPTURE(BM_RandomAllocations, pool_mr, &make_pool)  // NOLINT
+      ->Apply(benchmark_range);
+  } else if (name == "arena") {
+    BENCHMARK_CAPTURE(BM_RandomAllocations, arena_mr, &make_arena)  // NOLINT
+      ->Apply(benchmark_range);
+  } else {
     std::cout << "Error: invalid memory_resource name: " << name << "\n";
+  }
 }

-static void profile_random_allocations(MRFactoryFunc factory,
+static void profile_random_allocations(MRFactoryFunc const& factory,
                                        std::size_t num_allocations,
                                        std::size_t max_size)
 {
@@ -267,66 +284,73 @@ static void profile_random_allocations(MRFactoryFunc factory,

 int main(int argc, char** argv)
 {
-  // benchmark::Initialize will remove GBench command line arguments it
-  // recognizes and leave any remaining arguments
-  ::benchmark::Initialize(&argc, argv);
-
-  // Parse for replay arguments:
-  cxxopts::Options options("RMM Random Allocations Benchmark",
-                           "Benchmarks random allocations within a size range.");
-
-  options.add_options()(
-    "p,profile", "Profiling mode: run once", cxxopts::value<bool>()->default_value("false"));
-  options.add_options()("r,resource",
-                        "Type of device_memory_resource",
-                        cxxopts::value<std::string>()->default_value("pool"));
-  options.add_options()("n,numallocs",
-                        "Number of allocations (default of 0 tests a range)",
-                        cxxopts::value<int>()->default_value("1000"));
-  options.add_options()("m,maxsize",
-                        "Maximum allocation size (default of 0 tests a range)",
-                        cxxopts::value<int>()->default_value("4096"));
-
-  auto args       = options.parse(argc, argv);
-  num_allocations = args["numallocs"].as<int>();
-  max_size        = args["maxsize"].as<int>();
-
-  if (args.count("profile") > 0) {
-    std::map<std::string, MRFactoryFunc> const funcs({{"arena", &make_arena},
-                                                      {"binning", &make_binning},
-                                                      {"cuda", &make_cuda},
+  try {
+    // benchmark::Initialize will remove GBench command line arguments it
+    // recognizes and leave any remaining arguments
+    ::benchmark::Initialize(&argc, argv);
+
+    // Parse for replay arguments:
+    cxxopts::Options options("RMM Random Allocations Benchmark",
+                             "Benchmarks random allocations within a size range.");
+
+    options.add_options()(
+      "p,profile", "Profiling mode: run once", cxxopts::value<bool>()->default_value("false"));
+    options.add_options()("r,resource",
+                          "Type of device_memory_resource",
+                          cxxopts::value<std::string>()->default_value("pool"));
+    options.add_options()("n,numallocs",
+                          "Number of allocations (default of 0 tests a range)",
+                          cxxopts::value<int>()->default_value("1000"));
+    options.add_options()("m,maxsize",
+                          "Maximum allocation size (default of 0 tests a range)",
+                          cxxopts::value<int>()->default_value("4096"));
+
+    auto args       = options.parse(argc, argv);
+    num_allocations = args["numallocs"].as<int>();
+    max_size        = args["maxsize"].as<int>();
+
+    if (args.count("profile") > 0) {
+      std::map<std::string, MRFactoryFunc> const funcs({{"arena", &make_arena},
+                                                        {"binning", &make_binning},
+                                                        {"cuda", &make_cuda},
 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-                                                      {"cuda_async", &make_cuda_async},
+                                                        {"cuda_async", &make_cuda_async},
 #endif
-                                                      {"pool", &make_pool}});
-    auto resource = args["resource"].as<std::string>();
+                                                        {"pool", &make_pool}});
+      auto resource = args["resource"].as<std::string>();

-    std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max "
-              << max_size << "B\n";
+      std::cout << "Profiling " << resource << " with " << num_allocations << " allocations of max "
+                << max_size << "B\n";

-    profile_random_allocations(funcs.at(resource), num_allocations, max_size);
+      profile_random_allocations(funcs.at(resource), num_allocations, max_size);

-    std::cout << "Finished\n";
-  } else {
-    if (args.count("numallocs") == 0) {  // if zero reset to -1 so we benchmark over a range
-      num_allocations = -1;
-    }
-    if (args.count("maxsize") == 0) {  // if zero reset to -1 so we benchmark over a range
-      max_size = -1;
-    }
-
-    if (args.count("resource") > 0) {
-      std::string mr_name = args["resource"].as<std::string>();
-      declare_benchmark(mr_name);
+      std::cout << "Finished\n";
     } else {
+      if (args.count("numallocs") == 0) {  // if zero reset to -1 so we benchmark over a range
+        num_allocations = -1;
+      }
+      if (args.count("maxsize") == 0) {  // if zero reset to -1 so we benchmark over a range
+        max_size = -1;
+      }
+
+      if (args.count("resource") > 0) {
+        std::string mr_name = args["resource"].as<std::string>();
+        declare_benchmark(mr_name);
+      } else {
 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-      std::array<std::string, 5> mrs{"pool", "binning", "arena", "cuda_async", "cuda"};
+        std::vector<std::string> mrs{"pool", "binning", "arena", "cuda_async", "cuda"};
 #else
-      std::array<std::string, 4> mrs{"pool", "binning", "arena", "cuda"};
+        std::vector<std::string> mrs{"pool", "binning", "arena", "cuda"};
 #endif
-      std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); });
+        std::for_each(
+          std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); });
+      }
+      ::benchmark::RunSpecifiedBenchmarks();
     }
-    ::benchmark::RunSpecifiedBenchmarks();
+
+  } catch (std::exception const& e) {
+    std::cout << "Exception caught: " << e.what() << std::endl;
   }
+  return 0;
 }
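For clarity, here is my illustration (not the patch's code) of the random walk the constants above drive: each step allocates with 73% probability and otherwise frees a randomly chosen live allocation.

    #include <random>

    bool should_allocate(std::default_random_engine& generator)
    {
      constexpr int allocation_probability{73};  // percent, as in the file above
      constexpr int max_op_chance{99};
      std::uniform_int_distribution<int> op_distribution(0, max_op_chance);
      return op_distribution(generator) < allocation_probability;
      // true  -> mr.allocate(size_distribution(generator), stream)
      // false -> deallocate a randomly indexed live allocation
    }
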
diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp
index 6fbd5f2ab..aa8c077da 100644
--- a/benchmarks/replay/replay.cpp
+++ b/benchmarks/replay/replay.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
  * limitations under the License.
 */

-#include <benchmark/benchmark.h>
 #include <benchmarks/utilities/cxxopts.hpp>
 #include <benchmarks/utilities/log_parser.hpp>
 #include <benchmarks/utilities/simulated_memory_resource.hpp>
@@ -37,6 +36,7 @@

 #include <thrust/execution_policy.h>

+#include <benchmark/benchmark.h>
 #include <chrono>
 #include <condition_variable>
 #include <memory>
@@ -57,25 +57,29 @@ std::shared_ptr<rmm::mr::device_memory_resource> make_simulated(std::size_t simu

 inline auto make_pool(std::size_t simulated_size)
 {
-  return simulated_size == 0
-           ? rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda())
-           : rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
-               make_simulated(simulated_size), simulated_size, simulated_size);
+  if (simulated_size > 0) {
+    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
+      make_simulated(simulated_size), simulated_size, simulated_size);
+  }
+  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
 }

 inline auto make_arena(std::size_t simulated_size)
 {
-  return simulated_size == 0
-           ? rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda())
-           : rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
-               make_simulated(simulated_size), simulated_size, simulated_size);
+  if (simulated_size > 0) {
+    return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
+      make_simulated(simulated_size), simulated_size, simulated_size);
+  }
+  return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda());
 }

 inline auto make_binning(std::size_t simulated_size)
 {
   auto pool = make_pool(simulated_size);
   auto mr   = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool);
-  for (std::size_t i = 18; i <= 22; i++) {
+  const auto min_size_exp{18};
+  const auto max_size_exp{22};
+  for (std::size_t i = min_size_exp; i <= max_size_exp; i++) {
     mr->wrapped().add_bin(1 << i);
   }
   return mr;
@@ -89,8 +93,8 @@ using MRFactoryFunc = std::function<std::shared_ptr<rmm::mr::device_memory_resource>(std::size_t)>;
   replay_benchmark(MRFactoryFunc factory,
                    std::size_t simulated_size,
                    std::vector<std::vector<rmm::detail::event>> const& events)
     : factory_{std::move(factory)},
       simulated_size_{simulated_size},
-      mr_{},
       events_{events},
-      allocation_map{events.size()},
-      event_index{0}
+      allocation_map{events.size()}
   {
   }

@@ -144,12 +146,14 @@ struct replay_benchmark {
       simulated_size_{other.simulated_size_},
       mr_{std::move(other.mr_)},
       events_{other.events_},
-      allocation_map{events_.size()},
-      event_index{0}
+      allocation_map{std::move(other.allocation_map)}
   {
   }

+  ~replay_benchmark() = default;
   replay_benchmark(replay_benchmark const&) = delete;
+  replay_benchmark& operator=(replay_benchmark const&) = delete;
+  replay_benchmark& operator=(replay_benchmark&& other) noexcept = delete;

   /// Add an allocation to the map (NOT thread safe)
   void set_allocation(uintptr_t ptr, allocation alloc) { allocation_map.insert({ptr, alloc}); }
@@ -159,9 +163,9 @@ struct replay_benchmark {
   {
     auto iter = allocation_map.find(ptr);
     if (iter != allocation_map.end()) {
-      allocation a = iter->second;
+      allocation alloc = iter->second;
       allocation_map.erase(iter);
-      return a;
+      return alloc;
     }
     return allocation{};
   }
@@ -187,11 +191,12 @@ struct replay_benchmark {
       auto alloc = ptr_alloc.second;
       num_leaked++;
       total_leaked += alloc.size;
-      mr_->deallocate(alloc.p, alloc.size);
+      mr_->deallocate(alloc.ptr, alloc.size);
     }
-    if (num_leaked > 0)
+    if (num_leaked > 0) {
       std::cout << "LOG shows leak of " << num_leaked << " allocations of " << total_leaked
                 << " total bytes\n";
+    }
     allocation_map.clear();
     mr_.reset();
   }
@@ -204,20 +209,20 @@ struct replay_benchmark {

     auto const& my_events = events_.at(state.thread_index);

-    for (auto _ : state) {
-      std::for_each(my_events.begin(), my_events.end(), [&state, this](auto e) {
+    for (auto _ : state) {  // NOLINT(clang-analyzer-deadcode.DeadStores)
+      std::for_each(my_events.begin(), my_events.end(), [this](auto event) {
         // ensure correct ordering between threads
         std::unique_lock<std::mutex> lock{event_mutex};
-        if (event_index != e.index) {
-          cv.wait(lock, [&]() { return event_index == e.index; });
+        if (event_index != event.index) {
+          cv.wait(lock, [&]() { return event_index == event.index; });
         }

-        if (rmm::detail::action::ALLOCATE == e.act) {
-          auto p = mr_->allocate(e.size);
-          set_allocation(e.pointer, allocation{p, e.size});
+        if (rmm::detail::action::ALLOCATE == event.act) {
+          auto ptr = mr_->allocate(event.size);
+          set_allocation(event.pointer, allocation{ptr, event.size});
         } else {
-          auto a = remove_allocation(e.pointer);
-          mr_->deallocate(a.p, e.size);
+          auto alloc = remove_allocation(event.pointer);
+          mr_->deallocate(alloc.ptr, event.size);
         }

         event_index++;
@@ -242,11 +247,11 @@ std::vector<std::vector<rmm::detail::event>> parse_per_thread_events(std::string

   RMM_EXPECTS(std::all_of(all_events.begin(),
                           all_events.end(),
-                          [](auto const& e) {
-                            cudaStream_t cs;
-                            memcpy(&cs, &e.stream, sizeof(cudaStream_t));
-                            auto s = rmm::cuda_stream_view{cs};
-                            return s.is_default() or s.is_per_thread_default();
+                          [](auto const& event) {
+                            cudaStream_t custream;
+                            memcpy(&custream, &event.stream, sizeof(cudaStream_t));
+                            auto stream = rmm::cuda_stream_view{custream};
+                            return stream.is_default() or stream.is_per_thread_default();
                           }),
               "Non-default streams not currently supported.");

@@ -294,112 +299,128 @@ void declare_benchmark(std::string const& name,
                        std::size_t simulated_size,
                        std::vector<std::vector<rmm::detail::event>> const& per_thread_events,
                        std::size_t num_threads)
 {
-  if (name == "cuda")
+  if (name == "cuda") {
     benchmark::RegisterBenchmark("CUDA Resource",
                                  replay_benchmark(&make_cuda, simulated_size, per_thread_events))
       ->Unit(benchmark::kMillisecond)
-      ->Threads(num_threads);
-  else if (name == "binning")
+      ->Threads(static_cast<int>(num_threads));
+  } else if (name == "binning") {
     benchmark::RegisterBenchmark("Binning Resource",
                                  replay_benchmark(&make_binning, simulated_size, per_thread_events))
       ->Unit(benchmark::kMillisecond)
-      ->Threads(num_threads);
-  else if (name == "pool")
+      ->Threads(static_cast<int>(num_threads));
+  } else if (name == "pool") {
     benchmark::RegisterBenchmark("Pool Resource",
                                  replay_benchmark(&make_pool, simulated_size, per_thread_events))
       ->Unit(benchmark::kMillisecond)
-      ->Threads(num_threads);
-  else if (name == "arena")
+      ->Threads(static_cast<int>(num_threads));
+  } else if (name == "arena") {
     benchmark::RegisterBenchmark("Arena Resource",
                                  replay_benchmark(&make_arena, simulated_size, per_thread_events))
       ->Unit(benchmark::kMillisecond)
-      ->Threads(num_threads);
-  else
+      ->Threads(static_cast<int>(num_threads));
+  } else {
     std::cout << "Error: invalid memory_resource name: " << name << "\n";
+  }
 }

 // Usage: REPLAY_BENCHMARK -f "path/to/log/file"
 int main(int argc, char** argv)
 {
-  // benchmark::Initialize will remove GBench command line arguments it
-  // recognizes and leave any remaining arguments
-  ::benchmark::Initialize(&argc, argv);
-
-  // Parse for replay arguments:
-  auto args = [&argc, &argv]() {
-    cxxopts::Options options(
-      "RMM Replay Benchmark",
-      "Replays and benchmarks allocation activity captured from RMM logging.");
-
-    options.add_options()("f,file", "Name of RMM log file.", cxxopts::value<std::string>());
-    options.add_options()("r,resource",
-                          "Type of device_memory_resource",
-                          cxxopts::value<std::string>()->default_value("pool"));
-    options.add_options()("s,size",
-                          "Size of simulated GPU memory in GiB. Not supported for the cuda memory "
-                          "resource.",
-                          cxxopts::value<double>()->default_value("0"));
-    options.add_options()("v,verbose",
-                          "Enable verbose printing of log events",
-                          cxxopts::value<bool>()->default_value("false"));
-
-    auto args = options.parse(argc, argv);
-
-    if (args.count("file") == 0) {
-      std::cout << options.help() << std::endl;
-      exit(0);
-    }
+  try {
+    // benchmark::Initialize will remove GBench command line arguments it
+    // recognizes and leave any remaining arguments
+    ::benchmark::Initialize(&argc, argv);
+
+    // Parse for replay arguments:
+    auto args = [&argc, &argv]() {
+      cxxopts::Options options(
+        "RMM Replay Benchmark",
+        "Replays and benchmarks allocation activity captured from RMM logging.");
+
+      options.add_options()("f,file", "Name of RMM log file.", cxxopts::value<std::string>());
+      options.add_options()("r,resource",
+                            "Type of device_memory_resource",
+                            cxxopts::value<std::string>()->default_value("pool"));
+      options.add_options()(
+        "s,size",
+        "Size of simulated GPU memory in GiB. Not supported for the cuda memory "
+        "resource.",
+        cxxopts::value<double>()->default_value("0"));
+      options.add_options()("v,verbose",
+                            "Enable verbose printing of log events",
+                            cxxopts::value<bool>()->default_value("false"));

-    return args;
-  }();
+      auto args = options.parse(argc, argv);
+
+      if (args.count("file") == 0) {
+        std::cout << options.help() << std::endl;
+        exit(0);
+      }

-  auto filename = args["file"].as<std::string>();
+      return args;
+    }();

-  auto per_thread_events = parse_per_thread_events(filename);
+    auto filename = args["file"].as<std::string>();
+
+    auto per_thread_events = [filename]() {
+      try {
+        auto events = parse_per_thread_events(filename);
+        return events;
+      } catch (std::exception const& e) {
+        std::cout << "Failed to parse events: " << e.what() << std::endl;
+        return std::vector<std::vector<rmm::detail::event>>{};
+      }
+    }();

 #ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
-  std::cout << "Using CUDA per-thread default stream.\n";
+    std::cout << "Using CUDA per-thread default stream.\n";
 #endif

-  auto const simulated_size =
-    static_cast<std::size_t>(args["size"].as<double>() * static_cast<double>(1u << 30u));
-  if (simulated_size != 0 && args["resource"].as<std::string>() != "cuda") {
-    std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n";
-  }
+    auto const simulated_size =
+      static_cast<std::size_t>(args["size"].as<double>() * static_cast<double>(1U << 30U));
+    if (simulated_size != 0 && args["resource"].as<std::string>() != "cuda") {
+      std::cout << "Simulating GPU with memory size of " << simulated_size << " bytes.\n";
+    }

-  std::cout << "Total Events: "
-            << std::accumulate(
-                 per_thread_events.begin(),
-                 per_thread_events.end(),
-                 0,
-                 [](std::size_t accum, auto const& events) { return accum + events.size(); })
-            << std::endl;
-
-  for (std::size_t t = 0; t < per_thread_events.size(); ++t) {
-    std::cout << "Thread " << t << ": " << per_thread_events[t].size() << " events\n";
-    if (args["verbose"].as<bool>()) {
-      for (auto const& e : per_thread_events[t]) {
-        std::cout << e << std::endl;
+    std::cout << "Total Events: "
+              << std::accumulate(
+                   per_thread_events.begin(),
+                   per_thread_events.end(),
+                   0,
+                   [](std::size_t accum, auto const& events) { return accum + events.size(); })
+              << std::endl;
+
+    for (std::size_t thread = 0; thread < per_thread_events.size(); ++thread) {
+      std::cout << "Thread " << thread << ": " << per_thread_events[thread].size() << " events\n";
+      if (args["verbose"].as<bool>()) {
+        for (auto const& event : per_thread_events[thread]) {
+          std::cout << event << std::endl;
+        }
       }
     }
-  }
-
-  auto const num_threads = per_thread_events.size();

-  // Uncomment to enable / change default log level
-  // rmm::logger().set_level(spdlog::level::trace);
+    auto const num_threads = per_thread_events.size();
+
+    // Uncomment to enable / change default log level
+    // rmm::logger().set_level(spdlog::level::trace);
+
+    if (args.count("resource") > 0) {
+      std::string mr_name = args["resource"].as<std::string>();
+      declare_benchmark(mr_name, simulated_size, per_thread_events, num_threads);
+    } else {
+      std::array<std::string, 4> mrs{"pool", "arena", "binning", "cuda"};
+      std::for_each(std::cbegin(mrs),
+                    std::cend(mrs),
+                    [&simulated_size, &per_thread_events, &num_threads](auto const& mr) {
+                      declare_benchmark(mr, simulated_size, per_thread_events, num_threads);
+                    });
+    }

-  if (args.count("resource") > 0) {
-    std::string mr_name = args["resource"].as<std::string>();
-    declare_benchmark(mr_name, simulated_size, per_thread_events, num_threads);
-  } else {
-    std::array<std::string, 4> mrs{"pool", "arena", "binning", "cuda"};
-    std::for_each(std::cbegin(mrs),
-                  std::cend(mrs),
-                  [&simulated_size, &per_thread_events, &num_threads](auto const& s) {
-                    declare_benchmark(s, simulated_size, per_thread_events, num_threads);
-                  });
+    ::benchmark::RunSpecifiedBenchmarks();
+  } catch (std::exception const& e) {
+    std::cout << "Exception caught: " << e.what() << std::endl;
   }

-  ::benchmark::RunSpecifiedBenchmarks();
+  return 0;
 }
diff --git a/benchmarks/synchronization/synchronization.cpp b/benchmarks/synchronization/synchronization.cpp
index 5db8c4a3e..9e048b285 100644
--- a/benchmarks/synchronization/synchronization.cpp
+++ b/benchmarks/synchronization/synchronization.cpp
@@ -59,9 +59,10 @@ cuda_event_timer::~cuda_event_timer()
   RMM_CUDA_ASSERT_OK(cudaEventRecord(stop, stream.value()));
   RMM_CUDA_ASSERT_OK(cudaEventSynchronize(stop));

-  float milliseconds = 0.0f;
+  float milliseconds = 0.0F;
   RMM_CUDA_ASSERT_OK(cudaEventElapsedTime(&milliseconds, start, stop));
-  p_state->SetIterationTime(milliseconds / (1000.0f));
+  const auto to_milliseconds{1.0F / 1000};
+  p_state->SetIterationTime(milliseconds * to_milliseconds);
   RMM_CUDA_ASSERT_OK(cudaEventDestroy(start));
   RMM_CUDA_ASSERT_OK(cudaEventDestroy(stop));
 }
diff --git a/benchmarks/synchronization/synchronization.hpp b/benchmarks/synchronization/synchronization.hpp
index 6c2298575..b0007d9b2 100644
--- a/benchmarks/synchronization/synchronization.hpp
+++ b/benchmarks/synchronization/synchronization.hpp
@@ -89,9 +89,15 @@ class cuda_event_timer {
   // will be set to the value given by `cudaEventElapsedTime`.
   ~cuda_event_timer();

+  // disable copy and move
+  cuda_event_timer(cuda_event_timer const&) = delete;
+  cuda_event_timer& operator=(cuda_event_timer const&) = delete;
+  cuda_event_timer(cuda_event_timer&&)                 = delete;
+  cuda_event_timer& operator=(cuda_event_timer&&) = delete;
+
  private:
-  cudaEvent_t start;
-  cudaEvent_t stop;
-  rmm::cuda_stream_view stream;
-  benchmark::State* p_state;
+  cudaEvent_t start{};
+  cudaEvent_t stop{};
+  rmm::cuda_stream_view stream{};
+  benchmark::State* p_state{};
 };
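cuda_event_timer is an RAII timer: the destructor above records the stop event, synchronizes, and reports the elapsed time through SetIterationTime. A usage sketch follows; the constructor shape is my assumption, since the hunks above show only the destructor and the members.

    #include <benchmark/benchmark.h>

    static void BM_TimedSection(benchmark::State& state)
    {
      for (auto _ : state) {
        cuda_event_timer raii(state);  // assumed constructor; see header above
        // ... launch the GPU work to be measured on the timer's stream ...
      }  // destructor records stop, synchronizes, and calls SetIterationTime(seconds)
    }
    BENCHMARK(BM_TimedSection)->UseManualTime();
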
diff --git a/benchmarks/utilities/log_parser.hpp b/benchmarks/utilities/log_parser.hpp
index f4bbdbbc8..db939e65f 100644
--- a/benchmarks/utilities/log_parser.hpp
+++ b/benchmarks/utilities/log_parser.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,8 +31,7 @@
 #include <string>
 #include <vector>

-namespace rmm {
-namespace detail {
+namespace rmm::detail {

 enum class action : bool { ALLOCATE, FREE };

@@ -43,41 +42,57 @@ enum class action : bool { ALLOCATE, FREE };
 struct event {
   event()             = default;
   event(event const&) = default;
-  event(action a, std::size_t s, void const* p)
-    : act{a}, size{s}, pointer{reinterpret_cast<uintptr_t>(p)}
+  event& operator=(event const&) = default;
+  event(event&&) noexcept        = default;
+  event& operator=(event&&) noexcept = default;
+  ~event()                           = default;
+  event(action act, std::size_t size, void const* ptr)
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+    : act{act}, size{size}, pointer{reinterpret_cast<uintptr_t>(ptr)}
   {
   }

-  event(action a, std::size_t s, uintptr_t p) : act{a}, size{s}, pointer{p} {}
+  // NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
+  event(action act, std::size_t size, uintptr_t ptr) : act{act}, size{size}, pointer{ptr} {}

-  event(std::size_t tid, action a, std::size_t sz, uintptr_t p, uintptr_t s, std::size_t i)
-    : act{a}, size{sz}, pointer{p}, thread_id{tid}, stream{s}, index{i}
+  event(std::size_t tid,
+        action act,
+        std::size_t size,  // NOLINT(bugprone-easily-swappable-parameters)
+        uintptr_t ptr,
+        uintptr_t stream,
+        std::size_t index)
+    : act{act}, size{size}, pointer{ptr}, thread_id{tid}, stream{stream}, index{index}
   {
   }

-  event(std::size_t tid, action a, std::size_t sz, void* p, uintptr_t s, std::size_t i)
-    : event{tid, a, sz, reinterpret_cast<uintptr_t>(p), s, i}
+  event(
+    std::size_t tid, action act, std::size_t size, void* ptr, uintptr_t stream, std::size_t index)
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+    : event{tid, act, size, reinterpret_cast<uintptr_t>(ptr), stream, index}
   {
   }

-  friend std::ostream& operator<<(std::ostream& os, event const& e);
+  friend std::ostream& operator<<(std::ostream& os, event const& evt);

-  action act{};           ///< Indicates if the event is an allocation or a free
-  std::size_t size{};     ///< The size of the memory allocated or freed
-  uintptr_t pointer{};    ///< The pointer returned from an allocation, or the
-                          ///< pointer freed
-  std::size_t thread_id;  ///< ID of the thread that initiated the event
-  uintptr_t stream;       ///< Numeric representation of the CUDA stream on which the event occurred
-  std::size_t index;      ///< Original ordering index of the event
+  action act{};             ///< Indicates if the event is an allocation or a free
+  std::size_t size{};       ///< The size of the memory allocated or freed
+  uintptr_t pointer{};      ///< The pointer returned from an allocation, or the
+                            ///< pointer freed
+  std::size_t thread_id{};  ///< ID of the thread that initiated the event
+  uintptr_t stream{};       ///< Numeric representation of the CUDA stream on which the event occurred
+  std::size_t index{};      ///< Original ordering index of the event
 };

-inline std::ostream& operator<<(std::ostream& os, event const& e)
+inline std::ostream& operator<<(std::ostream& os, event const& evt)
 {
-  auto act_string = (e.act == action::ALLOCATE) ? "allocate" : "free";
+  const auto* act_string = (evt.act == action::ALLOCATE) ? "allocate" : "free";

-  os << "Thread: " << e.thread_id << std::setw(9) << act_string
-     << " Size: " << std::setw(std::numeric_limits<std::size_t>::digits10) << e.size << " Pointer: "
-     << "0x" << std::hex << e.pointer << std::dec << " Stream: " << e.stream;
+  const auto format_width{9};
+
+  os << "Thread: " << evt.thread_id << std::setw(format_width) << act_string
+     << " Size: " << std::setw(std::numeric_limits<std::size_t>::digits10) << evt.size
+     << " Pointer: "
+     << "0x" << std::hex << evt.pointer << std::dec << " Stream: " << evt.stream;
   return os;
 }

@@ -105,11 +120,12 @@ inline std::chrono::time_point<std::chrono::system_clock> parse_time(std::string
   int seconds      = std::stoi(str_time.substr(previous, current - previous));
   int microseconds = std::stoi(str_time.substr(current + 1, str_time.length()));

-  std::tm tm{seconds, minutes, hours, 1, 0, 1970, 0, 0, 0};
+  auto const epoch_year{1970};
+  std::tm time{seconds, minutes, hours, 1, 0, epoch_year, 0, 0, 0};

-  auto tp = std::chrono::system_clock::from_time_t(std::mktime(&tm));
-  tp += std::chrono::microseconds{microseconds};
-  return tp;
+  auto timepoint = std::chrono::system_clock::from_time_t(std::mktime(&time));
+  timepoint += std::chrono::microseconds{microseconds};
+  return timepoint;
 }

 /**
@@ -128,8 +144,9 @@ inline std::vector<event> parse_csv(std::string const& filename)
   std::vector<std::size_t> tids    = csv.GetColumn<std::size_t>("Thread");
   std::vector<std::string> actions = csv.GetColumn<std::string>("Action");

-  auto parse_pointer = [](std::string const& s, uintptr_t& ptr) {
-    ptr = std::stoll(s, nullptr, 16);
+  auto parse_pointer = [](std::string const& str, uintptr_t& ptr) {
+    auto const base{16};
+    ptr = std::stoll(str, nullptr, base);
   };

   std::vector<uintptr_t> pointers = csv.GetColumn<uintptr_t>("Pointer", parse_pointer);
@@ -140,19 +157,18 @@ inline std::vector<event> parse_csv(std::string const& filename)

   RMM_EXPECTS(std::all_of(std::begin(size_list),
                           std::end(size_list),
-                          [size = sizes.size()](auto i) { return i == size; }),
+                          [size = sizes.size()](auto val) { return val == size; }),
               "Size mismatch in columns of parsed log.");

   std::vector<event> events(sizes.size());

   for (std::size_t i = 0; i < actions.size(); ++i) {
-    auto const& a = actions[i];
-    RMM_EXPECTS((a == "allocate") or (a == "free"), "Invalid action string.");
-    auto act  = (a == "allocate") ? action::ALLOCATE : action::FREE;
+    auto const& action = actions[i];
+    RMM_EXPECTS((action == "allocate") or (action == "free"), "Invalid action string.");
+    auto act  = (action == "allocate") ? action::ALLOCATE : action::FREE;
     events[i] = event{tids[i], act, sizes[i], pointers[i], streams[i], i};
   }
   return events;
 }

-}  // namespace detail
-}  // namespace rmm
+}  // namespace rmm::detail
diff --git a/benchmarks/utilities/simulated_memory_resource.hpp b/benchmarks/utilities/simulated_memory_resource.hpp
index 67883ad5d..d8c7cf946 100644
--- a/benchmarks/utilities/simulated_memory_resource.hpp
+++ b/benchmarks/utilities/simulated_memory_resource.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,8 +20,7 @@

 #include <rmm/mr/device/device_memory_resource.hpp>

-namespace rmm {
-namespace mr {
+namespace rmm::mr {

 /**
  * @brief A device memory resource that simulates a fix-sized GPU.
@@ -39,14 +38,18 @@ class simulated_memory_resource final : public device_memory_resource {
    * @param memory_size_bytes The size of the memory to simulate.
    */
   explicit simulated_memory_resource(std::size_t memory_size_bytes)
-    : begin_{reinterpret_cast<char*>(0x100)},
-      end_{reinterpret_cast<char*>(begin_ + memory_size_bytes)}
+    : begin_{reinterpret_cast<char*>(0x100)},                    // NOLINT
+      end_{reinterpret_cast<char*>(begin_ + memory_size_bytes)}  // NOLINT
   {
   }

+  ~simulated_memory_resource() override = default;
+
   // Disable copy (and move) semantics.
   simulated_memory_resource(simulated_memory_resource const&) = delete;
   simulated_memory_resource& operator=(simulated_memory_resource const&) = delete;
+  simulated_memory_resource(simulated_memory_resource&&)                 = delete;
+  simulated_memory_resource& operator=(simulated_memory_resource&&) = delete;

   /**
    * @brief Query whether the resource supports use of non-null CUDA streams for
@@ -54,14 +57,14 @@ class simulated_memory_resource final : public device_memory_resource {
    *
    * @returns bool false
    */
-  bool supports_streams() const noexcept override { return false; }
+  [[nodiscard]] bool supports_streams() const noexcept override { return false; }

   /**
    * @brief Query whether the resource supports the get_mem_info API.
    *
    * @return false
    */
-  bool supports_get_mem_info() const noexcept override { return false; }
+  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

  private:
   /**
@@ -76,22 +79,23 @@ class simulated_memory_resource final : public device_memory_resource {
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view) override
   {
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
     RMM_EXPECTS(begin_ + bytes <= end_, rmm::bad_alloc, "Simulated memory size exceeded");
-    auto p = static_cast<void*>(begin_);
-    begin_ += bytes;
-    return p;
+    auto* ptr = static_cast<void*>(begin_);
+    begin_ += bytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    return ptr;
   }

   /**
-   * @brief Deallocate memory pointed to by \p p.
+   * @brief Deallocate memory pointed to by `p`.
    *
    * @note This call is ignored.
    *
    * @throws Nothing.
    *
-   * @param p Pointer to be deallocated
+   * @param ptr Pointer to be deallocated
    */
-  void do_deallocate(void* p, std::size_t, cuda_stream_view) override {}
+  void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override {}

   /**
    * @brief Get free and available memory for memory resource.
    *
    * @param stream to execute on.
    * @return std::pair containing free_size and total_size of memory.
    */
@@ -99,14 +103,13 @@ class simulated_memory_resource final : public device_memory_resource {
-  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
+  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
+    cuda_stream_view stream) const override
   {
     return std::make_pair(0, 0);
   }

- private:
-  char* begin_;
-  char* end_;
+  char* begin_{};
+  char* end_{};
 };
-}  // namespace mr
-}  // namespace rmm
+}  // namespace rmm::mr
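The simulated resource above is a bump allocator over a fake address range: allocation only advances a cursor and checks capacity, and deallocation is a no-op. A stripped-down illustration of the same idea (mine, not part of the patch):

    #include <cstddef>

    struct bump_range {
      char* begin;  // next free "address"
      char* end;    // one past the simulated capacity

      void* allocate(std::size_t bytes)
      {
        if (begin + bytes > end) { return nullptr; }  // simulated out-of-memory
        void* ptr = begin;
        begin += bytes;  // bump the cursor; nothing is ever reused
        return ptr;
      }
      // deallocate is intentionally a no-op, mirroring do_deallocate above
    };
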
diff --git a/cmake/thirdparty/get_gtest.cmake b/cmake/thirdparty/get_gtest.cmake
index 94feca00c..4d4daff44 100644
--- a/cmake/thirdparty/get_gtest.cmake
+++ b/cmake/thirdparty/get_gtest.cmake
@@ -14,7 +14,6 @@

 # Use CPM to find or clone gtest
 function(find_and_configure_gtest)
-
   include(${rapids-cmake-dir}/cpm/gtest.cmake)

   rapids_cpm_gtest()
diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp
index cad929de7..ab225490e 100644
--- a/include/rmm/cuda_device.hpp
+++ b/include/rmm/cuda_device.hpp
@@ -33,10 +33,10 @@ struct cuda_device_id {
    *
    * @param id The device's integer identifier
    */
-  explicit constexpr cuda_device_id(value_type id) noexcept : id_{id} {}
+  explicit constexpr cuda_device_id(value_type dev_id) noexcept : id_{dev_id} {}

   /// Returns the wrapped integer value
-  constexpr value_type value() const noexcept { return id_; }
+  [[nodiscard]] constexpr value_type value() const noexcept { return id_; }

  private:
   value_type id_;
@@ -52,7 +52,7 @@ namespace detail {
  */
 inline cuda_device_id current_device()
 {
-  int dev_id;
+  int dev_id{};
   RMM_CUDA_TRY(cudaGetDevice(&dev_id));
   return cuda_device_id{dev_id};
 }
diff --git a/include/rmm/cuda_stream.hpp b/include/rmm/cuda_stream.hpp
index 10d944c8f..185cd049e 100644
--- a/include/rmm/cuda_stream.hpp
+++ b/include/rmm/cuda_stream.hpp
@@ -57,13 +57,13 @@ class cuda_stream {
    */
   cuda_stream()
     : stream_{[]() {
-                cudaStream_t* s = new cudaStream_t;
-                RMM_CUDA_TRY(cudaStreamCreate(s));
-                return s;
+                auto* stream = new cudaStream_t;  // NOLINT(cppcoreguidelines-owning-memory)
+                RMM_CUDA_TRY(cudaStreamCreate(stream));
+                return stream;
               }(),
-              [](cudaStream_t* s) {
-                RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*s));
-                delete s;
+              [](cudaStream_t* stream) {
+                RMM_ASSERT_CUDA_SUCCESS(cudaStreamDestroy(*stream));
+                delete stream;  // NOLINT(cppcoreguidelines-owning-memory)
               }}
   {
   }
@@ -74,14 +74,14 @@ class cuda_stream {
    * @return true If the owned stream has not been explicitly moved and is therefore non-null.
    * @return false If the owned stream has been explicitly moved and is therefore null.
    */
-  bool is_valid() const { return stream_ != nullptr; }
+  [[nodiscard]] bool is_valid() const { return stream_ != nullptr; }

   /**
    * @brief Get the value of the wrapped CUDA stream.
    *
    * @return cudaStream_t The wrapped CUDA stream.
    */
-  cudaStream_t value() const
+  [[nodiscard]] cudaStream_t value() const
   {
     RMM_LOGGING_ASSERT(is_valid());
     return *stream_;
@@ -97,7 +97,7 @@ class cuda_stream {
    *
    * @return rmm::cuda_stream_view The view of the CUDA stream
    */
-  cuda_stream_view view() const { return cuda_stream_view{value()}; }
+  [[nodiscard]] cuda_stream_view view() const { return cuda_stream_view{value()}; }

   /**
    * @brief Implicit conversion to cuda_stream_view
diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp
index c80d4de2f..f913609f9 100644
--- a/include/rmm/cuda_stream_view.hpp
+++ b/include/rmm/cuda_stream_view.hpp
@@ -54,7 +54,7 @@ class cuda_stream_view {
    *
    * @return cudaStream_t The wrapped stream.
    */
-  constexpr cudaStream_t value() const noexcept { return stream_; }
+  [[nodiscard]] constexpr cudaStream_t value() const noexcept { return stream_; }

   /**
    * @brief Implicit conversion to cudaStream_t.
@@ -64,26 +64,12 @@ class cuda_stream_view {
   /**
    * @brief Return true if the wrapped stream is the CUDA per-thread default stream.
    */
-  bool is_per_thread_default() const noexcept
-  {
-#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
-    return value() == cudaStreamPerThread || value() == 0;
-#else
-    return value() == cudaStreamPerThread;
-#endif
-  }
+  [[nodiscard]] inline bool is_per_thread_default() const noexcept;

   /**
    * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream.
    */
-  bool is_default() const noexcept
-  {
-#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
-    return value() == cudaStreamLegacy;
-#else
-    return value() == cudaStreamLegacy || value() == 0;
-#endif
-  }
+  [[nodiscard]] inline bool is_default() const noexcept;

   /**
    * @brief Synchronize the viewed CUDA stream.
@@ -105,7 +91,7 @@ class cuda_stream_view {
   }

  private:
-  cudaStream_t stream_{0};
+  cudaStream_t stream_{};
 };

 /**
@@ -116,12 +102,38 @@ static constexpr cuda_stream_view cuda_stream_default{};
 /**
  * @brief Static cuda_stream_view of cudaStreamLegacy, for convenience
  */
-static cuda_stream_view cuda_stream_legacy{cudaStreamLegacy};
+
+static const cuda_stream_view cuda_stream_legacy{
+  cudaStreamLegacy  // NOLINT(cppcoreguidelines-pro-type-cstyle-cast)
+};

 /**
  * @brief Static cuda_stream_view of cudaStreamPerThread, for convenience
  */
-static cuda_stream_view cuda_stream_per_thread{cudaStreamPerThread};
+static const cuda_stream_view cuda_stream_per_thread{
+  cudaStreamPerThread  // NOLINT(cppcoreguidelines-pro-type-cstyle-cast)
+};
+
+[[nodiscard]] inline bool cuda_stream_view::is_per_thread_default() const noexcept
+{
+#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
+  return value() == cuda_stream_per_thread || value() == nullptr;
+#else
+  return value() == cuda_stream_per_thread;
+#endif
+}
+
+/**
+ * @brief Return true if the wrapped stream is explicitly the CUDA legacy default stream.
+ */
+[[nodiscard]] inline bool cuda_stream_view::is_default() const noexcept
+{
+#ifdef CUDA_API_PER_THREAD_DEFAULT_STREAM
+  return value() == cuda_stream_legacy;
+#else
+  return value() == cuda_stream_legacy || value() == nullptr;
+#endif
+}

 /**
  * @brief Equality comparison operator for streams
@@ -151,9 +163,9 @@ inline bool operator!=(cuda_stream_view lhs, cuda_stream_view rhs) { return not(
  * @param sv The cuda_stream_view to output
  * @return std::ostream& The output ostream
  */
-inline std::ostream& operator<<(std::ostream& os, cuda_stream_view sv)
+inline std::ostream& operator<<(std::ostream& os, cuda_stream_view stream)
 {
-  os << sv.value();
+  os << stream.value();
   return os;
 }
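The refactor above moves the is_default and is_per_thread_default definitions below the named stream constants so they can compare against cuda_stream_legacy and cuda_stream_per_thread instead of raw handles. A behavior sketch (mine; it assumes a build without CUDA_API_PER_THREAD_DEFAULT_STREAM defined):

    #include <rmm/cuda_stream_view.hpp>
    #include <cassert>

    void check_defaults()
    {
      rmm::cuda_stream_view default_view{};  // wraps the null (legacy default) stream
      assert(default_view.is_default());     // stream 0 aliases the legacy stream here
      assert(rmm::cuda_stream_legacy.is_default());
      assert(rmm::cuda_stream_per_thread.is_per_thread_default());
      assert(!rmm::cuda_stream_per_thread.is_default());
    }
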
diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp
index 17973d033..321be53b5 100644
--- a/include/rmm/detail/aligned.hpp
+++ b/include/rmm/detail/aligned.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@
 #include <cstdint>
 #include <memory>

-namespace rmm {
-namespace detail {
+namespace rmm::detail {

 /**
  * @brief Default alignment used for host memory allocated by RMM.
@@ -41,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  * @brief Returns whether or not `n` is a power of 2.
  *
  */
-constexpr bool is_pow2(std::size_t n) { return (0 == (n & (n - 1))); }
+constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); }

 /**
  * @brief Returns whether or not `alignment` is a valid memory alignment.
@@ -57,10 +56,10 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al
  *
  * @return Return the aligned value, as one would expect
  */
-constexpr std::size_t align_up(std::size_t v, std::size_t align_bytes) noexcept
+constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
 {
-  assert(is_supported_alignment(align_bytes));
-  return (v + (align_bytes - 1)) & ~(align_bytes - 1);
+  assert(is_supported_alignment(alignment));
+  return (value + (alignment - 1)) & ~(alignment - 1);
 }

 /**
@@ -71,10 +70,10 @@ constexpr std::size_t align_up(std::size_t v, std::size_t align_bytes) noexcept
  *
  * @return Return the aligned value, as one would expect
  */
-constexpr std::size_t align_down(std::size_t v, std::size_t align_bytes) noexcept
+constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
 {
-  assert(is_supported_alignment(align_bytes));
-  return v & ~(align_bytes - 1);
+  assert(is_supported_alignment(alignment));
+  return value & ~(alignment - 1);
 }

 /**
@@ -85,10 +84,16 @@ constexpr std::size_t align_down(std::size_t v, std::size_t align_bytes) noexcep
  *
  * @return true if aligned
  */
-constexpr bool is_aligned(std::size_t v, std::size_t align_bytes) noexcept
+constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept
 {
-  assert(is_supported_alignment(align_bytes));
-  return v == align_down(v, align_bytes);
+  assert(is_supported_alignment(alignment));
+  return value == align_down(value, alignment);
+}
+
+inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT)
+{
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+  return rmm::detail::is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
 }

 /**
@@ -130,6 +135,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
   char* const original = static_cast<char*>(alloc(padded_allocation_size));

   // account for storage of offset immediately prior to the aligned pointer
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
   void* aligned{original + sizeof(std::ptrdiff_t)};

   // std::align modifies `aligned` to point to the first aligned location
@@ -139,6 +145,7 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
   std::ptrdiff_t offset = static_cast<char*>(aligned) - original;

   // Store the offset immediately before the aligned pointer
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
   *(static_cast<std::ptrdiff_t*>(aligned) - 1) = offset;

   return aligned;
@@ -160,16 +167,18 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
 * @tparam Dealloc A unary callable type that deallocates memory.
 */
 template <typename Dealloc>
-void aligned_deallocate(void* p, std::size_t bytes, std::size_t alignment, Dealloc dealloc)
+// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
+void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc)
 {
   (void)alignment;

   // Get offset from the location immediately prior to the aligned pointer
-  std::ptrdiff_t const offset = *(reinterpret_cast<std::ptrdiff_t*>(p) - 1);
+  // NOLINTNEXTLINE
+  std::ptrdiff_t const offset = *(reinterpret_cast<std::ptrdiff_t*>(ptr) - 1);

-  void* const original = static_cast<char*>(p) - offset;
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+  void* const original = static_cast<char*>(ptr) - offset;

   dealloc(original);
 }

-}  // namespace detail
-}  // namespace rmm
+}  // namespace rmm::detail
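A worked example of the alignment arithmetic above (my sketch; the functions themselves are from the file):

    #include <rmm/detail/aligned.hpp>

    // With a 256-byte alignment (CUDA_ALLOCATION_ALIGNMENT):
    static_assert(rmm::detail::align_up(1000, 256) == 1024);    // round up to next multiple
    static_assert(rmm::detail::align_down(1000, 256) == 768);   // round down
    static_assert(rmm::detail::is_aligned(1024, 256));          // exact multiple passes
    static_assert(rmm::detail::is_pow2(256));                   // required of any alignment
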
diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp
index d60bb560f..613b8d156 100644
--- a/include/rmm/detail/cuda_util.hpp
+++ b/include/rmm/detail/cuda_util.hpp
@@ -17,16 +17,15 @@

 #include <rmm/detail/error.hpp>

-namespace rmm {
-namespace detail {
+namespace rmm::detail {

 /// Gets the available and total device memory in bytes for the current device
 inline std::pair<std::size_t, std::size_t> available_device_memory()
 {
-  std::size_t free{}, total{};
+  std::size_t free{};
+  std::size_t total{};
   RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
   return {free, total};
 }

-}  // namespace detail
-}  // namespace rmm
+}  // namespace rmm::detail
diff --git a/include/rmm/detail/error.hpp b/include/rmm/detail/error.hpp
index 057f67ba3..1f550f75e 100644
--- a/include/rmm/detail/error.hpp
+++ b/include/rmm/detail/error.hpp
@@ -51,15 +51,10 @@ struct cuda_error : public std::runtime_error {
  */
 class bad_alloc : public std::bad_alloc {
  public:
-  bad_alloc(const char* w) : std::bad_alloc{}, _what{std::string{std::bad_alloc::what()} + ": " + w}
-  {
-  }
-
-  bad_alloc(std::string const& w) : bad_alloc(w.c_str()) {}
-
-  virtual ~bad_alloc() = default;
+  bad_alloc(const char* msg) : _what{std::string{std::bad_alloc::what()} + ": " + msg} {}
+  bad_alloc(std::string const& msg) : bad_alloc(msg.c_str()) {}

-  virtual const char* what() const noexcept { return _what.c_str(); }
+  [[nodiscard]] const char* what() const noexcept override { return _what.c_str(); }

  private:
   std::string _what;
@@ -105,10 +100,11 @@ class out_of_range : public std::out_of_range {
   GET_RMM_EXPECTS_MACRO(__VA_ARGS__, RMM_EXPECTS_3, RMM_EXPECTS_2) \
   (__VA_ARGS__)
 #define GET_RMM_EXPECTS_MACRO(_1, _2, _3, NAME, ...) NAME
-#define RMM_EXPECTS_3(_condition, _exception_type, _reason)              \
-  (!!(_condition)) ? static_cast<void>(0) : throw _exception_type        \
-  {                                                                      \
-    "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason \
+#define RMM_EXPECTS_3(_condition, _exception_type, _reason)                       \
+  (!!(_condition)) ? static_cast<void>(0)                                         \
+                   : throw _exception_type /*NOLINT(bugprone-macro-parentheses)*/ \
+  {                                                                               \
+    "RMM failure at: " __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _reason          \
   }
 #define RMM_EXPECTS_2(_condition, _reason) RMM_EXPECTS_3(_condition, rmm::logic_error, _reason)

@@ -128,7 +124,8 @@ class out_of_range : public std::out_of_range {
   GET_RMM_FAIL_MACRO(__VA_ARGS__, RMM_FAIL_2, RMM_FAIL_1) \
   (__VA_ARGS__)
 #define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME
-#define RMM_FAIL_2(_what, _exception_type) \
+#define RMM_FAIL_2(_what, _exception_type)       \
+  /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \
   throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what};
 #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error)

@@ -162,6 +159,7 @@ class out_of_range : public std::out_of_range {
     cudaError_t const error = (_call);                                            \
     if (cudaSuccess != error) {                                                   \
       cudaGetLastError();                                                         \
+      /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/                              \
       throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" +    \
                             RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \
                             cudaGetErrorString(error)};                          \
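A usage sketch (mine, not from the patch) of the two- and three-argument forms of the checking macros above:

    #include <rmm/detail/error.hpp>

    void checked(std::size_t bytes)
    {
      // Two-argument form throws rmm::logic_error on failure:
      RMM_EXPECTS(bytes > 0, "allocation size must be non-zero");
      // Three-argument form names the exception type explicitly:
      RMM_EXPECTS(bytes < (1u << 30u), rmm::bad_alloc, "request too large");
      // RMM_FAIL throws unconditionally:
      if (bytes == 0) { RMM_FAIL("unreachable: size already validated"); }
    }
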
NAME -#define RMM_FAIL_2(_what, _exception_type) \ +#define RMM_FAIL_2(_what, _exception_type) \ + /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what}; #define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error) @@ -162,6 +159,7 @@ class out_of_range : public std::out_of_range { cudaError_t const error = (_call); \ if (cudaSuccess != error) { \ cudaGetLastError(); \ + /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/ \ throw _exception_type{std::string{"CUDA error at: "} + __FILE__ + ":" + \ RMM_STRINGIFY(__LINE__) + ": " + cudaGetErrorName(error) + " " + \ cudaGetErrorString(error)}; \ diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp index 1e218fa53..1f76af0a4 100644 --- a/include/rmm/detail/stack_trace.hpp +++ b/include/rmm/detail/stack_trace.hpp @@ -28,14 +28,13 @@ #include #include #include + #include #include #include #endif -namespace rmm { - -namespace detail { +namespace rmm::detail { /** * @brief stack_trace is a class that will capture a stack on instatiation for output later. @@ -52,36 +51,37 @@ class stack_trace { { #if defined(RMM_ENABLE_STACK_TRACES) const int MaxStackDepth = 64; - void* stack[MaxStackDepth]; - auto const depth = backtrace(stack, MaxStackDepth); - stack_ptrs.insert(stack_ptrs.end(), &stack[0], &stack[depth]); + std::array<void*, MaxStackDepth> stack{}; + auto const depth = backtrace(stack.begin(), MaxStackDepth); + stack_ptrs.insert(stack_ptrs.end(), stack.begin(), stack.begin() + depth); #endif // RMM_ENABLE_STACK_TRACES } - friend std::ostream& operator<<(std::ostream& os, const stack_trace& st) + friend std::ostream& operator<<(std::ostream& os, const stack_trace& trace) { #if defined(RMM_ENABLE_STACK_TRACES) std::unique_ptr<char*, decltype(&::free)> strings( - backtrace_symbols(st.stack_ptrs.data(), st.stack_ptrs.size()), &::free); + backtrace_symbols(trace.stack_ptrs.data(), static_cast<int>(trace.stack_ptrs.size())), + &::free); - if (strings.get() == nullptr) { + if (strings == nullptr) { os << "But no stack trace could be found!" << std::endl; } else { // Iterate over the stack pointers converting to a string - for (std::size_t i = 0; i < st.stack_ptrs.size(); ++i) { + for (std::size_t i = 0; i < trace.stack_ptrs.size(); ++i) { // Leading index os << "#" << i << " in "; auto const str = [&] { Dl_info info; - if (dladdr(st.stack_ptrs[i], &info)) { + if (dladdr(trace.stack_ptrs[i], &info) != 0) { int status = -1; // Demangle the name. This can occasionally fail std::unique_ptr<char, decltype(&::free)> demangled( - abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status), &::free); + abi::__cxa_demangle(info.dli_sname, nullptr, nullptr, &status), &::free); // If it fails, fallback to the dli_name. - if (status == 0 or info.dli_sname) { - auto name = status == 0 ? demangled.get() : info.dli_sname; + if (status == 0 or (info.dli_sname != nullptr)) { + auto const* name = status == 0 ? demangled.get() : info.dli_sname; return name + std::string(" from ") + info.dli_fname; } } @@ -103,6 +103,4 @@ class stack_trace { #endif // RMM_ENABLE_STACK_TRACES }; -} // namespace detail - -} // namespace rmm +} // namespace rmm::detail diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index f034b28fe..ee8e4e927 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -88,10 +88,7 @@ class device_buffer { // `__host__ __device__` specifiers to the defaulted constructor when it is called within the // context of both host and device functions.
Specifically, the `cudf::type_dispatcher` is a host- // device function. This causes warnings/errors because this ctor invokes host-only functions. - device_buffer() - : _data{nullptr}, _size{}, _capacity{}, _stream{}, _mr{rmm::mr::get_current_device_resource()} - { - } + device_buffer() : _mr{rmm::mr::get_current_device_resource()} {} /** * @brief Constructs a new device buffer of `size` uninitialized bytes @@ -310,7 +307,7 @@ class device_buffer { /** * @brief Returns raw pointer to underlying device memory allocation */ - void const* data() const noexcept { return _data; } + [[nodiscard]] void const* data() const noexcept { return _data; } /** * @brief Returns raw pointer to underlying device memory allocation @@ -321,7 +318,7 @@ class device_buffer { * @brief Returns size in bytes that was requested for the device memory * allocation */ - std::size_t size() const noexcept { return _size; } + [[nodiscard]] std::size_t size() const noexcept { return _size; } /** * @brief Returns whether the size in bytes of the `device_buffer` is zero. @@ -330,19 +327,19 @@ class device_buffer { * if `capacity() > 0`. * */ - bool is_empty() const noexcept { return 0 == size(); } + [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); } /** * @brief Returns actual size in bytes of device memory allocation. * * The invariant `size() <= capacity()` holds. */ - std::size_t capacity() const noexcept { return _capacity; } + [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; } /** * @brief Returns stream most recently specified for allocation/deallocation */ - cuda_stream_view stream() const noexcept { return _stream; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; } /** * @brief Sets the stream to be used for deallocation @@ -360,7 +357,7 @@ class device_buffer { * @brief Returns pointer to the memory resource used to allocate and * deallocate the device memory */ - mr::device_memory_resource* memory_resource() const noexcept { return _mr; } + [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _mr; } private: void* _data{nullptr}; ///< Pointer to device memory allocation diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index f44ba1c28..099abc08e 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -47,9 +47,9 @@ class device_scalar { ~device_scalar() = default; RMM_EXEC_CHECK_DISABLE - device_scalar(device_scalar&&) = default; + device_scalar(device_scalar&&) noexcept = default; - device_scalar& operator=(device_scalar&&) = default; + device_scalar& operator=(device_scalar&&) noexcept = default; /** * @brief Copy ctor is deleted as it doesn't allow a stream argument @@ -147,7 +147,10 @@ class device_scalar { * @return T The value of the scalar. * @param stream CUDA stream on which to perform the copy and synchronize. */ - value_type value(cuda_stream_view stream) const { return _storage.front_element(stream); } + [[nodiscard]] value_type value(cuda_stream_view stream) const + { + return _storage.front_element(stream); + } /** * @brief Sets the value of the `device_scalar` to the value of `v`. 
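A brief usage sketch of the renamed device_scalar API (editor's illustration, not part of the diff; assumes a current device resource is set, and uses a named host variable because the literal/r-value overloads are deleted):
rmm::cuda_stream stream{};
rmm::device_scalar<int> scalar{stream};      // allocates sizeof(int) bytes of device memory on `stream`
int host_value{42};
scalar.set_value_async(host_value, stream);  // asynchronous host-to-device copy
int result = scalar.value(stream);           // device-to-host copy; synchronizes `stream`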
@@ -186,9 +189,9 @@ class device_scalar { * @param v The host value which will be copied to device * @param stream CUDA stream on which to perform the copy */ - void set_value_async(value_type const& v, cuda_stream_view s) + void set_value_async(value_type const& value, cuda_stream_view stream) { - _storage.set_element_async(0, v, s); + _storage.set_element_async(0, value, stream); } // Disallow passing literals to set_value to avoid race conditions where the memory holding the @@ -209,9 +212,9 @@ class device_scalar { * * @param stream CUDA stream on which to perform the copy */ - void set_value_to_zero_async(cuda_stream_view s) + void set_value_to_zero_async(cuda_stream_view stream) { - _storage.set_element_to_zero_async(value_type{0}, s); + _storage.set_element_to_zero_async(value_type{0}, stream); } /** @@ -222,7 +225,7 @@ class device_scalar { * * streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may * be a race condition. */ - pointer data() noexcept { return static_cast<pointer>(_storage.data()); } + [[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); } /** * @brief Returns const pointer to object in device memory. @@ -232,7 +235,10 @@ class device_scalar { * streams (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`), otherwise there may * be a race condition. */ - const_pointer data() const noexcept { return static_cast<const_pointer>(_storage.data()); } + [[nodiscard]] const_pointer data() const noexcept + { + return static_cast<const_pointer>(_storage.data()); + } private: rmm::device_uvector<T> _storage; diff --git a/include/rmm/device_uvector.hpp b/include/rmm/device_uvector.hpp index ca4cf6d30..b4b12c824 100644 --- a/include/rmm/device_uvector.hpp +++ b/include/rmm/device_uvector.hpp @@ -84,9 +84,9 @@ class device_uvector { ~device_uvector() = default; RMM_EXEC_CHECK_DISABLE - device_uvector(device_uvector&&) = default; + device_uvector(device_uvector&&) noexcept = default; - device_uvector& operator=(device_uvector&&) = default; + device_uvector& operator=(device_uvector&&) noexcept = default; /** * @brief Copy ctor is deleted as it doesn't allow a stream argument @@ -147,7 +147,7 @@ class device_uvector { * @param element_index Index of the specified element. * @return T* Pointer to the desired element */ - pointer element_ptr(std::size_t element_index) noexcept + [[nodiscard]] pointer element_ptr(std::size_t element_index) noexcept { assert(element_index < size()); return data() + element_index; @@ -161,7 +161,7 @@ class device_uvector { * @param element_index Index of the specified element.
* @return T* Pointer to the desired element */ - const_pointer element_ptr(std::size_t element_index) const noexcept + [[nodiscard]] const_pointer element_ptr(std::size_t element_index) const noexcept { assert(element_index < size()); return data() + element_index; @@ -203,25 +203,28 @@ class device_uvector { * @param v The value to copy to the specified element * @param s The stream on which to perform the copy */ - void set_element_async(std::size_t element_index, value_type const& v, cuda_stream_view s) + void set_element_async(std::size_t element_index, + value_type const& value, + cuda_stream_view stream) { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); + + if constexpr (std::is_same<value_type, bool>::value) { + RMM_CUDA_TRY( + cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value())); + return; + } + if constexpr (std::is_fundamental<value_type>::value) { - if constexpr (std::is_same<value_type, bool>::value) { - RMM_CUDA_TRY(cudaMemsetAsync(element_ptr(element_index), v, sizeof(v), s.value())); - } else { - if (v == value_type{0}) { - set_element_to_zero_async(element_index, s); - } else { - RMM_CUDA_TRY(cudaMemcpyAsync( - element_ptr(element_index), &v, sizeof(v), cudaMemcpyDefault, s.value())); - } + if (value == value_type{0}) { + set_element_to_zero_async(element_index, stream); + return; } - } else { - RMM_CUDA_TRY( - cudaMemcpyAsync(element_ptr(element_index), &v, sizeof(v), cudaMemcpyDefault, s.value())); } + + RMM_CUDA_TRY(cudaMemcpyAsync( + element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); } // We delete the r-value reference overload to prevent asynchronously copying from a literal or @@ -250,11 +253,12 @@ class device_uvector { * @param element_index Index of the target element * @param s The stream on which to perform the copy */ - void set_element_to_zero_async(std::size_t element_index, cuda_stream_view s) + void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream) { RMM_EXPECTS( element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - RMM_CUDA_TRY(cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), s.value())); + RMM_CUDA_TRY( + cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), stream.value())); } /** @@ -283,13 +287,13 @@ class device_uvector { * @throws rmm::out_of_range exception if `element_index >= size()` * * @param element_index Index of the target element - * @param v The value to copy to the specified element - * @param s The stream on which to perform the copy + * @param value The value to copy to the specified element + * @param stream The stream on which to perform the copy */ - void set_element(std::size_t element_index, T const& v, cuda_stream_view s) + void set_element(std::size_t element_index, T const& value, cuda_stream_view stream) { - set_element_async(element_index, v, s); - s.synchronize_no_throw(); + set_element_async(element_index, value, stream); + stream.synchronize_no_throw(); } /** @@ -301,18 +305,18 @@ class device_uvector { * @throws rmm::out_of_range exception if `element_index >= size()` * * @param element_index Index of the desired element - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the specified element */ - value_type element(std::size_t element_index, cuda_stream_view s) const + [[nodiscard]] value_type element(std::size_t element_index, cuda_stream_view stream) const { RMM_EXPECTS(
element_index < size(), rmm::out_of_range, "Attempt to access out of bounds element."); - value_type v; - RMM_CUDA_TRY( - cudaMemcpyAsync(&v, element_ptr(element_index), sizeof(v), cudaMemcpyDefault, s.value())); - s.synchronize(); - return v; + value_type value; + RMM_CUDA_TRY(cudaMemcpyAsync( + &value, element_ptr(element_index), sizeof(value), cudaMemcpyDefault, stream.value())); + stream.synchronize(); + return value; } /** @@ -323,10 +327,13 @@ class device_uvector { * * @throws rmm::out_of_range exception if the vector is empty. * - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the first element */ - value_type front_element(cuda_stream_view s) const { return element(0, s); } + [[nodiscard]] value_type front_element(cuda_stream_view stream) const + { + return element(0, stream); + } /** * @brief Returns the last element. * * @throws rmm::out_of_range exception if the vector is empty. * - * @param s The stream on which to perform the copy + * @param stream The stream on which to perform the copy * @return The value of the last element */ - value_type back_element(cuda_stream_view s) const { return element(size() - 1, s); } + [[nodiscard]] value_type back_element(cuda_stream_view stream) const + { + return element(size() - 1, stream); + } /** * @brief Resizes the vector to contain `new_size` elements. @@ -384,7 +394,10 @@ class device_uvector { * @return std::size_t The number of elements that can be stored without requiring a new * allocation. */ - std::size_t capacity() const noexcept { return bytes_to_elements(_storage.capacity()); } + [[nodiscard]] std::size_t capacity() const noexcept + { + return bytes_to_elements(_storage.capacity()); + } /** * @brief Returns pointer to underlying device storage. * * @return Raw pointer to element storage in device memory. */ - pointer data() noexcept { return static_cast<pointer>(_storage.data()); } + [[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); } /** * @brief Returns const pointer to underlying device storage. * * @return const_pointer Raw const pointer to element storage in device memory. */ - const_pointer data() const noexcept { return static_cast<const_pointer>(_storage.data()); } + [[nodiscard]] const_pointer data() const noexcept + { + return static_cast<const_pointer>(_storage.data()); + } /** * @brief Returns an iterator to the first element. * * @return Iterator to the first element. */ - iterator begin() noexcept { return data(); } + [[nodiscard]] iterator begin() noexcept { return data(); } /** * @brief Returns a const_iterator to the first element. * * @return Immutable iterator to the first element. */ - const_iterator cbegin() const noexcept { return data(); } + [[nodiscard]] const_iterator cbegin() const noexcept { return data(); } /** * @brief Returns a const_iterator to the first element. * * @return Immutable iterator to the first element. */ - const_iterator begin() const noexcept { return cbegin(); } + [[nodiscard]] const_iterator begin() const noexcept { return cbegin(); } /** * @brief Returns an iterator to the element following the last element of the vector. * * @return Iterator to one past the last element.
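* * For example (editor's sketch, not part of this header; assumes Thrust and rmm::exec_policy are available): * @code * rmm::device_uvector<int> vec(100, stream); * thrust::fill(rmm::exec_policy(stream), vec.begin(), vec.end(), 0); * @endcode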
*/ - iterator end() noexcept { return data() + size(); } + [[nodiscard]] iterator end() noexcept { return data() + size(); } /** * @brief Returns a const_iterator to the element following the last element of the vector. @@ -451,7 +467,7 @@ class device_uvector { * * @return Immutable iterator to one past the last element. */ - const_iterator cend() const noexcept { return data() + size(); } + [[nodiscard]] const_iterator cend() const noexcept { return data() + size(); } /** * @brief Returns an iterator to the element following the last element of the vector. @@ -461,14 +477,14 @@ class device_uvector { * * @return Immutable iterator to one past the last element. */ - const_iterator end() const noexcept { return cend(); } + [[nodiscard]] const_iterator end() const noexcept { return cend(); } /** * @brief Returns the number of elements in the vector. * * @return The number of elements. */ - std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); } + [[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); } /** * @brief Returns true if the vector contains no elements, i.e., `size() == 0`. @@ -476,14 +492,14 @@ class device_uvector { * @return true The vector is empty * @return false The vector is not empty */ - bool is_empty() const noexcept { return size() == 0; } + [[nodiscard]] bool is_empty() const noexcept { return size() == 0; } /** * @brief Returns pointer to the resource used to allocate and deallocate the device storage. * * @return Pointer to underlying resource */ - mr::device_memory_resource* memory_resource() const noexcept + [[nodiscard]] mr::device_memory_resource* memory_resource() const noexcept { return _storage.memory_resource(); } @@ -491,12 +507,12 @@ class device_uvector { private: device_buffer _storage{}; ///< Device memory storage for vector elements - std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept + [[nodiscard]] std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept { return num_elements * sizeof(value_type); } - std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept + [[nodiscard]] std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept { return num_bytes / sizeof(value_type); } diff --git a/include/rmm/logger.hpp b/include/rmm/logger.hpp index 582190dca..43d4b6ea3 100644 --- a/include/rmm/logger.hpp +++ b/include/rmm/logger.hpp @@ -42,7 +42,7 @@ namespace detail { */ inline std::string default_log_filename() { - auto filename = std::getenv("RMM_DEBUG_LOG_FILE"); + auto* filename = std::getenv("RMM_DEBUG_LOG_FILE"); return (filename == nullptr) ? 
std::string{"rmm_log.txt"} : std::string{filename}; } @@ -75,16 +75,17 @@ struct logger_wrapper { struct bytes { std::size_t value; - friend std::ostream& operator<<(std::ostream& os, bytes const& b) + friend std::ostream& operator<<(std::ostream& os, bytes const& value) { - std::string const units[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; - int i = 0; - auto size = static_cast(b.value); + static std::array const units{ + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}; + int index = 0; + auto size = static_cast(value.value); while (size > 1024) { size /= 1024; - i++; + index++; } - return os << size << ' ' << units[i]; + return os << size << ' ' << units.at(index); } }; @@ -99,8 +100,8 @@ struct bytes { */ inline spdlog::logger& logger() { - static detail::logger_wrapper w{}; - return w.logger_; + static detail::logger_wrapper wrapped{}; + return wrapped.logger_; } // The default is INFO, but it should be used sparingly, so that by default a log file is only diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 4e29b90b3..31b23a442 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -55,20 +55,17 @@ class aligned_resource_adaptor final : public device_memory_resource { * @throws `rmm::logic_error` if `allocation_alignment` is not a power of 2 * * @param upstream The resource used for allocating/deallocating device memory. - * @param allocation_alignment The size used for allocation alignment. + * @param alignment The size used for allocation alignment. * @param alignment_threshold Only allocations with a size larger than or equal to this threshold * are aligned. */ - explicit aligned_resource_adaptor( - Upstream* upstream, - std::size_t allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, - std::size_t alignment_threshold = default_alignment_threshold) - : upstream_{upstream}, - allocation_alignment_{allocation_alignment}, - alignment_threshold_{alignment_threshold} + explicit aligned_resource_adaptor(Upstream* upstream, + std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment_threshold = default_alignment_threshold) + : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(allocation_alignment), + RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -121,21 +118,21 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || - bytes < alignment_threshold_) { + if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); - } else { - auto const size = upstream_allocation_size(bytes); - void* pointer = upstream_->allocate(size, stream); - auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, allocation_alignment_); - void* aligned_pointer = reinterpret_cast(aligned_address); - if (pointer != aligned_pointer) { - lock_guard lock(mtx_); - pointers_.emplace(aligned_pointer, pointer); - } - return aligned_pointer; } + auto const size = upstream_allocation_size(bytes); + 
void* pointer = upstream_->allocate(size, stream); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + auto const address = reinterpret_cast<std::size_t>(pointer); + auto const aligned_address = rmm::detail::align_up(address, alignment_); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) + void* aligned_pointer = reinterpret_cast<void*>(aligned_address); + if (pointer != aligned_pointer) { + lock_guard lock(mtx_); + pointers_.emplace(aligned_pointer, pointer); + } + return aligned_pointer; } /** @@ -147,21 +144,20 @@ class aligned_resource_adaptor final : public device_memory_resource { * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (allocation_alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || - bytes < alignment_threshold_) { - upstream_->deallocate(p, bytes, stream); + if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + upstream_->deallocate(ptr, bytes, stream); } else { { lock_guard lock(mtx_); - auto const i = pointers_.find(p); - if (i != pointers_.end()) { - p = i->second; - pointers_.erase(i); + auto const iter = pointers_.find(ptr); + if (iter != pointers_.end()) { + ptr = iter->second; + pointers_.erase(iter); } } - upstream_->deallocate(p, upstream_allocation_size(bytes), stream); + upstream_->deallocate(ptr, upstream_allocation_size(bytes), stream); } } @@ -176,14 +172,10 @@ class aligned_resource_adaptor final : public device_memory_resource { */ [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other); - return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && - allocation_alignment_ == cast->allocation_alignment_ && - alignment_threshold_ == cast->alignment_threshold_; - } + if (this == &other) { return true; } + auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other); + return cast != nullptr && upstream_->is_equal(*cast->get_upstream()) && + alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_; } /** @@ -211,13 +203,13 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, allocation_alignment_); - return aligned_size + allocation_alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::detail::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests std::unordered_map<void*, void*> pointers_; ///< Map of aligned pointers to upstream pointers. - std::size_t allocation_alignment_; ///< The size used for allocation alignment + std::size_t alignment_; ///< The size used for allocation alignment std::size_t alignment_threshold_; ///< The size above which allocations should be aligned mutable std::mutex mtx_; ///< Mutex for exclusive lock.
}; diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 63c56c1d8..ce8737225 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -100,9 +100,13 @@ class arena_memory_resource final : public device_memory_resource { } } + ~arena_memory_resource() override = default; + // Disable copy (and move) semantics. arena_memory_resource(arena_memory_resource const&) = delete; arena_memory_resource& operator=(arena_memory_resource const&) = delete; + arena_memory_resource(arena_memory_resource&&) noexcept = delete; + arena_memory_resource& operator=(arena_memory_resource&&) noexcept = delete; /** * @brief Queries whether the resource supports use of non-null CUDA streams for @@ -138,16 +142,16 @@ class arena_memory_resource final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } bytes = detail::arena::align_up(bytes); - auto& a = get_arena(stream); - void* pointer = a.allocate(bytes); + auto& arena = get_arena(stream); + void* pointer = arena.allocate(bytes); if (pointer == nullptr) { write_lock lock(mtx_); defragment(); - pointer = a.allocate(bytes); + pointer = arena.allocate(bytes); if (pointer == nullptr) { if (dump_log_on_failure_) { dump_memory_log(bytes); } RMM_FAIL("Maximum pool size exceeded", rmm::bad_alloc); @@ -158,19 +162,19 @@ class arena_memory_resource final : public device_memory_resource { } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `ptr`. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. 
*/ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (p == nullptr || bytes <= 0) return; + if (ptr == nullptr || bytes <= 0) { return; } bytes = detail::arena::align_up(bytes); - get_arena(stream).deallocate(p, bytes, stream); + get_arena(stream).deallocate(ptr, bytes, stream); } /** @@ -179,11 +183,11 @@ class arena_memory_resource final : public device_memory_resource { void defragment() { RMM_CUDA_TRY(cudaDeviceSynchronize()); - for (auto& kv : thread_arenas_) { - kv.second->clean(); + for (auto& thread_arena : thread_arenas_) { + thread_arena.second->clean(); } - for (auto& kv : stream_arenas_) { - kv.second.clean(); + for (auto& stream_arena : stream_arenas_) { + stream_arena.second.clean(); } } @@ -195,11 +199,8 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_arena(cuda_stream_view stream) { - if (use_per_thread_arena(stream)) { - return get_thread_arena(); - } else { - return get_stream_arena(stream); - } + if (use_per_thread_arena(stream)) { return get_thread_arena(); } + return get_stream_arena(stream); } /** @@ -209,18 +210,18 @@ class arena_memory_resource final : public device_memory_resource { */ arena& get_thread_arena() { - auto const id = std::this_thread::get_id(); + auto const thread_id = std::this_thread::get_id(); { read_lock lock(mtx_); - auto const it = thread_arenas_.find(id); - if (it != thread_arenas_.end()) { return *it->second; } + auto const iter = thread_arenas_.find(thread_id); + if (iter != thread_arenas_.end()) { return *iter->second; } } { write_lock lock(mtx_); - auto a = std::make_shared<arena>(global_arena_); - thread_arenas_.emplace(id, a); - thread_local detail::arena::arena_cleaner<Upstream> cleaner{a}; - return *a; + auto thread_arena = std::make_shared<arena>(global_arena_); + thread_arenas_.emplace(thread_id, thread_arena); + thread_local detail::arena::arena_cleaner<Upstream> cleaner{thread_arena}; + return *thread_arena; } } @@ -234,8 +235,8 @@ class arena_memory_resource final : public device_memory_resource { RMM_LOGGING_ASSERT(!use_per_thread_arena(stream)); { read_lock lock(mtx_); - auto const it = stream_arenas_.find(stream.value()); - if (it != stream_arenas_.end()) { return it->second; } + auto const iter = stream_arenas_.find(stream.value()); + if (iter != stream_arenas_.end()) { return iter->second; } } { write_lock lock(mtx_); @@ -268,15 +269,15 @@ class arena_memory_resource final : public device_memory_resource { logger_->info("Global arena:"); global_arena_.dump_memory_log(logger_); logger_->info("Per-thread arenas:"); - for (auto const& t : thread_arenas_) { - logger_->info(" Thread {}:", t.first); - t.second->dump_memory_log(logger_); + for (auto const& thread_arena : thread_arenas_) { + logger_->info(" Thread {}:", thread_arena.first); + thread_arena.second->dump_memory_log(logger_); } if (!stream_arenas_.empty()) { logger_->info("Per-stream arenas:"); - for (auto const& s : stream_arenas_) { - logger_->info(" Stream {}:", static_cast<void*>(s.first)); - s.second.dump_memory_log(logger_); + for (auto const& stream_arena : stream_arenas_) { + logger_->info(" Stream {}:", static_cast<void*>(stream_arena.first)); + stream_arena.second.dump_memory_log(logger_); } } } diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index 7b0d9f48f..5d4b156c8 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++
b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,8 +27,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Allocates memory from upstream resources associated with bin sizes. @@ -70,22 +69,23 @@ class binning_memory_resource final : public device_memory_resource { * @param max_size_exponent The maximum base-2 exponent bin size. */ binning_memory_resource(Upstream* upstream_resource, - int8_t min_size_exponent, + int8_t min_size_exponent, // NOLINT(bugprone-easily-swappable-parameters) int8_t max_size_exponent) : upstream_mr_{[upstream_resource]() { RMM_EXPECTS(nullptr != upstream_resource, "Unexpected null upstream pointer."); return upstream_resource; }()} { - for (auto i = min_size_exponent; i <= max_size_exponent; i++) + for (auto i = min_size_exponent; i <= max_size_exponent; i++) { add_bin(1 << i); + } } /** * @brief Destroy the binning_memory_resource and free all memory allocated from the upstream * resource. */ - ~binning_memory_resource() = default; + ~binning_memory_resource() override = default; binning_memory_resource() = delete; binning_memory_resource(binning_memory_resource const&) = delete; @@ -99,21 +99,21 @@ class binning_memory_resource final : public device_memory_resource { * * @returns true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. * * @return UpstreamResource* the upstream memory resource. */ - Upstream* get_upstream() const noexcept { return upstream_mr_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_mr_; } /** * @brief Add a bin allocator to this resource @@ -136,15 +136,13 @@ class binning_memory_resource final : public device_memory_resource { allocation_size = rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - if (nullptr != bin_resource) + if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); - else { - // If the bin already exists, do nothing. 
- if (resource_bins_.count(allocation_size) == 0) { - owned_bin_resources_.push_back( - std::make_unique<fixed_size_memory_resource<Upstream>>(upstream_mr_, allocation_size)); - resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); - } + } else if (resource_bins_.count(allocation_size) == 0) { // do nothing if bin already exists + + owned_bin_resources_.push_back( + std::make_unique<fixed_size_memory_resource<Upstream>>(upstream_mr_, allocation_size)); + resource_bins_.insert({allocation_size, owned_bin_resources_.back().get()}); } } @@ -175,7 +173,7 @@ */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (bytes <= 0) return nullptr; + if (bytes <= 0) { return nullptr; } return get_resource(bytes)->allocate(bytes, stream); } @@ -189,10 +187,10 @@ * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { auto res = get_resource(bytes); - if (res != nullptr) res->deallocate(p, bytes, stream); + if (res != nullptr) { res->deallocate(ptr, bytes, stream); } } /** @@ -203,7 +201,8 @@ * * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } @@ -215,5 +214,4 @@ std::map<std::size_t, device_memory_resource*> resource_bins_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index 57ec97ff8..9111a2da3 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -32,8 +32,7 @@ #define RMM_CUDA_MALLOC_ASYNC_SUPPORT #endif -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for @@ -55,6 +54,7 @@ class cuda_async_memory_resource final : public device_memory_resource { * @param release_threshold Optional release threshold size in bytes of the pool. If no value is * provided, the release threshold is set to the total amount of memory on the current device.
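* * Example (editor's sketch, not part of the diff): prime the pool with 1 GiB and set a 2 GiB release threshold: * @code * rmm::mr::cuda_async_memory_resource mr{1UL << 30U, 2UL << 30U}; * @endcode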
*/ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) cuda_async_memory_resource(thrust::optional<std::size_t> initial_pool_size = {}, thrust::optional<std::size_t> release_threshold = {}) { @@ -62,9 +62,9 @@ // Check if cudaMallocAsync Memory pool supported auto const device = rmm::detail::current_device(); int cuda_pool_supported{}; - auto e = + auto result = cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value()); - RMM_EXPECTS(e == cudaSuccess && cuda_pool_supported, + RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported, "cudaMallocAsync not supported with this CUDA driver/runtime version"); // Construct explicit pool @@ -84,9 +84,9 @@ class cuda_async_memory_resource final : public device_memory_resource { // Allocate and immediately deallocate the initial_pool_size to prime the pool with the // specified size - auto const pool_size = initial_pool_size.value_or(free * 0.5); - auto p = do_allocate(pool_size, cuda_stream_default); - do_deallocate(p, pool_size, cuda_stream_default); + auto const pool_size = initial_pool_size.value_or(free / 2); + auto* ptr = do_allocate(pool_size, cuda_stream_default); + do_deallocate(ptr, pool_size, cuda_stream_default); #else RMM_FAIL( @@ -99,10 +99,10 @@ class cuda_async_memory_resource final : public device_memory_resource { * @brief Returns the underlying native handle to the CUDA pool * */ - cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } + [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; } #endif - ~cuda_async_memory_resource() + ~cuda_async_memory_resource() override { #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) RMM_ASSERT_CUDA_SUCCESS(cudaMemPoolDestroy(pool_handle())); @@ -119,18 +119,18 @@ * * @returns bool true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API.
* * @return true */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } private: #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - cudaMemPool_t cuda_pool_handle_; + cudaMemPool_t cuda_pool_handle_{}; #endif /** @@ -145,17 +145,17 @@ */ void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { - void* p{nullptr}; + void* ptr{nullptr}; #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT if (bytes > 0) { - RMM_CUDA_TRY(cudaMallocFromPoolAsync(&p, bytes, pool_handle(), stream.value()), + RMM_CUDA_TRY(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()), rmm::bad_alloc); } #else (void)bytes; (void)stream; #endif - return p; + return ptr; } /** @@ -165,12 +165,12 @@ * * @param p Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, rmm::cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override { #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT - if (p != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(p, stream.value())); } + if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); } #else - (void)p; + (void)ptr; (void)stream; #endif } @@ -184,7 +184,7 @@ * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast<cuda_async_memory_resource const*>(&other) != nullptr; } @@ -196,11 +196,11 @@ * * @return std::pair contaiing free_size and total_size of memory */ - std::pair<std::size_t, std::size_t> do_get_mem_info(rmm::cuda_stream_view) const override + [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info( + rmm::cuda_stream_view) const override { return std::make_pair(0, 0); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/cuda_memory_resource.hpp b/include/rmm/mr/device/cuda_memory_resource.hpp index d419ce335..b5b3d87df 100644 --- a/include/rmm/mr/device/cuda_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_memory_resource.hpp @@ -22,8 +22,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses cudaMalloc/Free for * allocation/deallocation. */ class cuda_memory_resource final : public device_memory_resource { public: cuda_memory_resource() = default; - ~cuda_memory_resource() = default; + ~cuda_memory_resource() override = default; cuda_memory_resource(cuda_memory_resource const&) = default; cuda_memory_resource(cuda_memory_resource&&) = default; cuda_memory_resource& operator=(cuda_memory_resource const&) = default; @@ -43,14 +42,14 @@ class cuda_memory_resource final : public device_memory_resource { * * @returns bool false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API.
* * @return true */ - bool supports_get_mem_info() const noexcept override { return true; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } private: /** @@ -67,9 +66,9 @@ */ void* do_allocate(std::size_t bytes, cuda_stream_view) override { - void* p{nullptr}; - RMM_CUDA_TRY(cudaMalloc(&p, bytes), rmm::bad_alloc); - return p; + void* ptr{nullptr}; + RMM_CUDA_TRY(cudaMalloc(&ptr, bytes), rmm::bad_alloc); + return ptr; } /** @@ -81,9 +80,9 @@ * * @param p Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override { - RMM_ASSERT_CUDA_SUCCESS(cudaFree(p)); + RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } /** @@ -98,7 +97,7 @@ * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast<cuda_memory_resource const*>(&other) != nullptr; } @@ -110,13 +109,12 @@ * * @return std::pair contaiing free_size and total_size of memory */ - std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view) const override + [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view) const override { - std::size_t free_size; - std::size_t total_size; + std::size_t free_size{}; + std::size_t total_size{}; RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); return std::make_pair(free_size, total_size); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index e03a6ee20..6cfe94058 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -38,7 +38,7 @@ namespace rmm::mr::detail::arena { /// Minimum size of a superblock (256 KiB). -constexpr std::size_t minimum_superblock_size = 1u << 18u; +constexpr std::size_t minimum_superblock_size = 1U << 18U; /** * @brief Represents a chunk of memory that can be allocated and deallocated. @@ -87,33 +86,32 @@ class block { * @return true Returns true if this block's `pointer` + `size` == `b.ptr`, and `not b.is_head`, false otherwise. */ - [[nodiscard]] bool is_contiguous_before(block const& b) const + [[nodiscard]] bool is_contiguous_before(block const& blk) const { - return pointer_ + size_ == b.pointer_; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return pointer_ + size_ == blk.pointer_; } /** * @brief Is this block large enough to fit `sz` bytes? * - * @param sz The size in bytes to check for fit. + * @param size The size in bytes to check for fit. * @return true if this block is at least `sz` bytes. */ - [[nodiscard]] bool fits(std::size_t sz) const { return size_ >= sz; } + [[nodiscard]] bool fits(std::size_t size) const { return size_ >= size; } /** * @brief Split this block into two by the given size. * - * @param sz The size in bytes of the first block. + * @param size The size in bytes of the first block. * @return std::pair<block, block> A pair of blocks split by sz.
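* * For example (editor's illustration): splitting a 1 MiB block at 256 KiB yields {pointer_, 256 KiB} and {pointer_ + 256 KiB, 768 KiB}: * @code * auto const [head, remainder] = blk.split(256U << 10U); * @endcode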
*/ - [[nodiscard]] std::pair<block, block> split(std::size_t sz) const + [[nodiscard]] std::pair<block, block> split(std::size_t size) const { - RMM_LOGGING_ASSERT(size_ >= sz); + RMM_LOGGING_ASSERT(size_ >= size); - if (size_ > sz) { - return {{pointer_, sz}, {pointer_ + sz, size_ - sz}}; - } else { - return {*this, {}}; - } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (size_ > size) { return {{pointer_, size}, {pointer_ + size, size_ - size}}; } + return {*this, {}}; } /** @@ -124,21 +123,21 @@ class block { * @param b block to merge. * @return block The merged block. */ - [[nodiscard]] block merge(block const& b) const + [[nodiscard]] block merge(block const& blk) const { - RMM_LOGGING_ASSERT(is_contiguous_before(b)); + RMM_LOGGING_ASSERT(is_contiguous_before(blk)); - return {pointer_, size_ + b.size_}; + return {pointer_, size_ + blk.size_}; } /// Used by std::set to compare blocks. - bool operator<(block const& b) const { return pointer_ < b.pointer_; } + bool operator<(block const& blk) const { return pointer_ < blk.pointer_; } private: char* pointer_{}; ///< Raw memory pointer. std::size_t size_{}; ///< Size in bytes. }; -inline bool block_size_compare(block a, block b) { return a.size() < b.size(); } +inline bool block_size_compare(block lhs, block rhs) { return lhs.size() < rhs.size(); } /** * @brief Align up to the allocation alignment. @@ -146,9 +145,9 @@ inline bool block_size_compare(block a, block b) { return a.size() < b.size(); } * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_up(std::size_t v) noexcept +constexpr std::size_t align_up(std::size_t value) noexcept { - return rmm::detail::align_up(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_up(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -157,9 +156,9 @@ constexpr std::size_t align_up(std::size_t v) noexcept * @param[in] v value to align * @return Return the aligned value */ -constexpr std::size_t align_down(std::size_t v) noexcept +constexpr std::size_t align_down(std::size_t value) noexcept { - return rmm::detail::align_down(v, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + return rmm::detail::align_down(value, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); } /** @@ -179,24 +178,20 @@ constexpr std::size_t align_down(std::size_t v) noexcept inline block first_fit(std::set<block>& free_blocks, std::size_t size) { auto const iter = std::find_if( - free_blocks.cbegin(), free_blocks.cend(), [size](auto const& b) { return b.fits(size); }); - - if (iter == free_blocks.cend()) { - return {}; - } else { - // Remove the block from the free_list. - auto const b = *iter; - auto const i = free_blocks.erase(iter); - - if (b.size() > size) { - // Split the block and put the remainder back. - auto const split = b.split(size); - free_blocks.insert(i, split.second); - return split.first; - } else { - return b; - } + free_blocks.cbegin(), free_blocks.cend(), [size](auto const& blk) { return blk.fits(size); }); + + if (iter == free_blocks.cend()) { return {}; } + // Remove the block from the free_list. + auto const blk = *iter; + auto const next = free_blocks.erase(iter); + + if (blk.size() > size) { + // Split the block and put the remainder back. + auto const split = blk.split(size); + free_blocks.insert(next, split.second); + return split.first; } + return blk; } /** @@ -206,35 +201,35 @@ inline block first_fit(std::set<block>& free_blocks, std::size_t size) * @param b The block to coalesce. * @return block The coalesced block.
*/ -inline block coalesce_block(std::set<block>& free_blocks, block const& b) +inline block coalesce_block(std::set<block>& free_blocks, block const& blk) { - if (!b.is_valid()) return b; + if (!blk.is_valid()) { return blk; } // Find the right place (in ascending address order) to insert the block. - auto const next = free_blocks.lower_bound(b); + auto const next = free_blocks.lower_bound(blk); auto const previous = next == free_blocks.cbegin() ? next : std::prev(next); // Coalesce with neighboring blocks. - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = next != free_blocks.cend() && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(blk); + bool const merge_next = next != free_blocks.cend() && blk.is_contiguous_before(*next); block merged{}; if (merge_prev && merge_next) { - merged = previous->merge(b).merge(*next); + merged = previous->merge(blk).merge(*next); free_blocks.erase(previous); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else if (merge_prev) { - merged = previous->merge(b); - auto const i = free_blocks.erase(previous); - free_blocks.insert(i, merged); + merged = previous->merge(blk); + auto const iter = free_blocks.erase(previous); + free_blocks.insert(iter, merged); } else if (merge_next) { - merged = b.merge(*next); - auto const i = free_blocks.erase(next); - free_blocks.insert(i, merged); + merged = blk.merge(*next); + auto const iter = free_blocks.erase(next); + free_blocks.insert(iter, merged); } else { - free_blocks.emplace(b); - merged = b; + free_blocks.emplace(blk); + merged = blk; } return merged; } @@ -242,8 +237,9 @@ inline block coalesce_block(std::set<block>& free_blocks, block const& b) template <typename T> inline auto total_block_size(T const& blocks) { - return std::accumulate( - blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto a, auto b) { return a + b.size(); }); + return std::accumulate(blocks.cbegin(), blocks.cend(), std::size_t{}, [](auto lhs, auto rhs) { + return lhs + rhs.size(); + }); } /** @@ -262,7 +258,7 @@ class global_arena final { /// The default maximum size for the global arena. static constexpr std::size_t default_maximum_size = std::numeric_limits<std::size_t>::max(); /// Reserved memory that should not be allocated (64 MiB). - static constexpr std::size_t reserved_size = 1u << 26u; + static constexpr std::size_t reserved_size = 1U << 26U; /** * @brief Construct a global arena. @@ -289,7 +285,8 @@ "Error, Maximum arena size required to be a multiple of 256 bytes"); if (initial_size == default_initial_size || maximum_size == default_maximum_size) { - std::size_t free{}, total{}; + std::size_t free{}; + std::size_t total{}; RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); if (initial_size == default_initial_size) { initial_size = align_up(std::min(free, total / 2)); @@ -304,8 +301,10 @@ } // Disable copy (and move) semantics.
- global_arena(const global_arena&) = delete; - global_arena& operator=(const global_arena&) = delete; + global_arena(global_arena const&) = delete; + global_arena& operator=(global_arena const&) = delete; + global_arena(global_arena&&) noexcept = delete; + global_arena& operator=(global_arena&&) noexcept = delete; /** * @brief Destroy the global arena and deallocate all memory it allocated using the upstream * resource. */ ~global_arena() { lock_guard lock(mtx_); - for (auto const& b : upstream_blocks_) { - upstream_mr_->deallocate(b.pointer(), b.size()); + for (auto const& blk : upstream_blocks_) { + upstream_mr_->deallocate(blk.pointer(), blk.size()); } } @@ -334,16 +333,14 @@ } /** - * @brief Deallocate memory pointed to by `p`. + * @brief Deallocate memory pointed to by `blk`. * - * @param p Pointer to be deallocated. - * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` - * that was passed to the `allocate` call that returned `p`. + * @param blk Block to be deallocated. */ - void deallocate(block const& b) + void deallocate(block const& blk) { lock_guard lock(mtx_); - coalesce_block(free_blocks_, b); + coalesce_block(free_blocks_, blk); } /** @@ -354,8 +351,8 @@ void deallocate(std::set<block> const& free_blocks) { lock_guard lock(mtx_); - for (auto const& b : free_blocks) { - coalesce_block(free_blocks_, b); + for (auto const& blk : free_blocks) { + coalesce_block(free_blocks_, blk); } } @@ -375,9 +372,9 @@ if (!free_blocks_.empty()) { logger->info(" Total size of free blocks: {}", rmm::detail::bytes{total_block_size(free_blocks_)}); - auto const m = + auto const largest_free = *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); - logger->info(" Size of largest free block: {}", rmm::detail::bytes{m.size()}); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); } logger->info(" # upstream blocks={}", upstream_blocks_.size()); @@ -397,8 +394,8 @@ block get_block(std::size_t size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) return b; + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } // No existing larger blocks available, so grow the arena. auto const upstream_block = expand_arena(size_to_grow(size)); @@ -416,11 +413,8 @@ */ constexpr std::size_t size_to_grow(std::size_t size) const { - if (current_size_ + size > maximum_size_) { - return 0; - } else { - return maximum_size_ - current_size_; - } + if (current_size_ + size > maximum_size_) { return 0; } + return maximum_size_ - current_size_; } /** @@ -435,9 +429,8 @@ upstream_blocks_.push_back({upstream_mr_->allocate(size), size}); current_size_ += size; return upstream_blocks_.back(); - } else { - return {}; } + return {}; } /// The upstream resource to allocate memory from. @@ -473,9 +466,13 @@ */ explicit arena(global_arena<Upstream>& global_arena) : global_arena_{global_arena} {} + ~arena() = default; + // Disable copy (and move) semantics. - arena(const arena&) = delete; - arena& operator=(const arena&) = delete; + arena(arena const&) = delete; + arena& operator=(arena const&) = delete; + arena(arena&&) noexcept = delete; + arena& operator=(arena&&) noexcept = delete; /** * @brief Allocates memory of size at least `bytes`.
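For orientation before the next hunk (editor's sketch, not part of the diff): these detail classes are driven through rmm::mr::arena_memory_resource, which owns one global_arena and hands out per-thread arenas:
rmm::mr::cuda_memory_resource upstream;
rmm::mr::arena_memory_resource<rmm::mr::cuda_memory_resource> mr{&upstream};
void* ptr = mr.allocate(1U << 20U);  // served from the calling thread's arena
mr.deallocate(ptr, 1U << 20U);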
@@ -488,23 +485,23 @@ class arena { void* allocate(std::size_t bytes) { lock_guard lock(mtx_); - auto const b = get_block(bytes); - return b.pointer(); + auto const blk = get_block(bytes); + return blk.pointer(); } /** - * @brief Deallocate memory pointed to by `p`, and possibly return superblocks to upstream. + * @brief Deallocate memory pointed to by `ptr`, and possibly return superblocks to upstream. * - * @param p Pointer to be deallocated. + * @param ptr Pointer to be deallocated. * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` * that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation. */ - void deallocate(void* p, std::size_t bytes, cuda_stream_view stream) + void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) { lock_guard lock(mtx_); - block const b{p, bytes}; - auto const merged = coalesce_block(free_blocks_, b); + block const blk{ptr, bytes}; + auto const merged = coalesce_block(free_blocks_, blk); shrink_arena(merged, stream); } @@ -528,15 +525,11 @@ class arena { lock_guard lock(mtx_); logger->info(" # free blocks: {}", free_blocks_.size()); if (!free_blocks_.empty()) { - std::size_t total_free{}; - for (auto const& b : free_blocks_) { - total_free += b.size(); - } logger->info(" Total size of free blocks: {}", rmm::detail::bytes{total_block_size(free_blocks_)}); - auto const m = + auto const largest_free = *std::max_element(free_blocks_.begin(), free_blocks_.end(), block_size_compare); - logger->info(" Size of largest free block: {}", rmm::detail::bytes{m.size()}); + logger->info(" Size of largest free block: {}", rmm::detail::bytes{largest_free.size()}); } } @@ -549,14 +542,14 @@ class arena { * @brief Get an available memory block of at least `size` bytes. * * @param size The number of bytes to allocate. - * @return block A block of memory of at least `size` bytes. + * @return A block of memory of at least `size` bytes. */ block get_block(std::size_t size) { if (size < minimum_superblock_size) { // Find the first-fit free block. - auto const b = first_fit(free_blocks_, size); - if (b.is_valid()) { return b; } + auto const blk = first_fit(free_blocks_, size); + if (blk.is_valid()) { return blk; } } // No existing larger blocks available, so grow the arena and obtain a superblock. @@ -564,15 +557,14 @@ class arena { if (superblock.is_valid()) { coalesce_block(free_blocks_, superblock); return first_fit(free_blocks_, size); - } else { - return superblock; } + return superblock; } /** * @brief Allocate space from upstream to supply the arena and return a superblock. * - * @return block A superblock. + * @return A superblock. */ block expand_arena(std::size_t size) { @@ -583,15 +575,15 @@ class arena { /** * @brief Shrink this arena by returning free superblocks to upstream. * - * @param b The block that can be used to shrink the arena. + * @param blk The block that can be used to shrink the arena. * @param stream Stream on which to perform shrinking. 
   */
-  void shrink_arena(block const& b, cuda_stream_view stream)
+  void shrink_arena(block const& blk, cuda_stream_view stream)
   {
-    if (b.is_superblock() || free_blocks_.size() > max_free_blocks) {
+    if (blk.is_superblock() || free_blocks_.size() > max_free_blocks) {
       stream.synchronize_no_throw();
-      global_arena_.deallocate(b);
-      free_blocks_.erase(b);
+      global_arena_.deallocate(blk);
+      free_blocks_.erase(blk);
     }
   }
@@ -614,11 +606,13 @@ class arena {
 template <typename Upstream>
 class arena_cleaner {
  public:
-  explicit arena_cleaner(std::shared_ptr<arena<Upstream>> const& a) : arena_(a) {}
+  explicit arena_cleaner(std::shared_ptr<arena<Upstream>> const& arena) : arena_(arena) {}
   // Disable copy (and move) semantics.
-  arena_cleaner(const arena_cleaner&) = delete;
-  arena_cleaner& operator=(const arena_cleaner&) = delete;
+  arena_cleaner(arena_cleaner const&) = delete;
+  arena_cleaner& operator=(arena_cleaner const&) = delete;
+  arena_cleaner(arena_cleaner&&) noexcept = delete;
+  arena_cleaner& operator=(arena_cleaner&&) = delete;
   ~arena_cleaner()
   {
diff --git a/include/rmm/mr/device/detail/coalescing_free_list.hpp b/include/rmm/mr/device/detail/coalescing_free_list.hpp
index d0c0f399e..bbdd98ec9 100644
--- a/include/rmm/mr/device/detail/coalescing_free_list.hpp
+++ b/include/rmm/mr/device/detail/coalescing_free_list.hpp
@@ -25,9 +25,7 @@
 #include
 #include
-namespace rmm {
-namespace mr {
-namespace detail {
+namespace rmm::mr::detail {

 /**
  * @brief A simple block structure specifying the size and location of a block
@@ -46,14 +44,14 @@ struct block : public block_base {
   *
   * @return the pointer to the memory represented by this block.
   */
-  inline char* pointer() const { return static_cast<char*>(ptr); }
+  [[nodiscard]] inline char* pointer() const { return static_cast<char*>(ptr); }

   /**
    * @brief Returns the size of the memory represented by this block.
    *
    * @return the size in bytes of the memory represented by this block.
    */
-  inline std::size_t size() const { return size_bytes; }
+  [[nodiscard]] inline std::size_t size() const { return size_bytes; }

   /**
    * @brief Returns whether this block is the start of an allocation from an upstream allocator.
    *
@@ -62,7 +60,7 @@ struct block : public block_base {
    *
    * @return true if this block is the start of an allocation from an upstream allocator.
    */
-  inline bool is_head() const { return head; }
+  [[nodiscard]] inline bool is_head() const { return head; }

   /**
    * @brief Comparison operator to enable comparing blocks and storing in ordered containers.
    *
@@ -81,46 +79,47 @@ struct block : public block_base {
    * `this` must immediately precede `b` and both `this` and `b` must be from the same upstream
    * allocation. That is, `this->is_contiguous_before(b)`. Otherwise behavior is undefined.
    *
-   * @param b block to merge
-   * @return block The merged block
+   * @param blk block to merge
+   * @return The merged block
    */
-  inline block merge(block const& b) const noexcept
+  [[nodiscard]] inline block merge(block const& blk) const noexcept
   {
-    assert(is_contiguous_before(b));
-    return block(pointer(), size() + b.size(), is_head());
+    assert(is_contiguous_before(blk));
+    return {pointer(), size() + blk.size(), is_head()};
   }

   /**
    * @brief Verifies whether this block can be merged to the beginning of block b.
    *
-   * @param b The block to check for contiguity.
-   * @return true Returns true if this blocks's `ptr` + `size` == `b.ptr`, and `not b.is_head`,
-                  false otherwise.
+   * @param blk The block to check for contiguity.
+   * @return true if this block's `ptr` + `size` == `blk.ptr`, and `not blk.is_head()`,
+             false otherwise.
*/ - inline bool is_contiguous_before(block const& b) const noexcept + [[nodiscard]] inline bool is_contiguous_before(block const& blk) const noexcept { - return (pointer() + size() == b.ptr) and not(b.is_head()); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return (pointer() + size() == blk.ptr) and not(blk.is_head()); } /** * @brief Is this block large enough to fit `sz` bytes? * - * @param sz The size in bytes to check for fit. - * @return true if this block is at least `sz` bytes + * @param bytes The size in bytes to check for fit. + * @return true if this block is at least `bytes` bytes */ - inline bool fits(std::size_t sz) const noexcept { return size() >= sz; } + [[nodiscard]] inline bool fits(std::size_t bytes) const noexcept { return size() >= bytes; } /** * @brief Is this block a better fit for `sz` bytes than block `b`? * - * @param sz The size in bytes to check for best fit. - * @param b The other block to check for fit. - * @return true If this block is a tighter fit for `sz` bytes than block `b`. - * @return false If this block does not fit `sz` bytes or `b` is a tighter fit. + * @param bytes The size in bytes to check for best fit. + * @param blk The other block to check for fit. + * @return true If this block is a tighter fit for `bytes` bytes than block `blk`. + * @return false If this block does not fit `bytes` bytes or `blk` is a tighter fit. */ - inline bool is_better_fit(std::size_t sz, block const& b) const noexcept + [[nodiscard]] inline bool is_better_fit(std::size_t bytes, block const& blk) const noexcept { - return fits(sz) && (size() < b.size() || b.size() < sz); + return fits(bytes) && (size() < blk.size() || blk.size() < bytes); } /** @@ -128,7 +127,7 @@ struct block : public block_base { */ inline void print() const { - std::cout << reinterpret_cast(pointer()) << " " << size() << " B\n"; + std::cout << fmt::format("{} {} B", fmt::ptr(pointer()), size()) << std::endl; } private: @@ -137,9 +136,9 @@ struct block : public block_base { }; /// Print block on an ostream -inline std::ostream& operator<<(std::ostream& out, const block& b) +inline std::ostream& operator<<(std::ostream& out, const block& blk) { - out << b.pointer() << " " << b.size() << " B\n"; + out << fmt::format("{} {} B\n", fmt::ptr(blk.pointer()), blk.size()); return out; } @@ -166,8 +165,8 @@ struct compare_blocks { * @tparam list_type the type of the internal list data structure. */ struct coalescing_free_list : free_list { - coalescing_free_list() = default; - ~coalescing_free_list() = default; + coalescing_free_list() = default; + ~coalescing_free_list() override = default; coalescing_free_list(coalescing_free_list const&) = delete; coalescing_free_list& operator=(coalescing_free_list const&) = delete; @@ -180,47 +179,47 @@ struct coalescing_free_list : free_list { * * @param b The block to insert. */ - void insert(block_type const& b) + void insert(block_type const& block) { if (is_empty()) { - free_list::insert(cend(), b); + free_list::insert(cend(), block); return; } // Find the right place (in ascending ptr order) to insert the block // Can't use binary_search because it's a linked list and will be quadratic - auto const next = std::find_if(begin(), end(), [b](block_type const& i) { return b < i; }); + auto const next = + std::find_if(begin(), end(), [block](block_type const& blk) { return block < blk; }); auto const previous = (next == cbegin()) ? 
next : std::prev(next); // Coalesce with neighboring blocks or insert the new block if it can't be coalesced - bool const merge_prev = previous->is_contiguous_before(b); - bool const merge_next = (next != cend()) && b.is_contiguous_before(*next); + bool const merge_prev = previous->is_contiguous_before(block); + bool const merge_next = (next != cend()) && block.is_contiguous_before(*next); if (merge_prev && merge_next) { - *previous = previous->merge(b).merge(*next); + *previous = previous->merge(block).merge(*next); erase(next); } else if (merge_prev) { - *previous = previous->merge(b); + *previous = previous->merge(block); } else if (merge_next) { - *next = b.merge(*next); + *next = block.merge(*next); } else { - free_list::insert(next, b); // cannot be coalesced, just insert + free_list::insert(next, block); // cannot be coalesced, just insert } } /** - * @brief Moves blocks from range `[first, last)` into the free_list in their correct order, + * @brief Moves blocks from free_list `other` into this free_list in their correct order, * coalescing them with their preceding and following blocks if they are contiguous. * * @tparam InputIt iterator type - * @param first The beginning of the range of blocks to insert - * @param last The end of the range of blocks to insert. + * @param other free_list of blocks to insert */ void insert(free_list&& other) { std::for_each(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), - [this](block_type&& b) { this->insert(std::move(b)); }); + [this](block_type&& block) { this->insert(block); }); } /** @@ -229,7 +228,7 @@ struct coalescing_free_list : free_list { * This is a "best fit" search. * * @param size The size in bytes of the desired block. - * @return block A block large enough to store `size` bytes. + * @return A block large enough to store `size` bytes. */ block_type get_block(std::size_t size) { @@ -259,6 +258,4 @@ struct coalescing_free_list : free_list { } }; // coalescing_free_list -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail diff --git a/include/rmm/mr/device/detail/fixed_size_free_list.hpp b/include/rmm/mr/device/detail/fixed_size_free_list.hpp index a7794c9b2..1ca1656b0 100644 --- a/include/rmm/mr/device/detail/fixed_size_free_list.hpp +++ b/include/rmm/mr/device/detail/fixed_size_free_list.hpp @@ -21,13 +21,11 @@ #include #include -namespace rmm { -namespace mr { -namespace detail { +namespace rmm::mr::detail { struct fixed_size_free_list : free_list { - fixed_size_free_list() = default; - ~fixed_size_free_list() = default; + fixed_size_free_list() = default; + ~fixed_size_free_list() override = default; fixed_size_free_list(fixed_size_free_list const&) = delete; fixed_size_free_list& operator=(fixed_size_free_list const&) = delete; @@ -44,22 +42,21 @@ struct fixed_size_free_list : free_list { template fixed_size_free_list(InputIt first, InputIt last) { - std::for_each(first, last, [this](block_type const& b) { insert(b); }); + std::for_each(first, last, [this](block_type const& block) { insert(block); }); } /** * @brief Inserts a block into the `free_list` in the correct order, coalescing it with the * preceding and following blocks if either is contiguous. * - * @param b The block to insert. + * @param block The block to insert. */ - void insert(block_type const& b) { push_back(b); } + void insert(block_type const& block) { push_back(block); } /** - * @brief Splices blocks from range `[first, last)` onto the free_list. 
+   * @brief Inserts blocks from another free list into this free_list.
    *
-   * @param first The beginning of the range of blocks to insert
-   * @param last The end of the range of blocks to insert.
+   * @param other The free_list to insert into this free_list.
    */
   void insert(free_list&& other) { splice(cend(), std::move(other)); }
@@ -67,20 +64,15 @@
    * @brief Returns the first block in the free list.
    *
    * @param size The size in bytes of the desired block (unused).
-   * @return block A block large enough to store `size` bytes.
+   * @return A block large enough to store `size` bytes.
    */
   block_type get_block(std::size_t size)
   {
-    if (is_empty())
-      return block_type{};
-    else {
-      block_type b = *begin();
-      pop_front();
-      return b;
-    }
+    if (is_empty()) { return block_type{}; }
+    block_type block = *begin();
+    pop_front();
+    return block;
   }
 };
-}  // namespace detail
-}  // namespace mr
-}  // namespace rmm
+}  // namespace rmm::mr::detail
diff --git a/include/rmm/mr/device/detail/free_list.hpp b/include/rmm/mr/device/detail/free_list.hpp
index e6f4effc0..1e8623431 100644
--- a/include/rmm/mr/device/detail/free_list.hpp
+++ b/include/rmm/mr/device/detail/free_list.hpp
@@ -20,25 +20,26 @@
 #include
 #include
-namespace rmm {
-namespace mr {
-namespace detail {
+namespace rmm::mr::detail {

 struct block_base {
   void* ptr{};  ///< Raw memory pointer

+  block_base() = default;
+  block_base(void* ptr) : ptr{ptr} {};
+
   /// Returns the raw pointer for this block
-  inline void* pointer() const { return ptr; }
+  [[nodiscard]] inline void* pointer() const { return ptr; }

   /// Returns true if this block is valid (non-null), false otherwise
-  inline bool is_valid() const { return pointer() != nullptr; }
+  [[nodiscard]] inline bool is_valid() const { return pointer() != nullptr; }

   /// Prints the block to stdout
   inline void print() const { std::cout << pointer(); }
 };

 /// Print block_base on an ostream
-inline std::ostream& operator<<(std::ostream& out, const block_base& b)
+inline std::ostream& operator<<(std::ostream& out, const block_base& block)
 {
-  out << b.pointer();
+  out << block.pointer();
   return out;
 }
@@ -72,20 +73,26 @@ class free_list {
   using iterator       = typename list_type::iterator;
   using const_iterator = typename list_type::const_iterator;

-  iterator begin() noexcept { return blocks.begin(); }  /// beginning of the free list
-  const_iterator begin() const noexcept { return blocks.begin(); }  /// beginning of the free list
-  const_iterator cbegin() const noexcept { return blocks.cbegin(); }  /// beginning of the free list
+  /// beginning of the free list
+  [[nodiscard]] iterator begin() noexcept { return blocks.begin(); }
+  /// beginning of the free list
+  [[nodiscard]] const_iterator begin() const noexcept { return blocks.begin(); }
+  /// beginning of the free list
+  [[nodiscard]] const_iterator cbegin() const noexcept { return blocks.cbegin(); }

-  iterator end() noexcept { return blocks.end(); }  /// end of the free list
-  const_iterator end() const noexcept { return blocks.end(); }  /// end of the free list
-  const_iterator cend() const noexcept { return blocks.cend(); }  /// end of the free list
+  /// end of the free list
+  [[nodiscard]] iterator end() noexcept { return blocks.end(); }
+  /// end of the free list
+  [[nodiscard]] const_iterator end() const noexcept { return blocks.end(); }
+  /// end of the free list
+  [[nodiscard]] const_iterator cend() const noexcept { return blocks.cend(); }

  /**
   * @brief The size of the free list in blocks.
* * @return size_type The number of blocks in the free list. */ - size_type size() const noexcept { return blocks.size(); } + [[nodiscard]] size_type size() const noexcept { return blocks.size(); } /** * @brief checks whether the free_list is empty. @@ -93,7 +100,7 @@ class free_list { * @return true If there are blocks in the free_list. * @return false If there are no blocks in the free_list. */ - bool is_empty() const noexcept { return blocks.empty(); } + [[nodiscard]] bool is_empty() const noexcept { return blocks.empty(); } /** * @brief Removes the block indicated by `iter` from the free list. @@ -114,8 +121,8 @@ class free_list { void print() const { std::cout << size() << std::endl; - for (auto const& b : blocks) { - std::cout << b << std::endl; + for (auto const& block : blocks) { + std::cout << block << std::endl; } } @@ -124,15 +131,15 @@ class free_list { * @brief Insert a block in the free list before the specified position * * @param pos iterator before which the block will be inserted. pos may be the end() iterator. - * @param b The block to insert. + * @param block The block to insert. */ - void insert(const_iterator pos, block_type const& b) { blocks.insert(pos, b); } + void insert(const_iterator pos, block_type const& block) { blocks.insert(pos, block); } /** * @brief Inserts a list of blocks in the free list before the specified position * * @param pos iterator before which the block will be inserted. pos may be the end() iterator. - * @param b The block to insert. + * @param other The free list to insert. */ void splice(const_iterator pos, free_list&& other) { @@ -142,16 +149,16 @@ class free_list { /** * @brief Appends the given block to the end of the free list. * - * @param b The block to append. + * @param block The block to append. */ - void push_back(const block_type& b) { blocks.push_back(b); } + void push_back(const block_type& block) { blocks.push_back(block); } /** * @brief Appends the given block to the end of the free list. `b` is moved to the new element. * - * @param b The block to append. + * @param block The block to append. */ - void push_back(block_type&& b) { blocks.push_back(std::move(b)); } + void push_back(block_type&& block) { blocks.push_back(std::move(block)); } /** * @brief Removes the first element of the free list. 
If there are no elements in the free list,
@@ -165,6 +172,4 @@
   }

   list_type blocks;  // The internal container of blocks
 };
-}  // namespace detail
-}  // namespace mr
-}  // namespace rmm
+}  // namespace rmm::mr::detail
diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
index 8824e0ad9..2a726377d 100644
--- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
+++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp
@@ -31,9 +31,7 @@
 #include
 #include
-namespace rmm {
-namespace mr {
-namespace detail {
+namespace rmm::mr::detail {

 /**
  * @brief A CRTP helper function
  *
@@ -48,8 +46,8 @@
  */
 template <typename T>
 struct crtp {
-  T& underlying() { return static_cast<T&>(*this); }
-  T const& underlying() const { return static_cast<T const&>(*this); }
+  [[nodiscard]] T& underlying() { return static_cast<T&>(*this); }
+  [[nodiscard]] T const& underlying() const { return static_cast<T const&>(*this); }
 };

 /**
  *
@@ -76,7 +74,7 @@
 template <typename PoolResource, typename FreeListType>
 class stream_ordered_memory_resource : public crtp<PoolResource>, public device_memory_resource {
  public:
-  ~stream_ordered_memory_resource() { release(); }
+  ~stream_ordered_memory_resource() override { release(); }

   stream_ordered_memory_resource()                                      = default;
   stream_ordered_memory_resource(stream_ordered_memory_resource const&) = delete;
@@ -148,12 +146,12 @@ class stream_ordered_memory_resource : public crtp, public device_
   /**
    * @brief Returns the block `b` (last used on stream `stream_event`) to the pool.
    *
-   * @param b The block to insert into the pool.
+   * @param block The block to insert into the pool.
    * @param stream The stream on which the memory was last used.
    */
-  void insert_block(block_type const& b, cuda_stream_view stream)
+  void insert_block(block_type const& block, cuda_stream_view stream)
   {
-    stream_free_blocks_[get_event(stream)].insert(b);
+    stream_free_blocks_[get_event(stream)].insert(block);
   }

   void insert_blocks(free_list&& blocks, cuda_stream_view stream)
@@ -164,9 +162,10 @@ class stream_ordered_memory_resource : public crtp, public device_
   void print_free_blocks() const
   {
     std::cout << "stream free blocks: ";
-    for (auto& s : stream_free_blocks_) {
-      std::cout << "stream: " << s.first.stream << " event: " << s.first.event << " ";
-      s.second.print();
+    for (auto& free_blocks : stream_free_blocks_) {
+      std::cout << "stream: " << free_blocks.first.stream << " event: " << free_blocks.first.event
+                << " ";
+      free_blocks.second.print();
       std::cout << std::endl;
     }
     std::cout << std::endl;
@@ -193,32 +192,34 @@ class stream_ordered_memory_resource : public crtp, public device_
    *
    * @throws `std::bad_alloc` if the requested allocation could not be fulfilled
    *
-   * @param bytes The size in bytes of the allocation
-   * @param stream The stream to associate this allocation with
+   * @param size The size in bytes of the allocation
+   * @param stream The stream in which to order this allocation
    * @return void* Pointer to the newly allocated memory
    */
-  virtual void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
+  void* do_allocate(std::size_t size, cuda_stream_view stream) override
   {
-    RMM_LOG_TRACE("[A][stream {:p}][{}B]", fmt::ptr(stream.value()), bytes);
+    RMM_LOG_TRACE("[A][stream {:p}][{}B]", fmt::ptr(stream.value()), size);

-    if (bytes <= 0) return nullptr;
+    if (size <= 0) { return nullptr; }

     lock_guard lock(mtx_);

     auto stream_event = get_event(stream);

-    bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
-    RMM_EXPECTS(bytes <= this->underlying().get_maximum_allocation_size(),
+    size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
+    RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(),
                 rmm::bad_alloc,
                 "Maximum allocation size exceeded");
-    auto const b = this->underlying().get_block(bytes, stream_event);
+    auto const block = this->underlying().get_block(size, stream_event);

-    RMM_LOG_TRACE(
-      "[A][stream {:p}][{}B][{:p}]", fmt::ptr(stream_event.stream), bytes, fmt::ptr(b.pointer()));
+    RMM_LOG_TRACE("[A][stream {:p}][{}B][{:p}]",
+                  fmt::ptr(stream_event.stream),
+                  size,
+                  fmt::ptr(block.pointer()));

     log_summary_trace();

-    return b.pointer();
+    return block.pointer();
   }

   /**
@@ -227,25 +228,27 @@ class stream_ordered_memory_resource : public crtp, public device_
    *
    * @throws nothing
    *
-   * @param p Pointer to be deallocated
+   * @param ptr Pointer to be deallocated
+   * @param size The size in bytes of the allocation to deallocate
+   * @param stream The stream in which to order this deallocation
    */
-  virtual void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override
+  void do_deallocate(void* ptr, std::size_t size, cuda_stream_view stream) override
   {
-    RMM_LOG_TRACE("[D][stream {:p}][{}B][{:p}]", fmt::ptr(stream.value()), bytes, p);
+    RMM_LOG_TRACE("[D][stream {:p}][{}B][{:p}]", fmt::ptr(stream.value()), size, ptr);

-    if (bytes <= 0 || p == nullptr) return;
+    if (size <= 0 || ptr == nullptr) { return; }

     lock_guard lock(mtx_);
     auto stream_event = get_event(stream);

-    bytes        = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
-    auto const b = this->underlying().free_block(p, bytes);
+    size             = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
+    auto const block = this->underlying().free_block(ptr, size);

     // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case
     // we may be able to delay recording the event in some situations. But using events rather than
     // streams allows stealing from deleted streams.
     RMM_ASSERT_CUDA_SUCCESS(cudaEventRecord(stream_event.event, stream.value()));

-    stream_free_blocks_[stream_event].insert(b);
+    stream_free_blocks_[stream_event].insert(block);

     log_summary_trace();
   }

   /**
@@ -261,6 +264,11 @@ class stream_ordered_memory_resource : public crtp, public device_
     }
     ~event_wrapper() { RMM_ASSERT_CUDA_SUCCESS(cudaEventDestroy(event)); }
     cudaEvent_t event{};
+
+    event_wrapper(event_wrapper const&) = delete;
+    event_wrapper& operator=(event_wrapper const&) = delete;
+    event_wrapper(event_wrapper&&) noexcept = delete;
+    event_wrapper& operator=(event_wrapper&&) = delete;
   };

   /**
@@ -280,20 +288,22 @@ class stream_ordered_memory_resource : public crtp, public device_
       // instance ensures it is destroyed cleaned up only after all are finished with it.
       thread_local auto event_tls = std::make_shared<event_wrapper>();
       default_stream_events.insert(event_tls);
-      return stream_event_pair{stream.value(), event_tls.get()->event};
+      return stream_event_pair{stream.value(), event_tls->event};
     }
     // We use cudaStreamLegacy as the event map key for the default stream for consistency between
     // PTDS and non-PTDS mode. In PTDS mode, the cudaStreamLegacy map key will only exist if the
     // user explicitly passes it, so it is used as the default location for the free list
     // at construction. For consistency, the same key is used for null stream free lists in non-PTDS
     // mode.
-    auto const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value();
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
+    auto* const stream_to_store = stream.is_default() ?
cudaStreamLegacy : stream.value(); auto const iter = stream_events_.find(stream_to_store); return (iter != stream_events_.end()) ? iter->second : [&]() { stream_event_pair stream_event{stream_to_store}; RMM_ASSERT_CUDA_SUCCESS( cudaEventCreateWithFlags(&stream_event.event, cudaEventDisableTiming)); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) stream_events_[stream_to_store] = stream_event; return stream_event; }(); @@ -303,14 +313,14 @@ class stream_ordered_memory_resource : public crtp, public device_ * @brief Splits a block into an allocated block of `size` bytes and a remainder block, and * inserts the remainder into a free list. * - * @param b The block to split into allocated and remainder portions. + * @param block The block to split into allocated and remainder portions. * @param size The size of the block to allocate from `b`. * @param blocks The `free_list` in which to insert the remainder block. * @return The allocated block. */ - block_type allocate_and_insert_remainder(block_type b, std::size_t size, free_list& blocks) + block_type allocate_and_insert_remainder(block_type block, std::size_t size, free_list& blocks) { - auto const [allocated, remainder] = this->underlying().allocate_from_block(b, size); + auto const [allocated, remainder] = this->underlying().allocate_from_block(block, size); if (remainder.is_valid()) { blocks.insert(remainder); } return allocated; } @@ -327,8 +337,8 @@ class stream_ordered_memory_resource : public crtp, public device_ // Try to find a satisfactory block in free list for the same stream (no sync required) auto iter = stream_free_blocks_.find(stream_event); if (iter != stream_free_blocks_.end()) { - block_type const b = iter->second.get_block(size); - if (b.is_valid()) { return allocate_and_insert_remainder(b, size, iter->second); } + block_type const block = iter->second.get_block(size); + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, iter->second); } } free_list& blocks = @@ -336,23 +346,23 @@ class stream_ordered_memory_resource : public crtp, public device_ // Try to find an existing block in another stream { - block_type const b = get_block_from_other_stream(size, stream_event, blocks, false); - if (b.is_valid()) return b; + block_type const block = get_block_from_other_stream(size, stream_event, blocks, false); + if (block.is_valid()) { return block; } } // no large enough blocks available on other streams, so sync and merge until we find one { - block_type const b = get_block_from_other_stream(size, stream_event, blocks, true); - if (b.is_valid()) return b; + block_type const block = get_block_from_other_stream(size, stream_event, blocks, true); + if (block.is_valid()) { return block; } } log_summary_trace(); // no large enough blocks available after merging, so grow the pool - block_type const b = + block_type const block = this->underlying().expand_pool(size, blocks, cuda_stream_view{stream_event.stream}); - return allocate_and_insert_remainder(b, size, blocks); + return allocate_and_insert_remainder(block, size, blocks); } /** @@ -373,45 +383,48 @@ class stream_ordered_memory_resource : public crtp, public device_ free_list& blocks, bool merge_first) { - for (auto it = stream_free_blocks_.begin(), next_it = it; it != stream_free_blocks_.end(); - it = next_it) { - ++next_it; // Points to element after `it` to allow erasing `it` in the loop body - auto other_event = it->first.event; - if (other_event != stream_event.event) { - free_list& other_blocks = it->second; - - block_type const b 
= [&]() { - if (merge_first) { - merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); - - RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", - fmt::ptr(stream_event.stream), - size, - fmt::ptr(it->first.stream)); - - stream_free_blocks_.erase(it); - - block_type const b = blocks.get_block(size); // get the best fit block in merged lists - if (b.is_valid()) { return allocate_and_insert_remainder(b, size, blocks); } - } else { - block_type const b = other_blocks.get_block(size); - if (b.is_valid()) { - // Since we found a block associated with a different stream, we have to insert a wait - // on the stream's associated event into the allocating stream. - RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); - return allocate_and_insert_remainder(b, size, other_blocks); - } - } - return block_type{}; - }(); - - if (b.is_valid()) { + auto find_block = [&](auto iter) { + auto other_event = iter->first.event; + auto& other_blocks = iter->second; + if (merge_first) { + merge_lists(stream_event, blocks, other_event, std::move(other_blocks)); + + RMM_LOG_DEBUG("[A][Stream {:p}][{}B][Merged stream {:p}]", + fmt::ptr(stream_event.stream), + size, + fmt::ptr(iter->first.stream)); + + stream_free_blocks_.erase(iter); + + block_type const block = blocks.get_block(size); // get the best fit block in merged lists + if (block.is_valid()) { return allocate_and_insert_remainder(block, size, blocks); } + } else { + block_type const block = other_blocks.get_block(size); + if (block.is_valid()) { + // Since we found a block associated with a different stream, we have to insert a wait + // on the stream's associated event into the allocating stream. + RMM_CUDA_TRY(cudaStreamWaitEvent(stream_event.stream, other_event, 0)); + return allocate_and_insert_remainder(block, size, other_blocks); + } + } + return block_type{}; + }; + + for (auto iter = stream_free_blocks_.begin(), next_iter = iter; + iter != stream_free_blocks_.end(); + iter = next_iter) { + ++next_iter; // Points to element after `iter` to allow erasing `iter` in the loop body + + if (iter->first.event != stream_event.event) { + block_type const block = find_block(iter); + + if (block.is_valid()) { RMM_LOG_DEBUG((merge_first) ? "[A][Stream {:p}][{}B][Found after merging stream {:p}]" : "[A][Stream {:p}][{}B][Taken from stream {:p}]", fmt::ptr(stream_event.stream), size, - fmt::ptr(it->first.stream)); - return b; + fmt::ptr(iter->first.stream)); + return block; } } } @@ -486,6 +499,4 @@ class stream_ordered_memory_resource : public crtp, public device_ std::mutex mtx_; // mutex for thread-safe access }; // namespace detail -} // namespace detail -} // namespace mr -} // namespace rmm +} // namespace rmm::mr::detail diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index d9817a933..52aa8c79f 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -21,9 +21,8 @@ #include #include -namespace rmm { +namespace rmm::mr { -namespace mr { /** * @brief Base class for all libcudf device memory allocation. 
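 *
 * To make the virtual interface concrete, here is a minimal derived resource (a
 * sketch only, backed directly by cudaMalloc/cudaFree; the class name is
 * illustrative):
 * @code{.cpp}
 * class my_memory_resource final : public rmm::mr::device_memory_resource {
 *  public:
 *   bool supports_streams() const noexcept override { return false; }
 *   bool supports_get_mem_info() const noexcept override { return false; }
 *
 *  private:
 *   // Synchronous allocation; the stream argument is ignored.
 *   void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
 *   {
 *     void* ptr{nullptr};
 *     RMM_CUDA_TRY(cudaMalloc(&ptr, bytes), rmm::bad_alloc);
 *     return ptr;
 *   }
 *   void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
 *   {
 *     RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr));
 *   }
 *   std::pair<std::size_t, std::size_t> do_get_mem_info(rmm::cuda_stream_view) const override
 *   {
 *     return {0, 0};  // mem info not supported by this sketch
 *   }
 * };
 * @endcode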
* @@ -82,7 +81,12 @@ namespace mr { */ class device_memory_resource { public: - virtual ~device_memory_resource() = default; + device_memory_resource() = default; + virtual ~device_memory_resource() = default; + device_memory_resource(device_memory_resource const&) = default; + device_memory_resource& operator=(device_memory_resource const&) = default; + device_memory_resource(device_memory_resource&&) noexcept = default; + device_memory_resource& operator=(device_memory_resource&&) noexcept = default; /** * @brief Allocates memory of size at least \p bytes. @@ -101,7 +105,7 @@ class device_memory_resource { */ void* allocate(std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) { - return do_allocate(rmm::detail::align_up(bytes, 8), stream); + return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream); } /** @@ -122,9 +126,9 @@ class device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation */ - void deallocate(void* p, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) + void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{}) { - do_deallocate(p, rmm::detail::align_up(bytes, 8), stream); + do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream); } /** @@ -140,7 +144,10 @@ class device_memory_resource { * @param other The other resource to compare to * @returns If the two resources are equivalent */ - bool is_equal(device_memory_resource const& other) const noexcept { return do_is_equal(other); } + [[nodiscard]] bool is_equal(device_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } /** * @brief Query whether the resource supports use of non-null CUDA streams for @@ -148,14 +155,14 @@ class device_memory_resource { * * @returns bool true if the resource supports non-null CUDA streams. */ - virtual bool supports_streams() const noexcept = 0; + [[nodiscard]] virtual bool supports_streams() const noexcept = 0; /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - virtual bool supports_get_mem_info() const noexcept = 0; + [[nodiscard]] virtual bool supports_get_mem_info() const noexcept = 0; /** * @brief Queries the amount of free and total memory for the resource. @@ -165,12 +172,15 @@ class device_memory_resource { * @returns a pair containing the free memory in bytes in .first and total amount of memory in * .second */ - std::pair get_mem_info(cuda_stream_view stream) const + [[nodiscard]] std::pair get_mem_info(cuda_stream_view stream) const { return do_get_mem_info(stream); } private: + // All allocations are padded to a multiple of allocation_size_alignment bytes. + static constexpr auto allocation_size_alignment = std::size_t{8}; + /** * @brief Allocates memory of size at least \p bytes. * @@ -196,7 +206,7 @@ class device_memory_resource { * value of `bytes` that was passed to the `allocate` call that returned `p`. * @param stream Stream on which to perform deallocation */ - virtual void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) = 0; + virtual void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) = 0; /** * @brief Compare this resource to another. 
@@ -212,7 +222,7 @@ class device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - virtual bool do_is_equal(device_memory_resource const& other) const noexcept + [[nodiscard]] virtual bool do_is_equal(device_memory_resource const& other) const noexcept { return this == &other; } @@ -225,7 +235,7 @@ class device_memory_resource { * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - virtual std::pair do_get_mem_info(cuda_stream_view stream) const = 0; + [[nodiscard]] virtual std::pair do_get_mem_info( + cuda_stream_view stream) const = 0; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index fb87691e8..4c29881fb 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -33,9 +33,7 @@ #include #include -namespace rmm { - -namespace mr { +namespace rmm::mr { /** * @brief A `device_memory_resource` which allocates memory blocks of a single fixed size. @@ -83,7 +81,7 @@ class fixed_size_memory_resource * @brief Destroy the `fixed_size_memory_resource` and free all memory allocated from upstream. * */ - ~fixed_size_memory_resource() { release(); } + ~fixed_size_memory_resource() override { release(); } fixed_size_memory_resource() = delete; fixed_size_memory_resource(fixed_size_memory_resource const&) = delete; @@ -97,14 +95,14 @@ class fixed_size_memory_resource * * @returns true */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. @@ -118,7 +116,7 @@ class fixed_size_memory_resource * * @return std::size_t size in bytes of allocated blocks. */ - std::size_t get_block_size() const noexcept { return block_size_; } + [[nodiscard]] std::size_t get_block_size() const noexcept { return block_size_; } protected: using free_list = detail::fixed_size_free_list; @@ -133,7 +131,7 @@ class fixed_size_memory_resource * @return std::size_t The (fixed) maximum size of a single allocation supported by this memory * resource */ - std::size_t get_maximum_allocation_size() const { return get_block_size(); } + [[nodiscard]] std::size_t get_maximum_allocation_size() const { return get_block_size(); } /** * @brief Allocate a block from upstream to supply the suballocation pool. 
@@ -160,49 +158,53 @@ class fixed_size_memory_resource */ free_list blocks_from_upstream(cuda_stream_view stream) { - void* p = upstream_mr_->allocate(upstream_chunk_size_, stream); - block_type b{p}; - upstream_blocks_.push_back(b); + void* ptr = upstream_mr_->allocate(upstream_chunk_size_, stream); + block_type block{ptr}; + upstream_blocks_.push_back(block); auto num_blocks = upstream_chunk_size_ / block_size_; - auto g = [p, this](int i) { return block_type{static_cast(p) + i * block_size_}; }; - auto first = thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), g); + auto block_gen = [ptr, this](int index) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return block_type{static_cast(ptr) + index * block_size_}; + }; + auto first = + thrust::make_transform_iterator(thrust::make_counting_iterator(std::size_t{0}), block_gen); return free_list(first, first + num_blocks); } /** - * @brief Splits block `b` if necessary to return a pointer to memory of `size` bytes. + * @brief Splits block if necessary to return a pointer to memory of `size` bytes. * * If the block is split, the remainder is returned to the pool. * - * @param b The block to allocate from. + * @param block The block to allocate from. * @param size The size in bytes of the requested allocation. * @param stream_event The stream and associated event on which the allocation will be used. * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, std::size_t size) + split_block allocate_from_block(block_type const& block, std::size_t size) { - return {b, block_type{nullptr}}; + return {block, block_type{nullptr}}; } /** - * @brief Finds, frees and returns the block associated with pointer `p`. + * @brief Finds, frees and returns the block associated with pointer. * - * @param p The pointer to the memory to free. + * @param ptr The pointer to the memory to free. * @param size The size of the memory to free. Must be equal to the original allocation size. * @param stream The stream-event pair for the stream on which the memory was last used. * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. 
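 *
 * For reference, a sketch of how this resource is typically constructed and used
 * (assuming the defaulted block-size and preallocation constructor parameters):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * // All allocations are served from identical fixed-size blocks carved out of
 * // larger chunks obtained from cuda_mr.
 * rmm::mr::fixed_size_memory_resource<rmm::mr::cuda_memory_resource> fixed_mr{&cuda_mr};
 * void* ptr = fixed_mr.allocate(1024);  // must not exceed get_block_size()
 * fixed_mr.deallocate(ptr, 1024);
 * @endcode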
*/ - block_type free_block(void* p, std::size_t size) noexcept + block_type free_block(void* ptr, std::size_t size) noexcept { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); - return block_type{p}; + return block_type{ptr}; } /** @@ -213,7 +215,8 @@ class fixed_size_memory_resource * @param stream the stream being executed on * @return std::pair with available and free memory for resource */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return std::make_pair(0, 0); } @@ -226,8 +229,9 @@ class fixed_size_memory_resource { lock_guard lock(this->get_mutex()); - for (auto b : upstream_blocks_) - upstream_mr_->deallocate(b.pointer(), upstream_chunk_size_); + for (auto block : upstream_blocks_) { + upstream_mr_->deallocate(block.pointer(), upstream_chunk_size_); + } upstream_blocks_.clear(); } @@ -235,15 +239,14 @@ class fixed_size_memory_resource { lock_guard lock(this->get_mutex()); - std::size_t free, total; - std::tie(free, total) = upstream_mr_->get_mem_info(0); + auto const [free, total] = upstream_mr_->get_mem_info(0); std::cout << "GPU free memory: " << free << " total: " << total << "\n"; std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n"; std::size_t upstream_total{0}; - for (auto h : upstream_blocks_) { - h.print(); + for (auto blocks : upstream_blocks_) { + blocks.print(); upstream_total += upstream_chunk_size_; } std::cout << "total upstream: " << upstream_total << " B\n"; @@ -265,6 +268,7 @@ class fixed_size_memory_resource : std::make_pair(block_size_, blocks.size() * block_size_); } + private: Upstream* upstream_mr_; // The resource from which to allocate new blocks std::size_t const block_size_; // size of blocks this MR allocates @@ -274,5 +278,4 @@ class fixed_size_memory_resource std::vector upstream_blocks_; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index b83fe3911..05b45ce31 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -46,13 +46,12 @@ class limiting_resource_adaptor final : public device_memory_resource { * @param upstream The resource used for allocating/deallocating device memory * @param allocation_limit Maximum memory allowed for this allocator. */ - limiting_resource_adaptor( - Upstream* upstream, - std::size_t allocation_limit, - std::size_t allocation_alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + limiting_resource_adaptor(Upstream* upstream, + std::size_t allocation_limit, + std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), - allocation_alignment_(allocation_alignment), + alignment_(alignment), upstream_{upstream} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); @@ -70,7 +69,7 @@ class limiting_resource_adaptor final : public device_memory_resource { * * @return Upstream* Pointer to the upstream resource. */ - Upstream* get_upstream() const noexcept { return upstream_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } /** * @brief Checks whether the upstream resource supports streams. 
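 *
 * A short usage sketch (the 1 MiB limit is arbitrary, and the failure behavior
 * noted in the comment is this version's expected one):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> limited_mr{
 *   &cuda_mr, 1 << 20};
 * void* ptr = limited_mr.allocate(768 << 10);  // fits within the 1 MiB limit
 * // A further 512 KiB request would push the running total past the limit and is
 * // expected to throw rmm::bad_alloc instead of reaching cuda_mr.
 * limited_mr.deallocate(ptr, 768 << 10);
 * @endcode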
@@ -129,7 +128,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::detail::align_up(bytes, allocation_alignment_); + auto const proposed_size = rmm::detail::align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { @@ -145,18 +144,18 @@ class limiting_resource_adaptor final : public device_memory_resource { } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, allocation_alignment_); - upstream_->deallocate(p, bytes, stream); + std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } @@ -171,15 +170,10 @@ class limiting_resource_adaptor final : public device_memory_resource { */ [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto const* cast = dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto const* cast = dynamic_cast const*>(&other); + if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -203,7 +197,7 @@ class limiting_resource_adaptor final : public device_memory_resource { std::atomic allocated_bytes_; // todo: should be some way to ask the upstream... - std::size_t allocation_alignment_; + std::size_t alignment_; Upstream* upstream_; ///< The upstream resource used for satisfying ///< allocation requests diff --git a/include/rmm/mr/device/logging_resource_adaptor.hpp b/include/rmm/mr/device/logging_resource_adaptor.hpp index 1148afa88..26448f887 100644 --- a/include/rmm/mr/device/logging_resource_adaptor.hpp +++ b/include/rmm/mr/device/logging_resource_adaptor.hpp @@ -28,8 +28,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and logs information * about the requested allocation/deallocations. @@ -113,18 +112,18 @@ class logging_resource_adaptor final : public device_memory_resource { } logging_resource_adaptor() = delete; - ~logging_resource_adaptor() = default; + ~logging_resource_adaptor() override = default; logging_resource_adaptor(logging_resource_adaptor const&) = delete; - logging_resource_adaptor(logging_resource_adaptor&&) = default; logging_resource_adaptor& operator=(logging_resource_adaptor const&) = delete; - logging_resource_adaptor& operator=(logging_resource_adaptor&&) = default; + logging_resource_adaptor(logging_resource_adaptor&&) noexcept = default; + logging_resource_adaptor& operator=(logging_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. * * @return Upstream* Pointer to the upstream resource. 
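 *
 * Typical construction goes through the `make_logging_adaptor` helper declared at
 * the bottom of this file (the log filename here is illustrative):
 * @code{.cpp}
 * rmm::mr::cuda_memory_resource cuda_mr{};
 * auto log_mr = rmm::mr::make_logging_adaptor(&cuda_mr, std::string{"rmm_log.csv"});
 * void* ptr = log_mr.allocate(4096);  // writes an "allocate" CSV record
 * log_mr.deallocate(ptr, 4096);       // writes a "free" CSV record
 * @endcode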
*/ - Upstream* get_upstream() const noexcept { return upstream_; } + [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } /** * @brief Checks whether the upstream resource supports streams. @@ -132,14 +131,17 @@ class logging_resource_adaptor final : public device_memory_resource { * @return true The upstream resource supports streams * @return false The upstream resource does not support streams. */ - bool supports_streams() const noexcept override { return upstream_->supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return upstream_->supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the upstream resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return upstream_->supports_get_mem_info(); } @@ -154,15 +156,12 @@ class logging_resource_adaptor final : public device_memory_resource { * * @return CSV formatted header string of column names */ - std::string header() const { return std::string{"Thread,Time,Action,Pointer,Size,Stream"}; } + [[nodiscard]] std::string header() const + { + return std::string{"Thread,Time,Action,Pointer,Size,Stream"}; + } private: - // make_logging_adaptor needs access to private get_default_filename - template - friend logging_resource_adaptor make_logging_adaptor(T* upstream, - std::string const& filename, - bool auto_flush); - /** * @brief Return the value of the environment variable RMM_LOG_FILE. * @@ -172,7 +171,7 @@ class logging_resource_adaptor final : public device_memory_resource { */ static std::string get_default_filename() { - auto filename = std::getenv("RMM_LOG_FILE"); + auto* filename = std::getenv("RMM_LOG_FILE"); RMM_EXPECTS(filename != nullptr, "RMM logging requested without an explicit file name, but RMM_LOG_FILE is unset"); return std::string{filename}; @@ -210,13 +209,13 @@ class logging_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const p = upstream_->allocate(bytes, stream); - logger_->info("allocate,{},{},{}", p, bytes, fmt::ptr(stream.value())); - return p; + auto const ptr = upstream_->allocate(bytes, stream); + logger_->info("allocate,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` and log the + * @brief Free allocation of size `bytes` pointed to by `ptr` and log the * deallocation. * * Every invocation of `logging_resource_adaptor::do_deallocate` will write @@ -227,14 +226,14 @@ class logging_resource_adaptor final : public device_memory_resource { * * @throws Nothing. 
* - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - logger_->info("free,{},{},{}", p, bytes, fmt::ptr(stream.value())); - upstream_->deallocate(p, bytes, stream); + logger_->info("free,{},{},{}", ptr, bytes, fmt::ptr(stream.value())); + upstream_->deallocate(ptr, bytes, stream); } /** @@ -246,18 +245,12 @@ class logging_resource_adaptor final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - logging_resource_adaptor const* cast = - dynamic_cast const*>(&other); - if (cast != nullptr) - return upstream_->is_equal(*cast->get_upstream()); - else - return upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto const* cast = dynamic_cast const*>(&other); + if (cast != nullptr) { return upstream_->is_equal(*cast->get_upstream()); } + return upstream_->is_equal(other); } /** @@ -268,11 +261,19 @@ class logging_resource_adaptor final : public device_memory_resource { * @param stream Stream on which to get the mem info. * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { return upstream_->get_mem_info(stream); } + // make_logging_adaptor needs access to private get_default_filename + template + // NOLINTNEXTLINE(readability-redundant-declaration) + friend logging_resource_adaptor make_logging_adaptor(T* upstream, + std::string const& filename, + bool auto_flush); + std::shared_ptr logger_; ///< spdlog logger object Upstream* upstream_; ///< The upstream resource used for satisfying @@ -313,5 +314,4 @@ logging_resource_adaptor make_logging_adaptor(Upstream* upstream, return logging_resource_adaptor{upstream, stream, auto_flush}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/managed_memory_resource.hpp b/include/rmm/mr/device/managed_memory_resource.hpp index ebce40bf5..7cce644be 100644 --- a/include/rmm/mr/device/managed_memory_resource.hpp +++ b/include/rmm/mr/device/managed_memory_resource.hpp @@ -22,8 +22,7 @@ #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief `device_memory_resource` derived class that uses * cudaMallocManaged/Free for allocation/deallocation. 
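 *
 * A minimal sketch of adopting managed memory process-wide:
 * @code{.cpp}
 * rmm::mr::managed_memory_resource managed_mr{};
 * // Route all subsequent RMM allocations on the current device through
 * // cudaMallocManaged.
 * rmm::mr::set_current_device_resource(&managed_mr);
 * @endcode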
@@ -31,7 +30,7 @@ namespace mr { class managed_memory_resource final : public device_memory_resource { public: managed_memory_resource() = default; - ~managed_memory_resource() = default; + ~managed_memory_resource() override = default; managed_memory_resource(managed_memory_resource const&) = default; managed_memory_resource(managed_memory_resource&&) = default; managed_memory_resource& operator=(managed_memory_resource const&) = default; @@ -43,18 +42,18 @@ class managed_memory_resource final : public device_memory_resource { * * @returns false */ - bool supports_streams() const noexcept override { return false; } + [[nodiscard]] bool supports_streams() const noexcept override { return false; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool true if the resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return true; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } private: /** - * @brief Allocates memory of size at least \p bytes using cudaMallocManaged. + * @brief Allocates memory of size at least `bytes` using cudaMallocManaged. * * The returned pointer has at least 256B alignment. * @@ -71,23 +70,23 @@ class managed_memory_resource final : public device_memory_resource { // size allocations. if (bytes == 0) { return nullptr; } - void* p{nullptr}; - RMM_CUDA_TRY(cudaMallocManaged(&p, bytes), rmm::bad_alloc); - return p; + void* ptr{nullptr}; + RMM_CUDA_TRY(cudaMallocManaged(&ptr, bytes), rmm::bad_alloc); + return ptr; } /** - * @brief Deallocate memory pointed to by \p p. + * @brief Deallocate memory pointed to by `ptr`. * * @note Stream argument is ignored. * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated */ - void do_deallocate(void* p, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override { - RMM_ASSERT_CUDA_SUCCESS(cudaFree(p)); + RMM_ASSERT_CUDA_SUCCESS(cudaFree(ptr)); } /** @@ -102,7 +101,7 @@ class managed_memory_resource final : public device_memory_resource { * @return true If the two resources are equivalent * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { return dynamic_cast(&other) != nullptr; } @@ -115,7 +114,8 @@ class managed_memory_resource final : public device_memory_resource { * @param stream to execute on * @return std::pair contaiing free_size and total_size of memory */ - std::pair do_get_mem_info(cuda_stream_view stream) const override + [[nodiscard]] std::pair do_get_mem_info( + cuda_stream_view stream) const override { std::size_t free_size{}; std::size_t total_size{}; @@ -124,5 +124,4 @@ class managed_memory_resource final : public device_memory_resource { } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/owning_wrapper.hpp b/include/rmm/mr/device/owning_wrapper.hpp index 6abe950b0..977ae0c11 100644 --- a/include/rmm/mr/device/owning_wrapper.hpp +++ b/include/rmm/mr/device/owning_wrapper.hpp @@ -22,21 +22,23 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { namespace detail { /// Converts a tuple into a parameter pack template -auto make_resource_impl(UpstreamTuple const& t, std::index_sequence, Args&&... 
args) +auto make_resource_impl(UpstreamTuple const& upstreams, + std::index_sequence, + Args&&... args) { - return std::make_unique(std::get(t).get()..., std::forward(args)...); + return std::make_unique(std::get(upstreams).get()..., + std::forward(args)...); } template -auto make_resource(std::tuple...> const& t, Args&&... args) +auto make_resource(std::tuple...> const& upstreams, Args&&... args) { return make_resource_impl( - t, std::index_sequence_for{}, std::forward(args)...); + upstreams, std::index_sequence_for{}, std::forward(args)...); } } // namespace detail @@ -117,25 +119,31 @@ class owning_wrapper : public device_memory_resource { * @brief Returns a constant reference to the wrapped resource. * */ - Resource const& wrapped() const noexcept { return *wrapped_; } + [[nodiscard]] Resource const& wrapped() const noexcept { return *wrapped_; } /** * @brief Returns reference to the wrapped resource. * */ - Resource& wrapped() noexcept { return *wrapped_; } + [[nodiscard]] Resource& wrapped() noexcept { return *wrapped_; } /** * @copydoc rmm::mr::device_memory_resource::supports_streams() */ - bool supports_streams() const noexcept override { return wrapped().supports_streams(); } + [[nodiscard]] bool supports_streams() const noexcept override + { + return wrapped().supports_streams(); + } /** * @brief Query whether the resource supports the get_mem_info API. * * @return true if the upstream resource supports get_mem_info, false otherwise. */ - bool supports_get_mem_info() const noexcept override { return wrapped().supports_get_mem_info(); } + [[nodiscard]] bool supports_get_mem_info() const noexcept override + { + return wrapped().supports_get_mem_info(); + } private: /** @@ -156,17 +164,17 @@ class owning_wrapper : public device_memory_resource { /** * @brief Returns an allocation to the wrapped resource. * - * `p` must have been returned from a prior call to `do_allocate(bytes)`. + * `ptr` must have been returned from a prior call to `do_allocate(bytes)`. * * @throws Nothing. * - * @param p Pointer to the allocation to free. + * @param ptr Pointer to the allocation to free. * @param bytes Size of the allocation * @param stream Stream on which to deallocate the memory */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - wrapped().deallocate(p, bytes, stream); + wrapped().deallocate(ptr, bytes, stream); } /** @@ -180,18 +188,12 @@ class owning_wrapper : public device_memory_resource { * @return true If the two resources are equal * @return false If the two resources are not equal */ - bool do_is_equal(device_memory_resource const& other) const noexcept override + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) { - return true; - } else { - auto casted = dynamic_cast const*>(&other); - if (nullptr != casted) { - return wrapped().is_equal(casted->wrapped()); - } else { - return wrapped().is_equal(other); - } - } + if (this == &other) { return true; } + auto casted = dynamic_cast const*>(&other); + if (nullptr != casted) { return wrapped().is_equal(casted->wrapped()); } + return wrapped().is_equal(other); } /** @@ -202,7 +204,8 @@ class owning_wrapper : public device_memory_resource { * @param stream Stream on which to get the mem info. 
   * @return std::pair containing free_size and total_size of memory
   */
-  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
+  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
+    cuda_stream_view stream) const override
   {
     return wrapped().get_mem_info(stream);
   }
@@ -272,5 +275,4 @@ auto make_owning_wrapper(std::shared_ptr<Upstream> upstream, Args&&... args)
                          std::forward<Args>(args)...);
 }

-}  // namespace mr
-}  // namespace rmm
+}  // namespace rmm::mr
diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp
index 0f10b7f53..4ddbd874a 100644
--- a/include/rmm/mr/device/per_device_resource.hpp
+++ b/include/rmm/mr/device/per_device_resource.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -71,9 +71,7 @@
  * @endcode
  */

-namespace rmm {
-
-namespace mr {
+namespace rmm::mr {

 namespace detail {

@@ -126,13 +124,14 @@ RMM_EXPORT inline auto& get_map()
  * @param id The id of the target device
  * @return Pointer to the current `device_memory_resource` for device `id`
  */
-inline device_memory_resource* get_per_device_resource(cuda_device_id id)
+inline device_memory_resource* get_per_device_resource(cuda_device_id device_id)
 {
   std::lock_guard<std::mutex> lock{detail::map_lock()};
   auto& map = detail::get_map();
   // If a resource was never set for `id`, set to the initial resource
-  auto const found = map.find(id.value());
-  return (found == map.end()) ? (map[id.value()] = detail::initial_resource()) : found->second;
+  auto const found = map.find(device_id.value());
+  return (found == map.end()) ? (map[device_id.value()] = detail::initial_resource())
+                              : found->second;
 }

 /**
@@ -162,15 +161,15 @@ inline device_memory_resource* get_per_device_resource(cuda_device_id id)
  * for `id`
  * @return Pointer to the previous memory resource for `id`
  */
-inline device_memory_resource* set_per_device_resource(cuda_device_id id,
+inline device_memory_resource* set_per_device_resource(cuda_device_id device_id,
                                                        device_memory_resource* new_mr)
 {
   std::lock_guard<std::mutex> lock{detail::map_lock()};
   auto& map = detail::get_map();
-  auto const old_itr = map.find(id.value());
+  auto const old_itr = map.find(device_id.value());
   // If a resource didn't previously exist for `id`, return pointer to initial_resource
-  auto old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second;
-  map[id.value()] = (new_mr == nullptr) ? detail::initial_resource() : new_mr;
+  auto* old_mr = (old_itr == map.end()) ? detail::initial_resource() : old_itr->second;
+  map[device_id.value()] = (new_mr == nullptr) ?
detail::initial_resource() : new_mr; return old_mr; } @@ -228,5 +227,4 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc { return set_per_device_resource(rmm::detail::current_device(), new_mr); } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/polymorphic_allocator.hpp b/include/rmm/mr/device/polymorphic_allocator.hpp index 4f97cf568..5c87ef7f7 100644 --- a/include/rmm/mr/device/polymorphic_allocator.hpp +++ b/include/rmm/mr/device/polymorphic_allocator.hpp @@ -24,8 +24,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A stream ordered Allocator using a `rmm::mr::device_memory_resource` to satisfy @@ -45,7 +44,6 @@ template class polymorphic_allocator { public: using value_type = T; - /** * @brief Construct a `polymorphic_allocator` using the return value of * `rmm::mr::get_current_device_resource()` as the underlying memory resource. @@ -53,15 +51,6 @@ class polymorphic_allocator { */ polymorphic_allocator() = default; - /** - * @brief Construct a `polymorphic_allocator` using `other.resource()` as the underlying memory - * resource. - * - * @param other The `polymorphic_resource` whose `resource()` will be used as the underlying - * resource of the new `polymorphic_allocator`. - */ - polymorphic_allocator(polymorphic_allocator const& other) = default; - /** * @brief Construct a `polymorphic_allocator` using the provided memory resource. * @@ -84,30 +73,30 @@ class polymorphic_allocator { } /** - * @brief Allocates storage for `n` objects of type `T` using the underlying memory resource. + * @brief Allocates storage for `num` objects of type `T` using the underlying memory resource. * - * @param n The number of objects to allocate storage for + * @param num The number of objects to allocate storage for * @param stream The stream on which to perform the allocation * @return Pointer to the allocated storage */ - value_type* allocate(std::size_t n, cuda_stream_view stream) + value_type* allocate(std::size_t num, cuda_stream_view stream) { - return static_cast(resource()->allocate(n * sizeof(T), stream)); + return static_cast(resource()->allocate(num * sizeof(T), stream)); } /** - * @brief Deallocates storage pointed to by `p`. + * @brief Deallocates storage pointed to by `ptr`. * - * `p` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal + * `ptr` must have been allocated from a `rmm::mr::device_memory_resource` `r` that compares equal * to `*resource()` using `r.allocate(n * sizeof(T))`. * - * @param p Pointer to memory to deallocate - * @param n Number of objects originally allocated + * @param ptr Pointer to memory to deallocate + * @param num Number of objects originally allocated * @param stream Stream on which to perform the deallocation */ - void deallocate(value_type* p, std::size_t n, cuda_stream_view stream) + void deallocate(value_type* ptr, std::size_t num, cuda_stream_view stream) { - resource()->deallocate(p, n * sizeof(T), stream); + resource()->deallocate(ptr, num * sizeof(T), stream); } /** @@ -115,7 +104,7 @@ class polymorphic_allocator { * * @return Pointer to the underlying resource. 
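   *
   * (Editor's illustrative sketch, not part of the original header; `alloc` and the
   * element type `int` are arbitrary names.)
   * @code
   * rmm::mr::polymorphic_allocator<int> alloc{};  // uses get_current_device_resource()
   * rmm::cuda_stream_view stream{};               // default stream
   * int* data = alloc.allocate(100, stream);      // storage for 100 ints
   * alloc.deallocate(data, 100, stream);          // release on the same stream
   * @endcode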
*/ - device_memory_resource* resource() const noexcept { return mr_; } + [[nodiscard]] device_memory_resource* resource() const noexcept { return mr_; } private: device_memory_resource* mr_{ @@ -169,22 +158,14 @@ class stream_allocator_adaptor { * @note: The `stream` must not be destroyed before the `stream_allocator_adaptor`, otherwise * behavior is undefined. * - * @param a The stream ordered allocator to use as the underlying allocator + * @param allocator The stream ordered allocator to use as the underlying allocator * @param stream The stream used with the underlying allocator */ - stream_allocator_adaptor(Allocator const& a, cuda_stream_view stream) : alloc_{a}, stream_{stream} + stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view stream) + : alloc_{allocator}, stream_{stream} { } - /** - * @brief Construct a `stream_allocator_adaptor` using `other.underlying_allocator()` and - * `other.stream()` as the underlying allocator and stream. - * - * @param other The other `stream_allocator_adaptor` whose underlying allocator and stream will be - * copied - */ - stream_allocator_adaptor(stream_allocator_adaptor const& other) = default; - /** * @brief Construct a `stream_allocator_adaptor` using `other.underlying_allocator()` and * `other.stream()` as the underlying allocator and stream. @@ -211,36 +192,36 @@ class stream_allocator_adaptor { }; /** - * @brief Allocates storage for `n` objects of type `T` using the underlying allocator on + * @brief Allocates storage for `num` objects of type `T` using the underlying allocator on * `stream()`. * - * @param n The number of objects to allocate storage for + * @param num The number of objects to allocate storage for * @return Pointer to the allocated storage */ - value_type* allocate(std::size_t n) { return alloc_.allocate(n, stream()); } + value_type* allocate(std::size_t num) { return alloc_.allocate(num, stream()); } /** - * @brief Deallocates storage pointed to by `p` using the underlying allocator on `stream()`. + * @brief Deallocates storage pointed to by `ptr` using the underlying allocator on `stream()`. * - * `p` must have been allocated from by an allocator `a` that compares equal to + * `ptr` must have been allocated from by an allocator `a` that compares equal to * `underlying_allocator()` using `a.allocate(n)`. * - * @param p Pointer to memory to deallocate - * @param n Number of objects originally allocated + * @param ptr Pointer to memory to deallocate + * @param num Number of objects originally allocated */ - void deallocate(value_type* p, std::size_t n) { alloc_.deallocate(p, n, stream()); } + void deallocate(value_type* ptr, std::size_t num) { alloc_.deallocate(ptr, num, stream()); } /** * @brief Returns the underlying stream on which calls to the underlying allocator are made. 
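   *
   * (Editor's illustrative sketch; `base` and `adapted` are invented names.) The
   * adaptor binds a stream once so that stream-oblivious code can still allocate
   * stream-ordered memory:
   * @code
   * rmm::cuda_stream stream{};
   * rmm::mr::polymorphic_allocator<int> base{};
   * auto adapted = rmm::mr::make_stream_allocator_adaptor(base, stream.view());
   * int* ptr = adapted.allocate(50);   // allocates on `stream` implicitly
   * adapted.deallocate(ptr, 50);
   * @endcode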
* */ - cuda_stream_view stream() const noexcept { return stream_; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return stream_; } /** * @brief Returns the underlying stream-ordered allocator * */ - Allocator underlying_allocator() const noexcept { return alloc_; } + [[nodiscard]] Allocator underlying_allocator() const noexcept { return alloc_; } private: Allocator alloc_; ///< Underlying allocator used for (de)allocation @@ -266,14 +247,13 @@ bool operator!=(stream_allocator_adaptor const& lhs, stream_allocator_adaptor * @tparam Allocator Type of the stream-ordered allocator * @param allocator The allocator to use as the underlying allocator of the * `stream_allocator_adaptor` - * @param s The stream on which the `stream_allocator_adaptor` will perform (de)allocations + * @param stream The stream on which the `stream_allocator_adaptor` will perform (de)allocations * @return A `stream_allocator_adaptor` wrapping `allocator` and `s` */ template -auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view s) +auto make_stream_allocator_adaptor(Allocator const& allocator, cuda_stream_view stream) { - return stream_allocator_adaptor{allocator, s}; + return stream_allocator_adaptor{allocator, stream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 7a2a5b9c7..526852355 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -41,8 +41,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief A coalescing best-fit suballocator which uses a pool of memory allocated from @@ -100,7 +99,7 @@ class pool_memory_resource final * @brief Destroy the `pool_memory_resource` and deallocate all memory it allocated using * the upstream resource. */ - ~pool_memory_resource() { release(); } + ~pool_memory_resource() override { release(); } pool_memory_resource() = delete; pool_memory_resource(pool_memory_resource const&) = delete; @@ -114,14 +113,14 @@ class pool_memory_resource final * * @returns bool true. */ - bool supports_streams() const noexcept override { return true; } + [[nodiscard]] bool supports_streams() const noexcept override { return true; } /** * @brief Query whether the resource supports the get_mem_info API. * * @return bool false */ - bool supports_get_mem_info() const noexcept override { return false; } + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; } /** * @brief Get the upstream memory_resource object. 
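   *
   * (Editor's illustrative sketch; the pool sizes are arbitrary and assume a device
   * with enough free memory.)
   * @code
   * rmm::mr::cuda_memory_resource upstream{};
   * rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{
   *   &upstream, std::size_t{1} << 30, std::size_t{2} << 30};  // 1 GiB initial, 2 GiB max
   * void* ptr = pool.allocate(4096);
   * pool.deallocate(ptr, 4096);
   * @endcode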
@@ -145,7 +144,7 @@ class pool_memory_resource final * * @return std::size_t The maximum size of a single allocation supported by this memory resource */ - std::size_t get_maximum_allocation_size() const + [[nodiscard]] std::size_t get_maximum_allocation_size() const { return std::numeric_limits::max(); } @@ -168,12 +167,14 @@ class pool_memory_resource final block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream) { while (try_size >= min_size) { - auto b = block_from_upstream(try_size, stream); - if (b.has_value()) { - current_pool_size_ += b.value().size(); - return b.value(); + auto block = block_from_upstream(try_size, stream); + if (block.has_value()) { + current_pool_size_ += block.value().size(); + return block.value(); + } + if (try_size == min_size) { + break; // only try `size` once } - if (try_size == min_size) break; // only try `size` once try_size = std::max(min_size, try_size / 2); } RMM_LOG_ERROR("[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]", @@ -194,20 +195,19 @@ class pool_memory_resource final * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool */ + // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) void initialize_pool(thrust::optional initial_size, thrust::optional maximum_size) { auto const try_size = [&]() { if (not initial_size.has_value()) { - std::size_t free{}, total{}; - std::tie(free, total) = (get_upstream()->supports_get_mem_info()) - ? get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); + auto const [free, total] = (get_upstream()->supports_get_mem_info()) + ? get_upstream()->get_mem_info(cuda_stream_legacy) + : rmm::detail::available_device_memory(); return rmm::detail::align_up(std::min(free, total / 2), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } else { - return initial_size.value(); } + return initial_size.value(); }(); current_pool_size_ = 0; // try_to_expand will set this if it succeeds @@ -217,8 +217,8 @@ class pool_memory_resource final "Initial pool size exceeds the maximum pool size!"); if (try_size > 0) { - auto const b = try_to_expand(try_size, try_size, cuda_stream_legacy); - this->insert_block(b, cuda_stream_legacy); + auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + this->insert_block(block, cuda_stream_legacy); } } @@ -252,7 +252,7 @@ class pool_memory_resource final * @param size The size of the minimum allocation immediately needed * @return std::size_t The computed size to grow the pool. */ - std::size_t size_to_grow(std::size_t size) const + [[nodiscard]] std::size_t size_to_grow(std::size_t size) const { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); @@ -260,8 +260,8 @@ class pool_memory_resource final rmm::detail::align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); auto const aligned_size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? 
std::max(aligned_size, remaining / 2) : 0; - } else - return std::max(size, pool_size()); + } + return std::max(size, pool_size()); }; /** @@ -275,64 +275,66 @@ class pool_memory_resource final { RMM_LOG_DEBUG("[A][Stream {}][Upstream {}B]", fmt::ptr(stream.value()), size); - if (size == 0) return {}; + if (size == 0) { return {}; } try { - void* p = upstream_mr_->allocate(size, stream); + void* ptr = upstream_mr_->allocate(size, stream); return thrust::optional{ - *upstream_blocks_.emplace(reinterpret_cast(p), size, true).first}; + *upstream_blocks_.emplace(static_cast(ptr), size, true).first}; } catch (std::exception const& e) { return thrust::nullopt; } } /** - * @brief Splits block `b` if necessary to return a pointer to memory of `size` bytes. + * @brief Splits `block` if necessary to return a pointer to memory of `size` bytes. * * If the block is split, the remainder is returned to the pool. * - * @param b The block to allocate from. + * @param block The block to allocate from. * @param size The size in bytes of the requested allocation. * @param stream_event The stream and associated event on which the allocation will be used. * @return A pair comprising the allocated pointer and any unallocated remainder of the input * block. */ - split_block allocate_from_block(block_type const& b, std::size_t size) + split_block allocate_from_block(block_type const& block, std::size_t size) { - block_type const alloc{b.pointer(), size, b.is_head()}; + block_type const alloc{block.pointer(), size, block.is_head()}; #ifdef RMM_POOL_TRACK_ALLOCATIONS allocated_blocks_.insert(alloc); #endif - auto rest = - (b.size() > size) ? block_type{b.pointer() + size, b.size() - size, false} : block_type{}; + auto rest = (block.size() > size) + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + ? block_type{block.pointer() + size, block.size() - size, false} + : block_type{}; return {alloc, rest}; } /** - * @brief Finds, frees and returns the block associated with pointer `p`. + * @brief Finds, frees and returns the block associated with pointer `ptr`. * - * @param p The pointer to the memory to free. + * @param ptr The pointer to the memory to free. * @param size The size of the memory to free. Must be equal to the original allocation size. * @param stream The stream-event pair for the stream on which the memory was last used. * @return The (now freed) block associated with `p`. The caller is expected to return the block * to the pool. 
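   *
   * (Editor's note, illustrative only.) Conceptually, the deallocation path pairs
   * this function with `insert_block`:
   * @code
   * // auto block = free_block(ptr, size);  // reclaim the block's metadata
   * // insert_block(block, stream);         // hand it back to the free list
   * @endcode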
   */
-  block_type free_block(void* p, std::size_t size) noexcept
+  block_type free_block(void* ptr, std::size_t size) noexcept
   {
 #ifdef RMM_POOL_TRACK_ALLOCATIONS
-    if (p == nullptr) return block_type{};
-    auto const i = allocated_blocks_.find(static_cast<char*>(p));
-    RMM_LOGGING_ASSERT(i != allocated_blocks_.end());
+    if (ptr == nullptr) return block_type{};
+    auto const iter = allocated_blocks_.find(static_cast<char*>(ptr));
+    RMM_LOGGING_ASSERT(iter != allocated_blocks_.end());

-    auto block = *i;
+    auto block = *iter;
     RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment));
-    allocated_blocks_.erase(i);
+    allocated_blocks_.erase(iter);

     return block;
 #else
-    auto const i = upstream_blocks_.find(static_cast<char*>(p));
-    return block_type{static_cast<char*>(p), size, (i != upstream_blocks_.end())};
+    auto const iter = upstream_blocks_.find(static_cast<char*>(ptr));
+    return block_type{static_cast<char*>(ptr), size, (iter != upstream_blocks_.end())};
 #endif
   }

@@ -343,7 +345,7 @@ class pool_memory_resource final
    *
    * @return std::size_t The total size of the currently allocated pool.
    */
-  std::size_t pool_size() const noexcept { return current_pool_size_; }
+  [[nodiscard]] std::size_t pool_size() const noexcept { return current_pool_size_; }

   /**
    * @brief Free all memory allocated from the upstream memory_resource.
@@ -353,8 +355,9 @@ class pool_memory_resource final
   {
     lock_guard lock(this->get_mutex());

-    for (auto b : upstream_blocks_)
-      upstream_mr_->deallocate(b.pointer(), b.size());
+    for (auto block : upstream_blocks_) {
+      upstream_mr_->deallocate(block.pointer(), block.size());
+    }
     upstream_blocks_.clear();
 #ifdef RMM_POOL_TRACK_ALLOCATIONS
     allocated_blocks_.clear();
@@ -373,23 +376,22 @@ class pool_memory_resource final
   {
     lock_guard lock(this->get_mutex());

-    std::size_t free, total;
-    std::tie(free, total) = upstream_mr_->get_mem_info(0);
+    auto const [free, total] = upstream_mr_->get_mem_info(0);
     std::cout << "GPU free memory: " << free << " total: " << total << "\n";

     std::cout << "upstream_blocks: " << upstream_blocks_.size() << "\n";
     std::size_t upstream_total{0};

-    for (auto h : upstream_blocks_) {
-      h.print();
-      upstream_total += h.size();
+    for (auto blocks : upstream_blocks_) {
+      blocks.print();
+      upstream_total += blocks.size();
     }
     std::cout << "total upstream: " << upstream_total << " B\n";

 #ifdef RMM_POOL_TRACK_ALLOCATIONS
     std::cout << "allocated_blocks: " << allocated_blocks_.size() << "\n";
-    for (auto b : allocated_blocks_)
-      b.print();
+    for (auto block : allocated_blocks_)
+      block.print();
 #endif

     this->print_free_blocks();
@@ -407,9 +409,9 @@ class pool_memory_resource final
   {
     std::size_t largest{};
     std::size_t total{};
-    std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& b) {
-      total += b.size();
-      largest = std::max(largest, b.size());
+    std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](auto const& block) {
+      total += block.size();
+      largest = std::max(largest, block.size());
     });
     return {largest, total};
   }
@@ -422,14 +424,14 @@ class pool_memory_resource final
    * @param stream to execute on
    * @return std::pair containing free_size and total_size of memory
    */
-  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
+  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
+    cuda_stream_view stream) const override
   {
-    std::size_t free_size{};
-    std::size_t total_size{};
     // TODO implement this
-    return std::make_pair(free_size, total_size);
+    return {0, 0};
   }

+ private:
  Upstream* upstream_mr_;  // The "heap" to allocate the pool from
  std::size_t
current_pool_size_{}; thrust::optional maximum_pool_size_{}; @@ -442,5 +444,4 @@ class pool_memory_resource final std::set> upstream_blocks_; }; // namespace mr -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp index bcc0bf10b..298ac8bc1 100644 --- a/include/rmm/mr/device/statistics_resource_adaptor.hpp +++ b/include/rmm/mr/device/statistics_resource_adaptor.hpp @@ -21,8 +21,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and tracks statistics * on memory allocations. @@ -52,17 +51,17 @@ class statistics_resource_adaptor final : public device_memory_resource { int64_t peak{0}; // Max value of `value` int64_t total{0}; // Sum of all added values - counter& operator+=(int64_t x) + counter& operator+=(int64_t val) { - value += x; - total += x; + value += val; + total += val; peak = std::max(value, peak); return *this; } - counter& operator-=(int64_t x) + counter& operator-=(int64_t val) { - value -= x; + value -= val; return *this; } }; @@ -81,11 +80,11 @@ class statistics_resource_adaptor final : public device_memory_resource { } statistics_resource_adaptor() = delete; - virtual ~statistics_resource_adaptor() = default; + ~statistics_resource_adaptor() override = default; statistics_resource_adaptor(statistics_resource_adaptor const&) = delete; - statistics_resource_adaptor(statistics_resource_adaptor&&) = default; statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete; - statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) = default; + statistics_resource_adaptor(statistics_resource_adaptor&&) noexcept = default; + statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. @@ -156,7 +155,7 @@ class statistics_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = upstream_->allocate(bytes, stream); + void* ptr = upstream_->allocate(bytes, stream); // increment the stats { @@ -167,21 +166,21 @@ class statistics_resource_adaptor final : public device_memory_resource { allocations_ += 1; } - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); { write_lock_t lock(mtx_); @@ -203,13 +202,10 @@ class statistics_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) - : upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? 
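/* (Editor's note, inserted as a comment so the surrounding expression is
   unaffected.) A hedged usage sketch of this adaptor; `stats_mr` is an invented
   name and `get_bytes_counter()` is assumed from the adaptor's public API:

     rmm::mr::cuda_memory_resource cuda_mr{};
     auto stats_mr = rmm::mr::make_statistics_adaptor(&cuda_mr);
     void* ptr  = stats_mr.allocate(256);
     auto bytes = stats_mr.get_bytes_counter();  // value/peak/total now reflect 256 B
     stats_mr.deallocate(ptr, 256);
*/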
upstream_->is_equal(*cast->get_upstream())
+                          : upstream_->is_equal(other);
   }

   /**
@@ -244,5 +240,4 @@ statistics_resource_adaptor<Upstream> make_statistics_adaptor(Upstream* upstream)
   return statistics_resource_adaptor<Upstream>{upstream};
 }

-}  // namespace mr
-}  // namespace rmm
+}  // namespace rmm::mr
diff --git a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
index 2675a4df2..b1f898c5e 100644
--- a/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
+++ b/include/rmm/mr/device/thread_safe_resource_adaptor.hpp
@@ -22,8 +22,7 @@
 #include
 #include

-namespace rmm {
-namespace mr {
+namespace rmm::mr {
 /**
  * @brief Resource that adapts `Upstream` memory resource adaptor to be thread safe.
  *
@@ -54,7 +53,7 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
   }

   thread_safe_resource_adaptor() = delete;
-  ~thread_safe_resource_adaptor() = default;
+  ~thread_safe_resource_adaptor() override = default;
   thread_safe_resource_adaptor(thread_safe_resource_adaptor const&) = delete;
   thread_safe_resource_adaptor(thread_safe_resource_adaptor&&) = delete;
   thread_safe_resource_adaptor& operator=(thread_safe_resource_adaptor const&) = delete;
@@ -98,19 +97,18 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
   }

   /**
-   * @brief Free allocation of size `bytes` pointed to to by `p` and log the
-   * deallocation.
+   * @brief Free allocation of size `bytes` pointed to by `ptr`.
    *
    * @throws Nothing.
    *
-   * @param p Pointer to be deallocated
+   * @param ptr Pointer to be deallocated
    * @param bytes Size of the allocation
    * @param stream Stream on which to perform the deallocation
    */
-  void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override
+  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
   {
     lock_t lock(mtx);
-    upstream_->deallocate(p, bytes, stream);
+    upstream_->deallocate(ptr, bytes, stream);
   }

   /**
@@ -124,15 +122,12 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
    */
   bool do_is_equal(device_memory_resource const& other) const noexcept override
   {
-    if (this == &other)
-      return true;
-    else {
-      auto thread_safe_other = dynamic_cast<thread_safe_resource_adaptor<Upstream> const*>(&other);
-      if (thread_safe_other != nullptr)
-        return upstream_->is_equal(*thread_safe_other->get_upstream());
-      else
-        return upstream_->is_equal(other);
+    if (this == &other) { return true; }
+    auto thread_safe_other = dynamic_cast<thread_safe_resource_adaptor<Upstream> const*>(&other);
+    if (thread_safe_other != nullptr) {
+      return upstream_->is_equal(*thread_safe_other->get_upstream());
     }
+    return upstream_->is_equal(other);
   }

   /**
@@ -153,5 +148,4 @@ class thread_safe_resource_adaptor final : public device_memory_resource {
   Upstream* upstream_;  ///< The upstream resource used for satisfying allocation requests
 };

-}  // namespace mr
-}  // namespace rmm
+}  // namespace rmm::mr
diff --git a/include/rmm/mr/device/thrust_allocator_adaptor.hpp b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
index d841304a6..56e910801 100644
--- a/include/rmm/mr/device/thrust_allocator_adaptor.hpp
+++ b/include/rmm/mr/device/thrust_allocator_adaptor.hpp
@@ -22,8 +22,7 @@
 #include
 #include

-namespace rmm {
-namespace mr {
+namespace rmm::mr {
 /**
  * @brief An `allocator` compatible with Thrust containers and algorithms using
  * a `device_memory_resource` for memory (de)allocation.
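 *
 * (Editor's illustrative sketch.) A container simply names the allocator type; the
 * default-constructed allocator picks up the current device resource and stream:
 * @code
 * thrust::device_vector<int, rmm::mr::thrust_allocator<int>> vec(100);
 * @endcode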
@@ -91,39 +90,38 @@ class thrust_allocator : public thrust::device_malloc_allocator { /** * @brief Allocate objects of type `T` * - * @param n The number of elements of type `T` to allocate + * @param num The number of elements of type `T` to allocate * @return pointer Pointer to the newly allocated storage */ - pointer allocate(size_type n) + pointer allocate(size_type num) { - return thrust::device_pointer_cast(static_cast(_mr->allocate(n * sizeof(T), _stream))); + return thrust::device_pointer_cast(static_cast(_mr->allocate(num * sizeof(T), _stream))); } /** * @brief Deallocates objects of type `T` * - * @param p Pointer returned by a previous call to `allocate` - * @param n number of elements, *must* be equal to the argument passed to the + * @param ptr Pointer returned by a previous call to `allocate` + * @param num number of elements, *must* be equal to the argument passed to the * prior `allocate` call that produced `p` */ - void deallocate(pointer p, size_type n) + void deallocate(pointer ptr, size_type num) { - return _mr->deallocate(thrust::raw_pointer_cast(p), n * sizeof(T), _stream); + return _mr->deallocate(thrust::raw_pointer_cast(ptr), num * sizeof(T), _stream); } /** * @brief Returns the device memory resource used by this allocator. */ - device_memory_resource* resource() const noexcept { return _mr; } + [[nodiscard]] device_memory_resource* resource() const noexcept { return _mr; } /** * @brief Returns the stream used by this allocator. */ - cuda_stream_view stream() const noexcept { return _stream; } + [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; } private: cuda_stream_view _stream{}; device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp index 1a32a1c44..fbcb44898 100644 --- a/include/rmm/mr/device/tracking_resource_adaptor.hpp +++ b/include/rmm/mr/device/tracking_resource_adaptor.hpp @@ -25,8 +25,7 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { /** * @brief Resource that uses `Upstream` to allocate memory and tracks allocations. * @@ -83,11 +82,11 @@ class tracking_resource_adaptor final : public device_memory_resource { } tracking_resource_adaptor() = delete; - virtual ~tracking_resource_adaptor() = default; + ~tracking_resource_adaptor() override = default; tracking_resource_adaptor(tracking_resource_adaptor const&) = delete; - tracking_resource_adaptor(tracking_resource_adaptor&&) = default; tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete; - tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) = default; + tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept = default; + tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept = default; /** * @brief Return pointer to the upstream resource. 
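   *
   * (Editor's hedged sketch; assumes the adaptor's `get_allocated_bytes()` accessor
   * and the factory shown at the end of this header.)
   * @code
   * rmm::mr::cuda_memory_resource cuda_mr{};
   * auto tracking_mr = rmm::mr::make_tracking_adaptor(&cuda_mr);
   * void* ptr = tracking_mr.allocate(128);
   * // get_allocated_bytes() now reports 128 B; the outstanding-allocations string
   * // lists this pointer (with a callstack if capture was requested).
   * tracking_mr.deallocate(ptr, 128);
   * @endcode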
@@ -154,10 +153,10 @@ class tracking_resource_adaptor final : public device_memory_resource { std::ostringstream oss; if (!allocations_.empty()) { - for (auto const& al : allocations_) { - oss << al.first << ": " << al.second.allocation_size << " B"; - if (al.second.strace != nullptr) { - oss << " : callstack:" << std::endl << *al.second.strace; + for (auto const& alloc : allocations_) { + oss << alloc.first << ": " << alloc.second.allocation_size << " B"; + if (alloc.second.strace != nullptr) { + oss << " : callstack:" << std::endl << *alloc.second.strace; } oss << std::endl; } @@ -193,34 +192,34 @@ class tracking_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - void* p = upstream_->allocate(bytes, stream); + void* ptr = upstream_->allocate(bytes, stream); // track it. { write_lock_t lock(mtx_); - allocations_.emplace(p, allocation_info{bytes, capture_stacks_}); + allocations_.emplace(ptr, allocation_info{bytes, capture_stacks_}); } allocated_bytes_ += bytes; - return p; + return ptr; } /** - * @brief Free allocation of size `bytes` pointed to by `p` + * @brief Free allocation of size `bytes` pointed to by `ptr` * * @throws Nothing. * - * @param p Pointer to be deallocated + * @param ptr Pointer to be deallocated * @param bytes Size of the allocation * @param stream Stream on which to perform the deallocation */ - void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override + void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - upstream_->deallocate(p, bytes, stream); + upstream_->deallocate(ptr, bytes, stream); { write_lock_t lock(mtx_); - const auto found = allocations_.find(p); + const auto found = allocations_.find(ptr); // Ensure the allocation is found and the number of bytes match if (found == allocations_.end()) { @@ -229,7 +228,7 @@ class tracking_resource_adaptor final : public device_memory_resource { RMM_LOG_ERROR( "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: " "{}", - fmt::ptr(p), + fmt::ptr(ptr), bytes, this->allocations_.size()); } else { @@ -261,13 +260,10 @@ class tracking_resource_adaptor final : public device_memory_resource { */ bool do_is_equal(device_memory_resource const& other) const noexcept override { - if (this == &other) - return true; - else { - auto cast = dynamic_cast const*>(&other); - return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) - : upstream_->is_equal(other); - } + if (this == &other) { return true; } + auto cast = dynamic_cast const*>(&other); + return cast != nullptr ? upstream_->is_equal(*cast->get_upstream()) + : upstream_->is_equal(other); } /** @@ -303,5 +299,4 @@ tracking_resource_adaptor make_tracking_adaptor(Upstream* upstream) return tracking_resource_adaptor{upstream}; } -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/host/host_memory_resource.hpp b/include/rmm/mr/host/host_memory_resource.hpp index b799b46da..4edffc860 100644 --- a/include/rmm/mr/host/host_memory_resource.hpp +++ b/include/rmm/mr/host/host_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,9 +18,9 @@ #include #include -namespace rmm { -namespace mr { -/**---------------------------------------------------------------------------* +namespace rmm::mr { + +/** * @brief Base class for host memory allocation. * * This is based on `std::pmr::memory_resource`: @@ -43,124 +43,121 @@ namespace mr { * base class' `allocate` function may log every allocation, no matter what * derived class implementation is used. * - *---------------------------------------------------------------------------**/ + */ class host_memory_resource { public: - virtual ~host_memory_resource() = default; + host_memory_resource() = default; + virtual ~host_memory_resource() = default; + host_memory_resource(host_memory_resource const&) = default; + host_memory_resource& operator=(host_memory_resource const&) = default; + host_memory_resource(host_memory_resource&&) noexcept = default; + host_memory_resource& operator=(host_memory_resource&&) noexcept = default; - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ void* allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) { return do_allocate(bytes, alignment); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. 
* @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void deallocate(void* p, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) + */ + void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) { - do_deallocate(p, bytes, alignment); + do_deallocate(ptr, bytes, alignment); } - /**---------------------------------------------------------------------------* + /** * @brief Compare this resource to another. * - * Two `host_memory_resource`s compare equal if and only if memory allocated - * from one `host_memory_resource` can be deallocated from the other and vice - * versa. + * Two `host_memory_resource`s compare equal if and only if memory allocated from one + * `host_memory_resource` can be deallocated from the other and vice versa. * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. + * By default, simply checks if \p *this and \p other refer to the same object, i.e., does not + * check if they are two objects of the same class. * * @param other The other resource to compare to - * @returns If the two resources are equivalent - *---------------------------------------------------------------------------**/ - bool is_equal(host_memory_resource const& other) const noexcept { return do_is_equal(other); } + * @returns true if the two resources are equivalent + */ + [[nodiscard]] bool is_equal(host_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ virtual void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) = 0; - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. 
This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - virtual void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ + virtual void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) = 0; - /**---------------------------------------------------------------------------* + /** * @brief Compare this resource to another. * - * Two host_memory_resources compare equal if and only if memory allocated - * from one host_memory_resource can be deallocated from the other and vice - * versa. + * Two host_memory_resources compare equal if and only if memory allocated from one + * host_memory_resource can be deallocated from the other and vice versa. * - * By default, simply checks if \p *this and \p other refer to the same - * object, i.e., does not check if they are two objects of the same class. + * By default, simply checks if `*this` and `other` refer to the same object, i.e., does not check + * whether they are two objects of the same class. * * @param other The other resource to compare to * @return true If the two resources are equivalent - * @return false If the two resources are not equal - *---------------------------------------------------------------------------**/ - virtual bool do_is_equal(host_memory_resource const& other) const noexcept + */ + [[nodiscard]] virtual bool do_is_equal(host_memory_resource const& other) const noexcept { return this == &other; } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 0f27cbf3c..e30a6f41c 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -22,36 +22,34 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { -/**---------------------------------------------------------------------------* - * @brief A `host_memory_resource` that uses the global `operator new` and - * `operator delete` to allocate host memory. - *---------------------------------------------------------------------------**/ +/** + * @brief A `host_memory_resource` that uses the global `operator new` and `operator delete` to + * allocate host memory. + */ class new_delete_resource final : public host_memory_resource { public: new_delete_resource() = default; - ~new_delete_resource() = default; + ~new_delete_resource() override = default; new_delete_resource(new_delete_resource const&) = default; new_delete_resource(new_delete_resource&&) = default; new_delete_resource& operator=(new_delete_resource const&) = default; new_delete_resource& operator=(new_delete_resource&&) = default; private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. 
+ * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation - * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + * @return Pointer to the newly allocated memory + */ void* do_allocate(std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { @@ -63,30 +61,27 @@ class new_delete_resource final : public host_memory_resource { bytes, alignment, [](std::size_t size) { return ::operator new(size); }); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. + * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = detail::RMM_DEFAULT_HOST_ALIGNMENT) override { - detail::aligned_deallocate(p, bytes, alignment, [](void* p) { ::operator delete(p); }); + detail::aligned_deallocate(ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); } }; -} // namespace mr -} // namespace rmm + +} // namespace rmm::mr diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index d00a5cffe..14b833684 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -22,38 +22,36 @@ #include #include -namespace rmm { -namespace mr { +namespace rmm::mr { -/**---------------------------------------------------------------------------* +/* * @brief A `host_memory_resource` that uses `cudaMallocHost` to allocate * pinned/page-locked host memory. 
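 *
 * (Editor's illustrative sketch.)
 * @code
 * rmm::mr::pinned_memory_resource pinned_mr{};
 * void* ptr = pinned_mr.allocate(4096);  // page-locked host memory
 * pinned_mr.deallocate(ptr, 4096);
 * @endcode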
* * See https://devblogs.nvidia.com/how-optimize-data-transfers-cuda-cc/ - *---------------------------------------------------------------------------**/ + */ class pinned_memory_resource final : public host_memory_resource { public: pinned_memory_resource() = default; - ~pinned_memory_resource() = default; + ~pinned_memory_resource() override = default; pinned_memory_resource(pinned_memory_resource const&) = default; pinned_memory_resource(pinned_memory_resource&&) = default; pinned_memory_resource& operator=(pinned_memory_resource const&) = default; pinned_memory_resource& operator=(pinned_memory_resource&&) = default; private: - /**---------------------------------------------------------------------------* + /** * @brief Allocates pinned memory on the host of size at least `bytes` bytes. * - * The returned storage is aligned to the specified `alignment` if supported, - * and to `alignof(std::max_align_t)` otherwise. + * The returned storage is aligned to the specified `alignment` if supported, and to + * `alignof(std::max_align_t)` otherwise. * - * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be - * allocated. + * @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated. * * @param bytes The size of the allocation * @param alignment Alignment of the allocation * @return void* Pointer to the newly allocated memory - *---------------------------------------------------------------------------**/ + */ void* do_allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override { // don't allocate anything if the user requested zero bytes @@ -64,40 +62,35 @@ class pinned_memory_resource final : public host_memory_resource { (detail::is_supported_alignment(alignment)) ? alignment : detail::RMM_DEFAULT_HOST_ALIGNMENT; return detail::aligned_allocate(bytes, alignment, [](std::size_t size) { - void* p{nullptr}; - auto status = cudaMallocHost(&p, size); + void* ptr{nullptr}; + auto status = cudaMallocHost(&ptr, size); if (cudaSuccess != status) { throw std::bad_alloc{}; } - return p; + return ptr; }); } - /**---------------------------------------------------------------------------* - * @brief Deallocate memory pointed to by `p`. + /** + * @brief Deallocate memory pointed to by `ptr`. * - * `p` must have been returned by a prior call to `allocate(bytes,alignment)` - * on a `host_memory_resource` that compares equal to `*this`, and the storage - * it points to must not yet have been deallocated, otherwise behavior is - * undefined. + * `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a + * `host_memory_resource` that compares equal to `*this`, and the storage it points to must not + * yet have been deallocated, otherwise behavior is undefined. * * @throws Nothing. * - * @param p Pointer to be deallocated - * @param bytes The size in bytes of the allocation. This must be equal to the - * value of `bytes` that was passed to the `allocate` call that returned `p`. - * @param alignment Alignment of the allocation. This must be equal to the - *value of `alignment` that was passed to the `allocate` call that returned - *`p`. - * @param stream Stream on which to perform deallocation - *---------------------------------------------------------------------------**/ - void do_deallocate(void* p, + * @param ptr Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes` + * that was passed to the `allocate` call that returned `ptr`. 
+ * @param alignment Alignment of the allocation. This must be equal to the value of `alignment` + * that was passed to the `allocate` call that returned `ptr`. + */ + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t)) override { - (void)alignment; - if (nullptr == p) { return; } + if (nullptr == ptr) { return; } detail::aligned_deallocate( - p, bytes, alignment, [](void* p) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(p)); }); + ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); }); } }; -} // namespace mr -} // namespace rmm +} // namespace rmm::mr diff --git a/include/rmm/thrust_rmm_allocator.h b/include/rmm/thrust_rmm_allocator.h index 889faa3bd..894f402a1 100644 --- a/include/rmm/thrust_rmm_allocator.h +++ b/include/rmm/thrust_rmm_allocator.h @@ -38,12 +38,13 @@ using exec_policy_t = std::unique_ptr; * allocation. */ [[deprecated("Use new exec_policy in rmm/exec_policy.hpp")]] inline exec_policy_t exec_policy( - cudaStream_t stream = 0) + cudaStream_t stream = nullptr) { + // NOLINTNEXTLINE(cppcoreguidelines-owning-memory) auto* alloc = new rmm::mr::thrust_allocator(cuda_stream_view{stream}); auto deleter = [alloc](par_t* pointer) { - delete alloc; - delete pointer; + delete alloc; // NOLINT(cppcoreguidelines-owning-memory) + delete pointer; // NOLINT(cppcoreguidelines-owning-memory) }; exec_policy_t policy{new par_t(*alloc), deleter}; diff --git a/python/setup.py b/python/setup.py index e05a73e4b..8101ba2ef 100644 --- a/python/setup.py +++ b/python/setup.py @@ -155,7 +155,7 @@ def get_cuda_version_from_header(cuda_include_dir): ], libraries=["cuda", "cudart"], language="c++", - extra_compile_args=["-std=c++14"], + extra_compile_args=["-std=c++17"], ) ], nthreads=nthreads, @@ -178,7 +178,7 @@ def get_cuda_version_from_header(cuda_include_dir): ], libraries=["cuda", "cudart"], language="c++", - extra_compile_args=["-std=c++14"], + extra_compile_args=["-std=c++17"], ) ], nthreads=nthreads, diff --git a/tests/byte_literals.hpp b/tests/byte_literals.hpp new file mode 100644 index 000000000..b22e98a1c --- /dev/null +++ b/tests/byte_literals.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace rmm::test { + +constexpr auto kilo{long{1} << 10}; +constexpr auto mega{long{1} << 20}; +constexpr auto giga{long{1} << 30}; +constexpr auto tera{long{1} << 40}; +constexpr auto peta{long{1} << 50}; + +// user-defined Byte literals +constexpr unsigned long long operator""_B(unsigned long long val) { return val; } +constexpr unsigned long long operator""_KiB(unsigned long long const val) { return kilo * val; } +constexpr unsigned long long operator""_MiB(unsigned long long const val) { return mega * val; } +constexpr unsigned long long operator""_GiB(unsigned long long const val) { return giga * val; } +constexpr unsigned long long operator""_TiB(unsigned long long const val) { return tera * val; } +constexpr unsigned long long operator""_PiB(unsigned long long const val) { return peta * val; } + +} // namespace rmm::test diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp index 1e14e2abf..4fddb2da6 100644 --- a/tests/cuda_stream_pool_tests.cpp +++ b/tests/cuda_stream_pool_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,6 @@ TEST_F(CudaStreamPoolTest, Unequal) TEST_F(CudaStreamPoolTest, Nondefault) { auto const stream_a = this->pool.get_stream(); - auto const stream_b = this->pool.get_stream(); // pool streams are explicit, non-default streams EXPECT_FALSE(stream_a.is_default()); @@ -50,13 +49,14 @@ TEST_F(CudaStreamPoolTest, ValidStreams) auto const stream_b = this->pool.get_stream(); // Operations on the streams should work correctly and without throwing exceptions - auto v = rmm::device_uvector{100, stream_a}; - RMM_CUDA_TRY(cudaMemsetAsync(v.data(), 0xcc, 100, stream_a.value())); + auto constexpr vector_size{100}; + auto vec1 = rmm::device_uvector{vector_size, stream_a}; + RMM_CUDA_TRY(cudaMemsetAsync(vec1.data(), 0xcc, 100, stream_a.value())); stream_a.synchronize(); - auto v2 = rmm::device_uvector{v, stream_b}; - auto x = v2.front_element(stream_b); - EXPECT_EQ(x, 0xcc); + auto vec2 = rmm::device_uvector{vec1, stream_b}; + auto element = vec2.front_element(stream_b); + EXPECT_EQ(element, 0xcc); } TEST_F(CudaStreamPoolTest, PoolSize) { EXPECT_GE(this->pool.get_pool_size(), 1); } diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 55e3185fe..f801226c6 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -47,6 +47,7 @@ TEST_F(CudaStreamTest, MoveConstructor) rmm::cuda_stream stream_a; auto const view_a = stream_a.view(); rmm::cuda_stream stream_b = std::move(stream_a); + // NOLINTNEXTLINE(bugprone-use-after-move, clang-analyzer-cplusplus.Move) EXPECT_FALSE(stream_a.is_valid()); // Any other operations on stream_a are UB, may segfault EXPECT_EQ(stream_b, view_a); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index fa36a2751..2f8d34bad 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -27,9 +27,11 @@ #include #include -#include #include #include + +#include + #include #include @@ -42,7 +44,10 @@ struct DeviceBufferTest : public ::testing::Test { DeviceBufferTest() { std::default_random_engine generator; - std::uniform_int_distribution distribution(1000, 100000); + + auto constexpr range_min{1000}; + auto constexpr range_max{100000}; + std::uniform_int_distribution distribution(range_min, range_max); size = 
distribution(generator); } }; @@ -149,6 +154,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) // Initialize buffer thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); @@ -163,6 +169,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); @@ -174,6 +181,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -188,6 +196,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); @@ -203,6 +212,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -213,6 +223,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); @@ -226,6 +237,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -240,6 +252,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); @@ -256,6 +269,7 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); } @@ -263,125 +277,127 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) TYPED_TEST(DeviceBufferTest, MoveConstructor) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); // New buffer should have the same contents as the original rmm::device_buffer buff_new(std::move(buff)); EXPECT_NE(nullptr, buff_new.data()); - EXPECT_EQ(p, buff_new.data()); + EXPECT_EQ(ptr, buff_new.data()); EXPECT_EQ(size, 
buff_new.size()); EXPECT_EQ(capacity, buff_new.capacity()); EXPECT_EQ(stream, buff_new.stream()); EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); - EXPECT_EQ(0, buff.size()); - EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); - EXPECT_NE(nullptr, buff.memory_resource()); + EXPECT_EQ(nullptr, + buff.data()); // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); // NOLINT(bugprone-use-after-move) + EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } TYPED_TEST(DeviceBufferTest, MoveConstructorStream) { rmm::device_buffer buff(this->size, this->stream, &this->mr); this->stream.synchronize(); - auto p = buff.data(); + auto* ptr = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); + auto* mr = buff.memory_resource(); auto stream = buff.stream(); // New buffer should have the same contents as the original rmm::device_buffer buff_new(std::move(buff)); this->stream.synchronize(); EXPECT_NE(nullptr, buff_new.data()); - EXPECT_EQ(p, buff_new.data()); + EXPECT_EQ(ptr, buff_new.data()); EXPECT_EQ(size, buff_new.size()); EXPECT_EQ(capacity, buff_new.capacity()); EXPECT_EQ(stream, buff_new.stream()); EXPECT_EQ(mr, buff_new.memory_resource()); // Original buffer should be empty - EXPECT_EQ(nullptr, buff.data()); - EXPECT_EQ(0, buff.size()); - EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); - EXPECT_NE(nullptr, buff.memory_resource()); + EXPECT_EQ(nullptr, + buff.data()); // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move) + EXPECT_EQ(0, buff.size()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(0, buff.capacity()); // NOLINT(bugprone-use-after-move) + EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); // NOLINT(bugprone-use-after-move) + EXPECT_NE(nullptr, buff.memory_resource()); // NOLINT(bugprone-use-after-move) } TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) { - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - auto p = from.data(); - auto size = from.size(); - auto capacity = from.capacity(); - auto mr = from.memory_resource(); - auto stream = from.stream(); + rmm::device_buffer src(this->size, rmm::cuda_stream_default, &this->mr); + auto* ptr = src.data(); + auto size = src.size(); + auto capacity = src.capacity(); + auto* mr = src.memory_resource(); + auto stream = src.stream(); - rmm::device_buffer to; - EXPECT_NO_THROW(to = std::move(from)); + rmm::device_buffer dest; + dest = std::move(src); // contents of `from` should be in `to` - EXPECT_NE(nullptr, to.data()); - EXPECT_EQ(p, to.data()); - EXPECT_EQ(size, to.size()); - EXPECT_EQ(capacity, to.capacity()); - EXPECT_EQ(stream, to.stream()); - EXPECT_EQ(mr, to.memory_resource()); + EXPECT_NE(nullptr, dest.data()); + EXPECT_EQ(ptr, dest.data()); + EXPECT_EQ(size, dest.size()); + EXPECT_EQ(capacity, dest.capacity()); + EXPECT_EQ(stream, dest.stream()); + EXPECT_EQ(mr, dest.memory_resource()); // `from` should be empty - EXPECT_EQ(nullptr, from.data()); - EXPECT_EQ(0, from.size()); - EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_default, from.stream()); - EXPECT_NE(nullptr, from.memory_resource()); + EXPECT_EQ(nullptr, src.data()); // 
NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+  EXPECT_EQ(0, src.size());
+  EXPECT_EQ(0, src.capacity());
+  EXPECT_EQ(rmm::cuda_stream_default, src.stream());
+  EXPECT_NE(nullptr, src.memory_resource());
 }
 
 TYPED_TEST(DeviceBufferTest, MoveAssignment)
 {
-  rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr);
-  auto p        = from.data();
-  auto size     = from.size();
-  auto capacity = from.capacity();
-  auto mr       = from.memory_resource();
-  auto stream   = from.stream();
+  rmm::device_buffer src(this->size, rmm::cuda_stream_default, &this->mr);
+  auto* ptr     = src.data();
+  auto size     = src.size();
+  auto capacity = src.capacity();
+  auto* mr      = src.memory_resource();
+  auto stream   = src.stream();
 
-  rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr);
-  EXPECT_NO_THROW(to = std::move(from));
+  rmm::device_buffer dest(this->size - 1, rmm::cuda_stream_default, &this->mr);
+  dest = std::move(src);
 
   // contents of `from` should be in `to`
-  EXPECT_NE(nullptr, to.data());
-  EXPECT_EQ(p, to.data());
-  EXPECT_EQ(size, to.size());
-  EXPECT_EQ(capacity, to.capacity());
-  EXPECT_EQ(stream, to.stream());
-  EXPECT_EQ(mr, to.memory_resource());
+  EXPECT_NE(nullptr, dest.data());
+  EXPECT_EQ(ptr, dest.data());
+  EXPECT_EQ(size, dest.size());
+  EXPECT_EQ(capacity, dest.capacity());
+  EXPECT_EQ(stream, dest.stream());
+  EXPECT_EQ(mr, dest.memory_resource());
 
   // `from` should be empty
-  EXPECT_EQ(nullptr, from.data());
-  EXPECT_EQ(0, from.size());
-  EXPECT_EQ(0, from.capacity());
-  EXPECT_EQ(rmm::cuda_stream_default, from.stream());
-  EXPECT_NE(nullptr, from.memory_resource());
+  EXPECT_EQ(nullptr, src.data());  // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+  EXPECT_EQ(0, src.size());
+  EXPECT_EQ(0, src.capacity());
+  EXPECT_EQ(rmm::cuda_stream_default, src.stream());
+  EXPECT_NE(nullptr, src.memory_resource());
 }
 
 TYPED_TEST(DeviceBufferTest, SelfMoveAssignment)
 {
   rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr);
-  auto p        = buff.data();
+  auto* ptr     = buff.data();
   auto size     = buff.size();
   auto capacity = buff.capacity();
-  auto mr       = buff.memory_resource();
+  auto* mr      = buff.memory_resource();
   auto stream   = buff.stream();
 
-  buff = std::move(buff);  // self-move-assignment shouldn't modify the buffer
-  EXPECT_NE(nullptr, buff.data());
-  EXPECT_EQ(p, buff.data());
+  buff = std::move(buff);           // self-move-assignment shouldn't modify the buffer
+  EXPECT_NE(nullptr, buff.data());  // NOLINT(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+  EXPECT_EQ(ptr, buff.data());
   EXPECT_EQ(size, buff.size());
   EXPECT_EQ(capacity, buff.capacity());
   EXPECT_EQ(stream, buff.stream());
@@ -394,10 +410,11 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller)
 
   thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default),
                    static_cast<char*>(buff.data()),
+                   // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                    static_cast<char*>(buff.data()) + buff.size(),
                    0);
 
-  auto old_data = buff.data();
+  auto* old_data = buff.data();
   rmm::device_buffer old_content(
     old_data, buff.size(), rmm::cuda_stream_default, &this->mr);  // for comparison
 
@@ -408,7 +425,7 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller)
 
   // Resizing smaller means the existing allocation should remain unchanged
   EXPECT_EQ(old_data, buff.data());
 
-  EXPECT_NO_THROW(buff.shrink_to_fit(rmm::cuda_stream_default));
+  buff.shrink_to_fit(rmm::cuda_stream_default);
   EXPECT_NE(nullptr, buff.data());
   // A reallocation should have occurred
   EXPECT_NE(old_data, buff.data());
 
@@ -417,6 +434,7 @@ TYPED_TEST(DeviceBufferTest, 
ResizeSmaller) EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) static_cast(buff.data()) + buff.size(), static_cast(old_content.data()))); } @@ -424,8 +442,8 @@ TYPED_TEST(DeviceBufferTest, ResizeSmaller) TYPED_TEST(DeviceBufferTest, ResizeBigger) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto old_data = buff.data(); - auto new_size = this->size + 1; + auto* old_data = buff.data(); + auto new_size = this->size + 1; buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(new_size, buff.capacity()); diff --git a/tests/device_scalar_tests.cpp b/tests/device_scalar_tests.cpp index e4c1a42ff..63c471094 100644 --- a/tests/device_scalar_tests.cpp +++ b/tests/device_scalar_tests.cpp @@ -22,6 +22,7 @@ #include #include + #include #include #include @@ -29,12 +30,12 @@ template struct DeviceScalarTest : public ::testing::Test { + std::default_random_engine generator{}; T value{}; rmm::cuda_stream stream{}; rmm::mr::device_memory_resource* mr{rmm::mr::get_current_device_resource()}; - std::default_random_engine generator{}; - DeviceScalarTest() { value = random_value(); } + DeviceScalarTest() : value{random_value()} {} template ::value, bool> = true> U random_value() @@ -56,7 +57,9 @@ struct DeviceScalarTest : public ::testing::Test { template ::value, bool> = true> U random_value() { - static std::normal_distribution distribution{100, 20}; + auto const mean{100}; + auto const stddev{20}; + static std::normal_distribution distribution(mean, stddev); return distribution(generator); } }; @@ -90,13 +93,14 @@ TYPED_TEST(DeviceScalarTest, MoveCtor) EXPECT_NE(nullptr, scalar.data()); EXPECT_EQ(this->value, scalar.value(this->stream)); - auto original_pointer = scalar.data(); - auto original_value = scalar.value(this->stream); + auto* original_pointer = scalar.data(); + auto original_value = scalar.value(this->stream); rmm::device_scalar moved_to{std::move(scalar)}; EXPECT_NE(nullptr, moved_to.data()); EXPECT_EQ(moved_to.data(), original_pointer); EXPECT_EQ(moved_to.value(this->stream), original_value); + // NOLINTNEXTLINE(bugprone-use-after-move,clang-analyzer-cplusplus.Move) EXPECT_EQ(nullptr, scalar.data()); } diff --git a/tests/device_uvector_tests.cpp b/tests/device_uvector_tests.cpp index db06f7c47..dce2cbbf6 100644 --- a/tests/device_uvector_tests.cpp +++ b/tests/device_uvector_tests.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
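
The NOLINT(bugprone-use-after-move) and clang-analyzer-cplusplus.Move suppressions recurring through the move-semantics tests above all guard the same idiom: the test deliberately reads a moved-from object to verify it was left in its documented empty state, which clang-tidy cannot distinguish from an accidental use-after-move, so the check is silenced one line at a time rather than file-wide. A minimal sketch of the idiom, using a hypothetical `buffer` type rather than any RMM class:

#include <cstddef>
#include <utility>

#include <gtest/gtest.h>

namespace {

// Toy move-only buffer whose move constructor empties the source,
// mirroring the documented behavior of types like rmm::device_buffer.
struct buffer {
  explicit buffer(std::size_t size) : data_{new char[size]}, size_{size} {}
  buffer(buffer&& other) noexcept : data_{other.data_}, size_{other.size_}
  {
    other.data_ = nullptr;  // the moved-from source becomes empty
    other.size_ = 0;
  }
  buffer(buffer const&) = delete;
  buffer& operator=(buffer const&) = delete;
  buffer& operator=(buffer&&) = delete;
  ~buffer() { delete[] data_; }

  char* data_{nullptr};
  std::size_t size_{0};
};

TEST(MoveSemantics, SourceIsEmptied)
{
  buffer src{128};
  buffer dst{std::move(src)};
  EXPECT_EQ(std::size_t{128}, dst.size_);
  // Reading the moved-from object is deliberate here, so the lint is
  // suppressed for exactly one line instead of being disabled globally.
  // NOLINTNEXTLINE(bugprone-use-after-move, clang-analyzer-cplusplus.Move)
  EXPECT_EQ(nullptr, src.data_);
}

}  // namespace
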
@@ -23,7 +23,7 @@ template struct TypedUVectorTest : ::testing::Test { - rmm::cuda_stream_view stream() const noexcept { return rmm::cuda_stream_view{}; } + [[nodiscard]] rmm::cuda_stream_view stream() const noexcept { return rmm::cuda_stream_view{}; } }; using TestTypes = ::testing::Types; @@ -32,27 +32,29 @@ TYPED_TEST_CASE(TypedUVectorTest, TestTypes); TYPED_TEST(TypedUVectorTest, ZeroSizeConstructor) { - rmm::device_uvector uv(0, this->stream()); - EXPECT_EQ(uv.size(), 0); - EXPECT_EQ(uv.end(), uv.begin()); - EXPECT_TRUE(uv.is_empty()); + rmm::device_uvector vec(0, this->stream()); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.end(), vec.begin()); + EXPECT_TRUE(vec.is_empty()); } TYPED_TEST(TypedUVectorTest, NonZeroSizeConstructor) { - rmm::device_uvector uv(12345, this->stream()); - EXPECT_EQ(uv.size(), 12345); - EXPECT_NE(uv.data(), nullptr); - EXPECT_EQ(uv.end(), uv.begin() + uv.size()); - EXPECT_FALSE(uv.is_empty()); - EXPECT_NE(uv.element_ptr(0), nullptr); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_EQ(vec.size(), 12345); + EXPECT_NE(vec.data(), nullptr); + EXPECT_EQ(vec.end(), vec.begin() + vec.size()); + EXPECT_FALSE(vec.is_empty()); + EXPECT_NE(vec.element_ptr(0), nullptr); } TYPED_TEST(TypedUVectorTest, CopyConstructor) { - rmm::device_uvector uv(12345, this->stream()); - rmm::device_uvector uv_copy(uv, this->stream()); - EXPECT_EQ(uv_copy.size(), uv.size()); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + rmm::device_uvector uv_copy(vec, this->stream()); + EXPECT_EQ(uv_copy.size(), vec.size()); EXPECT_NE(uv_copy.data(), nullptr); EXPECT_EQ(uv_copy.end(), uv_copy.begin() + uv_copy.size()); EXPECT_FALSE(uv_copy.is_empty()); @@ -61,145 +63,147 @@ TYPED_TEST(TypedUVectorTest, CopyConstructor) TYPED_TEST(TypedUVectorTest, ResizeSmaller) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - auto original_data = uv.data(); - auto original_begin = uv.begin(); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + auto* original_data = vec.data(); + auto* original_begin = vec.begin(); - auto smaller_size = uv.size() - 1; - uv.resize(smaller_size, this->stream()); + auto smaller_size = vec.size() - 1; + vec.resize(smaller_size, this->stream()); - EXPECT_EQ(original_data, uv.data()); - EXPECT_EQ(original_begin, uv.begin()); - EXPECT_EQ(uv.size(), smaller_size); - EXPECT_EQ(uv.capacity(), original_size); + EXPECT_EQ(original_data, vec.data()); + EXPECT_EQ(original_begin, vec.begin()); + EXPECT_EQ(vec.size(), smaller_size); + EXPECT_EQ(vec.capacity(), original_size); // shrink_to_fit should force a new allocation - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.size(), smaller_size); - EXPECT_EQ(uv.capacity(), smaller_size); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.size(), smaller_size); + EXPECT_EQ(vec.capacity(), smaller_size); } TYPED_TEST(TypedUVectorTest, ResizeLarger) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - auto original_data = uv.data(); - auto original_begin = uv.begin(); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + auto* original_data = vec.data(); + auto* original_begin = vec.begin(); - auto larger_size = uv.size() + 1; - uv.resize(larger_size, this->stream()); + auto larger_size = vec.size() + 1; + vec.resize(larger_size, this->stream()); - EXPECT_NE(uv.data(), original_data); - EXPECT_NE(uv.begin(), original_begin); - 
EXPECT_EQ(uv.size(), larger_size); - EXPECT_EQ(uv.capacity(), larger_size); + EXPECT_NE(vec.data(), original_data); + EXPECT_NE(vec.begin(), original_begin); + EXPECT_EQ(vec.size(), larger_size); + EXPECT_EQ(vec.capacity(), larger_size); - auto larger_data = uv.data(); - auto larger_begin = uv.begin(); + auto* larger_data = vec.data(); + auto* larger_begin = vec.begin(); // shrink_to_fit shouldn't have any effect - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.size(), larger_size); - EXPECT_EQ(uv.capacity(), larger_size); - EXPECT_EQ(uv.data(), larger_data); - EXPECT_EQ(uv.begin(), larger_begin); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.size(), larger_size); + EXPECT_EQ(vec.capacity(), larger_size); + EXPECT_EQ(vec.data(), larger_data); + EXPECT_EQ(vec.begin(), larger_begin); } TYPED_TEST(TypedUVectorTest, ResizeToZero) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); - uv.resize(0, this->stream()); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); + vec.resize(0, this->stream()); - EXPECT_EQ(uv.size(), 0); - EXPECT_TRUE(uv.is_empty()); - EXPECT_EQ(uv.capacity(), original_size); + EXPECT_EQ(vec.size(), 0); + EXPECT_TRUE(vec.is_empty()); + EXPECT_EQ(vec.capacity(), original_size); - uv.shrink_to_fit(this->stream()); - EXPECT_EQ(uv.capacity(), 0); + vec.shrink_to_fit(this->stream()); + EXPECT_EQ(vec.capacity(), 0); } TYPED_TEST(TypedUVectorTest, Release) { - auto original_size = 12345; - rmm::device_uvector uv(original_size, this->stream()); + auto const original_size{12345}; + rmm::device_uvector vec(original_size, this->stream()); - auto original_data = uv.data(); + auto* original_data = vec.data(); - rmm::device_buffer storage = uv.release(); + rmm::device_buffer storage = vec.release(); - EXPECT_EQ(uv.size(), 0); - EXPECT_EQ(uv.capacity(), 0); - EXPECT_TRUE(uv.is_empty()); + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec.capacity(), 0); + EXPECT_TRUE(vec.is_empty()); EXPECT_EQ(storage.data(), original_data); EXPECT_EQ(storage.size(), original_size * sizeof(TypeParam)); } TYPED_TEST(TypedUVectorTest, ElementPointer) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - EXPECT_NE(uv.element_ptr(i), nullptr); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + EXPECT_NE(vec.element_ptr(i), nullptr); } } TYPED_TEST(TypedUVectorTest, OOBSetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - EXPECT_THROW(uv.set_element(uv.size() + 1, 42, this->stream()), rmm::out_of_range); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + EXPECT_THROW(vec.set_element(vec.size() + 1, 42, this->stream()), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, OOBGetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - EXPECT_THROW(uv.element(uv.size() + 1, this->stream()), rmm::out_of_range); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + // avoid error due to nodiscard function + auto foo = [&]() { return vec.element(vec.size() + 1, this->stream()); }; + EXPECT_THROW(foo(), rmm::out_of_range); } TYPED_TEST(TypedUVectorTest, GetSetElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - uv.set_element(i, i, this->stream()); - EXPECT_EQ(static_cast(i), uv.element(i, this->stream())); + auto const 
size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + vec.set_element(i, i, this->stream()); + EXPECT_EQ(static_cast(i), vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, GetSetElementAsync) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { auto init = static_cast(i); - uv.set_element_async(i, init, this->stream()); - EXPECT_EQ(init, uv.element(i, this->stream())); + vec.set_element_async(i, init, this->stream()); + EXPECT_EQ(init, vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, SetElementZeroAsync) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); - for (std::size_t i = 0; i < uv.size(); ++i) { - uv.set_element_to_zero_async(i, this->stream()); - EXPECT_EQ(TypeParam{0}, uv.element(i, this->stream())); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); + for (std::size_t i = 0; i < vec.size(); ++i) { + vec.set_element_to_zero_async(i, this->stream()); + EXPECT_EQ(TypeParam{0}, vec.element(i, this->stream())); } } TYPED_TEST(TypedUVectorTest, FrontBackElement) { - auto size = 12345; - rmm::device_uvector uv(size, this->stream()); + auto const size{12345}; + rmm::device_uvector vec(size, this->stream()); - auto first = TypeParam{42}; - auto last = TypeParam{13}; - uv.set_element(0, first, this->stream()); - uv.set_element(uv.size() - 1, last, this->stream()); + auto const first = TypeParam{42}; + auto const last = TypeParam{13}; + vec.set_element(0, first, this->stream()); + vec.set_element(vec.size() - 1, last, this->stream()); - EXPECT_EQ(first, uv.front_element(this->stream())); - EXPECT_EQ(last, uv.back_element(this->stream())); + EXPECT_EQ(first, vec.front_element(this->stream())); + EXPECT_EQ(last, vec.back_element(this->stream())); } diff --git a/tests/logger_tests.cpp b/tests/logger_tests.cpp index b343c7e35..e0663e84b 100644 --- a/tests/logger_tests.cpp +++ b/tests/logger_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
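
One device_uvector change above is easy to miss: OOBGetElement wraps the throwing call in a lambda before handing it to EXPECT_THROW, with the comment "avoid error due to nodiscard function". The bare call inside the macro would discard the accessor's return value, which warns under [[nodiscard]] and fails the build with -Werror; returning the value from a lambda consumes it, and calling the lambda is not itself a nodiscard expression. A short sketch of why the wrapper is needed, with a hypothetical `checked_get` standing in for the accessor:

#include <cstddef>
#include <stdexcept>

#include <gtest/gtest.h>

namespace {

// Hypothetical accessor: discarding its result is a warning (an error
// under -Werror), like a [[nodiscard]] element accessor.
[[nodiscard]] int checked_get(std::size_t index)
{
  auto const limit = std::size_t{4};
  if (index >= limit) { throw std::out_of_range{"index"}; }
  return static_cast<int>(index);
}

TEST(NodiscardThrow, WrapperLambda)
{
  // EXPECT_THROW(checked_get(10), std::out_of_range) would discard the
  // [[nodiscard]] result inside the macro expansion and trigger the
  // warning. The lambda consumes the result via `return`, and calling
  // the lambda is not itself flagged.
  auto probe = [] { return checked_get(10); };
  EXPECT_THROW(probe(), std::out_of_range);
}

}  // namespace
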
@@ -26,7 +26,7 @@ class raii_restore_env { public: raii_restore_env(char const* name) : name_(name) { - auto const value_or_null = getenv(name); + auto* const value_or_null = getenv(name); if (value_or_null != nullptr) { value_ = value_or_null; is_set_ = true; @@ -42,6 +42,11 @@ class raii_restore_env { } } + raii_restore_env(raii_restore_env const&) = default; + raii_restore_env& operator=(raii_restore_env const&) = default; + raii_restore_env(raii_restore_env&&) = default; + raii_restore_env& operator=(raii_restore_env&&) = default; + private: std::string name_{}; std::string value_{}; @@ -88,19 +93,22 @@ TEST(Adaptor, FilenameConstructor) rmm::mr::cuda_memory_resource upstream; rmm::mr::logging_resource_adaptor log_mr{&upstream, filename}; - auto p0 = log_mr.allocate(100); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p0, 100); - log_mr.deallocate(p1, 42); + auto const size0{100}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr0, size0); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 100, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 100, p0}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size0, ptr0}, + {action::FREE, size1, ptr1}}; expect_log_events(filename, expected_events); } @@ -117,19 +125,22 @@ TEST(Adaptor, MultiSinkConstructor) rmm::mr::logging_resource_adaptor log_mr{&upstream, {file_sink1, file_sink2}}; - auto p0 = log_mr.allocate(100); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p0, 100); - log_mr.deallocate(p1, 42); + auto const size0{100}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr0, size0); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 100, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 100, p0}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size0, ptr0}, + {action::FREE, size1, ptr1}}; expect_log_events(filename1, expected_events); expect_log_events(filename2, expected_events); @@ -142,19 +153,22 @@ TEST(Adaptor, Factory) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, filename); - auto p0 = log_mr.allocate(99); - log_mr.deallocate(p0, 99); - auto p1 = log_mr.allocate(42); - log_mr.deallocate(p1, 42); + auto const size0{99}; + auto const size1{42}; + + auto* ptr0 = log_mr.allocate(size0); + log_mr.deallocate(ptr0, size0); + auto* ptr1 = log_mr.allocate(size1); + log_mr.deallocate(ptr1, size1); log_mr.flush(); using rmm::detail::action; using rmm::detail::event; - std::vector expected_events{{action::ALLOCATE, 99, p0}, - {action::FREE, 99, p0}, - {action::ALLOCATE, 42, p1}, - {action::FREE, 42, p1}}; + std::vector expected_events{{action::ALLOCATE, size0, ptr0}, + {action::FREE, size0, ptr0}, + {action::ALLOCATE, size1, ptr1}, + {action::FREE, size1, ptr1}}; expect_log_events(filename, expected_events); } @@ -178,8 +192,10 @@ TEST(Adaptor, EnvironmentPath) // use log file location specified in environment variable RMM_LOG_FILE auto log_mr = rmm::mr::make_logging_adaptor(&upstream); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* ptr 
= log_mr.allocate(size); + log_mr.deallocate(ptr, size); log_mr.flush(); @@ -187,8 +203,8 @@ TEST(Adaptor, EnvironmentPath) using rmm::detail::event; std::vector expected_events{ - {action::ALLOCATE, 100, p}, - {action::FREE, 100, p}, + {action::ALLOCATE, size, ptr}, + {action::FREE, size, ptr}, }; expect_log_events(filename, expected_events); @@ -202,11 +218,13 @@ TEST(Adaptor, STDOUT) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::cout); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStdout(); - std::string header = output.substr(0, output.find("\n")); + std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } @@ -218,10 +236,12 @@ TEST(Adaptor, STDERR) auto log_mr = rmm::mr::make_logging_adaptor(&upstream, std::cerr); - auto p = log_mr.allocate(100); - log_mr.deallocate(p, 100); + auto const size{100}; + + auto* ptr = log_mr.allocate(size); + log_mr.deallocate(ptr, size); std::string output = testing::internal::GetCapturedStderr(); - std::string header = output.substr(0, output.find("\n")); + std::string header = output.substr(0, output.find('\n')); ASSERT_EQ(header, log_mr.header()); } diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index 3eafd624e..f1ed561f4 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -40,6 +41,12 @@ class mock_resource : public rmm::mr::device_memory_resource { using aligned_mock = rmm::mr::aligned_resource_adaptor; using aligned_real = rmm::mr::aligned_resource_adaptor; +void* int_to_address(std::size_t val) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast, performance-no-int-to-ptr) + return reinterpret_cast(val); +} + TEST(AlignedTest, ThrowOnNullUpstream) { auto construct_nullptr = []() { aligned_mock mr{nullptr}; }; @@ -49,7 +56,9 @@ TEST(AlignedTest, ThrowOnNullUpstream) TEST(AlignedTest, ThrowOnInvalidAllocationAlignment) { mock_resource mock; - auto construct_alignment = [](auto* r, std::size_t a) { aligned_mock mr{r, a}; }; + auto construct_alignment = [](auto* memres, std::size_t align) { + aligned_mock mr{memres, align}; + }; EXPECT_THROW(construct_alignment(&mock, 255), rmm::logic_error); EXPECT_NO_THROW(construct_alignment(&mock, 256)); EXPECT_THROW(construct_alignment(&mock, 768), rmm::logic_error); @@ -85,97 +94,148 @@ TEST(AlignedTest, DefaultAllocationAlignmentPassthrough) aligned_mock mr{&mock}; cuda_stream_view stream; - void* pointer = reinterpret_cast(123); + void* const pointer = int_to_address(123); + // device_memory_resource aligns to 8. 
- EXPECT_CALL(mock, do_allocate(8, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 8, stream)).Times(1); - EXPECT_EQ(mr.allocate(5, stream), pointer); - mr.deallocate(pointer, 5, stream); + { + auto const size{8}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{5}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } } TEST(AlignedTest, BelowAlignmentThresholdPassthrough) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(123); + void* const pointer = int_to_address(123); // device_memory_resource aligns to 8. - EXPECT_CALL(mock, do_allocate(8, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 8, stream)).Times(1); - EXPECT_EQ(mr.allocate(3, stream), pointer); - mr.deallocate(pointer, 3, stream); - - void* pointer1 = reinterpret_cast(456); - EXPECT_CALL(mock, do_allocate(65528, stream)).WillOnce(Return(pointer1)); - EXPECT_CALL(mock, do_deallocate(pointer1, 65528, stream)).Times(1); - EXPECT_EQ(mr.allocate(65528, stream), pointer1); - mr.deallocate(pointer1, 65528, stream); + { + auto const size{8}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{3}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } + + { + auto const size{65528}; + void* const pointer1 = int_to_address(456); + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer1)); + EXPECT_CALL(mock, do_deallocate(pointer1, size, stream)).Times(1); + EXPECT_EQ(mr.allocate(size, stream), pointer1); + mr.deallocate(pointer1, size, stream); + } } TEST(AlignedTest, UpstreamAddressAlreadyAligned) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(4096); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - - EXPECT_EQ(mr.allocate(65536, stream), pointer); - mr.deallocate(pointer, 65536, stream); + void* const pointer = int_to_address(4096); + + { + auto const size{69376}; + EXPECT_CALL(mock, do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + auto const size{65536}; + EXPECT_EQ(mr.allocate(size, stream), pointer); + mr.deallocate(pointer, size, stream); + } } TEST(AlignedTest, AlignUpstreamAddress) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(256); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - - void* expected_pointer = reinterpret_cast(4096); - EXPECT_EQ(mr.allocate(65536, stream), expected_pointer); - mr.deallocate(expected_pointer, 65536, stream); + { + void* const pointer = int_to_address(256); + auto const size{69376}; + EXPECT_CALL(mock, 
do_allocate(size, stream)).WillOnce(Return(pointer)); + EXPECT_CALL(mock, do_deallocate(pointer, size, stream)).Times(1); + } + + { + void* const expected_pointer = int_to_address(4096); + auto const size{65536}; + EXPECT_EQ(mr.allocate(size, stream), expected_pointer); + mr.deallocate(expected_pointer, size, stream); + } } TEST(AlignedTest, AlignMultiple) { mock_resource mock; - aligned_mock mr{&mock, 4096, 65536}; + auto const alignment{4096}; + auto const threshold{65536}; + aligned_mock mr{&mock, alignment, threshold}; cuda_stream_view stream; - void* pointer = reinterpret_cast(256); - void* pointer1 = reinterpret_cast(131584); - void* pointer2 = reinterpret_cast(263168); - EXPECT_CALL(mock, do_allocate(69376, stream)).WillOnce(Return(pointer)); - EXPECT_CALL(mock, do_allocate(77568, stream)).WillOnce(Return(pointer1)); - EXPECT_CALL(mock, do_allocate(81664, stream)).WillOnce(Return(pointer2)); - EXPECT_CALL(mock, do_deallocate(pointer, 69376, stream)).Times(1); - EXPECT_CALL(mock, do_deallocate(pointer1, 77568, stream)).Times(1); - EXPECT_CALL(mock, do_deallocate(pointer2, 81664, stream)).Times(1); - - void* expected_pointer = reinterpret_cast(4096); - void* expected_pointer1 = reinterpret_cast(135168); - void* expected_pointer2 = reinterpret_cast(266240); - EXPECT_EQ(mr.allocate(65536, stream), expected_pointer); - EXPECT_EQ(mr.allocate(73728, stream), expected_pointer1); - EXPECT_EQ(mr.allocate(77800, stream), expected_pointer2); - mr.deallocate(expected_pointer1, 73728, stream); - mr.deallocate(expected_pointer, 65536, stream); - mr.deallocate(expected_pointer2, 77800, stream); + + { + void* const pointer1 = int_to_address(256); + void* const pointer2 = int_to_address(131584); + void* const pointer3 = int_to_address(263168); + auto const size1{69376}; + auto const size2{77568}; + auto const size3{81664}; + EXPECT_CALL(mock, do_allocate(size1, stream)).WillOnce(Return(pointer1)); + EXPECT_CALL(mock, do_allocate(size2, stream)).WillOnce(Return(pointer2)); + EXPECT_CALL(mock, do_allocate(size3, stream)).WillOnce(Return(pointer3)); + EXPECT_CALL(mock, do_deallocate(pointer1, size1, stream)).Times(1); + EXPECT_CALL(mock, do_deallocate(pointer2, size2, stream)).Times(1); + EXPECT_CALL(mock, do_deallocate(pointer3, size3, stream)).Times(1); + } + + { + void* const expected_pointer1 = int_to_address(4096); + void* const expected_pointer2 = int_to_address(135168); + void* const expected_pointer3 = int_to_address(266240); + auto const size1{65536}; + auto const size2{73728}; + auto const size3{77800}; + EXPECT_EQ(mr.allocate(size1, stream), expected_pointer1); + EXPECT_EQ(mr.allocate(size2, stream), expected_pointer2); + EXPECT_EQ(mr.allocate(size3, stream), expected_pointer3); + mr.deallocate(expected_pointer1, size1, stream); + mr.deallocate(expected_pointer2, size2, stream); + mr.deallocate(expected_pointer3, size3, stream); + } } TEST(AlignedTest, AlignRealPointer) { - aligned_real mr{rmm::mr::get_current_device_resource(), 4096, 65536}; - void* alloc = mr.allocate(65536); - auto const address = reinterpret_cast(alloc); - EXPECT_TRUE(address % 4096 == 0); - mr.deallocate(alloc, 65536); + auto const alignment{4096}; + auto const threshold{65536}; + aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; + void* alloc = mr.allocate(threshold); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); + mr.deallocate(alloc, threshold); } } // namespace diff --git a/tests/mr/device/cuda_async_mr_tests.cpp b/tests/mr/device/cuda_async_mr_tests.cpp index 
a43f68fab..4bf0c3d5b 100644 --- a/tests/mr/device/cuda_async_mr_tests.cpp +++ b/tests/mr/device/cuda_async_mr_tests.cpp @@ -19,8 +19,7 @@ #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { using cuda_async_mr = rmm::mr::cuda_async_memory_resource; @@ -38,24 +37,24 @@ TEST(PoolTest, ThrowIfNotSupported) #if defined(RMM_CUDA_MALLOC_ASYNC_SUPPORT) TEST(PoolTest, ExplicitInitialPoolSize) { - cuda_async_mr mr{100}; - void* p; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_NO_THROW(mr.deallocate(p, 100)); + const auto pool_init_size{100}; + cuda_async_mr mr{pool_init_size}; + void* ptr = mr.allocate(pool_init_size); + mr.deallocate(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } TEST(PoolTest, ExplicitReleaseThreshold) { - cuda_async_mr mr{100, 1000}; - void* p; - EXPECT_NO_THROW(p = mr.allocate(100)); - EXPECT_NO_THROW(mr.deallocate(p, 100)); + const auto pool_init_size{100}; + const auto pool_release_threshold{1000}; + cuda_async_mr mr{pool_init_size, pool_release_threshold}; + void* ptr = mr.allocate(pool_init_size); + mr.deallocate(ptr, pool_init_size); RMM_CUDA_TRY(cudaDeviceSynchronize()); } #endif } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/limiting_mr_tests.cpp b/tests/mr/device/limiting_mr_tests.cpp index 64ec688be..3bc643abc 100644 --- a/tests/mr/device/limiting_mr_tests.cpp +++ b/tests/mr/device/limiting_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,65 +14,81 @@ * limitations under the License. */ +#include "../../byte_literals.hpp" + #include #include #include #include -#include "mr_test.hpp" -namespace rmm { -namespace test { +namespace rmm::test { namespace { + using Limiting_adaptor = rmm::mr::limiting_resource_adaptor; + TEST(LimitingTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, 5_MiB}; }; + auto const max_size{5_MiB}; + auto construct_nullptr = []() { Limiting_adaptor mr{nullptr, max_size}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); } TEST(LimitingTest, TooBig) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 1_MiB}; - EXPECT_THROW(mr.allocate(5_MiB), rmm::bad_alloc); + auto const max_size{5_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + EXPECT_THROW(mr.allocate(max_size + 1), rmm::bad_alloc); } TEST(LimitingTest, UnderLimitDueToFrees) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 10_MiB}; - auto p1 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); - auto p2 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - mr.deallocate(p1, 4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); + auto const max_size{10_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + auto const size1{4_MiB}; + auto* ptr1 = mr.allocate(size1); + auto allocated_bytes = size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto* ptr2 = mr.allocate(size1); + allocated_bytes 
+= size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + mr.deallocate(ptr1, size1); + allocated_bytes -= size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); // note that we don't keep track of fragmentation or things like page size // so this should fill 100% of the memory even though it is probably over. - EXPECT_NO_THROW(mr.allocate(6_MiB)); - EXPECT_EQ(mr.get_allocated_bytes(), 10_MiB); + auto const size2{6_MiB}; + auto* ptr3 = mr.allocate(size2); + allocated_bytes += size2; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 0); - mr.deallocate(p2, 4_MiB); + mr.deallocate(ptr2, size1); + mr.deallocate(ptr3, size2); } TEST(LimitingTest, OverLimit) { - Limiting_adaptor mr{rmm::mr::get_current_device_resource(), 10_MiB}; - auto p1 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 4_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 6_MiB); - auto p2 = mr.allocate(4_MiB); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - EXPECT_THROW(mr.allocate(3_MiB), rmm::bad_alloc); - EXPECT_EQ(mr.get_allocated_bytes(), 8_MiB); - EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), 2_MiB); - mr.deallocate(p1, 4_MiB); - mr.deallocate(p2, 4_MiB); + auto const max_size{10_MiB}; + Limiting_adaptor mr{rmm::mr::get_current_device_resource(), max_size}; + auto const size1{4_MiB}; + auto* ptr1 = mr.allocate(size1); + auto allocated_bytes = size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto* ptr2 = mr.allocate(size1); + allocated_bytes += size1; + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + auto const size2{3_MiB}; + EXPECT_THROW(mr.allocate(size2), rmm::bad_alloc); + EXPECT_EQ(mr.get_allocated_bytes(), allocated_bytes); + EXPECT_EQ(mr.get_allocation_limit() - mr.get_allocated_bytes(), max_size - allocated_bytes); + mr.deallocate(ptr1, 4_MiB); + mr.deallocate(ptr2, 4_MiB); } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index 233686f7e..838035d9f 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,7 @@ #include #include -namespace rmm { -namespace test { +namespace rmm::test { namespace { struct mr_test_mt : public mr_test { @@ -49,11 +48,13 @@ void spawn_n(std::size_t num_threads, Task task, Arguments&&... 
args) { std::vector threads; threads.reserve(num_threads); - for (std::size_t i = 0; i < num_threads; ++i) + for (std::size_t i = 0; i < num_threads; ++i) { threads.emplace_back(std::thread(task, std::forward(args)...)); + } - for (auto& t : threads) - t.join(); + for (auto& thread : threads) { + thread.join(); + } } template @@ -75,8 +76,7 @@ TEST(DefaultTest, CurrentDeviceResourceIsCUDA_mt) TEST(DefaultTest, GetCurrentDeviceResource_mt) { spawn([]() { - rmm::mr::device_memory_resource* mr{nullptr}; - EXPECT_NO_THROW(mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); EXPECT_NE(nullptr, mr); EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{})); }); @@ -86,8 +86,7 @@ TEST_P(mr_test_mt, SetCurrentDeviceResource_mt) { // single thread changes default resource, then multiple threads use it - rmm::mr::device_memory_resource* old{nullptr}; - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(this->mr.get())); + rmm::mr::device_memory_resource* old = rmm::mr::set_current_device_resource(this->mr.get()); EXPECT_NE(nullptr, old); spawn([mr = this->mr.get()]() { @@ -96,41 +95,42 @@ TEST_P(mr_test_mt, SetCurrentDeviceResource_mt) }); // setting default resource w/ nullptr should reset to initial - EXPECT_NO_THROW(rmm::mr::set_current_device_resource(nullptr)); + rmm::mr::set_current_device_resource(nullptr); EXPECT_TRUE(old->is_equal(*rmm::mr::get_current_device_resource())); } TEST_P(mr_test_mt, SetCurrentDeviceResourcePerThread_mt) { - int num_devices; + int num_devices{}; RMM_CUDA_TRY(cudaGetDeviceCount(&num_devices)); std::vector threads; threads.reserve(num_devices); for (int i = 0; i < num_devices; ++i) { - threads.emplace_back(std::thread{ - [mr = this->mr.get()](auto dev_id) { - RMM_CUDA_TRY(cudaSetDevice(dev_id)); - rmm::mr::device_memory_resource* old; - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(mr)); - EXPECT_NE(nullptr, old); - // initial resource for this device should be CUDA mr - EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); - // get_current_device_resource should equal the resource we just set - EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); - // Setting current dev resource to nullptr should reset to cuda MR and return the MR we - // previously set - EXPECT_NO_THROW(old = rmm::mr::set_current_device_resource(nullptr)); - EXPECT_NE(nullptr, old); - EXPECT_EQ(old, mr); - EXPECT_TRUE( - rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{})); - }, - i}); + threads.emplace_back(std::thread{[mr = this->mr.get()](auto dev_id) { + RMM_CUDA_TRY(cudaSetDevice(dev_id)); + rmm::mr::device_memory_resource* old = + rmm::mr::set_current_device_resource(mr); + EXPECT_NE(nullptr, old); + // initial resource for this device should be CUDA mr + EXPECT_TRUE(old->is_equal(rmm::mr::cuda_memory_resource{})); + // get_current_device_resource should equal the resource we + // just set + EXPECT_EQ(mr, rmm::mr::get_current_device_resource()); + // Setting current dev resource to nullptr should reset to + // cuda MR and return the MR we previously set + old = rmm::mr::set_current_device_resource(nullptr); + EXPECT_NE(nullptr, old); + EXPECT_EQ(old, mr); + EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal( + rmm::mr::cuda_memory_resource{})); + }, + i}); } - for (auto& t : threads) - t.join(); + for (auto& thread : threads) { + thread.join(); + } } TEST_P(mr_test_mt, AllocateDefaultStream) @@ -145,22 +145,31 @@ TEST_P(mr_test_mt, AllocateOnStream) 
TEST_P(mr_test_mt, RandomAllocationsDefaultStream) { - spawn(test_random_allocations, this->mr.get(), 100, 5_MiB, rmm::cuda_stream_view{}); + spawn(test_random_allocations, + this->mr.get(), + default_num_allocations, + default_max_size, + rmm::cuda_stream_view{}); } TEST_P(mr_test_mt, RandomAllocationsStream) { - spawn(test_random_allocations, this->mr.get(), 100, 5_MiB, this->stream.view()); + spawn(test_random_allocations, + this->mr.get(), + default_num_allocations, + default_max_size, + this->stream.view()); } TEST_P(mr_test_mt, MixedRandomAllocationFreeDefaultStream) { - spawn(test_mixed_random_allocation_free, this->mr.get(), 5_MiB, rmm::cuda_stream_view{}); + spawn( + test_mixed_random_allocation_free, this->mr.get(), default_max_size, rmm::cuda_stream_view{}); } TEST_P(mr_test_mt, MixedRandomAllocationFreeStream) { - spawn(test_mixed_random_allocation_free, this->mr.get(), 5_MiB, this->stream.view()); + spawn(test_mixed_random_allocation_free, this->mr.get(), default_max_size, this->stream.view()); } void allocate_loop(rmm::mr::device_memory_resource* mr, @@ -176,8 +185,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, for (std::size_t i = 0; i < num_allocations; ++i) { std::size_t size = size_distribution(generator); - void* ptr{}; - EXPECT_NO_THROW(ptr = mr->allocate(size, stream)); + void* ptr = mr->allocate(size, stream); { std::lock_guard lock(mtx); allocations.emplace_back(ptr, size); @@ -193,14 +201,11 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, { for (std::size_t i = 0; i < num_allocations;) { std::lock_guard lock(mtx); - if (allocations.empty()) - continue; - else { - i++; - allocation alloc = allocations.front(); - allocations.pop_front(); - EXPECT_NO_THROW(mr->deallocate(alloc.p, alloc.size, stream)); - } + if (allocations.empty()) { continue; } + i++; + allocation alloc = allocations.front(); + allocations.pop_front(); + mr->deallocate(alloc.ptr, alloc.size, stream); } } @@ -242,13 +247,10 @@ TEST_P(mr_test_mt, AllocFreeDifferentThreadsSameStream) TEST_P(mr_test_mt, AllocFreeDifferentThreadsDifferentStream) { - EXPECT_NO_THROW([this]() { - rmm::cuda_stream streamB; - test_allocate_free_different_threads(this->mr.get(), this->stream, streamB); - streamB.synchronize(); - }()); + rmm::cuda_stream streamB; + test_allocate_free_different_threads(this->mr.get(), this->stream, streamB); + streamB.synchronize(); } } // namespace -} // namespace test -} // namespace rmm +} // namespace rmm::test diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index bdcd69251..4bef2b54e 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -16,10 +16,11 @@ #pragma once -#include +#include "../../byte_literals.hpp" #include #include +#include #include #include #include @@ -31,29 +32,26 @@ #include #include +#include + #include #include #include #include #include +#include -namespace rmm { -namespace test { - -inline bool is_pointer_aligned(void* p, std::size_t alignment = 256) -{ - return (0 == reinterpret_cast(p) % alignment); -} +namespace rmm::test { /** * @brief Returns if a pointer points to a device memory or managed memory * allocation. 
 */
-inline bool is_device_memory(void* p)
+inline bool is_device_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
-  if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; }
+  if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
 #if CUDART_VERSION < 10000  // memoryType is deprecated in CUDA 10
   return attributes.memoryType == cudaMemoryTypeDevice;
 #else
@@ -61,18 +59,15 @@ inline bool is_device_memory(void* p)
 #endif
 }

-// some useful allocation sizes
-constexpr long operator""_B(unsigned long long const x) { return x; }
-constexpr long operator""_KiB(unsigned long long const x) { return x * (long{1} << 10); }
-constexpr long operator""_MiB(unsigned long long const x) { return x * (long{1} << 20); }
-constexpr long operator""_GiB(unsigned long long const x) { return x * (long{1} << 30); }
-constexpr long operator""_TiB(unsigned long long const x) { return x * (long{1} << 40); }
-constexpr long operator""_PiB(unsigned long long const x) { return x * (long{1} << 50); }
+enum size_in_bytes : size_t {};
+
+constexpr auto default_num_allocations{100};
+constexpr size_in_bytes default_max_size{5_MiB};

 struct allocation {
-  void* p{nullptr};
+  void* ptr{nullptr};
   std::size_t size{0};
-  allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {}
+  allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {}
   allocation() = default;
 };

@@ -81,49 +76,47 @@ struct allocation {
 inline void test_get_current_device_resource()
 {
   EXPECT_NE(nullptr, rmm::mr::get_current_device_resource());
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = rmm::mr::get_current_device_resource()->allocate(1_MiB));
-  EXPECT_NE(nullptr, p);
-  EXPECT_TRUE(is_pointer_aligned(p));
-  EXPECT_TRUE(is_device_memory(p));
-  EXPECT_NO_THROW(rmm::mr::get_current_device_resource()->deallocate(p, 1_MiB));
+  void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB);
+  EXPECT_NE(nullptr, ptr);
+  EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr));
+  EXPECT_TRUE(is_device_memory(ptr));
+  rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB);
 }

 inline void test_allocate(rmm::mr::device_memory_resource* mr,
                           std::size_t bytes,
                           cuda_stream_view stream = {})
 {
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = mr->allocate(bytes));
-  if (not stream.is_default()) stream.synchronize();
-  EXPECT_NE(nullptr, p);
-  EXPECT_TRUE(is_pointer_aligned(p));
-  EXPECT_TRUE(is_device_memory(p));
-  EXPECT_NO_THROW(mr->deallocate(p, bytes));
-  if (not stream.is_default()) stream.synchronize();
+  void* ptr = mr->allocate(bytes);
+  if (not stream.is_default()) { stream.synchronize(); }
+  EXPECT_NE(nullptr, ptr);
+  EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr));
+  EXPECT_TRUE(is_device_memory(ptr));
+  mr->deallocate(ptr, bytes);
+  if (not stream.is_default()) { stream.synchronize(); }
 }

 // Simple reproducer for https://github.com/rapidsai/rmm/issues/861
 inline void concurrent_allocations_are_different(rmm::mr::device_memory_resource* mr,
                                                  cuda_stream_view stream)
 {
-  void* p1 = mr->allocate(8_B, stream);
-  void* p2 = mr->allocate(8_B, stream);
+  const auto size{8_B};
+  void* ptr1 = mr->allocate(size, stream);
+  void* ptr2 = mr->allocate(size, stream);

-  EXPECT_NE(p1, p2);
+  EXPECT_NE(ptr1, ptr2);

-  mr->deallocate(p1, 8_B, stream);
-  mr->deallocate(p2, 8_B, stream);
+  mr->deallocate(ptr1, size, stream);
+  mr->deallocate(ptr2, size, stream);
 }

 inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_stream_view stream)
 {
   // test allocating zero bytes on non-default stream
   {
-    void* p{nullptr};
-    EXPECT_NO_THROW(p = mr->allocate(0, stream));
+    void* ptr = mr->allocate(0, stream);
     stream.synchronize();
-    EXPECT_NO_THROW(mr->deallocate(p, 0, stream));
+    EXPECT_NO_THROW(mr->deallocate(ptr, 0, stream));
     stream.synchronize();
   }

@@ -134,15 +127,15 @@ inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_s

   // should fail to allocate too much
   {
-    void* p{nullptr};
-    EXPECT_THROW(p = mr->allocate(1_PiB, stream), rmm::bad_alloc);
-    EXPECT_EQ(nullptr, p);
+    void* ptr{nullptr};
+    EXPECT_THROW(ptr = mr->allocate(1_PiB, stream), rmm::bad_alloc);
+    EXPECT_EQ(nullptr, ptr);
   }
 }

 inline void test_random_allocations(rmm::mr::device_memory_resource* mr,
-                                    std::size_t num_allocations = 100,
-                                    std::size_t max_size        = 5_MiB,
+                                    std::size_t num_allocations = default_num_allocations,
+                                    size_in_bytes max_size      = default_max_size,
                                     cuda_stream_view stream     = {})
 {
   std::vector<allocation> allocations(num_allocations);
@@ -150,24 +143,25 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr,
   std::default_random_engine generator;
   std::uniform_int_distribution<std::size_t> distribution(1, max_size);

-  // 100 allocations from [0,5MB)
-  std::for_each(
-    allocations.begin(), allocations.end(), [&generator, &distribution, stream, mr](allocation& a) {
-      a.size = distribution(generator);
-      EXPECT_NO_THROW(a.p = mr->allocate(a.size, stream));
-      if (not stream.is_default()) stream.synchronize();
-      EXPECT_NE(nullptr, a.p);
-      EXPECT_TRUE(is_pointer_aligned(a.p));
-    });
-
-  std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& a) {
-    EXPECT_NO_THROW(mr->deallocate(a.p, a.size, stream));
-    if (not stream.is_default()) stream.synchronize();
+  // num_allocations allocations from [0,max_size)
+  std::for_each(allocations.begin(),
+                allocations.end(),
+                [&generator, &distribution, stream, mr](allocation& alloc) {
+                  alloc.size = distribution(generator);
+                  EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream));
+                  if (not stream.is_default()) { stream.synchronize(); }
+                  EXPECT_NE(nullptr, alloc.ptr);
+                  EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr));
+                });
+
+  std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) {
+    EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream));
+    if (not stream.is_default()) { stream.synchronize(); }
   });
 }

 inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* mr,
-                                              std::size_t max_size    = 5_MiB,
+                                              size_in_bytes max_size  = default_max_size,
                                               cuda_stream_view stream = {})
 {
   std::default_random_engine generator;
@@ -175,8 +169,9 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m

   std::uniform_int_distribution<std::size_t> size_distribution(1, max_size);

-  constexpr int allocation_probability = 53;  // percent
-  std::uniform_int_distribution<int> op_distribution(0, 99);
+  constexpr int allocation_probability{53};  // percent
+  constexpr int max_probability{99};
+  std::uniform_int_distribution<int> op_distribution(0, max_probability);
   std::uniform_int_distribution<int> index_distribution(0, num_allocations - 1);

   std::size_t active_allocations{0};
@@ -197,14 +192,14 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m
       allocation_count++;
       EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size));
       auto new_allocation = allocations.back();
-      EXPECT_NE(nullptr, new_allocation.p);
-      EXPECT_TRUE(is_pointer_aligned(new_allocation.p));
+      EXPECT_NE(nullptr, new_allocation.ptr);
+      EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr));
     } else {
-      std::size_t index = index_distribution(generator) % active_allocations;
+      auto const index = static_cast<std::size_t>(index_distribution(generator) % active_allocations);
       active_allocations--;
       allocation to_free = allocations[index];
       allocations.erase(std::next(allocations.begin(), index));
-      EXPECT_NO_THROW(mr->deallocate(to_free.p, to_free.size, stream));
+      EXPECT_NO_THROW(mr->deallocate(to_free.ptr, to_free.size, stream));
     }
   }

@@ -216,18 +211,21 @@ using MRFactoryFunc = std::function<std::shared_ptr<rmm::mr::device_memory_resource>()>;
 struct mr_test : public ::testing::TestWithParam<mr_factory> {
   void SetUp() override
   {
-    auto factory = GetParam().f;
+    auto factory = GetParam().factory;
     mr = factory();
   }

@@ -262,9 +260,12 @@ inline auto make_binning()
   auto pool = make_pool();
   // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB
   // Larger allocations will use the pool resource
-  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(pool, 18, 22);
+  auto const bin_range_start{18};
+  auto const bin_range_end{22};
+
+  auto mr = rmm::mr::make_owning_wrapper<rmm::mr::binning_memory_resource>(
+    pool, bin_range_start, bin_range_end);
   return mr;
 }

-}  // namespace test
-}  // namespace rmm
+}  // namespace rmm::test
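Note on the constants above: the deleted operator""_B family was not dropped, it moved to a shared tests/byte_literals.hpp header (included as "../../byte_literals.hpp" by several test files later in this diff). A minimal sketch of the pattern, assuming the shared header defines the literals along the same lines as the removed ones; the header itself is not part of this diff:

    #include <cstddef>

    // Hypothetical stand-ins for the literals that moved to the shared header;
    // the real definitions are not shown in this diff.
    constexpr std::size_t operator""_B(unsigned long long val) { return val; }
    constexpr std::size_t operator""_KiB(unsigned long long val) { return val * (std::size_t{1} << 10); }
    constexpr std::size_t operator""_MiB(unsigned long long val) { return val * (std::size_t{1} << 20); }

    // The strong-typedef enum from mr_test.hpp gives size parameters their own
    // type while still converting implicitly to std::size_t at call sites.
    enum size_in_bytes : std::size_t {};

    constexpr size_in_bytes default_max_size{5_MiB};
    static_assert(default_max_size == 5 * (std::size_t{1} << 20), "5_MiB is 5 * 2^20 bytes");

Because size_in_bytes is an unscoped enum with a fixed underlying type, values like default_max_size flow into std::size_t parameters unchanged, while giving clang-tidy a distinct type to flag accidental swaps against.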
diff --git a/tests/mr/device/mr_tests.cpp b/tests/mr/device/mr_tests.cpp
index c133479d1..f1248dbc0 100644
--- a/tests/mr/device/mr_tests.cpp
+++ b/tests/mr/device/mr_tests.cpp
@@ -20,21 +20,20 @@

 #include

-namespace rmm {
-namespace test {
+namespace rmm::test {
 namespace {

-INSTANTIATE_TEST_CASE_P(ResourceTests,
-                        mr_test,
-                        ::testing::Values(mr_factory{"CUDA", &make_cuda},
+INSTANTIATE_TEST_SUITE_P(ResourceTests,
+                         mr_test,
+                         ::testing::Values(mr_factory{"CUDA", &make_cuda},
 #ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
-                                          mr_factory{"CUDA_Async", &make_cuda_async},
+                                           mr_factory{"CUDA_Async", &make_cuda_async},
 #endif
-                                          mr_factory{"Managed", &make_managed},
-                                          mr_factory{"Pool", &make_pool},
-                                          mr_factory{"Arena", &make_arena},
-                                          mr_factory{"Binning", &make_binning}),
-                        [](auto const& info) { return info.param.name; });
+                                           mr_factory{"Managed", &make_managed},
+                                           mr_factory{"Pool", &make_pool},
+                                           mr_factory{"Arena", &make_arena},
+                                           mr_factory{"Binning", &make_binning}),
+                         [](auto const& info) { return info.param.name; });

 TEST(DefaultTest, CurrentDeviceResourceIsCUDA)
 {
@@ -46,8 +45,7 @@ TEST(DefaultTest, UseCurrentDeviceResource) { test_get_current_device_resource()

 TEST(DefaultTest, GetCurrentDeviceResource)
 {
-  rmm::mr::device_memory_resource* mr;
-  mr = rmm::mr::get_current_device_resource();
+  auto* mr = rmm::mr::get_current_device_resource();
   EXPECT_NE(nullptr, mr);
   EXPECT_TRUE(mr->is_equal(rmm::mr::cuda_memory_resource{}));
 }
@@ -95,32 +93,38 @@ TEST_P(mr_test, RandomAllocations) { test_random_allocations(this->mr.get()); }

 TEST_P(mr_test, RandomAllocationsStream)
 {
-  test_random_allocations(this->mr.get(), 100, 5_MiB, this->stream);
+  test_random_allocations(this->mr.get(), default_num_allocations, default_max_size, this->stream);
 }

 TEST_P(mr_test, MixedRandomAllocationFree)
 {
-  test_mixed_random_allocation_free(this->mr.get(), 5_MiB, cuda_stream_view{});
+  test_mixed_random_allocation_free(this->mr.get(), default_max_size, cuda_stream_view{});
 }

 TEST_P(mr_test, MixedRandomAllocationFreeStream)
 {
-  test_mixed_random_allocation_free(this->mr.get(), 5_MiB, this->stream);
+  test_mixed_random_allocation_free(this->mr.get(), default_max_size, this->stream);
 }

 TEST_P(mr_test, GetMemInfo)
 {
   if (this->mr->supports_get_mem_info()) {
-    std::pair<std::size_t, std::size_t> mem_info;
-    mem_info                    = this->mr->get_mem_info(rmm::cuda_stream_view{});
-    std::size_t allocation_size = 16 * 256;
+    const auto allocation_size{16 * 256};
+    {
+      auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{});
+      EXPECT_TRUE(free >= allocation_size);
+    }
+
+    void* ptr{nullptr};
-    ptr      = this->mr->allocate(allocation_size);
-    mem_info = this->mr->get_mem_info(rmm::cuda_stream_view{});
-    EXPECT_TRUE(mem_info.first >= allocation_size);
+    ptr = this->mr->allocate(allocation_size);
+
+    {
+      auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{});
+      EXPECT_TRUE(free >= allocation_size);
+    }
+
+    this->mr->deallocate(ptr, allocation_size);
   }
 }

 }  // namespace
-}  // namespace test
-}  // namespace rmm
+}  // namespace rmm::test
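The GetMemInfo rewrite replaces a std::pair plus .first access with C++17 structured bindings. A self-contained sketch of the difference, with a hypothetical get_mem_info_stub standing in for the real resource call:

    #include <cstddef>
    #include <utility>

    // Hypothetical stand-in for device_memory_resource::get_mem_info(), which
    // reports {free, total} bytes for the resource.
    std::pair<std::size_t, std::size_t> get_mem_info_stub() { return {1u << 20, 1u << 30}; }

    int main()
    {
      // Old style: a named pair with opaque .first/.second access.
      std::pair<std::size_t, std::size_t> mem_info = get_mem_info_stub();
      bool const ok_old = mem_info.first >= 4096;

      // New style (C++17): structured bindings name both members at the call site.
      auto const [free_bytes, total_bytes] = get_mem_info_stub();
      bool const ok_new = free_bytes >= 4096 && total_bytes >= free_bytes;

      return (ok_old && ok_new) ? 0 : 1;
    }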
diff --git a/tests/mr/device/polymorphic_allocator_tests.cpp b/tests/mr/device/polymorphic_allocator_tests.cpp
index 319d0ca63..4c8346730 100644
--- a/tests/mr/device/polymorphic_allocator_tests.cpp
+++ b/tests/mr/device/polymorphic_allocator_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -42,7 +42,8 @@ TEST_F(allocator_test, custom_resource)
   EXPECT_EQ(allocator.resource(), &mr);
 }

-void test_conversion(rmm::mr::polymorphic_allocator<int>) {}
+void test_conversion(rmm::mr::polymorphic_allocator<int> /*unused*/) {}
+
 TEST_F(allocator_test, implicit_conversion)
 {
   rmm::mr::cuda_memory_resource mr;
@@ -106,9 +107,10 @@ TEST_F(allocator_test, rebind)
 TEST_F(allocator_test, allocate_deallocate)
 {
   rmm::mr::polymorphic_allocator<int> allocator{};
-  auto p = allocator.allocate(1000, stream);
-  EXPECT_NE(p, nullptr);
-  EXPECT_NO_THROW(allocator.deallocate(p, 1000, stream));
+  const auto size{1000};
+  auto* ptr = allocator.allocate(size, stream);
+  EXPECT_NE(ptr, nullptr);
+  EXPECT_NO_THROW(allocator.deallocate(ptr, size, stream));
 }

 }  // namespace
diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp
index 7f31412c8..9f2020785 100644
--- a/tests/mr/device/pool_mr_tests.cpp
+++ b/tests/mr/device/pool_mr_tests.cpp
@@ -26,8 +26,7 @@

 #include

-namespace rmm {
-namespace test {
+namespace rmm::test {
 namespace {
 using cuda_mr = rmm::mr::cuda_memory_resource;
 using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
@@ -44,7 +43,9 @@ TEST(PoolTest, ThrowMaxLessThanInitial)
   // Make sure first argument is enough larger than the second that alignment rounding doesn't
   // make them equal
   auto max_less_than_initial = []() {
-    pool_mr mr{rmm::mr::get_current_device_resource(), 1024, 256};
+    const auto initial{1024};
+    const auto maximum{256};
+    pool_mr mr{rmm::mr::get_current_device_resource(), initial, maximum};
   };
   EXPECT_THROW(max_less_than_initial(), rmm::logic_error);
 }
@@ -54,8 +55,9 @@ TEST(PoolTest, AllocateNinetyPercent)
   auto allocate_ninety = []() {
     auto const [free, total] = rmm::detail::available_device_memory();
     (void)total;
-    auto const ninety_percent_pool = rmm::detail::align_up(static_cast<std::size_t>(free * 0.9),
-                                                           rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
+    auto const ninety_percent_pool =
+      rmm::detail::align_up(static_cast<std::size_t>(static_cast<double>(free) * 0.9),
+                            rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
     pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool};
   };
   EXPECT_NO_THROW(allocate_ninety());
@@ -67,10 +69,10 @@ TEST(PoolTest, TwoLargeBuffers)
   auto const [free, total] = rmm::detail::available_device_memory();
   (void)total;
   pool_mr mr{rmm::mr::get_current_device_resource()};
-  auto p1 = mr.allocate(free / 4);
-  auto p2 = mr.allocate(free / 4);
-  mr.deallocate(p1, free / 4);
-  mr.deallocate(p2, free / 4);
+  auto* ptr1 = mr.allocate(free / 4);
+  auto* ptr2 = mr.allocate(free / 4);
+  mr.deallocate(ptr1, free / 4);
+  mr.deallocate(ptr2, free / 4);
   };
   EXPECT_NO_THROW(two_large());
 }
@@ -78,7 +80,8 @@ TEST(PoolTest, ForceGrowth)
 {
   cuda_mr cuda;
-  limiting_mr limiter{&cuda, 6000};
+  auto const max_size{6000};
+  limiting_mr limiter{&cuda, max_size};
   pool_mr mr{&limiter, 0};
   EXPECT_NO_THROW(mr.allocate(1000));
   EXPECT_NO_THROW(mr.allocate(4000));
@@ -89,7 +92,7 @@ TEST(PoolTest, DeletedStream)
 {
   pool_mr mr{rmm::mr::get_current_device_resource(), 0};
-  cudaStream_t stream;  // we don't use rmm::cuda_stream here to make destruction more explicit
+  cudaStream_t stream{};  // we don't use rmm::cuda_stream here to make destruction more explicit
   const int size = 10000;
   EXPECT_EQ(cudaSuccess, cudaStreamCreate(&stream));
   EXPECT_NO_THROW(rmm::device_buffer buff(size, cuda_stream_view{stream}, &mr));
@@ -124,5 +127,4 @@ TEST(PoolTest, NonAlignedPoolSize)
 }

 }  // namespace
-}  // namespace test
-}  // namespace rmm
+}  // namespace rmm::test
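ForceGrowth works by capping the upstream so the pool cannot reserve memory up front and must grow one allocation at a time. A sketch of that arrangement with the same 6000-byte cap the test uses; it assumes the usual rmm headers and needs a CUDA device at runtime:

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/limiting_resource_adaptor.hpp>
    #include <rmm/mr/device/pool_memory_resource.hpp>

    int main()
    {
      rmm::mr::cuda_memory_resource cuda;
      // Cap the upstream at 6000 bytes, as the test does, so the pool cannot
      // pre-reserve device memory and must grow allocation by allocation.
      rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource> limiter{&cuda, 6000};
      rmm::mr::pool_memory_resource<decltype(limiter)> pool{&limiter, 0};

      void* ptr = pool.allocate(1000);  // forces growth from the empty initial pool
      pool.deallocate(ptr, 1000);
      return 0;
    }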
diff --git a/tests/mr/device/statistics_mr_tests.cpp b/tests/mr/device/statistics_mr_tests.cpp
index 83464bbe2..59c356b1e 100644
--- a/tests/mr/device/statistics_mr_tests.cpp
+++ b/tests/mr/device/statistics_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,20 +14,24 @@
 * limitations under the License.
 */

+#include "../../byte_literals.hpp"
+
 #include
 #include
 #include
 #include
-#include "mr_test.hpp"
 #include

-namespace rmm {
-namespace test {
+namespace rmm::test {
 namespace {

 using statistics_adaptor = rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource>;

+constexpr auto num_allocations{10};
+constexpr auto num_more_allocations{5};
+constexpr auto ten_MiB{10_MiB};
+
 TEST(StatisticsTest, ThrowOnNullUpstream)
 {
   auto construct_nullptr = []() { statistics_adaptor mr{nullptr}; };
@@ -51,11 +55,13 @@ TEST(StatisticsTest, AllFreed)
 {
   statistics_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (int i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+
+  allocations.reserve(num_allocations);
+  for (int i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }
-  for (auto p : allocations) {
-    mr.deallocate(p, 10_MiB);
+  for (auto* alloc : allocations) {
+    mr.deallocate(alloc, ten_MiB);
   }

   // Counter values should be 0
@@ -67,12 +73,13 @@ TEST(StatisticsTest, PeakAllocations)
 {
   statistics_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (std::size_t i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+
+  for (std::size_t i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }
   // Delete every other allocation
   for (auto&& it = allocations.begin(); it != allocations.end(); ++it) {
-    mr.deallocate(*it, 10_MiB);
+    mr.deallocate(*it, ten_MiB);
     it = allocations.erase(it);
   }

@@ -92,13 +99,13 @@
   EXPECT_EQ(current_alloc_counts.total, 10);

   // Add 10 more to increase the peak
-  for (std::size_t i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  for (std::size_t i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }

   // Deallocate all remaining
-  for (std::size_t i = 0; i < allocations.size(); ++i) {
-    mr.deallocate(allocations[i], 10_MiB);
+  for (auto& allocation : allocations) {
+    mr.deallocate(allocation, ten_MiB);
   }
   allocations.clear();

@@ -124,9 +131,9 @@ TEST(StatisticsTest, MultiTracking)
   rmm::mr::set_current_device_resource(&mr);
   std::vector<std::shared_ptr<rmm::device_buffer>> allocations;
-  for (std::size_t i = 0; i < 10; ++i) {
+  for (std::size_t i = 0; i < num_allocations; ++i) {
     allocations.emplace_back(
-      std::make_shared<rmm::device_buffer>(10_MiB, rmm::cuda_stream_default));
+      std::make_shared<rmm::device_buffer>(ten_MiB, rmm::cuda_stream_default));
   }

   EXPECT_EQ(mr.get_allocations_counter().value, 10);
@@ -134,9 +141,9 @@ TEST(StatisticsTest, MultiTracking)
   statistics_adaptor inner_mr{rmm::mr::get_current_device_resource()};
   rmm::mr::set_current_device_resource(&inner_mr);

-  for (std::size_t i = 0; i < 5; ++i) {
+  for (std::size_t i = 0; i < num_more_allocations; ++i) {
     allocations.emplace_back(
-      std::make_shared<rmm::device_buffer>(10_MiB, rmm::cuda_stream_default));
+      std::make_shared<rmm::device_buffer>(ten_MiB, rmm::cuda_stream_default));
   }

   // Check the allocated bytes for both MRs
@@ -174,8 +181,8 @@ TEST(StatisticsTest, NegativeInnerTracking)
   // memory pointer
   statistics_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (std::size_t i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  for (std::size_t i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }

   EXPECT_EQ(mr.get_allocations_counter().value, 10);
@@ -183,8 +190,8 @@ TEST(StatisticsTest, NegativeInnerTracking)
   statistics_adaptor inner_mr{&mr};

   // Add more allocations
-  for (std::size_t i = 0; i < 5; ++i) {
-    allocations.push_back(inner_mr.allocate(10_MiB));
+  for (std::size_t i = 0; i < num_more_allocations; ++i) {
+    allocations.push_back(inner_mr.allocate(ten_MiB));
   }

   // Check the outstanding allocations
@@ -199,8 +206,8 @@ TEST(StatisticsTest, NegativeInnerTracking)
   EXPECT_EQ(inner_mr.get_allocations_counter().value, 5);

   // Deallocate all allocations using the inner_mr
-  for (std::size_t i = 0; i < allocations.size(); ++i) {
-    inner_mr.deallocate(allocations[i], 10_MiB);
+  for (auto& allocation : allocations) {
+    inner_mr.deallocate(allocation, ten_MiB);
   }
   allocations.clear();

@@ -227,5 +234,4 @@ TEST(StatisticsTest, NegativeInnerTracking)
 }

 }  // namespace
-}  // namespace test
-}  // namespace rmm
+}  // namespace rmm::test
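For orientation, the counters asserted throughout these tests come from statistics_resource_adaptor, which layers bookkeeping over any upstream resource. A minimal sketch of that flow, assuming a CUDA device at runtime; the counter fields (.value, .peak, .total) are the ones the tests read:

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/statistics_resource_adaptor.hpp>

    int main()
    {
      rmm::mr::cuda_memory_resource cuda;
      rmm::mr::statistics_resource_adaptor<rmm::mr::cuda_memory_resource> stats{&cuda};

      void* ptr = stats.allocate(1024);
      // Each counter tracks value (current), peak, and total, as the tests assert.
      auto const bytes  = stats.get_bytes_counter();
      auto const allocs = stats.get_allocations_counter();
      stats.deallocate(ptr, 1024);

      return (bytes.peak >= 1024 && allocs.total == 1) ? 0 : 1;
    }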
diff --git a/tests/mr/device/stream_allocator_adaptor_tests.cpp b/tests/mr/device/stream_allocator_adaptor_tests.cpp
index 616b899f1..669fca5f5 100644
--- a/tests/mr/device/stream_allocator_adaptor_tests.cpp
+++ b/tests/mr/device/stream_allocator_adaptor_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,15 +14,16 @@
 * limitations under the License.
 */

-#include
-
-#include
 #include
 #include
 #include
 #include
 #include

+#include
+
+#include
+
 namespace {

 struct allocator_test : public ::testing::Test {
@@ -34,7 +35,7 @@ TEST_F(allocator_test, factory)
 {
   using Adaptor = rmm::mr::stream_allocator_adaptor<decltype(allocator)>;
   auto adapted  = rmm::mr::make_stream_allocator_adaptor(allocator, stream);
-  static_assert((std::is_same<decltype(adapted), Adaptor>::value), "");
+  static_assert((std::is_same<decltype(adapted), Adaptor>::value));
   EXPECT_EQ(adapted.underlying_allocator(), allocator);
   EXPECT_EQ(adapted.stream(), stream);
 }
@@ -97,21 +98,21 @@ TEST_F(allocator_test, rebind)
 {
   auto adapted  = rmm::mr::make_stream_allocator_adaptor(allocator, stream);
   using Rebound = std::allocator_traits<decltype(adapted)>::rebind_alloc<double>;
-  static_assert((std::is_same<std::allocator_traits<Rebound>::value_type, double>::value), "");
+  static_assert((std::is_same<std::allocator_traits<Rebound>::value_type, double>::value));
   static_assert(
-    std::is_same<Rebound,
-                 rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<double>>>::value,
-    "");
+    std::is_same<Rebound,
+                 rmm::mr::stream_allocator_adaptor<rmm::mr::polymorphic_allocator<double>>>::value);

-  Rebound r{adapted};
+  Rebound rebound{adapted};
 }

 TEST_F(allocator_test, allocate_deallocate)
 {
   auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream);
-  auto p = adapted.allocate(1000);
-  EXPECT_NE(p, nullptr);
-  EXPECT_NO_THROW(adapted.deallocate(p, 1000));
+  auto const size{1000};
+  auto* ptr = adapted.allocate(size);
+  EXPECT_NE(ptr, nullptr);
+  EXPECT_NO_THROW(adapted.deallocate(ptr, size));
 }

 }  // namespace
diff --git a/tests/mr/device/thrust_allocator_tests.cu b/tests/mr/device/thrust_allocator_tests.cu
index 0092f21ba..eabdfe143 100644
--- a/tests/mr/device/thrust_allocator_tests.cu
+++ b/tests/mr/device/thrust_allocator_tests.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,13 +14,14 @@
 * limitations under the License.
 */

-#include
+#include "mr_test.hpp"
+
 #include
 #include
-#include "mr_test.hpp"

-namespace rmm {
-namespace test {
+#include
+
+namespace rmm::test {
 namespace {

 struct allocator_test : public mr_test {
@@ -28,8 +29,9 @@ struct allocator_test : public mr_test {

 TEST_P(allocator_test, first)
 {
-  rmm::device_vector<int> ints(100, 1);
-  EXPECT_EQ(100, thrust::reduce(ints.begin(), ints.end()));
+  auto const num_ints{100};
+  rmm::device_vector<int> ints(num_ints, 1);
+  EXPECT_EQ(num_ints, thrust::reduce(ints.begin(), ints.end()));
 }

 INSTANTIATE_TEST_CASE_P(ThrustAllocatorTests,
                         allocator_test,
                         ::testing::Values(mr_factory{"CUDA", &make_cuda},
                                           mr_factory{"Pool", &make_pool},
                                           mr_factory{"Binning", &make_binning}),
                         [](auto const& info) { return info.param.name; });
+
 }  // namespace
-}  // namespace test
-}  // namespace rmm
+}  // namespace rmm::test
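The adaptor under test pairs a stream with a polymorphic_allocator so that the stream-taking allocate(size, stream) interface collapses to the standard single-argument form. A sketch, under the assumption that make_stream_allocator_adaptor is available through rmm/mr/device/polymorphic_allocator.hpp and a CUDA device is present:

    #include <rmm/cuda_stream.hpp>
    #include <rmm/mr/device/polymorphic_allocator.hpp>

    int main()
    {
      rmm::cuda_stream stream;
      rmm::mr::polymorphic_allocator<int> allocator{};  // wraps the current device resource

      // The adaptor binds `stream` to `allocator`, so allocate(n) below runs on
      // that stream without the caller passing it explicitly.
      auto adapted = rmm::mr::make_stream_allocator_adaptor(allocator, stream.view());
      int* ptr = adapted.allocate(1000);
      adapted.deallocate(ptr, 1000);
      return 0;
    }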
diff --git a/tests/mr/device/tracking_mr_tests.cpp b/tests/mr/device/tracking_mr_tests.cpp
index 8dc666f0c..5926cbf62 100644
--- a/tests/mr/device/tracking_mr_tests.cpp
+++ b/tests/mr/device/tracking_mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,20 +14,24 @@
 * limitations under the License.
 */

+#include "../../byte_literals.hpp"
+
 #include
 #include
 #include
 #include
-#include "mr_test.hpp"
 #include

-namespace rmm {
-namespace test {
+namespace rmm::test {
 namespace {

 using tracking_adaptor = rmm::mr::tracking_resource_adaptor<rmm::mr::device_memory_resource>;

+constexpr auto num_allocations{10};
+constexpr auto num_more_allocations{5};
+constexpr auto ten_MiB{10_MiB};
+
 TEST(TrackingTest, ThrowOnNullUpstream)
 {
   auto construct_nullptr = []() { tracking_adaptor mr{nullptr}; };
@@ -45,11 +49,12 @@ TEST(TrackingTest, AllFreed)
 {
   tracking_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (int i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  allocations.reserve(num_allocations);
+  for (int i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }
-  for (auto p : allocations) {
-    mr.deallocate(p, 10_MiB);
+  for (auto* alloc : allocations) {
+    mr.deallocate(alloc, ten_MiB);
   }
   EXPECT_EQ(mr.get_outstanding_allocations().size(), 0);
   EXPECT_EQ(mr.get_allocated_bytes(), 0);
@@ -59,16 +64,17 @@ TEST(TrackingTest, AllocationsLeftWithStacks)
 {
   tracking_adaptor mr{rmm::mr::get_current_device_resource(), true};
   std::vector<void*> allocations;
-  for (int i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  allocations.reserve(num_allocations);
+  for (int i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }
-  for (int i = 0; i < 10; i += 2) {
-    mr.deallocate(allocations[i], 10_MiB);
+  for (int i = 0; i < num_allocations; i += 2) {
+    mr.deallocate(allocations[i], ten_MiB);
   }
-  EXPECT_EQ(mr.get_outstanding_allocations().size(), 5);
-  EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB);
+  EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2);
+  EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2));
   auto const& outstanding_allocations = mr.get_outstanding_allocations();
-  EXPECT_EQ(outstanding_allocations.size(), 5);
+  EXPECT_EQ(outstanding_allocations.size(), num_allocations / 2);
   EXPECT_NE(outstanding_allocations.begin()->second.strace, nullptr);
 }

@@ -76,16 +82,18 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks)
 {
   tracking_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (int i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  allocations.reserve(num_allocations);
+  for (int i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }
-  for (int i = 0; i < 10; i += 2) {
-    mr.deallocate(allocations[i], 10_MiB);
+
+  for (int i = 0; i < num_allocations; i += 2) {
+    mr.deallocate(allocations[i], ten_MiB);
   }
-  EXPECT_EQ(mr.get_outstanding_allocations().size(), 5);
-  EXPECT_EQ(mr.get_allocated_bytes(), 50_MiB);
+  EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations / 2);
+  EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations / 2));
   auto const& outstanding_allocations = mr.get_outstanding_allocations();
-  EXPECT_EQ(outstanding_allocations.size(), 5);
+  EXPECT_EQ(outstanding_allocations.size(), num_allocations / 2);
   EXPECT_EQ(outstanding_allocations.begin()->second.strace, nullptr);
 }

@@ -95,27 +103,27 @@ TEST(TrackingTest, MultiTracking)
   rmm::mr::set_current_device_resource(&mr);
   std::vector<std::shared_ptr<rmm::device_buffer>> allocations;
-  for (std::size_t i = 0; i < 10; ++i) {
+  for (std::size_t i = 0; i < num_allocations; ++i) {
     allocations.emplace_back(
-      std::make_shared<rmm::device_buffer>(10_MiB, rmm::cuda_stream_default));
+      std::make_shared<rmm::device_buffer>(ten_MiB, rmm::cuda_stream_default));
   }

-  EXPECT_EQ(mr.get_outstanding_allocations().size(), 10);
+  EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations);

   tracking_adaptor inner_mr{rmm::mr::get_current_device_resource()};
   rmm::mr::set_current_device_resource(&inner_mr);

-  for (std::size_t i = 0; i < 5; ++i) {
+  for (std::size_t i = 0; i < num_more_allocations; ++i) {
     allocations.emplace_back(
-      std::make_shared<rmm::device_buffer>(10_MiB, rmm::cuda_stream_default));
+      std::make_shared<rmm::device_buffer>(ten_MiB, rmm::cuda_stream_default));
   }

   // Check the allocated bytes for both MRs
-  EXPECT_EQ(mr.get_outstanding_allocations().size(), 15);
-  EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5);
+  EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations + num_more_allocations);
+  EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), num_more_allocations);

-  EXPECT_EQ(mr.get_allocated_bytes(), 150_MiB);
-  EXPECT_EQ(inner_mr.get_allocated_bytes(), 50_MiB);
+  EXPECT_EQ(mr.get_allocated_bytes(), ten_MiB * (num_allocations + num_more_allocations));
+  EXPECT_EQ(inner_mr.get_allocated_bytes(), ten_MiB * num_more_allocations);

   EXPECT_GT(mr.get_outstanding_allocations_str().size(), 0);

@@ -140,26 +148,26 @@ TEST(TrackingTest, NegativeInnerTracking)
   // memory pointer
   tracking_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (std::size_t i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  for (std::size_t i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }

-  EXPECT_EQ(mr.get_outstanding_allocations().size(), 10);
+  EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations);

   tracking_adaptor inner_mr{&mr};

   // Add more allocations
-  for (std::size_t i = 0; i < 5; ++i) {
-    allocations.push_back(inner_mr.allocate(10_MiB));
+  for (std::size_t i = 0; i < num_more_allocations; ++i) {
+    allocations.push_back(inner_mr.allocate(ten_MiB));
   }

   // Check the outstanding allocations
-  EXPECT_EQ(mr.get_outstanding_allocations().size(), 15);
-  EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), 5);
+  EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations + num_more_allocations);
+  EXPECT_EQ(inner_mr.get_outstanding_allocations().size(), num_more_allocations);

   // Deallocate all allocations using the inner_mr
-  for (std::size_t i = 0; i < allocations.size(); ++i) {
-    inner_mr.deallocate(allocations[i], 10_MiB);
+  for (auto& allocation : allocations) {
+    inner_mr.deallocate(allocation, ten_MiB);
   }
   allocations.clear();

@@ -172,13 +180,13 @@ TEST(TrackingTest, DeallocWrongBytes)
 {
   tracking_adaptor mr{rmm::mr::get_current_device_resource()};
   std::vector<void*> allocations;
-  for (std::size_t i = 0; i < 10; ++i) {
-    allocations.push_back(mr.allocate(10_MiB));
+  for (std::size_t i = 0; i < num_allocations; ++i) {
+    allocations.push_back(mr.allocate(ten_MiB));
   }

   // When deallocating, pass the wrong bytes to deallocate
-  for (std::size_t i = 0; i < allocations.size(); ++i) {
-    mr.deallocate(allocations[i], 5_MiB);
+  for (auto& allocation : allocations) {
+    mr.deallocate(allocation, ten_MiB / 2);
   }
   allocations.clear();

@@ -190,5 +198,4 @@ TEST(TrackingTest, DeallocWrongBytes)
 }

 }  // namespace
-}  // namespace test
-}  // namespace rmm
+}  // namespace rmm::test
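The tracking tests rely on tracking_resource_adaptor recording every outstanding allocation, optionally with a captured call stack (the second constructor argument, as AllocationsLeftWithStacks uses). A minimal leak-check sketch against the same API, assuming a CUDA device at runtime:

    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/tracking_resource_adaptor.hpp>

    int main()
    {
      rmm::mr::cuda_memory_resource cuda;
      // true => record a call stack for each allocation, for leak reports.
      rmm::mr::tracking_resource_adaptor<rmm::mr::cuda_memory_resource> tracker{&cuda, true};

      void* ptr = tracker.allocate(256);
      auto const outstanding = tracker.get_outstanding_allocations().size();  // == 1 here
      tracker.deallocate(ptr, 256);

      return (outstanding == 1 && tracker.get_allocated_bytes() == 0) ? 0 : 1;
    }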
diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp
index 442a70ca0..1cd59f5a6 100644
--- a/tests/mr/host/mr_tests.cpp
+++ b/tests/mr/host/mr_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,36 +14,33 @@
 * limitations under the License.
 */

-#include
+#include "../../byte_literals.hpp"
+#include

 #include
 #include
 #include
 #include
+
+#include
+
 #include
 #include
 #include

+namespace rmm::test {
 namespace {
-inline bool is_aligned(void* p, std::size_t alignment = alignof(std::max_align_t))
+inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t))
 {
-  return (0 == reinterpret_cast<std::uintptr_t>(p) % alignment);
+  return rmm::detail::is_pointer_aligned(ptr, alignment);
 }

-inline void expect_aligned(void* p, std::size_t alignment)
-{
-  EXPECT_EQ(0, reinterpret_cast<std::uintptr_t>(p) % alignment);
-}
-
-/**---------------------------------------------------------------------------*
- * @brief Returns if a pointer points to a device memory or managed memory
- * allocation.
- *---------------------------------------------------------------------------**/
-inline bool is_device_memory(void* p)
+// Returns true if a pointer points to a device memory or managed memory allocation.
+inline bool is_device_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
-  if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; }
+  if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
 #if CUDART_VERSION < 10000  // memoryType is deprecated in CUDA 10
   return attributes.memoryType == cudaMemoryTypeDevice;
 #else
@@ -54,24 +51,23 @@ inline bool is_device_memory(void* p)
 /**
 * @brief Returns if a pointer `p` points to pinned host memory.
 */
-inline bool is_pinned_memory(void* p)
+inline bool is_pinned_memory(void* ptr)
 {
   cudaPointerAttributes attributes{};
-  if (cudaSuccess != cudaPointerGetAttributes(&attributes, p)) { return false; }
+  if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
   return attributes.type == cudaMemoryTypeHost;
 }

-static constexpr std::size_t size_word{4};
-static constexpr std::size_t size_kb{std::size_t{1} << 10};
-static constexpr std::size_t size_mb{std::size_t{1} << 20};
-static constexpr std::size_t size_gb{std::size_t{1} << 30};
-static constexpr std::size_t size_tb{std::size_t{1} << 40};
-static constexpr std::size_t size_pb{std::size_t{1} << 50};
+constexpr std::size_t size_word{4_B};
+constexpr std::size_t size_kb{1_KiB};
+constexpr std::size_t size_mb{1_MiB};
+constexpr std::size_t size_gb{1_GiB};
+constexpr std::size_t size_pb{1_PiB};

 struct allocation {
-  void* p{nullptr};
+  void* ptr{nullptr};
   std::size_t size{0};
-  allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {}
+  allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {}
   allocation() = default;
 };
 }  // namespace

@@ -81,7 +77,6 @@ struct MRTest : public ::testing::Test {
   std::unique_ptr<MemoryResourceType> mr;

   MRTest() : mr{new MemoryResourceType} {}
-  ~MRTest() = default;
 };

 using resources = ::testing::Types<rmm::mr::new_delete_resource, rmm::mr::pinned_memory_resource>;

@@ -92,56 +87,56 @@ TYPED_TEST(MRTest, SelfEquality) { EXPECT_TRUE(this->mr->is_equal(*this->mr)); }

 TYPED_TEST(MRTest, AllocateZeroBytes)
 {
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = this->mr->allocate(0));
-  EXPECT_NO_THROW(this->mr->deallocate(p, 0));
+  void* ptr{nullptr};
+  EXPECT_NO_THROW(ptr = this->mr->allocate(0));
+  EXPECT_NO_THROW(this->mr->deallocate(ptr, 0));
 }

 TYPED_TEST(MRTest, AllocateWord)
 {
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = this->mr->allocate(size_word));
-  EXPECT_NE(nullptr, p);
-  EXPECT_TRUE(is_aligned(p));
-  EXPECT_FALSE(is_device_memory(p));
-  EXPECT_NO_THROW(this->mr->deallocate(p, size_word));
+  void* ptr{nullptr};
+  EXPECT_NO_THROW(ptr = this->mr->allocate(size_word));
+  EXPECT_NE(nullptr, ptr);
+  EXPECT_TRUE(is_aligned(ptr));
+  EXPECT_FALSE(is_device_memory(ptr));
+  EXPECT_NO_THROW(this->mr->deallocate(ptr, size_word));
 }

 TYPED_TEST(MRTest, AllocateKB)
 {
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = this->mr->allocate(size_kb));
-  EXPECT_NE(nullptr, p);
-  EXPECT_TRUE(is_aligned(p));
-  EXPECT_FALSE(is_device_memory(p));
-  EXPECT_NO_THROW(this->mr->deallocate(p, size_kb));
+  void* ptr{nullptr};
+  EXPECT_NO_THROW(ptr = this->mr->allocate(size_kb));
+  EXPECT_NE(nullptr, ptr);
+  EXPECT_TRUE(is_aligned(ptr));
+  EXPECT_FALSE(is_device_memory(ptr));
+  EXPECT_NO_THROW(this->mr->deallocate(ptr, size_kb));
 }

 TYPED_TEST(MRTest, AllocateMB)
 {
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = this->mr->allocate(size_mb));
-  EXPECT_NE(nullptr, p);
-  EXPECT_TRUE(is_aligned(p));
-  EXPECT_FALSE(is_device_memory(p));
-  EXPECT_NO_THROW(this->mr->deallocate(p, size_mb));
+  void* ptr{nullptr};
+  EXPECT_NO_THROW(ptr = this->mr->allocate(size_mb));
+  EXPECT_NE(nullptr, ptr);
+  EXPECT_TRUE(is_aligned(ptr));
+  EXPECT_FALSE(is_device_memory(ptr));
+  EXPECT_NO_THROW(this->mr->deallocate(ptr, size_mb));
 }

 TYPED_TEST(MRTest, AllocateGB)
 {
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = this->mr->allocate(size_gb));
-  EXPECT_NE(nullptr, p);
-  EXPECT_TRUE(is_aligned(p));
-  EXPECT_FALSE(is_device_memory(p));
-  EXPECT_NO_THROW(this->mr->deallocate(p, size_gb));
+  void* ptr{nullptr};
+  EXPECT_NO_THROW(ptr = this->mr->allocate(size_gb));
+  EXPECT_NE(nullptr, ptr);
+  EXPECT_TRUE(is_aligned(ptr));
+  EXPECT_FALSE(is_device_memory(ptr));
+  EXPECT_NO_THROW(this->mr->deallocate(ptr, size_gb));
 }

 TYPED_TEST(MRTest, AllocateTooMuch)
 {
-  void* p{nullptr};
-  EXPECT_THROW(p = this->mr->allocate(size_pb), std::bad_alloc);
-  EXPECT_EQ(nullptr, p);
+  void* ptr{nullptr};
+  EXPECT_THROW(ptr = this->mr->allocate(size_pb), std::bad_alloc);
+  EXPECT_EQ(nullptr, ptr);
 }

 TYPED_TEST(MRTest, RandomAllocations)
@@ -156,17 +151,16 @@ TYPED_TEST(MRTest, RandomAllocations)

   // 100 allocations from [0,5MB)
   std::for_each(
-    allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& a) {
-      a.size = distribution(generator);
-      EXPECT_NO_THROW(a.p = this->mr->allocate(a.size));
-      EXPECT_NE(nullptr, a.p);
-      EXPECT_TRUE(is_aligned(a.p));
+    allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& alloc) {
+      alloc.size = distribution(generator);
+      EXPECT_NO_THROW(alloc.ptr = this->mr->allocate(alloc.size));
+      EXPECT_NE(nullptr, alloc.ptr);
+      EXPECT_TRUE(is_aligned(alloc.ptr));
     });

-  std::for_each(
-    allocations.begin(), allocations.end(), [generator, distribution, this](allocation& a) {
-      EXPECT_NO_THROW(this->mr->deallocate(a.p, a.size));
-    });
+  std::for_each(allocations.begin(), allocations.end(), [this](allocation& alloc) {
+    EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size));
+  });
 }

 TYPED_TEST(MRTest, MixedRandomAllocationFree)
@@ -189,30 +183,27 @@ TYPED_TEST(MRTest, MixedRandomAllocationFree)
     std::size_t allocation_size = size_distribution(generator);
     EXPECT_NO_THROW(allocations.emplace_back(this->mr->allocate(allocation_size), allocation_size));
     auto new_allocation = allocations.back();
-    EXPECT_NE(nullptr, new_allocation.p);
-    EXPECT_TRUE(is_aligned(new_allocation.p));
+    EXPECT_NE(nullptr, new_allocation.ptr);
+    EXPECT_TRUE(is_aligned(new_allocation.ptr));

     bool const free_front{free_distribution(generator) == free_distribution.max()};
     if (free_front) {
       auto front = allocations.front();
-      EXPECT_NO_THROW(this->mr->deallocate(front.p, front.size));
+      EXPECT_NO_THROW(this->mr->deallocate(front.ptr, front.size));
       allocations.pop_front();
     }
   }
   // free any remaining allocations
-  for (auto a : allocations) {
-    EXPECT_NO_THROW(this->mr->deallocate(a.p, a.size));
+  for (auto alloc : allocations) {
+    EXPECT_NO_THROW(this->mr->deallocate(alloc.ptr, alloc.size));
     allocations.pop_front();
   }
 }

-static constexpr std::size_t MinTestedSize  = 32;
-static constexpr std::size_t MaxTestedSize  = 8 * 1024;
-static constexpr std::size_t TestedSizeStep = 1;
-static constexpr std::size_t MinTestedAlignment = 16;
-static constexpr std::size_t MaxTestedAlignment = 4 * 1024;
-static constexpr std::size_t TestedAlignmentMultiplier = 2;
+static constexpr std::size_t MinTestedAlignment{16};
+static constexpr std::size_t MaxTestedAlignment{4096};
+static constexpr std::size_t TestedAlignmentMultiplier{2};
 static constexpr std::size_t NUM_TRIALS{100};

 TYPED_TEST(MRTest, AlignmentTest)
@@ -248,7 +239,7 @@
       // alignment of `alignof(std::max_align_t)`
       auto const bad_alignment = alignment + 1;
       EXPECT_NO_THROW(ptr = this->mr->allocate(allocation_size, bad_alignment));
-      expect_aligned(ptr, alignof(std::max_align_t));
+      EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t)));
       EXPECT_NO_THROW(this->mr->deallocate(ptr, allocation_size, bad_alignment));
     }
   }
@@ -257,8 +248,9 @@ TYPED_TEST(MRTest, UnsupportedAlignmentTest)

 TEST(PinnedResource, isPinned)
 {
   rmm::mr::pinned_memory_resource mr;
-  void* p{nullptr};
-  EXPECT_NO_THROW(p = mr.allocate(100));
-  EXPECT_TRUE(is_pinned_memory(p));
-  EXPECT_NO_THROW(mr.deallocate(p, 100));
+  void* ptr{nullptr};
+  EXPECT_NO_THROW(ptr = mr.allocate(100));
+  EXPECT_TRUE(is_pinned_memory(ptr));
+  EXPECT_NO_THROW(mr.deallocate(ptr, 100));
 }
+}  // namespace rmm::test
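Closing note: the retained is_aligned helper now simply forwards to rmm::detail::is_pointer_aligned. The underlying check is a modulus on the pointer's integer value, as in this standalone equivalent:

    #include <cstddef>
    #include <cstdint>

    // Standalone equivalent of the alignment predicate the host tests rely on.
    inline bool is_aligned(void const* ptr, std::size_t alignment = alignof(std::max_align_t))
    {
      return reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0;
    }

    int main()
    {
      alignas(64) static char buffer[64];
      return is_aligned(buffer, 64) ? 0 : 1;
    }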