diff --git a/README.md b/README.md index 3b91e855d..88ac71076 100644 --- a/README.md +++ b/README.md @@ -213,6 +213,14 @@ RAII semantics (constructor creates the CUDA stream, destructor destroys it). An can never represent the CUDA default stream or per-thread default stream; it only ever represents a single non-default stream. `rmm::cuda_stream` cannot be copied, but can be moved. +## `cuda_stream_pool` + +`rmm::cuda_stream_pool` provides fast access to a pool of CUDA streams. This class can be used to +create a set of `cuda_stream` objects whose lifetime is equal to the `cuda_stream_pool`. Using the +stream pool can be faster than creating the streams on the fly. The size of the pool is configurable. +Depending on this size, multiple calls to `cuda_stream_pool::get_stream()` may return instances of +`rmm::cuda_stream_view` that represent identical CUDA streams. + ### Thread Safety All current device memory resources are thread safe unless documented otherwise. More specifically, diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 3ff79215d..887716bac 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -89,3 +89,10 @@ ConfigureBench(REPLAY_BENCH "${REPLAY_BENCH_SRC}") set(UVECTOR_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/device_uvector/device_uvector_bench.cu") ConfigureBench(UVECTOR_BENCH "${UVECTOR_BENCH_SRC}") + +# cuda_stream_pool benchmark + +set(CUDA_STREAM_POOL_BENCH_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/cuda_stream_pool/cuda_stream_pool_bench.cpp") + +ConfigureBench(CUDA_STREAM_POOL_BENCH "${CUDA_STREAM_POOL_BENCH_SRC}") diff --git a/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp new file mode 100644 index 000000000..986e22a45 --- /dev/null +++ b/benchmarks/cuda_stream_pool/cuda_stream_pool_bench.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either ex ess or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +#include + +static void BM_StreamPoolGetStream(benchmark::State& state) +{ + rmm::cuda_stream_pool stream_pool{}; + + for (auto _ : state) { + auto s = stream_pool.get_stream(); + auto e = cudaStreamQuery(s.value()); + } + + state.SetItemsProcessed(state.iterations()); +} +BENCHMARK(BM_StreamPoolGetStream)->Unit(benchmark::kMicrosecond); + +static void BM_CudaStreamClass(benchmark::State& state) +{ + for (auto _ : state) { + auto s = rmm::cuda_stream{}; + auto e = cudaStreamQuery(s.view().value()); + } + + state.SetItemsProcessed(state.iterations()); +} +BENCHMARK(BM_CudaStreamClass)->Unit(benchmark::kMicrosecond); + +BENCHMARK_MAIN(); diff --git a/conda/recipes/librmm/meta.yaml b/conda/recipes/librmm/meta.yaml index eba6cf8f8..73d479725 100644 --- a/conda/recipes/librmm/meta.yaml +++ b/conda/recipes/librmm/meta.yaml @@ -36,6 +36,7 @@ test: - test -f $PREFIX/include/rmm/logger.hpp - test -f $PREFIX/include/rmm/cuda_stream.hpp - test -f $PREFIX/include/rmm/cuda_stream_view.hpp + - test -f $PREFIX/include/rmm/cuda_stream_pool.hpp - test -f $PREFIX/include/rmm/device_uvector.hpp - test -f $PREFIX/include/rmm/device_scalar.hpp - test -f $PREFIX/include/rmm/device_buffer.hpp diff --git a/include/rmm/cuda_stream_pool.hpp b/include/rmm/cuda_stream_pool.hpp new file mode 100644 index 000000000..803c0474e --- /dev/null +++ b/include/rmm/cuda_stream_pool.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace rmm { + +/** + * @brief A pool of CUDA streams. + * + * Provides efficient access to collection of CUDA stream objects. + * + * Successive calls may return a `cuda_stream_view` of identical streams. For example, a possible + * implementation is to maintain a circular buffer of `cuda_stream` objects. + */ +class cuda_stream_pool { + public: + static constexpr std::size_t default_size{16}; ///< Default stream pool size + + /** + * @brief Construct a new cuda stream pool object of the given size + * + * @param pool_size The number of streams in the pool + */ + explicit cuda_stream_pool(std::size_t pool_size = default_size) : streams_(pool_size) {} + ~cuda_stream_pool() = default; + + cuda_stream_pool(cuda_stream_pool&&) = delete; + cuda_stream_pool(cuda_stream_pool const&) = delete; + cuda_stream_pool& operator=(cuda_stream_pool&&) = delete; + cuda_stream_pool& operator=(cuda_stream_pool const&) = delete; + + /** + * @brief Get a `cuda_stream_view` of a stream in the pool. + * + * This function is thread safe with respect to other calls to the same function. + * + * @return rmm::cuda_stream_view + */ + rmm::cuda_stream_view get_stream() const noexcept + { + return streams_[(next_stream++) % streams_.size()].view(); + } + + private: + std::vector streams_; + mutable std::atomic_size_t next_stream{}; +}; + +} // namespace rmm diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3dc7acd6c..b19d6a9fb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -131,8 +131,8 @@ ConfigureTest(HOST_MR_TEST "${HOST_MR_TEST_SRC}") # cuda stream tests -set(CUDA_STREAM_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/cuda_stream_tests.cpp") - +set(CUDA_STREAM_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/cuda_stream_tests.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/cuda_stream_pool_tests.cpp") ConfigureTest(CUDA_STREAM_TEST "${CUDA_STREAM_TEST_SRC}") # device buffer tests diff --git a/tests/cuda_stream_pool_tests.cpp b/tests/cuda_stream_pool_tests.cpp new file mode 100644 index 000000000..9ca2a4188 --- /dev/null +++ b/tests/cuda_stream_pool_tests.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include + +struct CudaStreamPoolTest : public ::testing::Test { + rmm::cuda_stream_pool pool{}; +}; + +TEST_F(CudaStreamPoolTest, Unequal) +{ + auto const stream_a = this->pool.get_stream(); + auto const stream_b = this->pool.get_stream(); + + EXPECT_NE(stream_a, stream_b); +} + +TEST_F(CudaStreamPoolTest, Nondefault) +{ + auto const stream_a = this->pool.get_stream(); + auto const stream_b = this->pool.get_stream(); + + // pool streams are explicit, non-default streams + EXPECT_FALSE(stream_a.is_default()); + EXPECT_FALSE(stream_a.is_per_thread_default()); +} + +TEST_F(CudaStreamPoolTest, ValidStreams) +{ + auto const stream_a = this->pool.get_stream(); + auto const stream_b = this->pool.get_stream(); + + // Operations on the streams should work correctly and without throwing exceptions + auto v = rmm::device_uvector{100, stream_a}; + RMM_CUDA_TRY(cudaMemsetAsync(v.data(), 0xcc, 100, stream_a.value())); + stream_a.synchronize(); + + auto v2 = rmm::device_uvector{v, stream_b}; + auto x = v2.front_element(stream_b); + EXPECT_EQ(x, 0xcc); +} diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 4cbdc6511..59ac07f3d 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -14,14 +14,14 @@ * limitations under the License. */ -#include "gtest/gtest.h" - #include #include #include #include +#include + struct CudaStreamTest : public ::testing::Test { };