diff --git a/cudax/include/cuda/experimental/__device/all_devices.cuh b/cudax/include/cuda/experimental/__device/all_devices.cuh new file mode 100644 index 00000000000..3bd17f5fac2 --- /dev/null +++ b/cudax/include/cuda/experimental/__device/all_devices.cuh @@ -0,0 +1,194 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDAX__DEVICE_ALL_DEVICES +#define _CUDAX__DEVICE_ALL_DEVICES + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include + +#include + +namespace cuda::experimental +{ +namespace detail +{ +//! @brief A random-access range of all available CUDA devices +class all_devices +{ +public: + using size_type = ::std::vector::size_type; + using iterator = ::std::vector::const_iterator; + using const_iterator = ::std::vector::const_iterator; + + all_devices() = default; + + _CCCL_NODISCARD const device& operator[](size_type __i) const noexcept; + + _CCCL_NODISCARD const device& at(size_type __i) const; + + _CCCL_NODISCARD size_type size() const; + + _CCCL_NODISCARD iterator begin() const noexcept; + + _CCCL_NODISCARD iterator end() const noexcept; + +private: + struct __initializer_iterator; + + static const ::std::vector& __devices(); +}; + +//! @brief An iterator used to in-place construct `device` objects in a +//! std::vector. +//! +//! Since `device` objects are not movable or copyable, we need to construct them +//! in-place with a proxy object that can be implicitly converted to a `device` +//! object. +struct all_devices::__initializer_iterator +{ + using value_type = __emplace_device; + using reference = __emplace_device; + using iterator_category = ::std::forward_iterator_tag; + using difference_type = int; + using pointer = __emplace_device; + + int __id_; + + __emplace_device operator*() const noexcept + { + return __emplace_device{__id_}; + } + + __emplace_device operator->() const noexcept + { + return __emplace_device{__id_}; + } + + __initializer_iterator& operator++() noexcept + { + ++__id_; + return *this; + } + + __initializer_iterator operator++(int) noexcept + { + auto __tmp = *this; + ++__id_; + return __tmp; + } + + bool operator==(const __initializer_iterator& __other) const noexcept + { + return __id_ == __other.__id_; + } + + bool operator!=(const __initializer_iterator& __other) const noexcept + { + return __id_ != __other.__id_; + } +}; + +_CCCL_NODISCARD inline const device& all_devices::operator[](size_type __id_) const noexcept +{ + assert(__id_ < size()); + return __devices()[__id_]; +} + +_CCCL_NODISCARD inline const device& all_devices::at(size_type __id_) const +{ + if (__id_ >= size()) + { + _CUDA_VSTD::__throw_out_of_range("device index out of range"); + } + return __devices()[__id_]; +} + +_CCCL_NODISCARD inline all_devices::size_type all_devices::size() const +{ + return __devices().size(); +} + +_CCCL_NODISCARD inline all_devices::iterator all_devices::begin() const noexcept +{ + return __devices().begin(); +} + +_CCCL_NODISCARD inline all_devices::iterator all_devices::end() const noexcept +{ + return __devices().end(); +} + +inline const ::std::vector& all_devices::__devices() +{ + static const ::std::vector __devices = [] { + int __count = 0; + _CCCL_TRY_CUDA_API(::cudaGetDeviceCount, "failed to get the count of CUDA devices", &__count); + return ::std::vector{__initializer_iterator{0}, __initializer_iterator{__count}}; + }(); + return __devices; +} +} // namespace detail + +//! @brief A range of all available CUDA devices +//! +//! `cuda::devices` provides a view of all available CUDA devices. It is useful for +//! determining the number of supported devices and for iterating over all devices +//! in a range-based for loop (e.g., to print device properties, perhaps). +//! +//! @par Class synopsis +//! @code +//! class __all_devices { // exposition only +//! public: +//! using size_type = ::std::size_t; +//! struct iterator; +//! using const_iterator = iterator; +//! +//! [[nodiscard]] constexpr const device& operator[](size_type i) const noexcept; +//! +//! [[nodiscard]] size_type size() const; +//! +//! [[nodiscard]] iterator begin() const noexcept; +//! +//! [[nodiscard]] iterator end() const noexcept; +//! }; +//! @endcode +//! +//! @par +//! `__all_devices::iterator` is a random access iterator with a `reference` +//! type of `const device&`. +//! +//! @par Example +//! @code +//! auto& dev0 = cuda::devices[0]; +//! assert(cuda::devices.size() == cuda::std::distance(cuda::devices.begin(), cuda::devices.end())); +//! @endcode +//! +//! @sa +//! * device +//! * device_ref +inline constexpr detail::all_devices devices{}; + +} // namespace cuda::experimental + +#endif // _CUDAX__DEVICE_ALL_DEVICES diff --git a/cudax/include/cuda/experimental/__device/device.cuh b/cudax/include/cuda/experimental/__device/device.cuh index f7ba66a8729..f91b0089d5f 100644 --- a/cudax/include/cuda/experimental/__device/device.cuh +++ b/cudax/include/cuda/experimental/__device/device.cuh @@ -25,17 +25,43 @@ namespace cuda::experimental { -// TODO: this will be the element type of the the global `devices` array. It is -// where we can cache device properties. +namespace detail +{ +//! @brief A proxy object used to in-place construct a `device` object from an +//! integer ID. Used in __detail/all_devices.cuh. +struct __emplace_device +{ + int __id_; + + _CCCL_NODISCARD constexpr operator device() const noexcept; + + _CCCL_NODISCARD constexpr const __emplace_device* operator->() const noexcept; +}; +} // namespace detail + +// This is the element type of the the global `devices` array. In the future, we +// can cache device properties here. // //! @brief An immovable "owning" representation of a CUDA device. class device : public device_ref { +public: +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document +# if defined(_CCCL_COMPILER_MSVC) + // When __EDG__ is defined, std::construct_at will not permit constructing + // a device object from an __emplace_device object. This is a workaround. + constexpr device(detail::__emplace_device __ed) noexcept + : device(__ed.__id_) + {} +# endif +#endif + +private: // TODO: put a mutable thread-safe (or thread_local) cache of device // properties here. -private: friend class device_ref; + friend struct detail::__emplace_device; explicit constexpr device(int __id) noexcept : device_ref(__id) @@ -48,6 +74,19 @@ private: device& operator=(const device&) = delete; }; +namespace detail +{ +_CCCL_NODISCARD inline constexpr __emplace_device::operator device() const noexcept +{ + return device(__id_); +} + +_CCCL_NODISCARD inline constexpr const __emplace_device* __emplace_device::operator->() const noexcept +{ + return this; +} +} // namespace detail + } // namespace cuda::experimental #endif // _CUDAX__DEVICE_DEVICE diff --git a/cudax/include/cuda/experimental/device.cuh b/cudax/include/cuda/experimental/device.cuh index bf623376035..264cb3cc1a8 100644 --- a/cudax/include/cuda/experimental/device.cuh +++ b/cudax/include/cuda/experimental/device.cuh @@ -11,6 +11,7 @@ #ifndef __CUDAX_DEVICE__ #define __CUDAX_DEVICE__ +#include #include #include #include diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 2b6313a1bbb..bb8a7d7c545 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -63,7 +63,7 @@ foreach(cn_target IN LISTS cudax_TARGETS) launch/configuration.cu ) - Cudax_add_catch2_test(test_target device_tests ${cn_target} + cudax_add_catch2_test(test_target device_tests ${cn_target} device/device_smoke.cu ) target_compile_options(${test_target} PRIVATE $<$:--extended-lambda>) diff --git a/cudax/test/device/device_smoke.cu b/cudax/test/device/device_smoke.cu index e68b667c80a..86c9625e21c 100644 --- a/cudax/test/device/device_smoke.cu +++ b/cudax/test/device/device_smoke.cu @@ -7,6 +7,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// + #define LIBCUDACXX_ENABLE_EXCEPTIONS #include @@ -238,3 +239,39 @@ TEST_CASE("Smoke", "[device]") #endif } } + +TEST_CASE("global devices vector", "[device]") +{ + CUDAX_REQUIRE(cudax::devices.size() > 0); + CUDAX_REQUIRE(cudax::devices.begin() != cudax::devices.end()); + CUDAX_REQUIRE(cudax::devices.begin() == cudax::devices.begin()); + CUDAX_REQUIRE(cudax::devices.end() == cudax::devices.end()); + CUDAX_REQUIRE(cudax::devices.size() == static_cast(cudax::devices.end() - cudax::devices.begin())); + + CUDAX_REQUIRE(0 == cudax::devices[0].get()); + CUDAX_REQUIRE(0 == (*cudax::devices.begin()).get()); + CUDAX_REQUIRE(0 == cudax::devices.begin()->get()); + CUDAX_REQUIRE(0 == cudax::devices.begin()[0].get()); + + if (cudax::devices.size() > 1) + { + CUDAX_REQUIRE(1 == cudax::devices[1].get()); + CUDAX_REQUIRE(1 == (*std::next(cudax::devices.begin())).get()); + CUDAX_REQUIRE(1 == std::next(cudax::devices.begin())->get()); + CUDAX_REQUIRE(1 == cudax::devices.begin()[1].get()); + + CUDAX_REQUIRE(0 == (*std::prev(cudax::devices.end())).get()); + CUDAX_REQUIRE(0 == std::prev(cudax::devices.end())->get()); + CUDAX_REQUIRE(0 == cudax::devices.end()[-1].get()); + } + + try + { + [[maybe_unused]] const cudax::device& dev = cudax::devices.at(cudax::devices.size()); + CUDAX_REQUIRE(false); // should not get here + } + catch (const std::out_of_range&) + { + CUDAX_REQUIRE(true); // expected + } +}