Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CUDAX] Add a global constexpr cudax::devices range for all devices in the system #2100

Merged
merged 6 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 194 additions & 0 deletions cudax/include/cuda/experimental/__device/all_devices.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__DEVICE_ALL_DEVICES
#define _CUDAX__DEVICE_ALL_DEVICES

#include <cuda/__cccl_config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cuda/api_wrapper.h>
#include <cuda/std/cassert>
#include <cuda/std/detail/libcxx/include/stdexcept>

#include <cuda/experimental/__device/device.cuh>

#include <vector>

namespace cuda::experimental
{
namespace detail
{
//! @brief A random-access range of all available CUDA devices
//!
//! The class has no data members: every accessor forwards to the single
//! vector returned by the private static `__devices()`, so all instances
//! observe the same list of devices.
class all_devices
{
public:
  using size_type      = ::std::vector<device>::size_type;
  using iterator       = ::std::vector<device>::const_iterator;
  using const_iterator = ::std::vector<device>::const_iterator;

  all_devices() = default;

  //! @brief Element access without a runtime bounds check (the index is only
  //! validated by an `assert`).
  _CCCL_NODISCARD const device& operator[](size_type __i) const noexcept;

  //! @brief Bounds-checked element access; an index `>= size()` is reported
  //! as an out-of-range error.
  _CCCL_NODISCARD const device& at(size_type __i) const;

  //! @brief The number of devices in the range.
  _CCCL_NODISCARD size_type size() const;

  //! @brief Iterator to the first device.
  _CCCL_NODISCARD iterator begin() const noexcept;

  //! @brief Past-the-end iterator.
  _CCCL_NODISCARD iterator end() const noexcept;

private:
  // Proxy iterator used to construct the `device` elements in place.
  struct __initializer_iterator;

  // Accessor for the shared, lazily-built vector of devices.
  static const ::std::vector<device>& __devices();
};

//! @brief A counting iterator over device ordinals that is used to build the
//! `std::vector<device>` in place.
//!
//! `device` objects can be neither moved nor copied, so the vector cannot be
//! filled from ready-made `device` values. Dereferencing this iterator yields
//! an `__emplace_device` proxy instead, which converts implicitly to a
//! `device` at the point where the element is constructed.
struct all_devices::__initializer_iterator
{
  using value_type        = __emplace_device;
  using reference         = __emplace_device;
  using iterator_category = ::std::forward_iterator_tag;
  using difference_type   = int;
  using pointer           = __emplace_device;

  // Ordinal of the device this iterator currently designates.
  int __id_;

  // Produces a fresh proxy carrying the current ordinal.
  __emplace_device operator*() const noexcept
  {
    return {__id_};
  }

  // Same proxy as operator*; the proxy supplies its own operator->.
  __emplace_device operator->() const noexcept
  {
    return **this;
  }

  // Pre-increment: step to the next ordinal.
  __initializer_iterator& operator++() noexcept
  {
    __id_ += 1;
    return *this;
  }

  // Post-increment: step forward and hand back the previous position.
  __initializer_iterator operator++(int) noexcept
  {
    return __initializer_iterator{__id_++};
  }

  // Two iterators are equal exactly when they designate the same ordinal.
  bool operator==(const __initializer_iterator& __other) const noexcept
  {
    return __other.__id_ == __id_;
  }

  bool operator!=(const __initializer_iterator& __other) const noexcept
  {
    return !(*this == __other);
  }
};

//! @brief Returns the device stored at position `__id_`.
//!
//! The index is validated only through `assert`; use `at()` for a check that
//! is always performed.
_CCCL_NODISCARD inline const device& all_devices::operator[](size_type __id_) const noexcept
{
  const ::std::vector<device>& __all = __devices();
  assert(__id_ < __all.size());
  return __all[__id_];
}

//! @brief Returns the device stored at position `__id_`, with bounds checking.
//!
//! An index that is not smaller than `size()` is reported through
//! `_CUDA_VSTD::__throw_out_of_range`.
_CCCL_NODISCARD inline const device& all_devices::at(size_type __id_) const
{
  const ::std::vector<device>& __all = __devices();
  if (!(__id_ < __all.size()))
  {
    _CUDA_VSTD::__throw_out_of_range("device index out of range");
  }
  return __all[__id_];
}

//! @brief Returns the number of elements in the shared device vector.
_CCCL_NODISCARD inline all_devices::size_type all_devices::size() const
{
  const ::std::vector<device>& __all = __devices();
  return __all.size();
}

//! @brief Returns an iterator to the first element of the shared device vector.
_CCCL_NODISCARD inline all_devices::iterator all_devices::begin() const noexcept
{
  const ::std::vector<device>& __all = __devices();
  return __all.begin();
}

//! @brief Returns the past-the-end iterator of the shared device vector.
_CCCL_NODISCARD inline all_devices::iterator all_devices::end() const noexcept
{
  const ::std::vector<device>& __all = __devices();
  return __all.end();
}

inline const ::std::vector<device>& all_devices::__devices()
{
static const ::std::vector<device> __devices = [] {
int __count = 0;
_CCCL_TRY_CUDA_API(::cudaGetDeviceCount, "failed to get the count of CUDA devices", &__count);
return ::std::vector<device>{__initializer_iterator{0}, __initializer_iterator{__count}};
}();
return __devices;
}
} // namespace detail

//! @brief A range of all available CUDA devices
//!
//! `cuda::experimental::devices` provides a view of all available CUDA devices. It is
//! useful for determining the number of supported devices and for iterating over all
//! devices in a range-based for loop (e.g., to print device properties).
//!
//! @par Class synopsis
//! @code
//! class __all_devices { // exposition only
//! public:
//!   using size_type = ::std::size_t;
//!   struct iterator;
//!   using const_iterator = iterator;
//!
//!   [[nodiscard]] const device& operator[](size_type i) const noexcept;
//!
//!   [[nodiscard]] const device& at(size_type i) const;
//!
//!   [[nodiscard]] size_type size() const;
//!
//!   [[nodiscard]] iterator begin() const noexcept;
//!
//!   [[nodiscard]] iterator end() const noexcept;
//! };
//! @endcode
//!
//! @par
//! `__all_devices::iterator` is a random access iterator with a `reference`
//! type of `const device&`.
//!
//! @par Example
//! @code
//! auto& dev0 = cuda::experimental::devices[0];
//! assert(cuda::experimental::devices.size()
//!        == cuda::std::distance(cuda::experimental::devices.begin(), cuda::experimental::devices.end()));
//! @endcode
//!
//! @sa
//! * device
//! * device_ref
// The range object itself carries no state (the device list lives in a static
// owned by all_devices::__devices()), which is why it can be a constexpr
// global; `inline` lets this header-defined variable be included from many
// translation units.
inline constexpr detail::all_devices devices{};

} // namespace cuda::experimental

#endif // _CUDAX__DEVICE_ALL_DEVICES
45 changes: 42 additions & 3 deletions cudax/include/cuda/experimental/__device/device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,43 @@

namespace cuda::experimental
{
// TODO: this will be the element type of the the global `devices` array. It is
// where we can cache device properties.
namespace detail
{
//! @brief A proxy object used to in-place construct a `device` object from an
//! integer ID. Used in __device/all_devices.cuh.
struct __emplace_device
{
// The device ordinal that the resulting `device` will be constructed from.
int __id_;

// Implicit on purpose: the conversion is what allows a `device` to be
// constructed wherever one of these proxies is supplied.
_CCCL_NODISCARD constexpr operator device() const noexcept;

// Lets an iterator return this proxy by value from its own operator->.
_CCCL_NODISCARD constexpr const __emplace_device* operator->() const noexcept;
};
} // namespace detail

// This is the element type of the global `devices` array. In the future, we
// can cache device properties here.
//
//! @brief An immovable "owning" representation of a CUDA device.
class device : public device_ref
{
public:
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
# if defined(_CCCL_COMPILER_MSVC)
// When __EDG__ is defined, std::construct_at will not permit constructing
// a device object from an __emplace_device object. This is a workaround.
constexpr device(detail::__emplace_device __ed) noexcept
: device(__ed.__id_)
{}
# endif
#endif

private:
// TODO: put a mutable thread-safe (or thread_local) cache of device
// properties here.

private:
friend class device_ref;
friend struct detail::__emplace_device;

explicit constexpr device(int __id) noexcept
: device_ref(__id)
Expand All @@ -48,6 +74,19 @@ private:
device& operator=(const device&) = delete;
};

namespace detail
{
//! @brief Converts the proxy into a `device` built from the stored ID.
_CCCL_NODISCARD inline constexpr __emplace_device::operator device() const noexcept
{
// Returned as a prvalue: `device` cannot be copied, so the result has to be
// constructed directly in the destination rather than moved there.
return device(__id_);
}

//! @brief Returns the address of this proxy, ending the `operator->` chain
//! that starts at an iterator returning the proxy by value.
_CCCL_NODISCARD inline constexpr const __emplace_device* __emplace_device::operator->() const noexcept
{
return this;
}
} // namespace detail

} // namespace cuda::experimental

#endif // _CUDAX__DEVICE_DEVICE
1 change: 1 addition & 0 deletions cudax/include/cuda/experimental/device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#ifndef __CUDAX_DEVICE__
#define __CUDAX_DEVICE__

#include <cuda/experimental/__device/all_devices.cuh>
#include <cuda/experimental/__device/attributes.cuh>
#include <cuda/experimental/__device/device.cuh>
#include <cuda/experimental/__device/device_ref.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cudax/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ foreach(cn_target IN LISTS cudax_TARGETS)
launch/configuration.cu
)

Cudax_add_catch2_test(test_target device_tests ${cn_target}
cudax_add_catch2_test(test_target device_tests ${cn_target}
device/device_smoke.cu
)
target_compile_options(${test_target} PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--extended-lambda>)
Expand Down
37 changes: 37 additions & 0 deletions cudax/test/device/device_smoke.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#define LIBCUDACXX_ENABLE_EXCEPTIONS
#include <cuda/experimental/device.cuh>

Expand Down Expand Up @@ -238,3 +239,39 @@ TEST_CASE("Smoke", "[device]")
#endif
}
}

// Exercises the global `cudax::devices` range: size/iterator consistency,
// element access through every supported spelling, and the bounds check
// performed by `at()`.
TEST_CASE("global devices vector", "[device]")
{
  CUDAX_REQUIRE(cudax::devices.size() > 0);
  CUDAX_REQUIRE(cudax::devices.begin() != cudax::devices.end());
  CUDAX_REQUIRE(cudax::devices.begin() == cudax::devices.begin());
  CUDAX_REQUIRE(cudax::devices.end() == cudax::devices.end());
  CUDAX_REQUIRE(cudax::devices.size() == static_cast<size_t>(cudax::devices.end() - cudax::devices.begin()));

  CUDAX_REQUIRE(0 == cudax::devices[0].get());
  CUDAX_REQUIRE(0 == (*cudax::devices.begin()).get());
  CUDAX_REQUIRE(0 == cudax::devices.begin()->get());
  CUDAX_REQUIRE(0 == cudax::devices.begin()[0].get());

  if (cudax::devices.size() > 1)
  {
    CUDAX_REQUIRE(1 == cudax::devices[1].get());
    CUDAX_REQUIRE(1 == (*std::next(cudax::devices.begin())).get());
    CUDAX_REQUIRE(1 == std::next(cudax::devices.begin())->get());
    CUDAX_REQUIRE(1 == cudax::devices.begin()[1].get());

    // Element i holds device ordinal i, so the element just before end() is
    // ordinal size() - 1. It is never 0 in this branch, where size() > 1.
    const int last_id = static_cast<int>(cudax::devices.size() - 1);
    CUDAX_REQUIRE(last_id == (*std::prev(cudax::devices.end())).get());
    CUDAX_REQUIRE(last_id == std::prev(cudax::devices.end())->get());
    CUDAX_REQUIRE(last_id == cudax::devices.end()[-1].get());
  }

  // at() must reject the one-past-the-end index.
  try
  {
    [[maybe_unused]] const cudax::device& dev = cudax::devices.at(cudax::devices.size());
    CUDAX_REQUIRE(false); // should not get here
  }
  catch (const std::out_of_range&)
  {
    CUDAX_REQUIRE(true); // expected
  }
}
Loading