diff --git a/cudax/include/cuda/experimental/__device/all_devices.cuh b/cudax/include/cuda/experimental/__device/all_devices.cuh
new file mode 100644
index 00000000000..3bd17f5fac2
--- /dev/null
+++ b/cudax/include/cuda/experimental/__device/all_devices.cuh
@@ -0,0 +1,194 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDAX__DEVICE_ALL_DEVICES
+#define _CUDAX__DEVICE_ALL_DEVICES
+
+#include <cuda/__cccl_config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cuda/api_wrapper.h>
+#include <cuda/std/cassert>
+#include <cuda/std/detail/libcxx/include/stdexcept>
+
+#include <cuda/experimental/__device/device.cuh>
+
+#include <vector>
+
+namespace cuda::experimental
+{
+namespace detail
+{
+//! @brief A random-access range of all available CUDA devices
+class all_devices
+{
+public:
+  using size_type      = ::std::vector<device>::size_type;
+  using iterator       = ::std::vector<device>::const_iterator;
+  using const_iterator = ::std::vector<device>::const_iterator;
+
+  all_devices() = default;
+  //! Returns the device at index `__i`; the index is checked only by an assertion.
+  _CCCL_NODISCARD const device& operator[](size_type __i) const noexcept;
+  //! Returns the device at index `__i`; an index `>= size()` is reported as out of range.
+  _CCCL_NODISCARD const device& at(size_type __i) const;
+  //! Returns the number of devices in the list.
+  _CCCL_NODISCARD size_type size() const;
+  //! Returns an iterator to the first device in the list.
+  _CCCL_NODISCARD iterator begin() const noexcept;
+  //! Returns the past-the-end iterator of the list.
+  _CCCL_NODISCARD iterator end() const noexcept;
+
+private:
+  struct __initializer_iterator;
+  // Returns the function-local static list of devices that all accessors read.
+  static const ::std::vector<device>& __devices();
+};
+
+//! @brief Forward iterator over device ordinals that yields `__emplace_device`
+//! proxies, used to fill a std::vector of `device` objects in place.
+//!
+//! `device` can be neither copied nor moved, so each element has to be created
+//! directly in the vector's storage; dereferencing yields a proxy object that
+//! converts implicitly to `device`.
+struct all_devices::__initializer_iterator
+{
+  using iterator_category = ::std::forward_iterator_tag;
+  using difference_type   = int;
+  using value_type        = __emplace_device;
+  using pointer           = __emplace_device;
+  using reference         = __emplace_device;
+
+  int __id_;
+
+  __emplace_device operator*() const noexcept
+  {
+    return __emplace_device{__id_};
+  }
+
+  __emplace_device operator->() const noexcept
+  {
+    return operator*();
+  }
+
+  __initializer_iterator& operator++() noexcept
+  {
+    __id_ += 1;
+    return *this;
+  }
+
+  __initializer_iterator operator++(int) noexcept
+  {
+    auto __previous = *this;
+    ++*this;
+    return __previous;
+  }
+
+  bool operator==(const __initializer_iterator& __other) const noexcept
+  {
+    return __id_ == __other.__id_;
+  }
+
+  bool operator!=(const __initializer_iterator& __other) const noexcept
+  {
+    return !(*this == __other);
+  }
+};
+
+_CCCL_NODISCARD inline const device& all_devices::operator[](size_type __i) const noexcept
+{
+  assert(__i < size()); // precondition: __i must index an existing device
+  return __devices()[__i];
+}
+
+_CCCL_NODISCARD inline const device& all_devices::at(size_type __i) const
+{
+  if (!(__i < size()))
+  {
+    _CUDA_VSTD::__throw_out_of_range("device index out of range"); // checked access: reject bad index
+  }
+  return __devices()[__i];
+}
+
+_CCCL_NODISCARD inline all_devices::size_type all_devices::size() const
+{
+  return __devices().size(); // element count of the list returned by __devices()
+}
+
+_CCCL_NODISCARD inline all_devices::iterator all_devices::begin() const noexcept
+{
+  return __devices().cbegin(); // iterator is the vector's const_iterator
+}
+
+_CCCL_NODISCARD inline all_devices::iterator all_devices::end() const noexcept
+{
+  return __devices().cend(); // iterator is the vector's const_iterator
+}
+
+inline const ::std::vector<device>& all_devices::__devices()
+{
+  static const ::std::vector<device> __list = [] { // initializer runs once, on the first call
+    int __ndevices = 0;
+    _CCCL_TRY_CUDA_API(::cudaGetDeviceCount, "failed to get the count of CUDA devices", &__ndevices);
+    return ::std::vector<device>{__initializer_iterator{0}, __initializer_iterator{__ndevices}};
+  }();
+  return __list;
+}
+} // namespace detail
+
+//! @brief A range of all available CUDA devices
+//!
+//! `cuda::experimental::devices` provides a view of all available CUDA devices. It
+//! is useful for determining the number of supported devices and for iterating over
+//! all devices in a range-based for loop (e.g., to print device properties, perhaps).
+//!
+//! @par Class synopsis
+//! @code
+//! class __all_devices {                     // exposition only
+//! public:
+//!   using size_type = ::std::size_t;
+//!   struct iterator;
+//!   using const_iterator = iterator;
+//!
+//!   [[nodiscard]] const device& operator[](size_type i) const noexcept;
+//!   [[nodiscard]] const device& at(size_type i) const;
+//!
+//!   [[nodiscard]] size_type size() const;
+//!
+//!   [[nodiscard]] iterator begin() const noexcept;
+//!   [[nodiscard]] iterator end() const noexcept;
+//! };
+//! @endcode
+//!
+//! @par
+//! `__all_devices::iterator` is a random access iterator with a `reference`
+//! type of `const device&`.
+//!
+//! @par Example
+//! @code
+//! auto& dev0 = cuda::experimental::devices[0];
+//! assert(cuda::experimental::devices.size() == cuda::std::distance(cuda::experimental::devices.begin(), cuda::experimental::devices.end()));
+//! @endcode
+//!
+//! @sa
+//! * device
+//! * device_ref
+inline constexpr detail::all_devices devices{};
+
+} // namespace cuda::experimental
+
+#endif // _CUDAX__DEVICE_ALL_DEVICES
diff --git a/cudax/include/cuda/experimental/__device/device.cuh b/cudax/include/cuda/experimental/__device/device.cuh
index f7ba66a8729..f91b0089d5f 100644
--- a/cudax/include/cuda/experimental/__device/device.cuh
+++ b/cudax/include/cuda/experimental/__device/device.cuh
@@ -25,17 +25,43 @@
 
 namespace cuda::experimental
 {
-// TODO: this will be the element type of the the global `devices` array. It is
-// where we can cache device properties.
+namespace detail
+{
+//! @brief A proxy object used to in-place construct a `device` object from an
+//! integer ID. Used in __device/all_devices.cuh.
+struct __emplace_device
+{
+  int __id_; // ID handed to the `device` constructor on conversion
+
+  _CCCL_NODISCARD constexpr operator device() const noexcept; // not `explicit`: converts implicitly
+
+  _CCCL_NODISCARD constexpr const __emplace_device* operator->() const noexcept; // returns `this`
+};
+} // namespace detail
+
+// This is the element type of the global `devices` array. In the future, we
+// can cache device properties here.
 //
 //! @brief An immovable "owning" representation of a CUDA device.
 class device : public device_ref
 {
+public:
+#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
+#  if defined(_CCCL_COMPILER_MSVC)
+  // When __EDG__ is defined, std::construct_at will not permit constructing
+  // a device object from an __emplace_device object. This is a workaround.
+  constexpr device(detail::__emplace_device __ed) noexcept
+      : device(__ed.__id_)
+  {}
+#  endif
+#endif
+
+private:
   // TODO: put a mutable thread-safe (or thread_local) cache of device
   // properties here.
 
-private:
   friend class device_ref;
+  friend struct detail::__emplace_device;
 
   explicit constexpr device(int __id) noexcept
       : device_ref(__id)
@@ -48,6 +74,19 @@ private:
   device& operator=(const device&) = delete;
 };
 
+namespace detail
+{
+_CCCL_NODISCARD inline constexpr __emplace_device::operator device() const noexcept
+{
+  return device(__id_); // uses device's private int constructor via the friend declaration
+}
+
+_CCCL_NODISCARD inline constexpr const __emplace_device* __emplace_device::operator->() const noexcept
+{
+  return this; // the proxy acts as its own pointee
+}
+} // namespace detail
+
 } // namespace cuda::experimental
 
 #endif // _CUDAX__DEVICE_DEVICE
diff --git a/cudax/include/cuda/experimental/device.cuh b/cudax/include/cuda/experimental/device.cuh
index bf623376035..264cb3cc1a8 100644
--- a/cudax/include/cuda/experimental/device.cuh
+++ b/cudax/include/cuda/experimental/device.cuh
@@ -11,6 +11,7 @@
 #ifndef __CUDAX_DEVICE__
 #define __CUDAX_DEVICE__
 
+#include <cuda/experimental/__device/all_devices.cuh>
 #include <cuda/experimental/__device/attributes.cuh>
 #include <cuda/experimental/__device/device.cuh>
 #include <cuda/experimental/__device/device_ref.cuh>
diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt
index 2b6313a1bbb..bb8a7d7c545 100644
--- a/cudax/test/CMakeLists.txt
+++ b/cudax/test/CMakeLists.txt
@@ -63,7 +63,7 @@ foreach(cn_target IN LISTS cudax_TARGETS)
     launch/configuration.cu
   )
 
-  Cudax_add_catch2_test(test_target device_tests ${cn_target}
+  cudax_add_catch2_test(test_target device_tests ${cn_target}
     device/device_smoke.cu
   )
   target_compile_options(${test_target} PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--extended-lambda>)
diff --git a/cudax/test/device/device_smoke.cu b/cudax/test/device/device_smoke.cu
index e68b667c80a..86c9625e21c 100644
--- a/cudax/test/device/device_smoke.cu
+++ b/cudax/test/device/device_smoke.cu
@@ -7,6 +7,7 @@
 // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
 //
 //===----------------------------------------------------------------------===//
+
 #define LIBCUDACXX_ENABLE_EXCEPTIONS
 #include <cuda/experimental/device.cuh>
 
@@ -238,3 +239,39 @@ TEST_CASE("Smoke", "[device]")
 #endif
   }
 }
+
+TEST_CASE("global devices vector", "[device]")
+{
+  CUDAX_REQUIRE(cudax::devices.size() > 0);
+  CUDAX_REQUIRE(cudax::devices.begin() != cudax::devices.end());
+  CUDAX_REQUIRE(cudax::devices.begin() == cudax::devices.begin());
+  CUDAX_REQUIRE(cudax::devices.end() == cudax::devices.end());
+  CUDAX_REQUIRE(cudax::devices.size() == static_cast<size_t>(cudax::devices.end() - cudax::devices.begin()));
+  // The first element is the device with ordinal 0, whichever way it is reached.
+  CUDAX_REQUIRE(0 == cudax::devices[0].get());
+  CUDAX_REQUIRE(0 == (*cudax::devices.begin()).get());
+  CUDAX_REQUIRE(0 == cudax::devices.begin()->get());
+  CUDAX_REQUIRE(0 == cudax::devices.begin()[0].get());
+  // With more than one device, also check the second and the last element.
+  if (cudax::devices.size() > 1)
+  {
+    CUDAX_REQUIRE(1 == cudax::devices[1].get());
+    CUDAX_REQUIRE(1 == (*std::next(cudax::devices.begin())).get());
+    CUDAX_REQUIRE(1 == std::next(cudax::devices.begin())->get());
+    CUDAX_REQUIRE(1 == cudax::devices.begin()[1].get());
+    // The last element has ordinal size() - 1, which is never 0 inside this branch.
+    CUDAX_REQUIRE(static_cast<int>(cudax::devices.size() - 1) == (*std::prev(cudax::devices.end())).get());
+    CUDAX_REQUIRE(static_cast<int>(cudax::devices.size() - 1) == std::prev(cudax::devices.end())->get());
+    CUDAX_REQUIRE(static_cast<int>(cudax::devices.size() - 1) == cudax::devices.end()[-1].get());
+  }
+  // at() must reject the one-past-the-end index by throwing std::out_of_range.
+  try
+  {
+    [[maybe_unused]] const cudax::device& dev = cudax::devices.at(cudax::devices.size());
+    CUDAX_REQUIRE(false); // should not get here
+  }
+  catch (const std::out_of_range&)
+  {
+    CUDAX_REQUIRE(true); // expected
+  }
+}