This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Fix GCC/Clang only compilation of <cuda/std/atomic> #207

Merged: 2 commits, Sep 28, 2021
include/cuda/std/atomic (6 additions, 14 deletions)
@@ -37,6 +37,7 @@
#undef ATOMIC_VAR_INIT
#endif //__CUDACC_RTC__

+
#include "cassert"
#include "cstddef"
#include "cstdint"
@@ -63,8 +64,6 @@ namespace __detail {
using std::__detail::__thread_scope_block_tag;
using std::__detail::__thread_scope_device_tag;
using std::__detail::__thread_scope_system_tag;
-using std::__detail::__atomic_signal_fence_cuda;
-using std::__detail::__atomic_thread_fence_cuda;
}

using memory_order = std::memory_order;
@@ -173,32 +172,25 @@ inline __host__ __device__ void atomic_thread_fence(memory_order __m, thread_sco
NV_IS_DEVICE, (
switch(_Scope) {
case thread_scope::thread_scope_system:
-__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_system_tag());
+std::__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_system_tag());
break;
case thread_scope::thread_scope_device:
-__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_device_tag());
+std::__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_device_tag());
break;
case thread_scope::thread_scope_block:
-__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_block_tag());
+std::__detail::__atomic_thread_fence_cuda((int)__m, __detail::__thread_scope_block_tag());
break;
}
),
NV_IS_HOST, (
(void) _Scope;
-::std::atomic_thread_fence((::std::memory_order)__m);
+std::atomic_thread_fence(__m);
)
)
}

inline __host__ __device__ void atomic_signal_fence(memory_order __m) {
-NV_DISPATCH_TARGET(
-NV_IS_DEVICE, (
-__detail::__atomic_signal_fence_cuda((int)__m);
-),
-NV_IS_HOST, (
-::std::atomic_signal_fence((::std::memory_order)__m);
-)
-)
+std::atomic_signal_fence(__m);
}

_LIBCUDACXX_END_NAMESPACE_CUDA
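For context, a minimal usage sketch of the fences this hunk fixes (hypothetical code, not part of the PR): with the call sites fully qualified as std::__detail::, the header compiles when GCC or Clang is the host compiler, and the host path now forwards straight to the standard fence.

#include <cuda/std/atomic>

// Hypothetical publish kernel: the release fence at device scope orders
// the plain store before the flag store.
__global__ void publish(cuda::std::atomic<int>* flag, int* data) {
  *data = 42;
  // On device this dispatches to __atomic_thread_fence_cuda with
  // __thread_scope_device_tag; on host the same function forwards to
  // std::atomic_thread_fence.
  cuda::atomic_thread_fence(cuda::std::memory_order_release,
                            cuda::thread_scope_device);
  flag->store(1, cuda::std::memory_order_relaxed);
}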
include/cuda/std/detail/__config (2 additions, 0 deletions)
@@ -71,6 +71,8 @@
#define __ELF__
#endif

+#define _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT
+
#include "libcxx/include/__config"

#if defined(__CUDA_ARCH__)
libcxx/include/atomic (4 additions, 0 deletions)
@@ -680,6 +680,10 @@ __cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val)

// Headers are wrapped like so: (cuda::std::|std::)detail
namespace __detail {
+#if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_EXT)
+# include "support/atomic/atomic_cuda_scopes.h"
+#endif
+
#if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL)
# include "support/atomic/atomic_cuda.h"
#elif defined(_LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL)
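A note on why the macro is defined in cuda/std/detail/__config rather than next to its use (my reading, not text from the PR): as the comment above says, the same libc++ <atomic> body is wrapped both as std::__detail and as cuda::std::__detail, so the feature macro must already be visible when the wrapped __config is included. A simplified sketch of the pattern, with the names taken from the diff but the wrapping itself paraphrased:

// Simplified, illustrative paraphrase of the wrapping; not the real code.
#define _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT  // set by cuda/std/detail/__config

namespace cuda { namespace std { namespace __detail {
#if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_EXT)
// ...contents of support/atomic/atomic_cuda_scopes.h land here, so the
// scope tags exist under cuda::std::__detail as well as std::__detail.
#endif
}}}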
libcxx/include/support/atomic/atomic_base.h (28 additions, 0 deletions)
@@ -163,6 +163,34 @@ inline auto __cxx_atomic_fetch_xor(_Tp* __a, _Td __pattern,
__cxx_atomic_order_to_int(__order));
}

+template <typename _Tp, typename _Td>
+inline auto __cxx_atomic_fetch_max(_Tp* __a, _Td __val,
+memory_order __order) -> __cxx_atomic_underlying_t<_Tp> {
+auto __expected = __cxx_atomic_load(__a, memory_order_relaxed);
+auto __desired = __expected > __val ? __expected : __val;
+
+while(__desired == __val &&
+!__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
+__desired = __expected > __val ? __expected : __val;
+}
+
+return __expected;
+}
+
+template <typename _Tp, typename _Td>
+inline auto __cxx_atomic_fetch_min(_Tp* __a, _Td __val,
+memory_order __order) -> __cxx_atomic_underlying_t<_Tp> {
+auto __expected = __cxx_atomic_load(__a, memory_order_relaxed);
+auto __desired = __expected < __val ? __expected : __val;
+
+while(__desired == __val &&
+!__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
+__desired = __expected < __val ? __expected : __val;
+}
+
+return __expected;
+}
+
inline constexpr
bool __cxx_atomic_is_lock_free(size_t __x) {
#if defined(_LIBCUDACXX_NO_RUNTIME_LOCK_FREE)
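To make the new helpers easier to follow, here is a self-contained equivalent of the fetch_max loop written against plain std::atomic (a sketch with assumed names; the failure order is pinned to relaxed here because the standard forbids a release failure order). The loop attempts the RMW only while __val would be the new maximum; if the loaded value already wins, it is returned without a store.

#include <atomic>

// Standalone sketch of the CAS-loop fetch_max above (names are assumptions).
template <class T>
T fetch_max_sketch(std::atomic<T>& a, T val, std::memory_order order) {
  T expected = a.load(std::memory_order_relaxed);
  T desired  = expected > val ? expected : val;
  // Retry only while `val` is the winning maximum; a failed CAS refreshes
  // `expected`, so the winner is recomputed each iteration.
  while (desired == val &&
         !a.compare_exchange_strong(expected, desired, order,
                                    std::memory_order_relaxed)) {
    desired = expected > val ? expected : val;
  }
  return expected;  // the observed previous value, as fetch_* returns
}

__cxx_atomic_fetch_min is the mirror image with < in place of >.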
libcxx/include/support/atomic/atomic_cuda.h (2 additions, 63 deletions)
@@ -33,13 +33,6 @@
#define __ATOMIC_SEQ_CST 5
#endif //__ATOMIC_RELAXED

-#ifndef __ATOMIC_BLOCK
-#define __ATOMIC_SYSTEM 0 // 0 indicates default
-#define __ATOMIC_DEVICE 1
-#define __ATOMIC_BLOCK 2
-#define __ATOMIC_THREAD 10
-#endif //__ATOMIC_BLOCK
-
inline __host__ __device__ int __stronger_order_cuda(int __a, int __b) {
int const __max = __a > __b ? __a : __b;
if(__max != __ATOMIC_RELEASE)
@@ -52,42 +45,6 @@ inline __host__ __device__ int __stronger_order_cuda(int __a, int __b) {
return __xform[__a < __b ? __a : __b];
}

-enum thread_scope {
-thread_scope_system = __ATOMIC_SYSTEM,
-thread_scope_device = __ATOMIC_DEVICE,
-thread_scope_block = __ATOMIC_BLOCK,
-thread_scope_thread = __ATOMIC_THREAD
-};
-
-#define _LIBCUDACXX_ATOMIC_SCOPE_TYPE ::cuda::thread_scope
-#define _LIBCUDACXX_ATOMIC_SCOPE_DEFAULT ::cuda::thread_scope::system
-
-struct __thread_scope_thread_tag { };
-struct __thread_scope_block_tag { };
-struct __thread_scope_device_tag { };
-struct __thread_scope_system_tag { };
-
-template<int _Scope> struct __scope_enum_to_tag { };
-/* This would be the implementation once an actual thread-scope backend exists.
-template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
-using type = __thread_scope_thread_tag; };
-Until then: */
-template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
-using type = __thread_scope_block_tag; };
-template<> struct __scope_enum_to_tag<(int)thread_scope_block> {
-using type = __thread_scope_block_tag; };
-template<> struct __scope_enum_to_tag<(int)thread_scope_device> {
-using type = __thread_scope_device_tag; };
-template<> struct __scope_enum_to_tag<(int)thread_scope_system> {
-using type = __thread_scope_system_tag; };
-
-template <int _Scope>
-_LIBCUDACXX_INLINE_VISIBILITY auto constexpr __scope_tag() ->
-typename __scope_enum_to_tag<_Scope>::type {
-return typename __scope_enum_to_tag<_Scope>::type();
-}
-// END TODO
-
// Wrap host atomic implementations into a sub-namespace
namespace __host {
#if defined(_LIBCUDACXX_COMPILER_MSVC)
@@ -385,16 +342,7 @@ __host__ __device__
NV_IS_DEVICE, (
return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
), (
-// IS_HOST
-_Tp __expected = __cxx_atomic_load(__a, memory_order_relaxed);
-_Tp __desired = __expected > __val ? __expected : __val;
-
-while(__desired == __val &&
-!__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
-__desired = __expected > __val ? __expected : __val;
-}
-
-return __expected;
+return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);
)
)
}
@@ -406,16 +354,7 @@ __host__ __device__
NV_IS_DEVICE, (
return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
), (
-// IS_HOST
-_Tp __expected = __cxx_atomic_load(__a, memory_order_relaxed);
-_Tp __desired = __expected < __val ? __expected : __val;
-
-while(__desired == __val &&
-!__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) {
-__desired = __expected < __val ? __expected : __val;
-}
-
-return __expected;
+return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);
)
)
}
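Both host fallbacks now route to the shared CAS-loop implementations added to atomic_base.h (via the __host:: sub-namespace) instead of duplicating the loop inline. For reference, a hypothetical caller of the extension; fetch_max on cuda::atomic is a libcu++ extension over std::atomic, and on device it takes the __atomic_fetch_max_cuda path above:

#include <cuda/atomic>

// Hypothetical kernel: every thread folds one input element into a
// device-scoped running maximum.
__global__ void running_max(cuda::atomic<int, cuda::thread_scope_device>* m,
                            const int* in, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    m->fetch_max(in[i], cuda::std::memory_order_relaxed);
}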
libcxx/include/support/atomic/atomic_cuda_scopes.h (new file, 41 additions)
@@ -0,0 +1,41 @@
#ifndef __ATOMIC_BLOCK
[Collaborator review comment on this file: I wonder if this name or something like atomic_scopes.h is better. We only expose these in the cuda:: versions of the interface, true, but it's used throughout...]

#define __ATOMIC_SYSTEM 0 // 0 indicates default
#define __ATOMIC_DEVICE 1
#define __ATOMIC_BLOCK 2
#define __ATOMIC_THREAD 10
#endif //__ATOMIC_BLOCK

enum thread_scope {
thread_scope_system = __ATOMIC_SYSTEM,
thread_scope_device = __ATOMIC_DEVICE,
thread_scope_block = __ATOMIC_BLOCK,
thread_scope_thread = __ATOMIC_THREAD
};

#define _LIBCUDACXX_ATOMIC_SCOPE_TYPE ::cuda::thread_scope
#define _LIBCUDACXX_ATOMIC_SCOPE_DEFAULT ::cuda::thread_scope::system

struct __thread_scope_thread_tag { };
struct __thread_scope_block_tag { };
struct __thread_scope_device_tag { };
struct __thread_scope_system_tag { };

template<int _Scope> struct __scope_enum_to_tag { };
/* This would be the implementation once an actual thread-scope backend exists.
template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
using type = __thread_scope_thread_tag; };
Until then: */
template<> struct __scope_enum_to_tag<(int)thread_scope_thread> {
using type = __thread_scope_block_tag; };
template<> struct __scope_enum_to_tag<(int)thread_scope_block> {
using type = __thread_scope_block_tag; };
template<> struct __scope_enum_to_tag<(int)thread_scope_device> {
using type = __thread_scope_device_tag; };
template<> struct __scope_enum_to_tag<(int)thread_scope_system> {
using type = __thread_scope_system_tag; };

template <int _Scope>
_LIBCUDACXX_INLINE_VISIBILITY auto constexpr __scope_tag() ->
typename __scope_enum_to_tag<_Scope>::type {
return typename __scope_enum_to_tag<_Scope>::type();
}
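The point of this file is the enum-to-tag mapping: each scope enumerator corresponds to a distinct empty type, so backend code selects a per-scope overload at compile time instead of branching at runtime. A self-contained sketch of the pattern (simplified, with illustrative names; membar.cta/membar.gpu stand in for the real per-scope code paths):

// Standalone illustration of enum-to-tag dispatch; all names are assumptions.
enum scope { scope_device = 1, scope_block = 2 };

struct block_tag {};
struct device_tag {};

template <int S> struct scope_to_tag;
template <> struct scope_to_tag<scope_block>  { using type = block_tag; };
template <> struct scope_to_tag<scope_device> { using type = device_tag; };

// One overload per scope; the empty tag argument is free at runtime.
inline const char* fence_asm(block_tag)  { return "membar.cta"; }
inline const char* fence_asm(device_tag) { return "membar.gpu"; }

template <int S>
const char* fence_for() {
  return fence_asm(typename scope_to_tag<S>::type{});
}
// fence_for<scope_block>() yields "membar.cta" with no runtime switch.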