From abe6d1cf882205eaef064103c0e8bc653f3ee951 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Fri, 3 Nov 2023 16:39:22 +0100 Subject: [PATCH] Fix C++11 support of recently added tests (#651) --- .../test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp | 1 + .../test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp | 1 + .../test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp | 1 + .../test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp | 1 + .../test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp | 1 + .../cuda/std/detail/libcxx/include/__cuda/barrier.h | 10 ++++++++++ 6 files changed, 15 insertions(+) diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp index 4af32de2114..6d890edc9bc 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// // +// UNSUPPORTED: c++11 // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp index be0c29f5eeb..e394515666a 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// // +// UNSUPPORTED: c++11 // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp index 0b3a12f3539..b10c2bfc269 100644 --- 
a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// // +// UNSUPPORTED: c++11 // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp index 68371a45ca0..8e7886db6a8 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// // +// UNSUPPORTED: c++11 // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc diff --git a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp index cbf6141a0af..8be14c21c14 100644 --- a/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp +++ b/libcudacxx/.upstream-tests/test/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// // +// UNSUPPORTED: c++11 // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: pre-sm-90 // UNSUPPORTED: nvrtc diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h b/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h index 115e57a27d9..9fd883659bc 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h @@ -955,7 +955,13 @@ template inline __device__ void __cp_async_shared_global(char * __dest, const char * __src) 
{ // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async + + // If `if constexpr` is not available, this function gets instantiated even + // if it is not called. Do not static_assert in that case. +#if _LIBCUDACXX_STD_VER >= 17 static_assert(_Copy_size == 4 || _Copy_size == 8 || _Copy_size == 16, "cp.async.shared.global requires a copy size of 4, 8, or 16."); +#endif // _LIBCUDACXX_STD_VER >= 17 + asm volatile( "cp.async.ca.shared.global [%0], [%1], %2, %2;" : @@ -982,7 +988,11 @@ void __cp_async_shared_global<16>(char * __dest, const char * __src) { template inline __device__ void __cp_async_shared_global_mechanism(_Group __g, char * __dest, const char * __src, _CUDA_VSTD::size_t __size) { + // If `if constexpr` is not available, this function gets instantiated even + // if it is not called. Do not static_assert in that case. +#if _LIBCUDACXX_STD_VER >= 17 static_assert(4 <= _Alignment, "cp.async requires at least 4-byte alignment"); +#endif // _LIBCUDACXX_STD_VER >= 17 // Maximal copy size is 16. constexpr int __copy_size = (_Alignment > 16) ? 16 : _Alignment;