diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index dc8515f0bbe05..b735524835f54 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include // Convergent attribute #ifdef __SYCL_DEVICE_ONLY__ @@ -91,6 +91,10 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType, extern SYCL_EXTERNAL Type __spirv_AtomicUMin( \ AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ Type V); +#define __SPIRV_ATOMIC_FMIN(AS, Type) \ + extern SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \ + AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ + Type V); #define __SPIRV_ATOMIC_SMAX(AS, Type) \ extern SYCL_EXTERNAL Type __spirv_AtomicSMax( \ AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ @@ -99,6 +103,10 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType, extern SYCL_EXTERNAL Type __spirv_AtomicUMax( \ AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ Type V); +#define __SPIRV_ATOMIC_FMAX(AS, Type) \ + extern SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \ + AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ + Type V); #define __SPIRV_ATOMIC_AND(AS, Type) \ extern SYCL_EXTERNAL Type __spirv_AtomicAnd( \ AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ @@ -114,6 +122,8 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType, #define __SPIRV_ATOMIC_FLOAT(AS, Type) \ __SPIRV_ATOMIC_FADD(AS, Type) \ + __SPIRV_ATOMIC_FMIN(AS, Type) \ + __SPIRV_ATOMIC_FMAX(AS, Type) \ __SPIRV_ATOMIC_LOAD(AS, Type) \ __SPIRV_ATOMIC_STORE(AS, Type) \ __SPIRV_ATOMIC_EXCHANGE(AS, Type) @@ -138,21 +148,30 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType, __SPIRV_ATOMIC_UMAX(AS, Type) // Helper atomic operations which select correct signed/unsigned version -// of atomic min/max based on the signed-ness of the type +// of atomic min/max based on the type #define __SPIRV_ATOMIC_MINMAX(AS, Op) \ template \ - typename std::enable_if::value, T>::type \ + typename cl::sycl::detail::enable_if_t< \ + std::is_integral::value && std::is_signed::value, T> \ __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ __spv::MemorySemanticsMask::Flag Semantics, \ T Value) { \ return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \ } \ template \ - typename std::enable_if::value, T>::type \ + typename cl::sycl::detail::enable_if_t< \ + std::is_integral::value && !std::is_signed::value, T> \ __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ __spv::MemorySemanticsMask::Flag Semantics, \ T Value) { \ return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ + } \ + template \ + typename cl::sycl::detail::enable_if_t::value, T> \ + __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ + __spv::MemorySemanticsMask::Flag Semantics, \ + T Value) { \ + return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \ } #define __SPIRV_ATOMICS(macro, Arg) \ diff --git a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp index b64331232f377..2c9b999309731 100644 --- a/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp +++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp @@ -413,7 +413,6 @@ class atomic_ref_impl class atomic_ref_impl< @@ -486,22 +485,34 @@ class atomic_ref_impl< T fetch_min(T operand, memory_order order = default_read_modify_write_order, memory_scope scope = default_scope) const noexcept { +// TODO: Remove the "native atomics" macro check once implemented for all +// backends +#if defined(__SYCL_DEVICE_ONLY__) && defined(SYCL_USE_NATIVE_FP_ATOMICS) + return detail::spirv::AtomicMin(ptr, scope, order, operand); +#else auto load_order = detail::getLoadOrder(order); T old = load(load_order, scope); while (operand < old && !compare_exchange_weak(old, operand, order, scope)) { } return old; +#endif } T fetch_max(T operand, memory_order order = default_read_modify_write_order, memory_scope scope = default_scope) const noexcept { +// TODO: Remove the "native atomics" macro check once implemented for all +// backends +#if defined(__SYCL_DEVICE_ONLY__) && defined(SYCL_USE_NATIVE_FP_ATOMICS) + return detail::spirv::AtomicMax(ptr, scope, order, operand); +#else auto load_order = detail::getLoadOrder(order); T old = load(load_order, scope); while (operand > old && !compare_exchange_weak(old, operand, order, scope)) { } return old; +#endif } private: diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index c04700e59a775..ccf2cf3863304 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -435,6 +435,16 @@ AtomicMin(multi_ptr MPtr, ONEAPI::memory_scope Scope, return __spirv_AtomicMin(Ptr, SPIRVScope, SPIRVOrder, Value); } +template +inline typename detail::enable_if_t::value, T> +AtomicMin(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicMin(Ptr, SPIRVScope, SPIRVOrder, Value); +} + template inline typename detail::enable_if_t::value, T> AtomicMax(multi_ptr MPtr, ONEAPI::memory_scope Scope, @@ -445,6 +455,16 @@ AtomicMax(multi_ptr MPtr, ONEAPI::memory_scope Scope, return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value); } +template +inline typename detail::enable_if_t::value, T> +AtomicMax(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value); +} + // Native shuffles map directly to a shuffle intrinsic: // - The Intel SPIR-V extension natively supports all arithmetic types // - The CUDA shfl intrinsics do not support vectors, and we use the _i32 diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index caadaa0381928..07621eff44842 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -1,5 +1,7 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \ +// RUN: -fsycl-device-only -S %s -o - | FileCheck %s --check-prefix=CHECK-LLVM // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-only -S %s -o - \ -// RUN: | FileCheck %s --check-prefix=CHECK-LLVM +// RUN: | FileCheck %s --check-prefix=CHECK-LLVM-EMU // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %RUN_ON_HOST %t.out @@ -83,19 +85,21 @@ int main() { // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicUMax // CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i64) max_test(q, N); - // CHECK-LLVM: declare dso_local spir_func i32 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad - // CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32) - // CHECK-LLVM: declare dso_local spir_func i32 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange - // CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32) + // CHECK-LLVM: declare dso_local spir_func float + // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMaxEXT + // CHECK-LLVM-SAME: (float addrspace(1)*, i32, i32, float) + // CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicLoad + // CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32) + // CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicCompareExchange + // CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32) max_test(q, N); - // CHECK-LLVM: declare dso_local spir_func i64 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad - // CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32) - // CHECK-LLVM: declare dso_local spir_func i64 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange - // CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64) + // CHECK-LLVM: declare dso_local spir_func double + // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMaxEXT + // CHECK-LLVM-SAME: (double addrspace(1)*, i32, i32, double) + // CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicLoad + // CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32) + // CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicCompareExchange + // CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64) max_test(q, N); std::cout << "Test passed." << std::endl; diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index 10ea0792d5c93..d6c1b25ed998b 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -1,5 +1,7 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \ +// RUN: -fsycl-device-only -S %s -o - | FileCheck %s --check-prefix=CHECK-LLVM // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-only -S %s -o - \ -// RUN: | FileCheck %s --check-prefix=CHECK-LLVM +// RUN: | FileCheck %s --check-prefix=CHECK-LLVM-EMU // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %RUN_ON_HOST %t.out @@ -81,19 +83,21 @@ int main() { // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicUMin // CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i64) min_test(q, N); - // CHECK-LLVM: declare dso_local spir_func i32 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad - // CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32) - // CHECK-LLVM: declare dso_local spir_func i32 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange - // CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32) + // CHECK-LLVM: declare dso_local spir_func float + // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMinEXT + // CHECK-LLVM-SAME: (float addrspace(1)*, i32, i32, float) + // CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicLoad + // CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32) + // CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicCompareExchange + // CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32) min_test(q, N); - // CHECK-LLVM: declare dso_local spir_func i64 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad - // CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32) - // CHECK-LLVM: declare dso_local spir_func i64 - // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange - // CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64) + // CHECK-LLVM: declare dso_local spir_func double + // CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMinEXT + // CHECK-LLVM-SAME: (double addrspace(1)*, i32, i32, double) + // CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicLoad + // CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32) + // CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicCompareExchange + // CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64) min_test(q, N); std::cout << "Test passed." << std::endl;