Skip to content

Commit

Permalink
[SYCL] Specialize atomic fetch_min/fetch_max for FP types (#3297)
Browse files Browse the repository at this point in the history
Minor implementation details aside, this is a follow-up to #2765.
The end-to-end tests are already done, the latest update being
intel/llvm-test-suite#118.

Signed-off-by: Artem Gindinson <[email protected]>
  • Loading branch information
AGindinson authored Mar 4, 2021
1 parent 98505e4 commit 59ceaf4
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 31 deletions.
27 changes: 23 additions & 4 deletions sycl/include/CL/__spirv/spirv_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#include <CL/__spirv/spirv_types.hpp>
#include <CL/sycl/detail/defines.hpp>
#include <CL/sycl/detail/export.hpp>
#include <CL/sycl/detail/stl_type_traits.hpp>
#include <cstddef>
#include <cstdint>
#include <type_traits>

// Convergent attribute
#ifdef __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -91,6 +91,10 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,
extern SYCL_EXTERNAL Type __spirv_AtomicUMin( \
AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
Type V);
#define __SPIRV_ATOMIC_FMIN(AS, Type) \
extern SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
Type V);
#define __SPIRV_ATOMIC_SMAX(AS, Type) \
extern SYCL_EXTERNAL Type __spirv_AtomicSMax( \
AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
Expand All @@ -99,6 +103,10 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,
extern SYCL_EXTERNAL Type __spirv_AtomicUMax( \
AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
Type V);
#define __SPIRV_ATOMIC_FMAX(AS, Type) \
extern SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
Type V);
#define __SPIRV_ATOMIC_AND(AS, Type) \
extern SYCL_EXTERNAL Type __spirv_AtomicAnd( \
AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
Expand All @@ -114,6 +122,8 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,

#define __SPIRV_ATOMIC_FLOAT(AS, Type) \
__SPIRV_ATOMIC_FADD(AS, Type) \
__SPIRV_ATOMIC_FMIN(AS, Type) \
__SPIRV_ATOMIC_FMAX(AS, Type) \
__SPIRV_ATOMIC_LOAD(AS, Type) \
__SPIRV_ATOMIC_STORE(AS, Type) \
__SPIRV_ATOMIC_EXCHANGE(AS, Type)
Expand All @@ -138,21 +148,30 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,
__SPIRV_ATOMIC_UMAX(AS, Type)

// Helper atomic operations which select correct signed/unsigned version
// of atomic min/max based on the signed-ness of the type
// of atomic min/max based on the type
#define __SPIRV_ATOMIC_MINMAX(AS, Op) \
template <typename T> \
typename std::enable_if<std::is_signed<T>::value, T>::type \
typename cl::sycl::detail::enable_if_t< \
std::is_integral<T>::value && std::is_signed<T>::value, T> \
__spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
__spv::MemorySemanticsMask::Flag Semantics, \
T Value) { \
return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
} \
template <typename T> \
typename std::enable_if<!std::is_signed<T>::value, T>::type \
typename cl::sycl::detail::enable_if_t< \
std::is_integral<T>::value && !std::is_signed<T>::value, T> \
__spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
__spv::MemorySemanticsMask::Flag Semantics, \
T Value) { \
return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
} \
template <typename T> \
typename cl::sycl::detail::enable_if_t<std::is_floating_point<T>::value, T> \
__spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
__spv::MemorySemanticsMask::Flag Semantics, \
T Value) { \
return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
}

#define __SPIRV_ATOMICS(macro, Arg) \
Expand Down
13 changes: 12 additions & 1 deletion sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ class atomic_ref_impl<T, DefaultOrder, DefaultScope, AddressSpace,
};

// Partial specialization for floating-point types
// TODO: Leverage floating-point SPIR-V atomics instead of emulation
template <typename T, memory_order DefaultOrder, memory_scope DefaultScope,
access::address_space AddressSpace>
class atomic_ref_impl<
Expand Down Expand Up @@ -486,22 +485,34 @@ class atomic_ref_impl<

T fetch_min(T operand, memory_order order = default_read_modify_write_order,
memory_scope scope = default_scope) const noexcept {
// TODO: Remove the "native atomics" macro check once implemented for all
// backends
#if defined(__SYCL_DEVICE_ONLY__) && defined(SYCL_USE_NATIVE_FP_ATOMICS)
return detail::spirv::AtomicMin(ptr, scope, order, operand);
#else
auto load_order = detail::getLoadOrder(order);
T old = load(load_order, scope);
while (operand < old &&
!compare_exchange_weak(old, operand, order, scope)) {
}
return old;
#endif
}

T fetch_max(T operand, memory_order order = default_read_modify_write_order,
memory_scope scope = default_scope) const noexcept {
// TODO: Remove the "native atomics" macro check once implemented for all
// backends
#if defined(__SYCL_DEVICE_ONLY__) && defined(SYCL_USE_NATIVE_FP_ATOMICS)
return detail::spirv::AtomicMax(ptr, scope, order, operand);
#else
auto load_order = detail::getLoadOrder(order);
T old = load(load_order, scope);
while (operand > old &&
!compare_exchange_weak(old, operand, order, scope)) {
}
return old;
#endif
}

private:
Expand Down
20 changes: 20 additions & 0 deletions sycl/include/CL/sycl/detail/spirv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,16 @@ AtomicMin(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
return __spirv_AtomicMin(Ptr, SPIRVScope, SPIRVOrder, Value);
}

template <typename T, access::address_space AddressSpace>
inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
AtomicMin(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
ONEAPI::memory_order Order, T Value) {
auto *Ptr = MPtr.get();
auto SPIRVOrder = getMemorySemanticsMask(Order);
auto SPIRVScope = getScope(Scope);
return __spirv_AtomicMin(Ptr, SPIRVScope, SPIRVOrder, Value);
}

template <typename T, access::address_space AddressSpace>
inline typename detail::enable_if_t<std::is_integral<T>::value, T>
AtomicMax(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
Expand All @@ -445,6 +455,16 @@ AtomicMax(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value);
}

template <typename T, access::address_space AddressSpace>
inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
AtomicMax(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
ONEAPI::memory_order Order, T Value) {
auto *Ptr = MPtr.get();
auto SPIRVOrder = getMemorySemanticsMask(Order);
auto SPIRVScope = getScope(Scope);
return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value);
}

// Native shuffles map directly to a shuffle intrinsic:
// - The Intel SPIR-V extension natively supports all arithmetic types
// - The CUDA shfl intrinsics do not support vectors, and we use the _i32
Expand Down
30 changes: 17 additions & 13 deletions sycl/test/atomic_ref/max.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \
// RUN: -fsycl-device-only -S %s -o - | FileCheck %s --check-prefix=CHECK-LLVM
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-only -S %s -o - \
// RUN: | FileCheck %s --check-prefix=CHECK-LLVM
// RUN: | FileCheck %s --check-prefix=CHECK-LLVM-EMU
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %RUN_ON_HOST %t.out

Expand Down Expand Up @@ -83,19 +85,21 @@ int main() {
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicUMax
// CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i64)
max_test<unsigned long long>(q, N);
// CHECK-LLVM: declare dso_local spir_func i32
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad
// CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32)
// CHECK-LLVM: declare dso_local spir_func i32
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange
// CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32)
// CHECK-LLVM: declare dso_local spir_func float
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMaxEXT
// CHECK-LLVM-SAME: (float addrspace(1)*, i32, i32, float)
// CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicLoad
// CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32)
// CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicCompareExchange
// CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32)
max_test<float>(q, N);
// CHECK-LLVM: declare dso_local spir_func i64
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad
// CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32)
// CHECK-LLVM: declare dso_local spir_func i64
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange
// CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64)
// CHECK-LLVM: declare dso_local spir_func double
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMaxEXT
// CHECK-LLVM-SAME: (double addrspace(1)*, i32, i32, double)
// CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicLoad
// CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32)
// CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicCompareExchange
// CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64)
max_test<double>(q, N);

std::cout << "Test passed." << std::endl;
Expand Down
30 changes: 17 additions & 13 deletions sycl/test/atomic_ref/min.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \
// RUN: -fsycl-device-only -S %s -o - | FileCheck %s --check-prefix=CHECK-LLVM
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-only -S %s -o - \
// RUN: | FileCheck %s --check-prefix=CHECK-LLVM
// RUN: | FileCheck %s --check-prefix=CHECK-LLVM-EMU
// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %RUN_ON_HOST %t.out

Expand Down Expand Up @@ -81,19 +83,21 @@ int main() {
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicUMin
// CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i64)
min_test<unsigned long long>(q, N);
// CHECK-LLVM: declare dso_local spir_func i32
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad
// CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32)
// CHECK-LLVM: declare dso_local spir_func i32
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange
// CHECK-LLVM-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32)
// CHECK-LLVM: declare dso_local spir_func float
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMinEXT
// CHECK-LLVM-SAME: (float addrspace(1)*, i32, i32, float)
// CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicLoad
// CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32)
// CHECK-LLVM-EMU: declare {{.*}} i32 @{{.*}}__spirv_AtomicCompareExchange
// CHECK-LLVM-EMU-SAME: (i32 addrspace(1)*, i32, i32, i32, i32, i32)
min_test<float>(q, N);
// CHECK-LLVM: declare dso_local spir_func i64
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicLoad
// CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32)
// CHECK-LLVM: declare dso_local spir_func i64
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicCompareExchange
// CHECK-LLVM-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64)
// CHECK-LLVM: declare dso_local spir_func double
// CHECK-LLVM-SAME: @_Z{{[0-9]+}}__spirv_AtomicFMinEXT
// CHECK-LLVM-SAME: (double addrspace(1)*, i32, i32, double)
// CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicLoad
// CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32)
// CHECK-LLVM-EMU: declare {{.*}} i64 @{{.*}}__spirv_AtomicCompareExchange
// CHECK-LLVM-EMU-SAME: (i64 addrspace(1)*, i32, i32, i32, i64, i64)
min_test<double>(q, N);

std::cout << "Test passed." << std::endl;
Expand Down

0 comments on commit 59ceaf4

Please sign in to comment.