diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md index d9d107cb7a6f6..30ff1c7c905bd 100644 --- a/sycl/doc/extensions/README.md +++ b/sycl/doc/extensions/README.md @@ -13,7 +13,7 @@ DPC++ extensions status: | [SYCL_INTEL_deduction_guides](deduction_guides/SYCL_INTEL_deduction_guides.asciidoc) | Supported | | | [SYCL_INTEL_device_specific_kernel_queries](DeviceSpecificKernelQueries/SYCL_INTEL_device_specific_kernel_queries.asciidoc) | Proposal | | | [SYCL_INTEL_enqueue_barrier](EnqueueBarrier/enqueue_barrier.asciidoc) | Supported(OpenCL, Level Zero) | | -| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Partially supported(OpenCL: CPU, GPU) | Not supported: pointer types | +| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Supported(OpenCL: CPU, GPU) | | | [SYCL_INTEL_group_algorithms](GroupAlgorithms/SYCL_INTEL_group_algorithms.asciidoc) | Supported(OpenCL) | | | [SYCL_INTEL_group_mask](./GroupMask/SYCL_INTEL_group_mask.asciidoc) | Proposal | | | [FPGA selector](IntelFPGA/FPGASelector.md) | Supported | | diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/intel/atomic_ref.hpp index f6e8d4ff68616..1616727f919b8 100644 --- a/sycl/include/CL/sycl/intel/atomic_ref.hpp +++ b/sycl/include/CL/sycl/intel/atomic_ref.hpp @@ -135,8 +135,6 @@ class atomic_ref_base { static_assert(!(std::is_same::value || std::is_same::value), "intel::atomic_ref does not support short type"); - static_assert(!std::is_pointer::value, - "intel::atomic_ref does not yet support pointer types"); static_assert(detail::IsValidAtomicAddressSpace::value, "Invalid atomic address_space. Valid address spaces are: " "global_space, local_space, global_device_space"); @@ -508,12 +506,138 @@ class atomic_ref_impl< }; // Partial specialization for pointer types +// Arithmetic is emulated because target's representation of T* is unknown +// TODO: Find a way to use intptr_t or uintptr_t atomics instead template -class atomic_ref_impl::value>> - : public atomic_ref_base { - // TODO: Implement partial specialization for pointer types +class atomic_ref_impl + : public atomic_ref_base { + +private: + using base_type = + atomic_ref_base; + +public: + using value_type = T *; + using difference_type = ptrdiff_t; + static constexpr size_t required_alignment = sizeof(T *); + static constexpr bool is_always_lock_free = + detail::IsValidAtomicType::value; + static constexpr memory_order default_read_order = + detail::memory_order_traits::read_order; + static constexpr memory_order default_write_order = + detail::memory_order_traits::write_order; + static constexpr memory_order default_read_modify_write_order = DefaultOrder; + static constexpr memory_scope default_scope = DefaultScope; + + using base_type::is_lock_free; + + atomic_ref_impl(T *&ref) : base_type(reinterpret_cast(ref)) {} + + void store(T *operand, memory_order order = default_write_order, + memory_scope scope = default_scope) const noexcept { + base_type::store(reinterpret_cast(operand), order, scope); + } + + T *operator=(T *desired) const noexcept { + store(desired); + return desired; + } + + T *load(memory_order order = default_read_order, + memory_scope scope = default_scope) const noexcept { + return reinterpret_cast(base_type::load(order, scope)); + } + + operator T *() const noexcept { return load(); } + + T *exchange(T *operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return reinterpret_cast(base_type::exchange( + reinterpret_cast(operand), order, scope)); + } + + T *fetch_add(difference_type operand, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + // TODO: Find a way to avoid compare_exchange here + auto load_order = detail::getLoadOrder(order); + T *expected = load(load_order, scope); + T *desired; + do { + desired = expected + operand; + } while (!compare_exchange_weak(expected, desired, order, scope)); + return expected; + } + + T *operator+=(difference_type operand) const noexcept { + return fetch_add(operand) + operand; + } + + T *operator++(int) const noexcept { return fetch_add(difference_type(1)); } + + T *operator++() const noexcept { + return fetch_add(difference_type(1)) + difference_type(1); + } + + T *fetch_sub(difference_type operand, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + // TODO: Find a way to avoid compare_exchange here + auto load_order = detail::getLoadOrder(order); + T *expected = load(load_order, scope); + T *desired; + do { + desired = expected - operand; + } while (!compare_exchange_weak(expected, desired, order, scope)); + return expected; + } + + T *operator-=(difference_type operand) const noexcept { + return fetch_sub(operand) - operand; + } + + T *operator--(int) const noexcept { return fetch_sub(difference_type(1)); } + + T *operator--() const noexcept { + return fetch_sub(difference_type(1)) - difference_type(1); + } + + bool + compare_exchange_strong(T *&expected, T *desired, memory_order success, + memory_order failure, + memory_scope scope = default_scope) const noexcept { + return base_type::compare_exchange_strong( + reinterpret_cast(expected), + reinterpret_cast(desired), success, failure, scope); + } + + bool + compare_exchange_strong(T *&expected, T *desired, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return compare_exchange_strong(expected, desired, order, order, scope); + } + + bool + compare_exchange_weak(T *&expected, T *desired, memory_order success, + memory_order failure, + memory_scope scope = default_scope) const noexcept { + return base_type::compare_exchange_weak( + reinterpret_cast(expected), + reinterpret_cast(desired), success, failure, scope); + } + + bool + compare_exchange_weak(T *&expected, T *desired, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return compare_exchange_weak(expected, desired, order, order, scope); + } + +private: + using base_type::ptr; }; } // namespace detail diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp index b152166e4f966..cfe943d176299 100644 --- a/sycl/test/atomic_ref/add.cpp +++ b/sycl/test/atomic_ref/add.cpp @@ -12,11 +12,11 @@ using namespace sycl; using namespace sycl::intel; -template +template void add_fetch_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -27,29 +27,29 @@ void add_fetch_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(sum[0]); - out[gid] = atm.fetch_add(T(1)); + out[gid] = atm.fetch_add(Difference(1)); }); }); } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // Fetch returns original value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_plus_equal_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -60,29 +60,29 @@ void add_plus_equal_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(sum[0]); - out[gid] = atm += T(1); + out[gid] = atm += Difference(1); }); }); } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // += returns updated value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_pre_inc_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -99,23 +99,23 @@ void add_pre_inc_test(queue q, size_t N) { } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // Pre-increment returns updated value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_post_inc_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -132,24 +132,24 @@ void add_post_inc_test(queue q, size_t N) { } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // Post-increment returns original value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_test(queue q, size_t N) { - add_fetch_test(q, N); - add_plus_equal_test(q, N); - add_pre_inc_test(q, N); - add_post_inc_test(q, N); + add_fetch_test(q, N); + add_plus_equal_test(q, N); + add_pre_inc_test(q, N); + add_post_inc_test(q, N); } // Floating-point types do not support pre- or post-increment @@ -173,8 +173,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported add_test(q, N); add_test(q, N); add_test(q, N); @@ -183,7 +181,7 @@ int main() { add_test(q, N); add_test(q, N); add_test(q, N); - //add_test(q, N); + add_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp index 8f563fccb65fd..11c2caa6ef3c4 100644 --- a/sycl/test/atomic_ref/compare_exchange.cpp +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -16,10 +16,10 @@ class compare_exchange_kernel; template void compare_exchange_test(queue q, size_t N) { - const T initial = std::numeric_limits::max(); + const T initial = T(N); T compare_exchange = initial; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer compare_exchange_buf(&compare_exchange, 1); buffer output_buf(output.data(), output.size()); @@ -27,15 +27,16 @@ void compare_exchange_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto exc = compare_exchange_buf.template get_access(cgh); auto out = output_buf.template get_access(cgh); - cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + cgh.parallel_for>(range<1>(N), [=](item<1> + it) { + size_t gid = it.get_id(0); auto atm = atomic_ref(exc[0]); - T result = initial; + T result = T(N); // Avoid copying pointer bool success = atm.compare_exchange_strong(result, (T)gid); if (success) { out[gid] = result; } else { - out[gid] = gid; + out[gid] = T(gid); } }); }); @@ -45,7 +46,7 @@ void compare_exchange_test(queue q, size_t N) { assert(std::count(output.begin(), output.end(), initial) == 1); // All other values should be the index itself or the sentinel value - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { assert(output[i] == T(i) || output[i] == initial); } } @@ -59,8 +60,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported compare_exchange_test(q, N); compare_exchange_test(q, N); compare_exchange_test(q, N); @@ -69,7 +68,7 @@ int main() { compare_exchange_test(q, N); compare_exchange_test(q, N); compare_exchange_test(q, N); - //compare_exchange_test(q, N); + compare_exchange_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp index 2ce1292cfdd55..b4445928ea075 100644 --- a/sycl/test/atomic_ref/exchange.cpp +++ b/sycl/test/atomic_ref/exchange.cpp @@ -16,10 +16,10 @@ class exchange_kernel; template void exchange_test(queue q, size_t N) { - const T initial = std::numeric_limits::max(); + const T initial = T(N); T exchange = initial; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer exchange_buf(&exchange, 1); buffer output_buf(output.data(), output.size()); @@ -28,9 +28,9 @@ void exchange_test(queue q, size_t N) { auto exc = exchange_buf.template get_access(cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + size_t gid = it.get_id(0); auto atm = atomic_ref(exc[0]); - out[gid] = atm.exchange(gid); + out[gid] = atm.exchange(T(gid)); }); }); } @@ -52,8 +52,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported exchange_test(q, N); exchange_test(q, N); exchange_test(q, N); @@ -62,7 +60,7 @@ int main() { exchange_test(q, N); exchange_test(q, N); exchange_test(q, N); - //exchange_test(q, N); + exchange_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp index 274191b9a5ac3..30ae13e16e65e 100644 --- a/sycl/test/atomic_ref/load.cpp +++ b/sycl/test/atomic_ref/load.cpp @@ -16,10 +16,10 @@ class load_kernel; template void load_test(queue q, size_t N) { - T initial = 42; + T initial = T(42); T load = initial; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer load_buf(&load, 1); buffer output_buf(output.data(), output.size()); @@ -28,7 +28,7 @@ void load_test(queue q, size_t N) { auto ld = load_buf.template get_access(cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + size_t gid = it.get_id(0); auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); @@ -49,8 +49,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported load_test(q, N); load_test(q, N); load_test(q, N); @@ -59,7 +57,7 @@ int main() { load_test(q, N); load_test(q, N); load_test(q, N); - //load_test(q, N); + load_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index c8bccf1c28067..0c95653b8219b 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -57,8 +57,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported max_test(q, N); max_test(q, N); max_test(q, N); @@ -67,7 +65,6 @@ int main() { max_test(q, N); max_test(q, N); max_test(q, N); - //max_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index 8313c4931136c..6a0e32ca14bb5 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -55,8 +55,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported min_test(q, N); min_test(q, N); min_test(q, N); @@ -65,7 +63,6 @@ int main() { min_test(q, N); min_test(q, N); min_test(q, N); - //min_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp index eebdba5ced095..db076ee994a3d 100644 --- a/sycl/test/atomic_ref/store.cpp +++ b/sycl/test/atomic_ref/store.cpp @@ -16,14 +16,14 @@ class store_kernel; template void store_test(queue q, size_t N) { - T initial = std::numeric_limits::max(); + T initial = T(N); T store = initial; { buffer store_buf(&store, 1); q.submit([&](handler &cgh) { auto st = store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + size_t gid = it.get_id(0); auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); @@ -45,8 +45,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported store_test(q, N); store_test(q, N); store_test(q, N); @@ -55,7 +53,7 @@ int main() { store_test(q, N); store_test(q, N); store_test(q, N); - //store_test(q, N); + store_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp index 52e338048e7be..10ed75d21da25 100644 --- a/sycl/test/atomic_ref/sub.cpp +++ b/sycl/test/atomic_ref/sub.cpp @@ -12,11 +12,11 @@ using namespace sycl; using namespace sycl::intel; -template +template void sub_fetch_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -27,29 +27,29 @@ void sub_fetch_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(val[0]); - out[gid] = atm.fetch_sub(T(1)); + out[gid] = atm.fetch_sub(Difference(1)); }); }); } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // Fetch returns original value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_plus_equal_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -60,29 +60,29 @@ void sub_plus_equal_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(val[0]); - out[gid] = atm -= T(1); + out[gid] = atm -= Difference(1); }); }); } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // -= returns updated value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_pre_dec_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -99,23 +99,23 @@ void sub_pre_dec_test(queue q, size_t N) { } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // Pre-decrement returns updated value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_post_dec_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -132,24 +132,24 @@ void sub_post_dec_test(queue q, size_t N) { } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // Post-decrement returns original value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_test(queue q, size_t N) { - sub_fetch_test(q, N); - sub_plus_equal_test(q, N); - sub_pre_dec_test(q, N); - sub_post_dec_test(q, N); + sub_fetch_test(q, N); + sub_plus_equal_test(q, N); + sub_pre_dec_test(q, N); + sub_post_dec_test(q, N); } // Floating-point types do not support pre- or post-decrement @@ -173,8 +173,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported sub_test(q, N); sub_test(q, N); sub_test(q, N); @@ -183,7 +181,7 @@ int main() { sub_test(q, N); sub_test(q, N); sub_test(q, N); - //sub_test(q, N); + sub_test(q, N); std::cout << "Test passed." << std::endl; }