diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 8714ec70493b5..b16b6b515d04f 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -628,13 +628,13 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation( // Read the annotation values and create the new annotation string. std::string NewAnnotString = ""; auto Properties = parseSYCLPropertiesString(M, IntrInst); - for (auto &Property : Properties) { + for (const auto &[PropName, PropVal] : Properties) { // sycl-alignment is converted to align on // previous parseAlignmentAndApply(), dropping here - if (*Property.first == "sycl-alignment") + if (PropName == "sycl-alignment") continue; - auto DecorIt = SpirvDecorMap.find(*Property.first); + auto DecorIt = SpirvDecorMap.find(*PropName); if (DecorIt == SpirvDecorMap.end()) continue; uint32_t DecorCode = DecorIt->second.Code; @@ -644,8 +644,8 @@ bool CompileTimePropertiesPass::transformSYCLPropertiesAnnotation( // string values are handled correctly. Note that " around values are // always valid, even if the decoration parameters are not strings. NewAnnotString += "{" + std::to_string(DecorCode); - if (Property.second) - NewAnnotString += ":\"" + Property.second->str() + "\""; + if (PropVal) + NewAnnotString += ":\"" + PropVal->str() + "\""; NewAnnotString += "}"; } diff --git a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp new file mode 100644 index 0000000000000..9271af5059402 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp @@ -0,0 +1,269 @@ +//==--------------- prefetch.hpp --- SYCL prefetch extension ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class cache_level { L1 = 0, L2 = 1, L3 = 2, L4 = 3 }; + +struct nontemporal; + +struct prefetch_hint_key { + template + using value_t = + property_value, Hint>; +}; + +template +inline constexpr prefetch_hint_key::value_t prefetch_hint; + +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L1; +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L2; +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L3; +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L4; + +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L1_nt; +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L2_nt; +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L3_nt; +inline constexpr prefetch_hint_key::value_t + prefetch_hint_L4_nt; + +namespace detail { +template <> struct IsCompileTimeProperty : std::true_type {}; + +template +struct PropertyMetaInfo> { + static constexpr const char *name = std::is_same_v + ? "sycl-prefetch-hint-nt" + : "sycl-prefetch-hint"; + static constexpr int value = static_cast(Level); +}; + +template +inline constexpr bool check_prefetch_AS = + AS == access::address_space::global_space || + AS == access::address_space::generic_space; + +template +inline constexpr bool check_prefetch_acc_mode = + mode == access_mode::read || mode == access_mode::read_write; + +template +void prefetch_impl(T *ptr, size_t bytes, Properties properties) { +#ifdef __SYCL_DEVICE_ONLY__ + auto *ptrGlobalAS = __SYCL_GenericCastToPtrExplicit_ToGlobal(ptr); + const __attribute__((opencl_global)) char *ptrAnnotated = nullptr; + if constexpr (!properties.template has_property()) { + ptrAnnotated = __builtin_intel_sycl_ptr_annotation( + ptrGlobalAS, "sycl-prefetch-hint", static_cast(cache_level::L1)); + } else { + auto prop = properties.template get_property(); + ptrAnnotated = __builtin_intel_sycl_ptr_annotation( + ptrGlobalAS, PropertyMetaInfo::name, + PropertyMetaInfo::value); + } + __spirv_ocl_prefetch(ptrAnnotated, bytes); +#else + std::ignore = ptr; + std::ignore = bytes; + std::ignore = properties; +#endif +} + +template +void joint_prefetch_impl(Group g, T *ptr, size_t bytes, Properties properties) { + // Although calling joint_prefetch is functionally equivalent to calling + // prefetch from every work-item in a group, native suppurt may be added to to + // issue cooperative prefetches more efficiently on some hardware. + std::ignore = g; + prefetch_impl(ptr, bytes, properties); +} +} // namespace detail + +template +void prefetch(void *ptr, Properties properties = {}) { + detail::prefetch_impl(ptr, 1, properties); +} + +template +void prefetch(void *ptr, size_t bytes, Properties properties = {}) { + detail::prefetch_impl(ptr, bytes, properties); +} + +template +void prefetch(T *ptr, Properties properties = {}) { + detail::prefetch_impl(ptr, sizeof(T), properties); +} + +template +void prefetch(T *ptr, size_t count, Properties properties = {}) { + detail::prefetch_impl(ptr, count * sizeof(T), properties); +} + +template +std::enable_if_t> +prefetch(multi_ptr ptr, + Properties properties = {}) { + detail::prefetch_impl(ptr.get(), 1, properties); +} + +template +std::enable_if_t> +prefetch(multi_ptr ptr, size_t bytes, + Properties properties = {}) { + detail::prefetch_impl(ptr.get(), bytes, properties); +} + +template +std::enable_if_t> +prefetch(multi_ptr ptr, + Properties properties = {}) { + detail::prefetch_impl(ptr.get(), sizeof(T), properties); +} + +template +std::enable_if_t> +prefetch(multi_ptr ptr, size_t count, + Properties properties = {}) { + detail::prefetch_impl(ptr.get(), count * sizeof(T), properties); +} + +template +std::enable_if_t && + (Dimensions > 0)> +prefetch( + accessor acc, + id offset, Properties properties = {}) { + detail::prefetch_impl(&acc[offset], sizeof(DataT), properties); +} + +template +std::enable_if_t && + (Dimensions > 0)> +prefetch( + accessor acc, + size_t offset, size_t count, Properties properties = {}) { + detail::prefetch_impl(&acc[offset], count * sizeof(DataT), properties); +} + +template +std::enable_if_t>> +joint_prefetch(Group g, void *ptr, Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr, 1, properties); +} + +template +std::enable_if_t>> +joint_prefetch(Group g, void *ptr, size_t bytes, Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr, bytes, properties); +} + +template +std::enable_if_t>> +joint_prefetch(Group g, T *ptr, Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr, sizeof(T), properties); +} + +template +std::enable_if_t>> +joint_prefetch(Group g, T *ptr, size_t count, Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr, count * sizeof(T), properties); +} + +template +std::enable_if_t && + sycl::is_group_v>> +joint_prefetch(Group g, multi_ptr ptr, + Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr.get(), 1, properties); +} + +template +std::enable_if_t && + sycl::is_group_v>> +joint_prefetch(Group g, multi_ptr ptr, + size_t bytes, Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr.get(), bytes, properties); +} + +template +std::enable_if_t && + sycl::is_group_v>> +joint_prefetch(Group g, multi_ptr ptr, + Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr.get(), sizeof(T), properties); +} + +template +std::enable_if_t && + sycl::is_group_v>> +joint_prefetch(Group g, multi_ptr ptr, + size_t count, Properties properties = {}) { + detail::joint_prefetch_impl(g, ptr.get(), count * sizeof(T), properties); +} + +template +std::enable_if_t && + (Dimensions > 0) && sycl::is_group_v>> +joint_prefetch( + Group g, + accessor acc, + size_t offset, Properties properties = {}) { + detail::joint_prefetch_impl(g, &acc[offset], sizeof(DataT), properties); +} + +template +std::enable_if_t && + (Dimensions > 0) && sycl::is_group_v>> +joint_prefetch( + Group g, + accessor acc, + size_t offset, size_t count, Properties properties = {}) { + detail::joint_prefetch_impl(g, &acc[offset], count * sizeof(DataT), + properties); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 5b55801796e01..ca663c981c5e4 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index 9ef771010320b..f0d686a889fe9 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -90,6 +90,7 @@ inline namespace _V1 { #define SYCL_EXT_CODEPLAY_MAX_REGISTERS_PER_WORK_GROUP_QUERY 1 #define SYCL_EXT_ONEAPI_DEVICE_GLOBAL 1 #define SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST 1 +#define SYCL_EXT_ONEAPI_PREFETCH 1 #ifndef __has_include #define __has_include(x) 0 diff --git a/sycl/test/extensions/prefetch.cpp b/sycl/test/extensions/prefetch.cpp new file mode 100644 index 0000000000000..56fd678f83336 --- /dev/null +++ b/sycl/test/extensions/prefetch.cpp @@ -0,0 +1,62 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +#include + +int data[] = {0, 1, 2, 3}; + +int main() { + namespace syclex = sycl::ext::oneapi::experimental; + void *dataPtrVoid = data; + int *dataPtrInt = data; + auto prop = syclex::properties{syclex::prefetch_hint_L1}; + + { + sycl::buffer buf(data, 4); + sycl::queue q; + q.submit([&](sycl::handler &h) { + auto acc = buf.get_access(h); + h.parallel_for( + sycl::nd_range<1>(1, 1), ([=](sycl::nd_item<1> index) { + syclex::prefetch(dataPtrVoid, prop); + syclex::prefetch(dataPtrVoid, 16, prop); + + syclex::prefetch(dataPtrInt, prop); + syclex::prefetch(dataPtrInt, 4, prop); + + auto mPtrVoid = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(dataPtrVoid); + syclex::prefetch(mPtrVoid, prop); + syclex::prefetch(mPtrVoid, 16, prop); + + auto mPtrInt = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(dataPtrInt); + syclex::prefetch(mPtrInt, prop); + syclex::prefetch(mPtrInt, 8, prop); + + syclex::prefetch(acc, sycl::id(0), prop); + syclex::prefetch(acc, sycl::id(0), 4, prop); + + auto g = index.get_group(); + syclex::joint_prefetch(g, dataPtrVoid, prop); + syclex::joint_prefetch(g, dataPtrVoid, 16, prop); + + syclex::joint_prefetch(g, dataPtrInt, prop); + syclex::joint_prefetch(g, dataPtrInt, 4, prop); + + syclex::joint_prefetch(g, mPtrVoid, prop); + syclex::joint_prefetch(g, mPtrVoid, 16, prop); + + syclex::joint_prefetch(g, mPtrInt, prop); + syclex::joint_prefetch(g, mPtrInt, 8, prop); + + syclex::joint_prefetch(g, acc, sycl::id(0), prop); + syclex::joint_prefetch(g, acc, sycl::id(0), 4, prop); + })); + }); + q.wait(); + } + + return 0; +}