diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index af15743438da8..62f8ad91cbe65 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -684,7 +684,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform, switch (param_name) { case PI_PLATFORM_INFO_NAME: return getInfo(param_value_size, param_value, param_value_size_ret, - "NVIDIA CUDA"); + "NVIDIA CUDA BACKEND"); case PI_PLATFORM_INFO_VENDOR: return getInfo(param_value_size, param_value, param_value_size_ret, "NVIDIA Corporation"); @@ -3359,6 +3359,13 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer, ret_err = cuda_piEnqueueMemBufferRead( command_queue, buffer, blocking_map, offset, size, hostPtr, num_events_in_wait_list, event_wait_list, retEvent); + } else { + if (retEvent) { + auto new_event = + _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue); + new_event->record(); + *retEvent = new_event; + } } return ret_err; @@ -3372,7 +3379,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *retEvent) { - pi_result ret_err = PI_INVALID_OPERATION; + pi_result ret_err = PI_SUCCESS; assert(mapped_ptr != nullptr); assert(memobj != nullptr); @@ -3385,6 +3392,13 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, command_queue, memobj, true, memobj->get_map_offset(mapped_ptr), memobj->get_size(), mapped_ptr, num_events_in_wait_list, event_wait_list, retEvent); + } else { + if (retEvent) { + auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, + command_queue); + new_event->record(); + *retEvent = new_event; + } } memobj->unmap(mapped_ptr); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 733322fe0f577..ca9a0139ffc15 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -194,6 +194,14 @@ pi_result OCL(piDevicesGet)(pi_platform platform, pi_device_type device_type, *num_devices = 0; result = PI_SUCCESS; } + + // Absorb the CL_INVALID_DEVICE_TYPE error when the device type is + // not supported in some platforms and just return 0 in num_devices + if (result == CL_INVALID_DEVICE_TYPE) { + assert(num_devices != 0); + *num_devices = 0; + result = PI_SUCCESS; + } return cast<pi_result>(result); } diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 8353396e7792d..ca57887a499e4 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -41,7 +41,7 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices, DeviceIds.push_back(getSyclObjImpl(D)->getHandleRef()); } - if (MPlatform->is_cuda()) { + if (MPlatform->getPlugin().getBackend() == backend::cuda) { #if USE_PI_CUDA const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, UseCUDAPrimaryContext, 0}; diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 10fd2d0fc9aa2..a8ea3c213852e 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -196,6 +196,40 @@ static void filterAllowList(vector_class<RT::PiDevice> &PiDevices, PiDevices.resize(InsertIDx); } +// @return True if the device is invalid for the current backend preferences +static bool isDeviceInvalidForBe(const device &Device) { + + if (Device.is_host()) + return false; + + // Retrieve Platform version to identify CUDA OpenCL platform + // String: OpenCL 1.2 CUDA <version> + const platform platform = Device.get_info<info::device::platform>(); + const std::string platformVersion = + platform.get_info<info::platform::version>(); + const bool HasOpenCL = (platformVersion.find("OpenCL") != std::string::npos); + const bool HasCUDA = (platformVersion.find("CUDA") != std::string::npos); + + backend *PrefBackend = detail::SYCLConfig<detail::SYCL_BE>::get(); + auto DeviceBackend = detail::getSyclObjImpl(Device)->getPlugin().getBackend(); + + // Reject the NVIDIA OpenCL implementation + if (DeviceBackend == backend::opencl && HasCUDA && HasOpenCL) + return true; + + // If no preference, assume OpenCL and reject CUDA + if (DeviceBackend == backend::cuda && !PrefBackend) { + return true; + } else if (!PrefBackend) + return false; + + // If using PI_OPENCL, reject the CUDA backend + if (DeviceBackend == backend::cuda && *PrefBackend == backend::opencl) + return true; + + return false; +} + vector_class<device> platform_impl::get_devices(info::device_type DeviceType) const { vector_class<device> Res; @@ -211,6 +245,7 @@ platform_impl::get_devices(info::device_type DeviceType) const { pi_uint32 NumDevices; const detail::plugin &Plugin = getPlugin(); + Plugin.call<PiApiKind::piDevicesGet>( MPlatform, pi::cast<RT::PiDeviceType>(DeviceType), 0, pi::cast<RT::PiDevice *>(nullptr), &NumDevices); @@ -235,6 +270,9 @@ platform_impl::get_devices(info::device_type DeviceType) const { PiDevice, std::make_shared<platform_impl>(*this))); }); + Res.erase(std::remove_if(Res.begin(), Res.end(), isDeviceInvalidForBe), + Res.end()); + return Res; } diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 4f7f028bd695c..55c2f252b4cc3 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -73,14 +73,6 @@ class platform_impl { /// \return true if this SYCL platform is a host platform. bool is_host() const { return MHostPlatform; }; - bool is_cuda() const { - const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA"; - const string_class PlatformName = - get_platform_info<string_class, info::platform::name>::get(MPlatform, - getPlugin()); - return PlatformName == CUDA_PLATFORM_STRING; - } - /// \return an instance of OpenCL cl_platform_id. cl_platform_id get() const { if (is_host()) diff --git a/sycl/source/detail/plugin.hpp b/sycl/source/detail/plugin.hpp index 2bf5816ab8c72..e766882767c69 100644 --- a/sycl/source/detail/plugin.hpp +++ b/sycl/source/detail/plugin.hpp @@ -81,6 +81,17 @@ class plugin { RT::PiPlugin MPlugin; const backend MBackend; }; // class plugin + +/// Two plugins are the same if their string is the same. +/// There is no need to check the actual string, just the pointer, since +/// there is only one instance of the PiPlugin struct per backend. +/// +/// \ingroup sycl_pi +/// +inline bool operator==(const plugin &lhs, const plugin &rhs) { + return (lhs.getBackend() == rhs.getBackend()); +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 29e3f94053940..b3fc271cc8bf6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -85,29 +85,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, RT::PiProgram Program; - bool IsCUDA = false; - - // TODO: Implement `piProgramCreateWithBinary` to not require extra logic for - // the CUDA backend. -#if USE_PI_CUDA - // All devices in a context are from the same platform. - RT::PiDevice Device = getFirstDevice(Context); - RT::PiPlatform Platform = nullptr; - Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_PLATFORM, sizeof(Platform), - &Platform, nullptr); - size_t PlatformNameSize = 0u; - Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, 0u, nullptr, - &PlatformNameSize); - std::vector<char> PlatformName(PlatformNameSize, '\0'); - Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, - PlatformName.size(), PlatformName.data(), nullptr); - if (PlatformNameSize > 0u && - std::strncmp(PlatformName.data(), "NVIDIA CUDA", PlatformNameSize) == 0) { - IsCUDA = true; - } -#endif // USE_PI_CUDA - - if (IsCUDA) { + if (Context->getPlugin().getBackend() == backend::cuda) { // TODO: Reemplace CreateWithSource with CreateWithBinary in CUDA backend const char *SignedData = reinterpret_cast<const char *>(Data); Plugin.call<PiApiKind::piclProgramCreateWithSource>(Context->getHandleRef(), 1 /*one binary*/, &SignedData, diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 9b65ead92fcde..983861ceee5ea 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -69,14 +69,16 @@ class queue_impl { : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), MHostQueue(MDevice->is_host()), MOpenCLInterop(!MHostQueue) { - if (!MHostQueue) { - MCommandQueue = createQueue(Order); - } + if (!Context->hasDevice(Device)) throw cl::sycl::invalid_parameter_error( "Queue cannot be constructed with the given context and device " "as the context does not contain the given device.", PI_INVALID_DEVICE); + + if (!MHostQueue) { + MCommandQueue = createQueue(Order); + } } /// Constructs a SYCL queue from plugin interoperability handle. @@ -240,6 +242,8 @@ class queue_impl { RT::PiContext Context = MContext->getHandleRef(); RT::PiDevice Device = MDevice->getHandleRef(); const detail::plugin &Plugin = getPlugin(); + + assert(Plugin == MDevice->getPlugin()); RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piQueueCreate>( Context, Device, CreationFlags, &Queue); diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp index aba27e0c926fa..d7e0a586bde81 100644 --- a/sycl/source/device_selector.cpp +++ b/sycl/source/device_selector.cpp @@ -11,6 +11,7 @@ #include <CL/sycl/device_selector.hpp> #include <CL/sycl/exception.hpp> #include <CL/sycl/stl.hpp> +#include <detail/config.hpp> #include <detail/device_impl.hpp> #include <detail/force_device.hpp> // 4.6.1 Device selection class @@ -52,7 +53,8 @@ device device_selector::select_device() const { // preference to the device of the preferred BE. // if (score < dev_score || - (score == dev_score && isDeviceOfPreferredSyclBe(dev))) { + (score == dev_score && isDeviceOfPreferredSyclBe(dev) && + dev_score != -1)) { res = &dev; score = dev_score; } @@ -78,9 +80,7 @@ device device_selector::select_device() const { } int default_selector::operator()(const device &dev) const { - int Score = -1; - // Give preference to device of SYCL BE. if (isDeviceOfPreferredSyclBe(dev)) Score = 50; @@ -114,8 +114,10 @@ int gpu_selector::operator()(const device &dev) const { int cpu_selector::operator()(const device &dev) const { int Score = -1; + if (dev.is_cpu()) { Score = 1000; + // Give preference to device of SYCL BE. if (isDeviceOfPreferredSyclBe(dev)) Score += 50; @@ -125,6 +127,7 @@ int cpu_selector::operator()(const device &dev) const { int accelerator_selector::operator()(const device &dev) const { int Score = -1; + if (dev.is_accelerator()) { Score = 1000; // Give preference to device of SYCL BE. diff --git a/sycl/test/basic_tests/get_nonhost_devices.cpp b/sycl/test/basic_tests/get_nonhost_devices.cpp index aefbfafce82cf..9a41965442874 100644 --- a/sycl/test/basic_tests/get_nonhost_devices.cpp +++ b/sycl/test/basic_tests/get_nonhost_devices.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %t.out // Check that the host device is not included in devices returned by diff --git a/sycl/test/scheduler/DataMovement.cpp b/sycl/test/scheduler/DataMovement.cpp index 62319e35ccea8..b2a3c33ba1a63 100644 --- a/sycl/test/scheduler/DataMovement.cpp +++ b/sycl/test/scheduler/DataMovement.cpp @@ -1,6 +1,3 @@ -// XFAIL: cuda -// TODO: Fix accidential error return when unmapping read-only memory objects. -// // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out -g // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/tools/get_device_count_by_type.cpp b/sycl/tools/get_device_count_by_type.cpp index 31d741950730b..81e6a22a54551 100644 --- a/sycl/tools/get_device_count_by_type.cpp +++ b/sycl/tools/get_device_count_by_type.cpp @@ -8,6 +8,7 @@ #include <CL/cl.h> #include <CL/cl_ext.h> +#include <cstring> #ifdef USE_PI_CUDA #include <cuda.h> @@ -82,6 +83,18 @@ static bool queryOpenCL(cl_device_type deviceType, cl_uint &deviceCount, } for (cl_uint i = 0; i < platformCount; i++) { + + const size_t MAX_PLATFORM_VENDOR = 100u; + char info[MAX_PLATFORM_VENDOR]; + // get platform attribute value + clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, MAX_PLATFORM_VENDOR, + info, NULL); + auto IsNVIDIAOpenCL = strstr(info, "NVIDIA") != NULL; + if (IsNVIDIAOpenCL) { + // Ignore NVIDIA OpenCL platform for testing + continue; + } + cl_uint deviceCountPart = 0; iRet = clGetDeviceIDs(platforms[i], deviceType, 0, nullptr, &deviceCountPart);