[SYCL][CUDA] Minor fixes required to run BabelStream benchmarks on CUDA #1543

Closed. Wants to merge 13 commits.
18 changes: 16 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
@@ -684,7 +684,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform,
switch (param_name) {
case PI_PLATFORM_INFO_NAME:
return getInfo(param_value_size, param_value, param_value_size_ret,
"NVIDIA CUDA");
"NVIDIA CUDA BACKEND");
case PI_PLATFORM_INFO_VENDOR:
return getInfo(param_value_size, param_value, param_value_size_ret,
"NVIDIA Corporation");
@@ -3359,6 +3359,13 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
ret_err = cuda_piEnqueueMemBufferRead(
command_queue, buffer, blocking_map, offset, size, hostPtr,
num_events_in_wait_list, event_wait_list, retEvent);
} else {
if (retEvent) {
auto new_event =
_pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue);
new_event->record();
*retEvent = new_event;
}
}

return ret_err;
@@ -3372,7 +3379,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *retEvent) {
pi_result ret_err = PI_INVALID_OPERATION;
pi_result ret_err = PI_SUCCESS;

assert(mapped_ptr != nullptr);
assert(memobj != nullptr);
@@ -3385,6 +3392,13 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
command_queue, memobj, true, memobj->get_map_offset(mapped_ptr),
memobj->get_size(), mapped_ptr, num_events_in_wait_list, event_wait_list,
retEvent);
} else {
if (retEvent) {
auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
command_queue);
new_event->record();
*retEvent = new_event;
}
}

memobj->unmap(mapped_ptr);
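For context, these map/unmap paths are exercised whenever the runtime copies a buffer back for host access. A minimal sketch (not part of this PR; kernel, names and sizes chosen arbitrarily, and it assumes some SYCL device is available) of the kind of host read-back, e.g. BabelStream-style result verification, that ends up in cuda_piEnqueueMemBufferMap/Unmap:

```cpp
#include <CL/sycl.hpp>

#include <vector>

int main() {
  std::vector<float> data(1024, 1.0f);
  {
    cl::sycl::queue q;
    cl::sycl::buffer<float, 1> buf(data.data(),
                                   cl::sycl::range<1>(data.size()));
    q.submit([&](cl::sycl::handler &cgh) {
      auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
      cgh.parallel_for<class scale>(cl::sycl::range<1>(data.size()),
                                    [=](cl::sycl::id<1> i) { acc[i] *= 2.0f; });
    });
    // Reading the data on the host forces the runtime to map the buffer back.
    // On the CUDA plugin this goes through cuda_piEnqueueMemBufferMap/Unmap,
    // which now always produce a valid event when the caller asks for one.
    auto host_acc = buf.get_access<cl::sycl::access::mode::read>();
    (void)host_acc;
  }
  return 0;
}
```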
8 changes: 8 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
@@ -194,6 +194,14 @@ pi_result OCL(piDevicesGet)(pi_platform platform, pi_device_type device_type,
*num_devices = 0;
result = PI_SUCCESS;
}

// Absorb the CL_INVALID_DEVICE_TYPE error when the device type is
// not supported in some platforms and just return 0 in num_devices
if (result == CL_INVALID_DEVICE_TYPE) {
assert(num_devices != 0);
*num_devices = 0;
result = PI_SUCCESS;
}
return cast<pi_result>(result);
}

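The change above follows the same idea as the existing CL_DEVICE_NOT_FOUND handling just before it: a device-type query that a platform cannot satisfy is reported as zero devices rather than an error. A small sketch of that calling convention (our illustration, not code from the PR):

```cpp
#include <CL/cl.h>

// Count GPU devices on a platform, treating "no devices of this type" style
// errors as zero devices instead of a hard failure.
cl_uint countGpuDevices(cl_platform_id platform) {
  cl_uint numDevices = 0;
  cl_int err =
      clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &numDevices);
  if (err == CL_DEVICE_NOT_FOUND || err == CL_INVALID_DEVICE_TYPE)
    return 0; // Absorb, as the plugin change above does.
  return (err == CL_SUCCESS) ? numDevices : 0;
}
```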
2 changes: 1 addition & 1 deletion sycl/source/detail/context_impl.cpp
@@ -41,7 +41,7 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
DeviceIds.push_back(getSyclObjImpl(D)->getHandleRef());
}

if (MPlatform->is_cuda()) {
if (MPlatform->getPlugin().getBackend() == backend::cuda) {
#if USE_PI_CUDA
const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
UseCUDAPrimaryContext, 0};
38 changes: 38 additions & 0 deletions sycl/source/detail/platform_impl.cpp
@@ -196,6 +196,40 @@ static void filterAllowList(vector_class<RT::PiDevice> &PiDevices,
PiDevices.resize(InsertIDx);
}

// @return True if the device is invalid for the current backend preferences
static bool isDeviceInvalidForBe(const device &Device) {

if (Device.is_host())
return false;

// Retrieve Platform version to identify CUDA OpenCL platform
// String: OpenCL 1.2 CUDA <version>
const platform platform = Device.get_info<info::device::platform>();
const std::string platformVersion =
platform.get_info<info::platform::version>();
const bool HasOpenCL = (platformVersion.find("OpenCL") != std::string::npos);
const bool HasCUDA = (platformVersion.find("CUDA") != std::string::npos);

backend *PrefBackend = detail::SYCLConfig<detail::SYCL_BE>::get();
Contributor:

Suggested change:
backend *PrefBackend = detail::SYCLConfig<detail::SYCL_BE>::get();

Let's move this line to after rejecting the NVIDIA OpenCL implementation.

auto DeviceBackend = detail::getSyclObjImpl(Device)->getPlugin().getBackend();

// Reject the NVIDIA OpenCL implementation
if (DeviceBackend == backend::opencl && HasCUDA && HasOpenCL)
return true;

Contributor @bader, May 7, 2020:

Suggested change (i.e. move the line down to here):
backend *PrefBackend = detail::SYCLConfig<detail::SYCL_BE>::get();

// If no preference, assume OpenCL and reject CUDA
if (DeviceBackend == backend::cuda && !PrefBackend) {
return true;
} else if (!PrefBackend)
return false;

// If using PI_OPENCL, reject the CUDA backend
if (DeviceBackend == backend::cuda && *PrefBackend == backend::opencl)
return true;

return false;
}
Comment on lines +220 to +231
Contributor, suggested change (collapse the branches above into a single condition):

// If no preference, assume OpenCL and reject CUDA
if (DeviceBackend == backend::cuda && (!PrefBackend || *PrefBackend == backend::opencl))
return true;
return false;
}

Contributor:

Minor fixes to CUDA backend to get BabelStream (https://github.com/UoB-HPC/BabelStream) running, each one in an individual commit:

  • Fixed: default selection for CUDA devices in presence of the NVIDIA OpenCL platform
  • Fixed: Missing return event on Unmap
  • Throw error in queue constructor when backend is not correct (currently segfaults)

From the PR description, this piece of code seems to be "not required to run BabelStream benchmarks on CUDA" and removing the NVIDIA OpenCL implementation is enough. Right?

If so, @Ruyk, can we move this part to a separate PR to make @hiaselhans happy?

Contributor Author:

Sure, I'll split out the changes.

Contributor Author:

> "not required to run BabelStream benchmarks on CUDA" and removing the NVIDIA OpenCL implementation is enough. Right?

Actually, BabelStream doesn't use a device selector but a device list, so the NVIDIA OpenCL platform still appears. There are other workarounds I can do, so not a problem.

Contributor:

> If so, @Ruyk, can we move this part to a separate PR to make @hiaselhans happy?

Thanks @bader 😄
Well, maybe it's me being a dickhead about this.

// If SYCL_BE is set then skip platforms which doesn't have specified

Just saying we're re-implementing things already implemented somewhere else...


vector_class<device>
platform_impl::get_devices(info::device_type DeviceType) const {
vector_class<device> Res;
@@ -211,6 +245,7 @@ platform_impl::get_devices(info::device_type DeviceType) const {

pi_uint32 NumDevices;
const detail::plugin &Plugin = getPlugin();

Plugin.call<PiApiKind::piDevicesGet>(
MPlatform, pi::cast<RT::PiDeviceType>(DeviceType), 0,
pi::cast<RT::PiDevice *>(nullptr), &NumDevices);
@@ -235,6 +270,9 @@ platform_impl::get_devices(info::device_type DeviceType) const {
PiDevice, std::make_shared<platform_impl>(*this)));
});

Res.erase(std::remove_if(Res.begin(), Res.end(), isDeviceInvalidForBe),
Res.end());

return Res;
}

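For context (our sketch, not from the PR): BabelStream enumerates devices from a list rather than through a device selector, which is why the duplicate NVIDIA OpenCL entry was visible to it. The filtering added to get_devices above affects exactly this kind of enumeration, assuming a system where both the CUDA backend and the NVIDIA OpenCL platform are installed:

```cpp
#include <CL/sycl.hpp>

#include <iostream>

int main() {
  // Enumerate every platform and device the runtime exposes. Without the
  // filtering above, the same NVIDIA GPU could be listed twice: once through
  // the CUDA backend and once through the NVIDIA OpenCL platform.
  for (const auto &plt : cl::sycl::platform::get_platforms()) {
    std::cout << plt.get_info<cl::sycl::info::platform::name>() << "\n";
    for (const auto &dev : plt.get_devices()) {
      std::cout << "  " << dev.get_info<cl::sycl::info::device::name>()
                << "\n";
    }
  }
  return 0;
}
```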
8 changes: 0 additions & 8 deletions sycl/source/detail/platform_impl.hpp
@@ -73,14 +73,6 @@ class platform_impl {
/// \return true if this SYCL platform is a host platform.
bool is_host() const { return MHostPlatform; };

bool is_cuda() const {
const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA";
const string_class PlatformName =
get_platform_info<string_class, info::platform::name>::get(MPlatform,
getPlugin());
return PlatformName == CUDA_PLATFORM_STRING;
}

/// \return an instance of OpenCL cl_platform_id.
cl_platform_id get() const {
if (is_host())
11 changes: 11 additions & 0 deletions sycl/source/detail/plugin.hpp
@@ -81,6 +81,17 @@ class plugin {
RT::PiPlugin MPlugin;
const backend MBackend;
}; // class plugin

/// Two plugins are the same if their string is the same.
/// There is no need to check the actual string, just the pointer, since
/// there is only one instance of the PiPlugin struct per backend.
///
/// \ingroup sycl_pi
///
inline bool operator==(const plugin &lhs, const plugin &rhs) {
return (lhs.getBackend() == rhs.getBackend());
}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
24 changes: 1 addition & 23 deletions sycl/source/detail/program_manager/program_manager.cpp
@@ -85,29 +85,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,

RT::PiProgram Program;

bool IsCUDA = false;

// TODO: Implement `piProgramCreateWithBinary` to not require extra logic for
// the CUDA backend.
#if USE_PI_CUDA
// All devices in a context are from the same platform.
RT::PiDevice Device = getFirstDevice(Context);
RT::PiPlatform Platform = nullptr;
Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_PLATFORM, sizeof(Platform),
&Platform, nullptr);
size_t PlatformNameSize = 0u;
Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, 0u, nullptr,
&PlatformNameSize);
std::vector<char> PlatformName(PlatformNameSize, '\0');
Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME,
PlatformName.size(), PlatformName.data(), nullptr);
if (PlatformNameSize > 0u &&
std::strncmp(PlatformName.data(), "NVIDIA CUDA", PlatformNameSize) == 0) {
IsCUDA = true;
}
#endif // USE_PI_CUDA

if (IsCUDA) {
if (Context->getPlugin().getBackend() == backend::cuda) {
// TODO: Replace CreateWithSource with CreateWithBinary in CUDA backend
const char *SignedData = reinterpret_cast<const char *>(Data);
Plugin.call<PiApiKind::piclProgramCreateWithSource>(Context->getHandleRef(), 1 /*one binary*/, &SignedData,
10 changes: 7 additions & 3 deletions sycl/source/detail/queue_impl.hpp
@@ -69,14 +69,16 @@ class queue_impl {
: MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
MPropList(PropList), MHostQueue(MDevice->is_host()),
MOpenCLInterop(!MHostQueue) {
if (!MHostQueue) {
MCommandQueue = createQueue(Order);
}

if (!Context->hasDevice(Device))
throw cl::sycl::invalid_parameter_error(
"Queue cannot be constructed with the given context and device "
"as the context does not contain the given device.",
PI_INVALID_DEVICE);

if (!MHostQueue) {
MCommandQueue = createQueue(Order);
}
}

/// Constructs a SYCL queue from plugin interoperability handle.
@@ -240,6 +242,8 @@ class queue_impl {
RT::PiContext Context = MContext->getHandleRef();
RT::PiDevice Device = MDevice->getHandleRef();
const detail::plugin &Plugin = getPlugin();

assert(Plugin == MDevice->getPlugin());
RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piQueueCreate>(
Context, Device, CreationFlags, &Queue);

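The reordering above reports a context/device mismatch as invalid_parameter_error before any backend queue is created (previously this could segfault, per the PR description). A hedged illustration of that error path; whether this exact pairing is reachable depends on how the queue(context, device_selector) constructor resolves its device, so treat it as a sketch rather than a guaranteed reproducer:

```cpp
#include <CL/sycl.hpp>

#include <iostream>

int main() {
  cl::sycl::device dev{cl::sycl::default_selector{}};
  cl::sycl::context ctx{dev};
  try {
    // Pairing the context with a device it does not contain should now be
    // rejected with invalid_parameter_error before any plugin call is made.
    cl::sycl::queue q{ctx, cl::sycl::host_selector{}};
    (void)q;
  } catch (const cl::sycl::exception &e) {
    std::cout << "Rejected as expected: " << e.what() << "\n";
  }
  return 0;
}
```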
9 changes: 6 additions & 3 deletions sycl/source/device_selector.cpp
@@ -11,6 +11,7 @@
#include <CL/sycl/device_selector.hpp>
#include <CL/sycl/exception.hpp>
#include <CL/sycl/stl.hpp>
#include <detail/config.hpp>
#include <detail/device_impl.hpp>
#include <detail/force_device.hpp>
// 4.6.1 Device selection class
@@ -52,7 +53,8 @@ device device_selector::select_device() const {
// preference to the device of the preferred BE.
//
if (score < dev_score ||
(score == dev_score && isDeviceOfPreferredSyclBe(dev))) {
(score == dev_score && isDeviceOfPreferredSyclBe(dev) &&
dev_score != -1)) {
res = &dev;
score = dev_score;
}
@@ -78,9 +80,7 @@ device device_selector::select_device() const {
}

int default_selector::operator()(const device &dev) const {

int Score = -1;

// Give preference to device of SYCL BE.
if (isDeviceOfPreferredSyclBe(dev))
Score = 50;
@@ -114,8 +114,10 @@ int gpu_selector::operator()(const device &dev) const {

int cpu_selector::operator()(const device &dev) const {
int Score = -1;

if (dev.is_cpu()) {
Score = 1000;

// Give preference to device of SYCL BE.
if (isDeviceOfPreferredSyclBe(dev))
Score += 50;
@@ -125,6 +127,7 @@ int cpu_selector::operator()(const device &dev) const {

int accelerator_selector::operator()(const device &dev) const {
int Score = -1;

if (dev.is_accelerator()) {
Score = 1000;
// Give preference to device of SYCL BE.
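The added `dev_score != -1` guard matters because returning a negative score is how a selector rejects a device; without it, a rejected device could still be kept on the tie-break just for belonging to the preferred SYCL backend. An illustrative selector (our example, assuming a GPU is present so the queue construction succeeds):

```cpp
#include <CL/sycl.hpp>

// A selector that rejects everything except GPUs by returning -1.
class gpu_only_selector : public cl::sycl::device_selector {
public:
  int operator()(const cl::sycl::device &dev) const override {
    return dev.is_gpu() ? 100 : -1;
  }
};

int main() {
  // Before the extra `dev_score != -1` check, a device scored -1 could still
  // be chosen on the tie-break simply because it belonged to the preferred
  // SYCL backend and nothing scored higher.
  cl::sycl::queue q{gpu_only_selector{}};
  (void)q;
  return 0;
}
```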
2 changes: 1 addition & 1 deletion sycl/test/basic_tests/get_nonhost_devices.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %t.out

// Check that the host device is not included in devices returned by
3 changes: 0 additions & 3 deletions sycl/test/scheduler/DataMovement.cpp
@@ -1,6 +1,3 @@
// XFAIL: cuda
// TODO: Fix accidential error return when unmapping read-only memory objects.
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out -g
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
13 changes: 13 additions & 0 deletions sycl/tools/get_device_count_by_type.cpp
@@ -8,6 +8,7 @@

#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <cstring>

#ifdef USE_PI_CUDA
#include <cuda.h>
@@ -82,6 +83,18 @@ static bool queryOpenCL(cl_device_type deviceType, cl_uint &deviceCount,
}

for (cl_uint i = 0; i < platformCount; i++) {

const size_t MAX_PLATFORM_VENDOR = 100u;
char info[MAX_PLATFORM_VENDOR];
// get platform attribute value
clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, MAX_PLATFORM_VENDOR,
info, NULL);
auto IsNVIDIAOpenCL = strstr(info, "NVIDIA") != NULL;
if (IsNVIDIAOpenCL) {
// Ignore NVIDIA OpenCL platform for testing
continue;
}

cl_uint deviceCountPart = 0;
iRet =
clGetDeviceIDs(platforms[i], deviceType, 0, nullptr, &deviceCountPart);
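The fixed 100-byte buffer above assumes the vendor string fits and is NUL-terminated. A slightly more defensive variant (our sketch, not part of the PR) queries the size first:

```cpp
#include <CL/cl.h>

#include <string>

// Return the CL_PLATFORM_VENDOR string, or an empty string on error.
static std::string getPlatformVendor(cl_platform_id platform) {
  size_t size = 0;
  if (clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 0, nullptr, &size) !=
          CL_SUCCESS ||
      size == 0)
    return {};
  std::string vendor(size, '\0');
  if (clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, size, &vendor[0],
                        nullptr) != CL_SUCCESS)
    return {};
  vendor.pop_back(); // drop the trailing NUL included in the reported size
  return vendor;
}
```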