[SYCL][CUDA] Minor fixes required to run BabelStream benchmarks on CUDA #1543

Closed
wants to merge 13 commits
18 changes: 16 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
@@ -684,7 +684,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform,
  switch (param_name) {
  case PI_PLATFORM_INFO_NAME:
    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   "NVIDIA CUDA");
+                   "NVIDIA CUDA BACKEND");
  case PI_PLATFORM_INFO_VENDOR:
    return getInfo(param_value_size, param_value, param_value_size_ret,
                   "NVIDIA Corporation");
@@ -3359,6 +3359,13 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
    ret_err = cuda_piEnqueueMemBufferRead(
        command_queue, buffer, blocking_map, offset, size, hostPtr,
        num_events_in_wait_list, event_wait_list, retEvent);
+  } else {
+    if (retEvent) {
+      auto new_event =
+          _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue);
+      new_event->record();
+      *retEvent = new_event;
+    }
  }

  return ret_err;
@@ -3372,7 +3379,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
                                 pi_uint32 num_events_in_wait_list,
                                 const pi_event *event_wait_list,
                                 pi_event *retEvent) {
-  pi_result ret_err = PI_INVALID_OPERATION;
+  pi_result ret_err = PI_SUCCESS;

  assert(mapped_ptr != nullptr);
  assert(memobj != nullptr);
@@ -3385,6 +3392,13 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
        command_queue, memobj, true, memobj->get_map_offset(mapped_ptr),
        memobj->get_size(), mapped_ptr, num_events_in_wait_list, event_wait_list,
        retEvent);
+  } else {
+    if (retEvent) {
+      auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
+                                              command_queue);
+      new_event->record();
+      *retEvent = new_event;
+    }
  }

  memobj->unmap(mapped_ptr);
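Why the two new else branches matter: when no copy is required, nothing is
enqueued, but a caller that asked for an event must still receive one it can
wait on, so the plugin now records a native event on that path as well. Below
is a minimal SYCL-level sketch of the pattern that exercises this path (a
host accessor forcing a buffer map, as in BabelStream's result verification);
the kernel name, sizes, and values are illustrative only:

#include <CL/sycl.hpp>
#include <vector>
using namespace cl::sycl;

int main() {
  queue Q;
  std::vector<double> Data(1024, 1.0);
  {
    buffer<double, 1> Buf(Data.data(), range<1>(Data.size()));
    Q.submit([&](handler &CGH) {
      auto Acc = Buf.get_access<access::mode::read_write>(CGH);
      CGH.parallel_for<class scale>(range<1>(Data.size()),
                                    [=](id<1> I) { Acc[I] *= 2.0; });
    });
    // The host accessor triggers a buffer map, and the runtime may wait on
    // the event returned by the map/unmap entry points patched above.
    // Before this fix, that event was left unset on the no-copy path.
    auto HostAcc = Buf.get_access<access::mode::read>();
  }
  return 0;
}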
6 changes: 3 additions & 3 deletions sycl/source/detail/platform_impl.hpp
@@ -74,10 +74,10 @@ class platform_impl {
  bool is_host() const { return MHostPlatform; };

  bool is_cuda() const {
-    const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA";
+    const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA BACKEND";
    const string_class PlatformName =
-        get_platform_info<string_class, info::platform::name>::get(MPlatform,
-                                                                   getPlugin());
+        get_platform_info<string_class, info::platform::version>::get(
+            MPlatform, getPlugin());
    return PlatformName == CUDA_PLATFORM_STRING;
  }

13 changes: 12 additions & 1 deletion sycl/source/detail/plugin.hpp
@@ -23,7 +23,7 @@ class plugin {
public:
  plugin() = delete;

-  plugin(RT::PiPlugin Plugin) : MPlugin(Plugin) {
+  explicit plugin(RT::PiPlugin Plugin) : MPlugin(Plugin) {
    MPiEnableTrace = (std::getenv("SYCL_PI_TRACE") != nullptr);
  }
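The added explicit keyword stops a raw RT::PiPlugin struct from converting
implicitly into the detail::plugin wrapper. A minimal sketch of what changes
at call sites (RawPlugin and take are hypothetical names; the struct is
assumed to be populated by piPluginInit):

RT::PiPlugin RawPlugin;       // assumed filled in by piPluginInit
plugin P{RawPlugin};          // still fine: explicit construction
// plugin Q = RawPlugin;      // no longer compiles after this change
// void take(const plugin &);
// take(RawPlugin);           // also rejected: no implicit conversion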

@@ -79,6 +79,17 @@ class plugin {
  bool MPiEnableTrace;

}; // class plugin
+
+/// Two plugins are the same if their string is the same.
+/// There is no need to check the actual string, just the pointer, since
+/// there is only one instance of the PiPlugin struct per backend.
+///
+/// \ingroup sycl_pi
+///
+inline bool operator==(const plugin &lhs, const plugin &rhs) {
+  return (lhs.getPiPlugin().PluginVersion == rhs.getPiPlugin().PluginVersion);
+}
+

Contributor: Should this just check that the MBackend (getBackend) of the plugins is the same?
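A sketch of the backend-based comparison the reviewer suggests, assuming the
plugin class exposes a getBackend() accessor for the MBackend member named in
the comment (hypothetical until the backend-query patches referenced later in
this PR land):

inline bool operator==(const plugin &lhs, const plugin &rhs) {
  return lhs.getBackend() == rhs.getBackend();
}

The new assert in queue_impl.hpp below is one consumer of this operator,
whichever form it takes.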

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
24 changes: 1 addition & 23 deletions sycl/source/detail/program_manager/program_manager.cpp
@@ -84,29 +84,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,

  RT::PiProgram Program;

-  bool IsCUDA = false;
-
-  // TODO: Implement `piProgramCreateWithBinary` to not require extra logic for
-  // the CUDA backend.
-#if USE_PI_CUDA
-  // All devices in a context are from the same platform.
-  RT::PiDevice Device = getFirstDevice(Context);
-  RT::PiPlatform Platform = nullptr;
-  Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_PLATFORM, sizeof(Platform),
-                                          &Platform, nullptr);
-  size_t PlatformNameSize = 0u;
-  Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, 0u, nullptr,
-                                            &PlatformNameSize);
-  std::vector<char> PlatformName(PlatformNameSize, '\0');
-  Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME,
-                                            PlatformName.size(), PlatformName.data(), nullptr);
-  if (PlatformNameSize > 0u &&
-      std::strncmp(PlatformName.data(), "NVIDIA CUDA", PlatformNameSize) == 0) {
-    IsCUDA = true;
-  }
-#endif // USE_PI_CUDA
-
-  if (IsCUDA) {
+  if (Context->getPlatformImpl()->is_cuda()) {
    // TODO: Replace CreateWithSource with CreateWithBinary in CUDA backend
    const char *SignedData = reinterpret_cast<const char *>(Data);
    Plugin.call<PiApiKind::piclProgramCreateWithSource>(Context->getHandleRef(), 1 /*one binary*/, &SignedData,
10 changes: 7 additions & 3 deletions sycl/source/detail/queue_impl.hpp
@@ -69,14 +69,16 @@ class queue_impl {
      : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
        MPropList(PropList), MHostQueue(MDevice->is_host()),
        MOpenCLInterop(!MHostQueue) {
-    if (!MHostQueue) {
-      MCommandQueue = createQueue(Order);
-    }
+
    if (!Context->hasDevice(Device))
      throw cl::sycl::invalid_parameter_error(
          "Queue cannot be constructed with the given context and device "
          "as the context does not contain the given device.",
          PI_INVALID_DEVICE);
+
+    if (!MHostQueue) {
+      MCommandQueue = createQueue(Order);
+    }
  }

/// Constructs a SYCL queue from plugin interoperability handle.
@@ -240,6 +242,8 @@ class queue_impl {
    RT::PiContext Context = MContext->getHandleRef();
    RT::PiDevice Device = MDevice->getHandleRef();
    const detail::plugin &Plugin = getPlugin();
+
+    assert(Plugin == MDevice->getPlugin());
    RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piQueueCreate>(
        Context, Device, CreationFlags, &Queue);
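With the check moved ahead of createQueue, a mismatched context/device pair
is rejected before any native queue is created. A minimal sketch of the
guarded case, assuming a machine that exposes both a GPU and a CPU device
(the setup is illustrative only):

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
  context GpuContext{gpu_selector{}.select_device()};
  try {
    // The CPU device is not contained in GpuContext, so construction must
    // throw instead of creating a native queue first.
    queue Q{GpuContext, cpu_selector{}};
  } catch (const invalid_parameter_error &) {
    // Expected: "Queue cannot be constructed with the given context and
    // device as the context does not contain the given device."
  }
  return 0;
}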

10 changes: 6 additions & 4 deletions sycl/source/device_selector.cpp
@@ -43,14 +43,16 @@ int default_selector::operator()(const device &dev) const {
  const platform platform = dev.get_info<info::device::platform>();
  const std::string platformVersion =
      platform.get_info<info::platform::version>();
+  const bool HasCudaString =
+      platformVersion.find("CUDA") != std::string::npos;
+  const bool HasOpenCLString =
+      platformVersion.find("OpenCL") != std::string::npos;
  // If using PI_CUDA, don't accept a non-CUDA device
-  if (platformVersion.find("CUDA") == std::string::npos &&
-      backend == "PI_CUDA") {
+  if (HasCudaString && HasOpenCLString && backend == "PI_CUDA") {

Contributor: This entire code fragment (lines 37-58) is in conflict with #1522, where SYCL_BE is used as the preferred backend, not the forced one. For example, if the target device type is CPU, which the CUDA PI plugin does not support, and the user specified SYCL_BE=PI_CUDA, then with this code no device will be found and an exception thrown, while with that code it will use the plugin where the CPU is supported, i.e. OpenCL. I think both semantics are useful and we should probably fork SYCL_BE into SYCL_PI_FORCE & SYCL_PI_PREFER.

Contributor Author: The only thing that line 51 does is that, if a platform has both the CUDA string and the OpenCL string, it is rejected if, and only if, the desired backend is PI_CUDA. This prevents the selection of the NVIDIA OpenCL platform when the user wants to use the PI CUDA backend. Everything else should work the same way. I don't think there is a need to fork the env. variables at this point.

Contributor: Sorry, I misread the code. It is not rejecting the non-CUDA devices under SYCL_BE=PI_CUDA as I initially thought. It still has to be merged properly with the code coming in #1522 (tagging @againull). Also, it should use platform.getBackend() instead of checking the platform name.

Contributor Author (@Ruyk, Apr 23, 2020): Should I wait until #1522 and #1490 are merged? I can't use platform.getBackend() until then. Alternatively, I can use is_cuda until the other PRs are there.

Contributor Author: I've rebased this on top of the mentioned patches, so it now uses the plugin and the backend.

    return -1;
  }
  // If using PI_OPENCL, don't accept a non-OpenCL device
-  if (platformVersion.find("OpenCL") == std::string::npos &&
-      backend == "PI_OPENCL") {
+  if (HasCudaString && !HasOpenCLString && backend == "PI_OPENCL") {
    return -1;
  }
}
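For context, here are hedged examples of the two platform version strings
this logic distinguishes (the exact strings are driver- and build-dependent;
these are illustrative only):

  PI CUDA backend : "NVIDIA CUDA BACKEND"    -> HasCudaString, !HasOpenCLString
  NVIDIA OpenCL   : "OpenCL 1.2 CUDA 10.2"   -> HasCudaString,  HasOpenCLString

So under SYCL_BE=PI_CUDA the first check rejects NVIDIA's OpenCL platform
(both substrings present), and under SYCL_BE=PI_OPENCL the second check
rejects the PI CUDA backend platform (CUDA present without OpenCL).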