[SYCL][CUDA] Minor fixes required to run BabelStream benchmarks on CUDA #1543

Closed
wants to merge 13 commits
18 changes: 16 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
@@ -684,7 +684,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform,
  switch (param_name) {
  case PI_PLATFORM_INFO_NAME:
    return getInfo(param_value_size, param_value, param_value_size_ret,
-                   "NVIDIA CUDA");
+                   "NVIDIA CUDA BACKEND");
  case PI_PLATFORM_INFO_VENDOR:
    return getInfo(param_value_size, param_value, param_value_size_ret,
                   "NVIDIA Corporation");
@@ -3359,6 +3359,13 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
    ret_err = cuda_piEnqueueMemBufferRead(
        command_queue, buffer, blocking_map, offset, size, hostPtr,
        num_events_in_wait_list, event_wait_list, retEvent);
+  } else {
+    if (retEvent) {
+      auto new_event =
+          _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue);
+      new_event->record();
+      *retEvent = new_event;
+    }
  }

  return ret_err;
@@ -3372,7 +3379,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
                                 pi_uint32 num_events_in_wait_list,
                                 const pi_event *event_wait_list,
                                 pi_event *retEvent) {
-  pi_result ret_err = PI_INVALID_OPERATION;
+  pi_result ret_err = PI_SUCCESS;

  assert(mapped_ptr != nullptr);
  assert(memobj != nullptr);
@@ -3385,6 +3392,13 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
        command_queue, memobj, true, memobj->get_map_offset(mapped_ptr),
        memobj->get_size(), mapped_ptr, num_events_in_wait_list, event_wait_list,
        retEvent);
+  } else {
+    if (retEvent) {
+      auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
+                                              command_queue);
+      new_event->record();
+      *retEvent = new_event;
+    }
  }

  memobj->unmap(mapped_ptr);
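Why the two new else branches matter: when no copy is required, nothing is
enqueued, but a caller that asked for an event must still receive one it can
wait on, so the plugin now records a native event on that path as well. Below
is a minimal SYCL-level sketch of the pattern that exercises this path (a
host accessor forcing a buffer map, as in BabelStream's result verification);
the kernel name, sizes, and values are illustrative only:

#include <CL/sycl.hpp>
#include <vector>
using namespace cl::sycl;

int main() {
  queue Q;
  std::vector<double> Data(1024, 1.0);
  {
    buffer<double, 1> Buf(Data.data(), range<1>(Data.size()));
    Q.submit([&](handler &CGH) {
      auto Acc = Buf.get_access<access::mode::read_write>(CGH);
      CGH.parallel_for<class scale>(range<1>(Data.size()),
                                    [=](id<1> I) { Acc[I] *= 2.0; });
    });
    // The host accessor triggers a buffer map, and the runtime may wait on
    // the event returned by the map/unmap entry points patched above.
    // Before this fix, that event was left unset on the no-copy path.
    auto HostAcc = Buf.get_access<access::mode::read>();
  }
  return 0;
}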
6 changes: 3 additions & 3 deletions sycl/source/detail/platform_impl.hpp
@@ -74,10 +74,10 @@ class platform_impl {
  bool is_host() const { return MHostPlatform; };

  bool is_cuda() const {
-    const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA";
+    const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA BACKEND";
    const string_class PlatformName =
-        get_platform_info<string_class, info::platform::name>::get(MPlatform,
-                                                                   getPlugin());
+        get_platform_info<string_class, info::platform::version>::get(
+            MPlatform, getPlugin());
    return PlatformName == CUDA_PLATFORM_STRING;
  }

13 changes: 12 additions & 1 deletion sycl/source/detail/plugin.hpp
@@ -23,7 +23,7 @@ class plugin {
public:
  plugin() = delete;

-  plugin(RT::PiPlugin Plugin) : MPlugin(Plugin) {
+  explicit plugin(RT::PiPlugin Plugin) : MPlugin(Plugin) {
    MPiEnableTrace = (std::getenv("SYCL_PI_TRACE") != nullptr);
  }
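The added explicit keyword stops a raw RT::PiPlugin struct from converting
implicitly into the detail::plugin wrapper. A minimal sketch of what changes
at call sites (RawPlugin and take are hypothetical names; the struct is
assumed to be populated by piPluginInit):

RT::PiPlugin RawPlugin;       // assumed filled in by piPluginInit
plugin P{RawPlugin};          // still fine: explicit construction
// plugin Q = RawPlugin;      // no longer compiles after this change
// void take(const plugin &);
// take(RawPlugin);           // also rejected: no implicit conversion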

@@ -79,6 +79,17 @@ class plugin {
  bool MPiEnableTrace;

}; // class plugin
+
+/// Two plugins are the same if their string is the same.
+/// There is no need to check the actual string, just the pointer, since
+/// there is only one instance of the PiPlugin struct per backend.
+///
+/// \ingroup sycl_pi
+///
+inline bool operator==(const plugin &lhs, const plugin &rhs) {
+  return (lhs.getPiPlugin().PluginVersion == rhs.getPiPlugin().PluginVersion);
+}
+

Contributor: Should this just check that the MBackend (getBackend) of the plugins is the same?
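A sketch of the backend-based comparison the reviewer suggests, assuming the
plugin class exposes a getBackend() accessor for the MBackend member named in
the comment (hypothetical until the backend-query patches referenced later in
this PR land):

inline bool operator==(const plugin &lhs, const plugin &rhs) {
  return lhs.getBackend() == rhs.getBackend();
}

The new assert in queue_impl.hpp below is one consumer of this operator,
whichever form it takes.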

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
24 changes: 1 addition & 23 deletions sycl/source/detail/program_manager/program_manager.cpp
@@ -84,29 +84,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,

  RT::PiProgram Program;

-  bool IsCUDA = false;
-
-  // TODO: Implement `piProgramCreateWithBinary` to not require extra logic for
-  // the CUDA backend.
-#if USE_PI_CUDA
-  // All devices in a context are from the same platform.
-  RT::PiDevice Device = getFirstDevice(Context);
-  RT::PiPlatform Platform = nullptr;
-  Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_PLATFORM, sizeof(Platform),
-                                          &Platform, nullptr);
-  size_t PlatformNameSize = 0u;
-  Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, 0u, nullptr,
-                                            &PlatformNameSize);
-  std::vector<char> PlatformName(PlatformNameSize, '\0');
-  Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME,
-                                            PlatformName.size(), PlatformName.data(), nullptr);
-  if (PlatformNameSize > 0u &&
-      std::strncmp(PlatformName.data(), "NVIDIA CUDA", PlatformNameSize) == 0) {
-    IsCUDA = true;
-  }
-#endif // USE_PI_CUDA
-
-  if (IsCUDA) {
+  if (Context->getPlatformImpl()->is_cuda()) {
    // TODO: Replace CreateWithSource with CreateWithBinary in CUDA backend
    const char *SignedData = reinterpret_cast<const char *>(Data);
    Plugin.call<PiApiKind::piclProgramCreateWithSource>(Context->getHandleRef(), 1 /*one binary*/, &SignedData,
10 changes: 7 additions & 3 deletions sycl/source/detail/queue_impl.hpp
@@ -69,14 +69,16 @@ class queue_impl {
      : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
        MPropList(PropList), MHostQueue(MDevice->is_host()),
        MOpenCLInterop(!MHostQueue) {
-    if (!MHostQueue) {
-      MCommandQueue = createQueue(Order);
-    }
+
    if (!Context->hasDevice(Device))
      throw cl::sycl::invalid_parameter_error(
          "Queue cannot be constructed with the given context and device "
          "as the context does not contain the given device.",
          PI_INVALID_DEVICE);
+
+    if (!MHostQueue) {
+      MCommandQueue = createQueue(Order);
+    }
  }

/// Constructs a SYCL queue from plugin interoperability handle.
@@ -240,6 +242,8 @@ class queue_impl {
    RT::PiContext Context = MContext->getHandleRef();
    RT::PiDevice Device = MDevice->getHandleRef();
    const detail::plugin &Plugin = getPlugin();
+
+    assert(Plugin == MDevice->getPlugin());
    RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piQueueCreate>(
        Context, Device, CreationFlags, &Queue);
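With the check moved ahead of createQueue, a mismatched context/device pair
is rejected before any native queue is created. A minimal sketch of the
guarded case, assuming a machine that exposes both a GPU and a CPU device
(the setup is illustrative only):

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
  context GpuContext{gpu_selector{}.select_device()};
  try {
    // The CPU device is not contained in GpuContext, so construction must
    // throw instead of creating a native queue first.
    queue Q{GpuContext, cpu_selector{}};
  } catch (const invalid_parameter_error &) {
    // Expected: "Queue cannot be constructed with the given context and
    // device as the context does not contain the given device."
  }
  return 0;
}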

10 changes: 6 additions & 4 deletions sycl/source/device_selector.cpp
@@ -43,14 +43,16 @@ int default_selector::operator()(const device &dev) const {
  const platform platform = dev.get_info<info::device::platform>();
  const std::string platformVersion =
      platform.get_info<info::platform::version>();
+  const bool HasCudaString =
+      platformVersion.find("CUDA") != std::string::npos;
+  const bool HasOpenCLString =
+      platformVersion.find("OpenCL") != std::string::npos;
  // If using PI_CUDA, don't accept a non-CUDA device
-  if (platformVersion.find("CUDA") == std::string::npos &&
-      backend == "PI_CUDA") {
+  if (HasCudaString && HasOpenCLString && backend == "PI_CUDA") {

Contributor: This entire code fragment (lines 37-58) is in conflict with #1522, where SYCL_BE is used as the preferred backend, not the forced one. For example, if the target device type is CPU, which the CUDA PI plugin does not support, and the user specified SYCL_BE=PI_CUDA, then with this code no device will be found and an exception thrown, while with that code it will use the plugin where the CPU is supported, i.e. OpenCL. I think both semantics are useful and we should probably fork SYCL_BE into SYCL_PI_FORCE & SYCL_PI_PREFER.

Contributor Author: The only thing that line 51 does is that, if a platform has both the CUDA string and the OpenCL string, it is rejected if, and only if, the desired backend is PI_CUDA. This prevents the selection of the NVIDIA OpenCL platform when the user wants to use the PI CUDA backend. Everything else should work the same way. I don't think there is a need to fork the env. variables at this point.

Contributor: Sorry, I misread the code. It is not rejecting the non-CUDA devices under SYCL_BE=PI_CUDA as I initially thought. It still has to be merged properly with the code coming in #1522 (tagging @againull). Also, it should use platform.getBackend() instead of checking the platform name.

Contributor Author (@Ruyk, Apr 23, 2020): Should I wait until #1522 and #1490 are merged? I can't use platform.getBackend() until then. Alternatively, I can use is_cuda until the other PRs are there.

Contributor Author: I've rebased this on top of the mentioned patches, so it now uses the plugin and the backend.

    return -1;
  }
  // If using PI_OPENCL, don't accept a non-OpenCL device
-  if (platformVersion.find("OpenCL") == std::string::npos &&
-      backend == "PI_OPENCL") {
+  if (HasCudaString && !HasOpenCLString && backend == "PI_OPENCL") {
    return -1;
  }
}
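For context, here are hedged examples of the two platform version strings
this logic distinguishes (the exact strings are driver- and build-dependent;
these are illustrative only):

  PI CUDA backend : "NVIDIA CUDA BACKEND"    -> HasCudaString, !HasOpenCLString
  NVIDIA OpenCL   : "OpenCL 1.2 CUDA 10.2"   -> HasCudaString,  HasOpenCLString

So under SYCL_BE=PI_CUDA the first check rejects NVIDIA's OpenCL platform
(both substrings present), and under SYCL_BE=PI_OPENCL the second check
rejects the PI CUDA backend platform (CUDA present without OpenCL).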