Skip to content

Commit

Permalink
Add abstract interfaces for the CUDA and ROCm services
Browse files Browse the repository at this point in the history
Split the CUDA and ROCm services into an interface and a concrete
implementation.

Split the CUDAService into CUDAInterface and CUDAService.
The former defines the abstract interface, while the latter
implements the concrete functionality, and is used only if:
  - CUDA is available on the current platform, OS and compiler;
  - the current system has at least one available CUDA GPU;
  - gpu-nvidia is among the process.options.accelerators.

Split the ROCmService into ROCmInterface and ROCmService.
The former defines the abstract interface, while the latter
implements the concrete functionality, and is used only if:
  - ROCm is available on the current platform, OS and compiler;
  - the current system has at least one available ROCm GPU;
  - gpu-amd is among the process.options.accelerators.

Update all uses of the edm::Service<CUDAService> and edm::Service<ROCmService>
to the interface classes, and check explicitly that a concrete implementation
of the service is available.
  • Loading branch information
fwyzard committed Feb 28, 2023
1 parent a2fe732 commit 5ebe8d8
Show file tree
Hide file tree
Showing 59 changed files with 557 additions and 405 deletions.
6 changes: 3 additions & 3 deletions EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

class HcalDigisProducerGPU : public edm::stream::EDProducer<edm::ExternalWork> {
Expand Down Expand Up @@ -97,8 +97,8 @@ HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps)
hf3_.stride = hcal::compute_stride<hcal::Flavor3>(QIE11DigiCollection::MAXSAMPLES);

// preallocate pinned host memory only if CUDA is available
edm::Service<CUDAService> cs;
if (cs and cs->enabled()) {
edm::Service<CUDAInterface> cuda;
if (cuda and cuda->enabled()) {
hf01_.reserve(config_.maxChannelsF01HE);
hf5_.reserve(config_.maxChannelsF5HB);
hf3_.reserve(config_.maxChannelsF3HB);
Expand Down
64 changes: 51 additions & 13 deletions HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import FWCore.ParameterSet.Config as cms

import os

from HeterogeneousCore.Common.PlatformStatus import PlatformStatus

class ModuleTypeResolverAlpaka:
def __init__(self, accelerators, backend):
# first element is used as the default if nothing is set
Expand Down Expand Up @@ -47,30 +51,64 @@ class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
ProcessAcceleratorCUDA) define.
"""
def __init__(self):
super(ProcessAcceleratorAlpaka,self).__init__()
super(ProcessAcceleratorAlpaka, self).__init__()
self._backend = None

# User-facing interface
def setBackend(self, backend):
self._backend = backend

# Framework-facing interface
def moduleTypeResolver(self, accelerators):
return ModuleTypeResolverAlpaka(accelerators, self._backend)

def apply(self, process, accelerators):
if not hasattr(process, "AlpakaServiceSerialSync"):
# Propagate the AlpakaService messages through the MessageLogger
if not hasattr(process.MessageLogger, "AlpakaService"):
process.MessageLogger.AlpakaService = cms.untracked.PSet()

# Check if the CPU backend is available
try:
if not "cpu" in accelerators:
raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
process.add_(AlpakaServiceSerialSync)
if not hasattr(process, "AlpakaServiceCudaAsync"):
except:
# the CPU backend is not available, do not load the AlpakaServiceSerialSync
if hasattr(process, "AlpakaServiceSerialSync"):
del process.AlpakaServiceSerialSync
else:
# the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
if not hasattr(process, "AlpakaServiceSerialSync"):
process.add_(AlpakaServiceSerialSync)

# Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
try:
if not "gpu-nvidia" in accelerators:
raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
process.add_(AlpakaServiceCudaAsync)
if not hasattr(process, "AlpakaServiceROCmAsync"):
from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
process.add_(AlpakaServiceROCmAsync)
except:
# CUDA is not available, do not load the AlpakaServiceCudaAsync
if hasattr(process, "AlpakaServiceCudaAsync"):
del process.AlpakaServiceCudaAsync
else:
# CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
if not hasattr(process, "AlpakaServiceCudaAsync"):
process.add_(AlpakaServiceCudaAsync)

if not hasattr(process.MessageLogger, "AlpakaService"):
process.MessageLogger.AlpakaService = cms.untracked.PSet()
# Check if ROCm is available, and if the system has at least one usable AMD GPU
try:
if not "gpu-amd" in accelerators:
raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
except:
# ROCm is not available, do not load the AlpakaServiceROCmAsync
if hasattr(process, "AlpakaServiceROCmAsync"):
del process.AlpakaServiceROCmAsync
else:
# ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
if not hasattr(process, "AlpakaServiceROCmAsync"):
process.add_(AlpakaServiceROCmAsync)

process.AlpakaServiceSerialSync.enabled = "cpu" in accelerators
process.AlpakaServiceCudaAsync.enabled = "gpu-nvidia" in accelerators
process.AlpakaServiceROCmAsync.enabled = "gpu-amd" in accelerators

# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")
12 changes: 6 additions & 6 deletions HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#endif // ALPAKA_ACC_GPU_HIP_ENABLED

namespace ALPAKA_ACCELERATOR_NAMESPACE {
Expand All @@ -31,11 +31,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
verbose_(config.getUntrackedParameter<bool>("verbose")) {
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// rely on the CUDAService to initialise the CUDA devices
edm::Service<CUDAService> cudaService;
edm::Service<CUDAInterface> cuda;
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
// rely on the ROCmService to initialise the ROCm devices
edm::Service<ROCmService> rocmService;
edm::Service<ROCmInterface> rocm;
#endif // ALPAKA_ACC_GPU_HIP_ENABLED

// TODO from Andrea Bocci:
Expand All @@ -48,14 +48,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
}

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
if (not cudaService->enabled()) {
if (not cuda or not cuda->enabled()) {
enabled_ = false;
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService";
return;
}
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
if (not rocmService->enabled()) {
if (not rocm or not rocm->enabled()) {
enabled_ = false;
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService";
return;
Expand Down
9 changes: 8 additions & 1 deletion HeterogeneousCore/CUDACore/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,14 @@ This page documents the CUDA integration within CMSSW
stream must synchronize with the work queued on other CUDA
streams (with CUDA events and `cudaStreamWaitEvent()`)
4. Outside of `acquire()`/`produce()`, CUDA API functions may be
called only if `CUDAService::enabled()` returns `true`.
called only if the `CUDAService` implementation of the `CUDAInterface`
is available and `CUDAService::enabled()` returns `true`:
```c++
edm::Service<CUDAInterface> cuda;
if (cuda and cuda->enabled()) {
  // CUDA calls can be made here
}
```
* With point 3 it follows that in these cases multiple devices have
to be dealt with explicitly, as well as CUDA streams

Expand Down
47 changes: 31 additions & 16 deletions HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,44 @@

import os

from HeterogeneousCore.Common.PlatformStatus import PlatformStatus

class ProcessAcceleratorCUDA(cms.ProcessAccelerator):
def __init__(self):
super(ProcessAcceleratorCUDA,self).__init__()
super(ProcessAcceleratorCUDA, self).__init__()
self._label = "gpu-nvidia"

def labels(self):
return [self._label]
return [ self._label ]

def enabledLabels(self):
enabled = (os.system("cudaIsEnabled") == 0)
if enabled:
return self.labels()
else:
return []
def apply(self, process, accelerators):
if not hasattr(process, "CUDAService"):
from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
process.add_(CUDAService)
# Check if CUDA is available, and if the system has at least one usable device.
# These should be checked on each worker node, because it depends both
# on the architecture and on the actual hardware present in the machine.
status = PlatformStatus(os.waitstatus_to_exitcode(os.system("cudaIsEnabled")))
return self.labels() if status == PlatformStatus.Success else []

if not hasattr(process.MessageLogger, "CUDAService"):
process.MessageLogger.CUDAService = cms.untracked.PSet()
def apply(self, process, accelerators):

if self._label in accelerators:
process.CUDAService.enabled = True
# Ensure that the CUDAService is loaded
if not hasattr(process, "CUDAService"):
from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
process.add_(CUDAService)

# Propagate the CUDAService messages through the MessageLogger
if not hasattr(process.MessageLogger, "CUDAService"):
process.MessageLogger.CUDAService = cms.untracked.PSet()

else:
process.CUDAService.enabled = False

# Make sure the CUDAService is not loaded
if hasattr(process, "CUDAService"):
del process.CUDAService

# Drop the CUDAService messages from the MessageLogger
if hasattr(process.MessageLogger, "CUDAService"):
del process.MessageLogger.CUDAService


# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorCUDA, "from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA import ProcessAcceleratorCUDA")
15 changes: 8 additions & 7 deletions HeterogeneousCore/CUDACore/src/chooseDevice.cc
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"

#include "chooseDevice.h"

namespace cms::cuda {
int chooseDevice(edm::StreamID id) {
edm::Service<CUDAService> cudaService;
if (not cudaService->enabled()) {
edm::Service<CUDAInterface> cuda;
if (not cuda or not cuda->enabled()) {
cms::Exception ex("CUDAError");
ex << "Unable to choose current device because CUDAService is disabled. If CUDAService was not explicitly\n"
"disabled in the configuration, the probable cause is that there is no GPU or there is some problem\n"
"in the CUDA runtime or drivers.";
ex << "Unable to choose current device because CUDAService is not present or disabled.\n"
<< "If CUDAService was not explicitly disabled in the configuration, the probable\n"
<< "cause is that there is no GPU or there is some problem in the CUDA runtime or\n"
<< "drivers.";
ex.addContext("Calling cms::cuda::chooseDevice()");
throw ex;
}
Expand All @@ -22,6 +23,6 @@ namespace cms::cuda {
// (and even then there is no load balancing).
//
// TODO: improve the "assignment" logic
return id % cudaService->numberOfDevices();
return id % cuda->numberOfDevices();
}
} // namespace cms::cuda
16 changes: 4 additions & 12 deletions HeterogeneousCore/CUDAServices/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
<iftool name="cuda">
<use name="FWCore/ServiceRegistry"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/MessageLogger"/>
<use name="FWCore/Utilities"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<use name="cuda"/>
<use name="cuda-nvml"/>
<export>
<lib name="1"/>
</export>
</iftool>
<use name="FWCore/ServiceRegistry"/>
<export>
<lib name="1"/>
</export>
19 changes: 19 additions & 0 deletions HeterogeneousCore/CUDAServices/interface/CUDAInterface.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef HeterogeneousCore_CUDAServices_interface_CUDAInterface
#define HeterogeneousCore_CUDAServices_interface_CUDAInterface

#include <utility>

// Abstract interface for the CUDA service.
// The concrete CUDAService implementation (loaded only when CUDA and at least
// one NVIDIA GPU are available, and "gpu-nvidia" is among the configured
// accelerators) derives from this class; clients use edm::Service<CUDAInterface>
// and must check that the service is present and enabled() before making CUDA calls.
class CUDAInterface {
public:
  CUDAInterface() = default;
  virtual ~CUDAInterface() = default;

  // Returns true if a concrete implementation is configured and CUDA can be used.
  virtual bool enabled() const = 0;

  // Returns the number of CUDA devices available to the service.
  virtual int numberOfDevices() const = 0;

  // Returns the (major, minor) CUDA compute capability of the given device.
  virtual std::pair<int, int> computeCapability(int device) const = 0;
};

#endif // HeterogeneousCore_CUDAServices_interface_CUDAInterface
42 changes: 0 additions & 42 deletions HeterogeneousCore/CUDAServices/interface/CUDAService.h

This file was deleted.

3 changes: 2 additions & 1 deletion HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
<use name="FWCore/Utilities"/>
<use name="HLTrigger/Timer"/>
<use name="HeterogeneousCore/CUDAServices"/>
<library file="*.cc" name="HeterogeneousCoreCUDAServicesPlugins">
<use name="HeterogeneousCore/CUDAUtilities"/>
<library file="CUDAMonitoringService.cc CUDAService.cc NVProfilerService.cc" name="HeterogeneousCoreCUDAServicesPlugins">
<flags EDM_PLUGIN="1"/>
</library>
</iftool>
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/deviceAllocatorStatus.h"

Expand All @@ -37,10 +37,11 @@ class CUDAMonitoringService {

CUDAMonitoringService::CUDAMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
// make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor
edm::Service<CUDAService> cudaService;
if (!cudaService->enabled())
edm::Service<CUDAInterface> cuda;
if (not cuda or not cuda->enabled())
return;
numberOfDevices_ = cudaService->numberOfDevices();

numberOfDevices_ = cuda->numberOfDevices();

if (config.getUntrackedParameter<bool>("memoryConstruction")) {
registry.watchPostModuleConstruction(this, &CUDAMonitoringService::postModuleConstruction);
Expand Down
Loading

0 comments on commit 5ebe8d8

Please sign in to comment.