Merge pull request #40832 from fwyzard/more_Alpaka_updates_131x
CUDA, ROCm and Alpaka-related updates
cmsbuild authored Feb 28, 2023
2 parents ff2213a + 1c30d1c commit 3d761d8
Showing 81 changed files with 761 additions and 629 deletions.
@@ -363,10 +363,6 @@ def getSequence(process, collection,
## put the sequence together ##
###############################

if "Fast" in TTRHBuilder:
print("PixelCPEFast has been chosen, here we must include CUDAService first")
process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi')

modules = []
src = collection
prevsrc = None
1 change: 1 addition & 0 deletions Configuration/StandardSequences/python/Accelerators_cff.py
@@ -4,3 +4,4 @@
# used in production

from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi import ProcessAcceleratorCUDA
from HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi import ProcessAcceleratorROCm
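
For context, a minimal sketch of how a job configuration picks these up (illustrative only, not part of this commit; it assumes the standard `Configuration.StandardSequences` load path of the file above):

```python
# Illustrative sketch: loading Accelerators_cff makes both the CUDA and the
# ROCm process accelerators available; each one enables itself at runtime
# only if the corresponding platform and hardware are actually usable.
import FWCore.ParameterSet.Config as cms

process = cms.Process("TEST")
process.load("Configuration.StandardSequences.Accelerators_cff")
```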
5 changes: 5 additions & 0 deletions DataFormats/PortableTestObjects/src/alpaka/classes_rocm.h
@@ -0,0 +1,5 @@
#include "DataFormats/Common/interface/DeviceProduct.h"
#include "DataFormats/Common/interface/Wrapper.h"
#include "DataFormats/Portable/interface/Product.h"
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
@@ -0,0 +1,5 @@
<lcgdict>
<class name="alpaka_rocm_async::portabletest::TestDeviceCollection" persistent="false"/>
<class name="edm::DeviceProduct<alpaka_rocm_async::portabletest::TestDeviceCollection>" persistent="false"/>
<class name="edm::Wrapper<edm::DeviceProduct<alpaka_rocm_async::portabletest::TestDeviceCollection>>" persistent="false"/>
</lcgdict>
6 changes: 3 additions & 3 deletions EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc
@@ -9,7 +9,7 @@
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

class HcalDigisProducerGPU : public edm::stream::EDProducer<edm::ExternalWork> {
@@ -97,8 +97,8 @@ HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps)
hf3_.stride = hcal::compute_stride<hcal::Flavor3>(QIE11DigiCollection::MAXSAMPLES);

// preallocate pinned host memory only if CUDA is available
edm::Service<CUDAService> cs;
if (cs and cs->enabled()) {
edm::Service<CUDAInterface> cuda;
if (cuda and cuda->enabled()) {
hf01_.reserve(config_.maxChannelsF01HE);
hf5_.reserve(config_.maxChannelsF5HB);
hf3_.reserve(config_.maxChannelsF3HB);
7 changes: 1 addition & 6 deletions EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc
@@ -1,5 +1,4 @@
#include <iostream>

#include "CUDADataFormats/Common/interface/Product.h"
#include "CondFormats/DataRecord/interface/HcalElectronicsMapRcd.h"
#include "DataFormats/FEDRawData/interface/FEDNumbering.h"
#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h"
@@ -8,11 +7,7 @@
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

#include "DeclsForKernels.h"
#include "DecodeGPU.h"
64 changes: 54 additions & 10 deletions HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py
@@ -1,11 +1,17 @@
import FWCore.ParameterSet.Config as cms

import os

from HeterogeneousCore.Common.PlatformStatus import PlatformStatus

class ModuleTypeResolverAlpaka:
def __init__(self, accelerators, backend):
# first element is used as the default is nothing is set
# first element is used as the default if nothing is set
self._valid_backends = []
if "gpu-nvidia" in accelerators:
self._valid_backends.append("cuda_async")
if "gpu-amd" in accelerators:
self._valid_backends.append("rocm_async")
if "cpu" in accelerators:
self._valid_backends.append("serial_sync")
if len(self._valid_backends) == 0:
@@ -45,26 +51,64 @@ class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
ProcessAcceleratorCUDA) define.
"""
def __init__(self):
super(ProcessAcceleratorAlpaka,self).__init__()
super(ProcessAcceleratorAlpaka, self).__init__()
self._backend = None

# User-facing interface
def setBackend(self, backend):
self._backend = backend

# Framework-facing interface
def moduleTypeResolver(self, accelerators):
return ModuleTypeResolverAlpaka(accelerators, self._backend)

def apply(self, process, accelerators):
if not hasattr(process, "AlpakaServiceSerialSync"):
# Propagate the AlpakaService messages through the MessageLogger
if not hasattr(process.MessageLogger, "AlpakaService"):
process.MessageLogger.AlpakaService = cms.untracked.PSet()

# Check if the CPU backend is available
try:
if not "cpu" in accelerators:
raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
process.add_(AlpakaServiceSerialSync)
if not hasattr(process, "AlpakaServiceCudaAsync"):
except:
# the CPU backend is not available, do not load the AlpakaServiceSerialSync
if hasattr(process, "AlpakaServiceSerialSync"):
del process.AlpakaServiceSerialSync
else:
# the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
if not hasattr(process, "AlpakaServiceSerialSync"):
process.add_(AlpakaServiceSerialSync)

# Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
try:
if not "gpu-nvidia" in accelerators:
raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
process.add_(AlpakaServiceCudaAsync)
except:
# CUDA is not available, do not load the AlpakaServiceCudaAsync
if hasattr(process, "AlpakaServiceCudaAsync"):
del process.AlpakaServiceCudaAsync
else:
# CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
if not hasattr(process, "AlpakaServiceCudaAsync"):
process.add_(AlpakaServiceCudaAsync)

if not hasattr(process.MessageLogger, "AlpakaService"):
process.MessageLogger.AlpakaService = cms.untracked.PSet()
# Check if ROCm is available, and if the system has at least one usable AMD GPU
try:
if not "gpu-amd" in accelerators:
raise False
from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
except:
# ROCm is not available, do not load the AlpakaServiceROCmAsync
if hasattr(process, "AlpakaServiceROCmAsync"):
del process.AlpakaServiceROCmAsync
else:
# ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
if not hasattr(process, "AlpakaServiceROCmAsync"):
process.add_(AlpakaServiceROCmAsync)

process.AlpakaServiceSerialSync.enabled = "cpu" in accelerators
process.AlpakaServiceCudaAsync.enabled = "gpu-nvidia" in accelerators

# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")
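
Given the `setBackend()` hook above, a global backend override could look like the following hedged sketch (the process attribute name and the `"rocm_async"` label choice are illustrative, and `process` is assumed to exist already):

```python
# Hedged sketch: pin every Alpaka module to a single backend. "rocm_async"
# is one of the labels registered by ModuleTypeResolverAlpaka above; if no
# backend is set, the resolver defaults to the first available one.
from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka

process.ProcessAcceleratorAlpaka = ProcessAcceleratorAlpaka()
process.ProcessAcceleratorAlpaka.setBackend("rocm_async")
```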
2 changes: 1 addition & 1 deletion HeterogeneousCore/AlpakaCore/src/module_backend_config.cc
@@ -17,7 +17,7 @@ namespace cms::alpakatools {
descAlpaka.addUntracked<std::string>("backend", "")
->setComment(
"Alpaka backend for this module. Can be empty string (for the global default), 'serial_sync', or "
"'cuda_async'");
" - depending on the architecture and available hardware - 'cuda_async', 'rocm_async'");

if (iDesc.defaultDescription()) {
if (iDesc.defaultDescription()->isLabelUnused(kPSetName)) {
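
The `backend` parameter declared here is what a per-module override in the configuration would set; a hedged sketch, assuming the parameter lives in each module's untracked `alpaka` PSet, that `cms` is already imported, and using a hypothetical module label:

```python
# Hypothetical example: select the ROCm backend for one module only.
# "testProducer" and the "alpaka" PSet name are assumptions of this sketch;
# "backend" is the untracked string declared in module_backend_config.cc.
process.testProducer.alpaka = cms.untracked.PSet(
    backend = cms.untracked.string("rocm_async")
)
```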
12 changes: 6 additions & 6 deletions HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc
@@ -16,12 +16,12 @@

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
#endif // ALPAKA_ACC_GPU_HIP_ENABLED

namespace ALPAKA_ACCELERATOR_NAMESPACE {
@@ -31,11 +31,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
verbose_(config.getUntrackedParameter<bool>("verbose")) {
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
// rely on the CUDAService to initialise the CUDA devices
edm::Service<CUDAService> cudaService;
edm::Service<CUDAInterface> cuda;
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
// rely on the ROCmService to initialise the ROCm devices
edm::Service<ROCmService> rocmService;
edm::Service<ROCmInterface> rocm;
#endif // ALPAKA_ACC_GPU_HIP_ENABLED

// TODO from Andrea Bocci:
@@ -48,14 +48,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
}

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
if (not cudaService->enabled()) {
if (not cuda or not cuda->enabled()) {
enabled_ = false;
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService";
return;
}
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
if (not rocmService->enabled()) {
if (not rocm or not rocm->enabled()) {
enabled_ = false;
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService";
return;
9 changes: 8 additions & 1 deletion HeterogeneousCore/CUDACore/README.md
@@ -83,7 +83,14 @@ This page documents the CUDA integration within CMSSW
stream must synchronize with the work queued on other CUDA
streams (with CUDA events and `cudaStreamWaitEvent()`)
4. Outside of `acquire()`/`produce()`, CUDA API functions may be
called only if `CUDAService::enabled()` returns `true`.
called only if the `CUDAService` implementation of the `CUDAInterface`
is available and `CUDAService::enabled()` returns `true`:
```c++
edm::Service<CUDAInterface> cuda;
if (cuda and cuda->enabled()) {
// CUDA calls can be made here
}
```
* With point 3 it follows that in these cases multiple devices have
to be dealt with explicitly, as well as CUDA streams

47 changes: 31 additions & 16 deletions HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py
@@ -2,29 +2,44 @@

import os

from HeterogeneousCore.Common.PlatformStatus import PlatformStatus

class ProcessAcceleratorCUDA(cms.ProcessAccelerator):
def __init__(self):
super(ProcessAcceleratorCUDA,self).__init__()
super(ProcessAcceleratorCUDA, self).__init__()
self._label = "gpu-nvidia"

def labels(self):
return [self._label]
return [ self._label ]

def enabledLabels(self):
enabled = (os.system("cudaIsEnabled") == 0)
if enabled:
return self.labels()
else:
return []
def apply(self, process, accelerators):
if not hasattr(process, "CUDAService"):
from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
process.add_(CUDAService)
# Check if CUDA is available, and if the system has at least one usable device.
# These should be checked on each worker node, because it depends both
# on the architecture and on the actual hardware present in the machine.
status = PlatformStatus(os.waitstatus_to_exitcode(os.system("cudaIsEnabled")))
return self.labels() if status == PlatformStatus.Success else []

if not hasattr(process.MessageLogger, "CUDAService"):
process.MessageLogger.CUDAService = cms.untracked.PSet()
def apply(self, process, accelerators):

if self._label in accelerators:
process.CUDAService.enabled = True
# Ensure that the CUDAService is loaded
if not hasattr(process, "CUDAService"):
from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
process.add_(CUDAService)

# Propagate the CUDAService messages through the MessageLogger
if not hasattr(process.MessageLogger, "CUDAService"):
process.MessageLogger.CUDAService = cms.untracked.PSet()

else:
process.CUDAService.enabled = False

# Make sure the CUDAService is not loaded
if hasattr(process, "CUDAService"):
del process.CUDAService

# Drop the CUDAService messages from the MessageLogger
if hasattr(process.MessageLogger, "CUDAService"):
del process.MessageLogger.CUDAService


# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorCUDA, "from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA import ProcessAcceleratorCUDA")
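
`enabledLabels()` above maps the exit code of `cudaIsEnabled` onto `PlatformStatus`. Its definition is not part of this diff; a plausible shape, shown purely as a hedged sketch (only `Success == 0` is implied by the code above, the other members are guesses):

```python
# Hypothetical sketch of HeterogeneousCore/Common/python/PlatformStatus.py.
# Only Success == 0 follows from the code above (cudaIsEnabled exits with 0
# when the platform is usable); the other members and values are assumptions.
import enum

class PlatformStatus(enum.IntEnum):
    Success = 0              # platform and at least one device are usable
    PlatformUnavailable = 1  # assumed: runtime or drivers missing
    DeviceUnavailable = 2    # assumed: runtime present, no usable device
```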
15 changes: 8 additions & 7 deletions HeterogeneousCore/CUDACore/src/chooseDevice.cc
@@ -1,17 +1,18 @@
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"

#include "chooseDevice.h"

namespace cms::cuda {
int chooseDevice(edm::StreamID id) {
edm::Service<CUDAService> cudaService;
if (not cudaService->enabled()) {
edm::Service<CUDAInterface> cuda;
if (not cuda or not cuda->enabled()) {
cms::Exception ex("CUDAError");
ex << "Unable to choose current device because CUDAService is disabled. If CUDAService was not explicitly\n"
"disabled in the configuration, the probable cause is that there is no GPU or there is some problem\n"
"in the CUDA runtime or drivers.";
ex << "Unable to choose current device because CUDAService is not preset or disabled.\n"
<< "If CUDAService was not explicitly disabled in the configuration, the probable\n"
<< "cause is that there is no GPU or there is some problem in the CUDA runtime or\n"
<< "drivers.";
ex.addContext("Calling cms::cuda::chooseDevice()");
throw ex;
}
@@ -22,6 +23,6 @@ namespace cms::cuda {
// (and even then there is no load balancing).
//
// TODO: improve the "assignment" logic
return id % cudaService->numberOfDevices();
return id % cuda->numberOfDevices();
}
} // namespace cms::cuda
16 changes: 4 additions & 12 deletions HeterogeneousCore/CUDAServices/BuildFile.xml
@@ -1,12 +1,4 @@
<iftool name="cuda">
<use name="FWCore/ServiceRegistry"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/MessageLogger"/>
<use name="FWCore/Utilities"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<use name="cuda"/>
<use name="cuda-nvml"/>
<export>
<lib name="1"/>
</export>
</iftool>
<use name="FWCore/ServiceRegistry"/>
<export>
<lib name="1"/>
</export>
19 changes: 19 additions & 0 deletions HeterogeneousCore/CUDAServices/interface/CUDAInterface.h
@@ -0,0 +1,19 @@
#ifndef HeterogeneousCore_CUDAServices_interface_CUDAInterface
#define HeterogeneousCore_CUDAServices_interface_CUDAInterface

#include <utility>

class CUDAInterface {
public:
CUDAInterface() = default;
virtual ~CUDAInterface() = default;

virtual bool enabled() const = 0;

virtual int numberOfDevices() const = 0;

// Returns the (major, minor) CUDA compute capability of the given device.
virtual std::pair<int, int> computeCapability(int device) const = 0;
};

#endif // HeterogeneousCore_CUDAServices_interface_CUDAInterface