From 5ebe8d8bac6d9eef759b0e991c2d67d870e716c2 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 23 Feb 2023 18:33:06 +0100 Subject: [PATCH] Add abstract interfaces for the CUDA and ROCm services Split the CUDA and ROCm services into an interface and a concrete implementation. Split the CUDAService into CUDAInterface and CUDAService. The former implements the CUDAService interface, while the latter implement the conrete functionality, and is used only if: - CUDA is available on the current platforms, OS and compiler; - the current system has at least one available CUDA GPU; - gpu-nvidia is among the process.options.accelerators. Split the ROCmService into ROCmInterface and ROCmService. The former implements the ROCmService interface, while the latter implement the conrete functionality, and is used only if: - ROCm is available on the current platforms, OS and compiler; - the current system has at least one available ROCm GPU; - gpu-amd is among the process.options.accelerators. Update all uses of the edm::Service and edm::Service to the interface classes, and check explicitly that a concrete implementation of the service is available. --- .../plugins/HcalDigisProducerGPU.cc | 6 +- .../python/ProcessAcceleratorAlpaka.py | 64 +++++++-- .../src/alpaka/AlpakaService.cc | 12 +- HeterogeneousCore/CUDACore/README.md | 9 +- .../CUDACore/python/ProcessAcceleratorCUDA.py | 47 ++++--- .../CUDACore/src/chooseDevice.cc | 15 ++- HeterogeneousCore/CUDAServices/BuildFile.xml | 16 +-- .../CUDAServices/interface/CUDAInterface.h | 19 +++ .../CUDAServices/interface/CUDAService.h | 42 ------ .../CUDAServices/plugins/BuildFile.xml | 3 +- .../plugins/CUDAMonitoringService.cc | 9 +- .../{src => plugins}/CUDAService.cc | 63 +++++++-- .../CUDAServices/plugins/NVProfilerService.cc | 13 +- .../CUDAServices/plugins/plugins.cc | 4 - .../CUDAServices/src/numberOfDevices.cc | 6 +- .../CUDAServices/test/BuildFile.xml | 16 ++- .../CUDAServices/test/testCUDAService.cpp | 121 ++++++++++------- .../CUDAServices/test/test_main.cpp | 16 ++- .../CUDATest/plugins/TestCUDAAnalyzerGPU.cc | 6 +- .../CUDATest/plugins/TestCUDAProducerGPUEW.cc | 6 +- .../plugins/TestCUDAProducerGPUEWTask.cc | 6 +- .../ROCmCore/python/ProcessAcceleratorROCm.py | 47 ++++--- HeterogeneousCore/ROCmServices/BuildFile.xml | 16 +-- .../ROCmServices/interface/ROCmInterface.h | 19 +++ .../ROCmServices/interface/ROCmService.h | 42 ------ .../ROCmServices/plugins/BuildFile.xml | 3 +- .../plugins/ROCmMonitoringService.cc | 9 +- .../{src => plugins}/ROCmService.cc | 43 +++++- .../ROCmServices/plugins/plugins.cc | 4 - .../ROCmServices/test/BuildFile.xml | 16 ++- .../ROCmServices/test/testROCmService.cpp | 124 ++++++++++-------- .../ROCmServices/test/test_main.cpp | 16 +++ .../plugins/CUDATestDeviceAdditionModule.cc | 8 +- .../test/testCUDATestDeviceAdditionModule.py | 3 +- .../plugins/CUDATestKernelAdditionModule.cc | 8 +- .../test/testCUDATestKernelAdditionModule.py | 3 +- .../plugins/CUDATestOpaqueAdditionModule.cc | 8 +- .../test/testCUDATestAdditionModules.py | 3 +- .../test/testCUDATestOpaqueAdditionModule.py | 3 +- .../plugins/CUDATestWrapperAdditionModule.cc | 8 +- .../test/testCUDATestWrapperAdditionModule.py | 3 +- .../plugins/ROCmTestDeviceAdditionModule.cc | 8 +- .../test/testROCmTestDeviceAdditionModule.py | 3 +- .../plugins/ROCmTestKernelAdditionModule.cc | 8 +- .../test/testROCmTestKernelAdditionModule.py | 3 +- .../plugins/ROCmTestOpaqueAdditionModule.cc | 8 +- .../test/testROCmTestAdditionModules.py | 3 +- .../test/testROCmTestOpaqueAdditionModule.py | 3 +- .../plugins/ROCmTestWrapperAdditionModule.cc | 8 +- .../test/testROCmTestWrapperAdditionModule.py | 3 +- .../plugins/EcalRecHitProducerGPU.cc | 2 +- .../plugins/EERecHitGPUtoSoA.cc | 2 +- .../plugins/HEBRecHitGPUtoSoA.cc | 2 +- .../plugins/HEFRecHitGPUtoSoA.cc | 2 +- .../src/HBHERecHitProducerGPU.cc | 2 +- .../plugins/SiPixelPhase2DigiToClusterCUDA.cc | 2 +- .../plugins/SiPixelRawToClusterCUDA.cc | 2 +- .../plugins/CAHitNtupletGeneratorOnGPU.cc | 10 +- .../plugins/BeamSpotToCUDA.cc | 6 +- 59 files changed, 557 insertions(+), 405 deletions(-) create mode 100644 HeterogeneousCore/CUDAServices/interface/CUDAInterface.h delete mode 100644 HeterogeneousCore/CUDAServices/interface/CUDAService.h rename HeterogeneousCore/CUDAServices/{src => plugins}/CUDAService.cc (90%) delete mode 100644 HeterogeneousCore/CUDAServices/plugins/plugins.cc create mode 100644 HeterogeneousCore/ROCmServices/interface/ROCmInterface.h delete mode 100644 HeterogeneousCore/ROCmServices/interface/ROCmService.h rename HeterogeneousCore/ROCmServices/{src => plugins}/ROCmService.cc (91%) delete mode 100644 HeterogeneousCore/ROCmServices/plugins/plugins.cc create mode 100644 HeterogeneousCore/ROCmServices/test/test_main.cpp diff --git a/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc b/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc index 9ca33340f7036..80ac575ff2230 100644 --- a/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc +++ b/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc @@ -9,7 +9,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" class HcalDigisProducerGPU : public edm::stream::EDProducer { @@ -97,8 +97,8 @@ HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps) hf3_.stride = hcal::compute_stride(QIE11DigiCollection::MAXSAMPLES); // preallocate pinned host memory only if CUDA is available - edm::Service cs; - if (cs and cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { hf01_.reserve(config_.maxChannelsF01HE); hf5_.reserve(config_.maxChannelsF5HB); hf3_.reserve(config_.maxChannelsF3HB); diff --git a/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py b/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py index 13bb79ba756ac..673a479d8eb2b 100644 --- a/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py +++ b/HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py @@ -1,5 +1,9 @@ import FWCore.ParameterSet.Config as cms +import os + +from HeterogeneousCore.Common.PlatformStatus import PlatformStatus + class ModuleTypeResolverAlpaka: def __init__(self, accelerators, backend): # first element is used as the default if nothing is set @@ -47,30 +51,64 @@ class ProcessAcceleratorAlpaka(cms.ProcessAccelerator): ProcessAcceleratorCUDA) define. """ def __init__(self): - super(ProcessAcceleratorAlpaka,self).__init__() + super(ProcessAcceleratorAlpaka, self).__init__() self._backend = None + # User-facing interface def setBackend(self, backend): self._backend = backend + # Framework-facing interface def moduleTypeResolver(self, accelerators): return ModuleTypeResolverAlpaka(accelerators, self._backend) + def apply(self, process, accelerators): - if not hasattr(process, "AlpakaServiceSerialSync"): + # Propagate the AlpakaService messages through the MessageLogger + if not hasattr(process.MessageLogger, "AlpakaService"): + process.MessageLogger.AlpakaService = cms.untracked.PSet() + + # Check if the CPU backend is available + try: + if not "cpu" in accelerators: + raise False from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync - process.add_(AlpakaServiceSerialSync) - if not hasattr(process, "AlpakaServiceCudaAsync"): + except: + # the CPU backend is not available, do not load the AlpakaServiceSerialSync + if hasattr(process, "AlpakaServiceSerialSync"): + del process.AlpakaServiceSerialSync + else: + # the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded + if not hasattr(process, "AlpakaServiceSerialSync"): + process.add_(AlpakaServiceSerialSync) + + # Check if CUDA is available, and if the system has at least one usable NVIDIA GPU + try: + if not "gpu-nvidia" in accelerators: + raise False from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync - process.add_(AlpakaServiceCudaAsync) - if not hasattr(process, "AlpakaServiceROCmAsync"): - from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync - process.add_(AlpakaServiceROCmAsync) + except: + # CUDA is not available, do not load the AlpakaServiceCudaAsync + if hasattr(process, "AlpakaServiceCudaAsync"): + del process.AlpakaServiceCudaAsync + else: + # CUDA is available, ensure the AlpakaServiceCudaAsync is loaded + if not hasattr(process, "AlpakaServiceCudaAsync"): + process.add_(AlpakaServiceCudaAsync) - if not hasattr(process.MessageLogger, "AlpakaService"): - process.MessageLogger.AlpakaService = cms.untracked.PSet() + # Check if ROCm is available, and if the system has at least one usable AMD GPU + try: + if not "gpu-amd" in accelerators: + raise False + from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync + except: + # ROCm is not available, do not load the AlpakaServiceROCmAsync + if hasattr(process, "AlpakaServiceROCmAsync"): + del process.AlpakaServiceROCmAsync + else: + # ROCm is available, ensure the AlpakaServiceROCmAsync is loaded + if not hasattr(process, "AlpakaServiceROCmAsync"): + process.add_(AlpakaServiceROCmAsync) - process.AlpakaServiceSerialSync.enabled = "cpu" in accelerators - process.AlpakaServiceCudaAsync.enabled = "gpu-nvidia" in accelerators - process.AlpakaServiceROCmAsync.enabled = "gpu-amd" in accelerators +# Ensure this module is kept in the configuration when dumping it cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka") diff --git a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc index d06a6adc39168..fbc0777c03b99 100644 --- a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc +++ b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc @@ -16,12 +16,12 @@ #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #endif // ALPAKA_ACC_GPU_CUDA_ENABLED #ifdef ALPAKA_ACC_GPU_HIP_ENABLED #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #endif // ALPAKA_ACC_GPU_HIP_ENABLED namespace ALPAKA_ACCELERATOR_NAMESPACE { @@ -31,11 +31,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { verbose_(config.getUntrackedParameter("verbose")) { #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED // rely on the CUDAService to initialise the CUDA devices - edm::Service cudaService; + edm::Service cuda; #endif // ALPAKA_ACC_GPU_CUDA_ENABLED #ifdef ALPAKA_ACC_GPU_HIP_ENABLED // rely on the ROCmService to initialise the ROCm devices - edm::Service rocmService; + edm::Service rocm; #endif // ALPAKA_ACC_GPU_HIP_ENABLED // TODO from Andrea Bocci: @@ -48,14 +48,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - if (not cudaService->enabled()) { + if (not cuda or not cuda->enabled()) { enabled_ = false; edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService"; return; } #endif // ALPAKA_ACC_GPU_CUDA_ENABLED #ifdef ALPAKA_ACC_GPU_HIP_ENABLED - if (not rocmService->enabled()) { + if (not rocm or not rocm->enabled()) { enabled_ = false; edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService"; return; diff --git a/HeterogeneousCore/CUDACore/README.md b/HeterogeneousCore/CUDACore/README.md index 57224926e70ed..af7701f856e81 100644 --- a/HeterogeneousCore/CUDACore/README.md +++ b/HeterogeneousCore/CUDACore/README.md @@ -83,7 +83,14 @@ This page documents the CUDA integration within CMSSW stream must synchronize with the work queued on other CUDA streams (with CUDA events and `cudaStreamWaitEvent()`) 4. Outside of `acquire()`/`produce()`, CUDA API functions may be - called only if `CUDAService::enabled()` returns `true`. + called only if the `CUDAService` implementation of the `CUDAInterface` + is available and `CUDAService::enabled()` returns `true`: + ```c++ + edm::Service cuda; + if (cuda and cuda->enabled()) { + // CUDA calls ca be made here + } + ``` * With point 3 it follows that in these cases multiple devices have to be dealt with explicitly, as well as CUDA streams diff --git a/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py b/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py index a92d49f9014c8..0849c6180cc2f 100644 --- a/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py +++ b/HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py @@ -2,29 +2,44 @@ import os +from HeterogeneousCore.Common.PlatformStatus import PlatformStatus + class ProcessAcceleratorCUDA(cms.ProcessAccelerator): def __init__(self): - super(ProcessAcceleratorCUDA,self).__init__() + super(ProcessAcceleratorCUDA, self).__init__() self._label = "gpu-nvidia" + def labels(self): - return [self._label] + return [ self._label ] + def enabledLabels(self): - enabled = (os.system("cudaIsEnabled") == 0) - if enabled: - return self.labels() - else: - return [] - def apply(self, process, accelerators): - if not hasattr(process, "CUDAService"): - from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService - process.add_(CUDAService) + # Check if CUDA is available, and if the system has at least one usable device. + # These should be checked on each worker node, because it depends both + # on the architecture and on the actual hardware present in the machine. + status = PlatformStatus(os.waitstatus_to_exitcode(os.system("cudaIsEnabled"))) + return self.labels() if status == PlatformStatus.Success else [] - if not hasattr(process.MessageLogger, "CUDAService"): - process.MessageLogger.CUDAService = cms.untracked.PSet() + def apply(self, process, accelerators): if self._label in accelerators: - process.CUDAService.enabled = True + # Ensure that the CUDAService is loaded + if not hasattr(process, "CUDAService"): + from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService + process.add_(CUDAService) + + # Propagate the CUDAService messages through the MessageLogger + if not hasattr(process.MessageLogger, "CUDAService"): + process.MessageLogger.CUDAService = cms.untracked.PSet() + else: - process.CUDAService.enabled = False - + # Make sure the CUDAService is not loaded + if hasattr(process, "CUDAService"): + del process.CUDAService + + # Drop the CUDAService messages from the MessageLogger + if hasattr(process.MessageLogger, "CUDAService"): + del process.MessageLogger.CUDAService + + +# Ensure this module is kept in the configuration when dumping it cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorCUDA, "from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA import ProcessAcceleratorCUDA") diff --git a/HeterogeneousCore/CUDACore/src/chooseDevice.cc b/HeterogeneousCore/CUDACore/src/chooseDevice.cc index 3c1d253537679..a768cea02c5ca 100644 --- a/HeterogeneousCore/CUDACore/src/chooseDevice.cc +++ b/HeterogeneousCore/CUDACore/src/chooseDevice.cc @@ -1,17 +1,18 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/Exception.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "chooseDevice.h" namespace cms::cuda { int chooseDevice(edm::StreamID id) { - edm::Service cudaService; - if (not cudaService->enabled()) { + edm::Service cuda; + if (not cuda or not cuda->enabled()) { cms::Exception ex("CUDAError"); - ex << "Unable to choose current device because CUDAService is disabled. If CUDAService was not explicitly\n" - "disabled in the configuration, the probable cause is that there is no GPU or there is some problem\n" - "in the CUDA runtime or drivers."; + ex << "Unable to choose current device because CUDAService is not preset or disabled.\n" + << "If CUDAService was not explicitly disabled in the configuration, the probable\n" + << "cause is that there is no GPU or there is some problem in the CUDA runtime or\n" + << "drivers."; ex.addContext("Calling cms::cuda::chooseDevice()"); throw ex; } @@ -22,6 +23,6 @@ namespace cms::cuda { // (and even then there is no load balancing). // // TODO: improve the "assignment" logic - return id % cudaService->numberOfDevices(); + return id % cuda->numberOfDevices(); } } // namespace cms::cuda diff --git a/HeterogeneousCore/CUDAServices/BuildFile.xml b/HeterogeneousCore/CUDAServices/BuildFile.xml index a48e1c639eaf3..5bb08cd9726fb 100644 --- a/HeterogeneousCore/CUDAServices/BuildFile.xml +++ b/HeterogeneousCore/CUDAServices/BuildFile.xml @@ -1,12 +1,4 @@ - - - - - - - - - - - - + + + + diff --git a/HeterogeneousCore/CUDAServices/interface/CUDAInterface.h b/HeterogeneousCore/CUDAServices/interface/CUDAInterface.h new file mode 100644 index 0000000000000..ef43d867ae908 --- /dev/null +++ b/HeterogeneousCore/CUDAServices/interface/CUDAInterface.h @@ -0,0 +1,19 @@ +#ifndef HeterogeneousCore_CUDAServices_interface_CUDAInterface +#define HeterogeneousCore_CUDAServices_interface_CUDAInterface + +#include + +class CUDAInterface { +public: + CUDAInterface() = default; + virtual ~CUDAInterface() = default; + + virtual bool enabled() const = 0; + + virtual int numberOfDevices() const = 0; + + // Returns the (major, minor) CUDA compute capability of the given device. + virtual std::pair computeCapability(int device) const = 0; +}; + +#endif // HeterogeneousCore_CUDAServices_interface_CUDAInterface diff --git a/HeterogeneousCore/CUDAServices/interface/CUDAService.h b/HeterogeneousCore/CUDAServices/interface/CUDAService.h deleted file mode 100644 index d0dc3f56b51c2..0000000000000 --- a/HeterogeneousCore/CUDAServices/interface/CUDAService.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef HeterogeneousCore_CUDAServices_CUDAService_h -#define HeterogeneousCore_CUDAServices_CUDAService_h - -#include -#include - -#include "FWCore/Utilities/interface/StreamID.h" - -namespace edm { - class ParameterSet; - class ActivityRegistry; - class ConfigurationDescriptions; -} // namespace edm - -class CUDAService { -public: - CUDAService(edm::ParameterSet const& iConfig); - ~CUDAService(); - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - - bool enabled() const { return enabled_; } - - int numberOfDevices() const { return numberOfDevices_; } - - // major, minor - std::pair computeCapability(int device) const { return computeCapabilities_.at(device); } - -private: - int numberOfDevices_ = 0; - std::vector> computeCapabilities_; - bool enabled_ = false; - bool verbose_ = false; -}; - -namespace edm { - namespace service { - inline bool isProcessWideService(CUDAService const*) { return true; } - } // namespace service -} // namespace edm - -#endif diff --git a/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml b/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml index 942a573f77515..4d33d5d73cb18 100644 --- a/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml +++ b/HeterogeneousCore/CUDAServices/plugins/BuildFile.xml @@ -9,7 +9,8 @@ - + + diff --git a/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc b/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc index 6271b1cc0941b..f0a2a5e897a99 100644 --- a/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc +++ b/HeterogeneousCore/CUDAServices/plugins/CUDAMonitoringService.cc @@ -11,7 +11,7 @@ #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/ServiceRegistry/interface/ServiceMaker.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/deviceAllocatorStatus.h" @@ -37,10 +37,11 @@ class CUDAMonitoringService { CUDAMonitoringService::CUDAMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) { // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor - edm::Service cudaService; - if (!cudaService->enabled()) + edm::Service cuda; + if (not cuda or not cuda->enabled()) return; - numberOfDevices_ = cudaService->numberOfDevices(); + + numberOfDevices_ = cuda->numberOfDevices(); if (config.getUntrackedParameter("memoryConstruction")) { registry.watchPostModuleConstruction(this, &CUDAMonitoringService::postModuleConstruction); diff --git a/HeterogeneousCore/CUDAServices/src/CUDAService.cc b/HeterogeneousCore/CUDAServices/plugins/CUDAService.cc similarity index 90% rename from HeterogeneousCore/CUDAServices/src/CUDAService.cc rename to HeterogeneousCore/CUDAServices/plugins/CUDAService.cc index baa5c340b71c9..c0dc04ba008b9 100644 --- a/HeterogeneousCore/CUDAServices/src/CUDAService.cc +++ b/HeterogeneousCore/CUDAServices/plugins/CUDAService.cc @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include #include @@ -16,7 +18,7 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/ResourceInformation.h" #include "FWCore/Utilities/interface/ReusableObjectHolder.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h" #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h" #include "HeterogeneousCore/CUDAUtilities/interface/cachingAllocators.h" @@ -26,6 +28,34 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/nvmlCheck.h" +class CUDAService : public CUDAInterface { +public: + CUDAService(edm::ParameterSet const& config); + ~CUDAService() override; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + bool enabled() const final { return enabled_; } + + int numberOfDevices() const final { return numberOfDevices_; } + + // Return the (major, minor) CUDA compute capability of the given device. + std::pair computeCapability(int device) const final { + int size = computeCapabilities_.size(); + if (device < 0 or device >= size) { + throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " + + std::to_string(size - 1)); + } + return computeCapabilities_[device]; + } + +private: + int numberOfDevices_ = 0; + std::vector> computeCapabilities_; + bool enabled_ = false; + bool verbose_ = false; +}; + void setCudaLimit(cudaLimit limit, const char* name, size_t request) { // read the current device int device; @@ -91,6 +121,14 @@ constexpr unsigned int getCudaCoresPerSM(unsigned int major, unsigned int minor) case 86: // SM 8.6: GA10x class return 128; + // Ada Lovelace architectures + case 89: // SM 8.9: AD10x class + return 128; + + // Hopper architecture + case 90: // SM 9.0: GH100 class + return 128; + // unknown architecture, return a default value default: return 64; @@ -109,7 +147,7 @@ namespace { auto streamPtr = cms::cuda::getStreamCache().get(); - std::vector > buffers; + std::vector> buffers; buffers.reserve(bufferSizes.size()); for (auto size : bufferSizes) { buffers.push_back(allocate(size, streamPtr.get())); @@ -137,8 +175,7 @@ namespace { /// Constructor CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter("verbose")) { - bool configEnabled = config.getUntrackedParameter("enabled"); - if (not configEnabled) { + if (not config.getUntrackedParameter("enabled")) { edm::LogInfo("CUDAService") << "CUDAService disabled by configuration"; return; } @@ -386,8 +423,8 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU // Preallocate buffers if asked to auto const& allocator = config.getUntrackedParameter("allocator"); - devicePreallocate(numberOfDevices_, allocator.getUntrackedParameter >("devicePreallocate")); - hostPreallocate(allocator.getUntrackedParameter >("hostPreallocate")); + devicePreallocate(numberOfDevices_, allocator.getUntrackedParameter>("devicePreallocate")); + hostPreallocate(allocator.getUntrackedParameter>("hostPreallocate")); } CUDAService::~CUDAService() { @@ -431,11 +468,21 @@ void CUDAService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) "the default value."); edm::ParameterSetDescription allocator; - allocator.addUntracked >("devicePreallocate", std::vector{}) + allocator.addUntracked>("devicePreallocate", std::vector{}) ->setComment("Preallocates buffers of given bytes on all devices"); - allocator.addUntracked >("hostPreallocate", std::vector{}) + allocator.addUntracked>("hostPreallocate", std::vector{}) ->setComment("Preallocates buffers of given bytes on the host"); desc.addUntracked("allocator", allocator); descriptions.add("CUDAService", desc); } + +namespace edm { + namespace service { + inline bool isProcessWideService(CUDAService const*) { return true; } + } // namespace service +} // namespace edm + +#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" +using CUDAServiceMaker = edm::serviceregistry::ParameterSetMaker; +DEFINE_FWK_SERVICE_MAKER(CUDAService, CUDAServiceMaker); diff --git a/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc b/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc index 5cbf1819618b4..5e7db50a953f6 100644 --- a/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc +++ b/HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc @@ -1,8 +1,3 @@ -// -*- C++ -*- -// -// Package: HeterogeneousCore/CUDAServices -// Class : NVProfilerService - #include #include #include @@ -40,7 +35,7 @@ #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/Utilities/interface/ProductKindOfType.h" #include "FWCore/Utilities/interface/TimeOfDay.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HLTrigger/Timer/interface/ProcessCallGraph.h" using namespace std::string_literals; @@ -310,8 +305,10 @@ NVProfilerService::NVProfilerService(edm::ParameterSet const& config, edm::Activ : highlightModules_(config.getUntrackedParameter>("highlightModules")), showModulePrefetching_(config.getUntrackedParameter("showModulePrefetching")), skipFirstEvent_(config.getUntrackedParameter("skipFirstEvent")) { - // make sure that CUDA is initialised, and that the CUDAService destructor is called after this service's destructor - edm::Service cudaService; + // make sure that CUDA is initialised, and that the CUDAInterface destructor is called after this service's destructor + edm::Service cuda; + if (not cuda or not cuda->enabled()) + return; std::sort(highlightModules_.begin(), highlightModules_.end()); diff --git a/HeterogeneousCore/CUDAServices/plugins/plugins.cc b/HeterogeneousCore/CUDAServices/plugins/plugins.cc deleted file mode 100644 index 169bfd19195f6..0000000000000 --- a/HeterogeneousCore/CUDAServices/plugins/plugins.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" -#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" - -DEFINE_FWK_SERVICE_MAKER(CUDAService, edm::serviceregistry::ParameterSetMaker); diff --git a/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc b/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc index 4bc852f3a60c4..97e0e2b10843d 100644 --- a/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc +++ b/HeterogeneousCore/CUDAServices/src/numberOfDevices.cc @@ -1,10 +1,10 @@ #include "HeterogeneousCore/CUDAServices/interface/numberOfDevices.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "FWCore/ServiceRegistry/interface/Service.h" namespace cms::cuda { int numberOfDevices() { - edm::Service cs; - return cs->enabled() ? cs->numberOfDevices() : 0; + edm::Service cuda; + return (cuda and cuda->enabled()) ? cuda->numberOfDevices() : 0; } } // namespace cms::cuda diff --git a/HeterogeneousCore/CUDAServices/test/BuildFile.xml b/HeterogeneousCore/CUDAServices/test/BuildFile.xml index 99219eef7e6fd..707f469ff941d 100644 --- a/HeterogeneousCore/CUDAServices/test/BuildFile.xml +++ b/HeterogeneousCore/CUDAServices/test/BuildFile.xml @@ -1,12 +1,14 @@ + + + + + + + + - - + - - - - - diff --git a/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp b/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp index 0a2daf4b71bb6..5cda281e4fc0f 100644 --- a/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp +++ b/HeterogeneousCore/CUDAServices/test/testCUDAService.cpp @@ -6,26 +6,49 @@ #include -#include "catch.hpp" +#include + +#include -#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSetReader/interface/ParameterSetReader.h" -#include "FWCore/PluginManager/interface/PluginManager.h" -#include "FWCore/PluginManager/interface/standard.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/ServiceRegistry/interface/ServiceRegistry.h" #include "FWCore/ServiceRegistry/interface/ServiceToken.h" #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/Utilities/interface/ResourceInformation.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" namespace { - CUDAService makeCUDAService(edm::ParameterSet ps) { - auto desc = edm::ConfigurationDescriptions("Service", "CUDAService"); - CUDAService::fillDescriptions(desc); - desc.validate(ps, "CUDAService"); - return CUDAService(ps); + std::string makeProcess(std::string const& name) { + return fmt::format(R"_( +import FWCore.ParameterSet.Config as cms +process = cms.Process('{}') +)_", + name); + } + + void addResourceInformationService(std::string& config) { + config += R"_( +process.add_(cms.Service('ResourceInformationService')) + )_"; + } + + void addCUDAService(std::string& config, bool enabled = true) { + config += fmt::format(R"_( +process.add_(cms.Service('CUDAService', + enabled = cms.untracked.bool({}), + verbose = cms.untracked.bool(True) +)) + )_", + enabled ? "True" : "False"); + } + + edm::ServiceToken getServiceToken(std::string const& config) { + std::unique_ptr params; + edm::makeParameterSets(config, params); + return edm::ServiceToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); } } // namespace @@ -40,31 +63,27 @@ TEST_CASE("Tests of CUDAService", "[CUDAService]") { << ret << ") " << cudaGetErrorString(ret) << ". Running only tests not requiring devices."); } - // Make Service system available as CUDAService depends on ResourceInformationService - std::vector psets; - edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets); + std::string config = makeProcess("Test"); + addCUDAService(config); + auto serviceToken = getServiceToken(config); edm::ServiceRegistry::Operate operate(serviceToken); - SECTION("CUDAService enabled") { - edm::ParameterSet ps; - ps.addUntrackedParameter("enabled", true); - SECTION("Enabled only if there are CUDA capable GPUs") { - auto cs = makeCUDAService(ps); - if (deviceCount <= 0) { - REQUIRE(cs.enabled() == false); - WARN("CUDAService is disabled as there are no CUDA GPU devices"); - } else { - REQUIRE(cs.enabled() == true); - INFO("CUDAService is enabled"); - } - } - + SECTION("Enable the CUDAService only if there are CUDA capable GPUs") { + edm::Service cuda; if (deviceCount <= 0) { + REQUIRE((not cuda or not cuda->enabled())); + WARN("CUDAService is not present, or disabled because there are no CUDA GPU devices"); return; + } else { + REQUIRE(cuda); + REQUIRE(cuda->enabled()); + INFO("CUDAService is enabled"); } + } - auto cs = makeCUDAService(ps); + SECTION("CUDAService enabled") { int driverVersion = 0, runtimeVersion = 0; + edm::Service cuda; ret = cudaDriverGetVersion(&driverVersion); if (ret != cudaSuccess) { FAIL("Unable to query the CUDA driver version from the CUDA runtime API: (" << ret << ") " @@ -82,8 +101,8 @@ TEST_CASE("Tests of CUDAService", "[CUDAService]") { // Test that the number of devices found by the service // is the same as detected by the CUDA runtime API - REQUIRE(cs.numberOfDevices() == deviceCount); - WARN("Detected " << cs.numberOfDevices() << " CUDA Capable device(s)"); + REQUIRE(cuda->numberOfDevices() == deviceCount); + WARN("Detected " << cuda->numberOfDevices() << " CUDA Capable device(s)"); // Test that the compute capabilities of each device // are the same as detected by the CUDA runtime API @@ -95,28 +114,25 @@ TEST_CASE("Tests of CUDAService", "[CUDAService]") { << cudaGetErrorString(ret)); } - REQUIRE(deviceProp.major == cs.computeCapability(i).first); - REQUIRE(deviceProp.minor == cs.computeCapability(i).second); + REQUIRE(deviceProp.major == cuda->computeCapability(i).first); + REQUIRE(deviceProp.minor == cuda->computeCapability(i).second); INFO("Device " << i << ": " << deviceProp.name << "\n CUDA Capability Major/Minor version number: " << deviceProp.major << "." << deviceProp.minor); } } SECTION("With ResourceInformationService available") { - edmplugin::PluginManager::configure(edmplugin::standard::config()); - - std::string const config = R"_(import FWCore.ParameterSet.Config as cms -process = cms.Process('Test') -process.add_(cms.Service('ResourceInformationService')) -)_"; - std::unique_ptr params; - edm::makeParameterSets(config, params); - edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); - edm::ServiceRegistry::Operate operate2(tempToken); - - auto cs = makeCUDAService(edm::ParameterSet{}); - REQUIRE(cs.enabled()); + std::string config = makeProcess("Test"); + addResourceInformationService(config); + addCUDAService(config); + auto serviceToken = getServiceToken(config); + edm::ServiceRegistry::Operate operate(serviceToken); + + edm::Service cuda; + REQUIRE(cuda); + REQUIRE(cuda->enabled()); edm::Service ri; + REQUIRE(ri); REQUIRE(ri->gpuModels().size() > 0); REQUIRE(ri->nvidiaDriverVersion().size() > 0); REQUIRE(ri->cudaDriverVersion() == driverVersion); @@ -124,11 +140,14 @@ process.add_(cms.Service('ResourceInformationService')) } } - SECTION("Force to be disabled") { - edm::ParameterSet ps; - ps.addUntrackedParameter("enabled", false); - auto cs = makeCUDAService(ps); - REQUIRE(cs.enabled() == false); - REQUIRE(cs.numberOfDevices() == 0); + SECTION("CUDAService disabled") { + std::string config = makeProcess("Test"); + addCUDAService(config, false); + auto serviceToken = getServiceToken(config); + edm::ServiceRegistry::Operate operate(serviceToken); + + edm::Service cuda; + REQUIRE(cuda->enabled() == false); + REQUIRE(cuda->numberOfDevices() == 0); } } diff --git a/HeterogeneousCore/CUDAServices/test/test_main.cpp b/HeterogeneousCore/CUDAServices/test/test_main.cpp index 0c7c351f437f5..417b2599ee8c5 100644 --- a/HeterogeneousCore/CUDAServices/test/test_main.cpp +++ b/HeterogeneousCore/CUDAServices/test/test_main.cpp @@ -1,2 +1,16 @@ #define CATCH_CONFIG_MAIN -#include "catch.hpp" +#include + +#include "FWCore/PluginManager/interface/PluginManager.h" +#include "FWCore/PluginManager/interface/standard.h" + +class ServiceRegistryListener : public Catch::TestEventListenerBase { +public: + using Catch::TestEventListenerBase::TestEventListenerBase; // inherit constructor + + void testRunStarting(Catch::TestRunInfo const& testRunInfo) override { + edmplugin::PluginManager::configure(edmplugin::standard::config()); + } +}; + +CATCH_REGISTER_LISTENER(ServiceRegistryListener); diff --git a/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc b/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc index 2778ed02f3ac6..09d85f6c1c47d 100644 --- a/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc +++ b/HeterogeneousCore/CUDATest/plugins/TestCUDAAnalyzerGPU.cc @@ -8,7 +8,7 @@ #include "CUDADataFormats/Common/interface/Product.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDATest/interface/Thing.h" #include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h" @@ -38,8 +38,8 @@ TestCUDAAnalyzerGPU::TestCUDAAnalyzerGPU(edm::ParameterSet const& iConfig) srcToken_(consumes>(iConfig.getParameter("src"))), minValue_(iConfig.getParameter("minValue")), maxValue_(iConfig.getParameter("maxValue")) { - edm::Service cs; - if (cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { auto streamPtr = cms::cuda::getStreamCache().get(); gpuAlgo_ = std::make_unique(streamPtr.get()); } diff --git a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc index 9b6fe85636026..b8b3f9058d496 100644 --- a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc +++ b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEW.cc @@ -10,7 +10,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDACore/interface/ContextState.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDATest/interface/Thing.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" @@ -42,8 +42,8 @@ TestCUDAProducerGPUEW::TestCUDAProducerGPUEW(edm::ParameterSet const& iConfig) : label_{iConfig.getParameter("@module_label")}, srcToken_{consumes>(iConfig.getParameter("src"))}, dstToken_{produces>()} { - edm::Service cs; - if (cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { hostData_ = cms::cuda::make_host_noncached_unique(); } } diff --git a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc index d1e4f94a30d96..80135880bd324 100644 --- a/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc +++ b/HeterogeneousCore/CUDATest/plugins/TestCUDAProducerGPUEWTask.cc @@ -13,7 +13,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDACore/interface/ContextState.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDATest/interface/Thing.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" @@ -49,8 +49,8 @@ TestCUDAProducerGPUEWTask::TestCUDAProducerGPUEWTask(edm::ParameterSet const& iC : label_{iConfig.getParameter("@module_label")}, srcToken_{consumes>(iConfig.getParameter("src"))}, dstToken_{produces>()} { - edm::Service cs; - if (cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { hostData_ = cms::cuda::make_host_noncached_unique(); } } diff --git a/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py b/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py index c80fc70c6d2b7..81121f3610e06 100644 --- a/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py +++ b/HeterogeneousCore/ROCmCore/python/ProcessAcceleratorROCm.py @@ -2,29 +2,44 @@ import os +from HeterogeneousCore.Common.PlatformStatus import PlatformStatus + class ProcessAcceleratorROCm(cms.ProcessAccelerator): def __init__(self): - super(ProcessAcceleratorROCm,self).__init__() + super(ProcessAcceleratorROCm, self).__init__() self._label = "gpu-amd" + def labels(self): - return [self._label] + return [ self._label ] + def enabledLabels(self): - enabled = (os.system("rocmIsEnabled") == 0) - if enabled: - return self.labels() - else: - return [] - def apply(self, process, accelerators): - if not hasattr(process, "ROCmService"): - from HeterogeneousCore.ROCmServices.ROCmService_cfi import ROCmService - process.add_(ROCmService) + # Check if ROCm is available, and if the system has at least one usable device. + # These should be checked on each worker node, because it depends both + # on the architecture and on the actual hardware present in the machine. + status = PlatformStatus(os.waitstatus_to_exitcode(os.system("rocmIsEnabled"))) + return self.labels() if status == PlatformStatus.Success else [] - if not hasattr(process.MessageLogger, "ROCmService"): - process.MessageLogger.ROCmService = cms.untracked.PSet() + def apply(self, process, accelerators): if self._label in accelerators: - process.ROCmService.enabled = True + # Ensure that the ROCmService is loaded + if not hasattr(process, "ROCmService"): + from HeterogeneousCore.ROCmServices.ROCmService_cfi import ROCmService + process.add_(ROCmService) + + # Propagate the ROCmService messages through the MessageLogger + if not hasattr(process.MessageLogger, "ROCmService"): + process.MessageLogger.ROCmService = cms.untracked.PSet() + else: - process.ROCmService.enabled = False - + # Make sure the ROCmService is not loaded + if hasattr(process, "ROCmService"): + del process.ROCmService + + # Drop the ROCmService messages from the MessageLogger + if hasattr(process.MessageLogger, "ROCmService"): + del process.MessageLogger.ROCmService + + +# Ensure this module is kept in the configuration when dumping it cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorROCm, "from HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm import ProcessAcceleratorROCm") diff --git a/HeterogeneousCore/ROCmServices/BuildFile.xml b/HeterogeneousCore/ROCmServices/BuildFile.xml index 0ff47a94f4ebc..061859fab7928 100644 --- a/HeterogeneousCore/ROCmServices/BuildFile.xml +++ b/HeterogeneousCore/ROCmServices/BuildFile.xml @@ -1,11 +1,5 @@ - - - - - - - - - - - + + + + + diff --git a/HeterogeneousCore/ROCmServices/interface/ROCmInterface.h b/HeterogeneousCore/ROCmServices/interface/ROCmInterface.h new file mode 100644 index 0000000000000..b7e20b1dd081c --- /dev/null +++ b/HeterogeneousCore/ROCmServices/interface/ROCmInterface.h @@ -0,0 +1,19 @@ +#ifndef HeterogeneousCore_ROCmServices_interface_ROCmInterface_h +#define HeterogeneousCore_ROCmServices_interface_ROCmInterface_h + +#include + +class ROCmInterface { +public: + ROCmInterface() = default; + virtual ~ROCmInterface() = default; + + virtual bool enabled() const = 0; + + virtual int numberOfDevices() const = 0; + + // Returns the (major, minor) compute capability of the given device. + virtual std::pair computeCapability(int device) const = 0; +}; + +#endif // HeterogeneousCore_ROCmServices_interface_ROCmInterface_h diff --git a/HeterogeneousCore/ROCmServices/interface/ROCmService.h b/HeterogeneousCore/ROCmServices/interface/ROCmService.h deleted file mode 100644 index 851f7cf9f63e0..0000000000000 --- a/HeterogeneousCore/ROCmServices/interface/ROCmService.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef HeterogeneousCore_ROCmServices_interface_ROCmService_h -#define HeterogeneousCore_ROCmServices_interface_ROCmService_h - -#include -#include - -#include "FWCore/Utilities/interface/StreamID.h" - -namespace edm { - class ParameterSet; - class ActivityRegistry; - class ConfigurationDescriptions; -} // namespace edm - -class ROCmService { -public: - ROCmService(edm::ParameterSet const& config); - ~ROCmService(); - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - - bool enabled() const { return enabled_; } - - int numberOfDevices() const { return numberOfDevices_; } - - // major, minor - std::pair computeCapability(int device) const { return computeCapabilities_.at(device); } - -private: - int numberOfDevices_ = 0; - std::vector> computeCapabilities_; - bool enabled_ = false; - bool verbose_ = false; -}; - -namespace edm { - namespace service { - inline bool isProcessWideService(ROCmService const*) { return true; } - } // namespace service -} // namespace edm - -#endif // HeterogeneousCore_ROCmServices_interface_ROCmService_h diff --git a/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml index 42f9e3024fc2f..11220ef4c4fce 100644 --- a/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml +++ b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml @@ -4,9 +4,10 @@ + - + diff --git a/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc index 3bd0f2448f1b4..ff6588292c06f 100644 --- a/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc +++ b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc @@ -11,7 +11,7 @@ #include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/ServiceRegistry/interface/ServiceMaker.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" namespace edm { @@ -36,10 +36,11 @@ class ROCmMonitoringService { ROCmMonitoringService::ROCmMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) { // make sure that ROCm is initialised, and that the ROCmService destructor is called after this service's destructor - edm::Service rocmService; - if (!rocmService->enabled()) + edm::Service service; + if (not service or not service->enabled()) return; - numberOfDevices_ = rocmService->numberOfDevices(); + + numberOfDevices_ = service->numberOfDevices(); if (config.getUntrackedParameter("memoryConstruction")) { registry.watchPostModuleConstruction(this, &ROCmMonitoringService::postModuleConstruction); diff --git a/HeterogeneousCore/ROCmServices/src/ROCmService.cc b/HeterogeneousCore/ROCmServices/plugins/ROCmService.cc similarity index 91% rename from HeterogeneousCore/ROCmServices/src/ROCmService.cc rename to HeterogeneousCore/ROCmServices/plugins/ROCmService.cc index aa737289d9b0e..d8e598e8cad15 100644 --- a/HeterogeneousCore/ROCmServices/src/ROCmService.cc +++ b/HeterogeneousCore/ROCmServices/plugins/ROCmService.cc @@ -16,12 +16,40 @@ #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/ResourceInformation.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" /* #include "HeterogeneousCore/ROCmUtilities/interface/nvmlCheck.h" */ +class ROCmService : public ROCmInterface { +public: + ROCmService(edm::ParameterSet const& config); + ~ROCmService() override; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + bool enabled() const final { return enabled_; } + + int numberOfDevices() const final { return numberOfDevices_; } + + // Return the (major, minor) compute capability of the given device. + std::pair computeCapability(int device) const final { + int size = computeCapabilities_.size(); + if (device < 0 or device >= size) { + throw std::out_of_range("Invalid device index" + std::to_string(device) + ": the valid range is from 0 to " + + std::to_string(size - 1)); + } + return computeCapabilities_[device]; + } + +private: + int numberOfDevices_ = 0; + std::vector> computeCapabilities_; + bool enabled_ = false; + bool verbose_ = false; +}; + void setHipLimit(hipLimit_t limit, const char* name, size_t request) { // read the current device int device; @@ -50,8 +78,7 @@ std::string decodeVersion(int version) { /// Constructor ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter("verbose")) { - bool configEnabled = config.getUntrackedParameter("enabled"); - if (not configEnabled) { + if (not config.getUntrackedParameter("enabled")) { edm::LogInfo("ROCmService") << "ROCmService disabled by configuration"; return; } @@ -357,3 +384,13 @@ void ROCmService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) descriptions.add("ROCmService", desc); } + +namespace edm { + namespace service { + inline bool isProcessWideService(ROCmService const*) { return true; } + } // namespace service +} // namespace edm + +#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" +using ROCmServiceMaker = edm::serviceregistry::ParameterSetMaker; +DEFINE_FWK_SERVICE_MAKER(ROCmService, ROCmServiceMaker); diff --git a/HeterogeneousCore/ROCmServices/plugins/plugins.cc b/HeterogeneousCore/ROCmServices/plugins/plugins.cc deleted file mode 100644 index a418eeced333f..0000000000000 --- a/HeterogeneousCore/ROCmServices/plugins/plugins.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" -#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" - -DEFINE_FWK_SERVICE_MAKER(ROCmService, edm::serviceregistry::ParameterSetMaker); diff --git a/HeterogeneousCore/ROCmServices/test/BuildFile.xml b/HeterogeneousCore/ROCmServices/test/BuildFile.xml index 7fbe8d1931848..65d9bf58ca565 100644 --- a/HeterogeneousCore/ROCmServices/test/BuildFile.xml +++ b/HeterogeneousCore/ROCmServices/test/BuildFile.xml @@ -1,12 +1,14 @@ + + + + + + + + - - + - - - - - diff --git a/HeterogeneousCore/ROCmServices/test/testROCmService.cpp b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp index add934e743d19..343b8150f2f6d 100644 --- a/HeterogeneousCore/ROCmServices/test/testROCmService.cpp +++ b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp @@ -6,28 +6,49 @@ #include -#define CATCH_CONFIG_MAIN -#include "catch.hpp" +#include + +#include #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSetReader/interface/ParameterSetReader.h" -#include "FWCore/PluginManager/interface/PluginManager.h" -#include "FWCore/PluginManager/interface/standard.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/ServiceRegistry/interface/ServiceRegistry.h" #include "FWCore/ServiceRegistry/interface/ServiceToken.h" #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/Utilities/interface/ResourceInformation.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" -#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" namespace { - ROCmService makeROCmService(edm::ParameterSet ps) { - auto desc = edm::ConfigurationDescriptions("Service", "ROCmService"); - ROCmService::fillDescriptions(desc); - desc.validate(ps, "ROCmService"); - return ROCmService(ps); + std::string makeProcess(std::string const& name) { + return fmt::format(R"_( +import FWCore.ParameterSet.Config as cms +process = cms.Process('{}') +)_", + name); + } + + void addResourceInformationService(std::string& config) { + config += R"_( +process.add_(cms.Service('ResourceInformationService')) + )_"; + } + + void addROCmService(std::string& config, bool enabled = true) { + config += fmt::format(R"_( +process.add_(cms.Service('ROCmService', + enabled = cms.untracked.bool({}), + verbose = cms.untracked.bool(True) +)) + )_", + enabled ? "True" : "False"); + } + + edm::ServiceToken getServiceToken(std::string const& config) { + std::unique_ptr params; + edm::makeParameterSets(config, params); + return edm::ServiceToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); } } // namespace @@ -42,31 +63,27 @@ TEST_CASE("Tests of ROCmService", "[ROCmService]") { << ret << ") " << hipGetErrorString(ret) << ". Running only tests not requiring devices."); } - // Make Service system available as ROCmService depends on ResourceInformationService - std::vector psets; - edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets); + std::string config = makeProcess("Test"); + addROCmService(config); + auto serviceToken = getServiceToken(config); edm::ServiceRegistry::Operate operate(serviceToken); - SECTION("ROCmService enabled") { - edm::ParameterSet ps; - ps.addUntrackedParameter("enabled", true); - SECTION("Enabled only if there are ROCm capable GPUs") { - auto cs = makeROCmService(ps); - if (deviceCount <= 0) { - REQUIRE(cs.enabled() == false); - WARN("ROCmService is disabled as there are no ROCm GPU devices"); - } else { - REQUIRE(cs.enabled() == true); - INFO("ROCmService is enabled"); - } - } - + SECTION("Enable the ROCmService only if there are ROCm capable GPUs") { + edm::Service service; if (deviceCount <= 0) { + REQUIRE((not service or not service->enabled())); + WARN("ROCmService is not present, or disabled because there are no ROCm GPU devices"); return; + } else { + REQUIRE(service); + REQUIRE(service->enabled()); + INFO("ROCmService is enabled"); } + } - auto cs = makeROCmService(ps); + SECTION("ROCmService enabled") { int driverVersion = 0, runtimeVersion = 0; + edm::Service service; ret = hipDriverGetVersion(&driverVersion); if (ret != hipSuccess) { FAIL("Unable to query the ROCm driver version from the ROCm runtime API: (" << ret << ") " @@ -84,8 +101,8 @@ TEST_CASE("Tests of ROCmService", "[ROCmService]") { // Test that the number of devices found by the service // is the same as detected by the ROCm runtime API - REQUIRE(cs.numberOfDevices() == deviceCount); - WARN("Detected " << cs.numberOfDevices() << " ROCm Capable device(s)"); + REQUIRE(service->numberOfDevices() == deviceCount); + WARN("Detected " << service->numberOfDevices() << " ROCm Capable device(s)"); // Test that the compute capabilities of each device // are the same as detected by the ROCm runtime API @@ -97,42 +114,41 @@ TEST_CASE("Tests of ROCmService", "[ROCmService]") { << hipGetErrorString(ret)); } - REQUIRE(deviceProp.major == cs.computeCapability(i).first); - REQUIRE(deviceProp.minor == cs.computeCapability(i).second); + REQUIRE(deviceProp.major == service->computeCapability(i).first); + REQUIRE(deviceProp.minor == service->computeCapability(i).second); INFO("Device " << i << ": " << deviceProp.name << "\n ROCm Capability Major/Minor version number: " << deviceProp.major << "." << deviceProp.minor); } } SECTION("With ResourceInformationService available") { - edmplugin::PluginManager::configure(edmplugin::standard::config()); - - std::string const config = R"_(import FWCore.ParameterSet.Config as cms -process = cms.Process('Test') -process.add_(cms.Service('ResourceInformationService')) -)_"; - std::unique_ptr params; - edm::makeParameterSets(config, params); - edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params))); - edm::ServiceRegistry::Operate operate2(tempToken); - - auto cs = makeROCmService(edm::ParameterSet{}); - REQUIRE(cs.enabled()); + std::string config = makeProcess("Test"); + addResourceInformationService(config); + addROCmService(config); + auto serviceToken = getServiceToken(config); + edm::ServiceRegistry::Operate operate(serviceToken); + + edm::Service service; + REQUIRE(service); + REQUIRE(service->enabled()); edm::Service ri; REQUIRE(ri->gpuModels().size() > 0); /* - REQUIRE(ri->nvidiaDriverVersion().size() > 0); - REQUIRE(ri->cudaDriverVersion() == driverVersion); - REQUIRE(ri->cudaRuntimeVersion() == runtimeVersion); + REQUIRE(ri->amdDriverVersion().size() > 0); + REQUIRE(ri->rocmDriverVersion() == driverVersion); + REQUIRE(ri->rocmRuntimeVersion() == runtimeVersion); */ } } SECTION("Force to be disabled") { - edm::ParameterSet ps; - ps.addUntrackedParameter("enabled", false); - auto cs = makeROCmService(ps); - REQUIRE(cs.enabled() == false); - REQUIRE(cs.numberOfDevices() == 0); + std::string config = makeProcess("Test"); + addROCmService(config, false); + auto serviceToken = getServiceToken(config); + edm::ServiceRegistry::Operate operate(serviceToken); + + edm::Service service; + REQUIRE(service->enabled() == false); + REQUIRE(service->numberOfDevices() == 0); } } diff --git a/HeterogeneousCore/ROCmServices/test/test_main.cpp b/HeterogeneousCore/ROCmServices/test/test_main.cpp new file mode 100644 index 0000000000000..417b2599ee8c5 --- /dev/null +++ b/HeterogeneousCore/ROCmServices/test/test_main.cpp @@ -0,0 +1,16 @@ +#define CATCH_CONFIG_MAIN +#include + +#include "FWCore/PluginManager/interface/PluginManager.h" +#include "FWCore/PluginManager/interface/standard.h" + +class ServiceRegistryListener : public Catch::TestEventListenerBase { +public: + using Catch::TestEventListenerBase::TestEventListenerBase; // inherit constructor + + void testRunStarting(Catch::TestRunInfo const& testRunInfo) override { + edmplugin::PluginManager::configure(edmplugin::standard::config()); + } +}; + +CATCH_REGISTER_LISTENER(ServiceRegistryListener); diff --git a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc index 8e10f2f50499e..c5d7f7ac272be 100644 --- a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc +++ b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc @@ -13,7 +13,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "CUDATestDeviceAdditionAlgo.h" @@ -42,9 +42,9 @@ void CUDATestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptio void CUDATestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require CUDA for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The CUDAService is disabled, the test will be skipped.\n"; + edm::Service cuda; + if (not cuda or not cuda->enabled()) { + std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py b/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py index 60dc51b9e9733..2d5c232f73e95 100644 --- a/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py +++ b/HeterogeneousTest/CUDADevice/test/testCUDATestDeviceAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestCUDATestDeviceAdditionModule') +process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi') process.source = cms.Source('EmptySource') -process.CUDAService = cms.Service('CUDAService') - process.cudaTestDeviceAdditionModule = cms.EDAnalyzer('CUDATestDeviceAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc index de7eba987698e..666e9acd537ca 100644 --- a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc +++ b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc @@ -13,7 +13,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "CUDATestKernelAdditionAlgo.h" @@ -42,9 +42,9 @@ void CUDATestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptio void CUDATestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require CUDA for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The CUDAService is disabled, the test will be skipped.\n"; + edm::Service service; + if (not service or not service->enabled()) { + std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py b/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py index 229d40231681e..708a1c6a7d7ba 100644 --- a/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py +++ b/HeterogeneousTest/CUDAKernel/test/testCUDATestKernelAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestCUDATestKernelAdditionModule') +process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi') process.source = cms.Source('EmptySource') -process.CUDAService = cms.Service('CUDAService') - process.cudaTestKernelAdditionModule = cms.EDAnalyzer('CUDATestKernelAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc b/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc index a4dabe7060155..bf60b97b48eb9 100644 --- a/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc +++ b/HeterogeneousTest/CUDAOpaque/plugins/CUDATestOpaqueAdditionModule.cc @@ -11,7 +11,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousTest/CUDAOpaque/interface/DeviceAdditionOpaque.h" class CUDATestOpaqueAdditionModule : public edm::global::EDAnalyzer<> { @@ -38,9 +38,9 @@ void CUDATestOpaqueAdditionModule::fillDescriptions(edm::ConfigurationDescriptio void CUDATestOpaqueAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require CUDA for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The CUDAService is disabled, the test will be skipped.\n"; + edm::Service cuda; + if (not cuda or not cuda->enabled()) { + std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py b/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py index 60d0183823b0f..16eb7c9f248f4 100644 --- a/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py +++ b/HeterogeneousTest/CUDAOpaque/test/testCUDATestAdditionModules.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestCUDATestOpaqueAdditionModule') +process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi') process.source = cms.Source('EmptySource') -process.CUDAService = cms.Service('CUDAService') - process.cudaTestDeviceAdditionModule = cms.EDAnalyzer('CUDATestDeviceAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py b/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py index 244e283ec7c3e..1f54cfd812252 100644 --- a/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py +++ b/HeterogeneousTest/CUDAOpaque/test/testCUDATestOpaqueAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestCUDATestOpaqueAdditionModule') +process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi') process.source = cms.Source('EmptySource') -process.CUDAService = cms.Service('CUDAService') - process.cudaTestOpaqueAdditionModule = cms.EDAnalyzer('CUDATestOpaqueAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc b/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc index 46b1cd2d3761e..fb70aba2474d4 100644 --- a/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc +++ b/HeterogeneousTest/CUDAWrapper/plugins/CUDATestWrapperAdditionModule.cc @@ -13,7 +13,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousTest/CUDAWrapper/interface/DeviceAdditionWrapper.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" @@ -43,9 +43,9 @@ void CUDATestWrapperAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require CUDA for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The CUDAService is disabled, the test will be skipped.\n"; + edm::Service cuda; + if (not cuda or not cuda->enabled()) { + std::cout << "The CUDAService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py b/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py index b3e37de34a951..af5e9dfb5f708 100644 --- a/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py +++ b/HeterogeneousTest/CUDAWrapper/test/testCUDATestWrapperAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestCUDATestWrapperAdditionModule') +process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi') process.source = cms.Source('EmptySource') -process.CUDAService = cms.Service('CUDAService') - process.cudaTestWrapperAdditionModule = cms.EDAnalyzer('CUDATestWrapperAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc index 7cb12d3b0ce70..bf46ae35da8bf 100644 --- a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc +++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc @@ -13,7 +13,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" #include "ROCmTestDeviceAdditionAlgo.h" @@ -42,9 +42,9 @@ void ROCmTestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptio void ROCmTestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require ROCm for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The ROCmService is disabled, the test will be skipped.\n"; + edm::Service rocm; + if (not rocm or not rocm->enabled()) { + std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py b/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py index 5e31e902452f0..1c93e667741d6 100644 --- a/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py +++ b/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestROCmTestDeviceAdditionModule') +process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi') process.source = cms.Source('EmptySource') -process.ROCmService = cms.Service('ROCmService') - process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc index cab3415e4551d..c33e42e3c49b0 100644 --- a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc +++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc @@ -13,7 +13,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" #include "ROCmTestKernelAdditionAlgo.h" @@ -42,9 +42,9 @@ void ROCmTestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptio void ROCmTestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require ROCm for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The ROCmService is disabled, the test will be skipped.\n"; + edm::Service rocm; + if (not rocm or not rocm->enabled()) { + std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py b/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py index b05991338da3b..fb6b54564bbea 100644 --- a/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py +++ b/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestROCmTestKernelAdditionModule') +process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi') process.source = cms.Source('EmptySource') -process.ROCmService = cms.Service('ROCmService') - process.rocmTestKernelAdditionModule = cms.EDAnalyzer('ROCmTestKernelAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc index 901b6eac51122..e3315fa0ff0e4 100644 --- a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc +++ b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc @@ -11,7 +11,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #include "HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h" class ROCmTestOpaqueAdditionModule : public edm::global::EDAnalyzer<> { @@ -38,9 +38,9 @@ void ROCmTestOpaqueAdditionModule::fillDescriptions(edm::ConfigurationDescriptio void ROCmTestOpaqueAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require ROCm for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The ROCmService is disabled, the test will be skipped.\n"; + edm::Service rocm; + if (not rocm or not rocm->enabled()) { + std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py b/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py index 2ae6853a8e7ee..a2156489b7c21 100644 --- a/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py +++ b/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestROCmTestOpaqueAdditionModule') +process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi') process.source = cms.Source('EmptySource') -process.ROCmService = cms.Service('ROCmService') - process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py b/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py index 05c4bf20d3f17..0d6f67183620d 100644 --- a/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py +++ b/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestROCmTestOpaqueAdditionModule') +process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi') process.source = cms.Source('EmptySource') -process.ROCmService = cms.Service('ROCmService') - process.rocmTestOpaqueAdditionModule = cms.EDAnalyzer('ROCmTestOpaqueAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc b/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc index 48b2b9dc91a20..010e9f9dad618 100644 --- a/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc +++ b/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc @@ -13,7 +13,7 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h" #include "HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h" #include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" @@ -43,9 +43,9 @@ void ROCmTestWrapperAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const { // require ROCm for running - edm::Service cs; - if (not cs->enabled()) { - std::cout << "The ROCmService is disabled, the test will be skipped.\n"; + edm::Service rocm; + if (not rocm or not rocm->enabled()) { + std::cout << "The ROCmService is not available or disabled, the test will be skipped.\n"; return; } diff --git a/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py b/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py index b493b484ed82a..56ca14ed90f8b 100644 --- a/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py +++ b/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py @@ -1,11 +1,10 @@ import FWCore.ParameterSet.Config as cms process = cms.Process('TestROCmTestWrapperAdditionModule') +process.load('HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi') process.source = cms.Source('EmptySource') -process.ROCmService = cms.Service('ROCmService') - process.rocmTestWrapperAdditionModule = cms.EDAnalyzer('ROCmTestWrapperAdditionModule', size = cms.uint32( 1024*1024 ) ) diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc index c08d27c4ad196..9edf3ad0087b1 100644 --- a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc @@ -27,7 +27,7 @@ #include "FWCore/Utilities/interface/ESGetToken.h" #include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "EcalRecHitBuilderKernels.h" diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc b/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc index 4ef1d4530722d..d73b1fbb82777 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/EERecHitGPUtoSoA.cc @@ -22,7 +22,7 @@ #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDACore/interface/ContextState.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "FWCore/ServiceRegistry/interface/Service.h" diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc index 2322128fb09a3..01415c7834807 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HEBRecHitGPUtoSoA.cc @@ -22,7 +22,7 @@ #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDACore/interface/ContextState.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "FWCore/ServiceRegistry/interface/Service.h" diff --git a/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc b/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc index 16a252fb33e48..b26800a5aff52 100644 --- a/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc +++ b/RecoLocalCalo/HGCalRecProducers/plugins/HEFRecHitGPUtoSoA.cc @@ -22,7 +22,7 @@ #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDACore/interface/ContextState.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "FWCore/ServiceRegistry/interface/Service.h" diff --git a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc index b598947423adf..193e5b8bba0f7 100644 --- a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc +++ b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc @@ -6,7 +6,7 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "SimpleAlgoGPU.h" diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc index 6f83a8b414485..9e19c5ec5ff15 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc @@ -34,7 +34,7 @@ #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" // local includes diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 76cc641d365c5..fd6109958f8ef 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -34,7 +34,7 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" // local includes diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc index 97530a9567e22..6765703f35a73 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc @@ -22,7 +22,7 @@ #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/EDMException.h" #include "FWCore/Utilities/interface/isFinite.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "TrackingTools/DetLayers/interface/BarrelDetLayer.h" #include "CAHitNtupletGeneratorOnGPU.h" @@ -246,8 +246,8 @@ template void CAHitNtupletGeneratorOnGPU::beginJob() { if (m_params.onGPU_) { // allocate pinned host memory only if CUDA is available - edm::Service cs; - if (cs and cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { cudaCheck(cudaMalloc(&m_counters, sizeof(Counters))); cudaCheck(cudaMemset(m_counters, 0, sizeof(Counters))); } @@ -261,8 +261,8 @@ template void CAHitNtupletGeneratorOnGPU::endJob() { if (m_params.onGPU_) { // print the gpu statistics and free pinned host memory only if CUDA is available - edm::Service cs; - if (cs and cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { if (m_params.doStats_) { // crash on multi-gpu processes CAHitNtupletGeneratorKernelsGPU::printCounters(m_counters); diff --git a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc index 8b0de1c739076..a62c6efb5abdb 100644 --- a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc +++ b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc @@ -12,7 +12,7 @@ #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" @@ -48,8 +48,8 @@ class BeamSpotToCUDA : public edm::global::EDProducer beginStream(edm::StreamID) const override { - edm::Service cs; - if (cs->enabled()) { + edm::Service cuda; + if (cuda and cuda->enabled()) { return std::make_unique(); } else { return nullptr;