diff --git a/HeterogeneousCore/ROCmServices/BuildFile.xml b/HeterogeneousCore/ROCmServices/BuildFile.xml
new file mode 100644
index 0000000000000..0ff47a94f4ebc
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/BuildFile.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/interface/ROCmService.h b/HeterogeneousCore/ROCmServices/interface/ROCmService.h
new file mode 100644
index 0000000000000..c78ec27f51d80
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/interface/ROCmService.h
@@ -0,0 +1,45 @@
+#ifndef HeterogeneousCore_ROCmServices_interface_ROCmService_h
+#define HeterogeneousCore_ROCmServices_interface_ROCmService_h
+
+#include
+#include
+
+#include "FWCore/Utilities/interface/StreamID.h"
+
+// Forward declarations of framework types, so that this header does not need their full definitions.
+namespace edm {
+ class ParameterSet;
+ class ActivityRegistry;
+ class ConfigurationDescriptions;
+} // namespace edm
+
+/// Service that initialises the ROCm runtime and records basic information about the available devices.
+class ROCmService {
+public:
+  /// Reads the "enabled", "verbose" and "limits" untracked parameters and initialises the devices.
+  ROCmService(edm::ParameterSet const& config);
+  /// If the service is enabled, synchronises and resets every device.
+  ~ROCmService();
+
+  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+  /// True only if the constructor ran the device initialisation to completion.
+  bool enabled() const { return enabled_; }
+
+  /// Number of devices reported by the runtime; stays 0 when the service is disabled by configuration.
+  int numberOfDevices() const { return numberOfDevices_; }
+
+  /// Compute capability of the given device as a (major, minor) pair.
+  /// Uses std::vector::at(), so an invalid index throws std::out_of_range.
+  std::pair<int, int> computeCapability(int device) const { return computeCapabilities_.at(device); }
+
+  // Returns the id of device with most free memory. If none is found, returns -1.
+  int deviceWithMostFreeMemory() const;
+
+private:
+  int numberOfDevices_ = 0;
+  // one (major, minor) entry per device, filled in device order by the constructor
+  std::vector<std::pair<int, int>> computeCapabilities_;
+  bool enabled_ = false;
+  bool verbose_ = false;
+};
+
+// Overload that reports ROCmService as a process-wide service: it always returns true.
+namespace edm::service {
+  inline bool isProcessWideService(ROCmService const*) { return true; }
+}  // namespace edm::service
+
+#endif // HeterogeneousCore_ROCmServices_interface_ROCmService_h
diff --git a/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..42f9e3024fc2f
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
new file mode 100644
index 0000000000000..3bd0f2448f1b4
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
@@ -0,0 +1,120 @@
+#include
+
+#include
+
+#include "DataFormats/Provenance/interface/ModuleDescription.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
+#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+// Forward declaration: StreamContext is only used by reference in this file.
+namespace edm {
+ class StreamContext;
+}
+
+/// Service that prints the memory usage of every ROCm device at selected points of the job.
+class ROCmMonitoringService {
+public:
+  /// Registers the callbacks selected in the configuration on the activity registry.
+  ROCmMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry);
+  ~ROCmMonitoringService() = default;
+
+  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+  // activity handlers: each one logs the used and total memory of all devices
+  void postModuleConstruction(edm::ModuleDescription const& desc);
+  void postModuleBeginStream(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc);
+  void postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc);
+  void postEvent(edm::StreamContext const& sc);
+
+private:
+  // number of devices to report on; stays 0 when the ROCmService is not enabled
+  int numberOfDevices_ = 0;
+};
+
+/// Register the memory-monitoring callbacks selected in the configuration.
+///
+/// If the ROCmService is not enabled, nothing is registered and numberOfDevices_ stays 0.
+///
+/// @param config    service configuration; reads the untracked booleans "memoryConstruction",
+///                  "memoryBeginStream", "memoryPerModule" and "memoryPerEvent"
+/// @param registry  activity registry on which the selected callbacks are registered
+ROCmMonitoringService::ROCmMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
+  // make sure that ROCm is initialised, and that the ROCmService destructor is called after this service's destructor
+  edm::Service<ROCmService> rocmService;
+  if (!rocmService->enabled())
+    return;
+  numberOfDevices_ = rocmService->numberOfDevices();
+
+  if (config.getUntrackedParameter<bool>("memoryConstruction")) {
+    registry.watchPostModuleConstruction(this, &ROCmMonitoringService::postModuleConstruction);
+  }
+  if (config.getUntrackedParameter<bool>("memoryBeginStream")) {
+    registry.watchPostModuleBeginStream(this, &ROCmMonitoringService::postModuleBeginStream);
+  }
+  if (config.getUntrackedParameter<bool>("memoryPerModule")) {
+    registry.watchPostModuleEvent(this, &ROCmMonitoringService::postModuleEvent);
+  }
+  if (config.getUntrackedParameter<bool>("memoryPerEvent")) {
+    registry.watchPostEvent(this, &ROCmMonitoringService::postEvent);
+  }
+}
+
+/// Describe the configuration of the service: four untracked booleans, one per monitoring point.
+/// "memoryConstruction" defaults to false; the other three default to true.
+void ROCmMonitoringService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+  edm::ParameterSetDescription desc;
+
+  desc.addUntracked<bool>("memoryConstruction", false)
+      ->setComment("Print memory information for each device after the construction of each module");
+  desc.addUntracked<bool>("memoryBeginStream", true)
+      ->setComment("Print memory information for each device after the beginStream() of each module");
+  desc.addUntracked<bool>("memoryPerModule", true)
+      ->setComment("Print memory information for each device after the event of each module");
+  desc.addUntracked<bool>("memoryPerEvent", true)
+      ->setComment("Print memory information for each device after each event");
+
+  descriptions.add("ROCmMonitoringService", desc);
+  descriptions.setComment(
+      "The memory information is the global state of the device. This gets confusing if there are multiple processes "
+      "running on the same device. Probably the information retrieval should be re-thought?");
+}
+
+// activity handlers
+namespace {
+  /// Append the used and total memory of each device, in MB, to a log message.
+  ///
+  /// @param log  any object that supports operator<< (the callers in this file pass an edm::LogPrint)
+  /// @param num  number of devices to query, with indices 0 .. num-1
+  ///
+  /// The current device is saved on entry and restored before returning normally.
+  template <typename T>
+  void dumpUsedMemory(T& log, int num) {
+    int old = 0;
+    hipCheck(hipGetDevice(&old));
+    constexpr auto mbytes = 1 << 20;
+    for (int i = 0; i < num; ++i) {
+      size_t freeMemory, totalMemory;
+      // hipMemGetInfo() reports on the current device, so select device i first
+      hipCheck(hipSetDevice(i));
+      hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
+      log << "\n"
+          << i << ": " << (totalMemory - freeMemory) / mbytes << " MB used / " << totalMemory / mbytes << " MB total";
+    }
+    hipCheck(hipSetDevice(old));
+  }
+}  // namespace
+
+/// Log the memory usage of every device after a module has been constructed.
+void ROCmMonitoringService::postModuleConstruction(edm::ModuleDescription const& desc) {
+  edm::LogPrint message("ROCmMonitoringService");
+  message << "ROCm device memory after construction of " << desc.moduleLabel() << " (" << desc.moduleName() << ")";
+  dumpUsedMemory(message, numberOfDevices_);
+}
+
+/// Log the memory usage of every device after the beginStream() of a module.
+void ROCmMonitoringService::postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
+  auto const& module = *mcc.moduleDescription();
+  edm::LogPrint message("ROCmMonitoringService");
+  message << "ROCm device memory after beginStream() of " << module.moduleLabel() << " (" << module.moduleName()
+          << ")";
+  dumpUsedMemory(message, numberOfDevices_);
+}
+
+/// Log the memory usage of every device after a module has processed an event.
+void ROCmMonitoringService::postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
+  auto const& module = *mcc.moduleDescription();
+  edm::LogPrint message("ROCmMonitoringService");
+  message << "ROCm device memory after processing an event by " << module.moduleLabel() << " ("
+          << module.moduleName() << ")";
+  dumpUsedMemory(message, numberOfDevices_);
+}
+
+/// Log the memory usage of every device at the end of an event; the stream context is not used.
+void ROCmMonitoringService::postEvent(edm::StreamContext const& sc) {
+  edm::LogPrint message("ROCmMonitoringService");
+  message << "ROCm device memory after event";
+  dumpUsedMemory(message, numberOfDevices_);
+}
+
+// NOTE(review): presumably registers ROCmMonitoringService as a framework service plugin; the macro comes from
+// ServiceMaker.h, which is not visible here.
+DEFINE_FWK_SERVICE(ROCmMonitoringService);
diff --git a/HeterogeneousCore/ROCmServices/plugins/plugins.cc b/HeterogeneousCore/ROCmServices/plugins/plugins.cc
new file mode 100644
index 0000000000000..a418eeced333f
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/plugins/plugins.cc
@@ -0,0 +1,4 @@
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
+
+// ROCmService has a constructor taking only an edm::ParameterSet, hence the ParameterSetMaker.
+DEFINE_FWK_SERVICE_MAKER(ROCmService, edm::serviceregistry::ParameterSetMaker<ROCmService>);
diff --git a/HeterogeneousCore/ROCmServices/src/ROCmService.cc b/HeterogeneousCore/ROCmServices/src/ROCmService.cc
new file mode 100644
index 0000000000000..2cabaed127d99
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/src/ROCmService.cc
@@ -0,0 +1,382 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+/*
+#include
+*/
+
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/Utilities/interface/ResourceInformation.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+/*
+#include "HeterogeneousCore/ROCmUtilities/interface/nvmlCheck.h"
+*/
+
+/// Try to set a HIP resource limit on the current device, and warn if it did not take effect.
+///
+/// @param limit    limit identifier, passed to hipDeviceSetLimit() and hipDeviceGetLimit()
+/// @param name     human-readable name of the limit, used only in the log messages
+/// @param request  value requested for the limit
+///
+/// Problems with the limit itself are reported with edm::LogWarning; only the initial hipGetDevice()
+/// call is wrapped in hipCheck().
+void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
+  // read the current device
+  int device;
+  hipCheck(hipGetDevice(&device));
+  // try to set the requested limit
+  auto result = hipDeviceSetLimit(limit, request);
+  if (hipErrorUnsupportedLimit == result) {
+    edm::LogWarning("ROCmService") << "ROCm device " << device << ": unsupported limit \"" << name << "\"";
+    return;
+  }
+  // read back the limit value
+  size_t value = 0;
+  result = hipDeviceGetLimit(&value, limit);
+  if (hipSuccess != result) {
+    // the read-back failed, so `value` does not hold the current limit and must not be reported
+    edm::LogWarning("ROCmService") << "ROCm device " << device << ": failed to read back limit \"" << name
+                                   << "\" after requesting " << request;
+  } else if (value != request) {
+    edm::LogWarning("ROCmService") << "ROCm device " << device << ": limit \"" << name << "\" set to " << value
+                                   << " instead of requested " << request;
+  }
+}
+
+/// Convert an integer version, as returned by hipDriverGetVersion() or hipRuntimeGetVersion(), to text.
+///
+/// Two encodings are recognised:
+///  - HIP_VERSION style, major * 10000000 + minor * 100000 + patch, e.g. 50422804 -> "5.4.22804";
+///  - CUDA style, major * 1000 + minor * 10, e.g. 11040 -> "11.4" (unchanged from the previous behaviour).
+/// Values of 10000000 and above are treated as HIP_VERSION style.
+std::string decodeVersion(int version) {
+  constexpr int hipMajorScale = 10000000;
+  constexpr int hipMinorScale = 100000;
+  if (version >= hipMajorScale) {
+    return std::to_string(version / hipMajorScale) + '.' + std::to_string(version / hipMinorScale % 100) + '.' +
+           std::to_string(version % hipMinorScale);
+  }
+  return std::to_string(version / 1000) + '.' + std::to_string(version % 1000 / 10);
+}
+
+/// Constructor
+///
+/// Reads the untracked parameters "verbose", "enabled" and "limits". If "enabled" is false, or if
+/// hipGetDeviceCount() fails, the service is left disabled and the constructor returns early.
+/// Otherwise, for each device it logs the properties, records the compute capability, sets the
+/// device flags and applies the requested limits. It then reports the set of device models to the
+/// ResourceInformation service, if that is available, and finally marks the service as enabled.
+ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter<bool>("verbose")) {
+  bool configEnabled = config.getUntrackedParameter<bool>("enabled");
+  if (not configEnabled) {
+    edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
+    return;
+  }
+
+  auto status = hipGetDeviceCount(&numberOfDevices_);
+  if (hipSuccess != status) {
+    edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
+                                   << "Disabling the ROCmService.";
+    return;
+  }
+  computeCapabilities_.reserve(numberOfDevices_);
+
+  /*
+  // AMD system driver version, e.g. 470.57.02
+  char systemDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
+  nvmlCheck(nvmlInitWithFlags(NVML_INIT_FLAG_NO_GPUS | NVML_INIT_FLAG_NO_ATTACH));
+  nvmlCheck(nvmlSystemGetDriverVersion(systemDriverVersion, sizeof(systemDriverVersion)));
+  nvmlCheck(nvmlShutdown());
+  */
+
+  // ROCm driver version, e.g. 11.4
+  // the full version, like 11.4.1 or 11.4.100, is not reported
+  int driverVersion = 0;
+  hipCheck(hipDriverGetVersion(&driverVersion));
+
+  // ROCm runtime version, e.g. 11.4
+  // the full version, like 11.4.1 or 11.4.108, is not reported
+  int runtimeVersion = 0;
+  hipCheck(hipRuntimeGetVersion(&runtimeVersion));
+
+  edm::LogInfo log("ROCmService");
+  if (verbose_) {
+    /*
+    log << "AMD driver: " << systemDriverVersion << '\n';
+    */
+    log << "ROCm driver API: " << decodeVersion(driverVersion) << /*" (compiled with " << decodeVersion(ROCm_VERSION)
+        << ")" */
+        "\n";
+    log << "ROCm runtime API: " << decodeVersion(runtimeVersion)
+        << /*" (compiled with " << decodeVersion(ROCmRT_VERSION)
+        << ")" */
+        "\n";
+    log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
+  } else {
+    log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
+        << decodeVersion(driverVersion)
+        /*
+        << ", AMD driver version " << systemDriverVersion
+        */
+        ;
+  }
+
+  auto const& limits = config.getUntrackedParameter<edm::ParameterSet>("limits");
+  /*
+  auto printfFifoSize = limits.getUntrackedParameter<int>("hipLimitPrintfFifoSize");
+  */
+  auto stackSize = limits.getUntrackedParameter<int>("hipLimitStackSize");
+  auto mallocHeapSize = limits.getUntrackedParameter<int>("hipLimitMallocHeapSize");
+  /*
+  auto devRuntimeSyncDepth = limits.getUntrackedParameter<int>("hipLimitDevRuntimeSyncDepth");
+  auto devRuntimePendingLaunchCount = limits.getUntrackedParameter<int>("hipLimitDevRuntimePendingLaunchCount");
+  */
+
+  // distinct device names, reported to the ResourceInformation service after the loop
+  std::set<std::string> models;
+
+  for (int i = 0; i < numberOfDevices_; ++i) {
+    // read information about the compute device.
+    // see the documentation of hipGetDeviceProperties() for more information.
+    hipDeviceProp_t properties;
+    hipCheck(hipGetDeviceProperties(&properties, i));
+    log << '\n' << "ROCm device " << i << ": " << properties.name;
+    if (verbose_) {
+      log << '\n';
+    }
+    models.insert(std::string(properties.name));
+
+    // compute capabilities
+    computeCapabilities_.emplace_back(properties.major, properties.minor);
+    if (verbose_) {
+      log << " compute capability: " << properties.major << "." << properties.minor;
+    }
+    log << " (sm_" << properties.major << properties.minor << ")";
+    if (verbose_) {
+      log << '\n';
+      log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
+      log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
+      /*
+      log << " single to double performance: " << std::setw(8) << properties.singleToDoublePrecisionPerfRatio
+          << ":1\n";
+      */
+    }
+
+    // compute mode
+    static constexpr const char* computeModeDescription[] = {
+        "default (shared)",            // hipComputeModeDefault
+        "exclusive (single thread)",   // hipComputeModeExclusive
+        "prohibited",                  // hipComputeModeProhibited
+        "exclusive (single process)",  // hipComputeModeExclusiveProcess
+        "unknown"};
+    if (verbose_) {
+      // any mode beyond the known ones is clamped to the last entry, "unknown"
+      log << " compute mode:" << std::right << std::setw(27)
+          << computeModeDescription[std::min(properties.computeMode,
+                                             static_cast<int>(std::size(computeModeDescription)) - 1)]
+          << '\n';
+    }
+
+    // TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with an exception
+    hipCheck(hipSetDevice(i));
+    hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
+
+    // read the free and total amount of memory available for allocation by the device, in bytes.
+    // see the documentation of hipMemGetInfo() for more information.
+    if (verbose_) {
+      size_t freeMemory, totalMemory;
+      hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
+      log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
+          << totalMemory / (1 << 20) << " MB total\n";
+      log << " constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) << " kB\n";
+      log << " L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) << " kB\n";
+    }
+
+    // L1 cache behaviour
+    if (verbose_) {
+      /*
+      static constexpr const char* l1CacheModeDescription[] = {
+          "unknown", "local memory", "global memory", "local and global memory"};
+      int l1CacheMode = properties.localL1CacheSupported + 2 * properties.globalL1CacheSupported;
+      log << " L1 cache mode:" << std::setw(26) << std::right << l1CacheModeDescription[l1CacheMode] << '\n';
+      log << '\n';
+      */
+
+      log << "Other capabilities\n";
+      log << " " << (properties.canMapHostMemory ? "can" : "cannot")
+          << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
+      log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
+          << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
+      log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
+          << " access pageable memory via the host's page tables\n";
+      /*
+      log << " " << (properties.canUseHostPointerForRegisteredMem ? "can" : "cannot")
+          << " access host registered memory at the same virtual address as the host\n";
+      log << " " << (properties.unifiedAddressing ? "shares" : "does not share")
+          << " a unified address space with the host\n";
+      */
+      log << " " << (properties.managedMemory ? "supports" : "does not support")
+          << " allocating managed memory on this system\n";
+      log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
+          << " coherently access managed memory concurrently with the host\n";
+      log << " "
+          << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
+          << " directly access managed memory on the device without migration\n";
+      log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
+          << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
+      log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
+          << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
+      log << '\n';
+    }
+
+    // set and read the ROCm device flags.
+    // see the documentation of hipSetDeviceFlags and hipGetDeviceFlags for more information.
+    if (verbose_) {
+      log << "ROCm flags\n";
+      unsigned int flags;
+      hipCheck(hipGetDeviceFlags(&flags));
+      switch (flags & hipDeviceScheduleMask) {
+        case hipDeviceScheduleAuto:
+          log << " thread policy: default\n";
+          break;
+        case hipDeviceScheduleSpin:
+          log << " thread policy: spin\n";
+          break;
+        case hipDeviceScheduleYield:
+          log << " thread policy: yield\n";
+          break;
+        case hipDeviceScheduleBlockingSync:
+          log << " thread policy: blocking sync\n";
+          break;
+        default:
+          log << " thread policy: undefined\n";
+      }
+      if (flags & hipDeviceMapHost) {
+        log << " pinned host memory allocations: enabled\n";
+      } else {
+        log << " pinned host memory allocations: disabled\n";
+      }
+      if (flags & hipDeviceLmemResizeToMax) {
+        log << " kernel host memory reuse: enabled\n";
+      } else {
+        log << " kernel host memory reuse: disabled\n";
+      }
+      log << '\n';
+    }
+
+    // set and read the ROCm resource limits.
+    // see the documentation of hipDeviceSetLimit() for more information.
+
+    /*
+    // hipLimitPrintfFifoSize controls the size in bytes of the shared FIFO used by the
+    // printf() device system call.
+    if (printfFifoSize >= 0) {
+      setHipLimit(hipLimitPrintfFifoSize, "hipLimitPrintfFifoSize", printfFifoSize);
+    }
+    */
+    // hipLimitStackSize controls the stack size in bytes of each GPU thread.
+    if (stackSize >= 0) {
+      setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
+    }
+    // hipLimitMallocHeapSize controls the size in bytes of the heap used by the malloc()
+    // and free() device system calls.
+    if (mallocHeapSize >= 0) {
+      setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
+    }
+    /*
+    if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
+      // hipLimitDevRuntimeSyncDepth controls the maximum nesting depth of a grid at which
+      // a thread can safely call hipDeviceSynchronize().
+      if (devRuntimeSyncDepth >= 0) {
+        setHipLimit(hipLimitDevRuntimeSyncDepth, "hipLimitDevRuntimeSyncDepth", devRuntimeSyncDepth);
+      }
+      // hipLimitDevRuntimePendingLaunchCount controls the maximum number of outstanding
+      // device runtime launches that can be made from the current device.
+      if (devRuntimePendingLaunchCount >= 0) {
+        setHipLimit(
+            hipLimitDevRuntimePendingLaunchCount, "hipLimitDevRuntimePendingLaunchCount", devRuntimePendingLaunchCount);
+      }
+    }
+    */
+
+    if (verbose_) {
+      size_t value;
+      log << "ROCm limits\n";
+      /*
+      hipCheck(hipDeviceGetLimit(&value, hipLimitPrintfFifoSize));
+      log << " printf buffer size: " << std::setw(10) << value / (1 << 20) << " MB\n";
+      */
+      hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
+      log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
+      hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
+      log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
+      /*
+      if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
+        hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimeSyncDepth));
+        log << " runtime sync depth: " << std::setw(10) << value << '\n';
+        hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimePendingLaunchCount));
+        log << " runtime pending launch count: " << std::setw(10) << value << '\n';
+      }
+      */
+    }
+  }
+
+  edm::Service<edm::ResourceInformation> resourceInformationService;
+  if (resourceInformationService.isAvailable()) {
+    std::vector<std::string> modelsV(models.begin(), models.end());
+    resourceInformationService->setGPUModels(modelsV);
+    /*
+    std::string nvidiaDriverVersion{systemDriverVersion};
+    resourceInformationService->setNvidiaDriverVersion(nvidiaDriverVersion);
+    resourceInformationService->setCudaDriverVersion(driverVersion);
+    resourceInformationService->setCudaRuntimeVersion(runtimeVersion);
+    */
+  }
+
+  if (verbose_) {
+    log << '\n' << "ROCmService fully initialized";
+  }
+  // only reached when every step above succeeded
+  enabled_ = true;
+}
+
+/// Synchronise and reset every device; does nothing if the service was never enabled.
+ROCmService::~ROCmService() {
+  if (not enabled_) {
+    return;
+  }
+  for (int device = 0; device < numberOfDevices_; ++device) {
+    hipCheck(hipSetDevice(device));
+    hipCheck(hipDeviceSynchronize());
+    // Explicitly destroy and clean up all resources associated with the current device in the
+    // current process; any later API call to this device will reinitialise it.
+    // Useful to check for memory leaks.
+    hipCheck(hipDeviceReset());
+  }
+}
+
+/// Describe the configuration of the service: the untracked booleans "enabled" (default true) and
+/// "verbose" (default false), and the nested untracked "limits" parameter set, whose integer entries
+/// all default to -1.
+void ROCmService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+  edm::ParameterSetDescription desc;
+  desc.addUntracked<bool>("enabled", true);
+  desc.addUntracked<bool>("verbose", false);
+
+  edm::ParameterSetDescription limits;
+  /*
+  limits.addUntracked<int>("hipLimitPrintfFifoSize", -1)
+      ->setComment("Size in bytes of the shared FIFO used by the printf() device system call.");
+  */
+  limits.addUntracked<int>("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
+  limits.addUntracked<int>("hipLimitMallocHeapSize", -1)
+      ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
+  limits.addUntracked<int>("hipLimitDevRuntimeSyncDepth", -1)
+      ->setComment("Maximum nesting depth of a grid at which a thread can safely call hipDeviceSynchronize().");
+  limits.addUntracked<int>("hipLimitDevRuntimePendingLaunchCount", -1)
+      ->setComment("Maximum number of outstanding device runtime launches that can be made from the current device.");
+  desc.addUntracked<edm::ParameterSetDescription>("limits", limits)
+      ->setComment(
+          "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
+          "the default value.");
+
+  descriptions.add("ROCmService", desc);
+}
+
+/// Find the device that currently has the most free memory.
+///
+/// Queries hipMemGetInfo() on each device in turn, printing the free and total memory of each one,
+/// and restores the previously selected device before returning.
+///
+/// @return the index of the device with the most free memory, or -1 if no device reports any free
+///         memory (including when there are no devices)
+int ROCmService::deviceWithMostFreeMemory() const {
+  // save the current device
+  int currentDevice;
+  hipCheck(hipGetDevice(&currentDevice));
+
+  size_t maxFreeMemory = 0;
+  int device = -1;
+  for (int i = 0; i < numberOfDevices_; ++i) {
+    size_t freeMemory, totalMemory;
+    hipCheck(hipSetDevice(i));
+    hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
+    edm::LogPrint("ROCmService") << "ROCm device " << i << ": " << freeMemory / (1 << 20) << " MB free / "
+                                 << totalMemory / (1 << 20) << " MB total memory";
+    // strict comparison: on a tie the device with the lowest index is kept
+    if (freeMemory > maxFreeMemory) {
+      maxFreeMemory = freeMemory;
+      device = i;
+    }
+  }
+  // restore the current device
+  hipCheck(hipSetDevice(currentDevice));
+  return device;
+}
diff --git a/HeterogeneousCore/ROCmServices/test/BuildFile.xml b/HeterogeneousCore/ROCmServices/test/BuildFile.xml
new file mode 100644
index 0000000000000..7fbe8d1931848
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/test/testROCmService.cpp b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
new file mode 100644
index 0000000000000..06b2c90c6db8b
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
@@ -0,0 +1,155 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define CATCH_CONFIG_MAIN
+#include "catch.hpp"
+
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h"
+#include "FWCore/PluginManager/interface/PluginManager.h"
+#include "FWCore/PluginManager/interface/standard.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
+#include "FWCore/ServiceRegistry/interface/ServiceToken.h"
+#include "FWCore/Utilities/interface/Exception.h"
+#include "FWCore/Utilities/interface/ResourceInformation.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+namespace {
+  // Validate `ps` against the description published by ROCmService, then build the service from it.
+  ROCmService makeROCmService(edm::ParameterSet ps) {
+    edm::ConfigurationDescriptions descriptions("Service", "ROCmService");
+    ROCmService::fillDescriptions(descriptions);
+    descriptions.validate(ps, "ROCmService");
+    return ROCmService(ps);
+  }
+}  // namespace
+
+// Checks ROCmService against the values reported directly by the ROCm runtime API.
+// The sections that need a device are skipped when hipGetDeviceCount() reports none.
+TEST_CASE("Tests of ROCmService", "[ROCmService]") {
+  // Test setup: check if a simple ROCm runtime API call fails:
+  // if so, skip the test with the ROCmService enabled
+  int deviceCount = 0;
+  auto ret = hipGetDeviceCount(&deviceCount);
+
+  if (ret != hipSuccess) {
+    WARN("Unable to query the ROCm capable devices from the ROCm runtime API: ("
+         << ret << ") " << hipGetErrorString(ret) << ". Running only tests not requiring devices.");
+  }
+
+  // Make Service system available as ROCmService depends on ResourceInformationService
+  std::vector<edm::ParameterSet> psets;
+  edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets);
+  edm::ServiceRegistry::Operate operate(serviceToken);
+
+  SECTION("ROCmService enabled") {
+    edm::ParameterSet ps;
+    ps.addUntrackedParameter<bool>("enabled", true);
+    SECTION("Enabled only if there are ROCm capable GPUs") {
+      auto cs = makeROCmService(ps);
+      if (deviceCount <= 0) {
+        REQUIRE(cs.enabled() == false);
+        WARN("ROCmService is disabled as there are no ROCm GPU devices");
+      } else {
+        REQUIRE(cs.enabled() == true);
+        INFO("ROCmService is enabled");
+      }
+    }
+
+    // the remaining sections all need at least one device
+    if (deviceCount <= 0) {
+      return;
+    }
+
+    auto cs = makeROCmService(ps);
+    int driverVersion = 0, runtimeVersion = 0;
+    ret = hipDriverGetVersion(&driverVersion);
+    if (ret != hipSuccess) {
+      FAIL("Unable to query the ROCm driver version from the ROCm runtime API: (" << ret << ") "
+                                                                                  << hipGetErrorString(ret));
+    }
+    ret = hipRuntimeGetVersion(&runtimeVersion);
+    if (ret != hipSuccess) {
+      FAIL("Unable to query the ROCm runtime API version: (" << ret << ") " << hipGetErrorString(ret));
+    }
+
+    SECTION("ROCm Queries") {
+      WARN("ROCm Driver Version / Runtime Version: " << driverVersion / 1000 << "." << (driverVersion % 100) / 10
+                                                     << " / " << runtimeVersion / 1000 << "."
+                                                     << (runtimeVersion % 100) / 10);
+
+      // Test that the number of devices found by the service
+      // is the same as detected by the ROCm runtime API
+      REQUIRE(cs.numberOfDevices() == deviceCount);
+      WARN("Detected " << cs.numberOfDevices() << " ROCm Capable device(s)");
+
+      // Test that the compute capabilities of each device
+      // are the same as detected by the ROCm runtime API
+      for (int i = 0; i < deviceCount; ++i) {
+        hipDeviceProp_t deviceProp;
+        ret = hipGetDeviceProperties(&deviceProp, i);
+        if (ret != hipSuccess) {
+          FAIL("Unable to query the ROCm properties for device " << i << " from the ROCm runtime API: (" << ret << ") "
+                                                                 << hipGetErrorString(ret));
+        }
+
+        REQUIRE(deviceProp.major == cs.computeCapability(i).first);
+        REQUIRE(deviceProp.minor == cs.computeCapability(i).second);
+        INFO("Device " << i << ": " << deviceProp.name << "\n ROCm Capability Major/Minor version number: "
+                       << deviceProp.major << "." << deviceProp.minor);
+      }
+    }
+
+    SECTION("ROCmService device free memory") {
+      // the two results are only printed, not compared with REQUIRE
+      size_t mem = 0;
+      int dev = -1;
+      for (int i = 0; i < deviceCount; ++i) {
+        size_t free, tot;
+        REQUIRE_NOTHROW(hipCheck(hipSetDevice(i)));
+        REQUIRE_NOTHROW(hipCheck(hipMemGetInfo(&free, &tot)));
+        WARN("Device " << i << " memory total " << tot << " free " << free);
+        if (free > mem) {
+          mem = free;
+          dev = i;
+        }
+      }
+      WARN("Device with most free memory " << dev << "\n"
+                                           << " as given by ROCmService " << cs.deviceWithMostFreeMemory());
+    }
+
+    SECTION("With ResourceInformationService available") {
+      edmplugin::PluginManager::configure(edmplugin::standard::config());
+
+      std::string const config = R"_(import FWCore.ParameterSet.Config as cms
+process = cms.Process('Test')
+process.add_(cms.Service('ResourceInformationService'))
+)_";
+      std::unique_ptr<edm::ParameterSet> params;
+      edm::makeParameterSets(config, params);
+      edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params)));
+      edm::ServiceRegistry::Operate operate2(tempToken);
+
+      auto cs = makeROCmService(edm::ParameterSet{});
+      REQUIRE(cs.enabled());
+      edm::Service<edm::ResourceInformation> ri;
+      REQUIRE(ri->gpuModels().size() > 0);
+      /*
+      REQUIRE(ri->nvidiaDriverVersion().size() > 0);
+      REQUIRE(ri->cudaDriverVersion() == driverVersion);
+      REQUIRE(ri->cudaRuntimeVersion() == runtimeVersion);
+      */
+    }
+  }
+
+  SECTION("Force to be disabled") {
+    edm::ParameterSet ps;
+    ps.addUntrackedParameter<bool>("enabled", false);
+    auto cs = makeROCmService(ps);
+    REQUIRE(cs.enabled() == false);
+    REQUIRE(cs.numberOfDevices() == 0);
+  }
+}
diff --git a/HeterogeneousCore/ROCmServices/test/testROCmService.py b/HeterogeneousCore/ROCmServices/test/testROCmService.py
new file mode 100644
index 0000000000000..d96d02f25be44
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/testROCmService.py
@@ -0,0 +1,20 @@
+# Test configuration: loads the ROCmService with verbose output, using an EmptySource and a
+# maxEvents input of 0.
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process( "TEST" )
+
+process.options = cms.untracked.PSet(
+ numberOfThreads = cms.untracked.uint32( 4 ),
+ numberOfStreams = cms.untracked.uint32( 0 ),
+)
+
+process.load('FWCore.MessageService.MessageLogger_cfi')
+# NOTE(review): presumably enables the "ROCmService" message category with default settings - confirm
+process.MessageLogger.ROCmService = {}
+
+# load the service first, then override its "verbose" parameter
+process.load('HeterogeneousCore.ROCmServices.ROCmService_cfi')
+process.ROCmService.verbose = True
+
+process.source = cms.Source("EmptySource")
+
+process.maxEvents = cms.untracked.PSet(
+ input = cms.untracked.int32( 0 )
+)