-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement a first draft of the ROCmService
- Loading branch information
Showing
9 changed files
with
761 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<!--
  Build description for the library side of this package.
  Everything is wrapped in <iftool name="rocm">; presumably it only takes effect
  when the "rocm" tool is configured (confirm against the SCRAM documentation).
-->
<iftool name="rocm">
  <!-- external tool and framework packages this library uses -->
  <use name="rocm"/>
  <use name="FWCore/MessageLogger"/>
  <use name="FWCore/ParameterSet"/>
  <use name="FWCore/ServiceRegistry"/>
  <use name="FWCore/Utilities"/>
  <use name="HeterogeneousCore/ROCmUtilities" source_only="true"/>
  <!-- library exported to dependent packages -->
  <export>
    <lib name="1"/>
  </export>
</iftool>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#ifndef HeterogeneousCore_ROCmServices_interface_ROCmService_h | ||
#define HeterogeneousCore_ROCmServices_interface_ROCmService_h | ||
|
||
#include <utility> | ||
#include <vector> | ||
|
||
#include "FWCore/Utilities/interface/StreamID.h" | ||
|
||
namespace edm { | ||
class ParameterSet; | ||
class ActivityRegistry; | ||
class ConfigurationDescriptions; | ||
} // namespace edm | ||
|
||
class ROCmService { | ||
public: | ||
ROCmService(edm::ParameterSet const& config); | ||
~ROCmService(); | ||
|
||
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); | ||
|
||
bool enabled() const { return enabled_; } | ||
|
||
int numberOfDevices() const { return numberOfDevices_; } | ||
|
||
// major, minor | ||
std::pair<int, int> computeCapability(int device) const { return computeCapabilities_.at(device); } | ||
|
||
// Returns the id of device with most free memory. If none is found, returns -1. | ||
int deviceWithMostFreeMemory() const; | ||
|
||
private: | ||
int numberOfDevices_ = 0; | ||
std::vector<std::pair<int, int>> computeCapabilities_; | ||
bool enabled_ = false; | ||
bool verbose_ = false; | ||
}; | ||
|
||
namespace edm { | ||
namespace service { | ||
inline bool isProcessWideService(ROCmService const*) { return true; } | ||
} // namespace service | ||
} // namespace edm | ||
|
||
#endif // HeterogeneousCore_ROCmServices_interface_ROCmService_h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<!--
  Build description for the plugin library of this package.
  Everything is wrapped in <iftool name="rocm">; presumably it only takes effect
  when the "rocm" tool is configured (confirm against the SCRAM documentation).
-->
<iftool name="rocm">
  <!-- external tool and packages the plugins use -->
  <use name="rocm"/>
  <use name="DataFormats/Provenance"/>
  <use name="FWCore/MessageLogger"/>
  <use name="FWCore/ParameterSet"/>
  <use name="FWCore/ServiceRegistry"/>
  <use name="HeterogeneousCore/ROCmServices"/>
  <use name="HeterogeneousCore/ROCmUtilities" source_only="true"/>
  <!-- all *.cc files are built into a single library flagged as an EDM plugin -->
  <library file="*.cc" name="HeterogeneousCoreROCmServicesPlugins">
    <flags EDM_PLUGIN="1"/>
  </library>
</iftool>
120 changes: 120 additions & 0 deletions
120
HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#include <iostream> | ||
|
||
#include <hip/hip_runtime.h> | ||
|
||
#include "DataFormats/Provenance/interface/ModuleDescription.h" | ||
#include "FWCore/MessageLogger/interface/MessageLogger.h" | ||
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" | ||
#include "FWCore/ParameterSet/interface/ParameterSet.h" | ||
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" | ||
#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" | ||
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h" | ||
#include "FWCore/ServiceRegistry/interface/Service.h" | ||
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" | ||
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" | ||
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" | ||
|
||
namespace edm { | ||
class StreamContext; | ||
} | ||
|
||
class ROCmMonitoringService { | ||
public: | ||
ROCmMonitoringService(edm::ParameterSet const& iConfig, edm::ActivityRegistry& iRegistry); | ||
~ROCmMonitoringService() = default; | ||
|
||
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); | ||
|
||
void postModuleConstruction(edm::ModuleDescription const& desc); | ||
void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc); | ||
void postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc); | ||
void postEvent(edm::StreamContext const& sc); | ||
|
||
private: | ||
int numberOfDevices_ = 0; | ||
}; | ||
|
||
// Attaches the memory-report callbacks selected in the configuration.
// Nothing is attached, and numberOfDevices_ keeps its default, when the
// ROCmService reports that it is not enabled.
ROCmMonitoringService::ROCmMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
  // Accessing the ROCmService here makes sure that ROCm is initialised, and that
  // the ROCmService destructor is called after this service's destructor.
  edm::Service<ROCmService> rocm;

  if (rocm->enabled()) {
    numberOfDevices_ = rocm->numberOfDevices();

    // Each untracked boolean switches on one kind of report.
    bool const afterConstruction = config.getUntrackedParameter<bool>("memoryConstruction");
    if (afterConstruction) {
      registry.watchPostModuleConstruction(this, &ROCmMonitoringService::postModuleConstruction);
    }

    bool const afterBeginStream = config.getUntrackedParameter<bool>("memoryBeginStream");
    if (afterBeginStream) {
      registry.watchPostModuleBeginStream(this, &ROCmMonitoringService::postModuleBeginStream);
    }

    bool const afterEachModule = config.getUntrackedParameter<bool>("memoryPerModule");
    if (afterEachModule) {
      registry.watchPostModuleEvent(this, &ROCmMonitoringService::postModuleEvent);
    }

    bool const afterEachEvent = config.getUntrackedParameter<bool>("memoryPerEvent");
    if (afterEachEvent) {
      registry.watchPostEvent(this, &ROCmMonitoringService::postEvent);
    }
  }
}
|
||
// Registers the four untracked boolean switches of this service, with their
// defaults and comments, under the label "ROCmMonitoringService".
void ROCmMonitoringService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
  edm::ParameterSetDescription desc;

  // off by default
  auto* const construction = desc.addUntracked<bool>("memoryConstruction", false);
  construction->setComment("Print memory information for each device after the construction of each module");

  // the remaining switches are on by default
  auto* const beginStream = desc.addUntracked<bool>("memoryBeginStream", true);
  beginStream->setComment("Print memory information for each device after the beginStream() of each module");

  auto* const perModule = desc.addUntracked<bool>("memoryPerModule", true);
  perModule->setComment("Print memory information for each device after the event of each module");

  auto* const perEvent = desc.addUntracked<bool>("memoryPerEvent", true);
  perEvent->setComment("Print memory information for each device after each event");

  descriptions.add("ROCmMonitoringService", desc);
  descriptions.setComment(
      "The memory information is the global state of the device. This gets confusing if there are multiple processes "
      "running on the same device. Probably the information retrieval should be re-thought?");
}
|
||
// activity handlers | ||
namespace { | ||
template <typename T> | ||
void dumpUsedMemory(T& log, int num) { | ||
int old = 0; | ||
hipCheck(hipGetDevice(&old)); | ||
constexpr auto mbytes = 1 << 20; | ||
for (int i = 0; i < num; ++i) { | ||
size_t freeMemory, totalMemory; | ||
hipCheck(hipSetDevice(i)); | ||
hipCheck(hipMemGetInfo(&freeMemory, &totalMemory)); | ||
log << "\n" | ||
<< i << ": " << (totalMemory - freeMemory) / mbytes << " MB used / " << totalMemory / mbytes << " MB total"; | ||
} | ||
hipCheck(hipSetDevice(old)); | ||
} | ||
} // namespace | ||
|
||
// Prints the per-device memory usage, preceded by the label and the name of
// the module described by `desc`.
void ROCmMonitoringService::postModuleConstruction(edm::ModuleDescription const& desc) {
  edm::LogPrint report("ROCmMonitoringService");
  report << "ROCm device memory after construction of " << desc.moduleLabel();
  report << " (" << desc.moduleName() << ")";
  dumpUsedMemory(report, numberOfDevices_);
}
|
||
// Prints the per-device memory usage, preceded by the label and the name of
// the module taken from the calling context. The stream context is unused.
void ROCmMonitoringService::postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
  auto const& module = *mcc.moduleDescription();
  edm::LogPrint report("ROCmMonitoringService");
  report << "ROCm device memory after beginStream() of " << module.moduleLabel();
  report << " (" << module.moduleName() << ")";
  dumpUsedMemory(report, numberOfDevices_);
}
|
||
// Prints the per-device memory usage, preceded by the label and the name of
// the module taken from the calling context. The stream context is unused.
void ROCmMonitoringService::postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
  auto const& module = *mcc.moduleDescription();
  edm::LogPrint report("ROCmMonitoringService");
  report << "ROCm device memory after processing an event by " << module.moduleLabel();
  report << " (" << module.moduleName() << ")";
  dumpUsedMemory(report, numberOfDevices_);
}
|
||
// Prints the per-device memory usage under a fixed "after event" header.
// The stream context is not used, so the parameter is left unnamed: this avoids
// an unused-parameter warning and matches the other handlers in this file.
void ROCmMonitoringService::postEvent(edm::StreamContext const& /* sc */) {
  auto log = edm::LogPrint("ROCmMonitoringService");
  log << "ROCm device memory after event";
  dumpUsedMemory(log, numberOfDevices_);
}
|
||
// Register ROCmMonitoringService with the framework's service plugin system.
DEFINE_FWK_SERVICE(ROCmMonitoringService);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" | ||
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" | ||
|
||
// Register ROCmService with the framework, using a maker that passes only the
// edm::ParameterSet to the constructor.
DEFINE_FWK_SERVICE_MAKER(ROCmService, edm::serviceregistry::ParameterSetMaker<ROCmService>);
Oops, something went wrong.