Skip to content

Commit

Permalink
Fix the NVProfilerService
Browse files Browse the repository at this point in the history
Use a ProcessCallGraph to get the highest possible module id, instead of
relying on the modules count.
  • Loading branch information
fwyzard committed Sep 15, 2022
1 parent fba0131 commit ae5949e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
1 change: 1 addition & 0 deletions HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<use name="FWCore/ServiceRegistry"/>
<use name="FWCore/Utilities"/>
<use name="HeterogeneousCore/CUDAServices"/>
<use name="HLTrigger/Timer"/>

<library file="*.cc" name="HeterogeneousCoreCUDAServicesPlugins">
<flags EDM_PLUGIN="1"/>
Expand Down
17 changes: 12 additions & 5 deletions HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "FWCore/Utilities/interface/ProductKindOfType.h"
#include "FWCore/Utilities/interface/TimeOfDay.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"

using namespace std::string_literals;

Expand Down Expand Up @@ -287,6 +288,9 @@ class NVProfilerService {
return highlight(label) ? nvtxLightAmber : nvtxLightGreen;
}

// build a complete representation of the modules in the whole job
ProcessCallGraph callgraph_;

std::vector<std::string> highlightModules_;
const bool showModulePrefetching_;
const bool skipFirstEvent_;
Expand Down Expand Up @@ -502,7 +506,7 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
std::stringstream out;
out << "preallocate: " << bounds.maxNumberOfConcurrentRuns() << " concurrent runs, "
<< bounds.maxNumberOfConcurrentLuminosityBlocks() << " luminosity sections, " << bounds.maxNumberOfStreams()
<< " streams\nrunning on" << bounds.maxNumberOfThreads() << " threads";
<< " streams\nrunning on " << bounds.maxNumberOfThreads() << " threads";
nvtxDomainMark(global_domain_, out.str().c_str());

auto concurrentStreams = bounds.maxNumberOfStreams();
Expand All @@ -524,12 +528,13 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
}

void NVProfilerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
edm::ProcessContext const& pc) {
edm::ProcessContext const& context) {
callgraph_.preBeginJob(pathsAndConsumes, context);
nvtxDomainMark(global_domain_, "preBeginJob");

// FIXME this probably works only in the absence of subprocesses
// size() + 1 because pathsAndConsumes.allModules() does not include the source
unsigned int modules = pathsAndConsumes.allModules().size() + 1;
// this assumes that preBeginJob is not called concurrently with the modules' beginJob method
// or the preBeginJob for a subprocess
unsigned int modules = callgraph_.size();
global_modules_.resize(modules, nvtxInvalidRangeId);
for (unsigned int sid = 0; sid < stream_modules_.size(); ++sid) {
stream_modules_[sid].resize(modules, nvtxInvalidRangeId);
Expand Down Expand Up @@ -1115,6 +1120,8 @@ void NVProfilerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, ed
}

void NVProfilerService::preSourceConstruction(edm::ModuleDescription const& desc) {
callgraph_.preSourceConstruction(desc);

if (not skipFirstEvent_) {
auto mid = desc.id();
global_modules_.grow_to_at_least(mid + 1);
Expand Down

0 comments on commit ae5949e

Please sign in to comment.