Skip to content

Commit

Permalink
Merge pull request #39400 from fwyzard/Fix_ProcessCallGraph_and_NVPro…
Browse files Browse the repository at this point in the history
…filerService

Fix the `NVProfilerService` and `ProcessCallGraph` [12.5.x]
  • Loading branch information
cmsbuild authored Sep 17, 2022
2 parents d2d4010 + ae5949e commit 58b78f6
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 16 deletions.
4 changes: 2 additions & 2 deletions HLTrigger/Timer/interface/ProcessCallGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class ProcessCallGraph {

public:
// default c'tor
ProcessCallGraph();
ProcessCallGraph() = default;

// to be called from preSourceConstruction(...)
void preSourceConstruction(edm::ModuleDescription const &);
Expand Down Expand Up @@ -172,7 +172,7 @@ class ProcessCallGraph {
GraphType graph_;

// module id of the Source
unsigned int source_;
unsigned int source_ = edm::ModuleDescription::invalidID();

// map each (sub)process name to a "process id"
std::unordered_map<std::string, unsigned int> process_id_;
Expand Down
19 changes: 10 additions & 9 deletions HLTrigger/Timer/src/ProcessCallGraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
#include "FWCore/Utilities/interface/EDMException.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"

ProcessCallGraph::ProcessCallGraph() = default;

// adaptor to use range-based for loops with boost::graph edges(...) and vertices(...) functions
template <typename I>
struct iterator_pair_as_a_range : std::pair<I, I> {
Expand All @@ -46,9 +44,10 @@ iterator_pair_as_a_range<I> make_range(std::pair<I, I> p) {
return iterator_pair_as_a_range<I>(p);
}

// FIXME
// - check that the Source has not already been added
void ProcessCallGraph::preSourceConstruction(edm::ModuleDescription const& module) {
// check that the Source has not already been added
assert(source_ == edm::ModuleDescription::invalidID());

// keep track of the Source module id
source_ = module.id();

Expand All @@ -58,13 +57,15 @@ void ProcessCallGraph::preSourceConstruction(edm::ModuleDescription const& modul
}

// FIXME
// - check that the Source has already been added
// - check that all module ids are valid (e.g. subprocesses are not being added in
// the wrong order)
void ProcessCallGraph::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
edm::ProcessContext const& context) {
unsigned int pid = registerProcess(context);

// check that the Source has already been added
assert(source_ != edm::ModuleDescription::invalidID());

// work on the full graph (for the main process) or a subgraph (for a subprocess)
GraphType& graph = context.isSubProcess() ? graph_.create_subgraph() : graph_.root();

Expand Down Expand Up @@ -227,10 +228,8 @@ std::pair<std::vector<unsigned int>, std::vector<unsigned int>> ProcessCallGraph
}

// registers a (sub)process and assigns it a "process id"
// if called with a duplicate process name, returns the original process id
// throws an exception if called with a duplicate process name
unsigned int ProcessCallGraph::registerProcess(edm::ProcessContext const& context) {
static unsigned int s_id = 0;

// registerProcess (called by preBeginJob) must be called for the parent process before its subprocess(es)
if (context.isSubProcess() and process_id_.find(context.parentProcessContext().processName()) == process_id_.end()) {
throw edm::Exception(edm::errors::LogicError)
Expand All @@ -246,7 +245,9 @@ unsigned int ProcessCallGraph::registerProcess(edm::ProcessContext const& contex
<< (context.isSubProcess() ? "subprocess" : "process") << " " << context.processName();
}

std::tie(id, std::ignore) = process_id_.insert(std::make_pair(context.processName(), s_id++));
// this assumes that registerProcess (called by preBeginJob) is not called concurrently from different threads
// otherwise, process_id_.size() should be replaced with an atomic counter
std::tie(id, std::ignore) = process_id_.insert(std::make_pair(context.processName(), process_id_.size()));
return id->second;
}

Expand Down
1 change: 1 addition & 0 deletions HeterogeneousCore/CUDAServices/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
<use name="FWCore/ServiceRegistry"/>
<use name="FWCore/Utilities"/>
<use name="HeterogeneousCore/CUDAServices"/>
<use name="HLTrigger/Timer"/>

<library file="*.cc" name="HeterogeneousCoreCUDAServicesPlugins">
<flags EDM_PLUGIN="1"/>
Expand Down
17 changes: 12 additions & 5 deletions HeterogeneousCore/CUDAServices/plugins/NVProfilerService.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "FWCore/Utilities/interface/ProductKindOfType.h"
#include "FWCore/Utilities/interface/TimeOfDay.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HLTrigger/Timer/interface/ProcessCallGraph.h"

using namespace std::string_literals;

Expand Down Expand Up @@ -287,6 +288,9 @@ class NVProfilerService {
return highlight(label) ? nvtxLightAmber : nvtxLightGreen;
}

// build a complete representation of the modules in the whole job
ProcessCallGraph callgraph_;

std::vector<std::string> highlightModules_;
const bool showModulePrefetching_;
const bool skipFirstEvent_;
Expand Down Expand Up @@ -502,7 +506,7 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
std::stringstream out;
out << "preallocate: " << bounds.maxNumberOfConcurrentRuns() << " concurrent runs, "
<< bounds.maxNumberOfConcurrentLuminosityBlocks() << " luminosity sections, " << bounds.maxNumberOfStreams()
<< " streams\nrunning on" << bounds.maxNumberOfThreads() << " threads";
<< " streams\nrunning on " << bounds.maxNumberOfThreads() << " threads";
nvtxDomainMark(global_domain_, out.str().c_str());

auto concurrentStreams = bounds.maxNumberOfStreams();
Expand All @@ -524,12 +528,13 @@ void NVProfilerService::preallocate(edm::service::SystemBounds const& bounds) {
}

void NVProfilerService::preBeginJob(edm::PathsAndConsumesOfModulesBase const& pathsAndConsumes,
edm::ProcessContext const& pc) {
edm::ProcessContext const& context) {
callgraph_.preBeginJob(pathsAndConsumes, context);
nvtxDomainMark(global_domain_, "preBeginJob");

// FIXME this probably works only in the absence of subprocesses
// size() + 1 because pathsAndConsumes.allModules() does not include the source
unsigned int modules = pathsAndConsumes.allModules().size() + 1;
// this assumes that preBeginJob is not called concurrently with the modules' beginJob method
// or the preBeginJob for a subprocess
unsigned int modules = callgraph_.size();
global_modules_.resize(modules, nvtxInvalidRangeId);
for (unsigned int sid = 0; sid < stream_modules_.size(); ++sid) {
stream_modules_[sid].resize(modules, nvtxInvalidRangeId);
Expand Down Expand Up @@ -1115,6 +1120,8 @@ void NVProfilerService::postModuleGlobalEndLumi(edm::GlobalContext const& gc, ed
}

void NVProfilerService::preSourceConstruction(edm::ModuleDescription const& desc) {
callgraph_.preSourceConstruction(desc);

if (not skipFirstEvent_) {
auto mid = desc.id();
global_modules_.grow_to_at_least(mid + 1);
Expand Down

0 comments on commit 58b78f6

Please sign in to comment.