diff --git a/Configuration/ProcessModifiers/python/allSonicTriton_cff.py b/Configuration/ProcessModifiers/python/allSonicTriton_cff.py new file mode 100644 index 0000000000000..1de0813bff065 --- /dev/null +++ b/Configuration/ProcessModifiers/python/allSonicTriton_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton + +# collect all SonicTriton-related process modifiers here +allSonicTriton = cms.ModifierChain(enableSonicTriton) diff --git a/Configuration/ProcessModifiers/python/enableSonicTriton_cff.py b/Configuration/ProcessModifiers/python/enableSonicTriton_cff.py new file mode 100644 index 0000000000000..8034f48d20ef4 --- /dev/null +++ b/Configuration/ProcessModifiers/python/enableSonicTriton_cff.py @@ -0,0 +1,3 @@ +import FWCore.ParameterSet.Config as cms + +enableSonicTriton = cms.Modifier() diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index a9876705bf631..490500cb3807d 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -919,6 +919,41 @@ def condition(self, fragment, stepList, key, hasHarvest): ) upgradeWFs['DD4hep'].allowReuse = False +class UpgradeWorkflow_SonicTriton(UpgradeWorkflow): + def setup_(self, step, stepName, stepDict, k, properties): + stepDict[stepName][k] = merge([{'--procModifiers': 'allSonicTriton'}, stepDict[step][k]]) + def condition(self, fragment, stepList, key, hasHarvest): + return (fragment=='TTbar_13' and '2021' in key) \ + or (fragment=='TTbar_14TeV' and '2026' in key) +upgradeWFs['SonicTriton'] = UpgradeWorkflow_SonicTriton( + steps = [ + 'GenSim', + 'GenSimHLBeamSpot', + 'GenSimHLBeamSpot14', + 'Digi', + 'DigiTrigger', + 'Reco', + 'RecoGlobal', + 'HARVEST', + 'HARVESTGlobal', + 'ALCA', + ], + PU = [ + 'GenSim', + 
'GenSimHLBeamSpot', + 'GenSimHLBeamSpot14', + 'Digi', + 'DigiTrigger', + 'Reco', + 'RecoGlobal', + 'HARVEST', + 'HARVESTGlobal', + 'ALCA', + ], + suffix = '_SonicTriton', + offset = 0.9001, +) + # check for duplicate offsets offsets = [specialWF.offset for specialType,specialWF in six.iteritems(upgradeWFs)] seen = set() diff --git a/Configuration/StandardSequences/python/Services_cff.py b/Configuration/StandardSequences/python/Services_cff.py index 0365d5f22bf51..ef1da8dec1d59 100644 --- a/Configuration/StandardSequences/python/Services_cff.py +++ b/Configuration/StandardSequences/python/Services_cff.py @@ -14,3 +14,9 @@ def _addCUDAServices(process): from Configuration.ProcessModifiers.gpu_cff import gpu modifyConfigurationStandardSequencesServicesAddCUDAServices_ = gpu.makeProcessModifier(_addCUDAServices) + +# load TritonService when SONIC workflow is enabled +def _addTritonService(process): + process.load("HeterogeneousCore.SonicTriton.TritonService_cff") +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton +modifyConfigurationStandardSequencesServicesAddTritonService_ = enableSonicTriton.makeProcessModifier(_addTritonService) diff --git a/HeterogeneousCore/SonicCore/README.md b/HeterogeneousCore/SonicCore/README.md index df877101e50e6..6ae698aeb6a9e 100644 --- a/HeterogeneousCore/SonicCore/README.md +++ b/HeterogeneousCore/SonicCore/README.md @@ -15,9 +15,8 @@ To implement a concrete derived producer class, the following skeleton can be us class MyProducer : public SonicEDProducer { public: - explicit MyProducer(edm::ParameterSet const& cfg) : SonicEDProducer(cfg) { - //for debugging - setDebugName("MyProducer"); + explicit MyProducer(edm::ParameterSet const& cfg) : SonicEDProducer(cfg, "MyProducer") { + //do any necessary operations } void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //convert event data to client input format @@ -65,7 +64,7 @@ To add a new communication protocol for 
SONIC, follow these steps: 2. Set up the concrete client(s) that use the communication protocol in a new package in the `HeterogeneousCore` subsystem 3. Add a test producer (see above) to make sure it works -To implement a concrete client, the following skeleton can be used for the `.h` file, with the function implementations in an associated `.cc` file: +To implement a concrete client, the following skeleton can be used for the `.h` file: ```cpp #ifndef HeterogeneousCore_MyPackage_MyClient #define HeterogeneousCore_MyPackage_MyClient @@ -75,7 +74,7 @@ To implement a concrete client, the following skeleton can be used for the `.h` class MyClient : public SonicClient { public: - MyClient(const edm::ParameterSet& params); + MyClient(const edm::ParameterSet& params, const std::string& debugName); static void fillPSetDescription(edm::ParameterSetDescription& iDesc); @@ -86,6 +85,14 @@ protected: #endif ``` +The concrete client member function implementations, in an associated `.cc` file, should include the following: +```cpp +MyClient::MyClient(const edm::ParameterSet& params, const std::string& debugName) + : SonicClient(params, debugName, "MyClient") { + //do any necessary operations +} +``` + The `SonicClient` has three available modes: * `Sync`: synchronous call, blocks until the result is returned. * `Async`: asynchronous, non-blocking call. 
diff --git a/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h b/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h index 0b9021ffe571c..88917f5daac98 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h +++ b/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h @@ -8,6 +8,8 @@ #include "HeterogeneousCore/SonicCore/interface/sonic_utils.h" #include +#include +#include template class SonicAcquirer : public Module { @@ -15,26 +17,34 @@ class SonicAcquirer : public Module { //typedef to simplify usage typedef typename Client::Input Input; //constructor - SonicAcquirer(edm::ParameterSet const& cfg) : client_(cfg.getParameter("Client")) {} + SonicAcquirer(edm::ParameterSet const& cfg, const std::string& debugName = "") + : clientPset_(cfg.getParameterSet("Client")), debugName_(debugName) {} //destructor ~SonicAcquirer() override = default; - //derived classes use a dedicated acquire() interface that incorporates client_.input() + //construct client at beginning of job + //in case client constructor depends on operations happening in derived module constructors + void beginStream(edm::StreamID) override { makeClient(); } + + //derived classes use a dedicated acquire() interface that incorporates client_->input() //(no need to interact with callback holder) void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, edm::WaitingTaskWithArenaHolder holder) final { auto t0 = std::chrono::high_resolution_clock::now(); - acquire(iEvent, iSetup, client_.input()); - sonic_utils::printDebugTime(client_.debugName(), "acquire() time: ", t0); + acquire(iEvent, iSetup, client_->input()); + sonic_utils::printDebugTime(debugName_, "acquire() time: ", t0); t_dispatch_ = std::chrono::high_resolution_clock::now(); - client_.dispatch(holder); + client_->dispatch(holder); } virtual void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) = 0; protected: - //for debugging - void setDebugName(const std::string& debugName) { 
client_.setDebugName(debugName); } + //helper + void makeClient() { client_ = std::make_unique(clientPset_, debugName_); } + //members - Client client_; + edm::ParameterSet clientPset_; + std::unique_ptr client_; + std::string debugName_; std::chrono::time_point t_dispatch_; }; diff --git a/HeterogeneousCore/SonicCore/interface/SonicClient.h b/HeterogeneousCore/SonicCore/interface/SonicClient.h index a3bfd2bd7ed7d..00b26fedc523f 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClient.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClient.h @@ -9,7 +9,8 @@ template class SonicClient : public SonicClientBase, public SonicClientTypes { public: //constructor - SonicClient(const edm::ParameterSet& params) : SonicClientBase(params), SonicClientTypes() {} + SonicClient(const edm::ParameterSet& params, const std::string& debugName, const std::string& clientName) + : SonicClientBase(params, debugName, clientName), SonicClientTypes() {} }; #endif diff --git a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h index 6efc00d7e7dc6..3af1b574a4d54 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h @@ -19,12 +19,11 @@ enum class SonicMode { Sync = 1, Async = 2, PseudoAsync = 3 }; class SonicClientBase { public: //constructor - SonicClientBase(const edm::ParameterSet& params); + SonicClientBase(const edm::ParameterSet& params, const std::string& debugName, const std::string& clientName); //destructor virtual ~SonicClientBase() = default; - void setDebugName(const std::string& debugName); const std::string& debugName() const { return debugName_; } const std::string& clientName() const { return clientName_; } SonicMode mode() const { return mode_; } @@ -42,6 +41,8 @@ class SonicClientBase { static void fillBasePSetDescription(edm::ParameterSetDescription& desc, bool allowRetry = true); protected: + void setMode(SonicMode mode); + 
virtual void evaluate() = 0; void start(edm::WaitingTaskWithArenaHolder holder); @@ -57,7 +58,7 @@ class SonicClientBase { std::optional holder_; //for logging/debugging - std::string clientName_, debugName_, fullDebugName_; + std::string debugName_, clientName_, fullDebugName_; std::chrono::time_point t0_; friend class SonicDispatcher; diff --git a/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h b/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h index 3747fd780a2b1..f7bf4033e7e25 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h +++ b/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h @@ -16,22 +16,22 @@ class SonicEDFilter : public SonicAcquirer>(cfg) {} + SonicEDFilter(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicAcquirer>(cfg, debugName) {} //destructor ~SonicEDFilter() override = default; - //derived classes use a dedicated produce() interface that incorporates client_.output() + //derived classes use a dedicated produce() interface that incorporates client_->output() bool filter(edm::Event& iEvent, edm::EventSetup const& iSetup) final { //measure time between acquire and produce - sonic_utils::printDebugTime(this->client_.debugName(), "dispatch() time: ", this->t_dispatch_); + sonic_utils::printDebugTime(this->debugName_, "dispatch() time: ", this->t_dispatch_); auto t0 = std::chrono::high_resolution_clock::now(); - bool result = filter(iEvent, iSetup, this->client_.output()); - sonic_utils::printDebugTime(this->client_.debugName(), "filter() time: ", t0); + bool result = filter(iEvent, iSetup, this->client_->output()); + sonic_utils::printDebugTime(this->debugName_, "filter() time: ", t0); //reset client data - this->client_.reset(); + this->client_->reset(); return result; } diff --git a/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h b/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h index f777b05257001..5e4e1ce719d71 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h 
+++ b/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h @@ -16,22 +16,22 @@ class SonicEDProducer : public SonicAcquirer>(cfg) {} + SonicEDProducer(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicAcquirer>(cfg, debugName) {} //destructor ~SonicEDProducer() override = default; - //derived classes use a dedicated produce() interface that incorporates client_.output() + //derived classes use a dedicated produce() interface that incorporates client_->output() void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) final { //measure time between acquire and produce - sonic_utils::printDebugTime(this->client_.debugName(), "dispatch() time: ", this->t_dispatch_); + sonic_utils::printDebugTime(this->debugName_, "dispatch() time: ", this->t_dispatch_); auto t0 = std::chrono::high_resolution_clock::now(); - produce(iEvent, iSetup, this->client_.output()); - sonic_utils::printDebugTime(this->client_.debugName(), "produce() time: ", t0); + produce(iEvent, iSetup, this->client_->output()); + sonic_utils::printDebugTime(this->debugName_, "produce() time: ", t0); //reset client data - this->client_.reset(); + this->client_->reset(); } virtual void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) = 0; }; diff --git a/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h b/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h index ce4e8de3abcd9..2b52913fbca53 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h +++ b/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h @@ -21,43 +21,53 @@ class SonicOneEDAnalyzer : public edm::one::EDAnalyzer { typedef typename Client::Input Input; typedef typename Client::Output Output; //constructor - SonicOneEDAnalyzer(edm::ParameterSet const& cfg) : client_(cfg.getParameter("Client")) { + SonicOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) + : clientPset_(cfg.getParameterSet("Client")), debugName_(debugName) { 
//ExternalWork is not compatible with one modules, so Sync mode is enforced - if (client_.mode() != SonicMode::Sync) - throw cms::Exception("UnsupportedMode") << "SonicOneEDAnalyzer can only use Sync mode for clients"; + if (clientPset_.getParameter("mode") != "Sync") { + edm::LogWarning("ResetClientMode") << "Resetting client mode to Sync for SonicOneEDAnalyzer"; + clientPset_.addParameter("mode", "Sync"); + } } //destructor ~SonicOneEDAnalyzer() override = default; - //derived classes still use a dedicated acquire() interface that incorporates client_.input() for consistency + //construct client at beginning of job + //in case client constructor depends on operations happening in derived module constructors + void beginJob() override { makeClient(); } + + //derived classes still use a dedicated acquire() interface that incorporates client_->input() for consistency virtual void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) = 0; - //derived classes use a dedicated analyze() interface that incorporates client_.output() + //derived classes use a dedicated analyze() interface that incorporates client_->output() void analyze(edm::Event const& iEvent, edm::EventSetup const& iSetup) final { auto t0 = std::chrono::high_resolution_clock::now(); - acquire(iEvent, iSetup, client_.input()); - sonic_utils::printDebugTime(client_.debugName(), "acquire() time: ", t0); + acquire(iEvent, iSetup, client_->input()); + sonic_utils::printDebugTime(debugName_, "acquire() time: ", t0); //pattern similar to ExternalWork, but blocking auto t1 = std::chrono::high_resolution_clock::now(); - client_.dispatch(); + client_->dispatch(); //measure time between acquire and produce - sonic_utils::printDebugTime(client_.debugName(), "dispatch() time: ", t1); + sonic_utils::printDebugTime(debugName_, "dispatch() time: ", t1); auto t2 = std::chrono::high_resolution_clock::now(); - analyze(iEvent, iSetup, client_.output()); - 
sonic_utils::printDebugTime(client_.debugName(), "analyze() time: ", t2); + analyze(iEvent, iSetup, client_->output()); + sonic_utils::printDebugTime(debugName_, "analyze() time: ", t2); //reset client data - client_.reset(); + client_->reset(); } virtual void analyze(edm::Event const& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) = 0; protected: - //for debugging - void setDebugName(const std::string& debugName) { client_.setDebugName(debugName); } + //helper + void makeClient() { client_ = std::make_unique(clientPset_, debugName_); } + //members - Client client_; + edm::ParameterSet clientPset_; + std::unique_ptr client_; + std::string debugName_; }; #endif diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index 750e2990b0277..32f0e2af1e9af 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -2,17 +2,31 @@ #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/ParameterSet/interface/allowedValues.h" -SonicClientBase::SonicClientBase(const edm::ParameterSet& params) - : allowedTries_(params.getUntrackedParameter("allowedTries", 0)) { +SonicClientBase::SonicClientBase(const edm::ParameterSet& params, + const std::string& debugName, + const std::string& clientName) + : allowedTries_(params.getUntrackedParameter("allowedTries", 0)), + debugName_(debugName), + clientName_(clientName), + fullDebugName_(debugName_) { + if (!clientName_.empty()) + fullDebugName_ += ":" + clientName_; + std::string modeName(params.getParameter("mode")); if (modeName == "Sync") - mode_ = SonicMode::Sync; + setMode(SonicMode::Sync); else if (modeName == "Async") - mode_ = SonicMode::Async; + setMode(SonicMode::Async); else if (modeName == "PseudoAsync") - mode_ = SonicMode::PseudoAsync; + setMode(SonicMode::PseudoAsync); else throw cms::Exception("Configuration") << "Unknown mode for SonicClient: " << modeName; +} + +void 
SonicClientBase::setMode(SonicMode mode) { + if (mode_ == mode) + return; + mode_ = mode; //get correct dispatcher for mode if (mode_ == SonicMode::Sync or mode_ == SonicMode::Async) @@ -21,13 +35,6 @@ SonicClientBase::SonicClientBase(const edm::ParameterSet& params) dispatcher_ = std::make_unique(this); } -void SonicClientBase::setDebugName(const std::string& debugName) { - debugName_ = debugName; - fullDebugName_ = debugName_; - if (!clientName_.empty()) - fullDebugName_ += ":" + clientName_; -} - void SonicClientBase::start(edm::WaitingTaskWithArenaHolder holder) { start(); holder_ = std::move(holder); diff --git a/HeterogeneousCore/SonicCore/test/DummyClient.h b/HeterogeneousCore/SonicCore/test/DummyClient.h index 4bca9ea91eb0d..ccef888ad9f7d 100644 --- a/HeterogeneousCore/SonicCore/test/DummyClient.h +++ b/HeterogeneousCore/SonicCore/test/DummyClient.h @@ -12,8 +12,8 @@ class DummyClient : public SonicClient { public: //constructor - DummyClient(const edm::ParameterSet& params) - : SonicClient(params), + DummyClient(const edm::ParameterSet& params, const std::string& debugName) + : SonicClient(params, debugName, "DummyClient"), factor_(params.getParameter("factor")), wait_(params.getParameter("wait")), fails_(params.getParameter("fails")) {} diff --git a/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc b/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc index df40257f0b22b..386f51af90e46 100644 --- a/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc +++ b/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc @@ -10,9 +10,7 @@ namespace sonictest { class SonicDummyFilter : public SonicEDFilter { public: explicit SonicDummyFilter(edm::ParameterSet const& cfg) - : SonicEDFilter(cfg), input_(cfg.getParameter("input")) { - //for debugging - setDebugName("SonicDummyFilter"); + : SonicEDFilter(cfg, "SonicDummyFilter"), input_(cfg.getParameter("input")) { putToken_ = produces(); } diff --git a/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc 
b/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc index 10e402922e55f..399752820b2c4 100644 --- a/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc +++ b/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc @@ -12,12 +12,9 @@ namespace sonictest { class SonicDummyOneAnalyzer : public SonicOneEDAnalyzer { public: explicit SonicDummyOneAnalyzer(edm::ParameterSet const& cfg) - : SonicOneEDAnalyzer(cfg), + : SonicOneEDAnalyzer(cfg, "SonicDummyOneAnalyzer"), input_(cfg.getParameter("input")), - expected_(cfg.getParameter("expected")) { - //for debugging - setDebugName("SonicDummyOneAnalyzer"); - } + expected_(cfg.getParameter("expected")) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { iInput = input_; } diff --git a/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc b/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc index 602a4afcf940f..e782ee15f8928 100644 --- a/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc +++ b/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc @@ -10,9 +10,7 @@ namespace sonictest { class SonicDummyProducer : public SonicEDProducer { public: explicit SonicDummyProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg), input_(cfg.getParameter("input")) { - //for debugging - setDebugName("SonicDummyProducer"); + : SonicEDProducer(cfg, "SonicDummyProducer"), input_(cfg.getParameter("input")) { putToken_ = produces(); } diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 736c73a9a8320..63a52d46d1e57 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -1,5 +1,7 @@ # SONIC for Triton Inference Server +## Introduction to Triton + Triton Inference Server ([docs](https://docs.nvidia.com/deeplearning/triton-inference-server/archives/triton_inference_server_1130/user-guide/docs/index.html), [repo](https://github.com/NVIDIA/triton-inference-server)) is 
an open-source product from Nvidia that facilitates the use of GPUs as a service to process inference requests. @@ -9,6 +11,8 @@ boolean, unsigned integer (8, 16, 32, or 64 bits), integer (8, 16, 32, or 64 bit Triton additionally supports inputs and outputs with multiple dimensions, some of which might be variable (denoted by -1). Concrete values for variable dimensions must be specified for each call (event). +## Client + Accordingly, the `TritonClient` input and output types are: * input: `TritonInputMap = std::unordered_map` * output: `TritonOutputMap = std::unordered_map` @@ -22,11 +26,8 @@ The model information from the server can be printed by enabling `verbose` outpu `TritonClient` takes several parameters: * `modelName`: name of model with which to perform inference * `modelVersion`: version number of model (default: -1, use latest available version on server) -* `batchSize`: number of objects sent per request - * can also be set on per-event basis using `setBatchSize()` - * some models don't support batching -* `address`: server IP address -* `port`: server port +* `modelConfigPath`: path to `config.pbtxt` file for the model (using `edm::FileInPath`) +* `preferredServer`: name of preferred server, for testing (see [Services](#services) below) * `timeout`: maximum allowed time for a request * `outputs`: optional, specify which output(s) the server should send @@ -38,6 +39,8 @@ Useful `TritonData` accessors include: * `byteSize()`: return number of bytes for data type * `dname()`: return name of data type * `batchSize()`: return current batch size +* `setBatchSize()`: set a new batch size + * some models may not support batching To update the `TritonData` shape in the variable-dimension case: * `setShape(const std::vector& newShape)`: update all (variable) dimensions with values provided in `newShape` @@ -49,6 +52,22 @@ and returned by `TritonOutputData::fromServer()`, respectively: * `TritonInput = std::vector>` * `TritonOutput = std::vector>` +## Modules + 
+SONIC Triton supports producers, filters, and analyzers. +New modules should inherit from `TritonEDProducer`, `TritonEDFilter`, or `TritonOneEDAnalyzer`. +These follow essentially the same patterns described in [SonicCore](../SonicCore#for-analyzers). + +If an `edm::GlobalCache` of type `T` is needed, there are two changes: +* The new module should inherit from `TritonEDProducerT` or `TritonEDFilterT` +* The new module should contain these lines: + ```cpp + static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { + TritonEDProducerT::initializeGlobalCache(pset); + [module-specific code goes here] + } + ``` + In a SONIC Triton producer, the basic flow should follow this pattern: 1. `acquire()`: a. access input object(s) from `TritonInputMap` @@ -61,5 +80,46 @@ In a SONIC Triton producer, the basic flow should follow this pattern: b. obtain output data as `TritonOutput` using `fromServer()` function of output object(s) (sets output shape(s) if variable dimensions exist) c. fill output products -Several example producers (running ResNet50 or Graph Attention Network), along with instructions to run a local server, -can be found in the [test](./test) directory. +## Services + +A script [`cmsTriton`](./scripts/cmsTriton) is provided to launch and manage local servers. 
+The script has two operations (`start` and `stop`) and the following options: +* `-c`: don't cleanup temporary dir (for debugging) +* `-D`: dry run: print container commands rather than executing them +* `-d`: use Docker instead of Singularity +* `-f`: force reuse of (possibly) existing container instance +* `-g`: use GPU instead of CPU +* `-i [name]`: server image name (default: fastml/triton-torchgeo:20.09-py3-geometric) +* `-M [dir]`: model repository (can be given more than once) +* `-m [dir]`: specific model directory (can be given more than once) +* `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) +* `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000) +* `-p`: automatically shut down server when parent process ends +* `-r [num]`: number of retries when starting container (default: 3) +* `-s [dir]`: Singularity sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.09-py3-geometric) +* `-t [dir]`: non-default hidden temporary dir +* `-v`: (verbose) start: activate server debugging info; stop: keep server logs +* `-w [time]`: maximum time to wait for server to start (default: 120 seconds) +* `-h`: print help message and exit + +Additional details and caveats: +* The `start` and `stop` operations for a given container instance should always be executed in the same directory +if a relative path is used for the hidden temporary directory (including the default from the container instance name), +in order to ensure that everything is properly cleaned up. +* A model repository is a folder that contains multiple model directories, while a model directory contains the files for a specific model. +(In the example below, `$CMSSW_BASE/src/HeterogeneousCore/SonicTriton/data/models` is a model repository, +while `$CMSSW_BASE/src/HeterogeneousCore/SonicTriton/data/models/resnet50_netdef` is a model directory.) 
+If a model repository is provided, all of the models it contains will be provided to the server. +* Older versions of Singularity have a short timeout that may cause launching the server to fail the first time the command is executed. +The `-r` (retry) flag exists to work around this issue. + +A central `TritonService` is provided to keep track of all available servers and which models they can serve. +The servers will automatically be assigned to clients at startup. +If some models are not served by any server, the `TritonService` can launch a fallback server using the `cmsTriton` script described above. +If the process modifiers `enableSonicTriton` or `allSonicTriton` are activated, +the fallback server will launch automatically if needed and will use a local GPU if one is available. +If the fallback server uses CPU, clients that use the fallback server will automatically be set to `Sync` mode. + +## Examples + +Several example producers (running ResNet50 or Graph Attention Network) can be found in the [test](./test) directory. 
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index 99ca5f8765fe7..ccda4a3c6ec6a 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -29,7 +29,7 @@ class TritonClient : public SonicClient { }; //constructor - TritonClient(const edm::ParameterSet& params); + TritonClient(const edm::ParameterSet& params, const std::string& debugName); //accessors unsigned batchSize() const { return batchSize_; } diff --git a/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h b/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h new file mode 100644 index 0000000000000..df38d0bae28c9 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h @@ -0,0 +1,20 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonDummyCache +#define HeterogeneousCore_SonicTriton_TritonDummyCache + +struct TritonDummyCache {}; + +//Triton modules want to call initializeGlobalCache, but don't want GlobalCache pointer in constructor +//-> override framework function (can't partial specialize function templates) +namespace edm { + class ParameterSet; + namespace stream { + namespace impl { + template + T* makeStreamModule(edm::ParameterSet const& iPSet, const TritonDummyCache*) { + return new T(iPSet); + } + } // namespace impl + } // namespace stream +} // namespace edm + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h b/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h new file mode 100644 index 0000000000000..989570373817e --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h @@ -0,0 +1,33 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonEDFilter +#define HeterogeneousCore_SonicTriton_TritonEDFilter + +//TritonDummyCache include comes first for overload resolution +#include "HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h" +#include 
"HeterogeneousCore/SonicCore/interface/SonicEDFilter.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +//inherited classes that use a non-default GlobalCache should be sure to call the parent initializeGlobalCache() +template +class TritonEDFilterT : public SonicEDFilter, Capabilities...> { +public: + TritonEDFilterT(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicEDFilter, Capabilities...>(cfg, debugName) {} + + //use this function to avoid calling TritonService functions Nstreams times + static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), + clientPset.getParameter("modelConfigPath").fullPath()); + return nullptr; + } + + static void globalEndJob(G*) {} +}; + +template +using TritonEDFilter = TritonEDFilterT; + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h b/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h new file mode 100644 index 0000000000000..0b8ec909f511c --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h @@ -0,0 +1,33 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonEDProducer +#define HeterogeneousCore_SonicTriton_TritonEDProducer + +//TritonDummyCache include comes first for overload resolution +#include "HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h" +#include "HeterogeneousCore/SonicCore/interface/SonicEDProducer.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +//inherited classes that use a non-default GlobalCache should be sure to call the parent initializeGlobalCache() +template +class TritonEDProducerT : 
public SonicEDProducer, Capabilities...> { +public: + TritonEDProducerT(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicEDProducer, Capabilities...>(cfg, debugName) {} + + //use this function to avoid calling TritonService functions Nstreams times + static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), + clientPset.getParameter("modelConfigPath").fullPath()); + return nullptr; + } + + static void globalEndJob(G*) {} +}; + +template +using TritonEDProducer = TritonEDProducerT; + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h new file mode 100644 index 0000000000000..6eaf35604586a --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h @@ -0,0 +1,24 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonOneEDAnalyzer +#define HeterogeneousCore_SonicTriton_TritonOneEDAnalyzer + +#include "HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +#include + +template +class TritonOneEDAnalyzer : public SonicOneEDAnalyzer { +public: + TritonOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicOneEDAnalyzer(cfg, debugName) { + edm::Service ts; + const auto& clientPset = cfg.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), + clientPset.getParameter("modelConfigPath").fullPath()); + } +}; + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h new file mode 100644 index 0000000000000..24232b6894ae1 
--- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -0,0 +1,104 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonService +#define HeterogeneousCore_SonicTriton_TritonService + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include +#include +#include +#include +#include +#include + +//forward declarations +namespace edm { + class ActivityRegistry; + class ConfigurationDescriptions; + class PathsAndConsumesOfModulesBase; + class ProcessContext; + class ModuleDescription; +} // namespace edm + +class TritonService { +public: + //classes and defs + struct FallbackOpts { + FallbackOpts(const edm::ParameterSet& pset) + : enable(pset.getUntrackedParameter("enable")), + debug(pset.getUntrackedParameter("debug")), + verbose(pset.getUntrackedParameter("verbose")), + useDocker(pset.getUntrackedParameter("useDocker")), + useGPU(pset.getUntrackedParameter("useGPU")), + retries(pset.getUntrackedParameter("retries")), + wait(pset.getUntrackedParameter("wait")), + instanceName(pset.getUntrackedParameter("instanceName")), + tempDir(pset.getUntrackedParameter("tempDir")) {} + + bool enable; + bool debug; + bool verbose; + bool useDocker; + bool useGPU; + int retries; + int wait; + std::string instanceName; + std::string tempDir; + }; + struct Server { + Server(const edm::ParameterSet& pset) + : url(pset.getUntrackedParameter("address") + ":" + + std::to_string(pset.getUntrackedParameter("port"))), + isFallback(pset.getUntrackedParameter("name") == fallbackName) {} + Server(const std::string& name_, const std::string& url_) : url(url_), isFallback(name_ == fallbackName) {} + + //members + std::string url; + bool isFallback; + std::unordered_set models; + static const std::string fallbackName; + static const std::string fallbackAddress; + }; + struct Model { + Model(const std::string& path_ = "") : path(path_) {} + + //members + std::string path; + std::unordered_set servers; + std::unordered_set modules; + }; + struct Module { + //currently 
assumes that a module can only have one associated model + Module(const std::string& model_) : model(model_) {} + + //members + std::string model; + }; + + TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); + ~TritonService() = default; + + //accessors + void addModel(const std::string& modelName, const std::string& path); + std::pair serverAddress(const std::string& model, const std::string& preferred = "") const; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void preModuleConstruction(edm::ModuleDescription const&); + void postModuleConstruction(edm::ModuleDescription const&); + void preModuleDestruction(edm::ModuleDescription const&); + void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&); + + bool verbose_; + FallbackOpts fallbackOpts_; + unsigned currentModuleId_; + bool allowAddModel_; + bool startedFallback_; + std::unordered_map unservedModels_; + //this represents a many:many:many map + std::unordered_map servers_; + std::unordered_map models_; + std::unordered_map modules_; +}; + +#endif diff --git a/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml b/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml new file mode 100644 index 0000000000000..f5eea4e8cf125 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/HeterogeneousCore/SonicTriton/plugins/TritonService.cc b/HeterogeneousCore/SonicTriton/plugins/TritonService.cc new file mode 100644 index 0000000000000..963e6eee3c56e --- /dev/null +++ b/HeterogeneousCore/SonicTriton/plugins/TritonService.cc @@ -0,0 +1,5 @@ +#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +DEFINE_FWK_SERVICE(TritonService); diff --git a/HeterogeneousCore/SonicTriton/python/TritonService_cff.py 
b/HeterogeneousCore/SonicTriton/python/TritonService_cff.py new file mode 100644 index 0000000000000..e991d54e6f72f --- /dev/null +++ b/HeterogeneousCore/SonicTriton/python/TritonService_cff.py @@ -0,0 +1,19 @@ +from HeterogeneousCore.SonicTriton.TritonService_cfi import * + +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton + +_gpu_available_cached = None + +def _gpu_available(): + global _gpu_available_cached + if _gpu_available_cached is None: + import os + _gpu_available_cached = (os.system("nvidia-smi -L") == 0) + return _gpu_available_cached + +enableSonicTriton.toModify(TritonService, + fallback = dict( + enable = True, + useGPU = _gpu_available(), + ), +) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton new file mode 100755 index 0000000000000..3e79f226e2b15 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -0,0 +1,420 @@ +#!/bin/bash + +# defaults +USEDOCKER="" +GPU="" +VERBOSE="" +VERBOSE_ARGS="--log-verbose=1 --log-error=1 --log-info=1" +WTIME=120 +SERVER=triton_server_instance +RETRIES=3 +REPOS=() +MODELS=() +FORCE="" +CLEANUP=true +TMPDIR="" +DRYRUN="" +PARENTPID="" +BASEPORT=8000 +AUTOPORT="" +NPORTS=3 +IMAGE=fastml/triton-torchgeo:20.09-py3-geometric +SANDBOX=/cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} + +usage() { + ECHO="echo -e" + $ECHO "cmsTriton [options] [start|stop]" + $ECHO + $ECHO "Options:" + $ECHO "-c \t don't cleanup temporary dir (for debugging)" + $ECHO "-D \t dry run: print container commands rather than executing them" + $ECHO "-d \t use Docker instead of Singularity" + $ECHO "-f \t force reuse of (possibly) existing container instance" + $ECHO "-g \t use GPU instead of CPU" + $ECHO "-i [name] \t server image name (default: ${IMAGE})" + $ECHO "-M [dir] \t model repository (can be given more than once)" + $ECHO "-m [dir] \t specific model directory (can be given more than one)" + $ECHO "-n [name] \t name of 
container instance, also used for default hidden temporary dir (default: ${SERVER})" + $ECHO "-P [port] \t base port number for services (-1: automatically find an unused port range) (default: ${BASEPORT})" + $ECHO "-p \t automatically shut down server when parent process ends" + $ECHO "-r [num] \t number of retries when starting container (default: ${RETRIES})" + $ECHO "-s [dir] \t Singularity sandbox directory (default: ${SANDBOX})" + $ECHO "-t [dir] \t non-default hidden temporary dir" + $ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" + $ECHO "-w [time] \t maximum time to wait for server to start (default: ${WTIME} seconds)" + $ECHO "-h \t print this message and exit" + $ECHO + $ECHO "Operations:" + $ECHO "start \t start server" + $ECHO "stop \t stop server" + exit $1 +} + +# check shm locations +SHM=/dev/shm +if [ -e /run/shm ]; then + SHM=/run/shm +fi + +while getopts "cDdfgi:M:m:n:P:pr:s:t:vw:h" opt; do + case "$opt" in + c) CLEANUP="" + ;; + D) DRYRUN=echo + ;; + d) USEDOCKER=true + ;; + f) FORCE=true + ;; + g) GPU=true + ;; + i) IMAGE="$OPTARG" + ;; + M) REPOS+=("$OPTARG") + ;; + m) MODELS+=("$OPTARG") + ;; + n) SERVER="$OPTARG" + ;; + P) if [ "$OPTARG" -eq -1 ]; then AUTOPORT=true; else BASEPORT="$OPTARG"; fi + ;; + p) PARENTPID="$PPID" + ;; + r) RETRIES="$OPTARG" + ;; + s) SANDBOX="$OPTARG" + ;; + t) TMPDIR="$OPTARG" + ;; + v) VERBOSE="$VERBOSE_ARGS" + ;; + w) WTIME="$OPTARG" + ;; + h) usage 0 + ;; + esac +done + +shift $(($OPTIND - 1)) +OP=$1 + +if [ "$OP" != start ] && [ "$OP" != stop ]; then + usage 1 +fi + +if [ "$RETRIES" -le 0 ]; then + RETRIES=1 +fi + + +TOPDIR=$PWD +if [ -z "$TMPDIR" ]; then + TMPDIR="${TOPDIR}/.${SERVER}" +else + TMPDIR=$(readlink -f $TMPDIR) +fi + +SANDBOX=$(readlink -f ${SANDBOX}) +DOCKER="sudo docker" +LOG="log_${SERVER}.log" +STOPLOG="log_stop_${SERVER}.log" +LIB=lib +STARTED_INDICATOR="Started GRPCInferenceService" +SEGFAULT_INDICATOR="Address already in use" +EXTRA="" + 
+compute_ports(){ + # compute derived port numbers + export HTTPPORT=$BASEPORT + export GRPCPORT=$((BASEPORT+1)) + export METRPORT=$((BASEPORT+2)) +} + +check_port(){ + # success on this command means the port is in use + if 2>/dev/null >"/dev/tcp/0.0.0.0/$1"; then + return 1 + else + return 0 + fi +} + +check_ports(){ + if check_port $HTTPPORT && check_port $GRPCPORT && check_port $METRPORT; then + return 0 + else + return 1 + fi +} + +find_ports(){ + while ! check_ports; do + BASEPORT=$((BASEPORT+NPORTS)) + compute_ports + done + echo "CMS_TRITON_GRPC_PORT: $GRPCPORT" +} + +handle_ports(){ + # handle ports + compute_ports + if [ -n "$AUTOPORT" ]; then + find_ports + elif ! check_ports; then + echo "Error: requested port in use" + return 1 + fi + export PORTARGS="--http-port=${HTTPPORT} --grpc-port=${GRPCPORT} --metrics-port=${METRPORT}" +} + +start_docker(){ + # mount all model repositories + MOUNTARGS="" + REPOARGS="" + for REPO in ${REPOS[@]}; do + MOUNTARGS="$MOUNTARGS -v$REPO:$REPO" + REPOARGS="$REPOARGS --model-repository=${REPO}" + done + + $DRYRUN $DOCKER run -d --name ${SERVER} \ + --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ + -p${HTTPPORT}:${HTTPPORT} -p${GRPCPORT}:${GRPCPORT} -p${METRPORT}:${METRPORT} $EXTRA $MOUNTARGS \ + ${IMAGE} tritonserver $PORTARGS $REPOARGS $VERBOSE +} + +start_singularity(){ + # triton server image may need to modify contents of opt/tritonserver/lib/ + # but cvmfs is read-only + # -> make a writable local directory with the same contents + $DRYRUN mkdir ${LIB} + $DRYRUN ln -s ${SANDBOX}/opt/tritonserver/lib/* ${LIB}/ + + # mount all model repositories + MOUNTARGS="" + REPOARGS="" + for REPO in ${REPOS[@]}; do + MOUNTARGS="$MOUNTARGS -B $REPO" + REPOARGS="$REPOARGS --model-repository=${REPO}" + done + + # start instance + # need to bind /cvmfs for above symlinks to work inside container + $DRYRUN singularity instance start \ + -B ${SHM}:/run/shm -B ${LIB}:/opt/tritonserver/lib -B ${SANDBOX} $MOUNTARGS $EXTRA \ + 
${SANDBOX} ${SERVER} + + START_EXIT=$? + if [ "$START_EXIT" -ne 0 ]; then + rm -rf ${LIB} + return "$START_EXIT" + fi + + # run the actual server + if [ -z "$DRYRUN" ]; then + REDIR="$LOG" + else + REDIR=/dev/stdout + fi + $DRYRUN singularity run instance://${SERVER} \ + tritonserver $PORTARGS $REPOARGS $VERBOSE >& ${REDIR} & + [ -z "$DRYRUN" ] || wait +} + +stop_docker(){ + # keep log + if [ -z "$DRYRUN" ]; then + if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& "$LOG"; fi + fi + + $DRYRUN $DOCKER stop ${SERVER} + $DRYRUN $DOCKER rm ${SERVER} +} + +stop_singularity(){ + $DRYRUN singularity instance stop ${SERVER} +} + +test_docker(){ + # docker logs print to stderr + ${DOCKER} logs ${SERVER} |& grep "$1" +} + +test_singularity(){ + grep "$1" $LOG +} + +wait_server(){ + if [ -n "$DRYRUN" ]; then + return + fi + + COUNT=0 + while ! $TEST_FN "$STARTED_INDICATOR" >& /dev/null; do + if $TEST_FN "$SEGFAULT_INDICATOR" >& /dev/null; then + handle_ports + PORT_EXIT=$? + # port exit is zero if it found a new, good port; so retry + if [ "$PORT_EXIT" -ne 0 ]; then + auto_stop true + exit $PORT_EXIT + else + return 1 + fi + elif [ "$COUNT" -gt "$WTIME" ]; then + echo "timed out waiting for server to start" + auto_stop true + exit 1 + else + COUNT=$(($COUNT + 1)) + sleep 1 + fi + done + + echo "server is ready!" 
+} + +list_models(){ + # make list of model repositories + for MODEL in ${MODELS[@]}; do + # check if file was provided rather than directory + if [ -f "$MODEL" ]; then + MODEL="$(dirname "$MODEL")" + fi + REPOS+=("$(dirname "$MODEL")") + done + for ((r=0; r < ${#REPOS[@]}; r++)); do + # avoid issues w/ multiple levels of symlinks + REPOS[$r]=$(readlink -f ${REPOS[$r]}) + done + # make unique list + read -a REPOS <<< "$(printf "%s\n" "${REPOS[@]}" | sort -u | tr '\n' ' ')" +} + +auto_stop(){ + # allow enabling verbosity here even if disabled at top level + # but otherwise use top-level setting + if [ -n "$1" ]; then VERBOSE="$VERBOSE_ARGS"; fi + PARENTPID="$2" + + if [ -n "$PARENTPID" ]; then + PCOUNTER=0 + PMAX=5 + while [ "$PCOUNTER" -le "$PMAX" ]; do + if ! kill -0 $PARENTPID >& /dev/null; then + PCOUNTER=$((PCOUNTER+1)) + else + # must get 5 in a row, otherwise reset + PCOUNTER=0 + fi + sleep 1 + done + fi + $STOP_FN + + # move logs out of tmp dir + if [ -z "$DRYRUN" ]; then + if [ -n "$VERBOSE" ]; then + mv "$LOG" "$TOPDIR" + # only keep non-empty log + if [ -s "$STOPLOG" ]; then + mv "$STOPLOG" "$TOPDIR" + fi + fi + fi + + if [ -n "$CLEANUP" ]; then + $DRYRUN cd "$TOPDIR" + $DRYRUN rm -rf "$TMPDIR" + fi +} + +make_tmp(){ + # make sure everything happens in tmp dir + $DRYRUN mkdir "$TMPDIR" + MKDIR_EXIT=$? 
+ if [ "$MKDIR_EXIT" -ne 0 ]; then + echo "Could not create temp dir: $TMPDIR" + exit "$MKDIR_EXIT" + fi + $DRYRUN cd "$TMPDIR" +} + +if [ -n "$USEDOCKER" ]; then + if [ -n "$GPU" ]; then + EXTRA="--gpus all" + fi + START_FN=start_docker + TEST_FN=test_docker + STOP_FN=stop_docker + PROG_NAME=Docker +else + if [ -n "$GPU" ]; then + EXTRA="--nv" + fi + START_FN=start_singularity + TEST_FN=test_singularity + STOP_FN=stop_singularity + PROG_NAME=Singularity +fi + +if [ "$OP" == start ]; then + # handle cleaning up + if [ -n "$FORCE" ]; then + auto_stop + elif [ -d "$TMPDIR" ]; then + echo "Error: this container may already exist (override with -f)" + exit 1 + fi + + handle_ports + PORT_EXIT=$? + if [ "$PORT_EXIT" -ne 0 ]; then exit $PORT_EXIT; fi + + list_models + + make_tmp + + # if parent PID is provided, automatically stop server when finished + # do this before actually trying to start the server in case of ctrl+c + if [ -n "$PARENTPID" ]; then + auto_stop "" "$PARENTPID" >& "$STOPLOG" & + fi + + START_EXIT=0 + for ((counter=0; counter < ${RETRIES}; counter++)); do + if [ "$START_EXIT" -ne 0 ]; then make_tmp; fi + + $START_FN + START_EXIT=$? + if [ "$START_EXIT" -eq 0 ]; then + wait_server + WAIT_EXIT=$? + if [ "$WAIT_EXIT" -eq 0 ]; then + break + else + # allow to keep retrying if there was a port issue + counter=$((counter-1)) + START_EXIT="$WAIT_EXIT" + fi + fi + + if [ "$START_EXIT" -ne 0 ]; then + auto_stop + echo "Retrying after container issue..." 
+ fi + done + if [ "$START_EXIT" -ne 0 ]; then + echo "Error from $PROG_NAME" + exit "$START_EXIT" + fi +else + # check for tmp dir + if [ -d "$TMPDIR" ] || [ -n "$DRYRUN" ]; then + $DRYRUN cd "$TMPDIR" + elif [ -z "$FORCE" ]; then + echo "Error: attempt to stop unknown container $SERVER" + exit 1 + fi + + auto_stop +fi diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 98380e6546f4d..f26a09392ba81 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -1,6 +1,9 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/Exception.h" #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h" #include "grpc_client.h" @@ -20,18 +23,23 @@ namespace nic = ni::client; //based on https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_async_infer_client.cc //and https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/perf_client.cc -TritonClient::TritonClient(const edm::ParameterSet& params) - : SonicClient(params), +TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& debugName) + : SonicClient(params, debugName, "TritonClient"), verbose_(params.getUntrackedParameter("verbose")), options_(params.getParameter("modelName")) { - clientName_ = "TritonClient"; - //will get overwritten later, just used in constructor - fullDebugName_ = clientName_; + //get appropriate server for this model + edm::Service ts; + const auto& [url, isFallbackCPU] = + ts->serverAddress(options_.model_name_, params.getUntrackedParameter("preferredServer")); + if 
(verbose_) + edm::LogInfo(fullDebugName_) << "Using server: " << url; + //enforce sync mode for fallback CPU server to avoid contention + //todo: could enforce async mode otherwise (unless mode was specified by user?) + if (isFallbackCPU) + setMode(SonicMode::Sync); //connect to the server //TODO: add SSL options - std::string url(params.getUntrackedParameter("address") + ":" + - std::to_string(params.getUntrackedParameter("port"))); triton_utils::throwIfError(nic::InferenceServerGrpcClient::Create(&client_, url, false), "TritonClient(): unable to create inference context"); @@ -127,8 +135,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params) throw cms::Exception("MissingOutput") << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs); - //check requested batch size and propagate to inputs and outputs - setBatchSize(params.getUntrackedParameter("batchSize")); + //propagate batch size to inputs and outputs + setBatchSize(1); //print model info std::stringstream model_msg; @@ -327,9 +335,11 @@ TritonClient::ServerSideStats TritonClient::summarizeServerStats(const inference inference::ModelStatistics TritonClient::getServerSideStatus() const { if (verbose_) { inference::ModelStatisticsResponse resp; - triton_utils::warnIfError(client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_), - "getServerSideStatus(): unable to get model statistics"); - return *(resp.model_stats().begin()); + bool success = triton_utils::warnIfError( + client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_), + "getServerSideStatus(): unable to get model statistics"); + if (success) + return *(resp.model_stats().begin()); } return inference::ModelStatistics{}; } @@ -340,10 +350,9 @@ void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) { fillBasePSetDescription(descClient); descClient.add("modelName"); descClient.add("modelVersion", ""); + 
descClient.add("modelConfigPath"); //server parameters should not affect the physics results - descClient.addUntracked("batchSize"); - descClient.addUntracked("address"); - descClient.addUntracked("port"); + descClient.addUntracked("preferredServer", ""); descClient.addUntracked("timeout"); descClient.addUntracked("verbose", false); descClient.addUntracked>("outputs", {}); diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc new file mode 100644 index 0000000000000..d3421b0e60b8d --- /dev/null +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -0,0 +1,292 @@ +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" +#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h" + +#include "DataFormats/Provenance/interface/ModuleDescription.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" +#include "FWCore/ServiceRegistry/interface/ProcessContext.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "FWCore/Utilities/interface/GlobalIdentifier.h" + +#include "grpc_client.h" +#include "grpc_service.pb.h" + +#include +#include +#include +#include +#include + +namespace ni = nvidia::inferenceserver; +namespace nic = ni::client; + +const std::string TritonService::Server::fallbackName{"fallback"}; +const std::string TritonService::Server::fallbackAddress{"0.0.0.0"}; + +namespace { + std::pair execSys(const std::string& cmd) { + //redirect stderr to stdout + auto pipe = popen((cmd + " 2>&1").c_str(), "r"); + int thisErrno = errno; + if (!pipe) + throw cms::Exception("SystemError") << "popen() failed with errno " << thisErrno << " for command: " << cmd; + + //extract output + constexpr static unsigned buffSize = 128; + std::array buffer; + std::string result; + 
while (!feof(pipe)) { + if (fgets(buffer.data(), buffSize, pipe)) + result += buffer.data(); + else { + thisErrno = ferror(pipe); + if (thisErrno) + throw cms::Exception("SystemError") << "failed reading command output with errno " << thisErrno; + } + } + + int rv = pclose(pipe); + return std::make_pair(result, rv); + } +} // namespace + +TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) + : verbose_(pset.getUntrackedParameter("verbose")), + fallbackOpts_(pset.getParameterSet("fallback")), + currentModuleId_(0), + allowAddModel_(false), + startedFallback_(false) { + //module construction is assumed to be serial (correct at the time this code was written) + areg.watchPreModuleConstruction(this, &TritonService::preModuleConstruction); + areg.watchPostModuleConstruction(this, &TritonService::postModuleConstruction); + areg.watchPreModuleDestruction(this, &TritonService::preModuleDestruction); + //fallback server will be launched (if needed) before beginJob + areg.watchPreBeginJob(this, &TritonService::preBeginJob); + + //include fallback server in set if enabled + if (fallbackOpts_.enable) + servers_.emplace(std::piecewise_construct, + std::forward_as_tuple(Server::fallbackName), + std::forward_as_tuple(Server::fallbackName, Server::fallbackAddress)); + + //loop over input servers: check which models they have + std::string msg; + if (verbose_) + msg = "List of models for each server:\n"; + for (const auto& serverPset : pset.getUntrackedParameterSetVector("servers")) { + const std::string& serverName(serverPset.getUntrackedParameter("name")); + //ensure uniqueness + auto [sit, unique] = servers_.emplace(serverName, serverPset); + if (!unique) + throw cms::Exception("DuplicateServer") + << "Not allowed to specify more than one server with same name (" << serverName << ")"; + auto& serverInfo(sit->second); + + std::unique_ptr client; + triton_utils::throwIfError( + nic::InferenceServerGrpcClient::Create(&client, serverInfo.url, 
false), + "TritonService(): unable to create inference context for " + serverName + " (" + serverInfo.url + ")"); + + inference::RepositoryIndexResponse repoIndexResponse; + triton_utils::throwIfError( + client->ModelRepositoryIndex(&repoIndexResponse), + "TritonService(): unable to get repository index for " + serverName + " (" + serverInfo.url + ")"); + + //servers keep track of models and vice versa + if (verbose_) + msg += serverName + ": "; + for (const auto& modelIndex : repoIndexResponse.models()) { + const auto& modelName = modelIndex.name(); + auto mit = models_.find(modelName); + if (mit == models_.end()) + mit = models_.emplace(modelName, "").first; + auto& modelInfo(mit->second); + modelInfo.servers.insert(serverName); + serverInfo.models.insert(modelName); + if (verbose_) + msg += modelName + ", "; + } + if (verbose_) + msg += "\n"; + } + if (verbose_) + edm::LogInfo("TritonService") << msg; +} + +void TritonService::preModuleConstruction(edm::ModuleDescription const& desc) { + currentModuleId_ = desc.id(); + allowAddModel_ = true; +} + +void TritonService::addModel(const std::string& modelName, const std::string& path) { + //should only be called in module constructors + if (!allowAddModel_) + throw cms::Exception("DisallowedAddModel") << "Attempt to call addModel() outside of module constructors"; + //if model is not in the list, then no specified server provides it + auto mit = models_.find(modelName); + if (mit == models_.end()) { + auto& modelInfo(unservedModels_.emplace(modelName, path).first->second); + modelInfo.modules.insert(currentModuleId_); + //only keep track of modules that need unserved models + modules_.emplace(currentModuleId_, modelName); + } +} + +void TritonService::postModuleConstruction(edm::ModuleDescription const& desc) { allowAddModel_ = false; } + +void TritonService::preModuleDestruction(edm::ModuleDescription const& desc) { + //remove destructed modules from unserved list + if (unservedModels_.empty()) + return; + auto id = 
desc.id(); + auto oit = modules_.find(id); + if (oit != modules_.end()) { + const auto& moduleInfo(oit->second); + auto mit = unservedModels_.find(moduleInfo.model); + if (mit != unservedModels_.end()) { + auto& modelInfo(mit->second); + modelInfo.modules.erase(id); + //remove a model if it is no longer needed by any modules + if (modelInfo.modules.empty()) + unservedModels_.erase(mit); + } + modules_.erase(oit); + } +} + +//second return value is only true if fallback CPU server is being used +std::pair TritonService::serverAddress(const std::string& model, + const std::string& preferred) const { + auto mit = models_.find(model); + if (mit == models_.end()) + throw cms::Exception("MissingModel") << "There are no servers that provide model " << model; + const auto& modelInfo(mit->second); + const auto& modelServers = modelInfo.servers; + + auto msit = modelServers.end(); + if (!preferred.empty()) { + msit = modelServers.find(preferred); + //todo: add a "strict" parameter to stop execution if preferred server isn't found? + if (msit == modelServers.end()) + edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model + << " not available, will choose another server"; + } + const auto& serverName(msit == modelServers.end() ? 
*modelServers.begin() : preferred); + + //todo: use some algorithm to select server rather than just picking arbitrarily + const auto& serverInfo(servers_.find(serverName)->second); + bool isFallbackCPU = serverInfo.isFallback and !fallbackOpts_.useGPU; + return std::make_pair(serverInfo.url, isFallbackCPU); +} + +void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&) { + //only need fallback if there are unserved models + if (!fallbackOpts_.enable or unservedModels_.empty()) + return; + + std::string msg; + if (verbose_) + msg = "List of models for fallback server: "; + //all unserved models are provided by fallback server + auto& serverInfo(servers_.find(Server::fallbackName)->second); + for (const auto& [modelName, model] : unservedModels_) { + auto& modelInfo(models_.emplace(modelName, model).first->second); + modelInfo.servers.insert(Server::fallbackName); + serverInfo.models.insert(modelName); + if (verbose_) + msg += modelName + ", "; + } + if (verbose_) + edm::LogInfo("TritonService") << msg; + + //randomize instance name + if (fallbackOpts_.instanceName.empty()) { + fallbackOpts_.instanceName = "triton_server_instance_" + edm::createGlobalIdentifier(); + } + + //assemble server start command + std::string command("cmsTriton -p -P -1"); + if (fallbackOpts_.debug) + command += " -c"; + if (fallbackOpts_.verbose) + command += " -v"; + if (fallbackOpts_.useDocker) + command += " -d"; + if (fallbackOpts_.useGPU) + command += " -g"; + if (!fallbackOpts_.instanceName.empty()) + command += " -n " + fallbackOpts_.instanceName; + if (fallbackOpts_.retries >= 0) + command += " -r " + std::to_string(fallbackOpts_.retries); + if (fallbackOpts_.wait >= 0) + command += " -w " + std::to_string(fallbackOpts_.wait); + for (const auto& [modelName, model] : unservedModels_) { + command += " -m " + model.path; + } + //don't need this anymore + unservedModels_.clear(); + + //get a random temporary directory if none specified + if 
(fallbackOpts_.tempDir.empty()) { + auto tmp_dir_path{std::filesystem::temp_directory_path() /= edm::createGlobalIdentifier()}; + fallbackOpts_.tempDir = tmp_dir_path.string(); + } + //special case ".": use script default (temp dir = .$instanceName) + if (fallbackOpts_.tempDir != ".") + command += " -t " + fallbackOpts_.tempDir; + + command += " start"; + + if (fallbackOpts_.debug) + edm::LogInfo("TritonService") << "Fallback server temporary directory: " << fallbackOpts_.tempDir; + if (verbose_) + edm::LogInfo("TritonService") << command; + + //mark as started before executing in case of ctrl+c while command is running + startedFallback_ = true; + const auto& [output, rv] = execSys(command); + if (verbose_ or rv != 0) + edm::LogInfo("TritonService") << output; + if (rv != 0) + throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; + + //get the port + const std::string& portIndicator("CMS_TRITON_GRPC_PORT: "); + //find last instance in log in case multiple ports were tried + auto pos = output.rfind(portIndicator); + if (pos != std::string::npos) { + auto pos2 = pos + portIndicator.size(); + auto pos3 = output.find('\n', pos2); + const auto& portNum = output.substr(pos2, pos3 - pos2); + serverInfo.url += ":" + portNum; + } else + throw cms::Exception("FallbackFailed") << "Unknown port for fallback server, log follows:\n" << output; +} + +void TritonService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.addUntracked("verbose", false); + + edm::ParameterSetDescription validator; + validator.addUntracked("name"); + validator.addUntracked("address"); + validator.addUntracked("port"); + + desc.addVPSetUntracked("servers", validator, {}); + + edm::ParameterSetDescription fallbackDesc; + fallbackDesc.addUntracked("enable", false); + fallbackDesc.addUntracked("debug", false); + fallbackDesc.addUntracked("verbose", false); + fallbackDesc.addUntracked("useDocker", 
false); + fallbackDesc.addUntracked("useGPU", false); + fallbackDesc.addUntracked("retries", -1); + fallbackDesc.addUntracked("wait", -1); + fallbackDesc.addUntracked("instanceName", ""); + fallbackDesc.addUntracked("tempDir", ""); + desc.add("fallback", fallbackDesc); + + descriptions.addWithDefaultLabel(desc); +} diff --git a/HeterogeneousCore/SonicTriton/test/BuildFile.xml b/HeterogeneousCore/SonicTriton/test/BuildFile.xml index 39a975ea89418..f3b5d01f6cfda 100644 --- a/HeterogeneousCore/SonicTriton/test/BuildFile.xml +++ b/HeterogeneousCore/SonicTriton/test/BuildFile.xml @@ -1,6 +1,10 @@ - - - - - - + + + + + + + + + + diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 6d0d75960078a..52e890480edc7 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -1,46 +1,29 @@ # SONIC TritonClient tests -Test producers `TritonImageProducer` and `TritonGraphProducer` are available. +Test modules `TritonImageProducer` and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available. They generate arbitrary inputs for inference (with ResNet50 or Graph Attention Network, respectively) and print the resulting output. -To run the tests, a local Triton server can be started using Singularity (default, should not require superuser permission) -or Docker (may require superuser permission). -The server can utilize the local CPU (support for AVX instructions required) or a local Nvidia GPU, if one is available. -The default local server address is `0.0.0.0`. 
- -First, the relevant data should be downloaded from Nvidia: +First, the relevant data for ResNet50 should be downloaded from Nvidia: ``` ./fetch_model.sh ``` -The server can be managed with the `triton` script (using Singularity with CPU by default): -``` -./triton start -[run test commands] -./triton stop -``` - -The script has the following options: -* `-d`: use Docker instead of Singularity -* `-g`: use GPU instead of CPU -* `-n`: name of container instance (default: triton_server_instance) -* `-v`: (verbose) start: activate server debugging info; stop: keep server logs -* `-w`: maximum time to wait for server to start (default: 60 seconds) -* `-h`: print help message and exit +A local Triton server will be launched automatically when the tests run. +The local server will use Singularity with CPU by default; if a local Nvidia GPU is available, it will be used instead. +(This behavior can also be controlled manually using the "device" argument to [tritonTest_cfg.py](./tritonTest_cfg.py).) ## Test commands Run the image test: ``` -cmsRun tritonTest_cfg.py maxEvents=1 producer=TritonImageProducer +cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonImageProducer ``` Run the graph test: ``` -cmsRun tritonTest_cfg.py maxEvents=1 producer=TritonGraphProducer +cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonGraphProducer ``` ## Caveats * Local CPU server requires support for AVX instructions. -* Multiple users cannot run servers on the same GPU (e.g. on a shared node). 
diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc new file mode 100644 index 0000000000000..46ced5e330291 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc @@ -0,0 +1,171 @@ +#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +#include +#include +#include +#include + +class TritonGraphHelper { +public: + TritonGraphHelper(edm::ParameterSet const& cfg) + : nodeMin_(cfg.getParameter("nodeMin")), + nodeMax_(cfg.getParameter("nodeMax")), + edgeMin_(cfg.getParameter("edgeMin")), + edgeMax_(cfg.getParameter("edgeMax")) {} + void makeInput(edm::Event const& iEvent, TritonInputMap& iInput) const { + //get event-based seed for RNG + unsigned int runNum_uint = static_cast(iEvent.id().run()); + unsigned int lumiNum_uint = static_cast(iEvent.id().luminosityBlock()); + unsigned int evNum_uint = static_cast(iEvent.id().event()); + std::uint32_t seed = (lumiNum_uint << 10) + (runNum_uint << 20) + evNum_uint; + std::mt19937 rng(seed); + + std::uniform_int_distribution randint1(nodeMin_, nodeMax_); + int nnodes = randint1(rng); + std::uniform_int_distribution randint2(edgeMin_, edgeMax_); + int nedges = randint2(rng); + + //set shapes + auto& input1 = iInput.at("x__0"); + input1.setShape(0, nnodes); + auto data1 = std::make_shared>(1); + auto& vdata1 = (*data1)[0]; + vdata1.reserve(input1.sizeShape()); + + auto& input2 = iInput.at("edgeindex__1"); + input2.setShape(1, nedges); + auto data2 = std::make_shared>(1); + auto& vdata2 = (*data2)[0]; + vdata2.reserve(input2.sizeShape()); + + //fill + std::normal_distribution randx(-10, 4); + for (unsigned i = 0; i < input1.sizeShape(); ++i) { + vdata1.push_back(randx(rng)); + } + + std::uniform_int_distribution randedge(0, 
nnodes - 1); + for (unsigned i = 0; i < input2.sizeShape(); ++i) { + vdata2.push_back(randedge(rng)); + } + + // convert to server format + input1.toServer(data1); + input2.toServer(data2); + } + void makeOutput(const TritonOutputMap& iOutput, const std::string& debugName) const { + //check the results + const auto& output1 = iOutput.begin()->second; + // convert from server format + const auto& tmp = output1.fromServer(); + std::stringstream msg; + for (int i = 0; i < output1.shape()[0]; ++i) { + msg << "output " << i << ": "; + for (int j = 0; j < output1.shape()[1]; ++j) { + msg << tmp[0][output1.shape()[1] * i + j] << " "; + } + msg << "\n"; + } + edm::LogInfo(debugName) << msg.str(); + } + static void fillPSetDescription(edm::ParameterSetDescription& desc) { + desc.add("nodeMin", 100); + desc.add("nodeMax", 4000); + desc.add("edgeMin", 8000); + desc.add("edgeMax", 15000); + } + +private: + //members + unsigned nodeMin_, nodeMax_; + unsigned edgeMin_, edgeMax_; +}; + +class TritonGraphProducer : public TritonEDProducer<> { +public: + explicit TritonGraphProducer(edm::ParameterSet const& cfg) + : TritonEDProducer<>(cfg, "TritonGraphProducer"), helper_(cfg) {} + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + helper_.makeInput(iEvent, iInput); + } + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + helper_.makeOutput(iOutput, debugName_); + } + ~TritonGraphProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + TritonGraphHelper::fillPSetDescription(desc); + //to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + //member + TritonGraphHelper helper_; +}; + +DEFINE_FWK_MODULE(TritonGraphProducer); + +#include "HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h" + +class TritonGraphFilter : 
public TritonEDFilter<> { +public: + explicit TritonGraphFilter(edm::ParameterSet const& cfg) : TritonEDFilter<>(cfg, "TritonGraphFilter"), helper_(cfg) {} + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + helper_.makeInput(iEvent, iInput); + } + bool filter(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + helper_.makeOutput(iOutput, debugName_); + return true; + } + ~TritonGraphFilter() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + TritonGraphHelper::fillPSetDescription(desc); + //to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + //member + TritonGraphHelper helper_; +}; + +DEFINE_FWK_MODULE(TritonGraphFilter); + +#include "HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h" + +class TritonGraphAnalyzer : public TritonOneEDAnalyzer<> { +public: + explicit TritonGraphAnalyzer(edm::ParameterSet const& cfg) + : TritonOneEDAnalyzer<>(cfg, "TritonGraphAnalyzer"), helper_(cfg) {} + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + helper_.makeInput(iEvent, iInput); + } + void analyze(edm::Event const& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + helper_.makeOutput(iOutput, debugName_); + } + ~TritonGraphAnalyzer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + TritonGraphHelper::fillPSetDescription(desc); + //to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + //member + TritonGraphHelper helper_; +}; + +DEFINE_FWK_MODULE(TritonGraphAnalyzer); diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc 
b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc deleted file mode 100644 index 952a6e0bd08b1..0000000000000 --- a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include "HeterogeneousCore/SonicCore/interface/SonicEDProducer.h" -#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" - -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Framework/interface/MakerMacros.h" - -#include -#include -#include -#include -#include -#include - -class TritonGraphProducer : public SonicEDProducer { -public: - explicit TritonGraphProducer(edm::ParameterSet const& cfg) : SonicEDProducer(cfg) { - //for debugging - setDebugName("TritonGraphProducer"); - } - void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { - //get event-based seed for RNG - unsigned int runNum_uint = static_cast(iEvent.id().run()); - unsigned int lumiNum_uint = static_cast(iEvent.id().luminosityBlock()); - unsigned int evNum_uint = static_cast(iEvent.id().event()); - std::uint32_t seed = (lumiNum_uint << 10) + (runNum_uint << 20) + evNum_uint; - std::mt19937 rng(seed); - - std::uniform_int_distribution randint1(100, 4000); - int nnodes = randint1(rng); - std::uniform_int_distribution randint2(8000, 15000); - int nedges = randint2(rng); - - //set shapes - auto& input1 = iInput.at("x__0"); - input1.setShape(0, nnodes); - auto data1 = std::make_shared>(1); - auto& vdata1 = (*data1)[0]; - vdata1.reserve(input1.sizeShape()); - - auto& input2 = iInput.at("edgeindex__1"); - input2.setShape(1, nedges); - auto data2 = std::make_shared>(1); - auto& vdata2 = (*data2)[0]; - vdata2.reserve(input2.sizeShape()); - - //fill - std::normal_distribution randx(-10, 4); - for (unsigned i = 0; i < input1.sizeShape(); ++i) { - vdata1.push_back(randx(rng)); - } - - std::uniform_int_distribution randedge(0, nnodes - 1); - for 
(unsigned i = 0; i < input2.sizeShape(); ++i) { - vdata2.push_back(randedge(rng)); - } - - // convert to server format - input1.toServer(data1); - input2.toServer(data2); - } - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { - //check the results - const auto& output1 = iOutput.begin()->second; - // convert from server format - const auto& tmp = output1.fromServer(); - std::stringstream msg; - for (int i = 0; i < output1.shape()[0]; ++i) { - msg << "output " << i << ": "; - for (int j = 0; j < output1.shape()[1]; ++j) { - msg << tmp[0][output1.shape()[1] * i + j] << " "; - } - msg << "\n"; - } - edm::LogInfo(client_.debugName()) << msg.str(); - } - ~TritonGraphProducer() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - TritonClient::fillPSetDescription(desc); - //to ensure distinct cfi names - descriptions.addWithDefaultLabel(desc); - } -}; - -DEFINE_FWK_MODULE(TritonGraphProducer); diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index 2617d28eedb2c..4c602c659ea75 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -1,6 +1,6 @@ -#include "HeterogeneousCore/SonicCore/interface/SonicEDProducer.h" -#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Framework/interface/MakerMacros.h" @@ -11,14 +11,14 @@ #include #include -class TritonImageProducer : public SonicEDProducer { +class TritonImageProducer : public TritonEDProducer<> { public: explicit 
TritonImageProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg), topN_(cfg.getParameter("topN")) { - //for debugging - setDebugName("TritonImageProducer"); + : TritonEDProducer<>(cfg, "TritonImageProducer"), + batchSize_(cfg.getParameter("batchSize")), + topN_(cfg.getParameter("topN")) { //load score list - std::string imageListFile(cfg.getParameter("imageList")); + std::string imageListFile(cfg.getParameter("imageList").fullPath()); std::ifstream ifile(imageListFile); if (ifile.is_open()) { std::string line; @@ -30,12 +30,13 @@ class TritonImageProducer : public SonicEDProducer { } } void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + client_->setBatchSize(batchSize_); // create an npix x npix x ncol image w/ arbitrary color value // model only has one input, so just pick begin() auto& input1 = iInput.begin()->second; auto data1 = std::make_shared>(); - data1->reserve(input1.batchSize()); - for (unsigned i = 0; i < input1.batchSize(); ++i) { + data1->reserve(batchSize_); + for (unsigned i = 0; i < batchSize_; ++i) { data1->emplace_back(input1.sizeDims(), 0.5f); } // convert to server format @@ -50,8 +51,9 @@ class TritonImageProducer : public SonicEDProducer { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; TritonClient::fillPSetDescription(desc); + desc.add("batchSize", 1); desc.add("topN", 5); - desc.add("imageList"); + desc.add("imageList"); //to ensure distinct cfi names descriptions.addWithDefaultLabel(desc); } @@ -76,10 +78,11 @@ class TritonImageProducer : public SonicEDProducer { if (counter >= topN_) break; } - edm::LogInfo(client_.debugName()) << msg.str(); + edm::LogInfo(debugName_) << msg.str(); } } + unsigned batchSize_; unsigned topN_; std::vector imageList_; }; diff --git a/HeterogeneousCore/SonicTriton/test/fetch_model.sh b/HeterogeneousCore/SonicTriton/test/fetch_model.sh index bc469886e215c..0f36460b936ce 100755 --- 
a/HeterogeneousCore/SonicTriton/test/fetch_model.sh +++ b/HeterogeneousCore/SonicTriton/test/fetch_model.sh @@ -19,36 +19,3 @@ mkdir -p 1 curl -o 1/model.netdef http://download.caffe2.ai.s3.amazonaws.com/models/resnet50/predict_net.pb curl -o 1/init_model.netdef http://download.caffe2.ai.s3.amazonaws.com/models/resnet50/init_net.pb - -GAT_DIR=${TEST_DIR}/../data/models/gat_test -cd $TEST_DIR -mkdir -p $GAT_DIR -cd $GAT_DIR - -cat << EOF > config.pbtxt -name: "gat_test" -platform: "pytorch_libtorch" -max_batch_size: 0 -input [ - { - name: "x__0" - data_type: TYPE_FP32 - dims: [ -1, 1433 ] - }, - { - name: "edgeindex__1" - data_type: TYPE_INT64 - dims: [ 2, -1 ] - } -] -output [ - { - name: "logits__0" - data_type: TYPE_FP32 - dims: [ -1, 7 ] - } -] -EOF - -mkdir -p 1 -cp /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.09-py3-geometric/torch_geometric/examples/model.pt 1/model.pt diff --git a/HeterogeneousCore/SonicTriton/test/triton b/HeterogeneousCore/SonicTriton/test/triton deleted file mode 100755 index a02c63e3f64e3..0000000000000 --- a/HeterogeneousCore/SonicTriton/test/triton +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash - -# defaults -USEDOCKER="" -GPU="" -VERBOSE="" -WTIME=60 -SERVER=triton_server_instance - -usage() { - ECHO="echo -e" - $ECHO "triton [options] [start|stop]" - $ECHO - $ECHO "Options:" - $ECHO "-d \t use Docker instead of Singularity" - $ECHO "-g \t use GPU instead of CPU" - $ECHO "-n \t name of container instance (default: ${SERVER})" - $ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" - $ECHO "-w \t maximum time to wait for server to start (default: ${WTIME} seconds)" - $ECHO "-h \t print this message and exit" - $ECHO - $ECHO "Operations:" - $ECHO "start \t start server" - $ECHO "stop \t stop server" - exit $1 -} - -# check shm locations -SHM=/dev/shm -if [ -e /run/shm ]; then - SHM=/run/shm -fi - -while getopts "dgvhw:n:" opt; do - case "$opt" in - d) USEDOCKER=true - ;; - g) 
GPU=true - ;; - v) VERBOSE="--log-verbose=1 --log-error=1 --log-info=1" - ;; - h) usage 0 - ;; - w) WTIME="$OPTARG" - ;; - n) SERVER="$OPTARG" - ;; - esac -done - -shift $(($OPTIND - 1)) -OP=$1 - -if [ "$OP" != start ] && [ "$OP" != stop ]; then - usage 1 -fi - -DOCKER="sudo docker" -IMAGE=fastml/triton-torchgeo:20.09-py3-geometric -MODELS=${CMSSW_BASE}/src/HeterogeneousCore/SonicTriton/data/models -LOG=log_triton_server.log -LIB=lib -STARTED_INDICATOR="Started GRPCInferenceService" -EXTRA="" - -start_docker(){ - $DOCKER run -d --name ${SERVER} \ - --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ - -p8000:8000 -p8001:8001 -p8002:8002 $EXTRA \ - -v${MODELS}:/models \ - ${IMAGE} tritonserver --model-repository=/models $VERBOSE -} - -start_singularity(){ - # triton server image may need to modify contents of opt/tritonserver/lib/ - # but cvmfs is read-only - # -> make a writable local directory with the same contents - mkdir ${LIB} - ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/ - - # start instance - # need to bind /cvmfs for above symlinks to work inside container - singularity instance start \ - -B ${SHM}:/run/shm -B ${MODELS}:/models -B ${LIB}:/opt/tritonserver/lib -B /cvmfs $EXTRA \ - /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} ${SERVER} - - START_EXIT=$? 
- if [ "$START_EXIT" -ne 0 ]; then - rm -rf ${LIB} - return "$START_EXIT" - fi - - # run the actual server - singularity run instance://${SERVER} \ - tritonserver --model-repository=/models $VERBOSE >& ${LOG} & -} - -stop_docker(){ - # keep log - if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& ${LOG}; fi - - $DOCKER stop ${SERVER} - $DOCKER rm ${SERVER} -} - -stop_singularity(){ - singularity instance stop ${SERVER} - - # cleanup - rm -rf ${LIB} - if [ -z "$VERBOSE" ]; then rm ${LOG}; fi -} - -test_docker(){ - # docker logs print to stderr - ${DOCKER} logs ${SERVER} |& grep "$STARTED_INDICATOR" -} - -test_singularity(){ - grep "$STARTED_INDICATOR" $LOG -} - -wait_server(){ - COUNT=0 - while ! $WAIT_COND >& /dev/null; do - if [ "$COUNT" -gt "$WTIME" ]; then - echo "timed out waiting for server to start" - VERBOSE=true $STOP_FN - exit 1 - else - COUNT=$(($COUNT + 1)) - sleep 1 - fi - done - - echo "server is ready!" - exit 0 -} - -if [ -n "$USEDOCKER" ]; then - if [ -n "$GPU" ]; then - EXTRA="--gpus all" - fi - START_FN=start_docker - WAIT_COND=test_docker - STOP_FN=stop_docker - PROG_NAME=Docker -else - if [ -n "$GPU" ]; then - EXTRA="--nv" - fi - START_FN=start_singularity - WAIT_COND=test_singularity - STOP_FN=stop_singularity - PROG_NAME=Singularity -fi - -if [ "$OP" == start ]; then - $START_FN - START_EXIT=$? 
- if [ "$START_EXIT" -ne 0 ]; then - echo "Error from $PROG_NAME" - exit "$START_EXIT" - fi - wait_server -else - $STOP_FN -fi diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 65ea8ca4a51ec..b9683cdb9439d 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -1,19 +1,23 @@ from FWCore.ParameterSet.VarParsing import VarParsing import FWCore.ParameterSet.Config as cms -import os, sys, json +import os, sys, json, six options = VarParsing("analysis") -options.register("address", "0.0.0.0", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("serverName", "default", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("address", "", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("port", 8001, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("timeout", 30, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("threads", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("streams", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int) -options.register("batchSize", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int) -options.register("producer", "TritonImageProducer", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("modules", "TritonImageProducer", VarParsing.multiplicity.list, VarParsing.varType.string) options.register("modelName","resnet50_netdef", VarParsing.multiplicity.singleton, VarParsing.varType.string) -options.register("mode","PseudoAsync", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("mode","Async", VarParsing.multiplicity.singleton, VarParsing.varType.string) 
options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) +options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) +options.register("device","auto", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("docker", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) +options.register("tries", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.parseArguments() if len(options.params)>0: @@ -23,48 +27,94 @@ options.port = int(pdict["port"]) print("server = "+options.address+":"+str(options.port)) -# check producer/model -models = { - "TritonImageProducer": "resnet50_netdef", - "TritonGraphProducer": "gat_test", -} +# check devices +options.device = options.device.lower() +allowed_devices = ["auto","cpu","gpu"] +if options.device not in allowed_devices: + raise ValueError("Unknown device: "+options.device) -if options.producer not in models: - raise ValueError("Unknown producer: "+options.producer) +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton +process = cms.Process('tritonTest',enableSonicTriton) -process = cms.Process('tritonTest') +process.load("HeterogeneousCore.SonicTriton.TritonService_cff") process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(options.maxEvents) ) process.source = cms.Source("EmptySource") -process.TritonProducer = cms.EDProducer(options.producer, - Client = cms.PSet( - mode = cms.string(options.mode), - batchSize = cms.untracked.uint32(options.batchSize), - address = cms.untracked.string(options.address), - port = cms.untracked.uint32(options.port), - timeout = cms.untracked.uint32(options.timeout), - modelName = cms.string(models[options.producer]), - modelVersion = cms.string(""), - verbose = cms.untracked.bool(options.verbose), - allowedTries = cms.untracked.uint32(0), +process.TritonService.verbose = options.verbose +process.TritonService.fallback.verbose 
= options.verbose +process.TritonService.fallback.useDocker = options.docker +if options.device != "auto": + process.TritonService.fallback.useGPU = options.device=="gpu" +if len(options.address)>0: + process.TritonService.servers.append( + cms.PSet( + name = cms.untracked.string(options.serverName), + address = cms.untracked.string(options.address), + port = cms.untracked.uint32(options.port), + ) ) -) -if options.producer=="TritonImageProducer": - process.TritonProducer.topN = cms.uint32(5) - process.TritonProducer.imageList = cms.string("../data/models/resnet50_netdef/resnet50_labels.txt") # Let it run -process.p = cms.Path( - process.TritonProducer -) +process.p = cms.Path() + +# check module/model +models = { + "TritonImageProducer": "resnet50_netdef", + "TritonGraphProducer": "gat_test", + "TritonGraphFilter": "gat_test", + "TritonGraphAnalyzer": "gat_test", +} + +modules = { + "Producer": cms.EDProducer, + "Filter": cms.EDFilter, + "Analyzer": cms.EDAnalyzer, +} + +keepMsgs = ['TritonClient','TritonService'] +for module in options.modules: + if module not in models: + raise ValueError("Unknown module: "+module) + Module = [obj for name,obj in six.iteritems(modules) if name in module][0] + setattr(process, module, + Module(module, + Client = cms.PSet( + mode = cms.string(options.mode), + preferredServer = cms.untracked.string(""), + timeout = cms.untracked.uint32(options.timeout), + modelName = cms.string(models[module]), + modelVersion = cms.string(""), + modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(models[module])), + verbose = cms.untracked.bool(options.verbose), + allowedTries = cms.untracked.uint32(options.tries), + ) + ) + ) + processModule = getattr(process, module) + if module=="TritonImageProducer": + processModule.batchSize = cms.uint32(1) + processModule.topN = cms.uint32(5) + processModule.imageList = 
cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/resnet50_netdef/resnet50_labels.txt") + elif "TritonGraph" in module: + if options.unittest: + # reduce input size for unit test + processModule.nodeMin = cms.uint32(1) + processModule.nodeMax = cms.uint32(10) + processModule.edgeMin = cms.uint32(20) + processModule.edgeMax = cms.uint32(40) + else: + processModule.nodeMin = cms.uint32(100) + processModule.nodeMax = cms.uint32(4000) + processModule.edgeMin = cms.uint32(8000) + processModule.edgeMax = cms.uint32(15000) + process.p += processModule + keepMsgs.extend([module,module+':TritonClient']) process.load('FWCore/MessageService/MessageLogger_cfi') process.MessageLogger.cerr.FwkReport.reportEvery = 500 -keep_msgs = [options.producer,options.producer+':TritonClient','TritonClient'] -for msg in keep_msgs: - +for msg in keepMsgs: setattr(process.MessageLogger.cerr,msg, cms.untracked.PSet( limit = cms.untracked.int32(10000000), diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh new file mode 100755 index 0000000000000..a428a98a723d8 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +LOCALTOP=$1 + +# the test is not possible if: +# 1. avx instructions not supported (needed for singularity on CPU) +# 2. singularity not found or not usable +# 3. 
inside singularity container w/o unprivileged user namespace enabled (needed for singularity-in-singularity) +# so just return true in those cases + +if grep -q avx /proc/cpuinfo; then + echo "has avx" +else + echo "missing avx" + exit 0 +fi + +if type singularity >& /dev/null; then + echo "has singularity" +else + echo "missing singularity" + exit 0 +fi + +if [ -n "$SINGULARITY_CONTAINER" ]; then + if grep -q "^allow setuid = no" /etc/singularity/singularity.conf && unshare -U echo >/dev/null 2>&1; then + echo "has unprivileged user namespace support" + else + echo "missing unprivileged user namespace support" + exit 0 + fi +fi + +tmpFile=$(mktemp -p ${LOCALTOP} SonicTritonTestXXXXXXXX.log) +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 >& $tmpFile +CMSEXIT=$? + +cat $tmpFile + +STOP_COUNTER=0 +while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do + STOP_COUNTER=$((STOP_COUNTER+1)) + sleep 5 +done + +if [ -n "$LOGFILE" ]; then + echo -e '\n=====\nContents of '$LOGFILE':\n=====\n' + cat "$LOGFILE" +fi + +if grep -q "Socket closed" $tmpFile; then + echo "Transient server error (not caused by client code)" + CMSEXIT=0 +fi + +rm $tmpFile +exit $CMSEXIT