From 974cf5fe824d19874e68ad178934b0e9a400c4f6 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 8 Dec 2020 14:36:40 -0600 Subject: [PATCH 01/31] numerous improvements to triton script: in path, container retry, temp dir, model handling, dry run --- HeterogeneousCore/SonicTriton/README.md | 30 +- HeterogeneousCore/SonicTriton/scripts/triton | 299 +++++++++++++++++++ HeterogeneousCore/SonicTriton/test/README.md | 14 +- HeterogeneousCore/SonicTriton/test/triton | 168 ----------- 4 files changed, 330 insertions(+), 181 deletions(-) create mode 100755 HeterogeneousCore/SonicTriton/scripts/triton delete mode 100755 HeterogeneousCore/SonicTriton/test/triton diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 736c73a9a8320..4c71ca6eb032f 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -61,5 +61,31 @@ In a SONIC Triton producer, the basic flow should follow this pattern: b. obtain output data as `TritonOutput` using `fromServer()` function of output object(s) (sets output shape(s) if variable dimensions exist) c. fill output products -Several example producers (running ResNet50 or Graph Attention Network), along with instructions to run a local server, -can be found in the [test](./test) directory. +A script [`triton`](./scripts/triton) is provided to launch and manage local servers. 
+The script has two operations (`start` and `stop`) and the following options: +* `-c`: don't cleanup temporary dir (for debugging) +* `-D`: dry run: print container commands rather than executing them +* `-d`: use Docker instead of Singularity +* `-f`: force reuse of (possibly) existing container instance +* `-g`: use GPU instead of CPU +* `-M [dir]`: model repository (can be given more than once) +* `-m [dir]`: specific model directory (can be given more than one) +* `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) +* `-r [num]`: number of retries when starting container (default: 3) +* `-t [dir]`: non-default hidden temporary dir +* `-v`: (verbose) start: activate server debugging info; stop: keep server logs +* `-w` [time]`: maximum time to wait for server to start (default: 60 seconds) +* `-h`: print help message and exit + +Additional details and caveats: +* The `start` and `stop` operations for a given container instance should always be executed in the same directory +if a relative path is used for the hidden temporary directory (including the default from the container instance name), +in order to ensure that everything is properly cleaned up. +* A model repository is a folder that contains multiple model directories, while a model directory contains the files for a specific file. +(In the example below, `$CMSSW_BASE/src/HeterogeneousCore/SonicTriton/data/models` is a model repository, +while `$CMSSW_BASE/src/HeterogeneousCore/SonicTriton/data/models/resnet50_netdef` is a model directory.) +If a model repository is provided, all of the models it contains will be provided to the server. +* Older versions of Singularity have a short timeout that may cause launching the server to fail the first time the command is executed. +The `-r` (retry) flag exists to work around this issue. + +Several example producers (running ResNet50 or Graph Attention Network) can be found in the [test](./test) directory. 
diff --git a/HeterogeneousCore/SonicTriton/scripts/triton b/HeterogeneousCore/SonicTriton/scripts/triton new file mode 100755 index 0000000000000..e527c485bc10b --- /dev/null +++ b/HeterogeneousCore/SonicTriton/scripts/triton @@ -0,0 +1,299 @@ +#!/bin/bash + +# defaults +USEDOCKER="" +GPU="" +VERBOSE="" +WTIME=60 +SERVER=triton_server_instance +RETRIES=3 +REPOS=() +MODELS=() +FORCE="" +CLEANUP=true +TMPDIR="" +DRYRUN="" + +usage() { + ECHO="echo -e" + $ECHO "triton [options] [start|stop]" + $ECHO + $ECHO "Options:" + $ECHO "-c \t don't cleanup temporary dir (for debugging)" + $ECHO "-D \t dry run: print container commands rather than executing them" + $ECHO "-d \t use Docker instead of Singularity" + $ECHO "-f \t force reuse of (possibly) existing container instance" + $ECHO "-g \t use GPU instead of CPU" + $ECHO "-M [dir] \t model repository (can be given more than once)" + $ECHO "-m [dir] \t specific model directory (can be given more than one)" + $ECHO "-n [name] \t name of container instance, also used for default hidden temporary dir (default: ${SERVER})" + $ECHO "-r [num] \t number of retries when starting container (default: ${RETRIES})" + $ECHO "-t [dir] \t non-default hidden temporary dir" + $ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" + $ECHO "-w [time] \t maximum time to wait for server to start (default: ${WTIME} seconds)" + $ECHO "-h \t print this message and exit" + $ECHO + $ECHO "Operations:" + $ECHO "start \t start server" + $ECHO "stop \t stop server" + exit $1 +} + +# check shm locations +SHM=/dev/shm +if [ -e /run/shm ]; then + SHM=/run/shm +fi + +while getopts "cDdfgM:m:n:r:t:vw:h" opt; do + case "$opt" in + c) CLEANUP="" + ;; + D) DRYRUN=echo + ;; + d) USEDOCKER=true + ;; + f) FORCE=true + ;; + g) GPU=true + ;; + M) REPOS+=("$OPTARG") + ;; + m) MODELS+=("$OPTARG") + ;; + n) SERVER="$OPTARG" + ;; + r) RETRIES="$OPTARG" + ;; + t) TMPDIR="$OPTARG" + ;; + v) VERBOSE="--log-verbose=1 --log-error=1 
--log-info=1" + ;; + w) WTIME="$OPTARG" + ;; + h) usage 0 + ;; + esac +done + +shift $(($OPTIND - 1)) +OP=$1 + +if [ "$OP" != start ] && [ "$OP" != stop ]; then + usage 1 +fi + +if [ "$RETRIES" -le 0 ]; then + RETRIES=1 +fi + +TOPDIR=$PWD +if [ -z "$TMPDIR" ]; then + TMPDIR="${TOPDIR}/.${SERVER}" +else + TMPDIR=$(readlink -f $TMPDIR) +fi + +DOCKER="sudo docker" +IMAGE=fastml/triton-torchgeo:20.09-py3-geometric +LOG="log_${SERVER}.log" +LIB=lib +STARTED_INDICATOR="Started GRPCInferenceService" +EXTRA="" + +start_docker(){ + # mount all model repositories + MOUNTARGS="" + REPOARGS="" + for REPO in ${REPOS[@]}; do + MOUNTARGS="$MOUNTARGS -v$REPO:$REPO" + REPOARGS="$REPOARGS --model-repository=${REPO}" + done + + $DRYRUN $DOCKER run -d --name ${SERVER} \ + --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ + -p8000:8000 -p8001:8001 -p8002:8002 $EXTRA $MOUNTARGS \ + ${IMAGE} tritonserver $REPOARGS $VERBOSE +} + +start_singularity(){ + # triton server image may need to modify contents of opt/tritonserver/lib/ + # but cvmfs is read-only + # -> make a writable local directory with the same contents + if [ -z "$DRYRUN" ]; then + mkdir ${LIB} + ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/ + fi + + # mount all model repositories + MOUNTARGS="" + REPOARGS="" + for REPO in ${REPOS[@]}; do + MOUNTARGS="$MOUNTARGS -B $REPO" + REPOARGS="$REPOARGS --model-repository=${REPO}" + done + + # start instance + # need to bind /cvmfs for above symlinks to work inside container + $DRYRUN singularity instance start \ + -B ${SHM}:/run/shm -B ${LIB}:/opt/tritonserver/lib -B /cvmfs $MOUNTARGS $EXTRA \ + /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} ${SERVER} + + START_EXIT=$? 
+ if [ "$START_EXIT" -ne 0 ]; then + rm -rf ${LIB} + return "$START_EXIT" + fi + + # run the actual server + if [ -z "$DRYRUN" ]; then + REDIR="$LOG" + else + REDIR=/dev/stdout + fi + $DRYRUN singularity run instance://${SERVER} \ + tritonserver $REPOARGS $VERBOSE >& ${REDIR} & + [ -z "$DRYRUN" ] || wait +} + +stop_docker(){ + # keep log (outside of tmp dir) + if [ -z "$DRYRUN" ]; then + if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& "${TOPDIR}/${LOG}"; fi + fi + + $DRYRUN $DOCKER stop ${SERVER} + $DRYRUN $DOCKER rm ${SERVER} +} + +stop_singularity(){ + $DRYRUN singularity instance stop ${SERVER} + + # move log out of tmp dir + if [ -z "$DRYRUN" ]; then + if [ -n "$VERBOSE" ]; then mv "$LOG" "$TOPDIR"; fi + fi +} + +test_docker(){ + # docker logs print to stderr + ${DOCKER} logs ${SERVER} |& grep "$STARTED_INDICATOR" +} + +test_singularity(){ + grep "$STARTED_INDICATOR" $LOG +} + +wait_server(){ + if [ -n "$DRYRUN" ]; then + return + fi + + COUNT=0 + while ! $WAIT_COND >& /dev/null; do + if [ "$COUNT" -gt "$WTIME" ]; then + echo "timed out waiting for server to start" + VERBOSE=true $STOP_FN + cleanup + exit 1 + else + COUNT=$(($COUNT + 1)) + sleep 1 + fi + done + + echo "server is ready!" 
+ exit 0 +} + +cleanup(){ + if [ -z "$DRYRUN" ] && [ -n "$CLEANUP" ]; then + cd "$TOPDIR" + rm -rf "$TMPDIR" + fi +} + +list_models(){ + # make list of model repositories + for MODEL in ${MODELS[@]}; do + REPOS+=("$(dirname $MODEL)") + done + for ((r=0; r < ${#REPOS[@]}; r++)); do + # avoid issues w/ multiple levels of symlinks + REPOS[$r]=$(readlink -f ${REPOS[$r]}) + done + # make unique list + read -a REPOS <<< "$(printf "%s\n" "${REPOS[@]}" | sort -u | tr '\n' ' ')" +} + +if [ -n "$USEDOCKER" ]; then + if [ -n "$GPU" ]; then + EXTRA="--gpus all" + fi + START_FN=start_docker + WAIT_COND=test_docker + STOP_FN=stop_docker + PROG_NAME=Docker +else + if [ -n "$GPU" ]; then + EXTRA="--nv" + fi + START_FN=start_singularity + WAIT_COND=test_singularity + STOP_FN=stop_singularity + PROG_NAME=Singularity +fi + +if [ "$OP" == start ]; then + # handle cleaning up + if [ -n "$FORCE" ]; then + $STOP_FN + cleanup + elif [ -d "$TMPDIR" ]; then + echo "Error: this container may already exist (override with -f)" + exit 1 + fi + + list_models + + # make sure everything happens in tmp dir + if [ -z "$DRYRUN" ]; then + mkdir "$TMPDIR" + MKDIR_EXIT=$? + if [ "$MKDIR_EXIT" -ne 0 ]; then + echo "Could not create temp dir: $TMPDIR" + exit "$MKDIR_EXIT" + fi + cd "$TMPDIR" + fi + + START_EXIT="" + for ((counter=0; counter < ${RETRIES}; counter++)); do + $START_FN + START_EXIT=$? + if [ "$START_EXIT" -eq 0 ]; then + break + else + echo "Retrying after container issue..." 
+ $STOP_FN + fi + done + if [ "$START_EXIT" -ne 0 ]; then + echo "Error from $PROG_NAME" + exit "$START_EXIT" + fi + wait_server +else + # check for tmp dir + if [ -z "$DRYRUN" ]; then + if [ -d "$TMPDIR" ]; then + cd "$TMPDIR" + else + echo "Error: attempt to stop unknown container $SERVER" + exit 1 + fi + fi + + $STOP_FN + cleanup +fi diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 6d0d75960078a..15d519b707351 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -13,21 +13,13 @@ First, the relevant data should be downloaded from Nvidia: ./fetch_model.sh ``` -The server can be managed with the `triton` script (using Singularity with CPU by default): +Launch a local server (using Singularity with CPU by default): ``` -./triton start +triton -M $CMSSW_BASE/src/HeterogeneousCore/SonicTriton/data/models start [run test commands] -./triton stop +triton stop ``` -The script has the following options: -* `-d`: use Docker instead of Singularity -* `-g`: use GPU instead of CPU -* `-n`: name of container instance (default: triton_server_instance) -* `-v`: (verbose) start: activate server debugging info; stop: keep server logs -* `-w`: maximum time to wait for server to start (default: 60 seconds) -* `-h`: print help message and exit - ## Test commands Run the image test: diff --git a/HeterogeneousCore/SonicTriton/test/triton b/HeterogeneousCore/SonicTriton/test/triton deleted file mode 100755 index a02c63e3f64e3..0000000000000 --- a/HeterogeneousCore/SonicTriton/test/triton +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash - -# defaults -USEDOCKER="" -GPU="" -VERBOSE="" -WTIME=60 -SERVER=triton_server_instance - -usage() { - ECHO="echo -e" - $ECHO "triton [options] [start|stop]" - $ECHO - $ECHO "Options:" - $ECHO "-d \t use Docker instead of Singularity" - $ECHO "-g \t use GPU instead of CPU" - $ECHO "-n \t name of container instance (default: ${SERVER})" - 
$ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" - $ECHO "-w \t maximum time to wait for server to start (default: ${WTIME} seconds)" - $ECHO "-h \t print this message and exit" - $ECHO - $ECHO "Operations:" - $ECHO "start \t start server" - $ECHO "stop \t stop server" - exit $1 -} - -# check shm locations -SHM=/dev/shm -if [ -e /run/shm ]; then - SHM=/run/shm -fi - -while getopts "dgvhw:n:" opt; do - case "$opt" in - d) USEDOCKER=true - ;; - g) GPU=true - ;; - v) VERBOSE="--log-verbose=1 --log-error=1 --log-info=1" - ;; - h) usage 0 - ;; - w) WTIME="$OPTARG" - ;; - n) SERVER="$OPTARG" - ;; - esac -done - -shift $(($OPTIND - 1)) -OP=$1 - -if [ "$OP" != start ] && [ "$OP" != stop ]; then - usage 1 -fi - -DOCKER="sudo docker" -IMAGE=fastml/triton-torchgeo:20.09-py3-geometric -MODELS=${CMSSW_BASE}/src/HeterogeneousCore/SonicTriton/data/models -LOG=log_triton_server.log -LIB=lib -STARTED_INDICATOR="Started GRPCInferenceService" -EXTRA="" - -start_docker(){ - $DOCKER run -d --name ${SERVER} \ - --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ - -p8000:8000 -p8001:8001 -p8002:8002 $EXTRA \ - -v${MODELS}:/models \ - ${IMAGE} tritonserver --model-repository=/models $VERBOSE -} - -start_singularity(){ - # triton server image may need to modify contents of opt/tritonserver/lib/ - # but cvmfs is read-only - # -> make a writable local directory with the same contents - mkdir ${LIB} - ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/ - - # start instance - # need to bind /cvmfs for above symlinks to work inside container - singularity instance start \ - -B ${SHM}:/run/shm -B ${MODELS}:/models -B ${LIB}:/opt/tritonserver/lib -B /cvmfs $EXTRA \ - /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} ${SERVER} - - START_EXIT=$? 
- if [ "$START_EXIT" -ne 0 ]; then - rm -rf ${LIB} - return "$START_EXIT" - fi - - # run the actual server - singularity run instance://${SERVER} \ - tritonserver --model-repository=/models $VERBOSE >& ${LOG} & -} - -stop_docker(){ - # keep log - if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& ${LOG}; fi - - $DOCKER stop ${SERVER} - $DOCKER rm ${SERVER} -} - -stop_singularity(){ - singularity instance stop ${SERVER} - - # cleanup - rm -rf ${LIB} - if [ -z "$VERBOSE" ]; then rm ${LOG}; fi -} - -test_docker(){ - # docker logs print to stderr - ${DOCKER} logs ${SERVER} |& grep "$STARTED_INDICATOR" -} - -test_singularity(){ - grep "$STARTED_INDICATOR" $LOG -} - -wait_server(){ - COUNT=0 - while ! $WAIT_COND >& /dev/null; do - if [ "$COUNT" -gt "$WTIME" ]; then - echo "timed out waiting for server to start" - VERBOSE=true $STOP_FN - exit 1 - else - COUNT=$(($COUNT + 1)) - sleep 1 - fi - done - - echo "server is ready!" - exit 0 -} - -if [ -n "$USEDOCKER" ]; then - if [ -n "$GPU" ]; then - EXTRA="--gpus all" - fi - START_FN=start_docker - WAIT_COND=test_docker - STOP_FN=stop_docker - PROG_NAME=Docker -else - if [ -n "$GPU" ]; then - EXTRA="--nv" - fi - START_FN=start_singularity - WAIT_COND=test_singularity - STOP_FN=stop_singularity - PROG_NAME=Singularity -fi - -if [ "$OP" == start ]; then - $START_FN - START_EXIT=$? 
- if [ "$START_EXIT" -ne 0 ]; then - echo "Error from $PROG_NAME" - exit "$START_EXIT" - fi - wait_server -else - $STOP_FN -fi From 59c5ac2a8c5040a9695d90de08dcd1ac4841548d Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 8 Dec 2020 16:28:44 -0600 Subject: [PATCH 02/31] move client construction into beginJob/beginStream --- HeterogeneousCore/SonicCore/README.md | 17 +++++--- .../SonicCore/interface/SonicAcquirer.h | 26 ++++++++---- .../SonicCore/interface/SonicClient.h | 3 +- .../SonicCore/interface/SonicClientBase.h | 5 +-- .../SonicCore/interface/SonicEDFilter.h | 14 +++---- .../SonicCore/interface/SonicEDProducer.h | 14 +++---- .../SonicCore/interface/SonicOneEDAnalyzer.h | 40 ++++++++++++------- .../SonicCore/src/SonicClientBase.cc | 19 ++++----- .../SonicCore/test/DummyClient.h | 4 +- .../SonicCore/test/SonicDummyFilter.cc | 4 +- .../SonicCore/test/SonicDummyOneAnalyzer.cc | 7 +--- .../SonicCore/test/SonicDummyProducer.cc | 4 +- .../SonicTriton/interface/TritonClient.h | 2 +- .../SonicTriton/src/TritonClient.cc | 8 +--- .../SonicTriton/test/TritonGraphProducer.cc | 8 ++-- .../SonicTriton/test/TritonImageProducer.cc | 6 +-- 16 files changed, 97 insertions(+), 84 deletions(-) diff --git a/HeterogeneousCore/SonicCore/README.md b/HeterogeneousCore/SonicCore/README.md index df877101e50e6..6ae698aeb6a9e 100644 --- a/HeterogeneousCore/SonicCore/README.md +++ b/HeterogeneousCore/SonicCore/README.md @@ -15,9 +15,8 @@ To implement a concrete derived producer class, the following skeleton can be us class MyProducer : public SonicEDProducer { public: - explicit MyProducer(edm::ParameterSet const& cfg) : SonicEDProducer(cfg) { - //for debugging - setDebugName("MyProducer"); + explicit MyProducer(edm::ParameterSet const& cfg) : SonicEDProducer(cfg, "MyProducer") { + //do any necessary operations } void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //convert event data to client input format @@ -65,7 +64,7 @@ To add a new 
communication protocol for SONIC, follow these steps: 2. Set up the concrete client(s) that use the communication protocol in a new package in the `HeterogeneousCore` subsystem 3. Add a test producer (see above) to make sure it works -To implement a concrete client, the following skeleton can be used for the `.h` file, with the function implementations in an associated `.cc` file: +To implement a concrete client, the following skeleton can be used for the `.h` file: ```cpp #ifndef HeterogeneousCore_MyPackage_MyClient #define HeterogeneousCore_MyPackage_MyClient @@ -75,7 +74,7 @@ To implement a concrete client, the following skeleton can be used for the `.h` class MyClient : public SonicClient { public: - MyClient(const edm::ParameterSet& params); + MyClient(const edm::ParameterSet& params, const std::string& debugName); static void fillPSetDescription(edm::ParameterSetDescription& iDesc); @@ -86,6 +85,14 @@ protected: #endif ``` +The concrete client member function implementations, in an associated `.cc` file, should include the following: +```cpp +MyClient::MyClient(const edm::ParameterSet& params, const std::string& debugName) + : SonicClient(params, debugName, "MyClient") { + //do any necessary operations +} +``` + The `SonicClient` has three available modes: * `Sync`: synchronous call, blocks until the result is returned. * `Async`: asynchronous, non-blocking call. 
diff --git a/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h b/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h index 0b9021ffe571c..88917f5daac98 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h +++ b/HeterogeneousCore/SonicCore/interface/SonicAcquirer.h @@ -8,6 +8,8 @@ #include "HeterogeneousCore/SonicCore/interface/sonic_utils.h" #include +#include +#include template class SonicAcquirer : public Module { @@ -15,26 +17,34 @@ class SonicAcquirer : public Module { //typedef to simplify usage typedef typename Client::Input Input; //constructor - SonicAcquirer(edm::ParameterSet const& cfg) : client_(cfg.getParameter("Client")) {} + SonicAcquirer(edm::ParameterSet const& cfg, const std::string& debugName = "") + : clientPset_(cfg.getParameterSet("Client")), debugName_(debugName) {} //destructor ~SonicAcquirer() override = default; - //derived classes use a dedicated acquire() interface that incorporates client_.input() + //construct client at beginning of job + //in case client constructor depends on operations happening in derived module constructors + void beginStream(edm::StreamID) override { makeClient(); } + + //derived classes use a dedicated acquire() interface that incorporates client_->input() //(no need to interact with callback holder) void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, edm::WaitingTaskWithArenaHolder holder) final { auto t0 = std::chrono::high_resolution_clock::now(); - acquire(iEvent, iSetup, client_.input()); - sonic_utils::printDebugTime(client_.debugName(), "acquire() time: ", t0); + acquire(iEvent, iSetup, client_->input()); + sonic_utils::printDebugTime(debugName_, "acquire() time: ", t0); t_dispatch_ = std::chrono::high_resolution_clock::now(); - client_.dispatch(holder); + client_->dispatch(holder); } virtual void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) = 0; protected: - //for debugging - void setDebugName(const std::string& debugName) { 
client_.setDebugName(debugName); } + //helper + void makeClient() { client_ = std::make_unique(clientPset_, debugName_); } + //members - Client client_; + edm::ParameterSet clientPset_; + std::unique_ptr client_; + std::string debugName_; std::chrono::time_point t_dispatch_; }; diff --git a/HeterogeneousCore/SonicCore/interface/SonicClient.h b/HeterogeneousCore/SonicCore/interface/SonicClient.h index a3bfd2bd7ed7d..00b26fedc523f 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClient.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClient.h @@ -9,7 +9,8 @@ template class SonicClient : public SonicClientBase, public SonicClientTypes { public: //constructor - SonicClient(const edm::ParameterSet& params) : SonicClientBase(params), SonicClientTypes() {} + SonicClient(const edm::ParameterSet& params, const std::string& debugName, const std::string& clientName) + : SonicClientBase(params, debugName, clientName), SonicClientTypes() {} }; #endif diff --git a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h index 6efc00d7e7dc6..40b9e1b5a2f08 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h @@ -19,12 +19,11 @@ enum class SonicMode { Sync = 1, Async = 2, PseudoAsync = 3 }; class SonicClientBase { public: //constructor - SonicClientBase(const edm::ParameterSet& params); + SonicClientBase(const edm::ParameterSet& params, const std::string& debugName, const std::string& clientName); //destructor virtual ~SonicClientBase() = default; - void setDebugName(const std::string& debugName); const std::string& debugName() const { return debugName_; } const std::string& clientName() const { return clientName_; } SonicMode mode() const { return mode_; } @@ -57,7 +56,7 @@ class SonicClientBase { std::optional holder_; //for logging/debugging - std::string clientName_, debugName_, fullDebugName_; + std::string debugName_, clientName_, 
fullDebugName_; std::chrono::time_point t0_; friend class SonicDispatcher; diff --git a/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h b/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h index 3747fd780a2b1..f7bf4033e7e25 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h +++ b/HeterogeneousCore/SonicCore/interface/SonicEDFilter.h @@ -16,22 +16,22 @@ class SonicEDFilter : public SonicAcquirer>(cfg) {} + SonicEDFilter(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicAcquirer>(cfg, debugName) {} //destructor ~SonicEDFilter() override = default; - //derived classes use a dedicated produce() interface that incorporates client_.output() + //derived classes use a dedicated produce() interface that incorporates client_->output() bool filter(edm::Event& iEvent, edm::EventSetup const& iSetup) final { //measure time between acquire and produce - sonic_utils::printDebugTime(this->client_.debugName(), "dispatch() time: ", this->t_dispatch_); + sonic_utils::printDebugTime(this->debugName_, "dispatch() time: ", this->t_dispatch_); auto t0 = std::chrono::high_resolution_clock::now(); - bool result = filter(iEvent, iSetup, this->client_.output()); - sonic_utils::printDebugTime(this->client_.debugName(), "filter() time: ", t0); + bool result = filter(iEvent, iSetup, this->client_->output()); + sonic_utils::printDebugTime(this->debugName_, "filter() time: ", t0); //reset client data - this->client_.reset(); + this->client_->reset(); return result; } diff --git a/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h b/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h index f777b05257001..5e4e1ce719d71 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h +++ b/HeterogeneousCore/SonicCore/interface/SonicEDProducer.h @@ -16,22 +16,22 @@ class SonicEDProducer : public SonicAcquirer>(cfg) {} + SonicEDProducer(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicAcquirer>(cfg, debugName) {} //destructor 
~SonicEDProducer() override = default; - //derived classes use a dedicated produce() interface that incorporates client_.output() + //derived classes use a dedicated produce() interface that incorporates client_->output() void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) final { //measure time between acquire and produce - sonic_utils::printDebugTime(this->client_.debugName(), "dispatch() time: ", this->t_dispatch_); + sonic_utils::printDebugTime(this->debugName_, "dispatch() time: ", this->t_dispatch_); auto t0 = std::chrono::high_resolution_clock::now(); - produce(iEvent, iSetup, this->client_.output()); - sonic_utils::printDebugTime(this->client_.debugName(), "produce() time: ", t0); + produce(iEvent, iSetup, this->client_->output()); + sonic_utils::printDebugTime(this->debugName_, "produce() time: ", t0); //reset client data - this->client_.reset(); + this->client_->reset(); } virtual void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) = 0; }; diff --git a/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h b/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h index ce4e8de3abcd9..92bf9bbc2f33e 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h +++ b/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h @@ -21,43 +21,53 @@ class SonicOneEDAnalyzer : public edm::one::EDAnalyzer { typedef typename Client::Input Input; typedef typename Client::Output Output; //constructor - SonicOneEDAnalyzer(edm::ParameterSet const& cfg) : client_(cfg.getParameter("Client")) { + SonicOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) + : clientPset_(cfg.getParameterSet("Client")), debugName_(debugName) { //ExternalWork is not compatible with one modules, so Sync mode is enforced - if (client_.mode() != SonicMode::Sync) - throw cms::Exception("UnsupportedMode") << "SonicOneEDAnalyzer can only use Sync mode for clients"; + if (clientPset_.getParameter("mode") != "Sync") { 
+ edm::LogWarning("ResetClientMode") << "Resetting client mode to Sync for SonicOneEDAnalyzer"; + clientPset_.addParameter("Mode", "Sync"); + } } //destructor ~SonicOneEDAnalyzer() override = default; - //derived classes still use a dedicated acquire() interface that incorporates client_.input() for consistency + //construct client at beginning of job + //in case client constructor depends on operations happening in derived module constructors + void beginJob() override { makeClient(); } + + //derived classes still use a dedicated acquire() interface that incorporates client_->input() for consistency virtual void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) = 0; - //derived classes use a dedicated analyze() interface that incorporates client_.output() + //derived classes use a dedicated analyze() interface that incorporates client_->output() void analyze(edm::Event const& iEvent, edm::EventSetup const& iSetup) final { auto t0 = std::chrono::high_resolution_clock::now(); - acquire(iEvent, iSetup, client_.input()); - sonic_utils::printDebugTime(client_.debugName(), "acquire() time: ", t0); + acquire(iEvent, iSetup, client_->input()); + sonic_utils::printDebugTime(debugName_, "acquire() time: ", t0); //pattern similar to ExternalWork, but blocking auto t1 = std::chrono::high_resolution_clock::now(); - client_.dispatch(); + client_->dispatch(); //measure time between acquire and produce - sonic_utils::printDebugTime(client_.debugName(), "dispatch() time: ", t1); + sonic_utils::printDebugTime(debugName_, "dispatch() time: ", t1); auto t2 = std::chrono::high_resolution_clock::now(); - analyze(iEvent, iSetup, client_.output()); - sonic_utils::printDebugTime(client_.debugName(), "analyze() time: ", t2); + analyze(iEvent, iSetup, client_->output()); + sonic_utils::printDebugTime(debugName_, "analyze() time: ", t2); //reset client data - client_.reset(); + client_->reset(); } virtual void analyze(edm::Event const& iEvent, edm::EventSetup 
const& iSetup, Output const& iOutput) = 0; protected: - //for debugging - void setDebugName(const std::string& debugName) { client_.setDebugName(debugName); } + //helper + void makeClient() { client_ = std::make_unique(clientPset_, debugName_); } + //members - Client client_; + edm::ParameterSet clientPset_; + std::unique_ptr client_; + std::string debugName_; }; #endif diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index 750e2990b0277..711f6022a06d6 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -2,8 +2,16 @@ #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/ParameterSet/interface/allowedValues.h" -SonicClientBase::SonicClientBase(const edm::ParameterSet& params) - : allowedTries_(params.getUntrackedParameter("allowedTries", 0)) { +SonicClientBase::SonicClientBase(const edm::ParameterSet& params, + const std::string& debugName, + const std::string& clientName) + : allowedTries_(params.getUntrackedParameter("allowedTries", 0)), + debugName_(debugName), + clientName_(clientName), + fullDebugName_(debugName_) { + if (!clientName_.empty()) + fullDebugName_ += ":" + clientName_; + std::string modeName(params.getParameter("mode")); if (modeName == "Sync") mode_ = SonicMode::Sync; @@ -21,13 +29,6 @@ SonicClientBase::SonicClientBase(const edm::ParameterSet& params) dispatcher_ = std::make_unique(this); } -void SonicClientBase::setDebugName(const std::string& debugName) { - debugName_ = debugName; - fullDebugName_ = debugName_; - if (!clientName_.empty()) - fullDebugName_ += ":" + clientName_; -} - void SonicClientBase::start(edm::WaitingTaskWithArenaHolder holder) { start(); holder_ = std::move(holder); diff --git a/HeterogeneousCore/SonicCore/test/DummyClient.h b/HeterogeneousCore/SonicCore/test/DummyClient.h index 4bca9ea91eb0d..ccef888ad9f7d 100644 --- a/HeterogeneousCore/SonicCore/test/DummyClient.h +++ 
b/HeterogeneousCore/SonicCore/test/DummyClient.h @@ -12,8 +12,8 @@ class DummyClient : public SonicClient { public: //constructor - DummyClient(const edm::ParameterSet& params) - : SonicClient(params), + DummyClient(const edm::ParameterSet& params, const std::string& debugName) + : SonicClient(params, debugName, "DummyClient"), factor_(params.getParameter("factor")), wait_(params.getParameter("wait")), fails_(params.getParameter("fails")) {} diff --git a/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc b/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc index df40257f0b22b..386f51af90e46 100644 --- a/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc +++ b/HeterogeneousCore/SonicCore/test/SonicDummyFilter.cc @@ -10,9 +10,7 @@ namespace sonictest { class SonicDummyFilter : public SonicEDFilter { public: explicit SonicDummyFilter(edm::ParameterSet const& cfg) - : SonicEDFilter(cfg), input_(cfg.getParameter("input")) { - //for debugging - setDebugName("SonicDummyFilter"); + : SonicEDFilter(cfg, "SonicDummyFilter"), input_(cfg.getParameter("input")) { putToken_ = produces(); } diff --git a/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc b/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc index 10e402922e55f..399752820b2c4 100644 --- a/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc +++ b/HeterogeneousCore/SonicCore/test/SonicDummyOneAnalyzer.cc @@ -12,12 +12,9 @@ namespace sonictest { class SonicDummyOneAnalyzer : public SonicOneEDAnalyzer { public: explicit SonicDummyOneAnalyzer(edm::ParameterSet const& cfg) - : SonicOneEDAnalyzer(cfg), + : SonicOneEDAnalyzer(cfg, "SonicDummyOneAnalyzer"), input_(cfg.getParameter("input")), - expected_(cfg.getParameter("expected")) { - //for debugging - setDebugName("SonicDummyOneAnalyzer"); - } + expected_(cfg.getParameter("expected")) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { iInput = input_; } diff --git 
a/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc b/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc index 602a4afcf940f..e782ee15f8928 100644 --- a/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc +++ b/HeterogeneousCore/SonicCore/test/SonicDummyProducer.cc @@ -10,9 +10,7 @@ namespace sonictest { class SonicDummyProducer : public SonicEDProducer { public: explicit SonicDummyProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg), input_(cfg.getParameter("input")) { - //for debugging - setDebugName("SonicDummyProducer"); + : SonicEDProducer(cfg, "SonicDummyProducer"), input_(cfg.getParameter("input")) { putToken_ = produces(); } diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index 99ca5f8765fe7..ccda4a3c6ec6a 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -29,7 +29,7 @@ class TritonClient : public SonicClient { }; //constructor - TritonClient(const edm::ParameterSet& params); + TritonClient(const edm::ParameterSet& params, const std::string& debugName); //accessors unsigned batchSize() const { return batchSize_; } diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 98380e6546f4d..d1d41be4b2b39 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -20,14 +20,10 @@ namespace nic = ni::client; //based on https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_async_infer_client.cc //and https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/perf_client.cc -TritonClient::TritonClient(const edm::ParameterSet& params) - : SonicClient(params), +TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& debugName) + : SonicClient(params, 
debugName, "TritonClient"), verbose_(params.getUntrackedParameter("verbose")), options_(params.getParameter("modelName")) { - clientName_ = "TritonClient"; - //will get overwritten later, just used in constructor - fullDebugName_ = clientName_; - //connect to the server //TODO: add SSL options std::string url(params.getUntrackedParameter("address") + ":" + diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc index 952a6e0bd08b1..fbb2e1a09b151 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc @@ -14,10 +14,8 @@ class TritonGraphProducer : public SonicEDProducer { public: - explicit TritonGraphProducer(edm::ParameterSet const& cfg) : SonicEDProducer(cfg) { - //for debugging - setDebugName("TritonGraphProducer"); - } + explicit TritonGraphProducer(edm::ParameterSet const& cfg) + : SonicEDProducer(cfg, "TritonGraphProducer") {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //get event-based seed for RNG unsigned int runNum_uint = static_cast(iEvent.id().run()); @@ -72,7 +70,7 @@ class TritonGraphProducer : public SonicEDProducer { } msg << "\n"; } - edm::LogInfo(client_.debugName()) << msg.str(); + edm::LogInfo(debugName_) << msg.str(); } ~TritonGraphProducer() override = default; diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index 2617d28eedb2c..30f7e46368b3f 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -14,9 +14,7 @@ class TritonImageProducer : public SonicEDProducer { public: explicit TritonImageProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg), topN_(cfg.getParameter("topN")) { - //for debugging - setDebugName("TritonImageProducer"); + : SonicEDProducer(cfg, 
"TritonImageProducer"), topN_(cfg.getParameter("topN")) { //load score list std::string imageListFile(cfg.getParameter("imageList")); std::ifstream ifile(imageListFile); @@ -76,7 +74,7 @@ class TritonImageProducer : public SonicEDProducer { if (counter >= topN_) break; } - edm::LogInfo(client_.debugName()) << msg.str(); + edm::LogInfo(debugName_) << msg.str(); } } From f4ffb8ebb03e5a58d80746bb4443e0ce67a295bd Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 9 Dec 2020 18:10:23 -0600 Subject: [PATCH 03/31] introduce TritonService to keep track of servers and models --- .../SonicTriton/interface/TritonService.h | 80 +++++++++++++++++++ .../SonicTriton/plugins/BuildFile.xml | 5 ++ .../SonicTriton/plugins/TritonService.cc | 5 ++ .../SonicTriton/src/TritonClient.cc | 16 ++-- .../SonicTriton/src/TritonService.cc | 75 +++++++++++++++++ .../SonicTriton/test/TritonImageProducer.cc | 9 ++- .../SonicTriton/test/tritonTest_cfg.py | 16 +++- 7 files changed, 192 insertions(+), 14 deletions(-) create mode 100644 HeterogeneousCore/SonicTriton/interface/TritonService.h create mode 100644 HeterogeneousCore/SonicTriton/plugins/BuildFile.xml create mode 100644 HeterogeneousCore/SonicTriton/plugins/TritonService.cc create mode 100644 HeterogeneousCore/SonicTriton/src/TritonService.cc diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h new file mode 100644 index 0000000000000..8f22b096fc848 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -0,0 +1,80 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonService +#define HeterogeneousCore_SonicTriton_TritonService + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include +#include +#include +#include + +//forward declarations +namespace edm { + class ActivityRegistry; + class ConfigurationDescriptions; +} + +class TritonService { +public: + //classes and defs + struct Server { + Server(const edm::ParameterSet& pset) : 
name(pset.getUntrackedParameter("name")), url(pset.getUntrackedParameter("address") + ":" + std::to_string(pset.getUntrackedParameter("port"))) {} + Server(const std::string& name_) : name(name_), url("") {} + + struct Hash { + size_t operator()(const Server& obj) const { + return hashObj(obj.name); + } + std::hash hashObj; + }; + + struct Equal { + bool operator()(const Server& lhs, const Server& rhs) const { + return lhs.name == rhs.name; + } + }; + + //members + std::string name; + std::string url; + mutable std::unordered_set models; + }; + struct Model { + Model(const std::string& name_) : name(name_) {} + + struct Hash { + size_t operator()(const Model& obj) const { + return hashObj(obj.name); + } + std::hash hashObj; + }; + + struct Equal { + bool operator()(const Model& lhs, const Model& rhs) const { + return lhs.name == rhs.name; + } + }; + + //members + std::string name; + mutable std::unordered_set servers; + }; + + TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); + + //accessors + std::string serverAddress(const std::string& model, const std::string& preferred="") const; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + //to search without full object + auto findServer(const std::string& name) const { return servers_.find(Server(name)); } + auto findModel(const std::string& name) const { return models_.find(Model(name)); } + + //this is a lazy and inefficient many:many map + std::unordered_set servers_; + std::unordered_set models_; +}; + +#endif diff --git a/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml b/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml new file mode 100644 index 0000000000000..f5eea4e8cf125 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/plugins/BuildFile.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/HeterogeneousCore/SonicTriton/plugins/TritonService.cc b/HeterogeneousCore/SonicTriton/plugins/TritonService.cc new file mode 100644 index 
0000000000000..963e6eee3c56e --- /dev/null +++ b/HeterogeneousCore/SonicTriton/plugins/TritonService.cc @@ -0,0 +1,5 @@ +#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +DEFINE_FWK_SERVICE(TritonService); diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index d1d41be4b2b39..e5d4d81290c62 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -1,6 +1,8 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/Exception.h" #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h" #include "grpc_client.h" @@ -24,10 +26,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d : SonicClient(params, debugName, "TritonClient"), verbose_(params.getUntrackedParameter("verbose")), options_(params.getParameter("modelName")) { + //get appropriate server for this model + edm::Service ts; + const auto& url = ts->serverAddress(options_.model_name_,params.getUntrackedParameter("preferredServer")); + //connect to the server //TODO: add SSL options - std::string url(params.getUntrackedParameter("address") + ":" + - std::to_string(params.getUntrackedParameter("port"))); triton_utils::throwIfError(nic::InferenceServerGrpcClient::Create(&client_, url, false), "TritonClient(): unable to create inference context"); @@ -123,8 +127,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d throw cms::Exception("MissingOutput") << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs); - //check 
requested batch size and propagate to inputs and outputs - setBatchSize(params.getUntrackedParameter("batchSize")); + //propagate batch size to inputs and outputs + setBatchSize(1); //print model info std::stringstream model_msg; @@ -337,9 +341,7 @@ void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) { descClient.add("modelName"); descClient.add("modelVersion", ""); //server parameters should not affect the physics results - descClient.addUntracked("batchSize"); - descClient.addUntracked("address"); - descClient.addUntracked("port"); + descClient.addUntracked("preferredServer",""); descClient.addUntracked("timeout"); descClient.addUntracked("verbose", false); descClient.addUntracked>("outputs", {}); diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc new file mode 100644 index 0000000000000..ecde1cb14e655 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -0,0 +1,75 @@ +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" +#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h" + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" +#include "FWCore/Utilities/interface/Exception.h" + +#include "grpc_client.h" +#include "grpc_service.pb.h" + +namespace ni = nvidia::inferenceserver; +namespace nic = ni::client; + +TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) { + //loop over input servers: check which models they have + for(const auto& serverPset : pset.getUntrackedParameterSetVector("servers")){ + Server tmp(serverPset); + //ensure uniqueness + auto sit = servers_.find(tmp); + if (sit!=servers_.end()) + throw cms::Exception("DuplicateServer") << "Not allowed to specify more than one 
server with same name (" << tmp.name << ")"; + + std::unique_ptr client; + triton_utils::throwIfError(nic::InferenceServerGrpcClient::Create(&client, tmp.url, false), "TritonService(): unable to create inference context for "+tmp.name+" ("+tmp.url+")"); + + inference::RepositoryIndexResponse repoIndexResponse; + triton_utils::throwIfError(client->ModelRepositoryIndex(&repoIndexResponse), "TritonService(): unable to get repository index for "+tmp.name+" ("+tmp.url+")"); + + //servers keep track of models and vice versa + for(const auto& modelIndex : repoIndexResponse.models()){ + const auto& modelName = modelIndex.name(); + auto mit = findModel(modelName); + if(mit==models_.end()) + mit = models_.emplace(modelName).first; + mit->servers.insert(tmp.name); + tmp.models.insert(modelName); + } + servers_.insert(tmp); + } +} + +std::string TritonService::serverAddress(const std::string& model, const std::string& preferred) const { + auto mit = findModel(model); + if(mit==models_.end()) + throw cms::Exception("MissingModel") << "There are no servers that provide model " << model; + + const auto& modelServers = mit->servers; + + if(!preferred.empty()){ + auto sit = modelServers.find(preferred); + //todo: add a "strict" parameter to stop execution if preferred server isn't found? 
+ if(sit==modelServers.end()) + edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model << " not available, will choose another server"; + else + return findServer(preferred)->url; + } + + //todo: use some algorithm to select server rather than just picking arbitrarily + return findServer(*modelServers.begin())->url; +} + +void TritonService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + edm::ParameterSetDescription validator; + validator.addUntracked("name"); + validator.addUntracked("address"); + validator.addUntracked("port"); + + desc.addVPSetUntracked("servers", validator); + + descriptions.addWithDefaultLabel(desc); +} diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index 30f7e46368b3f..d0ca9fbadd983 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -14,7 +14,7 @@ class TritonImageProducer : public SonicEDProducer { public: explicit TritonImageProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg, "TritonImageProducer"), topN_(cfg.getParameter("topN")) { + : SonicEDProducer(cfg, "TritonImageProducer"), batchSize_(cfg.getParameter("batchSize")), topN_(cfg.getParameter("topN")) { //load score list std::string imageListFile(cfg.getParameter("imageList")); std::ifstream ifile(imageListFile); @@ -28,12 +28,13 @@ class TritonImageProducer : public SonicEDProducer { } } void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + client_->setBatchSize(batchSize_); // create an npix x npix x ncol image w/ arbitrary color value // model only has one input, so just pick begin() auto& input1 = iInput.begin()->second; auto data1 = std::make_shared>(); - data1->reserve(input1.batchSize()); - for (unsigned i = 0; i < input1.batchSize(); ++i) { + 
data1->reserve(batchSize_); + for (unsigned i = 0; i < batchSize_; ++i) { data1->emplace_back(input1.sizeDims(), 0.5f); } // convert to server format @@ -48,6 +49,7 @@ class TritonImageProducer : public SonicEDProducer { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; TritonClient::fillPSetDescription(desc); + desc.add("batchSize", 1); desc.add("topN", 5); desc.add("imageList"); //to ensure distinct cfi names @@ -78,6 +80,7 @@ class TritonImageProducer : public SonicEDProducer { } } + unsigned batchSize_; unsigned topN_; std::vector imageList_; }; diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 65ea8ca4a51ec..91415e29119f2 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -9,7 +9,6 @@ options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("threads", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("streams", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int) -options.register("batchSize", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("producer", "TritonImageProducer", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("modelName","resnet50_netdef", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("mode","PseudoAsync", VarParsing.multiplicity.singleton, VarParsing.varType.string) @@ -38,12 +37,20 @@ process.source = cms.Source("EmptySource") +process.TritonService = cms.Service("TritonService", + servers = cms.untracked.VPSet( + cms.PSet( + name = cms.untracked.string("local"), + address = cms.untracked.string(options.address), + port = cms.untracked.uint32(options.port), + ) + ) +) + process.TritonProducer = cms.EDProducer(options.producer, Client = cms.PSet( mode 
= cms.string(options.mode), - batchSize = cms.untracked.uint32(options.batchSize), - address = cms.untracked.string(options.address), - port = cms.untracked.uint32(options.port), + preferredServer = cms.untracked.string(""), timeout = cms.untracked.uint32(options.timeout), modelName = cms.string(models[options.producer]), modelVersion = cms.string(""), @@ -52,6 +59,7 @@ ) ) if options.producer=="TritonImageProducer": + process.TritonProducer.batchSize = cms.uint32(1) process.TritonProducer.topN = cms.uint32(5) process.TritonProducer.imageList = cms.string("../data/models/resnet50_netdef/resnet50_labels.txt") From 3983d32ff3773f2ab351341aacea6f5701373995 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 10 Dec 2020 14:06:19 -0600 Subject: [PATCH 04/31] keep list of unserved models and launch fallback server if enabled --- .../SonicCore/interface/SonicClientBase.h | 2 + .../SonicCore/src/SonicClientBase.cc | 12 +- HeterogeneousCore/SonicTriton/BuildFile.xml | 1 + .../SonicTriton/interface/TritonDummyCache.h | 20 +++ .../SonicTriton/interface/TritonEDFilter.h | 31 ++++ .../SonicTriton/interface/TritonEDProducer.h | 31 ++++ .../interface/TritonOneEDAnalyzer.h | 19 +++ .../SonicTriton/interface/TritonService.h | 38 ++++- HeterogeneousCore/SonicTriton/scripts/triton | 6 +- .../SonicTriton/src/TritonClient.cc | 8 +- .../SonicTriton/src/TritonService.cc | 146 +++++++++++++++++- .../SonicTriton/test/TritonGraphProducer.cc | 7 +- .../SonicTriton/test/TritonImageProducer.cc | 7 +- .../SonicTriton/test/tritonTest_cfg.py | 20 ++- 14 files changed, 317 insertions(+), 31 deletions(-) create mode 100644 HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h create mode 100644 HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h create mode 100644 HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h create mode 100644 HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h diff --git a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h 
b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h index 40b9e1b5a2f08..3af1b574a4d54 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h @@ -41,6 +41,8 @@ class SonicClientBase { static void fillBasePSetDescription(edm::ParameterSetDescription& desc, bool allowRetry = true); protected: + void setMode(SonicMode mode); + virtual void evaluate() = 0; void start(edm::WaitingTaskWithArenaHolder holder); diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index 711f6022a06d6..f91359e4a4816 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -14,14 +14,20 @@ SonicClientBase::SonicClientBase(const edm::ParameterSet& params, std::string modeName(params.getParameter("mode")); if (modeName == "Sync") - mode_ = SonicMode::Sync; + setMode(SonicMode::Sync); else if (modeName == "Async") - mode_ = SonicMode::Async; + setMode(SonicMode::Async); else if (modeName == "PseudoAsync") - mode_ = SonicMode::PseudoAsync; + setMode(SonicMode::PseudoAsync); else throw cms::Exception("Configuration") << "Unknown mode for SonicClient: " << modeName; +} + +void SonicClientBase::setMode(SonicMode mode) { + if (mode_==mode) return; + mode_ = mode; + //get correct dispatcher for mode if (mode_ == SonicMode::Sync or mode_ == SonicMode::Async) dispatcher_ = std::make_unique(this); diff --git a/HeterogeneousCore/SonicTriton/BuildFile.xml b/HeterogeneousCore/SonicTriton/BuildFile.xml index b574f395f4d12..555f9930704bf 100644 --- a/HeterogeneousCore/SonicTriton/BuildFile.xml +++ b/HeterogeneousCore/SonicTriton/BuildFile.xml @@ -4,6 +4,7 @@ + diff --git a/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h b/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h new file mode 100644 index 0000000000000..271526c953fd5 --- /dev/null +++ 
b/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h @@ -0,0 +1,20 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonDummyCache +#define HeterogeneousCore_SonicTriton_TritonDummyCache + +struct TritonDummyCache {}; + +//Triton modules want to call initializeGlobalCache, but don't want GlobalCache pointer in constructor +//-> override framework function (can't partial specialize function templates) +namespace edm { + class ParameterSet; + namespace stream { + namespace impl { + template + T* makeStreamModule(edm::ParameterSet const& iPSet, const TritonDummyCache*) { + return new T(iPSet); + } + } + } +} + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h b/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h new file mode 100644 index 0000000000000..5d1f786ec2f47 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h @@ -0,0 +1,31 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonEDFilter +#define HeterogeneousCore_SonicTriton_TritonEDFilter + +//TritonDummyCache include comes first for overload resolution +#include "HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h" +#include "HeterogeneousCore/SonicCore/interface/SonicEDFilter.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +//inherited classes that use a non-default GlobalCache should be sure to call the parent initializeGlobalCache() +template +class TritonEDFilterT : public SonicEDFilter, Capabilities...> { +public: + TritonEDFilterT(edm::ParameterSet const& cfg, const std::string& debugName) : SonicEDFilter, Capabilities...>(cfg, debugName) {} + + //use this function to avoid calling TritonService functions Nstreams times + static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + 
ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); + return nullptr; + } + + static void globalEndJob(G*) {} +}; + +template +using TritonEDFilter = TritonEDFilterT; + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h b/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h new file mode 100644 index 0000000000000..2ccb859f06f55 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h @@ -0,0 +1,31 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonEDProducer +#define HeterogeneousCore_SonicTriton_TritonEDProducer + +//TritonDummyCache include comes first for overload resolution +#include "HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h" +#include "HeterogeneousCore/SonicCore/interface/SonicEDProducer.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +//inherited classes that use a non-default GlobalCache should be sure to call the parent initializeGlobalCache() +template +class TritonEDProducerT : public SonicEDProducer, Capabilities...> { +public: + TritonEDProducerT(edm::ParameterSet const& cfg, const std::string& debugName) : SonicEDProducer, Capabilities...>(cfg, debugName) {} + + //use this function to avoid calling TritonService functions Nstreams times + static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); + return nullptr; + } + + static void globalEndJob(G*) {} +}; + +template +using TritonEDProducer = TritonEDProducerT; + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h new file mode 100644 
index 0000000000000..86275e9c6d89c --- /dev/null +++ b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h @@ -0,0 +1,19 @@ +#ifndef HeterogeneousCore_SonicTriton_TritonOneEDAnalyzer +#define HeterogeneousCore_SonicTriton_TritonOneEDAnalyzer + +#include "HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" + +template +class TritonOneEDAnalyzer : public SonicOneEDAnalyzer { +public: + TritonOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) : SonicOneEDAnalyzer(cfg, debugName) { + edm::Service ts; + const auto& clientPset = cfg.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); + } +}; + +#endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index 8f22b096fc848..b7ec71679a306 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -3,23 +3,40 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "tbb/concurrent_unordered_set.h" + #include #include #include #include +#include //forward declarations namespace edm { class ActivityRegistry; class ConfigurationDescriptions; + class PathsAndConsumesOfModulesBase; + class ProcessContext; } class TritonService { public: //classes and defs + struct FallbackOpts { + FallbackOpts(const edm::ParameterSet& pset) : enable(pset.getUntrackedParameter("enable")), verbose(pset.getUntrackedParameter("verbose")), useDocker(pset.getUntrackedParameter("useDocker")), useGPU(pset.getUntrackedParameter("useGPU")), retries(pset.getUntrackedParameter("retries")), wait(pset.getUntrackedParameter("wait")), instanceName(pset.getUntrackedParameter("instanceName")), 
tempDir(pset.getUntrackedParameter("tempDir")) {} + + bool enable; + bool verbose; + bool useDocker; + bool useGPU; + int retries; + int wait; + std::string instanceName; + std::string tempDir; + }; struct Server { - Server(const edm::ParameterSet& pset) : name(pset.getUntrackedParameter("name")), url(pset.getUntrackedParameter("address") + ":" + std::to_string(pset.getUntrackedParameter("port"))) {} - Server(const std::string& name_) : name(name_), url("") {} + Server(const edm::ParameterSet& pset) : name(pset.getUntrackedParameter("name")), url(pset.getUntrackedParameter("address") + ":" + std::to_string(pset.getUntrackedParameter("port"))), isFallback(name==fallbackName) {} + Server(const std::string& name_, const std::string& url_="") : name(name_), url(url_), isFallback(name==fallbackName) {} struct Hash { size_t operator()(const Server& obj) const { @@ -37,10 +54,13 @@ class TritonService { //members std::string name; std::string url; + bool isFallback; mutable std::unordered_set models; + static const std::string fallbackName; + static const std::string fallbackUrl; }; struct Model { - Model(const std::string& name_) : name(name_) {} + Model(const std::string& name_, const std::string& path_="") : name(name_), path(path_) {} struct Hash { size_t operator()(const Model& obj) const { @@ -57,21 +77,31 @@ class TritonService { //members std::string name; + std::string path; mutable std::unordered_set servers; }; TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); + ~TritonService(); //accessors - std::string serverAddress(const std::string& model, const std::string& preferred="") const; + void addModel(const std::string& modelName, const std::string& path); + std::pair serverAddress(const std::string& model, const std::string& preferred="") const; static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: + void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&); + //to search 
without full object auto findServer(const std::string& name) const { return servers_.find(Server(name)); } auto findModel(const std::string& name) const { return models_.find(Model(name)); } + bool verbose_; + FallbackOpts fallbackOpts_; + bool startedFallback_; + //concurrent data type is used because addModel() might be called by multiple threads + tbb::concurrent_unordered_set unservedModels_; //this is a lazy and inefficient many:many map std::unordered_set servers_; std::unordered_set models_; diff --git a/HeterogeneousCore/SonicTriton/scripts/triton b/HeterogeneousCore/SonicTriton/scripts/triton index e527c485bc10b..2e5d3bfdb9995 100755 --- a/HeterogeneousCore/SonicTriton/scripts/triton +++ b/HeterogeneousCore/SonicTriton/scripts/triton @@ -216,7 +216,11 @@ cleanup(){ list_models(){ # make list of model repositories for MODEL in ${MODELS[@]}; do - REPOS+=("$(dirname $MODEL)") + # check if file was provided rather than directory + if [ -f "$MODEL" ]; then + MODEL="$(dirname "$MODEL")" + fi + REPOS+=("$(dirname "$MODEL")") done for ((r=0; r < ${#REPOS[@]}; r++)); do # avoid issues w/ multiple levels of symlinks diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index e5d4d81290c62..c0ee5c1d546c3 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -1,4 +1,5 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/Exception.h" #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" @@ -28,7 +29,11 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d options_(params.getParameter("modelName")) { //get appropriate server for this model edm::Service ts; - const auto& url = 
ts->serverAddress(options_.model_name_,params.getUntrackedParameter("preferredServer")); + const auto& [url, isFallbackCPU] = ts->serverAddress(options_.model_name_,params.getUntrackedParameter("preferredServer")); + //enforce sync mode for fallback CPU server to avoid contention + //todo: could enforce async mode otherwise (unless mode was specified by user?) + if(isFallbackCPU) + setMode(SonicMode::Sync); //connect to the server //TODO: add SSL options @@ -340,6 +345,7 @@ void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) { fillBasePSetDescription(descClient); descClient.add("modelName"); descClient.add("modelVersion", ""); + descClient.add("modelConfigPath"); //server parameters should not affect the physics results descClient.addUntracked("preferredServer",""); descClient.addUntracked("timeout"); diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index ecde1cb14e655..f769dd5309bd4 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -5,16 +5,34 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" +#include "FWCore/ServiceRegistry/interface/ProcessContext.h" #include "FWCore/Utilities/interface/Exception.h" #include "grpc_client.h" #include "grpc_service.pb.h" +#include +#include +#include + namespace ni = nvidia::inferenceserver; namespace nic = ni::client; -TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) { +const std::string TritonService::Server::fallbackName{"fallback"}; +const std::string TritonService::Server::fallbackUrl{"0.0.0.0:8001"}; + +TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) : verbose_(pset.getUntrackedParameter("verbose")), 
fallbackOpts_(pset.getParameterSet("fallback")), startedFallback_(false) { + //fallback server will be launched (if needed) before beginJob + areg.watchPreBeginJob(this, &TritonService::preBeginJob); + + //include fallback server in set if enabled + if(fallbackOpts_.enable) + servers_.emplace(Server::fallbackName,Server::fallbackUrl); + //loop over input servers: check which models they have + std::string msg; + if(verbose_) + msg = "List of models for each server:\n"; for(const auto& serverPset : pset.getUntrackedParameterSetVector("servers")){ Server tmp(serverPset); //ensure uniqueness @@ -29,6 +47,8 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr triton_utils::throwIfError(client->ModelRepositoryIndex(&repoIndexResponse), "TritonService(): unable to get repository index for "+tmp.name+" ("+tmp.url+")"); //servers keep track of models and vice versa + if(verbose_) + msg += tmp.name + ": "; for(const auto& modelIndex : repoIndexResponse.models()){ const auto& modelName = modelIndex.name(); auto mit = findModel(modelName); @@ -36,40 +56,152 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr mit = models_.emplace(modelName).first; mit->servers.insert(tmp.name); tmp.models.insert(modelName); + if(verbose_) + msg += modelName + ", "; } + if(verbose_) + msg += "\n"; servers_.insert(tmp); } + if(verbose_) + edm::LogInfo("TritonService") << msg; } -std::string TritonService::serverAddress(const std::string& model, const std::string& preferred) const { - auto mit = findModel(model); +void TritonService::addModel(const std::string& modelName, const std::string& path) { + //if model is not in the list, then no specified server provides it + auto mit = findModel(modelName); if(mit==models_.end()) + unservedModels_.emplace(modelName,path); +} + +//second return value is only true if fallback CPU server is being used +std::pair TritonService::serverAddress(const std::string& model, const std::string& 
preferred) const { + auto mit = findModel(model); + if (mit==models_.end()) throw cms::Exception("MissingModel") << "There are no servers that provide model " << model; const auto& modelServers = mit->servers; - if(!preferred.empty()){ + if (!preferred.empty()){ auto sit = modelServers.find(preferred); //todo: add a "strict" parameter to stop execution if preferred server isn't found? if(sit==modelServers.end()) edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model << " not available, will choose another server"; else - return findServer(preferred)->url; + return std::make_pair(findServer(preferred)->url,false); } //todo: use some algorithm to select server rather than just picking arbitrarily - return findServer(*modelServers.begin())->url; + auto sit = findServer(*modelServers.begin()); + bool isFallbackCPU = sit->isFallback and !fallbackOpts_.useGPU; + return std::make_pair(sit->url,isFallbackCPU); +} + +void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&) { + //only need fallback if there are unserved models + if (!fallbackOpts_.enable or unservedModels_.empty()) return; + + std::string msg; + if(verbose_) + msg = "List of models for fallback server: "; + //all unserved models are provided by fallback server + auto sit = findServer(Server::fallbackName); + for(const auto& model : unservedModels_){ + auto mit = models_.insert(model).first; + mit->servers.insert(Server::fallbackName); + sit->models.insert(mit->name); + if(verbose_) + msg += mit->name + ", "; + } + if(verbose_) + edm::LogInfo("TritonService") << msg; + + //assemble server start command + std::string command("triton"); + if (fallbackOpts_.verbose) + command += " -v"; + if (fallbackOpts_.useDocker) + command += " -d"; + if (fallbackOpts_.useGPU) + command += " -g"; + if (!fallbackOpts_.instanceName.empty()) + command += " -n "+fallbackOpts_.instanceName; + if (fallbackOpts_.retries >= 0) + command += " -r 
"+std::to_string(fallbackOpts_.retries); + if (fallbackOpts_.wait >=0) + command += " -w "+std::to_string(fallbackOpts_.wait); + for (const auto& model : unservedModels_) { + command += " -m "+model.path; + } + //don't need this anymore + unservedModels_.clear(); + + //get a random temporary directory if none specified + if (fallbackOpts_.tempDir.empty()) { + auto tmp_dir_path{std::filesystem::temp_directory_path() /= std::tmpnam(nullptr)}; + fallbackOpts_.tempDir = tmp_dir_path.string(); + } + //special case ".": use script default (temp dir = .$instanceName) + if (fallbackOpts_.tempDir != ".") + command += " -t "+fallbackOpts_.tempDir; + + command += " start"; + + if(verbose_) + edm::LogInfo("TritonService") << command; + + //mark as started before executing in case of ctrl+c while command is running + startedFallback_ = true; + int rv = std::system(command.c_str()); + if (rv != 0) + throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; +} + +TritonService::~TritonService() { + if (!startedFallback_) return; + + //assemble server stop command + std::string command("triton"); + + if (fallbackOpts_.verbose) + command += " -v"; + if (fallbackOpts_.useDocker) + command += " -d"; + if (!fallbackOpts_.instanceName.empty()) + command += " -n "+fallbackOpts_.instanceName; + if (fallbackOpts_.tempDir != ".") + command += " -t "+fallbackOpts_.tempDir; + + command += " stop"; + + if(verbose_) + edm::LogInfo("TritonService") << command; + int rv = std::system(command.c_str()); + if (rv != 0) + edm::LogError("FallbackFailed") << "Stopping the fallback server failed with exit code " << rv; } void TritonService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; + desc.addUntracked("verbose",false); edm::ParameterSetDescription validator; validator.addUntracked("name"); validator.addUntracked("address"); validator.addUntracked("port"); - desc.addVPSetUntracked("servers", 
validator); + desc.addVPSetUntracked("servers", validator, {}); + + edm::ParameterSetDescription fallbackDesc; + fallbackDesc.addUntracked("enable",false); + fallbackDesc.addUntracked("verbose",false); + fallbackDesc.addUntracked("useDocker",false); + fallbackDesc.addUntracked("useGPU",false); + fallbackDesc.addUntracked("retries",-1); + fallbackDesc.addUntracked("wait",-1); + fallbackDesc.addUntracked("instanceName",""); + fallbackDesc.addUntracked("tempDir",""); + desc.add("fallback",fallbackDesc); descriptions.addWithDefaultLabel(desc); } diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc index fbb2e1a09b151..78b08a178e397 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc @@ -1,5 +1,4 @@ -#include "HeterogeneousCore/SonicCore/interface/SonicEDProducer.h" -#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" @@ -12,10 +11,10 @@ #include #include -class TritonGraphProducer : public SonicEDProducer { +class TritonGraphProducer : public TritonEDProducer<> { public: explicit TritonGraphProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg, "TritonGraphProducer") {} + : TritonEDProducer<>(cfg, "TritonGraphProducer") {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //get event-based seed for RNG unsigned int runNum_uint = static_cast(iEvent.id().run()); diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index d0ca9fbadd983..3cbe1c2a816ab 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ 
b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -1,5 +1,4 @@ -#include "HeterogeneousCore/SonicCore/interface/SonicEDProducer.h" -#include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" @@ -11,10 +10,10 @@ #include #include -class TritonImageProducer : public SonicEDProducer { +class TritonImageProducer : public TritonEDProducer<> { public: explicit TritonImageProducer(edm::ParameterSet const& cfg) - : SonicEDProducer(cfg, "TritonImageProducer"), batchSize_(cfg.getParameter("batchSize")), topN_(cfg.getParameter("topN")) { + : TritonEDProducer<>(cfg, "TritonImageProducer"), batchSize_(cfg.getParameter("batchSize")), topN_(cfg.getParameter("topN")) { //load score list std::string imageListFile(cfg.getParameter("imageList")); std::ifstream ifile(imageListFile); diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 91415e29119f2..3f7fb76dcf409 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -3,6 +3,7 @@ import os, sys, json options = VarParsing("analysis") +options.register("serverName", "default", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("address", "0.0.0.0", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("port", 8001, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("timeout", 30, VarParsing.multiplicity.singleton, VarParsing.varType.int) @@ -33,19 +34,23 @@ process = cms.Process('tritonTest') +process.load("HeterogeneousCore.SonicTriton.TritonService_cfi") + process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(options.maxEvents) ) process.source = cms.Source("EmptySource") 
-process.TritonService = cms.Service("TritonService", - servers = cms.untracked.VPSet( +process.TritonService.verbose = options.verbose +process.TritonService.fallback.enable = True +process.TritonService.fallback.verbose = options.verbose +if len(options.address)>0: + process.TritonService.servers.append( cms.PSet( - name = cms.untracked.string("local"), - address = cms.untracked.string(options.address), - port = cms.untracked.uint32(options.port), + name = cms.untracked.string(options.serverName), + address = cms.untracked.string(options.address), + port = cms.untracked.uint32(options.port), ) ) -) process.TritonProducer = cms.EDProducer(options.producer, Client = cms.PSet( @@ -54,6 +59,7 @@ timeout = cms.untracked.uint32(options.timeout), modelName = cms.string(models[options.producer]), modelVersion = cms.string(""), + modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(models[options.producer])), verbose = cms.untracked.bool(options.verbose), allowedTries = cms.untracked.uint32(0), ) @@ -70,7 +76,7 @@ process.load('FWCore/MessageService/MessageLogger_cfi') process.MessageLogger.cerr.FwkReport.reportEvery = 500 -keep_msgs = [options.producer,options.producer+':TritonClient','TritonClient'] +keep_msgs = [options.producer,options.producer+':TritonClient','TritonClient','TritonService'] for msg in keep_msgs: setattr(process.MessageLogger.cerr,msg, From 22040d3c8c77a5e42c436ee30e715a6bd0088fcd Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 10 Dec 2020 14:25:42 -0600 Subject: [PATCH 05/31] code format --- .../SonicCore/src/SonicClientBase.cc | 4 +- .../SonicTriton/interface/TritonDummyCache.h | 6 +- .../SonicTriton/interface/TritonEDFilter.h | 20 +- .../SonicTriton/interface/TritonEDProducer.h | 20 +- .../interface/TritonOneEDAnalyzer.h | 12 +- .../SonicTriton/interface/TritonService.h | 179 ++++----- .../SonicTriton/src/TritonClient.cc | 7 +- .../SonicTriton/src/TritonService.cc | 348 +++++++++--------- 
.../SonicTriton/test/TritonGraphProducer.cc | 3 +- .../SonicTriton/test/TritonImageProducer.cc | 6 +- 10 files changed, 315 insertions(+), 290 deletions(-) diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index f91359e4a4816..32f0e2af1e9af 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -21,11 +21,11 @@ SonicClientBase::SonicClientBase(const edm::ParameterSet& params, setMode(SonicMode::PseudoAsync); else throw cms::Exception("Configuration") << "Unknown mode for SonicClient: " << modeName; - } void SonicClientBase::setMode(SonicMode mode) { - if (mode_==mode) return; + if (mode_ == mode) + return; mode_ = mode; //get correct dispatcher for mode diff --git a/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h b/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h index 271526c953fd5..df38d0bae28c9 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonDummyCache.h @@ -13,8 +13,8 @@ namespace edm { T* makeStreamModule(edm::ParameterSet const& iPSet, const TritonDummyCache*) { return new T(iPSet); } - } - } -} + } // namespace impl + } // namespace stream +} // namespace edm #endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h b/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h index 5d1f786ec2f47..989570373817e 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h @@ -12,17 +12,19 @@ template class TritonEDFilterT : public SonicEDFilter, Capabilities...> { public: - TritonEDFilterT(edm::ParameterSet const& cfg, const std::string& debugName) : SonicEDFilter, Capabilities...>(cfg, debugName) {} + TritonEDFilterT(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicEDFilter, Capabilities...>(cfg, debugName) {} - //use this 
function to avoid calling TritonService functions Nstreams times - static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { - edm::Service ts; - const auto& clientPset = pset.getParameterSet("Client"); - ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); - return nullptr; - } + //use this function to avoid calling TritonService functions Nstreams times + static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), + clientPset.getParameter("modelConfigPath").fullPath()); + return nullptr; + } - static void globalEndJob(G*) {} + static void globalEndJob(G*) {} }; template diff --git a/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h b/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h index 2ccb859f06f55..0b8ec909f511c 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h @@ -12,17 +12,19 @@ template class TritonEDProducerT : public SonicEDProducer, Capabilities...> { public: - TritonEDProducerT(edm::ParameterSet const& cfg, const std::string& debugName) : SonicEDProducer, Capabilities...>(cfg, debugName) {} + TritonEDProducerT(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicEDProducer, Capabilities...>(cfg, debugName) {} - //use this function to avoid calling TritonService functions Nstreams times - static std::unique_ptr initializeGlobalCache(edm::ParameterSet const& pset) { - edm::Service ts; - const auto& clientPset = pset.getParameterSet("Client"); - ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); - return nullptr; - } + //use this function to avoid calling TritonService functions Nstreams times + static std::unique_ptr initializeGlobalCache(edm::ParameterSet 
const& pset) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), + clientPset.getParameter("modelConfigPath").fullPath()); + return nullptr; + } - static void globalEndJob(G*) {} + static void globalEndJob(G*) {} }; template diff --git a/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h index 86275e9c6d89c..0a98aa70d7f6b 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h @@ -9,11 +9,13 @@ template class TritonOneEDAnalyzer : public SonicOneEDAnalyzer { public: - TritonOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) : SonicOneEDAnalyzer(cfg, debugName) { - edm::Service ts; - const auto& clientPset = pset.getParameterSet("Client"); - ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); - } + TritonOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) + : SonicOneEDAnalyzer(cfg, debugName) { + edm::Service ts; + const auto& clientPset = pset.getParameterSet("Client"); + ts->addModel(clientPset.getParameter("modelName"), + clientPset.getParameter("modelConfigPath").fullPath()); + } }; #endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index b7ec71679a306..cac4d8f36078c 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -13,98 +13,103 @@ //forward declarations namespace edm { - class ActivityRegistry; - class ConfigurationDescriptions; - class PathsAndConsumesOfModulesBase; - class ProcessContext; -} + class ActivityRegistry; + class ConfigurationDescriptions; + class PathsAndConsumesOfModulesBase; + class ProcessContext; +} // namespace edm class TritonService { 
public: - //classes and defs - struct FallbackOpts { - FallbackOpts(const edm::ParameterSet& pset) : enable(pset.getUntrackedParameter("enable")), verbose(pset.getUntrackedParameter("verbose")), useDocker(pset.getUntrackedParameter("useDocker")), useGPU(pset.getUntrackedParameter("useGPU")), retries(pset.getUntrackedParameter("retries")), wait(pset.getUntrackedParameter("wait")), instanceName(pset.getUntrackedParameter("instanceName")), tempDir(pset.getUntrackedParameter("tempDir")) {} - - bool enable; - bool verbose; - bool useDocker; - bool useGPU; - int retries; - int wait; - std::string instanceName; - std::string tempDir; - }; - struct Server { - Server(const edm::ParameterSet& pset) : name(pset.getUntrackedParameter("name")), url(pset.getUntrackedParameter("address") + ":" + std::to_string(pset.getUntrackedParameter("port"))), isFallback(name==fallbackName) {} - Server(const std::string& name_, const std::string& url_="") : name(name_), url(url_), isFallback(name==fallbackName) {} - - struct Hash { - size_t operator()(const Server& obj) const { - return hashObj(obj.name); - } - std::hash hashObj; - }; - - struct Equal { - bool operator()(const Server& lhs, const Server& rhs) const { - return lhs.name == rhs.name; - } - }; - - //members - std::string name; - std::string url; - bool isFallback; - mutable std::unordered_set models; - static const std::string fallbackName; - static const std::string fallbackUrl; - }; - struct Model { - Model(const std::string& name_, const std::string& path_="") : name(name_), path(path_) {} - - struct Hash { - size_t operator()(const Model& obj) const { - return hashObj(obj.name); - } - std::hash hashObj; - }; - - struct Equal { - bool operator()(const Model& lhs, const Model& rhs) const { - return lhs.name == rhs.name; - } - }; - - //members - std::string name; - std::string path; - mutable std::unordered_set servers; - }; - - TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); - ~TritonService(); - - 
//accessors - void addModel(const std::string& modelName, const std::string& path); - std::pair serverAddress(const std::string& model, const std::string& preferred="") const; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + //classes and defs + struct FallbackOpts { + FallbackOpts(const edm::ParameterSet& pset) + : enable(pset.getUntrackedParameter("enable")), + verbose(pset.getUntrackedParameter("verbose")), + useDocker(pset.getUntrackedParameter("useDocker")), + useGPU(pset.getUntrackedParameter("useGPU")), + retries(pset.getUntrackedParameter("retries")), + wait(pset.getUntrackedParameter("wait")), + instanceName(pset.getUntrackedParameter("instanceName")), + tempDir(pset.getUntrackedParameter("tempDir")) {} + + bool enable; + bool verbose; + bool useDocker; + bool useGPU; + int retries; + int wait; + std::string instanceName; + std::string tempDir; + }; + struct Server { + Server(const edm::ParameterSet& pset) + : name(pset.getUntrackedParameter("name")), + url(pset.getUntrackedParameter("address") + ":" + + std::to_string(pset.getUntrackedParameter("port"))), + isFallback(name == fallbackName) {} + Server(const std::string& name_, const std::string& url_ = "") + : name(name_), url(url_), isFallback(name == fallbackName) {} + + struct Hash { + size_t operator()(const Server& obj) const { return hashObj(obj.name); } + std::hash hashObj; + }; + + struct Equal { + bool operator()(const Server& lhs, const Server& rhs) const { return lhs.name == rhs.name; } + }; + + //members + std::string name; + std::string url; + bool isFallback; + mutable std::unordered_set models; + static const std::string fallbackName; + static const std::string fallbackUrl; + }; + struct Model { + Model(const std::string& name_, const std::string& path_ = "") : name(name_), path(path_) {} + + struct Hash { + size_t operator()(const Model& obj) const { return hashObj(obj.name); } + std::hash hashObj; + }; + + struct Equal { + bool operator()(const Model& lhs, 
const Model& rhs) const { return lhs.name == rhs.name; } + }; + + //members + std::string name; + std::string path; + mutable std::unordered_set servers; + }; + + TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); + ~TritonService(); + + //accessors + void addModel(const std::string& modelName, const std::string& path); + std::pair serverAddress(const std::string& model, const std::string& preferred = "") const; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: - void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&); - - //to search without full object - auto findServer(const std::string& name) const { return servers_.find(Server(name)); } - auto findModel(const std::string& name) const { return models_.find(Model(name)); } - - bool verbose_; - FallbackOpts fallbackOpts_; - bool startedFallback_; - //concurrent data type is used because addModel() might be called by multiple threads - tbb::concurrent_unordered_set unservedModels_; - //this is a lazy and inefficient many:many map - std::unordered_set servers_; - std::unordered_set models_; + void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&); + + //to search without full object + auto findServer(const std::string& name) const { return servers_.find(Server(name)); } + auto findModel(const std::string& name) const { return models_.find(Model(name)); } + + bool verbose_; + FallbackOpts fallbackOpts_; + bool startedFallback_; + //concurrent data type is used because addModel() might be called by multiple threads + tbb::concurrent_unordered_set unservedModels_; + //this is a lazy and inefficient many:many map + std::unordered_set servers_; + std::unordered_set models_; }; #endif diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index c0ee5c1d546c3..a12e2255ef66e 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc 
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -29,10 +29,11 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d options_(params.getParameter("modelName")) { //get appropriate server for this model edm::Service ts; - const auto& [url, isFallbackCPU] = ts->serverAddress(options_.model_name_,params.getUntrackedParameter("preferredServer")); + const auto& [url, isFallbackCPU] = + ts->serverAddress(options_.model_name_, params.getUntrackedParameter("preferredServer")); //enforce sync mode for fallback CPU server to avoid contention //todo: could enforce async mode otherwise (unless mode was specified by user?) - if(isFallbackCPU) + if (isFallbackCPU) setMode(SonicMode::Sync); //connect to the server @@ -347,7 +348,7 @@ void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) { descClient.add("modelVersion", ""); descClient.add("modelConfigPath"); //server parameters should not affect the physics results - descClient.addUntracked("preferredServer",""); + descClient.addUntracked("preferredServer", ""); descClient.addUntracked("timeout"); descClient.addUntracked("verbose", false); descClient.addUntracked>("outputs", {}); diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index f769dd5309bd4..5c323b6d9e5f3 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -21,187 +21,199 @@ namespace nic = ni::client; const std::string TritonService::Server::fallbackName{"fallback"}; const std::string TritonService::Server::fallbackUrl{"0.0.0.0:8001"}; -TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) : verbose_(pset.getUntrackedParameter("verbose")), fallbackOpts_(pset.getParameterSet("fallback")), startedFallback_(false) { - //fallback server will be launched (if needed) before beginJob - areg.watchPreBeginJob(this, &TritonService::preBeginJob); - - 
//include fallback server in set if enabled - if(fallbackOpts_.enable) - servers_.emplace(Server::fallbackName,Server::fallbackUrl); - - //loop over input servers: check which models they have - std::string msg; - if(verbose_) - msg = "List of models for each server:\n"; - for(const auto& serverPset : pset.getUntrackedParameterSetVector("servers")){ - Server tmp(serverPset); - //ensure uniqueness - auto sit = servers_.find(tmp); - if (sit!=servers_.end()) - throw cms::Exception("DuplicateServer") << "Not allowed to specify more than one server with same name (" << tmp.name << ")"; - - std::unique_ptr client; - triton_utils::throwIfError(nic::InferenceServerGrpcClient::Create(&client, tmp.url, false), "TritonService(): unable to create inference context for "+tmp.name+" ("+tmp.url+")"); - - inference::RepositoryIndexResponse repoIndexResponse; - triton_utils::throwIfError(client->ModelRepositoryIndex(&repoIndexResponse), "TritonService(): unable to get repository index for "+tmp.name+" ("+tmp.url+")"); - - //servers keep track of models and vice versa - if(verbose_) - msg += tmp.name + ": "; - for(const auto& modelIndex : repoIndexResponse.models()){ - const auto& modelName = modelIndex.name(); - auto mit = findModel(modelName); - if(mit==models_.end()) - mit = models_.emplace(modelName).first; - mit->servers.insert(tmp.name); - tmp.models.insert(modelName); - if(verbose_) - msg += modelName + ", "; - } - if(verbose_) - msg += "\n"; - servers_.insert(tmp); - } - if(verbose_) - edm::LogInfo("TritonService") << msg; +TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) + : verbose_(pset.getUntrackedParameter("verbose")), + fallbackOpts_(pset.getParameterSet("fallback")), + startedFallback_(false) { + //fallback server will be launched (if needed) before beginJob + areg.watchPreBeginJob(this, &TritonService::preBeginJob); + + //include fallback server in set if enabled + if (fallbackOpts_.enable) + 
servers_.emplace(Server::fallbackName, Server::fallbackUrl); + + //loop over input servers: check which models they have + std::string msg; + if (verbose_) + msg = "List of models for each server:\n"; + for (const auto& serverPset : pset.getUntrackedParameterSetVector("servers")) { + Server tmp(serverPset); + //ensure uniqueness + auto sit = servers_.find(tmp); + if (sit != servers_.end()) + throw cms::Exception("DuplicateServer") + << "Not allowed to specify more than one server with same name (" << tmp.name << ")"; + + std::unique_ptr client; + triton_utils::throwIfError( + nic::InferenceServerGrpcClient::Create(&client, tmp.url, false), + "TritonService(): unable to create inference context for " + tmp.name + " (" + tmp.url + ")"); + + inference::RepositoryIndexResponse repoIndexResponse; + triton_utils::throwIfError( + client->ModelRepositoryIndex(&repoIndexResponse), + "TritonService(): unable to get repository index for " + tmp.name + " (" + tmp.url + ")"); + + //servers keep track of models and vice versa + if (verbose_) + msg += tmp.name + ": "; + for (const auto& modelIndex : repoIndexResponse.models()) { + const auto& modelName = modelIndex.name(); + auto mit = findModel(modelName); + if (mit == models_.end()) + mit = models_.emplace(modelName).first; + mit->servers.insert(tmp.name); + tmp.models.insert(modelName); + if (verbose_) + msg += modelName + ", "; + } + if (verbose_) + msg += "\n"; + servers_.insert(tmp); + } + if (verbose_) + edm::LogInfo("TritonService") << msg; } void TritonService::addModel(const std::string& modelName, const std::string& path) { - //if model is not in the list, then no specified server provides it - auto mit = findModel(modelName); - if(mit==models_.end()) - unservedModels_.emplace(modelName,path); + //if model is not in the list, then no specified server provides it + auto mit = findModel(modelName); + if (mit == models_.end()) + unservedModels_.emplace(modelName, path); } //second return value is only true if fallback CPU 
server is being used -std::pair TritonService::serverAddress(const std::string& model, const std::string& preferred) const { - auto mit = findModel(model); - if (mit==models_.end()) - throw cms::Exception("MissingModel") << "There are no servers that provide model " << model; - - const auto& modelServers = mit->servers; - - if (!preferred.empty()){ - auto sit = modelServers.find(preferred); - //todo: add a "strict" parameter to stop execution if preferred server isn't found? - if(sit==modelServers.end()) - edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model << " not available, will choose another server"; - else - return std::make_pair(findServer(preferred)->url,false); - } - - //todo: use some algorithm to select server rather than just picking arbitrarily - auto sit = findServer(*modelServers.begin()); - bool isFallbackCPU = sit->isFallback and !fallbackOpts_.useGPU; - return std::make_pair(sit->url,isFallbackCPU); +std::pair TritonService::serverAddress(const std::string& model, + const std::string& preferred) const { + auto mit = findModel(model); + if (mit == models_.end()) + throw cms::Exception("MissingModel") << "There are no servers that provide model " << model; + + const auto& modelServers = mit->servers; + + if (!preferred.empty()) { + auto sit = modelServers.find(preferred); + //todo: add a "strict" parameter to stop execution if preferred server isn't found? 
+ if (sit == modelServers.end()) + edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model + << " not available, will choose another server"; + else + return std::make_pair(findServer(preferred)->url, false); + } + + //todo: use some algorithm to select server rather than just picking arbitrarily + auto sit = findServer(*modelServers.begin()); + bool isFallbackCPU = sit->isFallback and !fallbackOpts_.useGPU; + return std::make_pair(sit->url, isFallbackCPU); } void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&) { - //only need fallback if there are unserved models - if (!fallbackOpts_.enable or unservedModels_.empty()) return; - - std::string msg; - if(verbose_) - msg = "List of models for fallback server: "; - //all unserved models are provided by fallback server - auto sit = findServer(Server::fallbackName); - for(const auto& model : unservedModels_){ - auto mit = models_.insert(model).first; - mit->servers.insert(Server::fallbackName); - sit->models.insert(mit->name); - if(verbose_) - msg += mit->name + ", "; - } - if(verbose_) - edm::LogInfo("TritonService") << msg; - - //assemble server start command - std::string command("triton"); - if (fallbackOpts_.verbose) - command += " -v"; - if (fallbackOpts_.useDocker) - command += " -d"; - if (fallbackOpts_.useGPU) - command += " -g"; - if (!fallbackOpts_.instanceName.empty()) - command += " -n "+fallbackOpts_.instanceName; - if (fallbackOpts_.retries >= 0) - command += " -r "+std::to_string(fallbackOpts_.retries); - if (fallbackOpts_.wait >=0) - command += " -w "+std::to_string(fallbackOpts_.wait); - for (const auto& model : unservedModels_) { - command += " -m "+model.path; - } - //don't need this anymore - unservedModels_.clear(); - - //get a random temporary directory if none specified - if (fallbackOpts_.tempDir.empty()) { - auto tmp_dir_path{std::filesystem::temp_directory_path() /= std::tmpnam(nullptr)}; - 
fallbackOpts_.tempDir = tmp_dir_path.string(); - } - //special case ".": use script default (temp dir = .$instanceName) - if (fallbackOpts_.tempDir != ".") - command += " -t "+fallbackOpts_.tempDir; - - command += " start"; - - if(verbose_) - edm::LogInfo("TritonService") << command; - - //mark as started before executing in case of ctrl+c while command is running - startedFallback_ = true; - int rv = std::system(command.c_str()); - if (rv != 0) - throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; + //only need fallback if there are unserved models + if (!fallbackOpts_.enable or unservedModels_.empty()) + return; + + std::string msg; + if (verbose_) + msg = "List of models for fallback server: "; + //all unserved models are provided by fallback server + auto sit = findServer(Server::fallbackName); + for (const auto& model : unservedModels_) { + auto mit = models_.insert(model).first; + mit->servers.insert(Server::fallbackName); + sit->models.insert(mit->name); + if (verbose_) + msg += mit->name + ", "; + } + if (verbose_) + edm::LogInfo("TritonService") << msg; + + //assemble server start command + std::string command("triton"); + if (fallbackOpts_.verbose) + command += " -v"; + if (fallbackOpts_.useDocker) + command += " -d"; + if (fallbackOpts_.useGPU) + command += " -g"; + if (!fallbackOpts_.instanceName.empty()) + command += " -n " + fallbackOpts_.instanceName; + if (fallbackOpts_.retries >= 0) + command += " -r " + std::to_string(fallbackOpts_.retries); + if (fallbackOpts_.wait >= 0) + command += " -w " + std::to_string(fallbackOpts_.wait); + for (const auto& model : unservedModels_) { + command += " -m " + model.path; + } + //don't need this anymore + unservedModels_.clear(); + + //get a random temporary directory if none specified + if (fallbackOpts_.tempDir.empty()) { + auto tmp_dir_path{std::filesystem::temp_directory_path() /= std::tmpnam(nullptr)}; + fallbackOpts_.tempDir = tmp_dir_path.string(); + } + 
//special case ".": use script default (temp dir = .$instanceName) + if (fallbackOpts_.tempDir != ".") + command += " -t " + fallbackOpts_.tempDir; + + command += " start"; + + if (verbose_) + edm::LogInfo("TritonService") << command; + + //mark as started before executing in case of ctrl+c while command is running + startedFallback_ = true; + int rv = std::system(command.c_str()); + if (rv != 0) + throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; } TritonService::~TritonService() { - if (!startedFallback_) return; - - //assemble server stop command - std::string command("triton"); - - if (fallbackOpts_.verbose) - command += " -v"; - if (fallbackOpts_.useDocker) - command += " -d"; - if (!fallbackOpts_.instanceName.empty()) - command += " -n "+fallbackOpts_.instanceName; - if (fallbackOpts_.tempDir != ".") - command += " -t "+fallbackOpts_.tempDir; - - command += " stop"; - - if(verbose_) - edm::LogInfo("TritonService") << command; - int rv = std::system(command.c_str()); - if (rv != 0) - edm::LogError("FallbackFailed") << "Stopping the fallback server failed with exit code " << rv; + if (!startedFallback_) + return; + + //assemble server stop command + std::string command("triton"); + + if (fallbackOpts_.verbose) + command += " -v"; + if (fallbackOpts_.useDocker) + command += " -d"; + if (!fallbackOpts_.instanceName.empty()) + command += " -n " + fallbackOpts_.instanceName; + if (fallbackOpts_.tempDir != ".") + command += " -t " + fallbackOpts_.tempDir; + + command += " stop"; + + if (verbose_) + edm::LogInfo("TritonService") << command; + int rv = std::system(command.c_str()); + if (rv != 0) + edm::LogError("FallbackFailed") << "Stopping the fallback server failed with exit code " << rv; } void TritonService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.addUntracked("verbose",false); - - edm::ParameterSetDescription validator; - 
validator.addUntracked("name"); - validator.addUntracked("address"); - validator.addUntracked("port"); - - desc.addVPSetUntracked("servers", validator, {}); - - edm::ParameterSetDescription fallbackDesc; - fallbackDesc.addUntracked("enable",false); - fallbackDesc.addUntracked("verbose",false); - fallbackDesc.addUntracked("useDocker",false); - fallbackDesc.addUntracked("useGPU",false); - fallbackDesc.addUntracked("retries",-1); - fallbackDesc.addUntracked("wait",-1); - fallbackDesc.addUntracked("instanceName",""); - fallbackDesc.addUntracked("tempDir",""); - desc.add("fallback",fallbackDesc); - - descriptions.addWithDefaultLabel(desc); + edm::ParameterSetDescription desc; + desc.addUntracked("verbose", false); + + edm::ParameterSetDescription validator; + validator.addUntracked("name"); + validator.addUntracked("address"); + validator.addUntracked("port"); + + desc.addVPSetUntracked("servers", validator, {}); + + edm::ParameterSetDescription fallbackDesc; + fallbackDesc.addUntracked("enable", false); + fallbackDesc.addUntracked("verbose", false); + fallbackDesc.addUntracked("useDocker", false); + fallbackDesc.addUntracked("useGPU", false); + fallbackDesc.addUntracked("retries", -1); + fallbackDesc.addUntracked("wait", -1); + fallbackDesc.addUntracked("instanceName", ""); + fallbackDesc.addUntracked("tempDir", ""); + desc.add("fallback", fallbackDesc); + + descriptions.addWithDefaultLabel(desc); } diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc index 78b08a178e397..f30993838cc22 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc @@ -13,8 +13,7 @@ class TritonGraphProducer : public TritonEDProducer<> { public: - explicit TritonGraphProducer(edm::ParameterSet const& cfg) - : TritonEDProducer<>(cfg, "TritonGraphProducer") {} + explicit TritonGraphProducer(edm::ParameterSet const& cfg) : 
TritonEDProducer<>(cfg, "TritonGraphProducer") {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //get event-based seed for RNG unsigned int runNum_uint = static_cast(iEvent.id().run()); diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index 3cbe1c2a816ab..1451e79e45049 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -13,7 +13,9 @@ class TritonImageProducer : public TritonEDProducer<> { public: explicit TritonImageProducer(edm::ParameterSet const& cfg) - : TritonEDProducer<>(cfg, "TritonImageProducer"), batchSize_(cfg.getParameter("batchSize")), topN_(cfg.getParameter("topN")) { + : TritonEDProducer<>(cfg, "TritonImageProducer"), + batchSize_(cfg.getParameter("batchSize")), + topN_(cfg.getParameter("topN")) { //load score list std::string imageListFile(cfg.getParameter("imageList")); std::ifstream ifile(imageListFile); @@ -27,7 +29,7 @@ class TritonImageProducer : public TritonEDProducer<> { } } void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { - client_->setBatchSize(batchSize_); + client_->setBatchSize(batchSize_); // create an npix x npix x ncol image w/ arbitrary color value // model only has one input, so just pick begin() auto& input1 = iInput.begin()->second; From 6374f355fef9b6fe23b716defa708bb1af452ef0 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 10 Dec 2020 14:57:50 -0600 Subject: [PATCH 06/31] add special workflow for SonicTriton w/ TritonService enabled automatically --- .../python/allSonicTriton_cff.py | 6 ++++ .../python/enableSonicTriton_cff.py | 3 ++ .../python/upgradeWorkflowComponents.py | 35 +++++++++++++++++++ .../StandardSequences/python/Services_cff.py | 6 ++++ .../SonicTriton/python/TritonService_cff.py | 12 +++++++ HeterogeneousCore/SonicTriton/scripts/triton | 2 +- 
.../SonicTriton/src/TritonService.cc | 5 +++ .../SonicTriton/test/tritonTest_cfg.py | 8 ++--- 8 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 Configuration/ProcessModifiers/python/allSonicTriton_cff.py create mode 100644 Configuration/ProcessModifiers/python/enableSonicTriton_cff.py create mode 100644 HeterogeneousCore/SonicTriton/python/TritonService_cff.py diff --git a/Configuration/ProcessModifiers/python/allSonicTriton_cff.py b/Configuration/ProcessModifiers/python/allSonicTriton_cff.py new file mode 100644 index 0000000000000..1de0813bff065 --- /dev/null +++ b/Configuration/ProcessModifiers/python/allSonicTriton_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton + +# collect all SonicTriton-related process modifiers here +allSonicTriton = cms.ModifierChain(enableSonicTriton) diff --git a/Configuration/ProcessModifiers/python/enableSonicTriton_cff.py b/Configuration/ProcessModifiers/python/enableSonicTriton_cff.py new file mode 100644 index 0000000000000..8034f48d20ef4 --- /dev/null +++ b/Configuration/ProcessModifiers/python/enableSonicTriton_cff.py @@ -0,0 +1,3 @@ +import FWCore.ParameterSet.Config as cms + +enableSonicTriton = cms.Modifier() diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index a9876705bf631..490500cb3807d 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -919,6 +919,41 @@ def condition(self, fragment, stepList, key, hasHarvest): ) upgradeWFs['DD4hep'].allowReuse = False +class UpgradeWorkflow_SonicTriton(UpgradeWorkflow): + def setup_(self, step, stepName, stepDict, k, properties): + stepDict[stepName][k] = merge([{'--procModifiers': 'allSonicTriton'}, stepDict[step][k]]) + def condition(self, fragment, 
stepList, key, hasHarvest): + return (fragment=='TTbar_13' and '2021' in key) \ + or (fragment=='TTbar_14TeV' and '2026' in key) +upgradeWFs['SonicTriton'] = UpgradeWorkflow_SonicTriton( + steps = [ + 'GenSim', + 'GenSimHLBeamSpot', + 'GenSimHLBeamSpot14', + 'Digi', + 'DigiTrigger', + 'Reco', + 'RecoGlobal', + 'HARVEST', + 'HARVESTGlobal', + 'ALCA', + ], + PU = [ + 'GenSim', + 'GenSimHLBeamSpot', + 'GenSimHLBeamSpot14', + 'Digi', + 'DigiTrigger', + 'Reco', + 'RecoGlobal', + 'HARVEST', + 'HARVESTGlobal', + 'ALCA', + ], + suffix = '_SonicTriton', + offset = 0.9001, +) + # check for duplicate offsets offsets = [specialWF.offset for specialType,specialWF in six.iteritems(upgradeWFs)] seen = set() diff --git a/Configuration/StandardSequences/python/Services_cff.py b/Configuration/StandardSequences/python/Services_cff.py index 0365d5f22bf51..ef1da8dec1d59 100644 --- a/Configuration/StandardSequences/python/Services_cff.py +++ b/Configuration/StandardSequences/python/Services_cff.py @@ -14,3 +14,9 @@ def _addCUDAServices(process): from Configuration.ProcessModifiers.gpu_cff import gpu modifyConfigurationStandardSequencesServicesAddCUDAServices_ = gpu.makeProcessModifier(_addCUDAServices) + +# load TritonService when SONIC workflow is enabled +def _addTritonService(process): + process.load("HeterogeneousCore.SonicTriton.TritonService_cff") +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton +modifyConfigurationStandardSequencesServicesAddTritonService_ = enableSonicTriton.makeProcessModifier(_addTritonService) diff --git a/HeterogeneousCore/SonicTriton/python/TritonService_cff.py b/HeterogeneousCore/SonicTriton/python/TritonService_cff.py new file mode 100644 index 0000000000000..381635f86617d --- /dev/null +++ b/HeterogeneousCore/SonicTriton/python/TritonService_cff.py @@ -0,0 +1,12 @@ +from HeterogeneousCore.SonicTriton.TritonService_cfi import * + +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import _switch_cuda + +from 
Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton + +enableSonicTriton.toModify(TritonService, + fallback = dict( + enable = True, + useGPU = _switch_cuda()[0], + ), +) diff --git a/HeterogeneousCore/SonicTriton/scripts/triton b/HeterogeneousCore/SonicTriton/scripts/triton index 2e5d3bfdb9995..1abf8dac2734a 100755 --- a/HeterogeneousCore/SonicTriton/scripts/triton +++ b/HeterogeneousCore/SonicTriton/scripts/triton @@ -292,7 +292,7 @@ else if [ -z "$DRYRUN" ]; then if [ -d "$TMPDIR" ]; then cd "$TMPDIR" - else + elif [ -z "$FORCE" ]; then echo "Error: attempt to stop unknown container $SERVER" exit 1 fi diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 5c323b6d9e5f3..bf58160fb43b4 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -160,6 +160,8 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: if (verbose_) edm::LogInfo("TritonService") << command; + else + command += " >>/dev/null 2>>/dev/null"; //mark as started before executing in case of ctrl+c while command is running startedFallback_ = true; @@ -188,6 +190,9 @@ TritonService::~TritonService() { if (verbose_) edm::LogInfo("TritonService") << command; + else + command += " >>/dev/null 2>>/dev/null"; + int rv = std::system(command.c_str()); if (rv != 0) edm::LogError("FallbackFailed") << "Stopping the fallback server failed with exit code " << rv; diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 3f7fb76dcf409..77df8a9e7dd08 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -4,7 +4,7 @@ options = VarParsing("analysis") options.register("serverName", "default", VarParsing.multiplicity.singleton, VarParsing.varType.string) -options.register("address", 
"0.0.0.0", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("address", "", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("port", 8001, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("timeout", 30, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string) @@ -32,16 +32,16 @@ if options.producer not in models: raise ValueError("Unknown producer: "+options.producer) -process = cms.Process('tritonTest') +from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton +process = cms.Process('tritonTest',enableSonicTriton) -process.load("HeterogeneousCore.SonicTriton.TritonService_cfi") +process.load("HeterogeneousCore.SonicTriton.TritonService_cff") process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(options.maxEvents) ) process.source = cms.Source("EmptySource") process.TritonService.verbose = options.verbose -process.TritonService.fallback.enable = True process.TritonService.fallback.verbose = options.verbose if len(options.address)>0: process.TritonService.servers.append( From 0232dc01522c90ad46ccfba5714df07ed910c3f5 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 10 Dec 2020 15:41:01 -0600 Subject: [PATCH 07/31] update docs --- HeterogeneousCore/SonicTriton/README.md | 42 +++++++++++++++++--- HeterogeneousCore/SonicTriton/test/README.md | 14 ++----- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 4c71ca6eb032f..4dbb8f8577777 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -1,5 +1,7 @@ # SONIC for Triton Inference Server +## Introduction to Triton + Triton Inference Server 
([docs](https://docs.nvidia.com/deeplearning/triton-inference-server/archives/triton_inference_server_1130/user-guide/docs/index.html), [repo](https://github.com/NVIDIA/triton-inference-server)) is an open-source product from Nvidia that facilitates the use of GPUs as a service to process inference requests. @@ -9,6 +11,8 @@ boolean, unsigned integer (8, 16, 32, or 64 bits), integer (8, 16, 32, or 64 bit Triton additionally supports inputs and outputs with multiple dimensions, some of which might be variable (denoted by -1). Concrete values for variable dimensions must be specified for each call (event). +## Client + Accordingly, the `TritonClient` input and output types are: * input: `TritonInputMap = std::unordered_map<std::string, TritonInputData>` * output: `TritonOutputMap = std::unordered_map<std::string, TritonOutputData>` @@ -22,11 +26,8 @@ The model information from the server can be printed by enabling `verbose` outpu `TritonClient` takes several parameters: * `modelName`: name of model with which to perform inference * `modelVersion`: version number of model (default: -1, use latest available version on server) -* `batchSize`: number of objects sent per request - * can also be set on per-event basis using `setBatchSize()` - * some models don't support batching -* `address`: server IP address -* `port`: server port +* `modelConfigPath`: path to `config.pbtxt` file for the model (using `edm::FileInPath`) +* `preferredServer`: name of preferred server (see [Services](#services) below) * `timeout`: maximum allowed time for a request * `outputs`: optional, specify which output(s) the server should send @@ -38,6 +39,8 @@ Useful `TritonData` accessors include: * `byteSize()`: return number of bytes for data type * `dname()`: return name of data type * `batchSize()`: return current batch size +* `setBatchSize()`: set a new batch size + * some models may not support batching To update the `TritonData` shape in the variable-dimension case: * `setShape(const std::vector<int64_t>& newShape)`: update all (variable) dimensions 
with values provided in `newShape` @@ -49,6 +52,22 @@ and returned by `TritonOutputData::fromServer()`, respectively: + * `TritonInput<DT> = std::vector<std::vector<DT>>` + * `TritonOutput<DT> = std::vector<edm::Span<const DT*>>` + +## Modules + +SONIC Triton supports producers, filters, and analyzers. +New modules should inherit from `TritonEDProducer`, `TritonEDFilter`, or `TritonOneEDAnalyzer`. +These follow essentially the same patterns described in [SonicCore](../SonicCore#for-analyzers). + +If an `edm::GlobalCache` of type `T` is needed, there are two changes: +* The new module should inherit from `TritonEDProducerT<T>` or `TritonEDFilterT<T>` +* The new module should contain these lines: + ```cpp + static std::unique_ptr<T> initializeGlobalCache(edm::ParameterSet const& pset) { + TritonEDProducerT<T>::initializeGlobalCache(pset); + [module-specific code goes here] + } + ``` + In a SONIC Triton producer, the basic flow should follow this pattern: 1. `acquire()`: a. access input object(s) from `TritonInputMap` @@ -61,6 +80,8 @@ In a SONIC Triton producer, the basic flow should follow this pattern: b. obtain output data as `TritonOutput` using `fromServer()` function of output object(s) (sets output shape(s) if variable dimensions exist) c. fill output products +## Services + A script [`triton`](./scripts/triton) is provided to launch and manage local servers. 
The script has two operations (`start` and `stop`) and the following options: * `-c`: don't cleanup temporary dir (for debugging) @@ -74,7 +95,7 @@ The script has two operations (`start` and `stop`) and the following options: * `-r [num]`: number of retries when starting container (default: 3) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs -* `-w` [time]`: maximum time to wait for server to start (default: 60 seconds) +* `-w [time]`: maximum time to wait for server to start (default: 60 seconds) * `-h`: print help message and exit Additional details and caveats: @@ -88,4 +109,13 @@ If a model repository is provided, all of the models it contains will be provide * Older versions of Singularity have a short timeout that may cause launching the server to fail the first time the command is executed. The `-r` (retry) flag exists to work around this issue. +A central `TritonService` is provided to keep track of all available servers and which models they can serve. +The servers will automatically be assigned to clients at startup. +If some models are not served by any server, the `TritonService` can launch a fallback server using the `triton` script described above. +If the process modifiers `enableSonicTriton` or `allSonicTriton` are activated, +the fallback server will launch automatically if needed and will use a local GPU if one is available. +If the fallback server uses CPU, clients that use the fallback server will automatically be set to `Sync` mode. + +## Examples + Several example producers (running ResNet50 or Graph Attention Network) can be found in the [test](./test) directory. 
diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 15d519b707351..74e53ad6e2a6c 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -3,22 +3,14 @@ Test producers `TritonImageProducer` and `TritonGraphProducer` are available. They generate arbitrary inputs for inference (with ResNet50 or Graph Attention Network, respectively) and print the resulting output. -To run the tests, a local Triton server can be started using Singularity (default, should not require superuser permission) -or Docker (may require superuser permission). -The server can utilize the local CPU (support for AVX instructions required) or a local Nvidia GPU, if one is available. -The default local server address is `0.0.0.0`. - First, the relevant data should be downloaded from Nvidia: ``` ./fetch_model.sh ``` -Launch a local server (using Singularity with CPU by default): -``` -triton -M $CMSSW_BASE/src/HeterogeneousCore/SonicTriton/data/models start -[run test commands] -triton stop -``` +A local Triton server will be launched automatically when the tests run. +The local server will use Singularity with CPU by default; if a local Nvidia GPU is available, it will be used instead. +(This behavior can also be controlled manually by modifying [tritonTest_cfg.py](./tritonTest_cfg.py).) 
## Test commands From 3d75f3e7a852ece1533bc22b3b2bf8dd31073cc3 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 21 Dec 2020 18:53:17 -0600 Subject: [PATCH 08/31] add unit tests for all Triton module types --- HeterogeneousCore/SonicTriton/README.md | 2 +- .../interface/TritonOneEDAnalyzer.h | 5 +- .../SonicTriton/python/TritonService_cff.py | 13 +- .../SonicTriton/test/BuildFile.xml | 16 +- HeterogeneousCore/SonicTriton/test/README.md | 6 +- .../SonicTriton/test/TritonGraphModules.cc | 175 ++++++++++++++++++ .../SonicTriton/test/TritonGraphProducer.cc | 83 --------- .../SonicTriton/test/TritonImageProducer.cc | 5 +- .../SonicTriton/test/tritonTest_cfg.py | 99 ++++++---- .../SonicTriton/test/unittest.sh | 32 ++++ 10 files changed, 304 insertions(+), 132 deletions(-) create mode 100644 HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc delete mode 100644 HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc create mode 100755 HeterogeneousCore/SonicTriton/test/unittest.sh diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 4dbb8f8577777..c6f6ace28e0c8 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -95,7 +95,7 @@ The script has two operations (`start` and `stop`) and the following options: * `-r [num]`: number of retries when starting container (default: 3) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs -* `-w [time]`: maximum time to wait for server to start (default: 60 seconds) +* `-w [time]`: maximum time to wait for server to start (default: 120 seconds) * `-h`: print help message and exit Additional details and caveats: diff --git a/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h index 0a98aa70d7f6b..6eaf35604586a 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h 
+++ b/HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h @@ -3,16 +3,19 @@ #include "HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h" #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/SonicTriton/interface/TritonService.h" +#include + template class TritonOneEDAnalyzer : public SonicOneEDAnalyzer { public: TritonOneEDAnalyzer(edm::ParameterSet const& cfg, const std::string& debugName) : SonicOneEDAnalyzer(cfg, debugName) { edm::Service ts; - const auto& clientPset = pset.getParameterSet("Client"); + const auto& clientPset = cfg.getParameterSet("Client"); ts->addModel(clientPset.getParameter("modelName"), clientPset.getParameter("modelConfigPath").fullPath()); } diff --git a/HeterogeneousCore/SonicTriton/python/TritonService_cff.py b/HeterogeneousCore/SonicTriton/python/TritonService_cff.py index 381635f86617d..e991d54e6f72f 100644 --- a/HeterogeneousCore/SonicTriton/python/TritonService_cff.py +++ b/HeterogeneousCore/SonicTriton/python/TritonService_cff.py @@ -1,12 +1,19 @@ from HeterogeneousCore.SonicTriton.TritonService_cfi import * -from HeterogeneousCore.CUDACore.SwitchProducerCUDA import _switch_cuda - from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton +_gpu_available_cached = None + +def _gpu_available(): + global _gpu_available_cached + if _gpu_available_cached is None: + import os + _gpu_available_cached = (os.system("nvidia-smi -L") == 0) + return _gpu_available_cached + enableSonicTriton.toModify(TritonService, fallback = dict( enable = True, - useGPU = _switch_cuda()[0], + useGPU = _gpu_available(), ), ) diff --git a/HeterogeneousCore/SonicTriton/test/BuildFile.xml b/HeterogeneousCore/SonicTriton/test/BuildFile.xml index 39a975ea89418..f3b5d01f6cfda 100644 --- a/HeterogeneousCore/SonicTriton/test/BuildFile.xml +++ 
b/HeterogeneousCore/SonicTriton/test/BuildFile.xml @@ -1,6 +1,10 @@ - - - - - - + + + + + + + + + + diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 74e53ad6e2a6c..0018c41bb4a0a 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -1,6 +1,6 @@ # SONIC TritonClient tests -Test producers `TritonImageProducer` and `TritonGraphProducer` are available. +Test modules `TritonImageProducer` and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available. They generate arbitrary inputs for inference (with ResNet50 or Graph Attention Network, respectively) and print the resulting output. First, the relevant data should be downloaded from Nvidia: @@ -16,12 +16,12 @@ The local server will use Singularity with CPU by default; if a local Nvidia GPU Run the image test: ``` -cmsRun tritonTest_cfg.py maxEvents=1 producer=TritonImageProducer +cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonImageProducer ``` Run the graph test: ``` -cmsRun tritonTest_cfg.py maxEvents=1 producer=TritonGraphProducer +cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonGraphProducer ``` ## Caveats diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc new file mode 100644 index 0000000000000..b3a3066ffb1c5 --- /dev/null +++ b/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc @@ -0,0 +1,175 @@ +#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +#include +#include +#include +#include + +class TritonGraphHelper { +public: + TritonGraphHelper(edm::ParameterSet const& cfg) + : nodeMin_(cfg.getParameter("nodeMin")), + nodeMax_(cfg.getParameter("nodeMax")), + 
edgeMin_(cfg.getParameter("edgeMin")), + edgeMax_(cfg.getParameter("edgeMax")) {} + void makeInput(edm::Event const& iEvent, TritonInputMap& iInput) const { + //get event-based seed for RNG + unsigned int runNum_uint = static_cast(iEvent.id().run()); + unsigned int lumiNum_uint = static_cast(iEvent.id().luminosityBlock()); + unsigned int evNum_uint = static_cast(iEvent.id().event()); + std::uint32_t seed = (lumiNum_uint << 10) + (runNum_uint << 20) + evNum_uint; + std::mt19937 rng(seed); + + std::uniform_int_distribution randint1(nodeMin_, nodeMax_); + int nnodes = randint1(rng); + std::uniform_int_distribution randint2(edgeMin_, edgeMax_); + int nedges = randint2(rng); + + //set shapes + auto& input1 = iInput.at("x__0"); + input1.setShape(0, nnodes); + auto data1 = std::make_shared>(1); + auto& vdata1 = (*data1)[0]; + vdata1.reserve(input1.sizeShape()); + + auto& input2 = iInput.at("edgeindex__1"); + input2.setShape(1, nedges); + auto data2 = std::make_shared>(1); + auto& vdata2 = (*data2)[0]; + vdata2.reserve(input2.sizeShape()); + + //fill + std::normal_distribution randx(-10, 4); + for (unsigned i = 0; i < input1.sizeShape(); ++i) { + vdata1.push_back(randx(rng)); + } + + std::uniform_int_distribution randedge(0, nnodes - 1); + for (unsigned i = 0; i < input2.sizeShape(); ++i) { + vdata2.push_back(randedge(rng)); + } + + // convert to server format + input1.toServer(data1); + input2.toServer(data2); + } + void makeOutput(const TritonOutputMap& iOutput, const std::string& debugName) const { + //check the results + const auto& output1 = iOutput.begin()->second; + // convert from server format + const auto& tmp = output1.fromServer(); + std::stringstream msg; + for (int i = 0; i < output1.shape()[0]; ++i) { + msg << "output " << i << ": "; + for (int j = 0; j < output1.shape()[1]; ++j) { + msg << tmp[0][output1.shape()[1] * i + j] << " "; + } + msg << "\n"; + } + edm::LogInfo(debugName) << msg.str(); + } + static void 
fillPSetDescription(edm::ParameterSetDescription& desc) { + desc.add("nodeMin", 100); + desc.add("nodeMax", 4000); + desc.add("edgeMin", 8000); + desc.add("edgeMax", 15000); + } + +private: + //members + unsigned nodeMin_, nodeMax_; + unsigned edgeMin_, edgeMax_; +}; + +class TritonGraphProducer : public TritonEDProducer<> { +public: + explicit TritonGraphProducer(edm::ParameterSet const& cfg) + : TritonEDProducer<>(cfg, "TritonGraphProducer"), + helper_(cfg) {} + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + helper_.makeInput(iEvent, iInput); + } + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + helper_.makeOutput(iOutput, debugName_); + } + ~TritonGraphProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + TritonGraphHelper::fillPSetDescription(desc); + //to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + //member + TritonGraphHelper helper_; +}; + +DEFINE_FWK_MODULE(TritonGraphProducer); + +#include "HeterogeneousCore/SonicTriton/interface/TritonEDFilter.h" + +class TritonGraphFilter : public TritonEDFilter<> { +public: + explicit TritonGraphFilter(edm::ParameterSet const& cfg) + : TritonEDFilter<>(cfg, "TritonGraphFilter"), + helper_(cfg) {} + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + helper_.makeInput(iEvent, iInput); + } + bool filter(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + helper_.makeOutput(iOutput, debugName_); + return true; + } + ~TritonGraphFilter() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + TritonGraphHelper::fillPSetDescription(desc); + 
//to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + //member + TritonGraphHelper helper_; +}; + +DEFINE_FWK_MODULE(TritonGraphFilter); + +#include "HeterogeneousCore/SonicTriton/interface/TritonOneEDAnalyzer.h" + +class TritonGraphAnalyzer : public TritonOneEDAnalyzer<> { +public: + explicit TritonGraphAnalyzer(edm::ParameterSet const& cfg) + : TritonOneEDAnalyzer<>(cfg, "TritonGraphAnalyzer"), + helper_(cfg) {} + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + helper_.makeInput(iEvent, iInput); + } + void analyze(edm::Event const& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + helper_.makeOutput(iOutput, debugName_); + } + ~TritonGraphAnalyzer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + TritonGraphHelper::fillPSetDescription(desc); + //to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + //member + TritonGraphHelper helper_; +}; + +DEFINE_FWK_MODULE(TritonGraphAnalyzer); diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc deleted file mode 100644 index f30993838cc22..0000000000000 --- a/HeterogeneousCore/SonicTriton/test/TritonGraphProducer.cc +++ /dev/null @@ -1,83 +0,0 @@ -#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" - -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Framework/interface/MakerMacros.h" - -#include -#include -#include -#include -#include -#include - -class TritonGraphProducer : public TritonEDProducer<> { -public: - explicit TritonGraphProducer(edm::ParameterSet const& cfg) : TritonEDProducer<>(cfg, "TritonGraphProducer") {} - void acquire(edm::Event 
const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { - //get event-based seed for RNG - unsigned int runNum_uint = static_cast(iEvent.id().run()); - unsigned int lumiNum_uint = static_cast(iEvent.id().luminosityBlock()); - unsigned int evNum_uint = static_cast(iEvent.id().event()); - std::uint32_t seed = (lumiNum_uint << 10) + (runNum_uint << 20) + evNum_uint; - std::mt19937 rng(seed); - - std::uniform_int_distribution randint1(100, 4000); - int nnodes = randint1(rng); - std::uniform_int_distribution randint2(8000, 15000); - int nedges = randint2(rng); - - //set shapes - auto& input1 = iInput.at("x__0"); - input1.setShape(0, nnodes); - auto data1 = std::make_shared>(1); - auto& vdata1 = (*data1)[0]; - vdata1.reserve(input1.sizeShape()); - - auto& input2 = iInput.at("edgeindex__1"); - input2.setShape(1, nedges); - auto data2 = std::make_shared>(1); - auto& vdata2 = (*data2)[0]; - vdata2.reserve(input2.sizeShape()); - - //fill - std::normal_distribution randx(-10, 4); - for (unsigned i = 0; i < input1.sizeShape(); ++i) { - vdata1.push_back(randx(rng)); - } - - std::uniform_int_distribution randedge(0, nnodes - 1); - for (unsigned i = 0; i < input2.sizeShape(); ++i) { - vdata2.push_back(randedge(rng)); - } - - // convert to server format - input1.toServer(data1); - input2.toServer(data2); - } - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { - //check the results - const auto& output1 = iOutput.begin()->second; - // convert from server format - const auto& tmp = output1.fromServer(); - std::stringstream msg; - for (int i = 0; i < output1.shape()[0]; ++i) { - msg << "output " << i << ": "; - for (int j = 0; j < output1.shape()[1]; ++j) { - msg << tmp[0][output1.shape()[1] * i + j] << " "; - } - msg << "\n"; - } - edm::LogInfo(debugName_) << msg.str(); - } - ~TritonGraphProducer() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - 
edm::ParameterSetDescription desc; - TritonClient::fillPSetDescription(desc); - //to ensure distinct cfi names - descriptions.addWithDefaultLabel(desc); - } -}; - -DEFINE_FWK_MODULE(TritonGraphProducer); diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index 1451e79e45049..4c602c659ea75 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -1,5 +1,6 @@ #include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Framework/interface/MakerMacros.h" @@ -17,7 +18,7 @@ class TritonImageProducer : public TritonEDProducer<> { batchSize_(cfg.getParameter("batchSize")), topN_(cfg.getParameter("topN")) { //load score list - std::string imageListFile(cfg.getParameter("imageList")); + std::string imageListFile(cfg.getParameter("imageList").fullPath()); std::ifstream ifile(imageListFile); if (ifile.is_open()) { std::string line; @@ -52,7 +53,7 @@ class TritonImageProducer : public TritonEDProducer<> { TritonClient::fillPSetDescription(desc); desc.add("batchSize", 1); desc.add("topN", 5); - desc.add("imageList"); + desc.add("imageList"); //to ensure distinct cfi names descriptions.addWithDefaultLabel(desc); } diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 77df8a9e7dd08..9295f0bf9b988 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -1,6 +1,6 @@ from FWCore.ParameterSet.VarParsing import VarParsing import FWCore.ParameterSet.Config as cms -import os, sys, json +import os, sys, json, six options = VarParsing("analysis") 
options.register("serverName", "default", VarParsing.multiplicity.singleton, VarParsing.varType.string) @@ -10,10 +10,12 @@ options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("threads", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("streams", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int) -options.register("producer", "TritonImageProducer", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("modules", "TritonImageProducer", VarParsing.multiplicity.list, VarParsing.varType.string) options.register("modelName","resnet50_netdef", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("mode","PseudoAsync", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) +options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) +options.register("device","auto", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.parseArguments() if len(options.params)>0: @@ -23,14 +25,11 @@ options.port = int(pdict["port"]) print("server = "+options.address+":"+str(options.port)) -# check producer/model -models = { - "TritonImageProducer": "resnet50_netdef", - "TritonGraphProducer": "gat_test", -} - -if options.producer not in models: - raise ValueError("Unknown producer: "+options.producer) +# check devices +options.device = options.device.lower() +allowed_devices = ["auto","cpu","gpu"] +if options.device not in allowed_devices: + raise ValueError("Unknown device: "+options.device) from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton process = cms.Process('tritonTest',enableSonicTriton) @@ -43,6 +42,8 @@ process.TritonService.verbose = options.verbose process.TritonService.fallback.verbose = options.verbose +if options.device != "auto": + 
process.TritonService.fallback.useGPU = options.device=="gpu" if len(options.address)>0: process.TritonService.servers.append( cms.PSet( @@ -52,33 +53,65 @@ ) ) -process.TritonProducer = cms.EDProducer(options.producer, - Client = cms.PSet( - mode = cms.string(options.mode), - preferredServer = cms.untracked.string(""), - timeout = cms.untracked.uint32(options.timeout), - modelName = cms.string(models[options.producer]), - modelVersion = cms.string(""), - modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(models[options.producer])), - verbose = cms.untracked.bool(options.verbose), - allowedTries = cms.untracked.uint32(0), - ) -) -if options.producer=="TritonImageProducer": - process.TritonProducer.batchSize = cms.uint32(1) - process.TritonProducer.topN = cms.uint32(5) - process.TritonProducer.imageList = cms.string("../data/models/resnet50_netdef/resnet50_labels.txt") - # Let it run -process.p = cms.Path( - process.TritonProducer -) +process.p = cms.Path() + +# check module/model +models = { + "TritonImageProducer": "resnet50_netdef", + "TritonGraphProducer": "gat_test", + "TritonGraphFilter": "gat_test", + "TritonGraphAnalyzer": "gat_test", +} + +modules = { + "Producer": cms.EDProducer, + "Filter": cms.EDFilter, + "Analyzer": cms.EDAnalyzer, +} + +keepMsgs = ['TritonClient','TritonService'] +for module in options.modules: + if module not in models: + raise ValueError("Unknown module: "+module) + Module = [obj for name,obj in six.iteritems(modules) if name in module][0] + setattr(process, module, + Module(module, + Client = cms.PSet( + mode = cms.string(options.mode), + preferredServer = cms.untracked.string(""), + timeout = cms.untracked.uint32(options.timeout), + modelName = cms.string(models[module]), + modelVersion = cms.string(""), + modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(models[module])), + verbose = cms.untracked.bool(options.verbose), + 
allowedTries = cms.untracked.uint32(0), + ) + ) + ) + processModule = getattr(process, module) + if module=="TritonImageProducer": + processModule.batchSize = cms.uint32(1) + processModule.topN = cms.uint32(5) + processModule.imageList = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/resnet50_netdef/resnet50_labels.txt") + elif "TritonGraph" in module: + if options.unittest: + # reduce input size for unit test + processModule.nodeMin = cms.uint32(1) + processModule.nodeMax = cms.uint32(10) + processModule.edgeMin = cms.uint32(20) + processModule.edgeMax = cms.uint32(40) + else: + processModule.nodeMin = cms.uint32(100) + processModule.nodeMax = cms.uint32(4000) + processModule.edgeMin = cms.uint32(8000) + processModule.edgeMax = cms.uint32(15000) + process.p += processModule + keepMsgs.extend([module,module+':TritonClient']) process.load('FWCore/MessageService/MessageLogger_cfi') process.MessageLogger.cerr.FwkReport.reportEvery = 500 -keep_msgs = [options.producer,options.producer+':TritonClient','TritonClient','TritonService'] -for msg in keep_msgs: - +for msg in keepMsgs: setattr(process.MessageLogger.cerr,msg, cms.untracked.PSet( limit = cms.untracked.int32(10000000), diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh new file mode 100755 index 0000000000000..fc2a704adf08b --- /dev/null +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +LOCALTOP=$1 + +# the test is not possible if: +# 1. avx instructions not supported (needed for singularity on CPU) +# 2. the model files cannot be found +# 3. 
singularity not found or not usable +# so just return true in those cases + +if grep -q avx /proc/cpuinfo; then + echo "has avx" +else + echo "missing avx" + exit 0 +fi + +if [ -n "$(find $(echo $CMSSW_SEARCH_PATH | tr ':' ' ') -path "*/HeterogeneousCore/SonicTriton/data/models/gat_test/config.pbtxt" 2>/dev/null)" ]; then + echo "has model file" +else + echo "missing model file" + exit 0 +fi + +if type singularity >& /dev/null; then + echo "has singularity" +else + echo "missing singularity" + exit 0 +fi + +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 From 4cd5c9135c83c5ffa679601753930bb34be88257 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 22 Dec 2020 16:26:00 -0600 Subject: [PATCH 09/31] code format --- .../SonicTriton/test/TritonGraphModules.cc | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc b/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc index b3a3066ffb1c5..46ced5e330291 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc @@ -12,7 +12,7 @@ class TritonGraphHelper { public: TritonGraphHelper(edm::ParameterSet const& cfg) - : nodeMin_(cfg.getParameter("nodeMin")), + : nodeMin_(cfg.getParameter("nodeMin")), nodeMax_(cfg.getParameter("nodeMax")), edgeMin_(cfg.getParameter("edgeMin")), edgeMax_(cfg.getParameter("edgeMax")) {} @@ -55,7 +55,7 @@ class TritonGraphHelper { // convert to server format input1.toServer(data1); - input2.toServer(data2); + input2.toServer(data2); } void makeOutput(const TritonOutputMap& iOutput, const std::string& debugName) const { //check the results @@ -88,8 +88,7 @@ class TritonGraphHelper { class TritonGraphProducer : public TritonEDProducer<> { public: explicit TritonGraphProducer(edm::ParameterSet const& cfg) - : 
TritonEDProducer<>(cfg, "TritonGraphProducer"), - helper_(cfg) {} + : TritonEDProducer<>(cfg, "TritonGraphProducer"), helper_(cfg) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { helper_.makeInput(iEvent, iInput); } @@ -117,9 +116,7 @@ DEFINE_FWK_MODULE(TritonGraphProducer); class TritonGraphFilter : public TritonEDFilter<> { public: - explicit TritonGraphFilter(edm::ParameterSet const& cfg) - : TritonEDFilter<>(cfg, "TritonGraphFilter"), - helper_(cfg) {} + explicit TritonGraphFilter(edm::ParameterSet const& cfg) : TritonEDFilter<>(cfg, "TritonGraphFilter"), helper_(cfg) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { helper_.makeInput(iEvent, iInput); } @@ -149,8 +146,7 @@ DEFINE_FWK_MODULE(TritonGraphFilter); class TritonGraphAnalyzer : public TritonOneEDAnalyzer<> { public: explicit TritonGraphAnalyzer(edm::ParameterSet const& cfg) - : TritonOneEDAnalyzer<>(cfg, "TritonGraphAnalyzer"), - helper_(cfg) {} + : TritonOneEDAnalyzer<>(cfg, "TritonGraphAnalyzer"), helper_(cfg) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { helper_.makeInput(iEvent, iInput); } From ac3d194799849be2a1eba6861c5f3796f569a4c4 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 22 Dec 2020 17:19:48 -0600 Subject: [PATCH 10/31] minor doc update --- HeterogeneousCore/SonicTriton/test/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 0018c41bb4a0a..a2f4b7c65d554 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -10,7 +10,7 @@ First, the relevant data should be downloaded from Nvidia: A local Triton server will be launched automatically when the tests run. 
The local server will use Singularity with CPU by default; if a local Nvidia GPU is available, it will be used instead. -(This behavior can also be controlled manually by modifying [tritonTest_cfg.py](./tritonTest_cfg.py).) +(This behavior can also be controlled manually using the "device" argument to [tritonTest_cfg.py](./tritonTest_cfg.py).) ## Test commands From 5c7b0a1053011c4e0686c35160a036987d066456 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 29 Dec 2020 19:00:14 +0100 Subject: [PATCH 11/31] improve dryrun --- HeterogeneousCore/SonicTriton/scripts/triton | 38 +++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/triton b/HeterogeneousCore/SonicTriton/scripts/triton index 1abf8dac2734a..decbf3a96daa8 100755 --- a/HeterogeneousCore/SonicTriton/scripts/triton +++ b/HeterogeneousCore/SonicTriton/scripts/triton @@ -120,10 +120,8 @@ start_singularity(){ # triton server image may need to modify contents of opt/tritonserver/lib/ # but cvmfs is read-only # -> make a writable local directory with the same contents - if [ -z "$DRYRUN" ]; then - mkdir ${LIB} - ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/ - fi + $DRYRUN mkdir ${LIB} + $DRYRUN ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/ # mount all model repositories MOUNTARGS="" @@ -207,9 +205,9 @@ wait_server(){ } cleanup(){ - if [ -z "$DRYRUN" ] && [ -n "$CLEANUP" ]; then - cd "$TOPDIR" - rm -rf "$TMPDIR" + if [ -n "$CLEANUP" ]; then + $DRYRUN cd "$TOPDIR" + $DRYRUN rm -rf "$TMPDIR" fi } @@ -261,15 +259,13 @@ if [ "$OP" == start ]; then list_models # make sure everything happens in tmp dir - if [ -z "$DRYRUN" ]; then - mkdir "$TMPDIR" - MKDIR_EXIT=$? - if [ "$MKDIR_EXIT" -ne 0 ]; then - echo "Could not create temp dir: $TMPDIR" - exit "$MKDIR_EXIT" - fi - cd "$TMPDIR" + $DRYRUN mkdir "$TMPDIR" + MKDIR_EXIT=$? 
+ if [ "$MKDIR_EXIT" -ne 0 ]; then + echo "Could not create temp dir: $TMPDIR" + exit "$MKDIR_EXIT" fi + $DRYRUN cd "$TMPDIR" START_EXIT="" for ((counter=0; counter < ${RETRIES}; counter++)); do @@ -289,13 +285,11 @@ if [ "$OP" == start ]; then wait_server else # check for tmp dir - if [ -z "$DRYRUN" ]; then - if [ -d "$TMPDIR" ]; then - cd "$TMPDIR" - elif [ -z "$FORCE" ]; then - echo "Error: attempt to stop unknown container $SERVER" - exit 1 - fi + if [ -d "$TMPDIR" ]; then + $DRYRUN cd "$TMPDIR" + elif [ -z "$FORCE" ]; then + echo "Error: attempt to stop unknown container $SERVER" + exit 1 fi $STOP_FN From 51bbe5ffae5d38021f9d47e25478e297b7ff2b2e Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 29 Dec 2020 16:34:51 -0600 Subject: [PATCH 12/31] fix default --- HeterogeneousCore/SonicTriton/scripts/triton | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/triton b/HeterogeneousCore/SonicTriton/scripts/triton index decbf3a96daa8..b82fe0be2b994 100755 --- a/HeterogeneousCore/SonicTriton/scripts/triton +++ b/HeterogeneousCore/SonicTriton/scripts/triton @@ -4,7 +4,7 @@ USEDOCKER="" GPU="" VERBOSE="" -WTIME=60 +WTIME=120 SERVER=triton_server_instance RETRIES=3 REPOS=() From c2e5109c7c61cd92fa914bb559e593572313c9e8 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 29 Dec 2020 18:52:02 -0600 Subject: [PATCH 13/31] use unordered_map instead of unordered_set --- .../SonicTriton/interface/TritonService.h | 47 +++---------- .../SonicTriton/src/TritonClient.cc | 2 + .../SonicTriton/src/TritonService.cc | 69 ++++++++++--------- 3 files changed, 50 insertions(+), 68 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index cac4d8f36078c..7284982b260f3 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -3,10 +3,11 @@ #include 
"FWCore/ParameterSet/interface/ParameterSet.h" -#include "tbb/concurrent_unordered_set.h" +#include "tbb/concurrent_unordered_map.h" #include #include +#include #include #include #include @@ -44,46 +45,24 @@ class TritonService { }; struct Server { Server(const edm::ParameterSet& pset) - : name(pset.getUntrackedParameter("name")), - url(pset.getUntrackedParameter("address") + ":" + + : url(pset.getUntrackedParameter("address") + ":" + std::to_string(pset.getUntrackedParameter("port"))), - isFallback(name == fallbackName) {} - Server(const std::string& name_, const std::string& url_ = "") - : name(name_), url(url_), isFallback(name == fallbackName) {} - - struct Hash { - size_t operator()(const Server& obj) const { return hashObj(obj.name); } - std::hash hashObj; - }; - - struct Equal { - bool operator()(const Server& lhs, const Server& rhs) const { return lhs.name == rhs.name; } - }; + isFallback(pset.getUntrackedParameter("name") == fallbackName) {} + Server(const std::string& name_, const std::string& url_) : url(url_), isFallback(name_ == fallbackName) {} //members - std::string name; std::string url; bool isFallback; - mutable std::unordered_set models; + std::unordered_set models; static const std::string fallbackName; static const std::string fallbackUrl; }; struct Model { - Model(const std::string& name_, const std::string& path_ = "") : name(name_), path(path_) {} - - struct Hash { - size_t operator()(const Model& obj) const { return hashObj(obj.name); } - std::hash hashObj; - }; - - struct Equal { - bool operator()(const Model& lhs, const Model& rhs) const { return lhs.name == rhs.name; } - }; + Model(const std::string& path_ = "") : path(path_) {} //members - std::string name; std::string path; - mutable std::unordered_set servers; + std::unordered_set servers; }; TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); @@ -98,18 +77,14 @@ class TritonService { private: void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, 
edm::ProcessContext const&); - //to search without full object - auto findServer(const std::string& name) const { return servers_.find(Server(name)); } - auto findModel(const std::string& name) const { return models_.find(Model(name)); } - bool verbose_; FallbackOpts fallbackOpts_; bool startedFallback_; //concurrent data type is used because addModel() might be called by multiple threads - tbb::concurrent_unordered_set unservedModels_; + tbb::concurrent_unordered_map unservedModels_; //this is a lazy and inefficient many:many map - std::unordered_set servers_; - std::unordered_set models_; + std::unordered_map servers_; + std::unordered_map models_; }; #endif diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index a12e2255ef66e..a597198a0fe54 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -31,6 +31,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d edm::Service ts; const auto& [url, isFallbackCPU] = ts->serverAddress(options_.model_name_, params.getUntrackedParameter("preferredServer")); + if (verbose_) + edm::LogInfo(fullDebugName_) << "Using server: " << url; //enforce sync mode for fallback CPU server to avoid contention //todo: could enforce async mode otherwise (unless mode was specified by user?) 
if (isFallbackCPU) diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index bf58160fb43b4..5c7e564106a00 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -14,6 +14,8 @@ #include #include #include +#include +#include namespace ni = nvidia::inferenceserver; namespace nic = ni::client; @@ -30,46 +32,49 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr //include fallback server in set if enabled if (fallbackOpts_.enable) - servers_.emplace(Server::fallbackName, Server::fallbackUrl); + servers_.emplace(std::piecewise_construct, + std::forward_as_tuple(Server::fallbackName), + std::forward_as_tuple(Server::fallbackName, Server::fallbackUrl)); //loop over input servers: check which models they have std::string msg; if (verbose_) msg = "List of models for each server:\n"; for (const auto& serverPset : pset.getUntrackedParameterSetVector("servers")) { - Server tmp(serverPset); + const std::string& serverName(serverPset.getUntrackedParameter("name")); //ensure uniqueness - auto sit = servers_.find(tmp); - if (sit != servers_.end()) + auto [sit, unique] = servers_.emplace(serverName, serverPset); + if (!unique) throw cms::Exception("DuplicateServer") - << "Not allowed to specify more than one server with same name (" << tmp.name << ")"; + << "Not allowed to specify more than one server with same name (" << serverName << ")"; + auto& serverInfo(sit->second); std::unique_ptr client; triton_utils::throwIfError( - nic::InferenceServerGrpcClient::Create(&client, tmp.url, false), - "TritonService(): unable to create inference context for " + tmp.name + " (" + tmp.url + ")"); + nic::InferenceServerGrpcClient::Create(&client, serverInfo.url, false), + "TritonService(): unable to create inference context for " + serverName + " (" + serverInfo.url + ")"); inference::RepositoryIndexResponse repoIndexResponse; 
triton_utils::throwIfError( client->ModelRepositoryIndex(&repoIndexResponse), - "TritonService(): unable to get repository index for " + tmp.name + " (" + tmp.url + ")"); + "TritonService(): unable to get repository index for " + serverName + " (" + serverInfo.url + ")"); //servers keep track of models and vice versa if (verbose_) - msg += tmp.name + ": "; + msg += serverName + ": "; for (const auto& modelIndex : repoIndexResponse.models()) { const auto& modelName = modelIndex.name(); - auto mit = findModel(modelName); + auto mit = models_.find(modelName); if (mit == models_.end()) - mit = models_.emplace(modelName).first; - mit->servers.insert(tmp.name); - tmp.models.insert(modelName); + mit = models_.emplace(modelName, "").first; + auto& modelInfo(mit->second); + modelInfo.servers.insert(serverName); + serverInfo.models.insert(modelName); if (verbose_) msg += modelName + ", "; } if (verbose_) msg += "\n"; - servers_.insert(tmp); } if (verbose_) edm::LogInfo("TritonService") << msg; @@ -77,7 +82,7 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr void TritonService::addModel(const std::string& modelName, const std::string& path) { //if model is not in the list, then no specified server provides it - auto mit = findModel(modelName); + auto mit = models_.find(modelName); if (mit == models_.end()) unservedModels_.emplace(modelName, path); } @@ -85,26 +90,26 @@ void TritonService::addModel(const std::string& modelName, const std::string& pa //second return value is only true if fallback CPU server is being used std::pair TritonService::serverAddress(const std::string& model, const std::string& preferred) const { - auto mit = findModel(model); + auto mit = models_.find(model); if (mit == models_.end()) throw cms::Exception("MissingModel") << "There are no servers that provide model " << model; + const auto& modelInfo(mit->second); + const auto& modelServers = modelInfo.servers; - const auto& modelServers = mit->servers; - + auto msit = 
modelServers.end(); if (!preferred.empty()) { - auto sit = modelServers.find(preferred); + msit = modelServers.find(preferred); //todo: add a "strict" parameter to stop execution if preferred server isn't found? - if (sit == modelServers.end()) + if (msit == modelServers.end()) edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model << " not available, will choose another server"; - else - return std::make_pair(findServer(preferred)->url, false); } + const auto& serverName(msit == modelServers.end() ? *modelServers.begin() : preferred); //todo: use some algorithm to select server rather than just picking arbitrarily - auto sit = findServer(*modelServers.begin()); - bool isFallbackCPU = sit->isFallback and !fallbackOpts_.useGPU; - return std::make_pair(sit->url, isFallbackCPU); + const auto& serverInfo(servers_.find(serverName)->second); + bool isFallbackCPU = serverInfo.isFallback and !fallbackOpts_.useGPU; + return std::make_pair(serverInfo.url, isFallbackCPU); } void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&) { @@ -116,13 +121,13 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: if (verbose_) msg = "List of models for fallback server: "; //all unserved models are provided by fallback server - auto sit = findServer(Server::fallbackName); - for (const auto& model : unservedModels_) { - auto mit = models_.insert(model).first; - mit->servers.insert(Server::fallbackName); - sit->models.insert(mit->name); + auto& serverInfo(servers_.find(Server::fallbackName)->second); + for (const auto& [modelName, model] : unservedModels_) { + auto& modelInfo(models_.emplace(modelName, model).first->second); + modelInfo.servers.insert(Server::fallbackName); + serverInfo.models.insert(modelName); if (verbose_) - msg += mit->name + ", "; + msg += modelName + ", "; } if (verbose_) edm::LogInfo("TritonService") << msg; @@ -141,7 +146,7 @@ void 
TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: command += " -r " + std::to_string(fallbackOpts_.retries); if (fallbackOpts_.wait >= 0) command += " -w " + std::to_string(fallbackOpts_.wait); - for (const auto& model : unservedModels_) { + for (const auto& [modelName, model] : unservedModels_) { command += " -m " + model.path; } //don't need this anymore From 0557185d1f316bb160ff3216919f6627a5887dcd Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 29 Dec 2020 22:50:00 -0600 Subject: [PATCH 14/31] handle module construction and destruction, assume serial --- HeterogeneousCore/SonicTriton/BuildFile.xml | 1 - .../SonicTriton/interface/TritonService.h | 22 ++++++--- .../SonicTriton/src/TritonService.cc | 45 ++++++++++++++++++- 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/BuildFile.xml b/HeterogeneousCore/SonicTriton/BuildFile.xml index 555f9930704bf..b574f395f4d12 100644 --- a/HeterogeneousCore/SonicTriton/BuildFile.xml +++ b/HeterogeneousCore/SonicTriton/BuildFile.xml @@ -4,7 +4,6 @@ - diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index 7284982b260f3..245ac6e295fee 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -3,8 +3,6 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "tbb/concurrent_unordered_map.h" - #include #include #include @@ -18,6 +16,7 @@ namespace edm { class ConfigurationDescriptions; class PathsAndConsumesOfModulesBase; class ProcessContext; + class ModuleDescription; } // namespace edm class TritonService { @@ -63,6 +62,14 @@ class TritonService { //members std::string path; std::unordered_set servers; + std::unordered_set modules; + }; + struct Module { + //currently assumes that a module can only have one associated model + Module(const std::string& model_) : model(model_) {} + + //members 
+ std::string model; }; TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); @@ -75,16 +82,21 @@ class TritonService { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: + void preModuleConstruction(edm::ModuleDescription const&); + void postModuleConstruction(edm::ModuleDescription const&); + void preModuleDestruction(edm::ModuleDescription const&); void preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&); bool verbose_; FallbackOpts fallbackOpts_; + unsigned currentModuleId_; + bool allowAddModel_; bool startedFallback_; - //concurrent data type is used because addModel() might be called by multiple threads - tbb::concurrent_unordered_map unservedModels_; - //this is a lazy and inefficient many:many map + std::unordered_map unservedModels_; + //this represents a many:many:many map std::unordered_map servers_; std::unordered_map models_; + std::unordered_map modules_; }; #endif diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 5c7e564106a00..70fd6dd1b3ebf 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -1,6 +1,7 @@ #include "HeterogeneousCore/SonicTriton/interface/TritonService.h" #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h" +#include "DataFormats/Provenance/interface/ModuleDescription.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" @@ -26,7 +27,13 @@ const std::string TritonService::Server::fallbackUrl{"0.0.0.0:8001"}; TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) : verbose_(pset.getUntrackedParameter("verbose")), fallbackOpts_(pset.getParameterSet("fallback")), + currentModuleId_(0), + allowAddModel_(false), 
startedFallback_(false) { + //module construction is assumed to be serial (correct at the time this code was written) + areg.watchPreModuleConstruction(this, &TritonService::preModuleConstruction); + areg.watchPostModuleConstruction(this, &TritonService::postModuleConstruction); + areg.watchPreModuleDestruction(this, &TritonService::preModuleDestruction); //fallback server will be launched (if needed) before beginJob areg.watchPreBeginJob(this, &TritonService::preBeginJob); @@ -80,11 +87,45 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr edm::LogInfo("TritonService") << msg; } +void TritonService::preModuleConstruction(edm::ModuleDescription const& desc) { + currentModuleId_ = desc.id(); + allowAddModel_ = true; +} + void TritonService::addModel(const std::string& modelName, const std::string& path) { + //should only be called in module constructors + if (!allowAddModel_) + throw cms::Exception("DisallowedAddModel") << "Attempt to call addModel() outside of module constructors"; //if model is not in the list, then no specified server provides it auto mit = models_.find(modelName); - if (mit == models_.end()) - unservedModels_.emplace(modelName, path); + if (mit == models_.end()) { + auto& modelInfo(unservedModels_.emplace(modelName, path).first->second); + modelInfo.modules.insert(currentModuleId_); + //only keep track of modules that need unserved models + modules_.emplace(currentModuleId_, modelName); + } +} + +void TritonService::postModuleConstruction(edm::ModuleDescription const& desc) { allowAddModel_ = false; } + +void TritonService::preModuleDestruction(edm::ModuleDescription const& desc) { + //remove destructed modules from unserved list + if (unservedModels_.empty()) + return; + auto id = desc.id(); + auto oit = modules_.find(id); + if (oit != modules_.end()) { + const auto& moduleInfo(oit->second); + auto mit = unservedModels_.find(moduleInfo.model); + if (mit != unservedModels_.end()) { + auto& modelInfo(mit->second); 
+ modelInfo.modules.erase(id); + //remove a model if it is no longer needed by any modules + if (modelInfo.modules.empty()) + unservedModels_.erase(mit); + } + modules_.erase(oit); + } } //second return value is only true if fallback CPU server is being used From a2addb9833d4dff21f2fa4276988ec942a3ddaa6 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 29 Dec 2020 23:13:09 -0600 Subject: [PATCH 15/31] use popen and uuid --- .../SonicTriton/src/TritonService.cc | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 70fd6dd1b3ebf..f82f805040cdb 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -24,6 +24,27 @@ namespace nic = ni::client; const std::string TritonService::Server::fallbackName{"fallback"}; const std::string TritonService::Server::fallbackUrl{"0.0.0.0:8001"}; +namespace { + std::pair execSys(const std::string& cmd) { + //redirect stderr to stdout + auto pipe = popen((cmd + " 2>&1").c_str(), "r"); + if (!pipe) + throw cms::Exception("SystemError") << "popen() failed for command: " << cmd; + + //extract output + constexpr static unsigned buffSize = 128; + std::array buffer; + std::string result; + while (!feof(pipe)) { + if (fgets(buffer.data(), buffSize, pipe)) + result += buffer.data(); + } + + int rv = pclose(pipe); + return std::make_pair(result, rv); + } +} // namespace + TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) : verbose_(pset.getUntrackedParameter("verbose")), fallbackOpts_(pset.getParameterSet("fallback")), @@ -195,7 +216,9 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: //get a random temporary directory if none specified if (fallbackOpts_.tempDir.empty()) { - auto tmp_dir_path{std::filesystem::temp_directory_path() /= std::tmpnam(nullptr)}; + auto 
[uuid, rv] = execSys("uuidgen | sed 's/-//g'"); + uuid.pop_back(); + auto tmp_dir_path{std::filesystem::temp_directory_path() /= uuid}; fallbackOpts_.tempDir = tmp_dir_path.string(); } //special case ".": use script default (temp dir = .$instanceName) @@ -206,12 +229,12 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: if (verbose_) edm::LogInfo("TritonService") << command; - else - command += " >>/dev/null 2>>/dev/null"; //mark as started before executing in case of ctrl+c while command is running startedFallback_ = true; - int rv = std::system(command.c_str()); + const auto& [output, rv] = execSys(command); + if (verbose_ or rv != 0) + edm::LogInfo("TritonService") << output; if (rv != 0) throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; } @@ -236,10 +259,10 @@ TritonService::~TritonService() { if (verbose_) edm::LogInfo("TritonService") << command; - else - command += " >>/dev/null 2>>/dev/null"; - int rv = std::system(command.c_str()); + const auto& [output, rv] = execSys(command); + if (verbose_ or rv != 0) + edm::LogInfo("TritonService") << output; if (rv != 0) edm::LogError("FallbackFailed") << "Stopping the fallback server failed with exit code " << rv; } From 4f6345d165bb6a83add93b5417cbe25cba610cd5 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 30 Dec 2020 11:34:20 -0600 Subject: [PATCH 16/31] rename triton -> cmsTriton --- HeterogeneousCore/SonicTriton/README.md | 4 ++-- HeterogeneousCore/SonicTriton/scripts/{triton => cmsTriton} | 2 +- HeterogeneousCore/SonicTriton/src/TritonService.cc | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename HeterogeneousCore/SonicTriton/scripts/{triton => cmsTriton} (99%) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index c6f6ace28e0c8..50f9b1865713e 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -82,7 +82,7 @@ In a 
SONIC Triton producer, the basic flow should follow this pattern: ## Services -A script [`triton`](./scripts/triton) is provided to launch and manage local servers. +A script [`cmsTriton`](./scripts/cmsTriton) is provided to launch and manage local servers. The script has two operations (`start` and `stop`) and the following options: * `-c`: don't cleanup temporary dir (for debugging) * `-D`: dry run: print container commands rather than executing them @@ -111,7 +111,7 @@ The `-r` (retry) flag exists to work around this issue. A central `TritonService` is provided to keep track of all available servers and which models they can serve. The servers will automatically be assigned to clients at startup. -If some models are not served by any server, the `TritonService` can launch a fallback server using the `triton` script described above. +If some models are not served by any server, the `TritonService` can launch a fallback server using the `cmsTriton` script described above. If the process modifiers `enableSonicTriton` or `allSonicTriton` are activated, the fallback server will launch automatically if needed and will use a local GPU if one is available. If the fallback server uses CPU, clients that use the fallback server will automatically be set to `Sync` mode. 
diff --git a/HeterogeneousCore/SonicTriton/scripts/triton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton similarity index 99% rename from HeterogeneousCore/SonicTriton/scripts/triton rename to HeterogeneousCore/SonicTriton/scripts/cmsTriton index b82fe0be2b994..50da585eb313a 100755 --- a/HeterogeneousCore/SonicTriton/scripts/triton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -16,7 +16,7 @@ DRYRUN="" usage() { ECHO="echo -e" - $ECHO "triton [options] [start|stop]" + $ECHO "cmsTriton [options] [start|stop]" $ECHO $ECHO "Options:" $ECHO "-c \t don't cleanup temporary dir (for debugging)" diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index f82f805040cdb..e091e6a1353bf 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -195,7 +195,7 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: edm::LogInfo("TritonService") << msg; //assemble server start command - std::string command("triton"); + std::string command("cmsTriton"); if (fallbackOpts_.verbose) command += " -v"; if (fallbackOpts_.useDocker) @@ -244,7 +244,7 @@ TritonService::~TritonService() { return; //assemble server stop command - std::string command("triton"); + std::string command("cmsTriton"); if (fallbackOpts_.verbose) command += " -v"; From 7a73dca850572d54eb738cc59abd374621b2cf65 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 11 Jan 2021 14:32:56 -0600 Subject: [PATCH 17/31] automate fallback server shutdown, handle more error codes --- HeterogeneousCore/SonicTriton/README.md | 3 +- .../SonicTriton/interface/TritonService.h | 4 +- .../SonicTriton/scripts/cmsTriton | 67 +++++++++++++------ .../SonicTriton/src/TritonService.cc | 43 ++++-------- 4 files changed, 64 insertions(+), 53 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 
50f9b1865713e..49da6afdb86b7 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -27,7 +27,7 @@ The model information from the server can be printed by enabling `verbose` outpu * `modelName`: name of model with which to perform inference * `modelVersion`: version number of model (default: -1, use latest available version on server) * `modelConfigPath`: path to `config.pbtxt` file for the model (using `edm::FileInPath`) -* `preferredServer`: name of preferred server (see [Services](#services) below) +* `preferredServer`: name of preferred server, for testing (see [Services](#services) below) * `timeout`: maximum allowed time for a request * `outputs`: optional, specify which output(s) the server should send @@ -92,6 +92,7 @@ The script has two operations (`start` and `stop`) and the following options: * `-M [dir]`: model repository (can be given more than once) * `-m [dir]`: specific model directory (can be given more than one) * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) +* `-p [pid]`: PID of parent process to ensure shutdown (used internally by `cmsRun`) * `-r [num]`: number of retries when starting container (default: 3) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index 245ac6e295fee..67b293f776c32 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -25,6 +25,7 @@ class TritonService { struct FallbackOpts { FallbackOpts(const edm::ParameterSet& pset) : enable(pset.getUntrackedParameter("enable")), + debug(pset.getUntrackedParameter("debug")), verbose(pset.getUntrackedParameter("verbose")), useDocker(pset.getUntrackedParameter("useDocker")), 
useGPU(pset.getUntrackedParameter("useGPU")), @@ -34,6 +35,7 @@ class TritonService { tempDir(pset.getUntrackedParameter("tempDir")) {} bool enable; + bool debug; bool verbose; bool useDocker; bool useGPU; @@ -73,7 +75,7 @@ class TritonService { }; TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg); - ~TritonService(); + ~TritonService() = default; //accessors void addModel(const std::string& modelName, const std::string& path); diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 50da585eb313a..3bac5eeb5d384 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -13,6 +13,7 @@ FORCE="" CLEANUP=true TMPDIR="" DRYRUN="" +PARENTPID="" usage() { ECHO="echo -e" @@ -27,6 +28,7 @@ usage() { $ECHO "-M [dir] \t model repository (can be given more than once)" $ECHO "-m [dir] \t specific model directory (can be given more than one)" $ECHO "-n [name] \t name of container instance, also used for default hidden temporary dir (default: ${SERVER})" + $ECHO "-p [pid] \t PID of parent process to ensure shutdown" $ECHO "-r [num] \t number of retries when starting container (default: ${RETRIES})" $ECHO "-t [dir] \t non-default hidden temporary dir" $ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" @@ -45,7 +47,7 @@ if [ -e /run/shm ]; then SHM=/run/shm fi -while getopts "cDdfgM:m:n:r:t:vw:h" opt; do +while getopts "cDdfgM:m:n:p:r:t:vw:h" opt; do case "$opt" in c) CLEANUP="" ;; @@ -63,6 +65,8 @@ while getopts "cDdfgM:m:n:r:t:vw:h" opt; do ;; n) SERVER="$OPTARG" ;; + p) PARENTPID="$OPTARG" + ;; r) RETRIES="$OPTARG" ;; t) TMPDIR="$OPTARG" @@ -97,6 +101,7 @@ fi DOCKER="sudo docker" IMAGE=fastml/triton-torchgeo:20.09-py3-geometric LOG="log_${SERVER}.log" +STOPLOG="log_stop_${SERVER}.log" LIB=lib STARTED_INDICATOR="Started GRPCInferenceService" EXTRA="" @@ -155,9 +160,9 @@ start_singularity(){ } 
stop_docker(){ - # keep log (outside of tmp dir) + # keep log if [ -z "$DRYRUN" ]; then - if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& "${TOPDIR}/${LOG}"; fi + if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& "$LOG"; fi fi $DRYRUN $DOCKER stop ${SERVER} @@ -166,11 +171,6 @@ stop_docker(){ stop_singularity(){ $DRYRUN singularity instance stop ${SERVER} - - # move log out of tmp dir - if [ -z "$DRYRUN" ]; then - if [ -n "$VERBOSE" ]; then mv "$LOG" "$TOPDIR"; fi - fi } test_docker(){ @@ -191,8 +191,7 @@ wait_server(){ while ! $WAIT_COND >& /dev/null; do if [ "$COUNT" -gt "$WTIME" ]; then echo "timed out waiting for server to start" - VERBOSE=true $STOP_FN - cleanup + auto_stop true exit 1 else COUNT=$(($COUNT + 1)) @@ -201,14 +200,6 @@ wait_server(){ done echo "server is ready!" - exit 0 -} - -cleanup(){ - if [ -n "$CLEANUP" ]; then - $DRYRUN cd "$TOPDIR" - $DRYRUN rm -rf "$TMPDIR" - fi } list_models(){ @@ -228,6 +219,35 @@ list_models(){ read -a REPOS <<< "$(printf "%s\n" "${REPOS[@]}" | sort -u | tr '\n' ' ')" } +auto_stop(){ + # allow enabling verbosity here even if disabled at top level + # but otherwise use top-level setting + if [ -n "$1" ]; then VERBOSE="$1"; fi + PARENTPID="$2" + + if [ -n "$PARENTPID" ]; then + while kill -0 $PARENTPID >& /dev/null; do + sleep 1 + done + fi + $STOP_FN + + # move logs out of tmp dir + if [ -z "$DRYRUN" ]; then + if [ -n "$VERBOSE" ]; then + mv "$LOG" "$TOPDIR" + if [ -f "$STOPLOG" ]; then + mv "$STOPLOG" "$TOPDIR" + fi + fi + fi + + if [ -n "$CLEANUP" ]; then + $DRYRUN cd "$TOPDIR" + $DRYRUN rm -rf "$TMPDIR" + fi +} + if [ -n "$USEDOCKER" ]; then if [ -n "$GPU" ]; then EXTRA="--gpus all" @@ -249,8 +269,7 @@ fi if [ "$OP" == start ]; then # handle cleaning up if [ -n "$FORCE" ]; then - $STOP_FN - cleanup + auto_stop elif [ -d "$TMPDIR" ]; then echo "Error: this container may already exist (override with -f)" exit 1 @@ -283,6 +302,11 @@ if [ "$OP" == start ]; then exit "$START_EXIT" fi wait_server + + # if parent 
PID is provided, automatically stop server when finished + if [ -n "$PARENTPID" ]; then + auto_stop "" "$PARENTPID" >& "$STOPLOG" & + fi else # check for tmp dir if [ -d "$TMPDIR" ]; then @@ -292,6 +316,5 @@ else exit 1 fi - $STOP_FN - cleanup + auto_stop fi diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index e091e6a1353bf..49ef9278a475c 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -17,6 +17,7 @@ #include #include #include +#include namespace ni = nvidia::inferenceserver; namespace nic = ni::client; @@ -28,8 +29,9 @@ namespace { std::pair execSys(const std::string& cmd) { //redirect stderr to stdout auto pipe = popen((cmd + " 2>&1").c_str(), "r"); + int thisErrno = errno; if (!pipe) - throw cms::Exception("SystemError") << "popen() failed for command: " << cmd; + throw cms::Exception("SystemError") << "popen() failed with errno " << thisErrno << " for command: " << cmd; //extract output constexpr static unsigned buffSize = 128; @@ -38,6 +40,11 @@ namespace { while (!feof(pipe)) { if (fgets(buffer.data(), buffSize, pipe)) result += buffer.data(); + else { + thisErrno = ferror(pipe); + if (thisErrno) + throw cms::Exception("SystemError") << "failed reading command output with errno " << thisErrno; + } } int rv = pclose(pipe); @@ -196,6 +203,9 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: //assemble server start command std::string command("cmsTriton"); + command += " -p " + std::to_string(::getpid()); + if (fallbackOpts_.debug) + command += " -c"; if (fallbackOpts_.verbose) command += " -v"; if (fallbackOpts_.useDocker) @@ -227,6 +237,8 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: command += " start"; + if (fallbackOpts_.debug) + edm::LogInfo("TritonService") << "Fallback server temporary directory: " << fallbackOpts_.tempDir; if (verbose_) 
edm::LogInfo("TritonService") << command; @@ -239,34 +251,6 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; } -TritonService::~TritonService() { - if (!startedFallback_) - return; - - //assemble server stop command - std::string command("cmsTriton"); - - if (fallbackOpts_.verbose) - command += " -v"; - if (fallbackOpts_.useDocker) - command += " -d"; - if (!fallbackOpts_.instanceName.empty()) - command += " -n " + fallbackOpts_.instanceName; - if (fallbackOpts_.tempDir != ".") - command += " -t " + fallbackOpts_.tempDir; - - command += " stop"; - - if (verbose_) - edm::LogInfo("TritonService") << command; - - const auto& [output, rv] = execSys(command); - if (verbose_ or rv != 0) - edm::LogInfo("TritonService") << output; - if (rv != 0) - edm::LogError("FallbackFailed") << "Stopping the fallback server failed with exit code " << rv; -} - void TritonService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.addUntracked("verbose", false); @@ -280,6 +264,7 @@ void TritonService::fillDescriptions(edm::ConfigurationDescriptions& description edm::ParameterSetDescription fallbackDesc; fallbackDesc.addUntracked("enable", false); + fallbackDesc.addUntracked("debug", false); fallbackDesc.addUntracked("verbose", false); fallbackDesc.addUntracked("useDocker", false); fallbackDesc.addUntracked("useGPU", false); From d54093011877b15c6c017c353cfee588c61e4089 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 12 Jan 2021 11:15:51 -0600 Subject: [PATCH 18/31] simplify and improve shutdown automation --- HeterogeneousCore/SonicTriton/README.md | 2 +- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 6 +++--- HeterogeneousCore/SonicTriton/src/TritonService.cc | 4 +--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md 
b/HeterogeneousCore/SonicTriton/README.md index 49da6afdb86b7..9fd9876a0f18c 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -92,7 +92,7 @@ The script has two operations (`start` and `stop`) and the following options: * `-M [dir]`: model repository (can be given more than once) * `-m [dir]`: specific model directory (can be given more than one) * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) -* `-p [pid]`: PID of parent process to ensure shutdown (used internally by `cmsRun`) +* `-p`: automatically shut down server when parent process ends * `-r [num]`: number of retries when starting container (default: 3) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 3bac5eeb5d384..143bb7381eb59 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -28,7 +28,7 @@ usage() { $ECHO "-M [dir] \t model repository (can be given more than once)" $ECHO "-m [dir] \t specific model directory (can be given more than one)" $ECHO "-n [name] \t name of container instance, also used for default hidden temporary dir (default: ${SERVER})" - $ECHO "-p [pid] \t PID of parent process to ensure shutdown" + $ECHO "-p \t automatically shut down server when parent process ends" $ECHO "-r [num] \t number of retries when starting container (default: ${RETRIES})" $ECHO "-t [dir] \t non-default hidden temporary dir" $ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" @@ -47,7 +47,7 @@ if [ -e /run/shm ]; then SHM=/run/shm fi -while getopts "cDdfgM:m:n:p:r:t:vw:h" opt; do +while getopts "cDdfgM:m:n:pr:t:vw:h" opt; do case "$opt" in c) CLEANUP="" ;; @@ -65,7 +65,7 @@ while getopts 
"cDdfgM:m:n:p:r:t:vw:h" opt; do ;; n) SERVER="$OPTARG" ;; - p) PARENTPID="$OPTARG" + p) PARENTPID="$PPID" ;; r) RETRIES="$OPTARG" ;; diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 49ef9278a475c..cf8332250905f 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -17,7 +17,6 @@ #include #include #include -#include namespace ni = nvidia::inferenceserver; namespace nic = ni::client; @@ -202,8 +201,7 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: edm::LogInfo("TritonService") << msg; //assemble server start command - std::string command("cmsTriton"); - command += " -p " + std::to_string(::getpid()); + std::string command("cmsTriton -p"); if (fallbackOpts_.debug) command += " -c"; if (fallbackOpts_.verbose) From 1a3abbc6122375f939e007198a17cdfcb8e2d6ab Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 13 Jan 2021 09:40:15 -0600 Subject: [PATCH 19/31] assume gat model always available --- HeterogeneousCore/SonicTriton/test/README.md | 2 +- .../SonicTriton/test/fetch_model.sh | 33 ------------------- .../SonicTriton/test/unittest.sh | 10 +----- 3 files changed, 2 insertions(+), 43 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index a2f4b7c65d554..caf917531f29b 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -3,7 +3,7 @@ Test modules `TritonImageProducer` and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available. They generate arbitrary inputs for inference (with ResNet50 or Graph Attention Network, respectively) and print the resulting output. 
-First, the relevant data should be downloaded from Nvidia: +First, the relevant data for ResNet50 should be downloaded from Nvidia: ``` ./fetch_model.sh ``` diff --git a/HeterogeneousCore/SonicTriton/test/fetch_model.sh b/HeterogeneousCore/SonicTriton/test/fetch_model.sh index bc469886e215c..0f36460b936ce 100755 --- a/HeterogeneousCore/SonicTriton/test/fetch_model.sh +++ b/HeterogeneousCore/SonicTriton/test/fetch_model.sh @@ -19,36 +19,3 @@ mkdir -p 1 curl -o 1/model.netdef http://download.caffe2.ai.s3.amazonaws.com/models/resnet50/predict_net.pb curl -o 1/init_model.netdef http://download.caffe2.ai.s3.amazonaws.com/models/resnet50/init_net.pb - -GAT_DIR=${TEST_DIR}/../data/models/gat_test -cd $TEST_DIR -mkdir -p $GAT_DIR -cd $GAT_DIR - -cat << EOF > config.pbtxt -name: "gat_test" -platform: "pytorch_libtorch" -max_batch_size: 0 -input [ - { - name: "x__0" - data_type: TYPE_FP32 - dims: [ -1, 1433 ] - }, - { - name: "edgeindex__1" - data_type: TYPE_INT64 - dims: [ 2, -1 ] - } -] -output [ - { - name: "logits__0" - data_type: TYPE_FP32 - dims: [ -1, 7 ] - } -] -EOF - -mkdir -p 1 -cp /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.09-py3-geometric/torch_geometric/examples/model.pt 1/model.pt diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index fc2a704adf08b..a277ba39ec7ad 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -4,8 +4,7 @@ LOCALTOP=$1 # the test is not possible if: # 1. avx instructions not supported (needed for singularity on CPU) -# 2. the model files cannot be found -# 3. singularity not found or not usable +# 2. 
singularity not found or not usable # so just return true in those cases if grep -q avx /proc/cpuinfo; then @@ -15,13 +14,6 @@ else exit 0 fi -if [ -n "$(find $(echo $CMSSW_SEARCH_PATH | tr ':' ' ') -path "*/HeterogeneousCore/SonicTriton/data/models/gat_test/config.pbtxt" 2>/dev/null)" ]; then - echo "has model file" -else - echo "missing model file" - exit 0 -fi - if type singularity >& /dev/null; then echo "has singularity" else From 18222baefe7428d47bca8708488271056acaca6f Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 13 Jan 2021 16:42:47 -0600 Subject: [PATCH 20/31] port checking to enable multiple servers per node, bug fixes (handle ctrl+c, randomize instance name) --- HeterogeneousCore/SonicTriton/README.md | 1 + .../SonicTriton/interface/TritonService.h | 2 +- .../SonicTriton/scripts/cmsTriton | 68 ++++++++++++++++--- .../SonicTriton/src/TritonService.cc | 32 +++++++-- HeterogeneousCore/SonicTriton/test/README.md | 1 - 5 files changed, 87 insertions(+), 17 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 9fd9876a0f18c..3d0613df5fbbf 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -92,6 +92,7 @@ The script has two operations (`start` and `stop`) and the following options: * `-M [dir]`: model repository (can be given more than once) * `-m [dir]`: specific model directory (can be given more than one) * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) +* `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000) * `-p`: automatically shut down server when parent process ends * `-r [num]`: number of retries when starting container (default: 3) * `-t [dir]`: non-default hidden temporary dir diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index 
67b293f776c32..24232b6894ae1 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -56,7 +56,7 @@ class TritonService { bool isFallback; std::unordered_set models; static const std::string fallbackName; - static const std::string fallbackUrl; + static const std::string fallbackAddress; }; struct Model { Model(const std::string& path_ = "") : path(path_) {} diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 143bb7381eb59..867525061f620 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -14,6 +14,9 @@ CLEANUP=true TMPDIR="" DRYRUN="" PARENTPID="" +BASEPORT=8000 +AUTOPORT="" +NPORTS=3 usage() { ECHO="echo -e" @@ -28,6 +31,7 @@ usage() { $ECHO "-M [dir] \t model repository (can be given more than once)" $ECHO "-m [dir] \t specific model directory (can be given more than one)" $ECHO "-n [name] \t name of container instance, also used for default hidden temporary dir (default: ${SERVER})" + $ECHO "-P [port] \t base port number for services (-1: automatically find an unused port range) (default: ${BASEPORT})" $ECHO "-p \t automatically shut down server when parent process ends" $ECHO "-r [num] \t number of retries when starting container (default: ${RETRIES})" $ECHO "-t [dir] \t non-default hidden temporary dir" @@ -47,7 +51,7 @@ if [ -e /run/shm ]; then SHM=/run/shm fi -while getopts "cDdfgM:m:n:pr:t:vw:h" opt; do +while getopts "cDdfgM:m:n:P:pr:t:vw:h" opt; do case "$opt" in c) CLEANUP="" ;; @@ -65,6 +69,8 @@ while getopts "cDdfgM:m:n:pr:t:vw:h" opt; do ;; n) SERVER="$OPTARG" ;; + P) if [ "$OPTARG" -eq -1 ]; then AUTOPORT=true; else BASEPORT="$OPTARG"; fi + ;; p) PARENTPID="$PPID" ;; r) RETRIES="$OPTARG" @@ -91,6 +97,7 @@ if [ "$RETRIES" -le 0 ]; then RETRIES=1 fi + TOPDIR=$PWD if [ -z "$TMPDIR" ]; then TMPDIR="${TOPDIR}/.${SERVER}" @@ -106,6 +113,38 @@ LIB=lib 
STARTED_INDICATOR="Started GRPCInferenceService" EXTRA="" +compute_ports(){ + # compute derived port numbers + export HTTPPORT=$BASEPORT + export GRPCPORT=$((BASEPORT+1)) + export METRPORT=$((BASEPORT+2)) +} + +check_port(){ + # success on this command means the port is in use + if 2>/dev/null >"/dev/tcp/0.0.0.0/$1"; then + return 1 + else + return 0 + fi +} + +check_ports(){ + if check_port $HTTPPORT && check_port $GRPCPORT && check_port $METRPORT; then + return 0 + else + return 1 + fi +} + +find_ports(){ + while ! check_ports; do + BASEPORT=$((BASEPORT+NPORTS)) + compute_ports + done + echo "CMS_TRITON_GRPC_PORT: $GRPCPORT" +} + start_docker(){ # mount all model repositories MOUNTARGS="" @@ -117,8 +156,8 @@ start_docker(){ $DRYRUN $DOCKER run -d --name ${SERVER} \ --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \ - -p8000:8000 -p8001:8001 -p8002:8002 $EXTRA $MOUNTARGS \ - ${IMAGE} tritonserver $REPOARGS $VERBOSE + -p${HTTPPORT}:${HTTPPORT} -p${GRPCPORT}:${GRPCPORT} -p${METRPORT}:${METRPORT} $EXTRA $MOUNTARGS \ + ${IMAGE} tritonserver $PORTARGS $REPOARGS $VERBOSE } start_singularity(){ @@ -155,7 +194,7 @@ start_singularity(){ REDIR=/dev/stdout fi $DRYRUN singularity run instance://${SERVER} \ - tritonserver $REPOARGS $VERBOSE >& ${REDIR} & + tritonserver $PORTARGS $REPOARGS $VERBOSE >& ${REDIR} & [ -z "$DRYRUN" ] || wait } @@ -275,6 +314,16 @@ if [ "$OP" == start ]; then exit 1 fi + # handle ports + compute_ports + if [ -n "$AUTOPORT" ]; then + find_ports + elif ! 
check_ports; then + echo "Error: requested port in use" + exit 1 + fi + PORTARGS="--http-port=${HTTPPORT} --grpc-port=${GRPCPORT} --metrics-port=${METRPORT}" + list_models # make sure everything happens in tmp dir @@ -286,6 +335,12 @@ if [ "$OP" == start ]; then fi $DRYRUN cd "$TMPDIR" + # if parent PID is provided, automatically stop server when finished + # do this before actually trying to start the server in case of ctrl+c + if [ -n "$PARENTPID" ]; then + auto_stop "" "$PARENTPID" >& "$STOPLOG" & + fi + START_EXIT="" for ((counter=0; counter < ${RETRIES}; counter++)); do $START_FN @@ -302,11 +357,6 @@ if [ "$OP" == start ]; then exit "$START_EXIT" fi wait_server - - # if parent PID is provided, automatically stop server when finished - if [ -n "$PARENTPID" ]; then - auto_stop "" "$PARENTPID" >& "$STOPLOG" & - fi else # check for tmp dir if [ -d "$TMPDIR" ]; then diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index cf8332250905f..6aedac7df4fe0 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -22,7 +22,7 @@ namespace ni = nvidia::inferenceserver; namespace nic = ni::client; const std::string TritonService::Server::fallbackName{"fallback"}; -const std::string TritonService::Server::fallbackUrl{"0.0.0.0:8001"}; +const std::string TritonService::Server::fallbackAddress{"0.0.0.0"}; namespace { std::pair execSys(const std::string& cmd) { @@ -49,6 +49,12 @@ namespace { int rv = pclose(pipe); return std::make_pair(result, rv); } + + std::string getUuid() { + auto [uuid, rv] = execSys("uuidgen | sed 's/-//g'"); + uuid.pop_back(); + return uuid; + } } // namespace TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) @@ -68,7 +74,7 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr if (fallbackOpts_.enable) servers_.emplace(std::piecewise_construct, 
std::forward_as_tuple(Server::fallbackName), - std::forward_as_tuple(Server::fallbackName, Server::fallbackUrl)); + std::forward_as_tuple(Server::fallbackName, Server::fallbackAddress)); //loop over input servers: check which models they have std::string msg; @@ -200,8 +206,13 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: if (verbose_) edm::LogInfo("TritonService") << msg; + //randomize instance name + if (fallbackOpts_.instanceName.empty()) { + fallbackOpts_.instanceName = "triton_server_instance_" + getUuid(); + } + //assemble server start command - std::string command("cmsTriton -p"); + std::string command("cmsTriton -p -P -1"); if (fallbackOpts_.debug) command += " -c"; if (fallbackOpts_.verbose) @@ -224,9 +235,7 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: //get a random temporary directory if none specified if (fallbackOpts_.tempDir.empty()) { - auto [uuid, rv] = execSys("uuidgen | sed 's/-//g'"); - uuid.pop_back(); - auto tmp_dir_path{std::filesystem::temp_directory_path() /= uuid}; + auto tmp_dir_path{std::filesystem::temp_directory_path() /= getUuid()}; fallbackOpts_.tempDir = tmp_dir_path.string(); } //special case ".": use script default (temp dir = .$instanceName) @@ -247,6 +256,17 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: edm::LogInfo("TritonService") << output; if (rv != 0) throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv; + + //get the port + const std::string& portIndicator("CMS_TRITON_GRPC_PORT: "); + auto pos = output.find(portIndicator); + if (pos != std::string::npos) { + auto pos2 = pos + portIndicator.size(); + auto pos3 = output.find('\n', pos2); + const auto& portNum = output.substr(pos2, pos3 - pos2); + serverInfo.url += ":" + portNum; + } else + throw cms::Exception("FallbackFailed") << "Unknown port for fallback server, log follows:\n" << output; } void 
TritonService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index caf917531f29b..52e890480edc7 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -27,4 +27,3 @@ cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonGraphProducer ## Caveats * Local CPU server requires support for AVX instructions. -* Multiple users cannot run servers on the same GPU (e.g. on a shared node). From e5825fa65ed3d4a80fabaa0a74227f40bba2f1d2 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 14 Jan 2021 14:13:31 -0600 Subject: [PATCH 21/31] use GlobalIdentifier --- HeterogeneousCore/SonicTriton/src/TritonService.cc | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 6aedac7df4fe0..799f63cd9846c 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -8,6 +8,7 @@ #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" #include "FWCore/ServiceRegistry/interface/ProcessContext.h" #include "FWCore/Utilities/interface/Exception.h" +#include "FWCore/Utilities/interface/GlobalIdentifier.h" #include "grpc_client.h" #include "grpc_service.pb.h" @@ -49,12 +50,6 @@ namespace { int rv = pclose(pipe); return std::make_pair(result, rv); } - - std::string getUuid() { - auto [uuid, rv] = execSys("uuidgen | sed 's/-//g'"); - uuid.pop_back(); - return uuid; - } } // namespace TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistry& areg) @@ -208,7 +203,7 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: //randomize instance name if (fallbackOpts_.instanceName.empty()) { - fallbackOpts_.instanceName = "triton_server_instance_" + getUuid(); + 
fallbackOpts_.instanceName = "triton_server_instance_" + edm::createGlobalIdentifier(); } //assemble server start command @@ -235,7 +230,7 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: //get a random temporary directory if none specified if (fallbackOpts_.tempDir.empty()) { - auto tmp_dir_path{std::filesystem::temp_directory_path() /= getUuid()}; + auto tmp_dir_path{std::filesystem::temp_directory_path() /= edm::createGlobalIdentifier()}; fallbackOpts_.tempDir = tmp_dir_path.string(); } //special case ".": use script default (temp dir = .$instanceName) From c366b15822e988f1e233514922735c4403a08280 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 14 Jan 2021 14:13:53 -0600 Subject: [PATCH 22/31] retry if port in use to avoid race condition, fix some bugs in stopping --- .../SonicTriton/scripts/cmsTriton | 96 +++++++++++++------ .../SonicTriton/src/TritonService.cc | 3 +- 2 files changed, 67 insertions(+), 32 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 867525061f620..0e2bb42c8cf56 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -4,6 +4,7 @@ USEDOCKER="" GPU="" VERBOSE="" +VERBOSE_ARGS="--log-verbose=1 --log-error=1 --log-info=1" WTIME=120 SERVER=triton_server_instance RETRIES=3 @@ -77,7 +78,7 @@ while getopts "cDdfgM:m:n:P:pr:t:vw:h" opt; do ;; t) TMPDIR="$OPTARG" ;; - v) VERBOSE="--log-verbose=1 --log-error=1 --log-info=1" + v) VERBOSE="$VERBOSE_ARGS" ;; w) WTIME="$OPTARG" ;; @@ -111,6 +112,7 @@ LOG="log_${SERVER}.log" STOPLOG="log_stop_${SERVER}.log" LIB=lib STARTED_INDICATOR="Started GRPCInferenceService" +SEGFAULT_INDICATOR="Address already in use" EXTRA="" compute_ports(){ @@ -145,6 +147,18 @@ find_ports(){ echo "CMS_TRITON_GRPC_PORT: $GRPCPORT" } +handle_ports(){ + # handle ports + compute_ports + if [ -n "$AUTOPORT" ]; then + find_ports + elif ! 
check_ports; then + echo "Error: requested port in use" + return 1 + fi + export PORTARGS="--http-port=${HTTPPORT} --grpc-port=${GRPCPORT} --metrics-port=${METRPORT}" +} + start_docker(){ # mount all model repositories MOUNTARGS="" @@ -214,11 +228,11 @@ stop_singularity(){ test_docker(){ # docker logs print to stderr - ${DOCKER} logs ${SERVER} |& grep "$STARTED_INDICATOR" + ${DOCKER} logs ${SERVER} |& grep "$1" } test_singularity(){ - grep "$STARTED_INDICATOR" $LOG + grep "$1" $LOG } wait_server(){ @@ -227,8 +241,18 @@ wait_server(){ fi COUNT=0 - while ! $WAIT_COND >& /dev/null; do - if [ "$COUNT" -gt "$WTIME" ]; then + while ! $TEST_FN "$STARTED_INDICATOR" >& /dev/null; do + if $TEST_FN "$SEGFAULT_INDICATOR" >& /dev/null; then + handle_ports + PORT_EXIT=$? + # port exit is zero if it found a new, good port; so retry + if [ "$PORT_EXIT" -ne 0 ]; then + auto_stop true + exit $PORT_EXIT + else + return 1 + fi + elif [ "$COUNT" -gt "$WTIME" ]; then echo "timed out waiting for server to start" auto_stop true exit 1 @@ -261,7 +285,7 @@ list_models(){ auto_stop(){ # allow enabling verbosity here even if disabled at top level # but otherwise use top-level setting - if [ -n "$1" ]; then VERBOSE="$1"; fi + if [ -n "$1" ]; then VERBOSE="$VERBOSE_ARGS"; fi PARENTPID="$2" if [ -n "$PARENTPID" ]; then @@ -275,7 +299,8 @@ auto_stop(){ if [ -z "$DRYRUN" ]; then if [ -n "$VERBOSE" ]; then mv "$LOG" "$TOPDIR" - if [ -f "$STOPLOG" ]; then + # only keep non-empty log + if [ -s "$STOPLOG" ]; then mv "$STOPLOG" "$TOPDIR" fi fi @@ -287,12 +312,23 @@ auto_stop(){ fi } +make_tmp(){ + # make sure everything happens in tmp dir + $DRYRUN mkdir "$TMPDIR" + MKDIR_EXIT=$? 
+ if [ "$MKDIR_EXIT" -ne 0 ]; then + echo "Could not create temp dir: $TMPDIR" + exit "$MKDIR_EXIT" + fi + $DRYRUN cd "$TMPDIR" +} + if [ -n "$USEDOCKER" ]; then if [ -n "$GPU" ]; then EXTRA="--gpus all" fi START_FN=start_docker - WAIT_COND=test_docker + TEST_FN=test_docker STOP_FN=stop_docker PROG_NAME=Docker else @@ -300,7 +336,7 @@ else EXTRA="--nv" fi START_FN=start_singularity - WAIT_COND=test_singularity + TEST_FN=test_singularity STOP_FN=stop_singularity PROG_NAME=Singularity fi @@ -314,26 +350,13 @@ if [ "$OP" == start ]; then exit 1 fi - # handle ports - compute_ports - if [ -n "$AUTOPORT" ]; then - find_ports - elif ! check_ports; then - echo "Error: requested port in use" - exit 1 - fi - PORTARGS="--http-port=${HTTPPORT} --grpc-port=${GRPCPORT} --metrics-port=${METRPORT}" + handle_ports + PORT_EXIT=$? + if [ "$PORT_EXIT" -ne 0 ]; then exit $PORT_EXIT; fi list_models - # make sure everything happens in tmp dir - $DRYRUN mkdir "$TMPDIR" - MKDIR_EXIT=$? - if [ "$MKDIR_EXIT" -ne 0 ]; then - echo "Could not create temp dir: $TMPDIR" - exit "$MKDIR_EXIT" - fi - $DRYRUN cd "$TMPDIR" + make_tmp # if parent PID is provided, automatically stop server when finished # do this before actually trying to start the server in case of ctrl+c @@ -341,22 +364,33 @@ if [ "$OP" == start ]; then auto_stop "" "$PARENTPID" >& "$STOPLOG" & fi - START_EXIT="" + START_EXIT=0 for ((counter=0; counter < ${RETRIES}; counter++)); do + if [ "$START_EXIT" -ne 0 ]; then make_tmp; fi + $START_FN START_EXIT=$? if [ "$START_EXIT" -eq 0 ]; then - break - else + wait_server + WAIT_EXIT=$? + if [ "$WAIT_EXIT" -eq 0 ]; then + break + else + # allow to keep retrying if there was a port issue + counter=$((counter-1)) + START_EXIT="$WAIT_EXIT" + fi + fi + + if [ "$START_EXIT" -ne 0 ]; then + auto_stop echo "Retrying after container issue..." 
- $STOP_FN fi done if [ "$START_EXIT" -ne 0 ]; then echo "Error from $PROG_NAME" exit "$START_EXIT" fi - wait_server else # check for tmp dir if [ -d "$TMPDIR" ]; then diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 799f63cd9846c..d3421b0e60b8d 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -254,7 +254,8 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: //get the port const std::string& portIndicator("CMS_TRITON_GRPC_PORT: "); - auto pos = output.find(portIndicator); + //find last instance in log in case multiple ports were tried + auto pos = output.rfind(portIndicator); if (pos != std::string::npos) { auto pos2 = pos + portIndicator.size(); auto pos3 = output.find('\n', pos2); From c57021a4a41bccd65f2f7c521f5854e6a2d1b08f Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 18 Jan 2021 14:57:38 -0600 Subject: [PATCH 23/31] increase verbosity of unit test --- HeterogeneousCore/SonicTriton/test/unittest.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index a277ba39ec7ad..bab610a473308 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -21,4 +21,11 @@ else exit 0 fi -cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 +CMSEXIT=$? 
+ +LOGFILE="$(ls -rt log_triton_server_instance*.log | tail -n 1)" +echo -e '\n=====\nContents of '$LOGFILE':\n=====\n' +cat "$LOGFILE" + +exit $CMSEXIT From b289164ba81013aae7995a78f197dfa441d13271 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Jan 2021 08:18:36 -0600 Subject: [PATCH 24/31] prevent segfault --- HeterogeneousCore/SonicTriton/src/TritonClient.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index a597198a0fe54..f26a09392ba81 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -335,9 +335,11 @@ TritonClient::ServerSideStats TritonClient::summarizeServerStats(const inference inference::ModelStatistics TritonClient::getServerSideStatus() const { if (verbose_) { inference::ModelStatisticsResponse resp; - triton_utils::warnIfError(client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_), - "getServerSideStatus(): unable to get model statistics"); - return *(resp.model_stats().begin()); + bool success = triton_utils::warnIfError( + client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_), + "getServerSideStatus(): unable to get model statistics"); + if (success) + return *(resp.model_stats().begin()); } return inference::ModelStatistics{}; } From c5832fcabe8be94851f4a6559df4a96dd90e2256 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Jan 2021 10:34:53 -0600 Subject: [PATCH 25/31] look in correct directory --- HeterogeneousCore/SonicTriton/test/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index bab610a473308..a8c70312afa69 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -24,7 +24,7 @@ fi 
cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 CMSEXIT=$? -LOGFILE="$(ls -rt log_triton_server_instance*.log | tail -n 1)" +LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log | tail -n 1)" echo -e '\n=====\nContents of '$LOGFILE':\n=====\n' cat "$LOGFILE" From fbeacab99cb033f5152b26af6cbbfe75be74ac7a Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Jan 2021 08:47:08 -0600 Subject: [PATCH 26/31] improvements to unit test --- .../SonicTriton/test/tritonTest_cfg.py | 2 +- HeterogeneousCore/SonicTriton/test/unittest.sh | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 9295f0bf9b988..e15d92370d1f3 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -12,7 +12,7 @@ options.register("streams", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.register("modules", "TritonImageProducer", VarParsing.multiplicity.list, VarParsing.varType.string) options.register("modelName","resnet50_netdef", VarParsing.multiplicity.singleton, VarParsing.varType.string) -options.register("mode","PseudoAsync", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("mode","Async", VarParsing.multiplicity.singleton, VarParsing.varType.string) options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) options.register("device","auto", VarParsing.multiplicity.singleton, VarParsing.varType.string) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index a8c70312afa69..6f3b2a4c3a67e 100755 --- 
a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -24,8 +24,15 @@ fi cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 CMSEXIT=$? -LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log | tail -n 1)" -echo -e '\n=====\nContents of '$LOGFILE':\n=====\n' -cat "$LOGFILE" +STOP_COUNTER=0 +while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do + STOP_COUNTER=$((STOP_COUNTER+1)) + sleep 5 +done + +if [ -n "$LOGFILE" ]; then + echo -e '\n=====\nContents of '$LOGFILE':\n=====\n' + cat "$LOGFILE" +fi exit $CMSEXIT From daa7238e7d35e04c9c003b467d5a30ec7655847d Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Jan 2021 16:18:37 -0600 Subject: [PATCH 27/31] test unprivileged user namespace --- HeterogeneousCore/SonicTriton/test/unittest.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index 6f3b2a4c3a67e..256c8aa4db4ae 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -5,6 +5,7 @@ LOCALTOP=$1 # the test is not possible if: # 1. avx instructions not supported (needed for singularity on CPU) # 2. singularity not found or not usable +# 3. 
inside singularity container w/o unprivileged user namespace enabled (needed for singularity-in-singularity) # so just return true in those cases if grep -q avx /proc/cpuinfo; then @@ -21,6 +22,15 @@ else exit 0 fi +if [ -n "$SINGULARITY_CONTAINER" ]; then + if unshare -U echo >/dev/null 2>&1; then + echo "has unprivileged user namespace support" + else + echo "missing unprivileged user namespace support" + exit 0 + fi +fi + cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 CMSEXIT=$? From dbe6b0ff39d281ec02d5914cdae01237e04acf3f Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Jan 2021 17:01:56 -0600 Subject: [PATCH 28/31] bug fix --- HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h b/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h index 92bf9bbc2f33e..2b52913fbca53 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h +++ b/HeterogeneousCore/SonicCore/interface/SonicOneEDAnalyzer.h @@ -26,7 +26,7 @@ class SonicOneEDAnalyzer : public edm::one::EDAnalyzer { //ExternalWork is not compatible with one modules, so Sync mode is enforced if (clientPset_.getParameter("mode") != "Sync") { edm::LogWarning("ResetClientMode") << "Resetting client mode to Sync for SonicOneEDAnalyzer"; - clientPset_.addParameter("Mode", "Sync"); + clientPset_.addParameter("mode", "Sync"); } } //destructor From c3d408556e945257466e9bf1e48249622e922326 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 21 Jan 2021 22:22:00 +0100 Subject: [PATCH 29/31] add another unprivileged check --- HeterogeneousCore/SonicTriton/test/unittest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index 
256c8aa4db4ae..e028aa19864ca 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -23,7 +23,7 @@ else fi if [ -n "$SINGULARITY_CONTAINER" ]; then - if unshare -U echo >/dev/null 2>&1; then + if grep -q "^allow setuid = no" /etc/singularity/singularity.conf && unshare -U echo >/dev/null 2>&1; then echo "has unprivileged user namespace support" else echo "missing unprivileged user namespace support" From 4ad2933880d96254bfcbf697703aa757eb17c008 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 27 Jan 2021 20:10:48 -0600 Subject: [PATCH 30/31] mitigate instability in auto_stop, detect Socket closed in unit test, add some more options --- HeterogeneousCore/SonicTriton/README.md | 2 ++ .../SonicTriton/scripts/cmsTriton | 30 ++++++++++++++----- .../SonicTriton/test/tritonTest_cfg.py | 5 +++- .../SonicTriton/test/unittest.sh | 12 ++++++-- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 3d0613df5fbbf..63a52d46d1e57 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -89,12 +89,14 @@ The script has two operations (`start` and `stop`) and the following options: * `-d`: use Docker instead of Singularity * `-f`: force reuse of (possibly) existing container instance * `-g`: use GPU instead of CPU +* `-i` [name]`: server image name (default: fastml/triton-torchgeo:20.09-py3-geometric) * `-M [dir]`: model repository (can be given more than once) * `-m [dir]`: specific model directory (can be given more than one) * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) * `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000) * `-p`: automatically shut down server when parent process ends * `-r [num]`: number of retries when starting container (default: 3) +* 
`-s [dir]`: Singularity sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.09-py3-geometric) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs * `-w [time]`: maximum time to wait for server to start (default: 120 seconds) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 0e2bb42c8cf56..3e79f226e2b15 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -18,6 +18,8 @@ PARENTPID="" BASEPORT=8000 AUTOPORT="" NPORTS=3 +IMAGE=fastml/triton-torchgeo:20.09-py3-geometric +SANDBOX=/cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} usage() { ECHO="echo -e" @@ -29,12 +31,14 @@ usage() { $ECHO "-d \t use Docker instead of Singularity" $ECHO "-f \t force reuse of (possibly) existing container instance" $ECHO "-g \t use GPU instead of CPU" + $ECHO "-i [name] \t server image name (default: ${IMAGE})" $ECHO "-M [dir] \t model repository (can be given more than once)" $ECHO "-m [dir] \t specific model directory (can be given more than one)" $ECHO "-n [name] \t name of container instance, also used for default hidden temporary dir (default: ${SERVER})" $ECHO "-P [port] \t base port number for services (-1: automatically find an unused port range) (default: ${BASEPORT})" $ECHO "-p \t automatically shut down server when parent process ends" $ECHO "-r [num] \t number of retries when starting container (default: ${RETRIES})" + $ECHO "-s [dir] \t Singularity sandbox directory (default: ${SANDBOX})" $ECHO "-t [dir] \t non-default hidden temporary dir" $ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs" $ECHO "-w [time] \t maximum time to wait for server to start (default: ${WTIME} seconds)" @@ -52,7 +56,7 @@ if [ -e /run/shm ]; then SHM=/run/shm fi -while getopts "cDdfgM:m:n:P:pr:t:vw:h" opt; do 
+while getopts "cDdfgi:M:m:n:P:pr:s:t:vw:h" opt; do case "$opt" in c) CLEANUP="" ;; @@ -64,6 +68,8 @@ while getopts "cDdfgM:m:n:P:pr:t:vw:h" opt; do ;; g) GPU=true ;; + i) IMAGE="$OPTARG" + ;; M) REPOS+=("$OPTARG") ;; m) MODELS+=("$OPTARG") @@ -76,6 +82,8 @@ while getopts "cDdfgM:m:n:P:pr:t:vw:h" opt; do ;; r) RETRIES="$OPTARG" ;; + s) SANDBOX="$OPTARG" + ;; t) TMPDIR="$OPTARG" ;; v) VERBOSE="$VERBOSE_ARGS" @@ -106,8 +114,8 @@ else TMPDIR=$(readlink -f $TMPDIR) fi +SANDBOX=$(readlink -f ${SANDBOX}) DOCKER="sudo docker" -IMAGE=fastml/triton-torchgeo:20.09-py3-geometric LOG="log_${SERVER}.log" STOPLOG="log_stop_${SERVER}.log" LIB=lib @@ -179,7 +187,7 @@ start_singularity(){ # but cvmfs is read-only # -> make a writable local directory with the same contents $DRYRUN mkdir ${LIB} - $DRYRUN ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/ + $DRYRUN ln -s ${SANDBOX}/opt/tritonserver/lib/* ${LIB}/ # mount all model repositories MOUNTARGS="" @@ -192,8 +200,8 @@ start_singularity(){ # start instance # need to bind /cvmfs for above symlinks to work inside container $DRYRUN singularity instance start \ - -B ${SHM}:/run/shm -B ${LIB}:/opt/tritonserver/lib -B /cvmfs $MOUNTARGS $EXTRA \ - /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} ${SERVER} + -B ${SHM}:/run/shm -B ${LIB}:/opt/tritonserver/lib -B ${SANDBOX} $MOUNTARGS $EXTRA \ + ${SANDBOX} ${SERVER} START_EXIT=$? if [ "$START_EXIT" -ne 0 ]; then @@ -289,7 +297,15 @@ auto_stop(){ PARENTPID="$2" if [ -n "$PARENTPID" ]; then - while kill -0 $PARENTPID >& /dev/null; do + PCOUNTER=0 + PMAX=5 + while [ "$PCOUNTER" -le "$PMAX" ]; do + if ! 
kill -0 $PARENTPID >& /dev/null; then + PCOUNTER=$((PCOUNTER+1)) + else + # must get 5 in a row, otherwise reset + PCOUNTER=0 + fi sleep 1 done fi @@ -393,7 +409,7 @@ if [ "$OP" == start ]; then fi else # check for tmp dir - if [ -d "$TMPDIR" ]; then + if [ -d "$TMPDIR" ] || [ -n "$DRYRUN" ]; then $DRYRUN cd "$TMPDIR" elif [ -z "$FORCE" ]; then echo "Error: attempt to stop unknown container $SERVER" diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index e15d92370d1f3..b9683cdb9439d 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -16,6 +16,8 @@ options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) options.register("device","auto", VarParsing.multiplicity.singleton, VarParsing.varType.string) +options.register("docker", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool) +options.register("tries", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int) options.parseArguments() if len(options.params)>0: @@ -42,6 +44,7 @@ process.TritonService.verbose = options.verbose process.TritonService.fallback.verbose = options.verbose +process.TritonService.fallback.useDocker = options.docker if options.device != "auto": process.TritonService.fallback.useGPU = options.device=="gpu" if len(options.address)>0: @@ -85,7 +88,7 @@ modelVersion = cms.string(""), modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(models[module])), verbose = cms.untracked.bool(options.verbose), - allowedTries = cms.untracked.uint32(0), + allowedTries = cms.untracked.uint32(options.tries), ) ) ) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index e028aa19864ca..10ff938f031fd 100755 --- 
a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -31,9 +31,12 @@ if [ -n "$SINGULARITY_CONTAINER" ]; then fi fi -cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 +tmpFile=$(mktemp) +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 >& tmpFile CMSEXIT=$? +cat tmpFile + STOP_COUNTER=0 while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do STOP_COUNTER=$((STOP_COUNTER+1)) @@ -45,4 +48,9 @@ if [ -n "$LOGFILE" ]; then cat "$LOGFILE" fi -exit $CMSEXIT +if grep -q "Socket closed" tmpFile; then + echo "Transient server error (not caused by client code)" + exit 0 +else + exit $CMSEXIT +fi From b2f470275c131ba363caebf0a06b9afaae608624 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 28 Jan 2021 18:15:20 -0600 Subject: [PATCH 31/31] fix unit test script --- HeterogeneousCore/SonicTriton/test/unittest.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index 10ff938f031fd..a428a98a723d8 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -31,11 +31,11 @@ if [ -n "$SINGULARITY_CONTAINER" ]; then fi fi -tmpFile=$(mktemp) -cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=1 unittest=1 verbose=1 >& tmpFile +tmpFile=$(mktemp -p ${LOCALTOP} SonicTritonTestXXXXXXXX.log) +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer 
maxEvents=1 unittest=1 verbose=1 >& $tmpFile CMSEXIT=$? -cat tmpFile +cat $tmpFile STOP_COUNTER=0 while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do @@ -48,9 +48,10 @@ if [ -n "$LOGFILE" ]; then cat "$LOGFILE" fi -if grep -q "Socket closed" tmpFile; then +if grep -q "Socket closed" $tmpFile; then echo "Transient server error (not caused by client code)" - exit 0 -else - exit $CMSEXIT + CMSEXIT=0 fi + +rm $tmpFile +exit $CMSEXIT