-
Notifications
You must be signed in to change notification settings - Fork 0
/
02-perfanalyzer.sh
executable file
·86 lines (79 loc) · 2.95 KB
/
02-perfanalyzer.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Prepares everything the perf_analyzer run needs: the layout of the summary
# table, the concurrency sweep derived from the local CPU count, and the
# cluster objects (client pod, Traefik service IP, list of deployed models).

# Horizontal rule for the table. It is longer than needed on purpose; only
# the first TableWidth characters are printed.
seperator=---------------------------------------------------------------
seperator=$seperator$seperator
# Row layout: Name | Platform | Inputs | Outputs | Batch | Status
pattern="%-24s| %-24s| %-7s| %-7s| %-7s| %-7s|\n"
TableWidth=87

# Concurrency sweep: start at one request per local CPU and step by the same
# amount, CONCURRENCY_RUNS times.
# Assignment and export are kept separate where a command substitution is
# involved, so the command's exit status is not masked by `export`.
CPU_COUNT=$(nproc)
export CPU_COUNT
export STEP_CONCURRENCY=${CPU_COUNT}
export CONCURRENCY_RUNS=8
export MIN_CONCURRENCY=${CPU_COUNT}
export MAX_CONCURRENCY=$((CPU_COUNT * CONCURRENCY_RUNS))

# Name of the client pod ("pod/<name>" -> "<name>"). Carriage returns are
# stripped and only the first match is kept, so the value is always a single
# clean word even if several client pods exist.
TRITON_POD=$(
  kubectl -n default get pod -l app=triton-inference-server -o name \
    | grep client \
    | cut -d / -f2 \
    | tr -d '\r' \
    | head -n 1
)
export TRITON_POD
if [[ -z "$TRITON_POD" ]]; then
  printf 'warning: no triton-inference-server client pod found in namespace default\n' >&2
fi

# Cluster IP of the first service labelled as Traefik.
TRAEFIK_ENDPOINT=$(kubectl get svc -l app.kubernetes.io/name=traefik -o=jsonpath='{.items[0].spec.clusterIP}')
export TRAEFIK_ENDPOINT
if [[ -z "$TRAEFIK_ENDPOINT" ]]; then
  printf 'warning: could not determine the Traefik cluster IP\n' >&2
fi

# Whitespace-separated list of model names, read from the current directory.
if [[ -r deployed_models.txt ]]; then
  MODEL_MANIFEST=$(<deployed_models.txt)
else
  printf 'warning: deployed_models.txt is missing or unreadable; no models will be tested\n' >&2
  MODEL_MANIFEST=""
fi
export MODEL_MANIFEST
#######################################
# Builds perf_analyzer --shape arguments for every input tensor of a model.
# Negative (variable-size) dimensions are replaced by a fixed size so the
# emitted shape is always concrete.
# Arguments: the model configuration JSON. All arguments are joined with
#            single spaces, so the JSON may arrive as one word or pre-split.
# Outputs:   one line per input on stdout: "--shape <name>:<d1>,<d2>,..."
#            Inputs that declare no dims are skipped.
# Returns:   the exit status of jq.
#######################################
function traverse_input(){
  # Size substituted for each negative dimension.
  local -r dynamic_dim=16

  # A single jq pass does the whole job. Every dimension is checked on its
  # own: comparing the comma-joined string numerically would only look at the
  # last dimension. tonumber accepts dims encoded as numbers or as strings.
  printf '%s\n' "$*" | jq -r --arg dyn "$dynamic_dim" '
    .input[]?
    | select((.dims // []) | length > 0)
    | (.dims
       | map(tonumber | if . < 0 then ($dyn | tonumber) else . end | tostring)
       | join(",")) as $shape
    | "--shape \(.name):\($shape)"
  '
}
#######################################
# Builds --shape arguments for every output tensor of a model, in the same
# format traverse_input uses for inputs. Negative (variable-size) dimensions
# are replaced by a fixed size so the emitted shape is always concrete.
# NOTE(review): nothing in the visible part of this script calls this
# function.
# Arguments: the model configuration JSON. All arguments are joined with
#            single spaces, so the JSON may arrive as one word or pre-split.
# Outputs:   one line per output on stdout: "--shape <name>:<d1>,<d2>,..."
#            Outputs that declare no dims are skipped.
# Returns:   the exit status of jq.
#######################################
function traverse_output(){
  # Size substituted for each negative dimension.
  local -r dynamic_dim=16

  # Every dimension is checked on its own: comparing the comma-joined string
  # numerically would only look at the last dimension. tonumber accepts dims
  # encoded as numbers or as strings.
  printf '%s\n' "$*" | jq -r --arg dyn "$dynamic_dim" '
    .output[]?
    | select((.dims // []) | length > 0)
    | (.dims
       | map(tonumber | if . < 0 then ($dyn | tonumber) else . end | tostring)
       | join(",")) as $shape
    | "--shape \(.name):\($shape)"
  '
}
# Prints a summary table with one row per deployed model and starts a
# perf_analyzer run for each model through the client pod.
clear
printf "$pattern" Name Platform Inputs Outputs Batch Status
printf "%.${TableWidth}s\n" "$seperator"

# Loop-invariant: thread cap derived from the local CPU count.
max_threads=$(( $(nproc) * 4 ))

# MODEL_MANIFEST is a whitespace-separated list; splitting it is intentional.
# shellcheck disable=SC2086
for MODEL in $MODEL_MANIFEST
do
  # The client pod is looked up in the "default" namespace, so exec must
  # target that namespace too instead of the current context's namespace.
  config=$(kubectl -n default exec "$TRITON_POD" -- \
    curl -s "$TRAEFIK_ENDPOINT:8000/v2/models/$MODEL/config")

  name=$(jq -r '.name' <<<"$config")
  platform=$(jq -r '.platform' <<<"$config")
  batchsize=$(jq -r '.max_batch_size' <<<"$config")
  inputs=$(jq -r '.input | length' <<<"$config")
  outputs=$(jq -r '.output | length' <<<"$config")
  seq_check=$(jq '.sequence_batching | length' <<<"$config")

  # Fall back to batch size 1 for models with a sequence_batching section and
  # whenever max_batch_size is not a positive integer (0, null, empty, ...).
  if [[ "$seq_check" -gt 0 || ! "$batchsize" =~ ^[1-9][0-9]*$ ]]; then
    batchsize=1
  fi

  # Readiness probe with a one second timeout; show OK for HTTP 200,
  # otherwise the raw status code.
  code_status=$(kubectl -n default exec "$TRITON_POD" -- \
    curl -m 1 -L -s -o /dev/null -w '%{http_code}' \
    "$TRAEFIK_ENDPOINT:8000/v2/models/$MODEL/versions/1/ready")
  if [[ "$code_status" == 200 ]]; then
    status=OK
  else
    status=$code_status
  fi

  # Every field is quoted so an empty value keeps its column instead of
  # shifting the rest of the row to the left.
  printf "$pattern" "$name" "$platform" "$inputs" "$outputs" "$batchsize" "$status"

  # traverse_input prints one "--shape <name>:<dims>" pair per line; collect
  # the pairs in an array so each one reaches perf_analyzer as exact words.
  extra_args=()
  while read -r flag value; do
    [[ -n "$flag" ]] && extra_args+=("$flag" "$value")
  done < <(traverse_input "$config")

  # Output is discarded and the job runs in the background, so the script
  # does not wait for the benchmark of one model before starting the next.
  kubectl -n default exec "$TRITON_POD" -- perf_analyzer \
    -m "$MODEL" \
    -a \
    -i grpc \
    -u "$TRAEFIK_ENDPOINT:8001" \
    --percentile 95 \
    --max-threads "$max_threads" \
    --request-distribution constant \
    --measurement-interval 30000 \
    --concurrency-range "$MIN_CONCURRENCY:$MAX_CONCURRENCY:$STEP_CONCURRENCY" \
    -b "$batchsize" "${extra_args[@]}" > /dev/null 2>&1 &
done
printf "%.${TableWidth}s\n" "$seperator"