Skip to content

Commit

Permalink
feat: added performance metric graphs config for nvidia nim (#320)
Browse files Browse the repository at this point in the history
* feat: added performance metric graphs config for nvidia nim

Signed-off-by: Tomer Figenblat <[email protected]>

* chore: modified the runtime id annotation

Co-authored-by: Edgar Hernández <[email protected]>
Signed-off-by: Tomer Figenblat <[email protected]>

---------

Signed-off-by: Tomer Figenblat <[email protected]>
Co-authored-by: Edgar Hernández <[email protected]>
  • Loading branch information
TomerFi and israel-hdez authored Dec 2, 2024
1 parent 6e80852 commit aee3e05
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 18 deletions.
50 changes: 50 additions & 0 deletions controllers/constants/runtime-metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,4 +223,54 @@ const (
}
]
}`

// NIMMetricsData is the metrics dashboard configuration (a JSON document) for
// NVIDIA NIM serving runtimes. It defines four graphs: request counts, mean
// latency, CPU utilization and memory utilization. The queries contain the
// placeholders ${NAMESPACE}, ${MODEL_NAME}, ${RATE_INTERVAL} and
// ${REQUEST_RATE_INTERVAL}, which are presumably substituted by the consumer
// of this config before the queries are run (not visible here; confirm).
//
// The mean latency query scales only the numerator by 1000: the ratio of the
// latency-seconds sum rate to the request count rate is an average in seconds,
// and the single factor converts it to the milliseconds promised by the graph
// title. Scaling both sides would cancel out and leave the value in seconds.
NIMMetricsData = `{
"config": [
{
"title": "Requests per 5 minutes",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "round(sum(increase(request_success_total{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${REQUEST_RATE_INTERVAL}])))"
},
{
"title": "Number of failed incoming requests",
"query": "round(sum(increase(request_failure_total{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${REQUEST_RATE_INTERVAL}])))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average e2e latency",
"query": "sum by (model_name) (rate(e2e_request_latency_seconds_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]) * 1000) / sum by (model_name) (rate(e2e_request_latency_seconds_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(pod:container_cpu_usage:sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'})/sum(kube_pod_resource_limit{resource='cpu', pod=~'${MODEL_NAME}-predictor-.*', namespace='${NAMESPACE}'})"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'})/sum(kube_pod_resource_limit{resource='memory', pod=~'${MODEL_NAME}-predictor-.*', namespace='${NAMESPACE}'})"
}
]
}
]
}`
)
46 changes: 28 additions & 18 deletions controllers/reconcilers/kserve_metrics_dashboard_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ func (r *KserveMetricsDashboardReconciler) Reconcile(ctx context.Context, log lo
func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*corev1.ConfigMap, error) {

var err error
var servingRuntime string
runtime := &kservev1alpha1.ServingRuntime{}
supported := false

Expand Down Expand Up @@ -128,24 +127,8 @@ func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Con
supported = false
}

servingRuntimeImage := runtime.Spec.Containers[0].Image
re := regexp.MustCompile(`/([^/@]+)[@:]`)
findImageName := re.FindStringSubmatch(servingRuntimeImage)
// sanity check for regex match, will fall back to a known string that will lead to a configmap for unsupported metrics
if len(findImageName) < 2 {
servingRuntime = constants.ServingRuntimeFallBackImageName
} else {
servingRuntime = findImageName[1]
}

runtimeMetricsData := map[string]string{
constants.OvmsImageName: constants.OvmsMetricsData,
constants.TgisImageName: constants.TgisMetricsData,
constants.VllmImageName: constants.VllmMetricsData,
constants.CaikitImageName: constants.CaikitMetricsData,
}
// supported is true only when a match on this map is found, is false otherwise
data, supported := runtimeMetricsData[servingRuntime]
data, supported := getMetricsData(runtime)
configMap, err := r.createConfigMap(isvc, supported, log)
if err != nil {
return nil, err
Expand Down Expand Up @@ -220,3 +203,30 @@ func (r *KserveMetricsDashboardReconciler) processDelta(ctx context.Context, log
}
return nil
}

// servingRuntimeImageNameRe captures the run of characters that follows a "/"
// (containing neither "/" nor "@") and is terminated by "@" or ":". For an
// image reference such as "registry/org/name:tag" the capture is "name".
// It is compiled once at package scope instead of on every call.
var servingRuntimeImageNameRe = regexp.MustCompile(`/([^/@]+)[@:]`)

// getMetricsData returns the metrics dashboard configuration for the given
// serving runtime and whether the runtime is supported.
//
// A runtime annotated with utils.IsNimRuntimeAnnotation set to "true" always
// gets the NVIDIA NIM configuration. Otherwise the image name of the first
// container is extracted and looked up among the known runtime images; the
// boolean result is true only when that lookup finds a match.
func getMetricsData(runtime *kservev1alpha1.ServingRuntime) (string, bool) {
	if runtime.Annotations[utils.IsNimRuntimeAnnotation] == "true" {
		return constants.NIMMetricsData, true
	}

	// Start from the fallback name, which leads to a configmap for unsupported
	// metrics, and replace it only when an image name can be extracted. The
	// length guard prevents an index-out-of-range panic for a runtime that
	// declares no containers.
	servingRuntime := constants.ServingRuntimeFallBackImageName
	if len(runtime.Spec.Containers) > 0 {
		match := servingRuntimeImageNameRe.FindStringSubmatch(runtime.Spec.Containers[0].Image)
		// sanity check for regex match: index 1 holds the captured image name
		if len(match) >= 2 {
			servingRuntime = match[1]
		}
	}

	runtimeMetricsData := map[string]string{
		constants.OvmsImageName:   constants.OvmsMetricsData,
		constants.TgisImageName:   constants.TgisMetricsData,
		constants.VllmImageName:   constants.VllmMetricsData,
		constants.CaikitImageName: constants.CaikitMetricsData,
	}
	// supported is true only when a match on this map is found, is false otherwise
	data, supported := runtimeMetricsData[servingRuntime]
	return data, supported
}
6 changes: 6 additions & 0 deletions controllers/utils/nim.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ const (
nimGetNgcCatalog = "https://api.ngc.nvidia.com/v2/search/catalog/resources/CONTAINER"
nimGetNgcToken = "https://authn.nvidia.com/token?service=ngc&"
nimGetNgcModelDataFmt = "https://api.ngc.nvidia.com/v2/org/%s/team/%s/repos/%s?resolve-labels=true"
IsNimRuntimeAnnotation = "runtimes.opendatahub.io/nvidia-nim"
)

var NimHttpClient HttpClient
Expand Down Expand Up @@ -296,6 +297,7 @@ func GetNimServingRuntimeTemplate(scheme *runtime.Scheme) (*v1alpha1.ServingRunt
Annotations: map[string]string{
"opendatahub.io/recommended-accelerators": "[\"nvidia.com/gpu\"]",
"openshift.io/display-name": "NVIDIA NIM",
IsNimRuntimeAnnotation: "true",
},
Labels: map[string]string{
"opendatahub.io/dashboard": "true",
Expand All @@ -304,6 +306,10 @@ func GetNimServingRuntimeTemplate(scheme *runtime.Scheme) (*v1alpha1.ServingRunt
},
Spec: v1alpha1.ServingRuntimeSpec{
ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{
Annotations: map[string]string{
"prometheus.io/path": "/metrics",
"prometheus.io/port": "8000",
},
Containers: []corev1.Container{
{Env: []corev1.EnvVar{
{
Expand Down

0 comments on commit aee3e05

Please sign in to comment.