diff --git a/controllers/constants/runtime-metrics.go b/controllers/constants/runtime-metrics.go
index 7ca95b37..b09b2572 100644
--- a/controllers/constants/runtime-metrics.go
+++ b/controllers/constants/runtime-metrics.go
@@ -223,4 +223,54 @@ const (
 			}
 		]
 	}`
+
+	// NIMMetricsData is the metrics dashboard definition for NVIDIA NIM runtimes.
+	NIMMetricsData = `{
+		"config": [
+			{
+				"title": "Requests per 5 minutes",
+				"type": "REQUEST_COUNT",
+				"queries": [
+					{
+						"title": "Number of successful incoming requests",
+						"query": "round(sum(increase(request_success_total{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${REQUEST_RATE_INTERVAL}])))"
+					},
+					{
+						"title": "Number of failed incoming requests",
+						"query": "round(sum(increase(request_failure_total{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${REQUEST_RATE_INTERVAL}])))"
+					}
+				]
+			},
+			{
+				"title": "Average response time (ms)",
+				"type": "MEAN_LATENCY",
+				"queries": [
+					{
+						"title": "Average e2e latency",
+						"query": "sum by (model_name) (rate(e2e_request_latency_seconds_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]) * 1000) / sum by (model_name) (rate(e2e_request_latency_seconds_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+					}
+				]
+			},
+			{
+				"title": "CPU utilization %",
+				"type": "CPU_USAGE",
+				"queries": [
+					{
+						"title": "CPU usage",
+						"query": "sum(pod:container_cpu_usage:sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'})/sum(kube_pod_resource_limit{resource='cpu', pod=~'${MODEL_NAME}-predictor-.*', namespace='${NAMESPACE}'})"
+					}
+				]
+			},
+			{
+				"title": "Memory utilization %",
+				"type": "MEMORY_USAGE",
+				"queries": [
+					{
+						"title": "Memory usage",
+						"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'})/sum(kube_pod_resource_limit{resource='memory', pod=~'${MODEL_NAME}-predictor-.*', namespace='${NAMESPACE}'})"
+					}
+				]
+			}
+		]
+	}`
 )
diff --git a/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go b/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go
index e07c243c..bf3d3ead 100644
--- a/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go
+++ b/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go
@@ -92,7 +92,6 @@ func (r *KserveMetricsDashboardReconciler) Reconcile(ctx context.Context, log lo
 
 func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*corev1.ConfigMap, error) {
 	var err error
-	var servingRuntime string
 	runtime := &kservev1alpha1.ServingRuntime{}
 	supported := false
 
@@ -128,24 +127,8 @@ func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Con
 		supported = false
 	}
 
-	servingRuntimeImage := runtime.Spec.Containers[0].Image
-	re := regexp.MustCompile(`/([^/@]+)[@:]`)
-	findImageName := re.FindStringSubmatch(servingRuntimeImage)
-	// sanity check for regex match, will fall back to a known string that will lead to a configmap for unsupported metrics
-	if len(findImageName) < 2 {
-		servingRuntime = constants.ServingRuntimeFallBackImageName
-	} else {
-		servingRuntime = findImageName[1]
-	}
-
-	runtimeMetricsData := map[string]string{
-		constants.OvmsImageName:   constants.OvmsMetricsData,
-		constants.TgisImageName:   constants.TgisMetricsData,
-		constants.VllmImageName:   constants.VllmMetricsData,
-		constants.CaikitImageName: constants.CaikitMetricsData,
-	}
-	// supported is true only when a match on this map is found, is false otherwise
-	data, supported := runtimeMetricsData[servingRuntime]
+	// supported is true only when the runtime has a known metrics dashboard, false otherwise
+	data, supported := getMetricsData(runtime)
 	configMap, err := r.createConfigMap(isvc, supported, log)
 	if err != nil {
 		return nil, err
@@ -220,3 +203,47 @@ func (r *KserveMetricsDashboardReconciler) processDelta(ctx context.Context, log
 	}
 	return nil
 }
+
+// imageNameRegexp captures the path segment of a container image reference
+// that is followed by a tag (":") or digest ("@") separator. It is compiled
+// once at package scope rather than on every reconcile.
+var imageNameRegexp = regexp.MustCompile(`/([^/@]+)[@:]`)
+
+// getMetricsData returns the metrics dashboard definition for the given
+// ServingRuntime together with a flag telling whether the runtime is supported.
+// Runtimes annotated as NVIDIA NIM are matched by annotation; all others are
+// matched by the image name of their first container.
+func getMetricsData(runtime *kservev1alpha1.ServingRuntime) (string, bool) {
+	if runtime == nil {
+		return "", false
+	}
+	if runtime.Annotations[utils.IsNimRuntimeAnnotation] == "true" {
+		return constants.NIMMetricsData, true
+	}
+
+	// A runtime without containers has no image to inspect; indexing
+	// Containers[0] would panic, so report the runtime as unsupported.
+	if len(runtime.Spec.Containers) == 0 {
+		return "", false
+	}
+
+	// Default to the fallback name, which has no dashboard entry, so an image
+	// reference the regex cannot parse ends up reported as unsupported.
+	servingRuntime := constants.ServingRuntimeFallBackImageName
+	if match := imageNameRegexp.FindStringSubmatch(runtime.Spec.Containers[0].Image); len(match) >= 2 {
+		servingRuntime = match[1]
+	}
+
+	switch servingRuntime {
+	case constants.OvmsImageName:
+		return constants.OvmsMetricsData, true
+	case constants.TgisImageName:
+		return constants.TgisMetricsData, true
+	case constants.VllmImageName:
+		return constants.VllmMetricsData, true
+	case constants.CaikitImageName:
+		return constants.CaikitMetricsData, true
+	default:
+		return "", false
+	}
+}
diff --git a/controllers/utils/nim.go b/controllers/utils/nim.go
index 07e776af..4f8e2c00 100644
--- a/controllers/utils/nim.go
+++ b/controllers/utils/nim.go
@@ -91,6 +91,8 @@ const (
 	nimGetNgcCatalog      = "https://api.ngc.nvidia.com/v2/search/catalog/resources/CONTAINER"
 	nimGetNgcToken        = "https://authn.nvidia.com/token?service=ngc&"
 	nimGetNgcModelDataFmt = "https://api.ngc.nvidia.com/v2/org/%s/team/%s/repos/%s?resolve-labels=true"
+	// IsNimRuntimeAnnotation marks a ServingRuntime as an NVIDIA NIM runtime when set to "true".
+	IsNimRuntimeAnnotation = "runtimes.opendatahub.io/nvidia-nim"
 )
 
 var NimHttpClient HttpClient
@@ -296,6 +298,7 @@ func GetNimServingRuntimeTemplate(scheme *runtime.Scheme) (*v1alpha1.ServingRunt
 			Annotations: map[string]string{
 				"opendatahub.io/recommended-accelerators": "[\"nvidia.com/gpu\"]",
 				"openshift.io/display-name":               "NVIDIA NIM",
+				IsNimRuntimeAnnotation:                    "true",
 			},
 			Labels: map[string]string{
 				"opendatahub.io/dashboard": "true",
@@ -304,6 +307,10 @@ func GetNimServingRuntimeTemplate(scheme *runtime.Scheme) (*v1alpha1.ServingRunt
 		},
 		Spec: v1alpha1.ServingRuntimeSpec{
 			ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{
+				Annotations: map[string]string{
+					"prometheus.io/path": "/metrics",
+					"prometheus.io/port": "8000",
+				},
 				Containers: []corev1.Container{
 					{Env: []corev1.EnvVar{
 						{