From dbd607eeb4a71cc5ba31efafc2f222a318a89444 Mon Sep 17 00:00:00 2001 From: Eero Tamminen Date: Fri, 25 Oct 2024 06:15:56 +0300 Subject: [PATCH] Add monitoring option to (ChatQnA) Helm charts (#488) * Add monitoring option for all ChatQnA Helm components Also sync current serviceMonitors with "kubernetes-addons/Observability" manifests content. * Add Helm monitoring option documentation And refactor + fix HPA instructions (HPA setting is not global). --------- Signed-off-by: Eero Tamminen --- helm-charts/HPA.md | 69 +++----------- helm-charts/README.md | 7 +- helm-charts/chatqna/README.md | 16 ++-- .../templates/custom-metrics-configmap.yaml | 2 +- .../chatqna/templates/servicemonitor.yaml | 18 ++++ helm-charts/chatqna/values.yaml | 5 +- helm-charts/common/embedding-usvc/README.md | 1 + .../templates/servicemonitor.yaml | 18 ++++ helm-charts/common/embedding-usvc/values.yaml | 6 ++ helm-charts/common/llm-uservice/README.md | 1 + .../templates/servicemonitor.yaml | 18 ++++ helm-charts/common/llm-uservice/values.yaml | 6 ++ helm-charts/common/reranking-usvc/README.md | 1 + .../templates/servicemonitor.yaml | 18 ++++ helm-charts/common/reranking-usvc/values.yaml | 6 ++ helm-charts/common/retriever-usvc/README.md | 1 + .../templates/servicemonitor.yaml | 18 ++++ helm-charts/common/retriever-usvc/values.yaml | 6 ++ helm-charts/common/tei/README.md | 3 +- .../templates/horizontal-pod-autoscaler.yaml | 2 +- .../common/tei/templates/servicemonitor.yaml | 7 +- helm-charts/common/tei/values.yaml | 5 +- helm-charts/common/teirerank/README.md | 3 +- .../templates/horizontal-pod-autoscaler.yaml | 2 +- .../teirerank/templates/servicemonitor.yaml | 7 +- helm-charts/common/teirerank/values.yaml | 5 +- helm-charts/common/tgi/README.md | 3 +- .../templates/horizontal-pod-autoscaler.yaml | 2 +- .../common/tgi/templates/servicemonitor.yaml | 7 +- helm-charts/common/tgi/values.yaml | 5 +- helm-charts/monitoring.md | 89 +++++++++++++++++++ 31 files changed, 270 insertions(+), 87 deletions(-) create mode 100644 helm-charts/chatqna/templates/servicemonitor.yaml create mode 100644 helm-charts/common/embedding-usvc/templates/servicemonitor.yaml create mode 100644 helm-charts/common/llm-uservice/templates/servicemonitor.yaml create mode 100644 helm-charts/common/reranking-usvc/templates/servicemonitor.yaml create mode 100644 helm-charts/common/retriever-usvc/templates/servicemonitor.yaml create mode 100644 helm-charts/monitoring.md diff --git a/helm-charts/HPA.md b/helm-charts/HPA.md index 7d862ec97..3e99e76d8 100644 --- a/helm-charts/HPA.md +++ b/helm-charts/HPA.md @@ -5,7 +5,8 @@ - [Introduction](#introduction) - [Pre-conditions](#pre-conditions) - [Resource requests](#resource-requests) - - [Prometheus](#prometheus) + - [Prometheus metrics](#prometheus-metrics) + - [Prometheus-adapter](#prometheus-adapter) - [Gotchas](#gotchas) - [Enable HPA](#enable-hpa) - [Install](#install) @@ -14,7 +15,7 @@ ## Introduction -`horizontalPodAutoscaler` option enables HPA scaling for the TGI and TEI inferencing deployments: +`horizontalPodAutoscaler` option enables HPA scaling for relevant service components: https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/ Autoscaling is based on custom application metrics provided through [Prometheus](https://prometheus.io/). 
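For orientation, a minimal sketch of how the options touched by this patch might be combined at install time. The `chatqna` release/chart name and the per-subchart value paths are assumptions inferred from the charts modified below, not part of the patch itself:

```console
# hedged sketch: monitoring is a global toggle, HPA is enabled per subchart
$ helm install chatqna chatqna \
    --set global.monitoring=true \
    --set tgi.horizontalPodAutoscaler.enabled=true \
    --set tei.horizontalPodAutoscaler.enabled=true \
    --set teirerank.horizontalPodAutoscaler.enabled=true
```

This mirrors the template guards in the diffs below: serviceMonitor objects are rendered from `global.monitoring` alone, while the HPA objects require both flags (`and .Values.global.monitoring .Values.horizontalPodAutoscaler.enabled`).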
@@ -41,24 +42,21 @@ small requests would be an issue:
   that provide further isolation
 - Containers can become non-functional when their actual resource usage
   crosses the specified limits
 
-### Prometheus
+### Prometheus metrics
 
-If cluster does not run [Prometheus operator](https://github.com/prometheus-operator/kube-prometheus)
-yet, it SHOULD be be installed before enabling HPA, e.g. by using a Helm chart for it:
-https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
+Autoscaling requires a k8s Prometheus installation, and monitoring to be enabled in the top-level chart.
+See [monitoring instructions](monitoring.md) for details.
 
-Prometheus-adapter is also needed, to provide k8s custom metrics based on collected TGI / TEI metrics:
+### Prometheus-adapter
+
+Prometheus-adapter is also needed to provide k8s custom metrics based on collected service metrics:
 https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter
 
-To install (older versions) of them:
+Install the adapter after installing Prometheus:
 
 ```console
-$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
-$ helm repo update
 $ prom_ns=monitoring # namespace for Prometheus/-adapter
-$ kubectl create ns $prom_ns
-$ helm install prometheus-stack prometheus-community/kube-prometheus-stack --version 55.5.2 -n $prom_ns
-$ kubectl get services -n $prom_ns
+$ kubectl get svc -n $prom_ns
 $ helm install prometheus-adapter prometheus-community/prometheus-adapter --version 4.10.0 -n $prom_ns \
 --set prometheus.url=http://prometheus-stack-kube-prom-prometheus.$prom_ns.svc \
 --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false
@@ -80,9 +78,6 @@ Why HPA is opt-in:
   Prometheus-operator and -adapter are missing support needed to automate that
 - Top level chart name needs to conform to Prometheus metric naming conventions,
   as it is also used as a metric name prefix (with dashes converted to underscores)
-- By default Prometheus adds [k8s RBAC rules](https://github.com/prometheus-operator/kube-prometheus/blob/main/manifests/prometheus-roleBindingSpecificNamespaces.yaml)
-  for accessing metrics from `default`, `kube-system` and `monitoring` namespaces. If Helm is
-  asked to install OPEA services to some other namespace, those rules need to be updated accordingly
 - Unless pod resource requests, affinity rules, scheduling topology constraints and/or cluster
   NRI policies are used to better isolate service inferencing pods from each other, instances
   scaled up on same node may never get to ready state
@@ -143,46 +138,10 @@ $ kubectl -n $prom_ns delete $(kubectl -n $prom_ns get pod --selector $selector
 
 ## Verify
 
-To verify that horizontalPodAutoscaler options work, it's better to check that both metrics
-from the inferencing services, and HPA rules using custom metrics generated from them, do work.
-
-(Names of the object names depend on whether Prometheus was installed from manifests, or Helm,
-and the release name given for its Helm install.)
-
-Check installed Prometheus service names:
-
-```console
-$ prom_ns=monitoring # Prometheus/-adapter namespace
-$ kubectl -n $prom_ns get svc
-```
-
-Use service name matching your Prometheus installation:
-
-```console
-$ prom_svc=prometheus-stack-kube-prom-prometheus # Metrics service
-```
-
-Verify Prometheus found metric endpoints for chart services, i.e. 
last number on `curl` output is non-zero: - -```console -$ chart=chatqna # OPEA chart release name -$ prom_url=http://$(kubectl -n $prom_ns get -o jsonpath="{.spec.clusterIP}:{.spec.ports[0].port}" svc/$prom_svc) -$ curl --no-progress-meter $prom_url/metrics | grep scrape_pool_targets.*$chart -``` - -**NOTE**: TGI and TEI inferencing services provide metrics endpoint only after they've processed -their first request, and reranking service will be used only after context data has been uploaded! - -Check that both Prometheus metrics required from TGI are available: - -```console -$ for m in sum count; do - curl --no-progress-meter $prom_url/api/v1/query? \ - --data-urlencode query=tgi_request_inference_duration_$m'{service="'$chart'-tgi"}' | jq; -done | grep __name__ -``` +After [verifying that service metrics work](monitoring.md#verify), +one can verify that HPA rules can access custom metrics based on them. -PrometheusAdapter lists corresponding TGI and/or TEI custom metrics (prefixed with chart name): +Verify that there are (TGI and/or TEI) custom metrics prefixed with chart name: ```console $ kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 | jq .resources[].name diff --git a/helm-charts/README.md b/helm-charts/README.md index 2339cd02b..92474d411 100644 --- a/helm-charts/README.md +++ b/helm-charts/README.md @@ -11,6 +11,7 @@ This directory contains Helm charts for [GenAIComps](https://github.com/opea-pro - [From Source Code](#from-source-code) - [Using Helm Charts repository](#using-helm-charts-repository) - [Helm Charts Options](#helm-charts-options) +- [Using HPA (autoscaling)](#using-hpa-autoscaling) - [Using Persistent Volume](#using-persistent-volume) - [Using Private Docker Hub](#using-private-docker-hub) - [Generate manifests from Helm Charts](#generate-manifests-from-helm-charts) @@ -88,9 +89,13 @@ There are global options (which should be shared across all components of a work | global | http_proxy https_proxy no_proxy | Proxy settings. If you are running the workloads behind the proxy, you'll have to add your proxy settings here. | | global | modelUsePVC | The PersistentVolumeClaim you want to use as HuggingFace hub cache. Default "" means not using PVC. Only one of modelUsePVC/modelUseHostPath can be set. | | global | modelUseHostPath | If you don't have Persistent Volume in your k8s cluster and want to use local directory as HuggingFace hub cache, set modelUseHostPath to your local directory name. Note that this can't share across nodes. Default "". Only one of modelUsePVC/modelUseHostPath can be set. | -| chatqna | horizontalPodAutoscaler.enabled | Enable HPA autoscaling for TGI and TEI service deployments based on metrics they provide. See [Pre-conditions](HPA.md#pre-conditions) and [Gotchas](HPA.md#gotchas) before enabling! | +| global | monitoring | Enable monitoring for (ChatQnA) service components. See [Pre-conditions](monitoring.md#pre-conditions) before enabling! | | tgi | LLM_MODEL_ID | The model id you want to use for tgi server. Default "Intel/neural-chat-7b-v3-3". | +## Using HPA (autoscaling) + +See [HPA instructions](HPA.md) on how to enable horizontal pod autoscaling for service components, based on their usage metrics. + ## Using Persistent Volume It's common to use Persistent Volume (PV) for model caches (HuggingFace hub cache) in a production k8s cluster. PersistentVolumeClaim (PVC) can be passed to containers, but it's the user's responsibility to create the PVC depending on your k8s cluster's capability. 
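As a hedged illustration of that responsibility, a PVC could be created inline and then passed via `global.modelUsePVC`; the `model-volume` name matches the commented example in `chatqna/values.yaml`, while the access mode and size below are assumptions to adapt to your storage class:

```console
$ kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-volume
spec:
  accessModes:
    - ReadWriteMany   # assumption: lets pods on multiple nodes share the model cache
  resources:
    requests:
      storage: 100Gi  # assumption: size it for your models
EOF
$ helm install chatqna chatqna --set global.modelUsePVC=model-volume
```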
diff --git a/helm-charts/chatqna/README.md b/helm-charts/chatqna/README.md
index e5b47798b..a591ff1e6 100644
--- a/helm-charts/chatqna/README.md
+++ b/helm-charts/chatqna/README.md
@@ -38,7 +38,7 @@ helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --
 
 ### IMPORTANT NOTE
 
-1. Make sure your `MODELDIR` exists on the node where your workload is schedueled so you can cache the downloaded model for next time use. Otherwise, set `global.modelUseHostPath` to 'null' if you don't want to cache the model.
+1. Make sure your `MODELDIR` exists on the node where your workload is scheduled so you can cache the downloaded model for next time use. Otherwise, set `global.modelUseHostPath` to 'null' if you don't want to cache the model.
 
 ## Verify
 
@@ -71,13 +71,13 @@ Open a browser to access `http://:${port}` to play with the
 
 ## Values
 
-| Key                                     | Type   | Default                       | Description                                                                                                                               |
-| --------------------------------------- | ------ | ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
-| image.repository | string | `"opea/chatqna"` | |
-| service.port | string | `"8888"` | |
-| tgi.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory |
-| global.horizontalPodAutoscaler.enabled | bool | false | HPA autoscaling for the TGI and TEI service deployments based on metrics they provide. See HPA section in ../README.md before enabling! |
+| Key               | Type   | Default                       | Description                                                                              |
+| ----------------- | ------ | ----------------------------- | ---------------------------------------------------------------------------------------- |
+| image.repository | string | `"opea/chatqna"` | |
+| service.port | string | `"8888"` | |
+| tgi.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Model id from https://huggingface.co/, or predownloaded model directory |
+| global.monitoring | bool | false | Enable usage metrics for the service components. See ../monitoring.md before enabling! |
 
 ## Troubleshooting
 
-If you encount any issues, please refer to [ChatQnA Troubleshooting](troubleshooting.md)
+If you encounter any issues, please refer to [ChatQnA Troubleshooting](troubleshooting.md)
diff --git a/helm-charts/chatqna/templates/custom-metrics-configmap.yaml b/helm-charts/chatqna/templates/custom-metrics-configmap.yaml
index c02b07bf9..bd5afb3f3 100644
--- a/helm-charts/chatqna/templates/custom-metrics-configmap.yaml
+++ b/helm-charts/chatqna/templates/custom-metrics-configmap.yaml
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if and .Values.global.monitoring .Values.horizontalPodAutoscaler.enabled }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/helm-charts/chatqna/templates/servicemonitor.yaml b/helm-charts/chatqna/templates/servicemonitor.yaml
new file mode 100644
index 000000000..062c1b668
--- /dev/null
+++ b/helm-charts/chatqna/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "chatqna.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "chatqna.selectorLabels" . 
| nindent 6 }}
+  endpoints:
+  - port: chatqna
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/chatqna/values.yaml b/helm-charts/chatqna/values.yaml
index 8fb54ffdd..fb6c725e5 100644
--- a/helm-charts/chatqna/values.yaml
+++ b/helm-charts/chatqna/values.yaml
@@ -67,5 +67,8 @@ global:
   # modelUseHostPath: /mnt/opea-models
   # modelUsePVC: model-volume
 
-  # Prometheus Helm installation info for subchart serviceMonitors
+  # Install Prometheus serviceMonitors for service components
+  monitoring: false
+
+  # Prometheus Helm install release name needed for serviceMonitors
   prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/embedding-usvc/README.md b/helm-charts/common/embedding-usvc/README.md
index 75377ed11..1734fef93 100644
--- a/helm-charts/common/embedding-usvc/README.md
+++ b/helm-charts/common/embedding-usvc/README.md
@@ -49,3 +49,4 @@ curl http://localhost:6000/v1/embeddings \
 | image.repository | string | `"opea/embedding-tei"` | |
 | service.port | string | `"6000"` | |
 | TEI_EMBEDDING_ENDPOINT | string | `""` | |
+| global.monitoring | bool | false | |
diff --git a/helm-charts/common/embedding-usvc/templates/servicemonitor.yaml b/helm-charts/common/embedding-usvc/templates/servicemonitor.yaml
new file mode 100644
index 000000000..ea26f6cc2
--- /dev/null
+++ b/helm-charts/common/embedding-usvc/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "embedding-usvc.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "embedding-usvc.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: embedding-usvc
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/common/embedding-usvc/values.yaml b/helm-charts/common/embedding-usvc/values.yaml
index 5423433e5..ca3d30791 100644
--- a/helm-charts/common/embedding-usvc/values.yaml
+++ b/helm-charts/common/embedding-usvc/values.yaml
@@ -89,3 +89,9 @@ global:
   http_proxy: ""
   https_proxy: ""
   no_proxy: ""
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/llm-uservice/README.md b/helm-charts/common/llm-uservice/README.md
index 0f2337852..3e829522b 100644
--- a/helm-charts/common/llm-uservice/README.md
+++ b/helm-charts/common/llm-uservice/README.md
@@ -52,3 +52,4 @@ curl http://localhost:9000/v1/chat/completions \
 | image.repository | string | `"opea/llm-tgi"` | |
 | service.port | string | `"9000"` | |
 | TGI_LLM_ENDPOINT | string | `""` | LLM endpoint |
+| global.monitoring | bool | false | Service usage metrics |
diff --git a/helm-charts/common/llm-uservice/templates/servicemonitor.yaml b/helm-charts/common/llm-uservice/templates/servicemonitor.yaml
new file mode 100644
index 000000000..ecb83fc34
--- /dev/null
+++ b/helm-charts/common/llm-uservice/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "llm-uservice.selectorLabels" . 
| nindent 6 }}
+  endpoints:
+  - port: llm-uservice
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml
index 6a2af0bbb..8eae02042 100644
--- a/helm-charts/common/llm-uservice/values.yaml
+++ b/helm-charts/common/llm-uservice/values.yaml
@@ -90,3 +90,9 @@ global:
   https_proxy: ""
   no_proxy: ""
   HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/reranking-usvc/README.md b/helm-charts/common/reranking-usvc/README.md
index 2c6adb141..d2e6ef25e 100644
--- a/helm-charts/common/reranking-usvc/README.md
+++ b/helm-charts/common/reranking-usvc/README.md
@@ -49,3 +49,4 @@ curl http://localhost:8000/v1/reranking \
 | image.repository | string | `"opea/reranking-tgi"` | |
 | TEI_RERANKING_ENDPOINT | string | `""` | |
 | service.port | string | `"8000"` | |
+| global.monitoring | bool | false | |
diff --git a/helm-charts/common/reranking-usvc/templates/servicemonitor.yaml b/helm-charts/common/reranking-usvc/templates/servicemonitor.yaml
new file mode 100644
index 000000000..8d1306edf
--- /dev/null
+++ b/helm-charts/common/reranking-usvc/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "reranking-usvc.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "reranking-usvc.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: reranking-usvc
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/common/reranking-usvc/values.yaml b/helm-charts/common/reranking-usvc/values.yaml
index 6014bc0f1..5ce273f63 100644
--- a/helm-charts/common/reranking-usvc/values.yaml
+++ b/helm-charts/common/reranking-usvc/values.yaml
@@ -89,3 +89,9 @@ global:
   http_proxy: ""
   https_proxy: ""
   no_proxy: ""
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/retriever-usvc/README.md b/helm-charts/common/retriever-usvc/README.md
index 9a84ee9a0..a64e4f292 100644
--- a/helm-charts/common/retriever-usvc/README.md
+++ b/helm-charts/common/retriever-usvc/README.md
@@ -52,6 +52,7 @@ curl http://localhost:7000/v1/retrieval \
 | service.port | string | `"7000"` | |
 | REDIS_URL | string | `""` | |
 | TEI_EMBEDDING_ENDPOINT | string | `""` | |
+| global.monitoring | bool | false | |
 
 ## Milvus support
 
diff --git a/helm-charts/common/retriever-usvc/templates/servicemonitor.yaml b/helm-charts/common/retriever-usvc/templates/servicemonitor.yaml
new file mode 100644
index 000000000..2cfede645
--- /dev/null
+++ b/helm-charts/common/retriever-usvc/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "retriever-usvc.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "retriever-usvc.selectorLabels" . 
| nindent 6 }}
+  endpoints:
+  - port: retriever-usvc
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/common/retriever-usvc/values.yaml b/helm-charts/common/retriever-usvc/values.yaml
index c1bb72bc5..a71cac068 100644
--- a/helm-charts/common/retriever-usvc/values.yaml
+++ b/helm-charts/common/retriever-usvc/values.yaml
@@ -107,3 +107,9 @@ global:
   https_proxy: ""
   no_proxy: ""
   HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/tei/README.md b/helm-charts/common/tei/README.md
index 484b7cd8a..e809ddb35 100644
--- a/helm-charts/common/tei/README.md
+++ b/helm-charts/common/tei/README.md
@@ -41,4 +41,5 @@ curl http://localhost:2081/embed -X POST -d '{"inputs":"What is Deep Learning?"}
 | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tei will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
 | image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | |
 | image.tag | string | `"cpu-1.5"` | |
-| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA section](../../HPA.md) before enabling! |
+| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
+| global.monitoring | bool | false | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
diff --git a/helm-charts/common/tei/templates/horizontal-pod-autoscaler.yaml b/helm-charts/common/tei/templates/horizontal-pod-autoscaler.yaml
index 277184ee6..1a2907361 100644
--- a/helm-charts/common/tei/templates/horizontal-pod-autoscaler.yaml
+++ b/helm-charts/common/tei/templates/horizontal-pod-autoscaler.yaml
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if and .Values.global.monitoring .Values.horizontalPodAutoscaler.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
diff --git a/helm-charts/common/tei/templates/servicemonitor.yaml b/helm-charts/common/tei/templates/servicemonitor.yaml
index 70398ff69..96743442b 100644
--- a/helm-charts/common/tei/templates/servicemonitor.yaml
+++ b/helm-charts/common/tei/templates/servicemonitor.yaml
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if .Values.global.monitoring }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
@@ -13,7 +13,6 @@ spec:
     matchLabels:
       {{- include "tei.selectorLabels" . 
| nindent 6 }}
   endpoints:
-  - interval: 4s
-    port: tei
-    scheme: http
+  - port: tei
+    interval: 5s
 {{- end }}
diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml
index 15f438503..ff1ec3b4b 100644
--- a/helm-charts/common/tei/values.yaml
+++ b/helm-charts/common/tei/values.yaml
@@ -106,5 +106,8 @@ global:
   modelUseHostPath: ""
   modelUsePVC: ""
 
-  # Prometheus Helm installation info for serviceMonitor
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
   prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/teirerank/README.md b/helm-charts/common/teirerank/README.md
index 68f799c30..7bec469e1 100644
--- a/helm-charts/common/teirerank/README.md
+++ b/helm-charts/common/teirerank/README.md
@@ -44,4 +44,5 @@ curl http://localhost:2082/rerank \
 | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, teirerank will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
 | image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | |
 | image.tag | string | `"cpu-1.5"` | |
-| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA section](../../HPA.md) before enabling! |
+| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
+| global.monitoring | bool | false | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
diff --git a/helm-charts/common/teirerank/templates/horizontal-pod-autoscaler.yaml b/helm-charts/common/teirerank/templates/horizontal-pod-autoscaler.yaml
index f54dc070d..a45e7949b 100644
--- a/helm-charts/common/teirerank/templates/horizontal-pod-autoscaler.yaml
+++ b/helm-charts/common/teirerank/templates/horizontal-pod-autoscaler.yaml
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if and .Values.global.monitoring .Values.horizontalPodAutoscaler.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
diff --git a/helm-charts/common/teirerank/templates/servicemonitor.yaml b/helm-charts/common/teirerank/templates/servicemonitor.yaml
index 423cb9fca..13110adbe 100644
--- a/helm-charts/common/teirerank/templates/servicemonitor.yaml
+++ b/helm-charts/common/teirerank/templates/servicemonitor.yaml
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if .Values.global.monitoring }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
@@ -13,7 +13,6 @@ spec:
     matchLabels:
       {{- include "teirerank.selectorLabels" . 
| nindent 6 }}
   endpoints:
-  - interval: 4s
-    port: teirerank
-    scheme: http
+  - port: teirerank
+    interval: 5s
 {{- end }}
diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml
index 176dea10a..72039e6ce 100644
--- a/helm-charts/common/teirerank/values.yaml
+++ b/helm-charts/common/teirerank/values.yaml
@@ -106,5 +106,8 @@ global:
   modelUseHostPath: ""
   modelUsePVC: ""
 
-  # Prometheus Helm installation info for serviceMonitor
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
   prometheusRelease: prometheus-stack
diff --git a/helm-charts/common/tgi/README.md b/helm-charts/common/tgi/README.md
index dd2507ea3..1064cf28a 100644
--- a/helm-charts/common/tgi/README.md
+++ b/helm-charts/common/tgi/README.md
@@ -48,4 +48,5 @@ curl http://localhost:2080/generate \
 | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
 | image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | |
 | image.tag | string | `"1.4"` | |
-| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA section](../../HPA.md) before enabling! |
+| horizontalPodAutoscaler.enabled | bool | false | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
+| global.monitoring | bool | false | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
diff --git a/helm-charts/common/tgi/templates/horizontal-pod-autoscaler.yaml b/helm-charts/common/tgi/templates/horizontal-pod-autoscaler.yaml
index 276ff067b..d51509c37 100644
--- a/helm-charts/common/tgi/templates/horizontal-pod-autoscaler.yaml
+++ b/helm-charts/common/tgi/templates/horizontal-pod-autoscaler.yaml
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if and .Values.global.monitoring .Values.horizontalPodAutoscaler.enabled }}
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
 metadata:
diff --git a/helm-charts/common/tgi/templates/servicemonitor.yaml b/helm-charts/common/tgi/templates/servicemonitor.yaml
index fdb1159bd..978174226 100644
--- a/helm-charts/common/tgi/templates/servicemonitor.yaml
+++ b/helm-charts/common/tgi/templates/servicemonitor.yaml
@@ -6,7 +6,7 @@
 # Metric descriptions:
 # - https://github.com/huggingface/text-generation-inference/discussions/1127#discussioncomment-7240527
 
-{{- if .Values.horizontalPodAutoscaler.enabled }}
+{{- if .Values.global.monitoring }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
@@ -18,7 +18,6 @@ spec:
     matchLabels:
       {{- include "tgi.selectorLabels" . 
| nindent 6 }}
   endpoints:
-  - interval: 4s
-    port: tgi
-    scheme: http
+  - port: tgi
+    interval: 5s
 {{- end }}
diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml
index e3e72e6c4..dde8d6815 100644
--- a/helm-charts/common/tgi/values.yaml
+++ b/helm-charts/common/tgi/values.yaml
@@ -138,5 +138,8 @@ global:
   modelUseHostPath: ""
   modelUsePVC: ""
 
-  # Prometheus Helm installation info for serviceMonitor
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
   prometheusRelease: prometheus-stack
diff --git a/helm-charts/monitoring.md b/helm-charts/monitoring.md
new file mode 100644
index 000000000..506f310df
--- /dev/null
+++ b/helm-charts/monitoring.md
@@ -0,0 +1,89 @@
+# Monitoring support
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [Pre-conditions](#pre-conditions)
+  - [Prometheus install](#prometheus-install)
+  - [Helm options](#helm-options)
+- [Gotchas](#gotchas)
+- [Install](#install)
+- [Verify](#verify)
+
+## Introduction
+
+Monitoring provides service component usage metrics for [Prometheus](https://prometheus.io/),
+which can be visualized e.g. in [Grafana](https://grafana.com/).
+
+Scaling the services automatically based on their usage with [HPA](HPA.md) also relies on these metrics.
+
+## Pre-conditions
+
+### Prometheus install
+
+If the cluster does not run [Prometheus operator](https://github.com/prometheus-operator/kube-prometheus)
+yet, it SHOULD be installed before enabling monitoring, e.g. by using a Helm chart for it:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
+
+To install an (older) version of Prometheus:
+
+```console
+$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+$ helm repo update
+$ prom_ns=monitoring # namespace for Prometheus
+$ kubectl create ns $prom_ns
+$ helm install prometheus-stack prometheus-community/kube-prometheus-stack --version 55.5.2 -n $prom_ns
+```
+
+### Helm options
+
+If Prometheus is installed under a release name other than `prometheus-stack`,
+provide that as the `global.prometheusRelease` value for the OPEA service Helm install,
+or in its `values.yaml` file. Otherwise Prometheus ignores the installed
+`serviceMonitor` objects.
+
+## Gotchas
+
+By default Prometheus adds [k8s RBAC rules](https://github.com/prometheus-operator/kube-prometheus/blob/main/manifests/prometheus-roleBindingSpecificNamespaces.yaml)
+for detecting `serviceMonitor`s and querying metrics from the `default`, `kube-system` and `monitoring` namespaces.
+If Helm is asked to install an OPEA service into some other namespace, those rules need to be updated accordingly.
+
+## Install
+
+Install the Helm chart with the `global.monitoring=true` option (e.g. `--set global.monitoring=true`).
+
+## Verify
+
+Check installed Prometheus service names:
+
+```console
+$ prom_ns=monitoring # Prometheus namespace
+$ kubectl -n $prom_ns get svc
+```
+
+(Object names depend on whether Prometheus was installed from manifests or Helm,
+and the release name given for its Helm install.)
+
+Use the service name matching your Prometheus installation:
+
+```console
+$ prom_svc=prometheus-stack-kube-prom-prometheus # Metrics service
+```
+
+Verify Prometheus found metric endpoints for chart services, i.e. 
last number on `curl` output is non-zero:
+
+```console
+$ chart=chatqna # OPEA chart release name
+$ prom_url=http://$(kubectl -n $prom_ns get -o jsonpath="{.spec.clusterIP}:{.spec.ports[0].port}" svc/$prom_svc)
+$ curl --no-progress-meter $prom_url/metrics | grep scrape_pool_targets.*$chart
+```
+
+Check that Prometheus metrics from the TGI inference component are available:
+
+```console
+$ curl --no-progress-meter $prom_url/api/v1/query? \
+  --data-urlencode 'query=tgi_queue_size{service="'$chart'-tgi"}' | jq
+```
+
+**NOTE**: services provide metrics only after they've processed their first request.
+And the reranking service will be used only after context data has been uploaded!
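To inspect the same metrics interactively, one can also port-forward the Prometheus service and browse its UI; the 9090 port below is the stock Prometheus service port, so verify it against the `kubectl get svc` output above:

```console
$ kubectl -n $prom_ns port-forward svc/$prom_svc 9090:9090
# then open http://localhost:9090/targets to check the scrape targets,
# or http://localhost:9090/graph to run the above queries interactively
```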