diff --git a/config/prometheus-for-federation/grafana_deployment_patch.yaml b/config/prometheus-for-federation/grafana_deployment_patch.yaml index f4021e37f..4bd4e2608 100644 --- a/config/prometheus-for-federation/grafana_deployment_patch.yaml +++ b/config/prometheus-for-federation/grafana_deployment_patch.yaml @@ -22,3 +22,87 @@ value: name: grafana-mgc-metrics mountPath: /grafana-dashboard-definitions/0/grafana-mgc-metrics +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-gatewayclasses + configMap: + defaultMode: 420 + name: grafana-gatewayclasses +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-gateways + configMap: + defaultMode: 420 + name: grafana-gateways +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-httproutes + configMap: + defaultMode: 420 + name: grafana-httproutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-grpcroutes + configMap: + defaultMode: 420 + name: grafana-grpcroutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-tlsroutes + configMap: + defaultMode: 420 + name: grafana-tlsroutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-tcproutes + configMap: + defaultMode: 420 + name: grafana-tcproutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-udproutes + configMap: + defaultMode: 420 + name: grafana-udproutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-gatewayclasses + mountPath: /grafana-dashboard-definitions/0/grafana-gatewayclasses +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-gateways + mountPath: /grafana-dashboard-definitions/0/grafana-gateways +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-httproutes + mountPath: /grafana-dashboard-definitions/0/grafana-httproutes +- op: add + path: 
/spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-grpcroutes + mountPath: /grafana-dashboard-definitions/0/grafana-grpcroutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-tlsroutes + mountPath: /grafana-dashboard-definitions/0/grafana-tlsroutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-tcproutes + mountPath: /grafana-dashboard-definitions/0/grafana-tcproutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-udproutes + mountPath: /grafana-dashboard-definitions/0/grafana-udproutes diff --git a/config/prometheus-for-federation/ksm_clusterrole_patch.yaml b/config/prometheus-for-federation/ksm_clusterrole_patch.yaml new file mode 100644 index 000000000..1f37337b4 --- /dev/null +++ b/config/prometheus-for-federation/ksm_clusterrole_patch.yaml @@ -0,0 +1,26 @@ +- op: add + path: /rules/- + value: + apiGroups: + - "apiextensions.k8s.io" + resources: + - customresourcedefinitions + verbs: + - list + - watch +- op: add + path: /rules/- + value: + apiGroups: + - "gateway.networking.k8s.io" + resources: + - gateways + - gatewayclasses + - httproutes + - grpcroutes + - tcproutes + - tlsroutes + - udproutes + verbs: + - list + - watch \ No newline at end of file diff --git a/config/prometheus-for-federation/ksm_deployment_patch.yaml b/config/prometheus-for-federation/ksm_deployment_patch.yaml new file mode 100644 index 000000000..9926efcd9 --- /dev/null +++ b/config/prometheus-for-federation/ksm_deployment_patch.yaml @@ -0,0 +1,21 @@ +- op: replace + path: /spec/template/spec/containers/0/image + value: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.9.2 +- op: add + path: /spec/template/spec/volumes + value: + - name: custom-resource-state + configMap: + defaultMode: 420 + name: custom-resource-state +- op: add + path: /spec/template/spec/containers/0/volumeMounts + value: + - name: custom-resource-state + 
mountPath: /custom-resource-state +- op: add + path: /spec/template/spec/containers/0/args/- + value: --custom-resource-state-config-file +- op: add + path: /spec/template/spec/containers/0/args/- + value: /custom-resource-state/custom-resource-state.yaml \ No newline at end of file diff --git a/config/prometheus-for-federation/kustomization.yaml b/config/prometheus-for-federation/kustomization.yaml index 13292d9fd..67c1649bf 100644 --- a/config/prometheus-for-federation/kustomization.yaml +++ b/config/prometheus-for-federation/kustomization.yaml @@ -3,6 +3,8 @@ kind: Kustomization resources: - github.com/prometheus-operator/kube-prometheus?ref=release-0.11 + - github.com/Kuadrant/gateway-api-state-metrics?ref=main + - github.com/Kuadrant/gateway-api-state-metrics/config/examples/dashboards?ref=main - grafana_ingress.yaml # To scrape istio metrics, 3 configurations are required: # 1. Envoy metrics directly from the istio ingress gateway pod @@ -65,6 +67,18 @@ patches: # Patch grafana deployment to include dashboard configmaps patchesJson6902: + - target: + group: apps + version: v1 + kind: Deployment + name: kube-state-metrics + path: ksm_deployment_patch.yaml + - target: + group: rbac.authorization.k8s.io + version: v1 + kind: ClusterRole + name: kube-state-metrics + path: ksm_clusterrole_patch.yaml - target: group: apps version: v1 diff --git a/docs/how-to/metrics-federation.md b/docs/how-to/metrics-federation.md deleted file mode 100644 index 13928ab89..000000000 --- a/docs/how-to/metrics-federation.md +++ /dev/null @@ -1,103 +0,0 @@ -# Metrics Federation (WIP) - -## Introduction - -This walkthrough shows how to install a metrics federation stack locally and query Istio metrics from the hub. - ->**Note:** :exclamation: this walkthrough is incomplete. 
It will be updated as issues from https://github.com/Kuadrant/multicluster-gateway-controller/issues/197 land - -![arch](../images/metrics/metrics-federation.png) - -## Requirements - -* Local development environment has been set up as per the main README i.e. local env files have been created with AWS credentials & a zone - ->**Note:** :exclamation: this walkthrough will setup a zone in your AWS account and make changes to it for DNS purposes - -## Installation and Setup - -To setup a local instance with metrics federation, run: - -```bash -make local-setup OCM_SINGLE=true METRICS_FEDERATION=true MGC_WORKLOAD_CLUSTERS_COUNT=1 -``` - -Once complete, you should see something like the below in the output (you may need to scroll) - -``` - Connect to Thanos Query UI - - URL : https://thanos-query.172.31.0.2.nip.io -``` - -Open the url in a browser, accepting the non CA signed certificate. -In the Thanos UI query box, enter the below query and press 'Execute' - -``` -sum(rate(container_cpu_usage_seconds_total{namespace="monitoring",container="prometheus"}[5m])) -``` - -You should see a response in the table view. -In the Graph view you should see some data over time as well. - -![arch](../images/metrics/metrics-federation-example-data.png) - - -## Istio Metrics - -### Thanos Query UI - -To query Istio workload metrics, you should first deploy a Gateway & HttpRoute, and send traffic to it. -The easiest way to do this is by following the steps in the [OCM Walkthrough](../how-to/multicluster-gateways-walkthrough.md). Before going through the walkthrough, there are two things to note: Firstly, you do not need to re-run the `make local-setup` step, as that should have already been run with the `METRICS_FEDERATION` flag above. 
Secondly, you should set `METRICS=true` when it comes to the step to start and deploy the gateway controller, i.e: - -``` -make build-controller kind-load-controller deploy-controller METRICS=true -``` - -After completing the OCM walkthrough, use `curl` to send some traffic to the application - -```bash -while true; do curl -k https://$MGC_SUB_DOMAIN && sleep 5; done -``` - -Open the Thanos Query UI again and try the below query: - -``` -sum(rate(istio_requests_total{}[5m])) by(destination_workload) -``` - -In the graph view you should see something that looks like the graph below. -This shows the rate of requests (per second) for each Isito workload. -In this case, there is 1 workload, balanced across 2 clusters. - -![arch](../images/metrics/metrics-federation-traffic-data.png) - -To see the rate of requests per cluster (actually per pod across all clusters), the below query can be used. -Over long periods of time, this graph can show traffic load balancing between application instances. - -``` -sum(rate(istio_requests_total{}[5m])) by(pod) -``` - -![arch](../images/metrics/metrics-federation-traffic-data-per-pod.png) - -### Grafana UI - -In the output from `local-setup`, you should see something like the below (you may need to scroll) - -``` - Connect to Grafana Query UI - - URL : https://grafana.172.31.0.2.nip.io -``` - -Open Grafana in a browser, accepting the non CA signed certificate. -The default login is admin/admin. - -Using the left sidebar in the Grafana UI, navigate to `Dashboards > Browse` and click on the `Istio Workload Dashboard`. - -![arch](../images/metrics/metrics-federation-grafana-dashboard-1.png) - -You should be able to see the following layout, which will include data from the `curl` command you ran in the previous section. 
- -![arch](../images/metrics/metrics-federation-grafana-dashboard-2.png) diff --git a/docs/how-to/metrics-walkthrough.md b/docs/how-to/metrics-walkthrough.md index b547d3132..39f2353ef 100644 --- a/docs/how-to/metrics-walkthrough.md +++ b/docs/how-to/metrics-walkthrough.md @@ -1,3 +1,9 @@ +## Introduction + +This walkthrough shows how to install a metrics federation stack locally and query Istio metrics from the hub. + +![arch](../images/metrics/metrics-federation.png) + ## Installation and Configuration of Metrics This document will guide you in installing metrics for your application and provide directions on where to access them. Additionally, it will include dashboards set up to display these metrics. @@ -36,6 +42,44 @@ To generate traffic to the application, use `curl` as follows: while true; do curl -k https://$MGC_SUB_DOMAIN && sleep 5; done ``` +### Accessing the Thanos UI + +1. Access the Thanos UI by clicking or entering the provided URL for the Thanos Query UI in your web browser. + +``` +https://thanos-query.172.31.0.2.nip.io +``` + +2. In the Thanos UI query box, enter the below query and press 'Execute'. + +``` +sum(rate(container_cpu_usage_seconds_total{namespace="monitoring",container="prometheus"}[5m])) +``` + +You should see a response in the table view. +In the Graph view you should see some data over time as well. + +![arch](../images/metrics/metrics-federation-example-data.png) + +``` +sum(rate(istio_requests_total{}[5m])) by(destination_workload) +``` + +In the graph view you should see something that looks like the graph below. +This shows the rate of requests (per second) for each Istio workload. +In this case, there is 1 workload, balanced across 2 clusters. + +![arch](../images/metrics/metrics-federation-traffic-data.png) + +To see the rate of requests per cluster (actually per pod across all clusters), the below query can be used. +Over long periods of time, this graph can show traffic load balancing between application instances. 
+ +``` +sum(rate(istio_requests_total{}[5m])) by(pod) +``` + +![arch](../images/metrics/metrics-federation-traffic-data-per-pod.png) + ### Accessing the Grafana Dashboard To view the operational metrics and status, proceed with the following steps: @@ -48,7 +92,7 @@ https://grafana.172.31.0.2.nip.io 2. Navigate to the included Grafana Dashboard -Using the left sidebar in the Grafana UI, navigate to `Dashboards > Browse` and select either the `Istio Workload Dashboard` or `MGC SRE Dashboard`. +Using the left sidebar in the Grafana UI, navigate to `Dashboards > Browse` and select the `Istio Workload Dashboard`, the `MGC SRE Dashboard`, or any of the `Gateway API State` dashboards described below. ![arch](../images/metrics/metrics-federation-grafana-dashboard-3.png) @@ -60,6 +104,18 @@ The `MGC SRE Dashboard` displays real-time insights and visualizations of resour ![arch](../images/metrics/metrics-federation-grafana-dashboard-4.png) +The `Gateway API State / Gateways` dashboard provides real-time insights and visualizations for Gateways. It offers information about gateway listeners, listener status, gateway status, addresses, and attached routes. + +![arch](../images/metrics/metrics-federation-grafana-dashboard-5.png) + +The `Gateway API State / GatewayClasses` dashboard provides insights into Gateways organized by their respective Gateway Classes. It offers information about GatewayClasses and the supported features for each class. + +![arch](../images/metrics/metrics-federation-grafana-dashboard-6.png) + +The `Gateway API State / HTTPRoutes` dashboard, like the dashboards for the remaining route kinds, focuses on its routes and provides insights into their configuration. It displays their targeted and attached parent references, offering a detailed view of how these routes are structured and associated with their respective resources. 
+ +![arch](../images/metrics/metrics-federation-grafana-dashboard-7.png) + The Grafana dashboard will provide you with real-time insights and visualizations of your gateway's performance and metrics. By utilizing the Grafana dashboard, you can effectively monitor the health and behavior of your system, making informed decisions based on the displayed data. This monitoring capability enables you to proactively identify and address any potential issues to ensure the smooth operation of your environment. diff --git a/docs/images/metrics/metrics-federation-grafana-dashboard-1.png b/docs/images/metrics/metrics-federation-grafana-dashboard-1.png index 47079d1fa..34b814f2d 100644 Binary files a/docs/images/metrics/metrics-federation-grafana-dashboard-1.png and b/docs/images/metrics/metrics-federation-grafana-dashboard-1.png differ diff --git a/docs/images/metrics/metrics-federation-grafana-dashboard-3.png b/docs/images/metrics/metrics-federation-grafana-dashboard-3.png index ca0c4ccdc..e512f9612 100644 Binary files a/docs/images/metrics/metrics-federation-grafana-dashboard-3.png and b/docs/images/metrics/metrics-federation-grafana-dashboard-3.png differ diff --git a/docs/images/metrics/metrics-federation-grafana-dashboard-5.png b/docs/images/metrics/metrics-federation-grafana-dashboard-5.png new file mode 100644 index 000000000..819fec19d Binary files /dev/null and b/docs/images/metrics/metrics-federation-grafana-dashboard-5.png differ diff --git a/docs/images/metrics/metrics-federation-grafana-dashboard-6.png b/docs/images/metrics/metrics-federation-grafana-dashboard-6.png new file mode 100644 index 000000000..6bafc4840 Binary files /dev/null and b/docs/images/metrics/metrics-federation-grafana-dashboard-6.png differ diff --git a/docs/images/metrics/metrics-federation-grafana-dashboard-7.png b/docs/images/metrics/metrics-federation-grafana-dashboard-7.png new file mode 100644 index 000000000..5c77d43d1 Binary files /dev/null and 
b/docs/images/metrics/metrics-federation-grafana-dashboard-7.png differ diff --git a/hack/quickstart-metrics.sh b/hack/quickstart-metrics.sh index 82c37fbad..e8b6ea02f 100755 --- a/hack/quickstart-metrics.sh +++ b/hack/quickstart-metrics.sh @@ -20,16 +20,23 @@ export KFILT="docker run --rm -i ryane/kfilt" METRICS_FEDERATION=true -source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/kuadrant/multicluster-gateway-controller/main/hack/.quickstartEnv)" -source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/kuadrant/multicluster-gateway-controller/main/hack/.kindUtils)" -source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/kuadrant/multicluster-gateway-controller/main/hack/.cleanupUtils)" -source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/kuadrant/multicluster-gateway-controller/main/hack/.deployUtils)" -source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/kuadrant/multicluster-gateway-controller/main/hack/.startUtils)" -source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/kuadrant/multicluster-gateway-controller/main/hack/.setupEnv)" +if [ -z $MGC_BRANCH ]; then + MGC_BRANCH=${MGC_BRANCH:="main"} +fi +if [ -z $MGC_ACCOUNT ]; then + MGC_ACCOUNT=${MGC_ACCOUNT:="kuadrant"} +fi + +source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/${MGC_ACCOUNT}/multicluster-gateway-controller/${MGC_BRANCH}/hack/.quickstartEnv)" +source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/${MGC_ACCOUNT}/multicluster-gateway-controller/${MGC_BRANCH}/hack/.kindUtils)" +source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/${MGC_ACCOUNT}/multicluster-gateway-controller/${MGC_BRANCH}/hack/.cleanupUtils)" +source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/${MGC_ACCOUNT}/multicluster-gateway-controller/${MGC_BRANCH}/hack/.deployUtils)" +source /dev/stdin <<< "$(curl -s 
https://raw.githubusercontent.com/${MGC_ACCOUNT}/multicluster-gateway-controller/${MGC_BRANCH}/hack/.startUtils)" +source /dev/stdin <<< "$(curl -s https://raw.githubusercontent.com/${MGC_ACCOUNT}/multicluster-gateway-controller/${MGC_BRANCH}/hack/.setupEnv)" mkdir -p ${TMP_DIR} -MGC_REPO="github.com/kuadrant/multicluster-gateway-controller.git" +MGC_REPO=${MGC_REPO:="github.com/${MGC_ACCOUNT}/multicluster-gateway-controller.git"} PROMETHEUS_DIR=${MGC_REPO}/config/prometheus INGRESS_NGINX_DIR=${MGC_REPO}/config/ingress-nginx PROMETHEUS_FOR_FEDERATION_DIR=${MGC_REPO}/config/prometheus-for-federation @@ -48,7 +55,7 @@ fi deployIngressController ${KIND_CLUSTER_CONTROL_PLANE} ${INGRESS_NGINX_DIR} # Deploy Prometheus in the hub too -deployPrometheusForFederation ${KIND_CLUSTER_CONTROL_PLANE} ${PROMETHEUS_FOR_FEDERATION_DIR} +deployPrometheusForFederation ${KIND_CLUSTER_CONTROL_PLANE} ${PROMETHEUS_FOR_FEDERATION_DIR}?ref=${MGC_BRANCH} # Deploy Thanos components in the hub deployThanos ${KIND_CLUSTER_CONTROL_PLANE} ${THANOS_DIR} @@ -59,9 +66,13 @@ deployPrometheus ${KIND_CLUSTER_CONTROL_PLANE} # Apply Cluster Configurations to Workload clusters if [[ -n "${MGC_WORKLOAD_CLUSTERS_COUNT}" ]]; then for ((i = 1; i <= ${MGC_WORKLOAD_CLUSTERS_COUNT}; i++)); do - deployPrometheusForFederation ${KIND_CLUSTER_WORKLOAD}-${i} ${PROMETHEUS_FOR_FEDERATION_DIR} + deployPrometheusForFederation ${KIND_CLUSTER_WORKLOAD}-${i} ${PROMETHEUS_FOR_FEDERATION_DIR}?ref=${MGC_BRANCH} done fi +# Restarts the metrics to make sure all resources exists before ksm starts. +# https://github.com/kubernetes/kube-state-metrics/issues/2142 +kubectl delete pods -n monitoring -l app.kubernetes.io/name=kube-state-metrics + # Ensure the current context points to the control plane cluster kubectl config use-context kind-${KIND_CLUSTER_CONTROL_PLANE} \ No newline at end of file