diff --git a/README.md b/README.md new file mode 100644 index 000000000..4c8e1a6c6 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# kubecost-quickstart +Quickly install kube-state-metrics, prometheus, and grafana on your cluster with helm. Requires a helm installation. + +> kubectl apply -f helm.yaml + +Sets up a suggestion for roles for your helm service. + +Once the roles have been set up, navigate to the kubecost-quickstart home directory and run + +> helm install cost-analyzer --name cost-analyzer --namespace monitoring + +View the dashboard locally with + +> kubectl port-forward --namespace monitoring deployment/cost-analyzer-grafana 3000 + +Sample Cluster Dashboard Here: + +![Sample Dashboard](https://cdn-images-1.medium.com/max/800/1*rQI3-gKtgKwHSs7JgIdorw.png) + + + diff --git a/cost-analyzer/Chart.yaml b/cost-analyzer/Chart.yaml new file mode 100644 index 000000000..2c05a4ac8 --- /dev/null +++ b/cost-analyzer/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart that sets up Prometheus and Grafana to monitor cloud costs. +name: cost-analyzer +version: 0.1.0 diff --git a/cost-analyzer/charts/grafana/Chart.yaml b/cost-analyzer/charts/grafana/Chart.yaml new file mode 100755 index 000000000..99e7fd1cd --- /dev/null +++ b/cost-analyzer/charts/grafana/Chart.yaml @@ -0,0 +1,15 @@ +appVersion: 5.3.1 +description: The leading tool for querying and visualizing time series and metrics. 
+engine: gotpl +home: https://grafana.net +icon: https://raw.githubusercontent.com/grafana/grafana/master/public/img/logo_transparent_400x.png +kubeVersion: ^1.8.0-0 +maintainers: +- email: zanhsieh@gmail.com + name: zanhsieh +- email: rluckie@cisco.com + name: rtluckie +name: grafana +sources: +- https://github.com/grafana/grafana +version: 1.17.2 diff --git a/cost-analyzer/charts/grafana/README.md b/cost-analyzer/charts/grafana/README.md new file mode 100755 index 000000000..03c70b520 --- /dev/null +++ b/cost-analyzer/charts/grafana/README.md @@ -0,0 +1,162 @@ +# Grafana Helm Chart + +* Installs the web dashboarding system [Grafana](http://grafana.org/) + +## TL;DR; + +```console +$ helm install stable/grafana +``` + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```console +$ helm install --name my-release stable/grafana +``` + +## Uninstalling the Chart + +To uninstall/delete the my-release deployment: + +```console +$ helm delete my-release +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. + + +## Configuration + +| Parameter | Description | Default | +|---------------------------------|-----------------------------------------------|---------------------------------------------------------| +| `replicas` | Number of nodes | `1` | +| `deploymentStrategy` | Deployment strategy | `RollingUpdate` | +| `livenessProbe` | Liveness Probe settings | `{ "httpGet": { "path": "/api/health", "port": 3000 }, "initialDelaySeconds": 60, "timeoutSeconds": 30, "failureThreshold": 10 }` | +| `readinessProbe` | Readiness Probe settings | `{ "httpGet": { "path": "/api/health", "port": 3000 } }`| +| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "fsGroup": 472}` | +| `image.repository` | Image repository | `grafana/grafana` | +| `image.tag` | Image tag. 
(`Must be >= 5.0.0`) | `5.3.1` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `service.type` | Kubernetes service type | `ClusterIP` | +| `service.port` | Kubernetes port where service is exposed | `80` | +| `service.annotations` | Service annotations | `{}` | +| `service.labels` | Custom labels | `{}` | +| `ingress.enabled` | Enables Ingress | `false` | +| `ingress.annotations` | Ingress annotations | `{}` | +| `ingress.labels` | Custom labels | `{}` | +| `ingress.hosts` | Ingress accepted hostnames | `[]` | +| `ingress.tls` | Ingress TLS configuration | `[]` | +| `resources` | CPU/Memory resource requests/limits | `{}` | +| `nodeSelector` | Node labels for pod assignment | `{}` | +| `tolerations` | Toleration labels for pod assignment | `[]` | +| `affinity` | Affinity settings for pod assignment | `{}` | +| `persistence.enabled` | Use persistent volume to store data | `false` | +| `persistence.size` | Size of persistent volume claim | `10Gi` | +| `persistence.existingClaim` | Use an existing PVC to persist data | `nil` | +| `persistence.storageClassName` | Type of persistent volume claim | `nil` | +| `persistence.accessModes` | Persistence access modes | `[]` | +| `persistence.subPath` | Mount a sub dir of the persistent volume | `""` | +| `schedulerName` | Alternate scheduler name | `nil` | +| `env` | Extra environment variables passed to pods | `{}` | +| `envFromSecret` | Name of a Kubernetes secret (must be manually created in the same namespace) containing values to be added to the environment | `""` | +| `extraSecretMounts` | Additional grafana server secret mounts | `[]` | +| `plugins` | Plugins to be loaded along with Grafana | `[]` | +| `datasources` | Configure grafana datasources | `{}` | +| `dashboardProviders` | Configure grafana dashboard providers | `{}` | +| `dashboards` | Dashboards to import | `{}` | +| `dashboardsConfigMaps` | ConfigMaps reference that contains dashboards | `{}` | +| `grafana.ini` | Grafana's primary 
configuration | `{}` | +| `ldap.existingSecret` | The name of an existing secret containing the `ldap.toml` file, this must have the key `ldap-toml`. | `""` | +| `ldap.config` | Grafana's LDAP configuration | `""` | +| `annotations` | Deployment annotations | `{}` | +| `podAnnotations` | Pod annotations | `{}` | +| `sidecar.dashboards.enabled` | Enables the cluster wide search for dashboards and adds/updates/deletes them in grafana | `false` | +| `sidecar.dashboards.label` | Label that config maps with dashboards should have to be added | `false` | +| `sidecar.datasources.enabled` | Enables the cluster wide search for datasources and adds/updates/deletes them in grafana |`false` | +| `sidecar.datasources.label` | Label that config maps with datasources should have to be added | `false` | +| `smtp.existingSecret` | The name of an existing secret containing the SMTP credentials, this must have the keys `user` and `password`. | `""` | + +## Sidecar for dashboards + +If the parameter `sidecar.dashboards.enabled` is set, a sidecar container is deployed in the grafana pod. This container watches all config maps in the cluster and filters out the ones with a label as defined in `sidecar.dashboards.label`. The files defined in those configmaps are written to a folder and accessed by grafana. Changes to the configmaps are monitored and the imported dashboards are deleted/updated. A recommendation is to use one configmap per dashboard, as a reduction of multiple dashboards inside one configmap is currently not properly mirrored in grafana. +Example dashboard config: +``` +apiVersion: v1 +kind: ConfigMap +metadata: + name: sample-grafana-dashboard + labels: + grafana_dashboard: 1 +data: + k8s-dashboard.json: |- + [...] +``` + +## Sidecar for datasources + +If the parameter `sidecar.datasources.enabled` is set, a sidecar container is deployed in the grafana pod. 
This container watches all config maps in the cluster and filters out the ones with a label as defined in `sidecar.datasources.label`. The files defined in those configmaps are written to a folder and accessed by grafana on startup. Using these yaml files, the data sources in grafana can be modified. + +Example datasource config adapted from [Grafana](http://docs.grafana.org/administration/provisioning/#example-datasource-config-file): +``` +apiVersion: v1 +kind: ConfigMap +metadata: + name: sample-grafana-datasource + labels: + grafana_datasource: 1 +data: + datasource.yaml: |- + # config file version + apiVersion: 1 + + # list of datasources that should be deleted from the database + deleteDatasources: + - name: Graphite + orgId: 1 + + # list of datasources to insert/update depending + # whats available in the database + datasources: + # name of the datasource. Required + - name: Graphite + # datasource type. Required + type: graphite + # access mode. proxy or direct (Server or Browser in the UI). Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://localhost:8080 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: + # basic auth username + basicAuthUser: + # basic auth password + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: + # fields that will be converted to json and stored in json_data + jsonData: + graphiteVersion: "1.1" + tlsAuth: true + tlsAuthWithCACert: true + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: false + +``` diff --git a/cost-analyzer/charts/grafana/templates/NOTES.txt b/cost-analyzer/charts/grafana/templates/NOTES.txt new file mode 100755 index 000000000..634e30afd --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/NOTES.txt @@ -0,0 +1,37 @@ +1. Get your '{{ .Values.adminUser }}' user password by running: + + kubectl get secret --namespace {{ .Release.Namespace }} {{ template "grafana.fullname" . }} -o jsonpath="{.data.admin-password}" | base64 --decode ; echo + +2. The Grafana server can be accessed via port {{ .Values.service.port }} on the following DNS name from within your cluster: + + {{ template "grafana.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local +{{ if .Values.ingress.enabled }} + From outside the cluster, the server URL(s) are: +{{- range .Values.ingress.hosts }} + http://{{ . }} +{{- end }} +{{ else }} + Get the Grafana URL to visit by running these commands in the same shell: +{{ if contains "NodePort" .Values.service.type -}} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "grafana.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{ else if contains "LoadBalancer" .Values.service.type -}} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "grafana.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "grafana.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + http://$SERVICE_IP:{{ .Values.service.port -}} +{{ else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "grafana.name" . 
}},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 3000 +{{- end }} +{{- end }} + +3. Login with the password from step 1 and the username: {{ .Values.adminUser }} + +{{- if not .Values.persistence.enabled }} +################################################################################# +###### WARNING: Persistence is disabled!!! You will lose your data when ##### +###### the Grafana pod is terminated. ##### +################################################################################# +{{- end }} diff --git a/cost-analyzer/charts/grafana/templates/_helpers.tpl b/cost-analyzer/charts/grafana/templates/_helpers.tpl new file mode 100755 index 000000000..3a3ebd3ec --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/_helpers.tpl @@ -0,0 +1,43 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "grafana.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "grafana.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "grafana.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the name of the service account +*/}} +{{- define "grafana.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "grafana.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/cost-analyzer/charts/grafana/templates/clusterrole.yaml b/cost-analyzer/charts/grafana/templates/clusterrole.yaml new file mode 100755 index 000000000..47452f48c --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/clusterrole.yaml @@ -0,0 +1,23 @@ +{{- if .Values.rbac.create }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} + name: {{ template "grafana.fullname" . }}-clusterrole +{{- if or .Values.sidecar.dashboards.enabled .Values.sidecar.datasources.enabled }} +rules: +- apiGroups: [""] # "" indicates the core API group + resources: ["configmaps"] + verbs: ["get", "watch", "list"] +{{- else }} +rules: [] +{{- end}} +{{- end}} diff --git a/cost-analyzer/charts/grafana/templates/clusterrolebinding.yaml b/cost-analyzer/charts/grafana/templates/clusterrolebinding.yaml new file mode 100755 index 000000000..093e5518e --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/clusterrolebinding.yaml @@ -0,0 +1,23 @@ +{{- if .Values.rbac.create }} +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "grafana.fullname" . }}-clusterrolebinding + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . 
}} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +subjects: + - kind: ServiceAccount + name: {{ template "grafana.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ template "grafana.fullname" . }}-clusterrole + apiGroup: rbac.authorization.k8s.io +{{- end}} diff --git a/cost-analyzer/charts/grafana/templates/configmap-dashboard-provider.yaml b/cost-analyzer/charts/grafana/templates/configmap-dashboard-provider.yaml new file mode 100755 index 000000000..077173194 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/configmap-dashboard-provider.yaml @@ -0,0 +1,26 @@ +{{- if .Values.sidecar.dashboards.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} + name: {{ template "grafana.fullname" . }}-config-dashboards +data: + provider.yaml: |- + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + options: + path: {{ .Values.sidecar.dashboards.folder }} +{{- end}} diff --git a/cost-analyzer/charts/grafana/templates/configmap.yaml b/cost-analyzer/charts/grafana/templates/configmap.yaml new file mode 100755 index 000000000..0837a59b3 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/configmap.yaml @@ -0,0 +1,68 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . 
}} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{- if .Values.plugins }} + plugins: {{ join "," .Values.plugins }} +{{- end }} + grafana.ini: | +{{- range $key, $value := index .Values "grafana.ini" }} + [{{ $key }}] + {{- range $elem, $elemVal := $value }} + {{ $elem }} = {{ $elemVal }} + {{- end }} +{{- end }} + +{{- if .Values.datasources }} + {{- range $key, $value := .Values.datasources }} + {{ $key }}: | +{{ toYaml $value | trim | indent 4 }} + {{- end -}} +{{- end -}} +{{- if .Values.installPrometheus }} + - access: proxy + isDefault: true + name: Prometheus + type: prometheus + url: http://{{ .Release.Name }}-prometheus-server.{{ .Release.Namespace }}.svc.cluster.local +{{- end -}} + +{{- if .Values.dashboardProviders }} + {{- range $key, $value := .Values.dashboardProviders }} + {{ $key }}: | +{{ toYaml $value | indent 4 }} + {{- end -}} +{{- end -}} + +{{- if .Values.dashboards }} + download_dashboards.sh: | + #!/usr/bin/env sh + set -euf + {{- if .Values.dashboardProviders }} + {{- range $key, $value := .Values.dashboardProviders }} + {{- range $value.providers }} + mkdir -p {{ .options.path }} + {{- end }} + {{- end }} + {{- end }} + + {{- range $provider, $dashboards := .Values.dashboards }} + {{- range $key, $value := $dashboards }} + {{- if (or (hasKey $value "gnetId") (hasKey $value "url")) }} + curl -sk \ + --connect-timeout 60 \ + --max-time 60 \ + -H "Accept: application/json" \ + -H "Content-Type: application/json;charset=UTF-8" \ + {{- if $value.url -}}{{ $value.url }}{{- else -}} https://grafana.com/api/dashboards/{{ $value.gnetId }}/revisions/{{- if $value.revision -}}{{ $value.revision }}{{- else -}}1{{- end -}}/download{{- end -}}{{ if $value.datasource }}| sed 's|\"datasource\":[^,]*|\"datasource\": \"{{ $value.datasource }}\"|g'{{ end }} \ + > /var/lib/grafana/dashboards/{{ $provider }}/{{ $key }}.json + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git 
a/cost-analyzer/charts/grafana/templates/dashboards-json-configmap.yaml b/cost-analyzer/charts/grafana/templates/dashboards-json-configmap.yaml new file mode 100755 index 000000000..8737166d3 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/dashboards-json-configmap.yaml @@ -0,0 +1,22 @@ +{{- if .Values.dashboards }} + {{- range $provider, $dashboards := .Values.dashboards }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "grafana.fullname" $ }}-dashboards-{{ $provider }} + labels: + app: {{ template "grafana.name" $ }} + chart: {{ template "grafana.chart" $ }} + release: {{ $.Release.Name }} + heritage: {{ $.Release.Service }} + dashboard-provider: {{ $provider }} +data: + {{- range $key, $value := $dashboards }} + {{- if hasKey $value "json" }} + {{ $key }}.json: | +{{ $value.json | indent 4 }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/cost-analyzer/charts/grafana/templates/deployment.yaml b/cost-analyzer/charts/grafana/templates/deployment.yaml new file mode 100755 index 000000000..7fb428ee8 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/deployment.yaml @@ -0,0 +1,270 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + app: {{ template "grafana.name" . }} + release: {{ .Release.Name }} + strategy: + type: {{ .Values.deploymentStrategy }} + {{- if ne .Values.deploymentStrategy "RollingUpdate" }} + rollingUpdate: null + {{- end }} + template: + metadata: + labels: + app: {{ template "grafana.name" . }} + release: {{ .Release.Name }} +{{- with .Values.podAnnotations }} + annotations: +{{ toYaml . 
| indent 8 }} +{{- end }} + spec: + serviceAccountName: {{ template "grafana.serviceAccountName" . }} +{{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" +{{- end }} +{{- if .Values.securityContext }} + securityContext: +{{ toYaml .Values.securityContext | indent 8 }} +{{- end }} +{{- if .Values.dashboards }} + initContainers: + - name: download-dashboards + image: "{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}" + imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }} + command: ["sh", "/etc/grafana/download_dashboards.sh"] + volumeMounts: + - name: config + mountPath: "/etc/grafana/download_dashboards.sh" + subPath: download_dashboards.sh + - name: storage + mountPath: "/var/lib/grafana" + subPath: {{ .Values.persistence.subPath }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + readOnly: {{ .readOnly }} + {{- end }} +{{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + {{- range .Values.image.pullSecrets }} + - name: {{ . }} + {{- end}} + {{- end }} + containers: +{{- if .Values.sidecar.dashboards.enabled }} + - name: {{ template "grafana.name" . }}-sc-dashboard + image: "{{ .Values.sidecar.image }}" + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + - name: LABEL + value: "{{ .Values.sidecar.dashboards.label }}" + - name: FOLDER + value: "{{ .Values.sidecar.dashboards.folder }}" + resources: +{{ toYaml .Values.sidecar.resources | indent 12 }} + volumeMounts: + - name: sc-dashboard-volume + mountPath: {{ .Values.sidecar.dashboards.folder | quote }} +{{- end}} +{{- if .Values.sidecar.datasources.enabled }} + - name: {{ template "grafana.name" . 
}}-sc-datasources + image: "{{ .Values.sidecar.image }}" + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + - name: LABEL + value: "{{ .Values.sidecar.datasources.label }}" + - name: FOLDER + value: "/etc/grafana/provisioning/datasources" + resources: +{{ toYaml .Values.sidecar.resources | indent 12 }} + volumeMounts: + - name: sc-datasources-volume + mountPath: "/etc/grafana/provisioning/datasources" +{{- end}} + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + volumeMounts: + - name: config + mountPath: "/etc/grafana/grafana.ini" + subPath: grafana.ini + - name: ldap + mountPath: "/etc/grafana/ldap.toml" + subPath: ldap.toml +{{- if .Values.dashboards }} + {{- range $provider, $dashboards := .Values.dashboards }} + {{- range $key, $value := $dashboards }} + {{- if hasKey $value "json" }} + - name: dashboards-{{ $provider }} + mountPath: "/var/lib/grafana/dashboards/{{ $provider }}/{{ $key }}.json" + subPath: "{{ $key }}.json" + {{- end }} + {{- end }} + {{- end }} +{{- end -}} +{{- if .Values.dashboardsConfigMaps }} + {{- range keys .Values.dashboardsConfigMaps }} + - name: dashboards-{{ . }} + mountPath: "/var/lib/grafana/dashboards/{{ . 
}}" + {{- end }} +{{- end }} +{{- if .Values.datasources }} + - name: config + mountPath: "/etc/grafana/provisioning/datasources/datasources.yaml" + subPath: datasources.yaml +{{- end }} +{{- if .Values.dashboardProviders }} + - name: config + mountPath: "/etc/grafana/provisioning/dashboards/dashboardproviders.yaml" + subPath: dashboardproviders.yaml +{{- end }} +{{- if .Values.sidecar.dashboards.enabled }} + - name: sc-dashboard-volume + mountPath: {{ .Values.sidecar.dashboards.folder | quote }} + - name: sc-dashboard-provider + mountPath: "/etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml" + subPath: provider.yaml +{{- end}} +{{- if .Values.sidecar.datasources.enabled }} + - name: sc-datasources-volume + mountPath: "/etc/grafana/provisioning/datasources" +{{- end}} + - name: storage + mountPath: "/var/lib/grafana" + subPath: {{ .Values.persistence.subPath }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + readOnly: {{ .readOnly }} + {{- end }} + ports: + - name: service + containerPort: {{ .Values.service.port }} + protocol: TCP + - name: grafana + containerPort: 3000 + protocol: TCP + env: + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: {{ template "grafana.fullname" . }} + key: admin-user + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: {{ template "grafana.fullname" . }} + key: admin-password + {{- if .Values.plugins }} + - name: GF_INSTALL_PLUGINS + valueFrom: + configMapKeyRef: + name: {{ template "grafana.fullname" . 
}} + key: plugins + {{- end }} + {{- if .Values.smtp.existingSecret }} + - name: GF_SMTP_USER + valueFrom: + secretKeyRef: + name: {{ .Values.smtp.existingSecret }} + key: user + - name: GF_SMTP_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.smtp.existingSecret }} + key: password + {{- end }} +{{- range $key, $value := .Values.env }} + - name: "{{ $key }}" + value: "{{ $value }}" +{{- end }} + {{- if .Values.envFromSecret }} + envFrom: + - secretRef: + name: {{ .Values.envFromSecret }} + {{- end }} + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 12 }} + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 12 }} + resources: +{{ toYaml .Values.resources | indent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ template "grafana.fullname" . }} + {{- if .Values.dashboards }} + {{- range keys .Values.dashboards }} + - name: dashboards-{{ . }} + configMap: + name: {{ template "grafana.fullname" $ }}-dashboards-{{ . }} + {{- end }} + {{- end }} + {{- if .Values.dashboardsConfigMaps }} + {{- range $provider, $name := .Values.dashboardsConfigMaps }} + - name: dashboards-{{ $provider }} + configMap: + name: {{ $name }} + {{- end }} + {{- end }} + - name: ldap + secret: + {{- if .Values.ldap.existingSecret }} + secretName: {{ .Values.ldap.existingSecret }} + {{- else }} + secretName: {{ template "grafana.fullname" . }} + {{- end }} + items: + - key: ldap-toml + path: ldap.toml + - name: storage + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ .Values.persistence.existingClaim | default (include "grafana.fullname" .) 
}} + {{- else }} + emptyDir: {} + {{- end -}} + {{- if .Values.sidecar.dashboards.enabled }} + - name: sc-dashboard-volume + emptyDir: {} + - name: sc-dashboard-provider + configMap: + name: {{ template "grafana.fullname" . }}-config-dashboards + {{- end }} + {{- if .Values.sidecar.datasources.enabled }} + - name: sc-datasources-volume + emptyDir: {} + {{- end -}} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + defaultMode: {{ .defaultMode }} + {{- end }} diff --git a/cost-analyzer/charts/grafana/templates/ingress.yaml b/cost-analyzer/charts/grafana/templates/ingress.yaml new file mode 100755 index 000000000..489734547 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/ingress.yaml @@ -0,0 +1,42 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "grafana.fullname" . -}} +{{- $servicePort := .Values.service.port -}} +{{- $ingressPath := .Values.ingress.path -}} +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- if .Values.ingress.labels }} +{{ toYaml .Values.ingress.labels | indent 4 }} +{{- end }} +{{- with .Values.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: +{{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} +{{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ . 
}} + http: + paths: + - path: {{ $ingressPath }} + backend: + serviceName: {{ $fullName }} + servicePort: {{ $servicePort }} + {{- end }} +{{- end }} diff --git a/cost-analyzer/charts/grafana/templates/podsecuritypolicy.yaml b/cost-analyzer/charts/grafana/templates/podsecuritypolicy.yaml new file mode 100755 index 000000000..ffe4822b5 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/podsecuritypolicy.yaml @@ -0,0 +1,40 @@ +{{- if .Values.rbac.pspEnabled }} +apiVersion: extensions/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + annotations: + seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'docker/default' + apparmor.security.beta.kubernetes.io/allowedProfileNames: 'runtime/default' + seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' + apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' +spec: + privileged: false + allowPrivilegeEscalation: false + requiredDropCapabilities: + - ALL + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'RunAsAny' + fsGroup: + rule: 'RunAsAny' + readOnlyRootFilesystem: false +{{- end }} diff --git a/cost-analyzer/charts/grafana/templates/pvc.yaml b/cost-analyzer/charts/grafana/templates/pvc.yaml new file mode 100755 index 000000000..e1cc03296 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/pvc.yaml @@ -0,0 +1,24 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . 
}} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- with .Values.persistence.annotations }} + annotations: +{{ toYaml . | indent 4 }} + {{- end }} +spec: + accessModes: + {{- range .Values.persistence.accessModes }} + - {{ . | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + storageClassName: {{ .Values.persistence.storageClassName }} +{{- end -}} diff --git a/cost-analyzer/charts/grafana/templates/role.yaml b/cost-analyzer/charts/grafana/templates/role.yaml new file mode 100755 index 000000000..8091d49f2 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/role.yaml @@ -0,0 +1,18 @@ +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: Role +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +{{- if .Values.rbac.pspEnabled }} +rules: +- apiGroups: ['extensions'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: [{{ template "grafana.fullname" . }}] +{{- end }} +{{- end }} diff --git a/cost-analyzer/charts/grafana/templates/rolebinding.yaml b/cost-analyzer/charts/grafana/templates/rolebinding.yaml new file mode 100755 index 000000000..199e8fe4e --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/rolebinding.yaml @@ -0,0 +1,18 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: RoleBinding +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "grafana.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ template "grafana.serviceAccountName" . 
}} +{{- end -}} diff --git a/cost-analyzer/charts/grafana/templates/secret.yaml b/cost-analyzer/charts/grafana/templates/secret.yaml new file mode 100755 index 000000000..a698e058f --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/secret.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +type: Opaque +data: + admin-user: {{ .Values.adminUser | b64enc | quote }} + {{- if .Values.adminPassword }} + admin-password: {{ .Values.adminPassword | b64enc | quote }} + {{- else }} + admin-password: {{ randAlphaNum 40 | b64enc | quote }} + {{- end }} + {{- if not .Values.ldap.existingSecret }} + ldap-toml: {{ .Values.ldap.config | b64enc | quote }} + {{- end }} diff --git a/cost-analyzer/charts/grafana/templates/service.yaml b/cost-analyzer/charts/grafana/templates/service.yaml new file mode 100755 index 000000000..6dcd63a4d --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/service.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "grafana.fullname" . }} + labels: + app: {{ template "grafana.name" . }} + chart: {{ template "grafana.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- if .Values.service.labels }} +{{ toYaml .Values.service.labels | indent 4 }} +{{- end }} +{{- with .Values.service.annotations }} + annotations: +{{ toYaml . 
| indent 4 }} +{{- end }} +spec: +{{- if (or (eq .Values.service.type "ClusterIP") (empty .Values.service.type)) }} + type: ClusterIP + {{- if .Values.service.clusterIP }} + clusterIP: {{ .Values.service.clusterIP }} + {{- end }} +{{- else if eq .Values.service.type "LoadBalancer" }} + type: {{ .Values.service.type }} + {{- if .Values.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.service.loadBalancerIP }} + {{- end }} + {{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml .Values.service.loadBalancerSourceRanges | indent 4 }} + {{- end }} +{{- else }} + type: {{ .Values.service.type }} +{{- end }} +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} + ports: + - name: service + port: {{ .Values.service.port }} + protocol: TCP + targetPort: 3000 +{{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }} + nodePort: {{ .Values.service.nodePort }} +{{- end }} + selector: + app: {{ template "grafana.name" . }} + release: {{ .Release.Name }} diff --git a/cost-analyzer/charts/grafana/templates/serviceaccount.yaml b/cost-analyzer/charts/grafana/templates/serviceaccount.yaml new file mode 100755 index 000000000..04601d054 --- /dev/null +++ b/cost-analyzer/charts/grafana/templates/serviceaccount.yaml @@ -0,0 +1,11 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app: {{ template "grafana.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "grafana.serviceAccountName" . 
}} +{{- end }} diff --git a/cost-analyzer/charts/grafana/values.yaml b/cost-analyzer/charts/grafana/values.yaml new file mode 100644 index 000000000..3f954c23d --- /dev/null +++ b/cost-analyzer/charts/grafana/values.yaml @@ -0,0 +1,285 @@ +rbac: + create: true + pspEnabled: true +serviceAccount: + create: true + name: + +replicas: 1 + +deploymentStrategy: RollingUpdate + +readinessProbe: + httpGet: + path: /api/health + port: 3000 + +livenessProbe: + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 60 + timeoutSeconds: 30 + failureThreshold: 10 + +image: + repository: grafana/grafana + tag: 5.3.1 + pullPolicy: IfNotPresent + + ## Optionally specify an array of imagePullSecrets. + ## Secrets must be manually created in the namespace. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## + # pullSecrets: + # - myRegistryKeySecretName + +securityContext: + runAsUser: 472 + fsGroup: 472 + +downloadDashboardsImage: + repository: appropriate/curl + tag: latest + pullPolicy: IfNotPresent + +## Pod Annotations +# podAnnotations: {} + +## Deployment annotations +# annotations: {} + +## Expose the grafana service to be accessed from outside the cluster (LoadBalancer service), +## or access it from within the cluster (ClusterIP service). Set the service type and the port to serve it. 
+## ref: http://kubernetes.io/docs/user-guide/services/ +## +service: + type: ClusterIP + port: 80 + annotations: {} + labels: {} + +ingress: + enabled: false + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: / + hosts: + - chart-example.local + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} +# limits: +# cpu: 100m +# memory: 128Mi +# requests: +# cpu: 100m +# memory: 128Mi + +## Node labels for pod assignment +## ref: https://kubernetes.io/docs/user-guide/node-selection/ +# +nodeSelector: {} + +## Tolerations for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: [] + +## Affinity for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +## +affinity: {} + +## Enable persistence using Persistent Volume Claims +## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ +## +persistence: + enabled: false + # storageClassName: default + # accessModes: + # - ReadWriteOnce + # size: 10Gi + # annotations: {} + # subPath: "" + # existingClaim: + +adminUser: admin +# adminPassword: strongpassword + +## Use an alternate scheduler, e.g. "stork". +## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +# schedulerName: + +## Extra environment variables that will be passed on to the deployment pods +env: {} + +## The name of a secret in the same kubernetes namespace which contains values to be added to the environment +## This can be useful for auth tokens, etc. +envFromSecret: "" + +## Additional grafana server secret mounts +# Defines additional mounts with secrets. Secrets must be manually created in the namespace. +extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # secretName: grafana-secret-files + # readOnly: true + +## Pass the plugins you want installed as a list. 
+## +plugins: [] + # - digrich-bubblechart-panel + # - grafana-clock-panel + +## Configure grafana datasources +## ref: http://docs.grafana.org/administration/provisioning/#datasources +## +datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus2 + type: prometheus + url: http://prometheus-server.default.svc.cluster.local + access: proxy + isDefault: false + +installPrometheus: true + +## Configure grafana dashboard providers +## ref: http://docs.grafana.org/administration/provisioning/#dashboards +## +## `path` must be /var/lib/grafana/dashboards/ +## +dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default + +## Configure grafana dashboard to import +## NOTE: To use dashboards you must also enable/configure dashboardProviders +## ref: https://grafana.com/dashboards +## +## dashboards per provider, use provider name as key. 
+## +dashboards: + default: + prometheus-stats: + gnetId: 3662 + revision: 2 + datasource: Prometheus + pod-utilization: + json: '{ "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "Visualize your kubernetes costs at the pod level.", "editable": true, "gnetId": 9063, "graphTooltip": 0, "id": 5, "iteration": 1542350726367, "links": [], "panels": [ { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "fontSize": "100%", "gridPos": { "h": 5, "w": 24, "x": 0, "y": 0 }, "hideTimeOverride": true, "id": 73, "links": [], "pageSize": 5, "repeat": null, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 6, "desc": true }, "styles": [ { "alias": "Container", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "pattern": "container_name", "thresholds": [ "30", "80" ], "type": "string", "unit": "currencyUSD" }, { "alias": "On-demand RAM", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "Value #B", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "On-demand CPU", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": 
"Spot/PE CPU", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "Spot/PE RAM", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #D", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "Total", "colorMode": null, "colors": [ "#bf1b00", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #E", "thresholds": [ "" ], "type": "number", "unit": "currencyUSD" } ], "targets": [ { "expr": "sum(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cloud_google_com_gke_preemptible!=\"true\"}[1h])) by (container_name)\n * ($costcpu - ($costcpu / 100 * $costDiscount))\n) by (container_name)\n", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", "refId": "A" }, { "expr": "sum(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cloud_google_com_gke_preemptible=\"true\"}[1h])) by (container_name)\n or up * 0\n) by (container_name) * $costpcpu\n", "format": "table", "instant": true, "intervalFactor": 1, "refId": "C" }, { "expr": "sum(\n sum(avg_over_time(container_memory_working_set_bytes{image!=\"\",namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cloud_google_com_gke_preemptible!=\"true\"}[1h])) by (container_name)\n * ($costram - ($costram / 100 * $costDiscount)) / 1024 / 1024 / 1024\n) by (container_name)\n", "format": "table", "hide": false, "instant": true, 
"intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" }, { "expr": "sum(\n sum(avg_over_time(container_memory_working_set_bytes{image!=\"\",namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cloud_google_com_gke_preemptible=\"true\"}[1h])) by (container_name)\n or up * 0\n) by (container_name) * $costpram / 1024 / 1024 / 1024\n", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "refId": "D" } ], "timeFrom": "1M", "timeShift": null, "title": "Container cost and utilization analysis", "transform": "table", "transparent": false, "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "This graph attempts to show you CPU use of your application vs its requests", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 5 }, "height": "", "id": 94, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{namespace=~\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}[10m])) by (container_name)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{ container_name }} (usage)", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "sum(kube_pod_container_resource_requests_cpu_cores{namespace=~\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", "format": "time_series", "instant": false, 
"intervalFactor": 1, "legendFormat": "{{ container}} (request)", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "CPU Usage vs Requested", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "This graph attempts to show you RAM use of your application vs its requests", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 5 }, "height": "", "id": 96, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (avg_over_time (container_memory_working_set_bytes{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}[1m])) by (container_name)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{ container_name }} (usage)", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "sum(kube_pod_container_resource_requests_memory_bytes{namespace=~\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)", "format": "time_series", 
"hide": false, "instant": false, "intervalFactor": 1, "legendFormat": "{{ container }} (requested)", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "RAM Usage vs Requested", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "description": "Traffic in and out of this pod, as a sum of its containers", "editable": true, "error": false, "fill": 1, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 12 }, "height": "", "id": 95, "isNew": true, "legend": { "alignAsTable": false, "avg": true, "current": true, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_network_receive_bytes_total{namespace=\"$namespace\",pod_name=\"$pod\"}[10m])) by (pod_name)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "<- in", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{namespace=\"$namespace\",pod_name=\"$pod\"}[10m])) by (pod_name)", "format": "time_series", "hide": false, "instant": false, "interval": "", 
"intervalFactor": 1, "legendFormat": "-> out", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "Network IO", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "description": "Disk read writes", "editable": true, "error": false, "fill": 1, "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 12 }, "height": "", "id": 97, "isNew": true, "legend": { "alignAsTable": false, "avg": true, "current": true, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_fs_writes_bytes_total{namespace=\"$namespace\",pod_name=\"$pod\"}[10m])) by (pod_name)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "<- write", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_fs_reads_bytes_total{namespace=\"$namespace\",pod_name=\"$pod\"}[10m])) by (pod_name)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "-> read", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": 
"Disk IO", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } } ], "refresh": false, "schemaVersion": 16, "style": "dark", "tags": [ "cost", "utilisation", "metrics" ], "templating": { "list": [ { "current": { "text": "17.78", "value": "17.78" }, "hide": 0, "label": "CPU", "name": "costcpu", "options": [ { "text": "17.78", "value": "17.78" } ], "query": "17.78", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "5.35", "value": "5.35" }, "hide": 0, "label": "PE CPU", "name": "costpcpu", "options": [ { "text": "5.35", "value": "5.35" } ], "query": "5.35", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "2.38", "value": "2.38" }, "hide": 0, "label": "RAM", "name": "costram", "options": [ { "text": "2.38", "value": "2.38" } ], "query": "2.38", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.72", "value": "0.72" }, "hide": 0, "label": "PE RAM", "name": "costpram", "options": [ { "text": "0.72", "value": "0.72" } ], "query": "0.72", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.044", "value": "0.044" }, "hide": 0, "label": "Storage", "name": "costStorageStandard", "options": [ { "text": "0.044", "value": "0.044" } ], "query": "0.044", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.187", "value": "0.187" }, "hide": 0, "label": "SSD", "name": "costStorageSSD", "options": [ { "text": "0.187", "value": "0.187" } ], "query": "0.187", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "30", "value": "30" }, "hide": 0, "label": "Disc.", "name": "costDiscount", "options": [ { 
"text": "30", "value": "30" } ], "query": "30", "skipUrlSync": false, "type": "constant" }, { "allValue": null, "current": { "text": "kube-system", "value": "kube-system" }, "datasource": "Prometheus", "hide": 0, "includeAll": false, "label": "NS", "multi": false, "name": "namespace", "options": [], "query": "query_result(sum(container_memory_working_set_bytes{namespace!=\"\"}) by (namespace))", "refresh": 1, "regex": "/namespace=\\\"(.*?)(\\\")/", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "text": "kube-proxy-gke-guestbook-default-pool-f466ca00-g3kv", "value": "kube-proxy-gke-guestbook-default-pool-f466ca00-g3kv" }, "datasource": "Prometheus", "hide": 0, "includeAll": false, "label": "Pod", "multi": false, "name": "pod", "options": [], "query": "query_result(sum(container_memory_working_set_bytes{namespace=\"$namespace\"}) by (pod_name))", "refresh": 1, "regex": "/pod_name=\\\"(.*?)(\\\")/", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-15m", "to": "now" }, "timepicker": { "hidden": false, "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "browser", "title": "Pod cost & utilization metrics", "uid": "at-cost-analysis-pod", "version": 1 }' + insights: + json: '{ "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "id": 5, "iteration": 1542347581633, "links": [], "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": 
"Prometheus", "decimals": 2, "description": "Based on CPU usage over last 24 hours", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 }, "hideTimeOverride": true, "id": 15, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 23.076\n\n+ \n\nsum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 5.1", "format": "time_series", "instant": 
true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "CPU Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Based on CPU usage over last 24 hours", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 }, "hideTimeOverride": true, "id": 16, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 4 / 1024 / 1024 / 1024\n\n+ \n\nsum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by 
(kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 11 / 1024 / 1024 / 1024", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Memory Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 }, "hideTimeOverride": true, "id": 21, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n sum(kube_persistentvolumeclaim_info{storageclass!=\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n 
sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .04 \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .17 \n", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Storage Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Cost of memory + CPU usage", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 }, "hideTimeOverride": true, "id": 20, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": 
"label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CPU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nsum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 23.076\n\n+ \n\nsum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 5.1\n\n#END CPU\n+\n\n# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nsum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 4 / 1024 / 1024 / 1024\n\n+ \n\nsum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by 
(kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 11 / 1024 / 1024 / 1024\n\n# END MEMORY\n\n+\n\n# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORAGE ~~~~~~~~~~~~~~~~~~~~~~~~~\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass!=\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .04 \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .17 \n\n\n# END STORAGE\n", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Total Cost", "type": "singlestat", "valueFontSize": "110%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, 
"thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 0, "y": 5 }, "id": 10, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": " cores", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum (kube_pod_container_resource_requests_cpu_cores) by (pod)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"}\n or up * 0\n) ", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "CPU Request", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 3, "y": 5 }, "id": 17, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": " cores", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n label_replace(\n 
sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) ", "format": "time_series", "intervalFactor": 2, "refId": "A" } ], "thresholds": "", "title": "CPU Used", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 0, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 6, "y": 5 }, "id": 11, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": true, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum (kube_pod_container_resource_requests_memory_bytes) by (pod)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"}\n or up * 0\n) ", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Memory Request", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, 
"colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 9, "y": 5 }, "id": 18, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n label_replace(\n sum (container_memory_working_set_bytes{pod_name!=\"\"}) by (pod_name),\n \"pod\",\n \"$1\", \n \"pod_name\", \n \"(.+)\")\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"} \n or up * 0\n)", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Memory Usage", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 0, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 12, "y": 5 }, "id": 22, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { 
"from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum(kube_persistentvolumeclaim_info{storageclass!=\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Storage Request", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 0, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 15, "y": 5 }, "id": 23, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", 
"to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum(kube_persistentvolumeclaim_info{storageclass!=\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Storage Used", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(\n label_replace(\n sum (kube_pod_container_resource_limits_cpu_cores) by (pod, container)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"},\n \"container_name\",\n \"$1\", \n \"container\", \n \"(.+)\"\n )\n) \n", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "limit", "refId": "C" }, { "expr": "sum(\n label_replace(\n sum (kube_pod_container_resource_requests_cpu_cores) by (pod, container)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"},\n \"container_name\",\n \"$1\", \n \"container\", \n \"(.+)\"\n )\n) \n", "format": "time_series", "intervalFactor": 1, "legendFormat": "request", "refId": "B" }, { "expr": "sum(\n label_replace(\n sum (rate (container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[10m])) by (container_name,pod_name),\n \"pod\", \n \"$1\", \n \"pod_name\", \n \"(.+)\"\n )\n * on (pod) group_left (label_app)\n kube_pod_labels{label_$label=~\"$label_value\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "usage", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "CPU Usage vs Requests vs Limits", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, "id": 12, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(\nlabel_replace(\nsum (rate (container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[10m])) by 
(container_name,pod_name),\n\"pod\", \n \"$1\", \n \"pod_name\", \n \"(.+)\"\n)\n* on (pod) group_left (label_app)\n kube_pod_labels{label_app=~\"$label_value\"}\n ) by (container_name,pod)\n", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Container CPU Utilization", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "This table shows the comparison of CPU requests and usage by namespace", "fontSize": "100%", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 17 }, "hideTimeOverride": true, "id": 4, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 3, "desc": true }, "styles": [ { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "node", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#cffaff" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [ "" ], "type": "number", 
"unit": "short" }, { "alias": "Request Utilization", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [ "30" ], "type": "number", "unit": "percentunit" }, { "alias": "Utilization", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "#3f6833", "#cca300" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [ "20", "90" ], "type": "number", "unit": "percentunit" } ], "targets": [ { "expr": "sum(\nsum (rate (container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name!=\"\"}[10m])) by (container_name,pod_name)\n* on (pod_name) group_left (label_$label)\nlabel_replace(\n kube_pod_labels{label_$label=~\"$label_value\"},\n \"pod_name\", \n \"$1\", \n \"pod\", \n \"(.+)\"\n)) by (container_name,pod_name)", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "", "refId": "A" }, { "expr": "sum(\n label_replace(\n label_replace(\n sum (kube_pod_container_resource_requests_cpu_cores) by (pod, container),\n \"container_name\",\n \"$1\", \n \"container\", \n \"(.+)\"),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\")\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"}\n) by (pod_name,container_name)", "format": "table", "instant": true, "intervalFactor": 1, "refId": "B" } ], "timeFrom": null, "timeShift": null, "title": "Average CPU usage by container", "transform": "table", "transparent": false, "type": "table" } ], "refresh": false, "schemaVersion": 16, "style": "dark", "tags": [], "templating": { "list": [ { "datasource": "Prometheus", "filters": [], "hide": 0, "label": "", "name": "Filters", "skipUrlSync": false, "type": "adhoc" }, { "allValue": null, "current": { "tags": [], "text": "app", "value": "app" }, "hide": 0, "includeAll": false, 
"label": "Label", "multi": false, "name": "label", "options": [ { "selected": false, "text": "app", "value": "app" }, { "selected": false, "text": "tier", "value": "tier" }, { "selected": false, "text": "component", "value": "component" }, { "selected": true, "text": "release", "value": "release" }, { "selected": false, "text": "name", "value": "name" }, { "selected": false, "text": "team", "value": "team" }, { "selected": false, "text": "department", "value": "department" }, { "selected": false, "text": "owner", "value": "owner" }, { "selected": false, "text": "contact", "value": "contact" } ], "query": "app, tier, component, release, name, team, department, owner, contact", "skipUrlSync": false, "type": "custom" }, { "allValue": ".*", "current": { "text": "redis", "value": "redis" }, "datasource": "Prometheus", "hide": 0, "includeAll": true, "label": "Value", "multi": false, "name": "label_value", "options": [], "query": "query_result(SUM(kube_pod_labels{label_$label!=\"\",namespace!=\"kube-system\"}) by (label_$label))", "refresh": 1, "regex": "/label_$label=\\\"(.*?)(\\\")/", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": "()", "current": { "text": "cost-analyzer-grafana", "value": "cost-analyzer-grafana" }, "datasource": "Prometheus", "hide": 0, "includeAll": true, "label": "", "multi": false, "name": "Deployments", "options": [], "query": "label_values(deployment)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "tags": [], "text": "All", "value": "$__all" }, "datasource": "Prometheus", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "Secondary", "options": [ { "selected": true, "text": "All", "value": "$__all" }, { "selected": false, "text": "app", "value": "app" }, { "selected": false, "text": "component", "value": 
"component" }, { "selected": false, "text": "controller_revision_hash", "value": "controller_revision_hash" }, { "selected": false, "text": "k8s_app", "value": "k8s_app" } ], "query": "query_result(kube_pod_labels)", "refresh": 0, "regex": "/.+?label_([^=]*).*/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-6h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "Label costs & utlization", "uid": "lWMhIA-ik", "version": 28 }' + label-cost-utilization: + json: '{ "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "id": 5, "iteration": 1542347581633, "links": [], "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Based on CPU usage over last 24 hours", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 }, "hideTimeOverride": true, "id": 15, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false 
}, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 23.076\n\n+ \n\nsum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 5.1", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "CPU Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Based on current memory usage", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 }, "hideTimeOverride": true, "id": 16, "interval": null, "links": [], 
"mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 4 / 1024 / 1024 / 1024\n\n+ \n\nsum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 11 / 1024 / 1024 / 1024", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Memory Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { 
"cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 }, "hideTimeOverride": true, "id": 21, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n sum(kube_persistentvolumeclaim_info{storageclass!=\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .04 \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .17 \n", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", 
"timeFrom": "15m", "timeShift": null, "title": "Storage Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Cost of memory + CPU usage", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 }, "hideTimeOverride": true, "id": 20, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CPU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nsum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 23.076\n\n+ \n\nsum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[24h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n 
\"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 5.1\n\n#END CPU\n+\n\n# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nsum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 4 / 1024 / 1024 / 1024\n\n+ \n\nsum(\n label_replace(\n sum(container_memory_working_set_bytes{image!=\"\",container_name!=\"POD\"}) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"} \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) * 11 / 1024 / 1024 / 1024\n\n# END MEMORY\n\n+\n\n# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORAGE ~~~~~~~~~~~~~~~~~~~~~~~~~\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass!=\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n 
kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .04 \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) / 1024 / 1024 /1024 * .17 \n\n\n# END STORAGE\n", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Total Cost", "type": "singlestat", "valueFontSize": "110%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 0, "y": 5 }, "id": 10, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": " cores", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum (kube_pod_container_resource_requests_cpu_cores) by (pod)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"}\n or up * 0\n) ", "format": "time_series", "intervalFactor": 
1, "refId": "A" } ], "thresholds": "", "title": "CPU Request", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 3, "y": 5 }, "id": 17, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": " cores", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n label_replace(\n sum(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1h])) by (kubernetes_io_hostname,pod_name),\n \"node\",\n \"$1\", \n \"kubernetes_io_hostname\", \n \"(.+)\"\n ) \n * on (pod_name) group_left()\n label_replace(\n sum(kube_pod_labels{label_$label=~\"$label_value\"}) by (pod),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\"\n ) or up * 0\n) ", "format": "time_series", "intervalFactor": 2, "refId": "A" } ], "thresholds": "", "title": "CPU Used", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 0, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, 
"thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 6, "y": 5 }, "id": 11, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": true, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum (kube_pod_container_resource_requests_memory_bytes) by (pod)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"}\n or up * 0\n) ", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Memory Request", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 9, "y": 5 }, "id": 18, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n label_replace(\n sum 
(container_memory_working_set_bytes{pod_name!=\"\"}) by (pod_name),\n \"pod\",\n \"$1\", \n \"pod_name\", \n \"(.+)\")\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"} \n or up * 0\n)", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Memory Usage", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 0, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 12, "y": 5 }, "id": 22, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n 
sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Storage Request", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 0, "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 15, "y": 5 }, "id": 23, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n\n+\n\nsum(\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, storageclass)\n * on (persistentvolumeclaim) group_right(storageclass)\n sum(kubelet_volume_stats_used_bytes) by 
(persistentvolumeclaim)\n * on (persistentvolumeclaim) group_left(label_app)\n kube_persistentvolumeclaim_labels{label_$label=~\"$label_value\"} or up * 0\n) \n", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], "thresholds": "", "title": "Storage Used", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(\n label_replace(\n sum (kube_pod_container_resource_limits_cpu_cores) by (pod, container)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"},\n \"container_name\",\n \"$1\", \n \"container\", \n \"(.+)\"\n )\n) \n", "format": "time_series", "intervalFactor": 1, "legendFormat": "limit", "refId": "C" }, { "expr": "sum(\n label_replace(\n sum (kube_pod_container_resource_requests_cpu_cores) by (pod, container)\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"},\n \"container_name\",\n \"$1\", \n \"container\", \n \"(.+)\"\n )\n) \n", "format": "time_series", "intervalFactor": 1, "legendFormat": "request", "refId": "B" }, { "expr": "sum(\n label_replace(\n sum (rate (container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[10m])) by (container_name,pod_name),\n \"pod\", \n \"$1\", \n \"pod_name\", \n \"(.+)\"\n )\n * on (pod) group_left (label_app)\n kube_pod_labels{label_$label=~\"$label_value\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": 
"usage", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "CPU Usage vs Requests vs Limits", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": null, "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, "id": 12, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(\nlabel_replace(\nsum (rate (container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[10m])) by (container_name,pod_name),\n\"pod\", \n \"$1\", \n \"pod_name\", \n \"(.+)\"\n)\n* on (pod) group_left (label_app)\n kube_pod_labels{label_$label=~\"$label_value\"}\n ) by (container_name,pod)\n", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Container CPU Utilization", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "columns": [ { "text": 
"Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "This table shows the comparison of CPU requests and usage by namespace", "fontSize": "100%", "gridPos": { "h": 8, "w": 12, "x": 0, "y": 17 }, "hideTimeOverride": true, "id": 4, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 3, "desc": true }, "styles": [ { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "node", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "CPU Requests", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#cffaff" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [ "" ], "type": "number", "unit": "short" }, { "alias": "Request Utilization", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [ "30" ], "type": "number", "unit": "percentunit" }, { "alias": "Utilization", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "#3f6833", "#cca300" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [ "20", "90" ], "type": "number", "unit": "percentunit" } ], "targets": [ { "expr": "sum(\nsum (rate (container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name!=\"\"}[10m])) by (container_name,pod_name)\n* on (pod_name) group_left 
(label_$label)\nlabel_replace(\n kube_pod_labels{label_$label=~\"$label_value\"},\n \"pod_name\", \n \"$1\", \n \"pod\", \n \"(.+)\"\n)) by (container_name,pod_name)", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "", "refId": "A" }, { "expr": "sum(\n label_replace(\n label_replace(\n sum (kube_pod_container_resource_requests_cpu_cores) by (pod, container),\n \"container_name\",\n \"$1\", \n \"container\", \n \"(.+)\"),\n \"pod_name\",\n \"$1\", \n \"pod\", \n \"(.+)\")\n * on (pod) group_left()\n kube_pod_labels{label_$label=~\"$label_value\"}\n) by (pod_name,container_name)", "format": "table", "instant": true, "intervalFactor": 1, "refId": "B" } ], "timeFrom": null, "timeShift": null, "title": "Average CPU usage by container", "transform": "table", "transparent": false, "type": "table" } ], "refresh": false, "schemaVersion": 16, "style": "dark", "tags": [], "templating": { "list": [ { "datasource": "Prometheus", "filters": [], "hide": 0, "label": "", "name": "Filters", "skipUrlSync": false, "type": "adhoc" }, { "allValue": null, "current": { "tags": [], "text": "app", "value": "app" }, "hide": 0, "includeAll": false, "label": "Label", "multi": false, "name": "label", "options": [ { "selected": false, "text": "app", "value": "app" }, { "selected": false, "text": "tier", "value": "tier" }, { "selected": false, "text": "component", "value": "component" }, { "selected": true, "text": "release", "value": "release" }, { "selected": false, "text": "name", "value": "name" }, { "selected": false, "text": "team", "value": "team" }, { "selected": false, "text": "department", "value": "department" }, { "selected": false, "text": "owner", "value": "owner" }, { "selected": false, "text": "contact", "value": "contact" } ], "query": "app, tier, component, release, name, team, department, owner, contact", "skipUrlSync": false, "type": "custom" }, { "allValue": ".*", "current": { "text": "redis", "value": "redis" }, 
"datasource": "Prometheus", "hide": 0, "includeAll": true, "label": "Value", "multi": false, "name": "label_value", "options": [], "query": "query_result(SUM(kube_pod_labels{label_$label!=\"\",namespace!=\"kube-system\"}) by (label_$label))", "refresh": 1, "regex": "/label_$label=\\\"(.*?)(\\\")/", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": "()", "current": { "text": "cost-analyzer-grafana", "value": "cost-analyzer-grafana" }, "datasource": "Prometheus", "hide": 0, "includeAll": true, "label": "", "multi": false, "name": "Deployments", "options": [], "query": "label_values(deployment)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "tags": [], "text": "All", "value": "$__all" }, "datasource": "Prometheus", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "Secondary", "options": [ { "selected": true, "text": "All", "value": "$__all" }, { "selected": false, "text": "app", "value": "app" }, { "selected": false, "text": "component", "value": "component" }, { "selected": false, "text": "controller_revision_hash", "value": "controller_revision_hash" }, { "selected": false, "text": "k8s_app", "value": "k8s_app" } ], "query": "query_result(kube_pod_labels)", "refresh": 0, "regex": "/.+?label_([^=]*).*/", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-6h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "Label costs & utlization", "uid": "lWMhIA-ik", "version": 28 }' + namespace-analysis: + json: '{ "annotations": { "list": [ { "builtIn": 1, "datasource": 
"-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "A dashboard to help with cost and utilisation", "editable": true, "gnetId": 6876, "graphTooltip": 0, "id": 2, "iteration": 1542347825329, "links": [], "panels": [ { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "fontSize": "100%", "gridPos": { "h": 9, "w": 16, "x": 0, "y": 0 }, "hideTimeOverride": true, "id": 73, "links": [], "pageSize": 8, "repeat": null, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": null, "desc": false }, "styles": [ { "alias": "Pod", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": false, "linkTooltip": "", "linkUrl": "", "pattern": "pod_name", "thresholds": [ "30", "80" ], "type": "string", "unit": "currencyUSD" }, { "alias": "RAM", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "Value #B", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "CPU", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "Storage", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, 
"mappingType": 1, "pattern": "Value #C", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "Total", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #D", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "CPU Utilisation", "colorMode": "value", "colors": [ "#bf1b00", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #E", "thresholds": [ "30", "80" ], "type": "number", "unit": "percent" }, { "alias": "RAM Utilisation", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #F", "thresholds": [ "30", "80" ], "type": "number", "unit": "percent" } ], "targets": [ { "expr": "(\n sum(container_spec_cpu_shares{namespace=\"$namespace\",cloud_google_com_gke_preemptible!=\"true\"}/1000*($costcpu - ($costcpu / 100 * $costDiscount))) by(pod_name)\n or\n count(\n count(container_spec_cpu_shares{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)\n\n+\n\n(\n sum(container_spec_cpu_shares{namespace=\"$namespace\",cloud_google_com_gke_preemptible=\"true\"}/1000*$costpcpu) by(pod_name)\n or\n count(\n count(container_spec_cpu_shares{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ pod_name }}", "refId": "A" }, { "expr": "sum(\n count(count(container_spec_cpu_shares{namespace=\"$namespace\"}) by (pod_name)) by (pod_name) \n * on (pod_name) \n sum(irate(container_cpu_usage_seconds_total{namespace=\"$namespace\"}[1m])) by (pod_name)\n) by (pod_name) * 1000\n/\nsum(container_spec_cpu_shares{namespace=\"$namespace\"}) by (pod_name) * 100", "format": 
"table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "{{ pod_name }}", "refId": "E" }, { "expr": "(\n sum(container_spec_memory_limit_bytes{namespace=\"$namespace\",cloud_google_com_gke_preemptible!=\"true\"}/1024/1024/1024*($costram- ($costram / 100 * $costDiscount))) by(pod_name)\n or\n count(\n count(container_spec_memory_limit_bytes{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)\n\n+\n\n(\n sum(container_spec_memory_limit_bytes{namespace=\"$namespace\",cloud_google_com_gke_preemptible=\"true\"}/1024/1024/1024*$costpram) by(pod_name)\n or\n count(\n count(container_spec_memory_limit_bytes{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" }, { "expr": "sum(\n count(count(container_memory_working_set_bytes{namespace=\"$namespace\"}) by (pod_name)) by (pod_name) \n * on (pod_name) \n sum(avg_over_time(container_memory_working_set_bytes{namespace=\"$namespace\"}[1m])) by (pod_name)\n) by (pod_name)\n/\nsum(container_spec_memory_limit_bytes{namespace=\"$namespace\"}) by (pod_name) * 100", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "F" }, { "expr": "(\n sum(container_spec_cpu_shares{namespace=\"$namespace\",cloud_google_com_gke_preemptible!=\"true\"}/1000*($costcpu - ($costcpu / 100 * $costDiscount))) by(pod_name)\n or\n count(\n count(container_spec_cpu_shares{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)\n\n+\n\n(\n sum(container_spec_cpu_shares{namespace=\"$namespace\",cloud_google_com_gke_preemptible=\"true\"}/1000*$costpcpu) by(pod_name)\n or\n count(\n count(container_spec_cpu_shares{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)\n\n# Now ram\n\n+ \n(\n sum(container_spec_memory_limit_bytes{namespace=\"$namespace\",cloud_google_com_gke_preemptible!=\"true\"}/1024/1024/1024*($costram- 
($costram / 100 * $costDiscount))) by(pod_name)\n or\n count(\n count(container_spec_memory_limit_bytes{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)\n\n+\n\n(\n sum(container_spec_memory_limit_bytes{namespace=\"$namespace\",cloud_google_com_gke_preemptible=\"true\"}/1024/1024/1024*$costpram) by(pod_name)\n or\n count(\n count(container_spec_memory_limit_bytes{namespace=\"$namespace\"}) by(pod_name)\n ) by(pod_name) -1\n)\n\n", "format": "table", "hide": false, "instant": true, "intervalFactor": 1, "refId": "D" } ], "timeFrom": "1M", "timeShift": null, "title": "Pod cost and utilisation analysis", "transform": "table", "transparent": false, "type": "table" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "fontSize": "100%", "gridPos": { "h": 9, "w": 8, "x": 16, "y": 0 }, "hideTimeOverride": true, "id": 90, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 4, "desc": true }, "styles": [ { "alias": "Namespace", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "namespace", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "PVC Name", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "persistentvolumeclaim", "thresholds": [], "type": "number", "unit": "short" }, { "alias": "Storage Class", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "storageclass", "thresholds": [], "type": "number", "unit": "short" }, { "alias": "Cost", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 
172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" } ], "targets": [ { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim,storageclass) / 1024 / 1024 /1024 * $costStorageSSD\n\nor\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim,storageclass) / 1024 / 1024 /1024 * $costStorageStandard\n", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ persistentvolumeclaim }}", "refId": "A" } ], "timeFrom": null, "timeShift": null, "title": "Persistent Volume Claims", "transform": "table", "transparent": false, "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "CPU requests by container divided by the rate of CPU usage over the last hour", "fill": 1, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 9 }, "id": 100, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": 
true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "topk(5, label_replace(\n label_replace(\n kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\"},\n \"pod_name\", \n \"$1\", \n \"pod\", \n \"(.+)\"\n ),\n \"container_name\", \n \"$1\", \n \"container\", \n \"(.+)\"\n) \n/\non(pod_name,container_name) \nsum(\n rate(\n container_cpu_usage_seconds_total{namespace=\"$namespace\",pod_name=~\".+\"}[1h]\n )\n) \nby (pod_name,container_name))", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Ratio of CPU requests to usage (by container)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "This panel shows historical utilisation as an average across all pods in this namespace. 
It only accounts for currently deployed pods", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 6, "w": 12, "x": 0, "y": 18 }, "height": "", "id": 94, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (rate (container_cpu_usage_seconds_total{namespace=\"$namespace\"}[10m])) by (namespace) * 1000\n/\nsum(avg_over_time(container_spec_cpu_shares{namespace=\"$namespace\"}[10m])) by (namespace) * 100", "format": "time_series", "hide": false, "instant": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "cpu", "metric": "container_cpu", "refId": "A", "step": 10 } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "Overall CPU Utilisation", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "decimals": null, "format": "percent", "label": "", "logBase": 1, "max": "110", "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "description": "This panel shows historical utilisation as an average across all pods in this namespace. 
It only accounts for currently deployed pods", "editable": true, "error": false, "fill": 0, "grid": {}, "gridPos": { "h": 6, "w": 12, "x": 12, "y": 18 }, "id": 92, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": false, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": true, "targets": [ { "expr": "sum (container_memory_working_set_bytes{namespace=\"$namespace\"}) by (namespace)\n/\nsum(container_spec_memory_limit_bytes{namespace=\"$namespace\"}) by (namespace) * 100", "format": "time_series", "instant": false, "interval": "10s", "intervalFactor": 1, "legendFormat": "limit utilization", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 }, { "expr": "sum (container_memory_working_set_bytes{namespace=\"$namespace\"}) by (namespace)\n/\nsum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (namespace) * 100", "format": "time_series", "instant": false, "intervalFactor": 1, "legendFormat": "request utilization", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "Overall RAM Utilisation", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "decimals": null, "format": "percent", "label": null, "logBase": 1, "max": "110", "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", 
"decimals": 2, "description": "Traffic in and out of this namespace, as a sum of the pods within it", "editable": true, "error": false, "fill": 1, "grid": {}, "gridPos": { "h": 6, "w": 12, "x": 0, "y": 24 }, "height": "", "id": 96, "isNew": true, "legend": { "alignAsTable": false, "avg": true, "current": true, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_network_receive_bytes_total{namespace=\"$namespace\"}[10m])) by (namespace)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "<- in", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_network_transmit_bytes_total{namespace=\"$namespace\"}[10m])) by (namespace)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "-> out", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "Network IO", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "description": "Disk reads and writes for the namespace, as a sum of the pods within 
it", "editable": true, "error": false, "fill": 1, "grid": {}, "gridPos": { "h": 6, "w": 12, "x": 12, "y": 24 }, "height": "", "id": 98, "isNew": true, "legend": { "alignAsTable": false, "avg": true, "current": true, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (container_fs_writes_bytes_total{namespace=\"$namespace\"}[10m])) by (namespace)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "<- write", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "- sum (rate (container_fs_reads_bytes_total{namespace=\"$namespace\"}[10m])) by (namespace)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 1, "legendFormat": "-> read", "refId": "B" } ], "thresholds": [], "timeFrom": "", "timeShift": null, "title": "Disk IO", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": "", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } } ], "refresh": "10s", "schemaVersion": 16, "style": "dark", "tags": [ "cost", "utilisation", "metrics" ], "templating": { "list": [ { "current": { "text": "23.06", "value": "23.06" }, "hide": 0, "label": "CPU", "name": "costcpu", "options": [ { "text": "23.06", "value": "23.06" } ], "query": "23.06", 
"skipUrlSync": false, "type": "constant" }, { "current": { "text": "7.28", "value": "7.28" }, "hide": 0, "label": "PE CPU", "name": "costpcpu", "options": [ { "text": "7.28", "value": "7.28" } ], "query": "7.28", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "3.25", "value": "3.25" }, "hide": 0, "label": "RAM", "name": "costram", "options": [ { "text": "3.25", "value": "3.25" } ], "query": "3.25", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.6862", "value": "0.6862" }, "hide": 0, "label": "PE RAM", "name": "costpram", "options": [ { "text": "0.6862", "value": "0.6862" } ], "query": "0.6862", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.04", "value": "0.04" }, "hide": 0, "label": "Storage", "name": "costStorageStandard", "options": [ { "text": "0.04", "value": "0.04" } ], "query": "0.04", "skipUrlSync": false, "type": "constant" }, { "current": { "text": ".17", "value": ".17" }, "hide": 0, "label": "SSD", "name": "costStorageSSD", "options": [ { "text": ".17", "value": ".17" } ], "query": ".17", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "30", "value": "30" }, "hide": 0, "label": "Disc.", "name": "costDiscount", "options": [ { "text": "30", "value": "30" } ], "query": "30", "skipUrlSync": false, "type": "constant" }, { "allValue": null, "current": { "text": "default", "value": "default" }, "datasource": "Prometheus", "hide": 0, "includeAll": false, "label": "NS", "multi": false, "name": "namespace", "options": [], "query": "query_result(sum(container_memory_working_set_bytes{namespace!=\"\"}) by (namespace))", "refresh": 1, "regex": "/namespace=\\\"(.*?)(\\\")/", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-15m", "to": "now" }, "timepicker": { "hidden": false, "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", 
"15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "browser", "title": "Namespace cost & utilization metrics", "uid": "at-cost-analysis-namespace", "version": 1 }' + cluster-analysis: + json: '{ "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "A dashboard to help manage Kubernetes cluster costs and resources", "editable": true, "gnetId": 6873, "graphTooltip": 0, "id": 1, "iteration": 1542347911715, "links": [], "panels": [ { "content": "This dashboard shows monthly cost estimates for the cluster, based on **current** CPU, RAM and storage provisioned.", "gridPos": { "h": 2, "w": 24, "x": 0, "y": 0 }, "id": 86, "links": [], "mode": "markdown", "title": "", "transparent": true, "type": "text" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 6, "x": 0, "y": 2 }, "hideTimeOverride": true, "id": 75, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n 
kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) * $costpcpu\n )\n or\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) * ($costcpu - ($costcpu / 100 * $costDiscount))\n )\n) ", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "CPU Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 6, "x": 6, "y": 2 }, "hideTimeOverride": true, "id": 77, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum(\n (\n (\n sum(kube_node_status_capacity_memory_bytes) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) /1024/1024/1024 * $costpram\n )\n or\n (\n (\n sum(kube_node_status_capacity_memory_bytes) by (node)\n * on (node) group_left 
(label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) /1024/1024/1024 * ($costram - ($costram / 100 * $costDiscount))\n)\n) ", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "RAM Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 6, "x": 12, "y": 2 }, "hideTimeOverride": true, "id": 78, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n) / 1024 / 1024 /1024 * $costStorageSSD\n\n+\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on 
(persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n) / 1024 / 1024 /1024 * $costStorageStandard\n\n+ \n\nsum(container_fs_limit_bytes{id=\"/\"}) / 1024 / 1024 / 1024 * 1.03 * $costStorageStandard", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Storage Cost (Cluster and PVC)", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Represents a near worst-case approximation of network costs.", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 6, "x": 18, "y": 2 }, "hideTimeOverride": true, "id": 129, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "SUM(rate(node_network_transmit_bytes_total{device=\"eth0\"}[60m]) / 1024 / 1024 / 1024 ) * (60 * 60 * 24 * 30) * $costEgress", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], 
"thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Network Egress Cost", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "datasource": "Prometheus", "decimals": 2, "description": "Current CPU use from applications divided by allocatable CPUs", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 0, "y": 6 }, "height": "180px", "hideTimeOverride": true, "id": 82, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "(\n sum(\n count(irate(container_cpu_usage_seconds_total{id=\"/\"}[10m])) by (instance)\n * on (instance) \n sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[10m])) by (instance)\n ) \n / \n (sum (kube_node_status_allocatable_cpu_cores))\n) * 100", "format": "time_series", "interval": "", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "30, 80", "timeFrom": "", "title": "CPU Utilization", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", 
"#c15c17" ], "datasource": "Prometheus", "decimals": 2, "description": "Current CPU reservation requests from applications vs allocatable CPU", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 3, "y": 6 }, "height": "180px", "id": 91, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "SUM(kube_pod_container_resource_requests_cpu_cores) / SUM(kube_node_status_allocatable_cpu_cores) * 100", "format": "time_series", "interval": "", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "30, 80", "title": "CPU Requests", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "datasource": "Prometheus", "description": "Current RAM use vs RAM available", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 6, "y": 6 }, "height": "180px", "hideTimeOverride": true, "id": 80, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, 
"nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "SUM(container_memory_usage_bytes{namespace!=\"\"}) / SUM(kube_node_status_allocatable_memory_bytes) * 100", "format": "time_series", "interval": "", "intervalFactor": 1, "refId": "A", "step": 10 }, { "expr": "", "format": "time_series", "intervalFactor": 1, "refId": "B" } ], "thresholds": "30,80", "timeFrom": "", "title": "RAM Utilization", "transparent": false, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "datasource": "Prometheus", "description": "Current RAM requests vs RAM available", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 9, "y": 6 }, "height": "180px", "id": 92, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "(\n sum(kube_pod_container_resource_requests_memory_bytes{namespace!=\"\"})\n /\n 
sum(kube_node_status_allocatable_memory_bytes)\n) * 100", "format": "time_series", "interval": "", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "30,80", "title": "RAM Requests", "transparent": false, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "datasource": "Prometheus", "decimals": 2, "description": "This gauge shows the current standard storage use, including cluster storage, vs storage available", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 12, "y": 6 }, "height": "180px", "hideTimeOverride": true, "id": 95, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim, namespace) or up * 0\n + sum(container_fs_usage_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\"})\n) /\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, 
namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n + sum(container_fs_limit_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\"})\n) * 100", "format": "time_series", "interval": "", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "30, 80", "timeFrom": "", "title": "Storage Utilization", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "datasource": "Prometheus", "decimals": 2, "description": "This gauge shows the current SSD use vs SSD available", "editable": true, "error": false, "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 3, "x": 15, "y": 6 }, "height": "180px", "hideTimeOverride": true, "id": 96, "interval": null, "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim, namespace)\n) /\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, 
namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace)\n) * 100", "format": "time_series", "interval": "", "intervalFactor": 1, "refId": "A", "step": 10 } ], "thresholds": "30, 80", "timeFrom": "", "title": "SSD Utilization", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "Prometheus", "decimals": 2, "description": "Expected monthly cost given current CPU, memory storage, and network resource consumption", "format": "currencyUSD", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 4, "w": 6, "x": 18, "y": 6 }, "hideTimeOverride": true, "id": 93, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "label_cloud_google_com_gke_preemptible", "targets": [ { "expr": "# CPU\nsum(\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) * $costpcpu\n )\n or\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n 
kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) * ($costcpu - ($costcpu / 100 * $costDiscount))\n )\n) \n\n+ \n\n# Storage\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n) / 1024 / 1024 /1024 * $costStorageSSD\n\n+\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n) / 1024 / 1024 /1024 * $costStorageStandard\n\n+ \n\nsum(container_fs_limit_bytes{id=\"/\"}) / 1024 / 1024 / 1024 * 1.03 * $costStorageStandard \n\n+\n\n# END STORAGE\n# RAM \nsum(\n (\n (\n sum(kube_node_status_capacity_memory_bytes) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) /1024/1024/1024 * $costpram\n )\n or\n (\n (\n sum(kube_node_status_capacity_memory_bytes) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) /1024/1024/1024 * ($costram - ($costram / 100 * $costDiscount))\n)\n)\n\n+\n\n#Network \nSUM(rate(node_network_transmit_bytes_total{device=\"eth0\"}[60m]) / 1024 / 1024 / 1024 ) * (60 * 60 * 24 * 30) * $costEgress", "format": "time_series", "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": " {{ node }}", "refId": "A" } ], "thresholds": "", "timeFrom": "15m", "timeShift": null, "title": "Total Monthly Cost", "type": "singlestat", "valueFontSize": "120%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" 
}, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "Expected monthly CPU, memory and storage costs given provisioned resources", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 10 }, "id": 120, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "# CPU\nsum(\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) * $costpcpu\n )\n or\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) * ($costcpu - ($costcpu / 100 * $costDiscount))\n )\n) \n\n+ \n\n# Storage\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n) / 1024 / 1024 /1024 * $costStorageSSD\n\n+\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) or up * 0\n) / 1024 / 1024 /1024 * $costStorageStandard\n\n+ \n\nsum(container_fs_limit_bytes{id=\"/\"}) / 1024 / 1024 / 1024 * 1.03 * $costStorageStandard \n\n+\n\n# END STORAGE\n# RAM \nsum(\n (\n (\n 
sum(kube_node_status_capacity_memory_bytes) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) /1024/1024/1024 * $costpram\n )\n or\n (\n (\n sum(kube_node_status_capacity_memory_bytes) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) /1024/1024/1024 * ($costram - ($costram / 100 * $costDiscount))\n)\n) \n\n+\n\n#Network \nSUM(rate(node_network_transmit_bytes_total{device=\"eth0\"}[60m]) / 1024 / 1024 / 1024 ) * (60 * 60 * 24 * 30) * $costEgress", "format": "time_series", "intervalFactor": 1, "legendFormat": "cluster cost", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Total monthly cost", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "currencyUSD", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "Resources allocated to namespace based on container requests", "fontSize": "100%", "gridPos": { "h": 8, "w": 12, "x": 12, "y": 10 }, "hideTimeOverride": false, "id": 73, "links": [], "pageSize": 10, "repeat": null, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 7, "desc": true }, "styles": [ { "alias": "Namespace", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#c15c17" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "View namespace cost metrics", "linkUrl": 
"d/at-cost-analysis-namespace/namespace-cost-metrics?&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ "30", "80" ], "type": "string", "unit": "currencyUSD" }, { "alias": "RAM", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "pattern": "Value #B", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "CPU", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "PV Storage", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "Total", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #D", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "CPU Utilisation", "colorMode": "value", "colors": [ "#bf1b00", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #E", "thresholds": [ "30", "80" ], "type": "number", "unit": "percent" }, { "alias": "RAM Utilisation", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#ef843c" ], 
"dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #F", "thresholds": [ "30", "80" ], "type": "number", "unit": "percent" } ], "targets": [ { "expr": "(\n sum(kube_pod_container_resource_requests_cpu_cores{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible!=\"true\"}*($costcpu - ($costcpu / 100 * $costDiscount))) by(namespace)\n or\n count(\n count(container_spec_cpu_shares{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)\n\n+\n\n(\n sum(kube_pod_container_resource_requests_cpu_cores{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible=\"true\"}*$costpcpu) by(namespace)\n or\n count(\n count(container_spec_cpu_shares{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" }, { "expr": "(\n sum(kube_pod_container_resource_requests_memory_bytes{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible!=\"true\"} / 1024 / 1024 / 1024*($costram- ($costram / 100 * $costDiscount))) by (namespace) \n or\n count(\n count(container_spec_memory_limit_bytes{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)\n\n+\n\n(\n sum(kube_pod_container_resource_requests_memory_bytes{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible=\"true\"} / 1024 / 1024 / 1024 * $costpram ) by (namespace) \n or\n count(\n count(container_spec_memory_limit_bytes{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" }, { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) 
group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) \n) by (namespace) / 1024 / 1024 /1024 * $costStorageSSD \n\nor\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) \n) by (namespace) / 1024 / 1024 /1024 * $costStorageStandard", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "C" }, { "expr": "# CPU \n(\n sum(kube_pod_container_resource_requests_cpu_cores{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible!=\"true\"}*($costcpu - ($costcpu / 100 * $costDiscount))) by(namespace)\n or\n count(\n count(container_spec_cpu_shares{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)\n\n+\n\n(\n sum(kube_pod_container_resource_requests_cpu_cores{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible=\"true\"}*$costpcpu) by(namespace)\n or\n count(\n count(container_spec_cpu_shares{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)\n\n+\n\n#END CPU \n# Memory \n\n(\n sum(kube_pod_container_resource_requests_memory_bytes{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible!=\"true\"} / 1024 / 1024 / 1024*($costram- ($costram / 100 * $costDiscount))) by (namespace) \n or\n count(\n count(container_spec_memory_limit_bytes{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)\n\n+\n\n(\n sum(kube_pod_container_resource_requests_memory_bytes{namespace!=\"\",namespace!=\"kube-system\",cloud_google_com_gke_preemptible=\"true\"} / 1024 / 1024 / 1024 * $costpram ) by (namespace) \n or\n count(\n 
count(container_spec_memory_limit_bytes{namespace!=\"\",namespace!=\"kube-system\"}) by(namespace)\n ) by(namespace) -1\n)\n\n+\n\n# PV storage\n\n(\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) \n) by (namespace) / 1024 / 1024 /1024 * $costStorageSSD \n\nor\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n + on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace) \n) by (namespace) / 1024 / 1024 /1024 * $costStorageStandard \n)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "Total", "refId": "D" } ], "timeFrom": "", "timeShift": null, "title": "Namespace cost allocation", "transform": "table", "transparent": false, "type": "table" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 }, "id": 108, "panels": [], "title": "CPU Metrics", "type": "row" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "This table shows the comparison of CPU requests and usage by namespace", "fontSize": "100%", "gridPos": { "h": 10, "w": 12, "x": 0, "y": 19 }, "hideTimeOverride": true, "id": 104, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 1, "desc": true }, "styles": [ { "alias": "CPU Requests", "colorMode": null, "colors": [ "#fceaca", "#fce2de", "rgba(245, 54, 54, 0.9)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [ "" ], "type": "number", "unit": "short" }, { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", 
"rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "node", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "CPU Requests", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#cffaff" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [ "" ], "type": "number", "unit": "short" }, { "alias": "Request Utilization", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [ "30" ], "type": "number", "unit": "percentunit" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "View namespace cost metrics", "linkUrl": "d/at-cost-analysis-namespace/namespace-cost-metrics?&var-namespace=$__cell", "mappingType": 1, "pattern": "namespace", "thresholds": [], "type": "number", "unit": "short" } ], "targets": [ { "expr": "sum(kube_pod_container_resource_requests_cpu_cores) by (namespace) ", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "", "refId": "A" }, { "expr": "sum (rate (container_cpu_usage_seconds_total{image!=\"\"}[10m])) by (namespace)", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "C" } ], "timeFrom": null, "timeShift": null, "title": "CPU request utilization by namespace", 
"transform": "table", "transparent": false, "type": "table" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "This table shows the comparison of application CPU usage vs the capacity of the node (measured over last 60 minutes)", "fontSize": "100%", "gridPos": { "h": 10, "w": 12, "x": 12, "y": 19 }, "hideTimeOverride": true, "id": 90, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 2, "desc": true }, "styles": [ { "alias": "CPU Costs", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [ "30", " 80" ], "type": "number", "unit": "currencyUSD" }, { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "node", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "CPU Utilization", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [ "30", " 80" ], "type": "number", "unit": "percent" }, { "alias": "Usage", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value", "thresholds": [], "type": "number", "unit": "percentunit" } ], "targets": [ { "expr": "sum(\n (\n (\n 
sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible=\"true\"}\n ) * $costpcpu\n )\n or\n (\n (\n sum(kube_node_status_capacity_cpu_cores) by (node)\n * on (node) group_left (label_cloud_google_com_gke_preemptible)\n kube_node_labels{label_cloud_google_com_gke_preemptible!=\"true\"}\n ) * ($costcpu - ($costcpu / 100 * $costDiscount))\n )\n) by (node)", "format": "table", "instant": true, "intervalFactor": 1, "refId": "A" }, { "expr": "SUM(\nSUM(rate(container_cpu_usage_seconds_total[60m])) by (pod_name)\n* on (pod_name) group_left (node) \nlabel_replace(\n kube_pod_info{},\n \"pod_name\", \n \"$1\", \n \"pod\", \n \"(.+)\"\n)\n) by (node) \n/ \nsum(kube_node_status_allocatable_cpu_cores) by (node)", "format": "table", "instant": true, "intervalFactor": 1, "refId": "B" } ], "timeFrom": null, "timeShift": null, "title": "Cluster cost & utilization by node", "transform": "table", "transparent": false, "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "fill": 1, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 29 }, "id": 116, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "SUM(kube_node_status_capacity_cpu_cores)", "format": "time_series", "intervalFactor": 1, "legendFormat": "capacity", "refId": "A" }, { "expr": "SUM(kube_pod_container_resource_requests_cpu_cores)", "format": "time_series", "intervalFactor": 1, "legendFormat": "requests", "refId": "C" }, { "expr": "SUM(rate(container_cpu_usage_seconds_total{id=\"/\"}[5m]))", "format": "time_series", "intervalFactor": 1, 
"legendFormat": "usage", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Cluster CPUs", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 37 }, "id": 113, "panels": [], "title": "Memory Metrics", "type": "row" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "Comparison of memory requests and current usage by namespace", "fontSize": "100%", "gridPos": { "h": 10, "w": 12, "x": 0, "y": 38 }, "hideTimeOverride": true, "id": 109, "links": [], "pageSize": 7, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 1, "desc": true }, "styles": [ { "alias": "Mem Requests (GB)", "colorMode": null, "colors": [ "#fceaca", "#fce2de", "rgba(245, 54, 54, 0.9)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [ "" ], "type": "number", "unit": "short" }, { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "node", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "CPU Requests", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 
172, 45, 0.97)", "#cffaff" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [ "" ], "type": "number", "unit": "short" }, { "alias": "Request Utilization", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "#508642", "#e5ac0e" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [ ".30", ".75" ], "type": "number", "unit": "percentunit" }, { "alias": "Namespace", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "View namespace cost metrics", "linkUrl": "d/at-cost-analysis-namespace/namespace-cost-metrics?&var-namespace=$__cell", "mappingType": 1, "pattern": "namespace", "thresholds": [], "type": "number", "unit": "short" } ], "targets": [ { "expr": "sum(kube_pod_container_resource_requests_memory_bytes / 1024 / 1024 / 1024) by (namespace) ", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "", "refId": "A" }, { "expr": "SUM(container_memory_usage_bytes{image!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes) by (namespace) ", "format": "table", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "C" } ], "timeFrom": null, "timeShift": null, "title": "Memory requests & utilization by namespace", "transform": "table", "transparent": false, "type": "table" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "description": "Container RAM usage vs node capacity", "fontSize": "100%", "gridPos": { "h": 10, "w": 12, "x": 12, "y": 38 }, "hideTimeOverride": true, "id": 114, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 1, "desc": true }, "styles": [ { "alias": "RAM Requests", "colorMode": "value", "colors": [ "rgba(245, 
54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [ "30", " 80" ], "type": "number", "unit": "percentunit" }, { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "node", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "RAM Usage", "colorMode": "value", "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(50, 172, 45, 0.97)", "#ef843c" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value", "thresholds": [ "25", " 80" ], "type": "number", "unit": "percent" } ], "targets": [ { "expr": "SUM(label_replace(container_memory_usage_bytes{namespace!=\"\"}, \"node\", \"$1\", \"instance\",\"(.+)\")) by (node) * 100\n/\nSUM(kube_node_status_allocatable_memory_bytes) by (node)", "format": "table", "instant": true, "intervalFactor": 1, "refId": "B" } ], "timeFrom": null, "timeShift": null, "title": "Node utilization of allocatable RAM", "transform": "table", "transparent": false, "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "fill": 1, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 48 }, "id": 117, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, 
"steppedLine": false, "targets": [ { "expr": "SUM(kube_node_status_capacity_memory_bytes / 1024 / 1024 / 1024 * 1.03)", "format": "time_series", "intervalFactor": 1, "legendFormat": "capacity", "refId": "A" }, { "expr": "SUM(kube_pod_container_resource_requests_memory_bytes{namespace!=\"\"} / 1024 / 1024 / 1024)", "format": "time_series", "intervalFactor": 1, "legendFormat": "requests", "refId": "C" }, { "expr": "SUM(container_memory_usage_bytes{image!=\"\"} / 1024 / 1024 / 1024)", "format": "time_series", "intervalFactor": 1, "legendFormat": "usage", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Cluster memory (GB)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 56 }, "id": 101, "panels": [], "title": "Storage Metrics", "type": "row" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "fontSize": "100%", "gridPos": { "h": 10, "w": 12, "x": 0, "y": 57 }, "hideTimeOverride": true, "id": 97, "links": [], "pageSize": 8, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 4, "desc": true }, "styles": [ { "alias": "Node", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "instance", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "PVC Name", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD 
HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "persistentvolumeclaim", "thresholds": [], "type": "number", "unit": "short" }, { "alias": "Storage Class", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "storageclass", "thresholds": [], "type": "number", "unit": "short" }, { "alias": "Cost", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "Cost", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "Size (GB)", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [], "type": "number", "unit": "short" }, { "alias": "Usage", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [], "type": "number", "unit": "percentunit" } ], "targets": [ { "expr": "SUM(container_fs_limit_bytes{id=\"/\"}) by (instance) / 1024 / 1024 / 1024 * 1.03", "format": "table", "instant": true, 
"intervalFactor": 1, "refId": "B" }, { "expr": "SUM(container_fs_limit_bytes{id=\"/\"}) by (instance) / 1024 / 1024 / 1024 * 1.03 * $costStorageStandard\n", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ persistentvolumeclaim }}", "refId": "A" }, { "expr": "sum(container_fs_usage_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\"} / container_fs_limit_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\"}) by (instance) \n", "format": "table", "instant": true, "intervalFactor": 1, "refId": "C" } ], "timeFrom": null, "timeShift": null, "title": "Cluster Storage", "transform": "table", "transparent": false, "type": "table" }, { "columns": [ { "text": "Avg", "value": "avg" } ], "datasource": "Prometheus", "fontSize": "100%", "gridPos": { "h": 10, "w": 12, "x": 12, "y": 57 }, "hideTimeOverride": true, "id": 94, "links": [], "pageSize": 10, "repeatDirection": "v", "scroll": true, "showHeader": true, "sort": { "col": 2, "desc": true }, "styles": [ { "alias": "Namespace", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "link": true, "linkTooltip": "View namespace cost metrics", "linkUrl": "d/at-cost-analysis-namespace/namespace-cost-metrics?&var-namespace=$__cell", "mappingType": 1, "pattern": "namespace", "thresholds": [], "type": "string", "unit": "short" }, { "alias": "PVC Name", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "persistentvolumeclaim", "thresholds": [], "type": "number", "unit": "short" }, { "alias": "Storage Class", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "storageclass", "thresholds": 
[], "type": "number", "unit": "short" }, { "alias": "Cost", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Time", "thresholds": [], "type": "hidden", "unit": "short" }, { "alias": "Cost", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #A", "thresholds": [], "type": "number", "unit": "currencyUSD" }, { "alias": "Usage", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #B", "thresholds": [], "type": "number", "unit": "percentunit" }, { "alias": "Size (GB)", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, "mappingType": 1, "pattern": "Value #C", "thresholds": [], "type": "number", "unit": "short" } ], "targets": [ { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n * on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim,storageclass) / 1024 / 1024 /1024\n\nor\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, 
storageclass)\n * on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim,storageclass) / 1024 / 1024 /1024\n\n\n", "format": "table", "instant": true, "intervalFactor": 1, "refId": "C" }, { "expr": "sum (\n sum(kube_persistentvolumeclaim_info{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n * on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{storageclass=~\".*ssd.*\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim,storageclass) / 1024 / 1024 /1024 * $costStorageSSD\n\nor\n\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n * on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim,storageclass) / 1024 / 1024 /1024 * $costStorageStandard\n", "format": "table", "hide": false, "instant": true, "interval": "", "intervalFactor": 1, "legendFormat": "{{ persistentvolumeclaim }}", "refId": "A" }, { "expr": "sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim, namespace) \n/\nsum (\n sum(kube_persistentvolumeclaim_info{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace, storageclass)\n * on (persistentvolumeclaim, namespace) group_right(storageclass)\n sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{storageclass!~\".*ssd.*\"}) by (persistentvolumeclaim, namespace)\n) by (namespace,persistentvolumeclaim)", "format": "table", "instant": true, "intervalFactor": 1, "refId": "B" } ], "timeFrom": null, "timeShift": null, "title": "Persistent Volume Claims", "transform": 
"table", "transparent": false, "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 67 }, "id": 122, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "SUM( kubelet_volume_stats_inodes_used / kubelet_volume_stats_inodes) by (persistentvolumeclaim) * 100", "format": "time_series", "intervalFactor": 1, "legendFormat": "", "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Inode usage", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "fill": 1, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 67 }, "id": 128, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "SUM(rate(node_disk_reads_completed_total[10m])) or SUM(rate(node_disk_reads_completed[10m]))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": 
"D" }, { "expr": "SUM(rate(node_disk_writes_completed_total[10m])) or SUM(rate(node_disk_writes_completed[10m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Disk IOPS", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": "IOPS", "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 75 }, "id": 127, "panels": [], "title": "Network", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "fill": 1, "gridPos": { "h": 9, "w": 24, "x": 0, "y": 76 }, "id": 123, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (rate (node_network_transmit_bytes_total{}[60m]))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "node_out", "refId": "B" }, { "expr": "SUM ( rate(node_network_transmit_bytes_total{device=\"eth0\"}[60m]))\n\n", "format": "time_series", "instant": false, "intervalFactor": 1, "legendFormat": "eth0 out", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Node network transmit", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { 
"format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "refresh": "15m", "schemaVersion": 16, "style": "dark", "tags": [ "cost", "utilisation", "metrics", "utilization" ], "templating": { "list": [ { "current": { "text": "23.076", "value": "23.076" }, "hide": 0, "label": "CPU", "name": "costcpu", "options": [ { "text": "23.076", "value": "23.076" } ], "query": "23.076", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "5.10", "value": "5.10" }, "hide": 0, "label": "PE CPU", "name": "costpcpu", "options": [ { "text": "5.10", "value": "5.10" } ], "query": "5.10", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "3.25", "value": "3.25" }, "hide": 0, "label": "RAM", "name": "costram", "options": [ { "text": "3.25", "value": "3.25" } ], "query": "3.25", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.6862", "value": "0.6862" }, "hide": 0, "label": "PE RAM", "name": "costpram", "options": [ { "text": "0.6862", "value": "0.6862" } ], "query": "0.6862", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "0.040", "value": "0.040" }, "hide": 0, "label": "Storage", "name": "costStorageStandard", "options": [ { "text": "0.040", "value": "0.040" } ], "query": "0.040", "skipUrlSync": false, "type": "constant" }, { "current": { "text": ".17", "value": ".17" }, "hide": 0, "label": "SSD", "name": "costStorageSSD", "options": [ { "text": ".17", "value": ".17" } ], "query": ".17", "skipUrlSync": false, "type": "constant" }, { "current": { "text": ".12", "value": ".12" }, "hide": 0, "label": "Egress", "name": "costEgress", "options": [ { "selected": true, "text": ".12", "value": ".12" } ], "query": ".12", "skipUrlSync": false, "type": "constant" }, { "current": { "text": "30", "value": "30" }, "hide": 0, "label": "Discount", "name": 
"costDiscount", "options": [ { "text": "30", "value": "30" } ], "query": "30", "skipUrlSync": false, "type": "constant" } ] }, "time": { "from": "now-15m", "to": "now" }, "timepicker": { "hidden": false, "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "browser", "title": "Cluster cost & utilization metrics", "uid": "cluster-costs", "version": 1 }' +## Reference to external ConfigMap per provider. Use provider name as key and ConfigMap name as value. +## A provider's dashboards must be defined either by external ConfigMaps or in values.yaml, not in both. +## ConfigMap data example: +## +## data: +## example-dashboard.json: | +## RAW_JSON +## +dashboardsConfigMaps: {} +# default: "" + +## Grafana's primary configuration +## NOTE: values in map will be converted to ini format +## ref: http://docs.grafana.org/installation/configuration/ +## +grafana.ini: + paths: + data: /var/lib/grafana/data + logs: /var/log/grafana + plugins: /var/lib/grafana/plugins + provisioning: /etc/grafana/provisioning + analytics: + check_for_updates: true + log: + mode: console + grafana_net: + url: https://grafana.net + auth.anonymous: + enabled: true + org_role: Admin # NOTE(review): anonymous access is enabled with the Admin role; confirm Grafana is only reachable via port-forward + org_name: Main Org. +## LDAP Authentication can be enabled with the following values on grafana.ini +## NOTE: Grafana will fail to start if the value for ldap.toml is invalid + # auth.ldap: + # enabled: true + # allow_sign_up: true + # config_file: /etc/grafana/ldap.toml + +## Grafana's LDAP configuration +## Templated by the template in _helpers.tpl +## NOTE: To enable, grafana.ini must be configured with auth.ldap.enabled +## ref: http://docs.grafana.org/installation/configuration/#auth-ldap +## ref: http://docs.grafana.org/installation/ldap/#configuration +ldap: + # `existingSecret` is a reference to an existing secret containing the ldap configuration + # for Grafana in a key `ldap-toml`.
+ existingSecret: "" + # `config` is the content of `ldap.toml` that will be stored in the created secret + config: "" + # config: |- + # verbose_logging = true + + # [[servers]] + # host = "my-ldap-server" + # port = 636 + # use_ssl = true + # start_tls = false + # ssl_skip_verify = false + # bind_dn = "uid=%s,ou=users,dc=myorg,dc=com" + +## Grafana's SMTP configuration +## NOTE: To enable, grafana.ini must be configured with smtp.enabled +## ref: http://docs.grafana.org/installation/configuration/#smtp +smtp: + # `existingSecret` is a reference to an existing secret containing the smtp configuration + # for Grafana in keys `user` and `password`. + existingSecret: "" + +## Sidecars that collect the configmaps with the specified label and store the included files in the respective folders +## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards +sidecar: + image: kiwigrid/k8s-sidecar:0.0.3 + imagePullPolicy: IfNotPresent + resources: # NOTE(review): with the children below commented out this key is null, not {}; confirm the template tolerates that +# limits: +# cpu: 100m +# memory: 100Mi +# requests: +# cpu: 50m +# memory: 50Mi + dashboards: + enabled: false + # label that the configmaps with dashboards are marked with + label: grafana_dashboard + # folder in the pod that should hold the collected dashboards + folder: /tmp/dashboards + datasources: + enabled: false + # label that the configmaps with datasources are marked with + label: grafana_datasource diff --git a/cost-analyzer/charts/prometheus-7.4.1.tgz b/cost-analyzer/charts/prometheus-7.4.1.tgz new file mode 100644 index 000000000..559680f87 Binary files /dev/null and b/cost-analyzer/charts/prometheus-7.4.1.tgz differ diff --git a/cost-analyzer/requirements.lock b/cost-analyzer/requirements.lock new file mode 100644 index 000000000..895bbd1b0 --- /dev/null +++ b/cost-analyzer/requirements.lock @@ -0,0 +1,6 @@ +dependencies: +- name: prometheus + repository: https://kubernetes-charts.storage.googleapis.com/ + version: 7.4.1 +digest:
sha256:721c3eb852017861df33d352160351b8065ab7999054c86584f3caad81695e6e +generated: 2018-11-15T23:26:53.686522-08:00 diff --git a/cost-analyzer/requirements.yaml b/cost-analyzer/requirements.yaml new file mode 100644 index 000000000..b5b2d4f6f --- /dev/null +++ b/cost-analyzer/requirements.yaml @@ -0,0 +1,4 @@ +dependencies: + - name: prometheus + repository: https://kubernetes-charts.storage.googleapis.com/ + version: "7.X.X" # wildcard range for any 7.x chart release; requirements.lock records 7.4.1 diff --git a/cost-analyzer/templates/cost-analyzer-cluster-role-binding.yaml b/cost-analyzer/templates/cost-analyzer-cluster-role-binding.yaml new file mode 100644 index 000000000..acadd7f2d --- /dev/null +++ b/cost-analyzer/templates/cost-analyzer-cluster-role-binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cost-analyzer +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cost-analyzer # the ClusterRole declared in cost-analyzer-cluster-role.yaml +subjects: +- kind: ServiceAccount + name: cost-analyzer + namespace: monitoring # hard-coded; the ServiceAccount template declares the same namespace \ No newline at end of file diff --git a/cost-analyzer/templates/cost-analyzer-cluster-role.yaml b/cost-analyzer/templates/cost-analyzer-cluster-role.yaml new file mode 100644 index 000000000..c8a08d018 --- /dev/null +++ b/cost-analyzer/templates/cost-analyzer-cluster-role.yaml @@ -0,0 +1,44 @@ +apiVersion: rbac.authorization.k8s.io/v1 +# Kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: cost-analyzer +rules: +- apiGroups: [""] + resources: + - configmaps + - secrets # NOTE(review): grants read access to Secrets in every namespace; confirm this is required + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: ["get", "list", "watch"] +- apiGroups: ["extensions"] # NOTE(review): these workload kinds are also served under the "apps" group, which this rule does not cover; confirm + resources: + - daemonsets + - deployments + - replicasets + verbs: ["get", "list", "watch"] +- apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] # NOTE(review): "get" is omitted here, unlike the other rules in this role; confirm intentional +- apiGroups: ["batch"] + resources: + - cronjobs + -
jobs + verbs: ["get", "list", "watch"] +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["get","list", "watch"] +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["get", "list", "watch"] \ No newline at end of file diff --git a/cost-analyzer/templates/cost-analyzer-deployment.yaml b/cost-analyzer/templates/cost-analyzer-deployment.yaml new file mode 100644 index 000000000..01b385918 --- /dev/null +++ b/cost-analyzer/templates/cost-analyzer-deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: extensions/v1beta1 # NOTE(review): Deployments under this API group were removed in Kubernetes 1.16; apps/v1 additionally requires spec.selector +kind: Deployment +metadata: + name: cost-analyzer + namespace: monitoring + labels: + app: cost-analyzer +spec: + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 # NOTE(review): no replicas count is set, so a rollout may take the only pod down; confirm acceptable + type: RollingUpdate + template: + metadata: + labels: + app: cost-analyzer + spec: + restartPolicy: Always + serviceAccountName: cost-analyzer # ServiceAccount from cost-analyzer-service-account.yaml + containers: + - image: ajaytripathy/kubecost:latest # mutable tag; paired with imagePullPolicy: Always below + name: cost-analyzer-server + resources: + requests: + cpu: "20m" + memory: "55M" + #livenessProbe: + # httpGet: + # path: /_status/healthz + # port: 5000 + # initialDelaySeconds: 90 + # timeoutSeconds: 10 + #readinessProbe: + # httpGet: + # path: /_status/healthz + # port: 5000 + # initialDelaySeconds: 30 + # timeoutSeconds: 10 + imagePullPolicy: Always + imagePullSecrets: + - name: regcred # NOTE(review): no template shown in this change creates this secret; confirm it exists in the namespace + + \ No newline at end of file diff --git a/cost-analyzer/templates/cost-analyzer-service-account.yaml b/cost-analyzer/templates/cost-analyzer-service-account.yaml new file mode 100644 index 000000000..0a00b58f5 --- /dev/null +++ b/cost-analyzer/templates/cost-analyzer-service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cost-analyzer + namespace: monitoring \ No newline at end of file diff --git a/cost-analyzer/templates/cost-analyzer-service.yaml b/cost-analyzer/templates/cost-analyzer-service.yaml new file mode 100644 index 000000000..43e776d1f --- /dev/null +++
b/cost-analyzer/templates/cost-analyzer-service.yaml @@ -0,0 +1,14 @@ +kind: Service +apiVersion: v1 +metadata: + name: cost-analyzer + namespace: monitoring # pinned to match the Deployment and ServiceAccount templates so the selector finds the pods whatever the release namespace +spec: + selector: + app: cost-analyzer # matches the pod label set in cost-analyzer-deployment.yaml + type: ClusterIP + ports: + - name: cost-analyzer-server + port: 9001 + targetPort: 9001 # NOTE(review): the Deployment declares no containerPort and its commented-out probes use 5000; confirm the server listens on 9001 + \ No newline at end of file diff --git a/cost-analyzer/values.yaml b/cost-analyzer/values.yaml new file mode 100644 index 000000000..f90d0bd1a --- /dev/null +++ b/cost-analyzer/values.yaml @@ -0,0 +1,48 @@ +# Default values for cost-analyzer. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: nginx + tag: stable + pullPolicy: IfNotPresent + +nameOverride: "" +fullnameOverride: "" + +service: + type: ClusterIP + port: 80 + +ingress: + enabled: false + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + path: / + hosts: + - chart-example.local + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/helm.yaml b/helm.yaml new file mode 100644 index 000000000..c20512f9d --- /dev/null +++ b/helm.yaml @@ -0,0 +1,19 @@ +# This is an extract from here: http://jayunit100.blogspot.fi/2017/07/helm-on.html +apiVersion: v1 +kind: ServiceAccount +metadata: + name: helm + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 # v1 matches the chart's own RBAC templates; v1beta1 was removed in Kubernetes 1.22 +kind: ClusterRoleBinding +metadata: + name: helm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin # NOTE(review): gives the helm service account full control of the cluster; narrow this outside quickstart use +subjects: + - kind: ServiceAccount + name: helm # the ServiceAccount declared in the first document of this file + namespace: kube-system