Skip to content

Commit

Permalink
Separate dashboards, rules, and alerts provisioning
Browse files Browse the repository at this point in the history
Signed-off-by: Weifeng Wang <[email protected]>

Separate dashboards, rules, and alerts provisioning

Signed-off-by: Weifeng Wang <[email protected]>
  • Loading branch information
qclaogui committed Mar 22, 2024
1 parent a8352ca commit ebe2035
Show file tree
Hide file tree
Showing 55 changed files with 11,865 additions and 290,518 deletions.
12 changes: 7 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -210,15 +210,15 @@ manifests-common: $(KUSTOMIZE)
manifests-monolithic-mode: $(KUSTOMIZE)
$(info ******************** generates monolithic-mode manifests ********************)
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/logs > kubernetes/monolithic-mode/logs/k8s-all-in-one.yaml
@$(KUSTOMIZE) build kubernetes/monolithic-mode/metrics > kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/metrics > kubernetes/monolithic-mode/metrics/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/profiles > kubernetes/monolithic-mode/profiles/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/traces > kubernetes/monolithic-mode/traces/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/monolithic-mode/all-in-one > kubernetes/monolithic-mode/all-in-one/k8s-all-in-one.yaml

manifests-read-write-mode: $(KUSTOMIZE)
$(info ******************** generates read-write-mode manifests ********************)
@$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/logs > kubernetes/read-write-mode/logs/k8s-all-in-one.yaml
@$(KUSTOMIZE) build kubernetes/read-write-mode/metrics > kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml
@$(KUSTOMIZE) build --enable-helm kubernetes/read-write-mode/metrics > kubernetes/read-write-mode/metrics/k8s-all-in-one.yaml

manifests-microservices-mode: $(KUSTOMIZE)
$(info ******************** generates microservices-mode manifests ********************)
Expand Down Expand Up @@ -248,6 +248,9 @@ deploy-minio:
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-operator | kubectl apply -f -
kubectl rollout status -n minio-system deployment/minio-operator --watch --timeout=600s
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl apply -f -
@echo "Waiting for Minio to be ready..."
@sleep 20
kubectl rollout status -n minio-system statefulset/codelab-pool-10gb --watch --timeout=600s || true
delete-minio:
@$(KUSTOMIZE) build --enable-helm kubernetes/common/minio-tenant | kubectl delete --ignore-not-found -f -

Expand All @@ -261,9 +264,7 @@ deploy-gateway:
deploy-grafana: deploy-prometheus-operator-crds deploy-minio deploy-gateway
$(info ******************** deploy grafana manifests ********************)
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl apply -f -
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana-agent | kubectl apply -f -
delete-grafana:
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana-agent | kubectl delete --ignore-not-found -f -
@$(KUSTOMIZE) build --enable-helm kubernetes/common/grafana | kubectl delete --ignore-not-found -f -

define echo_info
Expand All @@ -277,7 +278,8 @@ define config_changes_trigger_pod_restart
$(eval $@_MSG = $(1))
@kubectl rollout restart deployment -n gateway nginx
kubectl rollout status -n gateway deployment/nginx --watch --timeout=600s
@kubectl rollout restart daemonset -n monitoring-system grafana-agent
@echo "Provisioning Grafana dashboards Prometheus rules and alerts..."
@$(KUSTOMIZE) build monitoring-mixins | kubectl apply -f -
kubectl rollout status -n monitoring-system daemonset/grafana-agent --watch --timeout=600s
@$(call echo_info, ${$@_MSG})
endef
Expand Down
2 changes: 0 additions & 2 deletions kubernetes/common/grafana-agent/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ helmCharts:
configMapGenerator:
- name: agent-config
namespace: monitoring-system
options:
disableNameSuffixHash: true
files:
- configs/config.river

Expand Down
4 changes: 2 additions & 2 deletions kubernetes/common/grafana-agent/manifests/k8s-all-in-one.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ data:
\"info\")\n\tformat = \"logfmt\"\n}\n"
kind: ConfigMap
metadata:
name: agent-config
name: agent-config-6d74m77mfd
namespace: monitoring-system
---
apiVersion: v1
Expand Down Expand Up @@ -836,7 +836,7 @@ spec:
serviceAccountName: grafana-agent
volumes:
- configMap:
name: agent-config
name: agent-config-6d74m77mfd
name: config
- hostPath:
path: /var/log
Expand Down
74 changes: 66 additions & 8 deletions kubernetes/common/grafana/configs/datasources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,99 @@ datasources:
type: prometheus
uid: metrics
access: proxy
url: http://nginx.gateway.svc.cluster.local:8080/prometheus
orgId: 1
url: http://nginx.gateway.svc.cluster.local.:8080/prometheus
basicAuth: false
isDefault: false
isDefault: true
version: 1
editable: true
editable: false
jsonData:
prometheusType: Mimir
exemplarTraceIdDestinations:
- name: traceID
datasourceUid: traces


# Loki for logs
- name: Logs
type: loki
uid: logs
access: proxy
url: http://nginx.gateway.svc.cluster.local:3100
orgId: 1
uid: logs
url: http://nginx.gateway.svc.cluster.local.:3100
basicAuth: false
isDefault: false
version: 1
editable: true
jsonData:
derivedFields:
- datasourceUid: traces
matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)"
name: traceID
url: $${__value.raw}

# https://grafana.com/docs/grafana/latest/datasources/tempo/configure-tempo-data-source/#provision-the-data-source
# Tempo for traces
- name: Traces
type: tempo
access: proxy
orgId: 1
uid: traces
url: http://nginx.gateway.svc.cluster.local:3200
url: http://nginx.gateway.svc.cluster.local.:3200
basicAuth: false
isDefault: false
version: 1
editable: true
apiVersion: 1
jsonData:
search:
hide: false
lokiSearch:
datasourceUid: logs
nodeGraph:
enabled: true
serviceMap:
datasourceUid: metrics
traceQuery:
timeShiftEnabled: true
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
spanBar:
type: 'Tag'
tag: 'http.path'
tracesToMetrics:
datasourceUid: metrics
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'service.name', value: 'service' }]
queries:
- name: '(R) Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))'
- name: '(E) Error Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))'
- name: '(D) Duration'
query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))'
tracesToLogsV2:
datasourceUid: logs
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'app', value: 'app' }]
filterByTraceID: false
filterBySpanID: false
tracesToProfiles:
customQuery: false
datasourceUid: "profiles"
profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
tags: [{ key: 'app', value: 'service_name' }]


# Pyroscope for profiles
- name: Profiles
type: grafana-pyroscope-datasource
uid: profiles
access: proxy
url: http://nginx.gateway.svc.cluster.local:4040
orgId: 1
uid: profiles
url: http://nginx.gateway.svc.cluster.local.:4040
basicAuth: false
isDefault: false
version: 1
Expand Down
76 changes: 67 additions & 9 deletions kubernetes/common/grafana/manifests/k8s-all-in-one.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,41 +182,99 @@ data:
type: prometheus
uid: metrics
access: proxy
url: http://nginx.gateway.svc.cluster.local:8080/prometheus
orgId: 1
url: http://nginx.gateway.svc.cluster.local.:8080/prometheus
basicAuth: false
isDefault: false
isDefault: true
version: 1
editable: true
editable: false
jsonData:
prometheusType: Mimir
exemplarTraceIdDestinations:
- name: traceID
datasourceUid: traces
# Loki for logs
- name: Logs
type: loki
uid: logs
access: proxy
url: http://nginx.gateway.svc.cluster.local:3100
orgId: 1
uid: logs
url: http://nginx.gateway.svc.cluster.local.:3100
basicAuth: false
isDefault: false
version: 1
editable: true
jsonData:
derivedFields:
- datasourceUid: traces
matcherRegex: "[tT]race_?[iI][dD]\"?[:=]\"?(\\w+)"
name: traceID
url: $${__value.raw}
# https://grafana.com/docs/grafana/latest/datasources/tempo/configure-tempo-data-source/#provision-the-data-source
# Tempo for traces
- name: Traces
type: tempo
access: proxy
orgId: 1
uid: traces
url: http://nginx.gateway.svc.cluster.local:3200
url: http://nginx.gateway.svc.cluster.local.:3200
basicAuth: false
isDefault: false
version: 1
editable: true
apiVersion: 1
jsonData:
search:
hide: false
lokiSearch:
datasourceUid: logs
nodeGraph:
enabled: true
serviceMap:
datasourceUid: metrics
traceQuery:
timeShiftEnabled: true
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
spanBar:
type: 'Tag'
tag: 'http.path'
tracesToMetrics:
datasourceUid: metrics
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'service.name', value: 'service' }]
queries:
- name: '(R) Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags}[$$__rate_interval]))'
- name: '(E) Error Rate'
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))'
- name: '(D) Duration'
query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))'
tracesToLogsV2:
datasourceUid: logs
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'app', value: 'app' }]
filterByTraceID: false
filterBySpanID: false
tracesToProfiles:
customQuery: false
datasourceUid: "profiles"
profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
tags: [{ key: 'app', value: 'service_name' }]
# Pyroscope for profiles
- name: Profiles
type: grafana-pyroscope-datasource
uid: profiles
access: proxy
url: http://nginx.gateway.svc.cluster.local:4040
orgId: 1
uid: profiles
url: http://nginx.gateway.svc.cluster.local.:4040
basicAuth: false
isDefault: false
version: 1
Expand All @@ -226,7 +284,7 @@ kind: ConfigMap
metadata:
labels:
grafana_datasource: "1"
name: grafana-datasources-gcc7kf6bh4
name: grafana-datasources-22t6t9c8f8
namespace: monitoring-system
---
apiVersion: v1
Expand Down
1 change: 0 additions & 1 deletion kubernetes/common/memcached/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ helmCharts:

resources:
- namespace.yaml
- ../../../monitoring-mixins/memcached-mixin/deploy

# # only if auth.enabled=true
# secretGenerator:
Expand Down
Loading

0 comments on commit ebe2035

Please sign in to comment.