Skip to content

Commit

Permalink
feat!: add operator-metrics port
Browse files Browse the repository at this point in the history
  • Loading branch information
fstr committed Sep 2, 2024
1 parent e85fd2d commit abedaec
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 2 deletions.
4 changes: 4 additions & 0 deletions charts/pyrra/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,18 @@ The dashboards can be deployed using a ConfigMap and gets automatically [reload
| ingress.tls | list | `[]` | |
| nameOverride | string | `""` | overrides chart name |
| nodeSelector | object | `{}` | node selector for scheduling server pod |
| operatorMetricsAddress | string | `":8080"` | Address to expose operator metrics |
| podAnnotations | object | `{}` | additional annotations for server pod |
| podSecurityContext | object | `{}` | additional security context for server pod |
| prometheusExternalUrl | string | `""` | url to public-facing prometheus UI in case it differs from prometheusUrl |
| prometheusRule.enabled | bool | `false` | enables creation of PrometheusRules to monitor Pyrra |
| prometheusRule.labels | object | `{}` | Set labels that will be applied on all PrometheusRules (alerts) |
| prometheusUrl | string | `"http://prometheus-operated.monitoring.svc.cluster.local:9090"` | url to prometheus instance with metrics |
| resources | object | `{}` | resource limits and requests for server pod |
| securityContext | object | `{}` | additional security context for server |
| service.annotations | object | `{}` | Annotations to add to the service |
| service.nodePort | string | `""` | node port for HTTP, choose port between <30000-32767> |
| service.operatorMetricsPort | int | `8080` | service port for operator metrics |
| service.port | int | `9099` | service port for server |
| service.type | string | `"ClusterIP"` | service type for server |
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
Expand Down
7 changes: 7 additions & 0 deletions charts/pyrra/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,10 @@ Create the name of the service account to use
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

{{/*
Operator metrics port.

Renders the port part of .Values.operatorMetricsAddress, i.e. the text after
the LAST ":" in the address. Taking the last field (rather than the second)
keeps the result correct for ":8080", "0.0.0.0:8080" and bracketed IPv6
addresses such as "[::]:8080".

When the address is empty, the chart's default ":8080" is used so that the
container port and the Service targetPort, which include this helper
unconditionally, still render a valid number.
NOTE(review): an empty address also omits --metrics-addr from the container
args; this fallback assumes the binary then listens on :8080 - confirm.
*/}}
{{- define "pyrra.operatorMetricsPort" -}}
{{- $address := .Values.operatorMetricsAddress | default ":8080" -}}
{{- splitList ":" $address | last -}}
{{- end }}
6 changes: 6 additions & 0 deletions charts/pyrra/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ spec:
{{- if and .Values.validatingWebhookConfiguration.enabled ($.Capabilities.APIVersions.Has "cert-manager.io/v1") }}
- --disable-webhooks=false
{{- end }}
{{- if .Values.operatorMetricsAddress }}
- --metrics-addr={{ .Values.operatorMetricsAddress }}
{{- end }}
{{- with .Values.extraKubernetesArgs }}
{{- toYaml . | nindent 12 }}
{{- end }}
Expand All @@ -48,6 +51,9 @@ spec:
- mountPath: /tmp/k8s-webhook-server/serving-certs
name: certs
{{- end }}
ports:
- name: op-metrics
containerPort: {{ include "pyrra.operatorMetricsPort" . }}
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
Expand Down
26 changes: 26 additions & 0 deletions charts/pyrra/templates/prometheusrule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{- /*
PrometheusRule that alerts when the Pyrra Kubernetes operator fails to
reconcile ServiceLevelObjectives. Rendered only when
.Values.prometheusRule.enabled is true; .Values.prometheusRule.labels are
appended to the alert's labels.
*/ -}}
{{- if .Values.prometheusRule.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "pyrra.fullname" . }}-prometheusrule
  labels:
    {{- include "pyrra.labels" . | nindent 4 }}
spec:
  groups:
    - name: {{ include "pyrra.fullname" . }}-prometheusrules
      rules:
        - alert: PyrraReconciliationError
          # We use a 20m window because the controller only reconciles roughly once every 15 minutes.
          # The window is long enough that the rate does not drop to 0 between reconciliations,
          # which would make the alert flap, and short enough for the alert to resolve within a
          # reasonable time after a broken SLO has been fixed or removed.
          expr: sum by (job) (rate(controller_runtime_reconcile_errors_total{controller="servicelevelobjective"}[20m])) > 0
          for: 1m
          labels:
            severity: error
            {{- with .Values.prometheusRule.labels }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          annotations:
            summary: Failed to reconcile state
            description: 'Pyrra Kubernetes operator failed to reconcile. Check logs for invalid ServiceLevelObjectives.'
{{- end }}
3 changes: 3 additions & 0 deletions charts/pyrra/templates/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ spec:
{{- if .Values.service.nodePort }}
nodePort: {{ .Values.service.nodePort }}
{{- end }}
- name: op-metrics
port: {{ .Values.service.operatorMetricsPort }}
targetPort: {{ include "pyrra.operatorMetricsPort" . }}
selector:
{{- include "pyrra.selectorLabels" . | nindent 4 }}
14 changes: 13 additions & 1 deletion charts/pyrra/templates/servicemonitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "pyrra.fullname" . }}-server
name: {{ template "pyrra.fullname" . }}
labels:
{{- include "pyrra.labels" . | nindent 4 }}
{{- if .Values.serviceMonitor.labels }}
Expand All @@ -29,4 +29,16 @@ spec:
relabelings:
{{ toYaml .Values.serviceMonitor.relabelings | indent 4 }}
{{- end }}
- port: op-metrics
{{- if .Values.serviceMonitor.interval }}
interval: {{ .Values.serviceMonitor.interval }}
{{- end }}
{{- if .Values.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ toYaml .Values.serviceMonitor.metricRelabelings | indent 4 }}
{{- end }}
{{- if .Values.serviceMonitor.relabelings }}
relabelings:
{{ toYaml .Values.serviceMonitor.relabelings | indent 4 }}
{{- end }}
{{- end }}
12 changes: 11 additions & 1 deletion charts/pyrra/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ additionalLabels: {}
extraApiArgs: []
# -- Extra args for Pyrra's Kubernetes container
extraKubernetesArgs: []
# -- Address to expose operator metrics
operatorMetricsAddress: ":8080"

serviceAccount:
# -- Specifies whether a service account should be created
Expand Down Expand Up @@ -52,6 +54,8 @@ service:
# -- service nodePort to expose
# -- node port for HTTP, choose port between <30000-32767>
nodePort: ""
# -- service port for operator metrics
operatorMetricsPort: 8080

ingress:
# -- enables ingress for server UI
Expand All @@ -74,7 +78,7 @@ ingress:
# - chart-example.local

# -- resource limits and requests for server pod
resources: {}
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
Expand All @@ -99,6 +103,12 @@ serviceMonitor:
# -- provides the possibility to override the jobName if needed
# jobLabel: fancy-pyrra-server

prometheusRule:
# -- enables creation of PrometheusRules to monitor Pyrra
enabled: false
# -- Set labels that will be applied on all PrometheusRules (alerts)
labels: {}

genericRules:
# -- enables generation of Pyrra generic recording rules. Pyrra generates metrics with the same name for each SLO.
enabled: false
Expand Down

0 comments on commit abedaec

Please sign in to comment.