Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GEP-19] Adapt monitoring configuration #257

Merged
merged 2 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ rules:
- patch
- update
- delete
# TODO(rfranzke): Remove this after August 2024.
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{{/* HINT: This file is intentionally NOT called _helpers.tpl (as usual) since this Helm chart is embedded via go embed. */}}
{{/* HINT: go embed skips files whose names begin with "." or "_" when embedding a directory, hence _helpers.tpl cannot be used as the name. */}}

{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{{- if .Values.gep19Monitoring }}
# ConfigMap carrying the cert-controller-manager dashboard.
# Rendered only when the gep19Monitoring value is set.
apiVersion: v1
kind: ConfigMap
metadata:
name: cert-controller-manager-dashboards
namespace: {{ .Release.Namespace }}
labels:
# NOTE(review): presumably this label is how the shoot dashboard tooling discovers the ConfigMap - confirm against the Gardener monitoring docs.
dashboard.monitoring.gardener.cloud/shoot: "true"
data:
# The dashboard JSON is read from the chart file cert-dashboard.json and embedded as a block scalar.
cert-controller-manager-dashboard.json: |-
{{- .Files.Get "cert-dashboard.json" | nindent 4 }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if not .Values.gep19Monitoring }}
apiVersion: v1
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -56,3 +57,4 @@ data:
dashboard_users: |
cert-controller-manager-dashboard.json: |-
{{- .Files.Get "cert-dashboard.json" | nindent 6 }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{- if .Values.gep19Monitoring }}
{{- if gt .Values.configuration.certExpirationAlertDays 0.0 }}
# PrometheusRule with the certificate expiration alert.
# Rendered only when the gep19Monitoring value is set and configuration.certExpirationAlertDays is greater than zero.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: shoot-cert-controller-manager
namespace: {{ .Release.Namespace }}
labels:
prometheus: shoot
spec:
groups:
- name: cert-controller-manager.rules
rules:
- alert: SslCertificateWillExpireSoon
# Only samples with a positive cert_management_cert_object_expire value are considered; the difference to the
# current time is divided by 86400 (seconds per day) and compared against the configured number of days.
# NOTE(review): assumes the metric value is a Unix timestamp in seconds - confirm against the exporter.
expr: ((cert_management_cert_object_expire > 0) - time()) / 86400 <= {{ .Values.configuration.certExpirationAlertDays }}
# The condition must hold for 30 minutes before the alert fires.
for: 30m
labels:
service: cert-controller-manager
severity: critical
type: seed
visibility: operator
annotations:
description: Certificate in namespace {{ .Release.Namespace }} will expire in less than {{ .Values.configuration.certExpirationAlertDays }} days.
summary: TLS certificate will expire in less than {{ .Values.configuration.certExpirationAlertDays }} days
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{{- if .Values.gep19Monitoring }}
# ServiceMonitor for scraping the cert-controller-manager metrics.
# Rendered only when the gep19Monitoring value is set.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: shoot-cert-controller-manager
namespace: {{ .Release.Namespace }}
labels:
prometheus: shoot
spec:
# Matches objects labelled with this chart's name and the release name.
selector:
matchLabels:
app.kubernetes.io/name: {{ include "cert-management.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
endpoints:
- port: metrics
relabelings:
# labelmap maps every label matching the __meta_kubernetes_service_label_ prefix to a label named after the captured suffix.
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
metricRelabelings:
# Keep only metrics whose name starts with cert_management_; all other scraped metrics are dropped.
- sourceLabels:
- __name__
action: keep
regex: ^(cert_management_.+)$
honorLabels: false
{{- end }}
3 changes: 3 additions & 0 deletions charts/internal/shoot-cert-management-seed/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@ additionalConfiguration:
- --kubeconfig.disable-deploy-crds
- --source.disable-deploy-crds
- --target.disable-deploy-crds

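# gep19Monitoring switches the chart to the GEP-19 monitoring resources (dashboards ConfigMap, PrometheusRule,
# ServiceMonitor) instead of the previous monitoring ConfigMap. The actuator sets it when deploying the chart.
# TODO(rfranzke): Remove this field after August 2024.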
gep19Monitoring: false
2 changes: 1 addition & 1 deletion example/controller-registration.yaml

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions pkg/controller/actuator.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
kutil "github.com/gardener/gardener/pkg/utils/kubernetes"
"github.com/gardener/gardener/pkg/utils/managedresources"
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
autoscalingv1 "k8s.io/api/autoscaling/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -316,6 +317,15 @@ func (a *actuator) createSeedResources(ctx context.Context, certConfig *service.
cfg["privateKeyDefaults"] = defaults
}

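// TODO(rfranzke): Delete this after August 2024.
// GEP-19 monitoring is detected by the existence of the prometheus-shoot StatefulSet in the namespace.
// NOTE(review): any error returned by Get (not only NotFound) is treated as "StatefulSet absent", so a
// transient API error selects the previous monitoring configuration; consider distinguishing NotFound from other errors.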
gep19Monitoring := a.client.Get(ctx, client.ObjectKey{Name: "prometheus-shoot", Namespace: namespace}, &appsv1.StatefulSet{}) == nil
if gep19Monitoring {
if err := kutil.DeleteObject(ctx, a.client, &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cert-controller-manager-observability-config", Namespace: namespace}}); err != nil {
return fmt.Errorf("failed deleting cert-controller-manager-observability-config ConfigMap: %w", err)
}
}
certManagementConfig["gep19Monitoring"] = gep19Monitoring

certManagementConfig, err = chart.InjectImages(certManagementConfig, imagevector.ImageVector(), []string{v1alpha1.CertManagementImageName})
if err != nil {
return fmt.Errorf("failed to find image version for %s: %v", v1alpha1.CertManagementImageName, err)
Expand Down