Skip to content

Commit

Permalink
fix: correct liveness / readiness checks
Browse files Browse the repository at this point in the history
Signed-off-by: Jakob Möller <[email protected]>
  • Loading branch information
jakobmoellerdev committed May 27, 2024
1 parent 0816722 commit c973428
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 16 deletions.
14 changes: 12 additions & 2 deletions bundle/manifests/lvms-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -551,16 +551,26 @@ spec:
fieldPath: metadata.name
image: quay.io/lvms_dev/lvms-operator:latest
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 8081
initialDelaySeconds: 10
periodSeconds: 20
initialDelaySeconds: 1
periodSeconds: 30
timeoutSeconds: 1
name: manager
ports:
- containerPort: 9443
name: webhook-server
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /readyz
port: 8081
initialDelaySeconds: 1
periodSeconds: 60
timeoutSeconds: 1
resources:
requests:
cpu: 5m
Expand Down
2 changes: 1 addition & 1 deletion catalog/lvms-operator/v0.0.1.yaml

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions cmd/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package operator
import (
"context"
"fmt"
"net/http"
"os"
"os/signal"
"syscall"
Expand All @@ -40,6 +41,7 @@ import (
"google.golang.org/grpc"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/manager"

appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/meta"
Expand Down Expand Up @@ -309,6 +311,15 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error {
return fmt.Errorf("unable to set up health check: %w", err)
}

if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
if err := readyCheck(mgr)(req); err != nil {
return err
}
return mgr.GetWebhookServer().StartedChecker()(req)
}); err != nil {
return fmt.Errorf("unable to set up health check: %w", err)
}

c := make(chan os.Signal, 2)
signal.Notify(c, []os.Signal{os.Interrupt, syscall.SIGTERM}...)
go func() {
Expand All @@ -325,3 +336,14 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error {

return nil
}

// readyCheck builds the healthz.Checker used for the operator's readiness
// endpoint. The returned checker reports an error whenever the manager's
// informer cache does not report itself as synced, and nil otherwise.
func readyCheck(mgr manager.Manager) healthz.Checker {
	return func(req *http.Request) error {
		// The request context bounds the wait, so the check ends with the probe request.
		// NOTE(review): WaitForCacheSync presumably blocks until the cache is synced
		// or the context is done — confirm this suits the configured probe timeout.
		synced := mgr.GetCache().WaitForCacheSync(req.Context())
		if synced {
			return nil
		}
		return fmt.Errorf("informer cache not synced and thus not ready")
	}
}
24 changes: 19 additions & 5 deletions cmd/vgmanager/vgmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"errors"
"fmt"
"net/http"
"os"
"os/signal"
"syscall"
Expand Down Expand Up @@ -48,6 +49,7 @@ import (
"github.com/topolvm/topolvm/pkg/runners"
"google.golang.org/grpc"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/manager"

"k8s.io/apimachinery/pkg/runtime"
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand All @@ -68,7 +70,7 @@ import (

const (
DefaultDiagnosticsAddr = ":8443"
DefaultProbeAddr = ":8081"
DefaultHealthProbeAddr = ":8081"
)

var ErrConfigModified = errors.New("lvmd config file is modified")
Expand Down Expand Up @@ -98,7 +100,7 @@ func NewCmd(opts *Options) *cobra.Command {
&opts.diagnosticsAddr, "diagnosticsAddr", DefaultDiagnosticsAddr, "The address the diagnostics endpoint binds to.",
)
cmd.Flags().StringVar(
&opts.healthProbeAddr, "health-probe-bind-address", DefaultProbeAddr, "The address the probe endpoint binds to.",
&opts.healthProbeAddr, "health-probe-bind-address", DefaultHealthProbeAddr, "The address the probe endpoint binds to.",
)
return cmd
}
Expand Down Expand Up @@ -152,9 +154,6 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error {
lvmdConfig := &lvmd.Config{}
if err := loadConfFile(ctx, lvmdConfig, lvmd.DefaultFileConfigPath); err != nil {
opts.SetupLog.Error(err, "lvmd config could not be loaded, starting without topolvm components and attempting bootstrap")
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up ready check: %w", err)
}
} else {
topoLVMD.Containerized(true)
lvclnt, vgclnt := topoLVMD.NewEmbeddedServiceClients(ctx, lvmdConfig.DeviceClasses, lvmdConfig.LvcreateOptionClasses)
Expand Down Expand Up @@ -213,6 +212,10 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error {
return fmt.Errorf("unable to create controller VGManager: %w", err)
}

if err := mgr.AddReadyzCheck("readyz", readyCheck(mgr)); err != nil {
return fmt.Errorf("unable to set up ready check: %w", err)
}

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
return fmt.Errorf("unable to set up health check: %w", err)
}
Expand Down Expand Up @@ -321,3 +324,14 @@ func registrationPath() string {
// pluginRegistrationSocketPath returns the path of the registration socket,
// composed as "<DefaultPluginRegistrationPath>/<TopolvmCSIDriverName>-reg.sock".
func pluginRegistrationSocketPath() string {
	registrationDir := constants.DefaultPluginRegistrationPath
	driverName := constants.TopolvmCSIDriverName
	return fmt.Sprintf("%s/%s-reg.sock", registrationDir, driverName)
}

// readyCheck returns a healthz.Checker for the readiness endpoint of this
// manager. The checker succeeds only when the manager's informer cache
// reports that it has synced; otherwise it returns an error.
func readyCheck(mgr manager.Manager) healthz.Checker {
	check := func(req *http.Request) error {
		// Tie the wait to the incoming probe request via its context.
		// NOTE(review): WaitForCacheSync presumably blocks until sync completes or
		// the context is cancelled — verify against the probe's timeoutSeconds.
		if synced := mgr.GetCache().WaitForCacheSync(req.Context()); synced {
			return nil
		}
		return fmt.Errorf("informer cache not synced and thus not ready")
	}
	return check
}
14 changes: 12 additions & 2 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,18 @@ spec:
httpGet:
path: /healthz
port: 8081
initialDelaySeconds: 10
periodSeconds: 20
initialDelaySeconds: 1
timeoutSeconds: 1
failureThreshold: 3
periodSeconds: 30
readinessProbe:
httpGet:
path: /readyz
port: 8081
initialDelaySeconds: 1
timeoutSeconds: 1
failureThreshold: 3
periodSeconds: 60
resources:
requests:
cpu: 5m
Expand Down
2 changes: 0 additions & 2 deletions internal/controllers/lvmcluster/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ import (
"github.com/openshift/lvm-operator/internal/controllers/constants"
"github.com/openshift/lvm-operator/internal/controllers/lvmcluster/logpassthrough"
"github.com/openshift/lvm-operator/internal/controllers/lvmcluster/resource"

topolvmv1 "github.com/topolvm/topolvm/api/v1"

corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
Expand Down
18 changes: 14 additions & 4 deletions internal/controllers/lvmcluster/resource/vgmanager_daemonset.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,27 @@ func newVGManagerDaemonset(lvmCluster *lvmv1alpha1.LVMCluster, namespace, vgImag
Privileged: ptr.To(true),
RunAsUser: ptr.To(int64(0)),
},
Ports: []corev1.ContainerPort{{Name: constants.TopolvmNodeContainerHealthzName,
ContainerPort: 8081,
Protocol: corev1.ProtocolTCP}},
Ports: []corev1.ContainerPort{
{Name: constants.TopolvmNodeContainerHealthzName,
ContainerPort: 8081,
Protocol: corev1.ProtocolTCP},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{Path: "/healthz",
Port: intstr.FromString(constants.TopolvmNodeContainerHealthzName)}},
FailureThreshold: 3,
InitialDelaySeconds: 1,
TimeoutSeconds: 1,
PeriodSeconds: 10},
PeriodSeconds: 30},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{Path: "/readyz",
Port: intstr.FromString(constants.TopolvmNodeContainerHealthzName)}},
FailureThreshold: 3,
InitialDelaySeconds: 1,
TimeoutSeconds: 1,
PeriodSeconds: 60},
VolumeMounts: volumeMounts,
Resources: resourceRequirements,
Env: []corev1.EnvVar{
Expand Down

0 comments on commit c973428

Please sign in to comment.