From 35d9c4e213b8e33fc66618442676af176085d5bb Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 11 May 2018 10:07:00 +0200 Subject: [PATCH 01/11] Adding finalizer support --- pkg/deployment/deployment_inspector.go | 2 +- pkg/deployment/images.go | 2 +- pkg/deployment/resources/context.go | 6 ++ pkg/deployment/resources/pod_creator.go | 13 ++- pkg/deployment/resources/pod_finalizers.go | 109 ++++++++++++++++++++ pkg/deployment/resources/pod_inspector.go | 20 +++- pkg/util/constants/constants.go | 2 + pkg/util/k8sutil/finalizers.go | 112 +++++++++++++++++++++ pkg/util/k8sutil/pods.go | 18 ++-- 9 files changed, 274 insertions(+), 10 deletions(-) create mode 100644 pkg/deployment/resources/pod_finalizers.go create mode 100644 pkg/util/k8sutil/finalizers.go diff --git a/pkg/deployment/deployment_inspector.go b/pkg/deployment/deployment_inspector.go index c4c9f3063..7bf75de0d 100644 --- a/pkg/deployment/deployment_inspector.go +++ b/pkg/deployment/deployment_inspector.go @@ -80,7 +80,7 @@ func (d *Deployment) inspectDeployment(lastInterval time.Duration) time.Duration } // Inspection of generated resources needed - if err := d.resources.InspectPods(); err != nil { + if err := d.resources.InspectPods(ctx); err != nil { hasError = true d.CreateEvent(k8sutil.NewErrorEvent("Pod inspection failed", err, d.apiObject)) } diff --git a/pkg/deployment/images.go b/pkg/deployment/images.go index 504a4ca7b..368401868 100644 --- a/pkg/deployment/images.go +++ b/pkg/deployment/images.go @@ -166,7 +166,7 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, ima "--server.authentication=false", fmt.Sprintf("--server.endpoint=tcp://[::]:%d", k8sutil.ArangoPort), } - if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, ib.Spec.GetImagePullPolicy(), "", false, args, nil, nil, nil, "", ""); err != nil { + if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, 
ib.Spec.GetImagePullPolicy(), "", false, args, nil, nil, nil, nil, "", ""); err != nil { log.Debug().Err(err).Msg("Failed to create image ID pod") return true, maskAny(err) } diff --git a/pkg/deployment/resources/context.go b/pkg/deployment/resources/context.go index e7a5d6683..3bea8c3de 100644 --- a/pkg/deployment/resources/context.go +++ b/pkg/deployment/resources/context.go @@ -23,6 +23,9 @@ package resources import ( + "context" + + driver "github.com/arangodb/go-driver" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" "k8s.io/api/core/v1" @@ -65,4 +68,7 @@ type Context interface { // CleanupPod deletes a given pod with force and explicit UID. // If the pod does not exist, the error is ignored. CleanupPod(p v1.Pod) error + // GetDatabaseClient returns a cached client for the entire database (cluster coordinators or single server), + // creating one if needed. + GetDatabaseClient(ctx context.Context) (driver.Client, error) } diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index 4da74078f..ddcbe5b26 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -303,6 +303,16 @@ func (r *Resources) createReadinessProbe(spec api.DeploymentSpec, group api.Serv }, nil } +// createPodFinalizers creates a list of finalizers for a pod created for the given group. 
+func (r *Resources) createPodFinalizers(group api.ServerGroup) []string { + switch group { + case api.ServerGroupDBServers: + return []string{constants.FinalizerDrainDBServer} + default: + return nil + } +} + // createPodForMember creates all Pods listed in member status func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.ServerGroup, groupSpec api.ServerGroupSpec, m api.MemberStatus, memberStatusList *api.MemberStatusList) error { @@ -368,8 +378,9 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server } engine := spec.GetStorageEngine().AsArangoArgument() requireUUID := group == api.ServerGroupDBServers && m.IsInitialized + finalizers := r.createPodFinalizers(group) if err := k8sutil.CreateArangodPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, m.PersistentVolumeClaimName, info.ImageID, spec.GetImagePullPolicy(), - engine, requireUUID, args, env, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil { + engine, requireUUID, args, env, finalizers, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil { return maskAny(err) } log.Debug().Str("pod-name", m.PodName).Msg("Created pod") diff --git a/pkg/deployment/resources/pod_finalizers.go b/pkg/deployment/resources/pod_finalizers.go new file mode 100644 index 000000000..bdae9cc25 --- /dev/null +++ b/pkg/deployment/resources/pod_finalizers.go @@ -0,0 +1,109 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package resources + +import ( + "context" + "fmt" + + "github.com/rs/zerolog" + "k8s.io/api/core/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/arangodb/kube-arangodb/pkg/util/constants" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" +) + +// runPodFinalizers goes through the list of pod finalizers to see if they can be removed. +func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatus api.MemberStatus) error { + log := r.log.With().Str("pod-name", p.GetName()).Logger() + var removalList []string + for _, f := range p.ObjectMeta.GetFinalizers() { + switch f { + case constants.FinalizerDrainDBServer: + log.Debug().Msg("Inspecting drain dbserver finalizer") + if err := r.inspectFinalizerDrainDBServer(ctx, log, p, memberStatus); err == nil { + removalList = append(removalList, f) + } else { + log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") + } + } + } + // Remove finalizers (if needed) + if len(removalList) > 0 { + kubecli := r.context.GetKubeCli() + if err := k8sutil.RemovePodFinalizers(log, kubecli, p, removalList); err != nil { + log.Debug().Err(err).Msg("Failed to update pod (to remove finalizers)") + return maskAny(err) + } + } + return nil +} + +// inspectFinalizerDrainDBServer checks the finalizer condition for drain-dbserver. +// It returns nil if the finalizer can be removed. 
+func (r *Resources) inspectFinalizerDrainDBServer(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus) error { + // Inspect member phase + if memberStatus.Phase.IsFailed() { + log.Debug().Msg("Pod is already failed, safe to remove drain dbserver finalizer") + return nil + } + // Inspect deployment deletion state + apiObject := r.context.GetAPIObject() + if apiObject.GetDeletionTimestamp() != nil { + log.Debug().Msg("Entire deployment is being deleted, safe to remove drain dbserver finalizer") + return nil + } + // Inspect cleaned out state + c, err := r.context.GetDatabaseClient(ctx) + if err != nil { + log.Debug().Err(err).Msg("Failed to create member client") + return maskAny(err) + } + cluster, err := c.Cluster(ctx) + if err != nil { + log.Debug().Err(err).Msg("Failed to access cluster") + return maskAny(err) + } + cleanedOut, err := cluster.IsCleanedOut(ctx, memberStatus.ID) + if err != nil { + return maskAny(err) + } + if cleanedOut { + // All done + log.Debug().Msg("Server is cleaned out. Save to remove drain dbserver finalizer") + return nil + } + // Not cleaned out yet, check member status + if memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) { + log.Warn().Msg("Member is already terminated before it could be cleaned out. Not good, but removing drain dbserver finalizer because we cannot do anything further") + return nil + } + // Ensure the cleanout is triggered + log.Debug().Msg("Server is not yet clean out. 
Triggering a clean out now") + if err := cluster.CleanOutServer(ctx, memberStatus.ID); err != nil { + log.Debug().Err(err).Msg("Failed to clean out server") + return maskAny(err) + } + return maskAny(fmt.Errorf("Server is not yet cleaned out")) +} diff --git a/pkg/deployment/resources/pod_inspector.go b/pkg/deployment/resources/pod_inspector.go index 5594878e0..c5928cbc0 100644 --- a/pkg/deployment/resources/pod_inspector.go +++ b/pkg/deployment/resources/pod_inspector.go @@ -23,6 +23,7 @@ package resources import ( + "context" "fmt" "time" @@ -44,7 +45,7 @@ const ( // InspectPods lists all pods that belong to the given deployment and updates // the member status of the deployment accordingly. -func (r *Resources) InspectPods() error { +func (r *Resources) InspectPods(ctx context.Context) error { log := r.log var events []*v1.Event @@ -72,6 +73,16 @@ func (r *Resources) InspectPods() error { memberStatus, group, found := status.Members.MemberStatusByPodName(p.GetName()) if !found { log.Debug().Str("pod", p.GetName()).Msg("no memberstatus found for pod") + if k8sutil.IsPodMarkedForDeletion(&p) && len(p.GetFinalizers()) > 0 { + // Strange, pod belongs to us, but we have no member for it. + // Remove all finalizers, so it can be removed. + log.Warn().Msg("Pod belongs to this deployment, but we don't know the member. Removing all finalizers") + kubecli := r.context.GetKubeCli() + if err := k8sutil.RemovePodFinalizers(log, kubecli, &p, p.GetFinalizers()); err != nil { + log.Debug().Err(err).Msg("Failed to update pod (to remove all finalizers)") + return maskAny(err) + } + } continue } @@ -123,6 +134,13 @@ func (r *Resources) InspectPods() error { } else if !k8sutil.IsPodScheduled(&p) { unscheduledPodNames = append(unscheduledPodNames, p.GetName()) } + if k8sutil.IsPodMarkedForDeletion(&p) { + // Process finalizers + if err := r.runPodFinalizers(ctx, &p, memberStatus); err != nil { + // Only log here, since we'll be called to try again. 
+ log.Warn().Err(err).Msg("Failed to run pod finalizers") + } + } if updateMemberStatusNeeded { if err := status.Members.UpdateMemberStatus(memberStatus, group); err != nil { return maskAny(err) diff --git a/pkg/util/constants/constants.go b/pkg/util/constants/constants.go index b11f14dad..a5e50be2d 100644 --- a/pkg/util/constants/constants.go +++ b/pkg/util/constants/constants.go @@ -38,4 +38,6 @@ const ( SecretCAKey = "ca.key" // Key in Secret.data used to store a PEM encoded CA private key SecretTLSKeyfile = "tls.keyfile" // Key in Secret.data used to store a PEM encoded TLS certificate in the format used by ArangoDB (`--ssl.keyfile`) + + FinalizerDrainDBServer = "dbserver.database.arangodb.com/drain" // Finalizer adds to DBServers, indicating the need for draining that dbserver ) diff --git a/pkg/util/k8sutil/finalizers.go b/pkg/util/k8sutil/finalizers.go new file mode 100644 index 000000000..1e541a0cb --- /dev/null +++ b/pkg/util/k8sutil/finalizers.go @@ -0,0 +1,112 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// Copyright holder is ArangoDB GmbH, Cologne, Germany
+//
+// Author Ewout Prangsma
+//
+
+package k8sutil
+
+import (
+	"github.com/rs/zerolog"
+	"k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+)
+
+const (
+	maxRemoveFinalizersAttempts = 50
+)
+
+// RemovePodFinalizers removes the given finalizers from the given pod.
+func RemovePodFinalizers(log zerolog.Logger, kubecli kubernetes.Interface, p *v1.Pod, finalizers []string) error {
+	pods := kubecli.CoreV1().Pods(p.GetNamespace())
+	getFunc := func() (metav1.Object, error) {
+		result, err := pods.Get(p.GetName(), metav1.GetOptions{})
+		if err != nil {
+			return nil, maskAny(err)
+		}
+		return result, nil
+	}
+	updateFunc := func(updated metav1.Object) error {
+		updatedPod := updated.(*v1.Pod)
+		result, err := pods.Update(updatedPod)
+		if err != nil {
+			return maskAny(err)
+		}
+		*p = *result
+		return nil
+	}
+	if err := removeFinalizers(log, finalizers, getFunc, updateFunc); err != nil {
+		return maskAny(err)
+	}
+	return nil
+}
+
+// removeFinalizers is a helper used to remove finalizers from an object.
+// The function tries to get the object using the provided get function,
+// then removes the given finalizers and updates the object using the given update function.
+// In case of an update conflict, the function tries again.
+func removeFinalizers(log zerolog.Logger, finalizers []string, getFunc func() (metav1.Object, error), updateFunc func(metav1.Object) error) error { + attempts := 0 + for { + attempts++ + obj, err := getFunc() + if err != nil { + log.Warn().Err(err).Msg("Failed to get resource") + return maskAny(err) + } + original := obj.GetFinalizers() + if len(original) == 0 { + // We're done + return nil + } + newList := make([]string, 0, len(original)) + shouldRemove := func(f string) bool { + for _, x := range finalizers { + if x == f { + return true + } + } + return false + } + for _, f := range original { + if !shouldRemove(f) { + newList = append(newList, f) + } + } + if len(newList) < len(original) { + obj.SetFinalizers(newList) + if err := updateFunc(obj); IsConflict(err) { + if attempts > maxRemoveFinalizersAttempts { + log.Warn().Err(err).Msg("Failed to update resource with fewer finalizers after many attempts") + return maskAny(err) + } else { + // Try again + continue + } + } else if err != nil { + log.Warn().Err(err).Msg("Failed to update resource with fewer finalizers") + return maskAny(err) + } + } else { + log.Debug().Msg("No finalizers needed removal. Resource unchanged") + } + return nil + } +} diff --git a/pkg/util/k8sutil/pods.go b/pkg/util/k8sutil/pods.go index 8700e0f5d..8f692ddd7 100644 --- a/pkg/util/k8sutil/pods.go +++ b/pkg/util/k8sutil/pods.go @@ -104,6 +104,11 @@ func IsPodNotScheduledFor(pod *v1.Pod, timeout time.Duration) bool { condition.LastTransitionTime.Time.Add(timeout).Before(time.Now()) } +// IsPodMarkedForDeletion returns true if the pod has been marked for deletion. 
+func IsPodMarkedForDeletion(pod *v1.Pod) bool { + return pod.DeletionTimestamp != nil +} + // IsArangoDBImageIDAndVersionPod returns true if the given pod is used for fetching image ID and ArangoDB version of an image func IsArangoDBImageIDAndVersionPod(p v1.Pod) bool { role, found := p.GetLabels()[LabelKeyRole] @@ -256,12 +261,13 @@ func arangosyncContainer(name string, image string, imagePullPolicy v1.PullPolic } // newPod creates a basic Pod for given settings. -func newPod(deploymentName, ns, role, id, podName string) v1.Pod { +func newPod(deploymentName, ns, role, id, podName string, finalizers []string) v1.Pod { hostname := CreatePodHostName(deploymentName, role, id) p := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: podName, - Labels: LabelsForDeployment(deploymentName, role), + Name: podName, + Labels: LabelsForDeployment(deploymentName, role), + Finalizers: finalizers, }, Spec: v1.PodSpec{ Hostname: hostname, @@ -278,11 +284,11 @@ func newPod(deploymentName, ns, role, id, podName string) v1.Pod { func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, podName, pvcName, image string, imagePullPolicy v1.PullPolicy, engine string, requireUUID bool, - args []string, env map[string]EnvValue, + args []string, env map[string]EnvValue, finalizers []string, livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig, tlsKeyfileSecretName, rocksdbEncryptionSecretName string) error { // Prepare basic pod - p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName) + p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, finalizers) // Add arangod container c := arangodContainer("arangod", image, imagePullPolicy, args, env, livenessProbe, readinessProbe) @@ -361,7 +367,7 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy func CreateArangoSyncPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, 
podName, image string, imagePullPolicy v1.PullPolicy, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, affinityWithRole string) error { // Prepare basic pod - p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName) + p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, nil) // Add arangosync container c := arangosyncContainer("arangosync", image, imagePullPolicy, args, env, livenessProbe) From c11ec3f7de508afa2c9e6d9dae8a98f630379ce2 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 11 May 2018 14:58:49 +0200 Subject: [PATCH 02/11] Prevent stopping Pods to early to avoid dataloss --- docs/design/lifecycle_hooks.md | 25 ++++ lifecycle.go | 150 +++++++++++++++++++++++ main.go | 19 ++- pkg/deployment/context_impl.go | 5 + pkg/deployment/deployment.go | 1 + pkg/deployment/images.go | 2 +- pkg/deployment/reconcile/plan_builder.go | 17 ++- pkg/deployment/resources/context.go | 2 + pkg/deployment/resources/pod_creator.go | 5 +- pkg/operator/operator.go | 1 + pkg/operator/operator_deployment.go | 1 + pkg/util/k8sutil/container.go | 38 ++++++ pkg/util/k8sutil/images.go | 38 ++++++ pkg/util/k8sutil/pods.go | 140 +++++++++++++++++++-- 14 files changed, 418 insertions(+), 26 deletions(-) create mode 100644 docs/design/lifecycle_hooks.md create mode 100644 lifecycle.go create mode 100644 pkg/util/k8sutil/container.go create mode 100644 pkg/util/k8sutil/images.go diff --git a/docs/design/lifecycle_hooks.md b/docs/design/lifecycle_hooks.md new file mode 100644 index 000000000..030a00134 --- /dev/null +++ b/docs/design/lifecycle_hooks.md @@ -0,0 +1,25 @@ +# Lifecycle hooks + +The ArangoDB operator expects full control of the `Pods` is creates. +Therefore it takes measures to prevent the removal of those `Pods` +until it is say to do so. + +To achieve this, the server containers in the `Pods` have +a `preStop` hook configured and finalizers are added to the `Pods`. 
+
+The `preStop` hook executes a binary that waits until all finalizers of
+the current pod have been removed.
+Until this `preStop` hook terminates, Kubernetes will not send a `TERM` signal
+to the processes inside the container, which ensures that the server remains running
+until it is safe to stop them.
+
+The operator performs all actions needed when a delete of a `Pod` has been
+triggered. E.g. for a dbserver it cleans out the server before it removes
+the finalizers.
+
+## Lifecycle init-container
+
+Because the binary that is called in the `preStop` hook is not part of a standard
+ArangoDB docker image, it has to be brought into the filesystem of a `Pod`.
+This is done by an initial container that copies the binary to an `emptyDir` volume that
+is shared between the init-container and the server container.
diff --git a/lifecycle.go b/lifecycle.go
new file mode 100644
index 000000000..7b5561388
--- /dev/null
+++ b/lifecycle.go
@@ -0,0 +1,150 @@
+//
+// DISCLAIMER
+//
+// Copyright 2018 ArangoDB GmbH, Cologne, Germany
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package main + +import ( + "io" + "os" + "path/filepath" + "time" + + "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/arangodb/kube-arangodb/pkg/util/constants" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" +) + +var ( + cmdLifecycle = &cobra.Command{ + Use: "lifecycle", + Run: cmdUsage, + Hidden: true, + } + + cmdLifecyclePreStop = &cobra.Command{ + Use: "preStop", + Run: cmdLifecyclePreStopRun, + Hidden: true, + } + cmdLifecycleCopy = &cobra.Command{ + Use: "copy", + Run: cmdLifecycleCopyRun, + Hidden: true, + } + + lifecycleCopyOptions struct { + TargetDir string + } +) + +func init() { + cmdMain.AddCommand(cmdLifecycle) + cmdLifecycle.AddCommand(cmdLifecyclePreStop) + cmdLifecycle.AddCommand(cmdLifecycleCopy) + + cmdLifecycleCopy.Flags().StringVar(&lifecycleCopyOptions.TargetDir, "target", "", "Target directory to copy the executable to") +} + +// Wait until all finalizers of the current pod have been removed. 
+func cmdLifecyclePreStopRun(cmd *cobra.Command, args []string) { + cliLog.Info().Msgf("Starting arangodb-operator, lifecycle preStop, version %s build %s", projectVersion, projectBuild) + + // Get environment + namespace := os.Getenv(constants.EnvOperatorPodNamespace) + if len(namespace) == 0 { + cliLog.Fatal().Msgf("%s environment variable missing", constants.EnvOperatorPodNamespace) + } + name := os.Getenv(constants.EnvOperatorPodName) + if len(name) == 0 { + cliLog.Fatal().Msgf("%s environment variable missing", constants.EnvOperatorPodName) + } + + // Create kubernetes client + kubecli, err := k8sutil.NewKubeClient() + if err != nil { + cliLog.Fatal().Err(err).Msg("Failed to create Kubernetes client") + } + + pods := kubecli.CoreV1().Pods(namespace) + recentErrors := 0 + for { + p, err := pods.Get(name, metav1.GetOptions{}) + if k8sutil.IsNotFound(err) { + cliLog.Warn().Msg("Pod not found") + return + } else if err != nil { + recentErrors++ + cliLog.Error().Err(err).Msg("Failed to get pod") + if recentErrors > 20 { + cliLog.Fatal().Err(err).Msg("Too many recent errors") + return + } + } else { + // We got our pod + finalizerCount := len(p.GetFinalizers()) + if finalizerCount == 0 { + // No more finalizers, we're done + cliLog.Info().Msg("All finalizers gone, we can stop now") + return + } + cliLog.Info().Msgf("Waiting for %d more finalizers to be removed", finalizerCount) + } + // Wait a bit + time.Sleep(time.Second) + } +} + +// Copy the executable to a given place. 
+func cmdLifecycleCopyRun(cmd *cobra.Command, args []string) { + cliLog.Info().Msgf("Starting arangodb-operator, lifecycle copy, version %s build %s", projectVersion, projectBuild) + + exePath, err := os.Executable() + if err != nil { + cliLog.Fatal().Err(err).Msg("Failed to get executable path") + } + + // Open source + rd, err := os.Open(exePath) + if err != nil { + cliLog.Fatal().Err(err).Msg("Failed to open executable file") + } + defer rd.Close() + + // Open target + targetPath := filepath.Join(lifecycleCopyOptions.TargetDir, filepath.Base(exePath)) + wr, err := os.Create(targetPath) + if err != nil { + cliLog.Fatal().Err(err).Msg("Failed to create target file") + } + defer wr.Close() + + if _, err := io.Copy(wr, rd); err != nil { + cliLog.Fatal().Err(err).Msg("Failed to copy") + } + + // Set file mode + if err := os.Chmod(targetPath, 0755); err != nil { + cliLog.Fatal().Err(err).Msg("Failed to chmod") + } +} diff --git a/main.go b/main.go index 7a80c339f..5f83ac185 100644 --- a/main.go +++ b/main.go @@ -193,7 +193,7 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper return operator.Config{}, operator.Dependencies{}, maskAny(err) } - serviceAccount, err := getMyPodServiceAccount(kubecli, namespace, name) + image, serviceAccount, err := getMyPodInfo(kubecli, namespace, name) if err != nil { return operator.Config{}, operator.Dependencies{}, maskAny(fmt.Errorf("Failed to get my pod's service account: %s", err)) } @@ -213,6 +213,7 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper Namespace: namespace, PodName: name, ServiceAccount: serviceAccount, + LifecycleImage: image, EnableDeployment: operatorOptions.enableDeployment, EnableStorage: operatorOptions.enableStorage, AllowChaos: chaosOptions.allowed, @@ -231,9 +232,10 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper return cfg, deps, nil } -// getMyPodServiceAccount looks up the service account of the pod with 
given name in given namespace -func getMyPodServiceAccount(kubecli kubernetes.Interface, namespace, name string) (string, error) { - var sa string +// getMyPodInfo looks up the image & service account of the pod with given name in given namespace +// Returns image, serviceAccount, error. +func getMyPodInfo(kubecli kubernetes.Interface, namespace, name string) (string, string, error) { + var image, sa string op := func() error { pod, err := kubecli.CoreV1().Pods(namespace).Get(name, metav1.GetOptions{}) if err != nil { @@ -244,12 +246,17 @@ func getMyPodServiceAccount(kubecli kubernetes.Interface, namespace, name string return maskAny(err) } sa = pod.Spec.ServiceAccountName + image = k8sutil.ConvertImageID2Image(pod.Status.ContainerStatuses[0].ImageID) + if image == "" { + // Fallback in case we don't know the id. + image = pod.Spec.Containers[0].Image + } return nil } if err := retry.Retry(op, time.Minute*5); err != nil { - return "", maskAny(err) + return "", "", maskAny(err) } - return sa, nil + return image, sa, nil } func createRecorder(log zerolog.Logger, kubecli kubernetes.Interface, name, namespace string) record.EventRecorder { diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index 5ebec2886..81ffed117 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -50,6 +50,11 @@ func (d *Deployment) GetKubeCli() kubernetes.Interface { return d.deps.KubeCli } +// GetLifecycleImage returns the image name containing the lifecycle helper (== name of operator image) +func (d *Deployment) GetLifecycleImage() string { + return d.config.LifecycleImage +} + // GetNamespace returns the kubernetes namespace that contains // this deployment. 
func (d *Deployment) GetNamespace() string { diff --git a/pkg/deployment/deployment.go b/pkg/deployment/deployment.go index 902dc60df..b6f1d9123 100644 --- a/pkg/deployment/deployment.go +++ b/pkg/deployment/deployment.go @@ -50,6 +50,7 @@ import ( type Config struct { ServiceAccount string AllowChaos bool + LifecycleImage string } // Dependencies holds dependent services for a Deployment diff --git a/pkg/deployment/images.go b/pkg/deployment/images.go index 368401868..c95a9ab5c 100644 --- a/pkg/deployment/images.go +++ b/pkg/deployment/images.go @@ -166,7 +166,7 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, ima "--server.authentication=false", fmt.Sprintf("--server.endpoint=tcp://[::]:%d", k8sutil.ArangoPort), } - if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, ib.Spec.GetImagePullPolicy(), "", false, args, nil, nil, nil, nil, "", ""); err != nil { + if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, "", ib.Spec.GetImagePullPolicy(), "", false, args, nil, nil, nil, nil, "", ""); err != nil { log.Debug().Err(err).Msg("Failed to create image ID pod") return true, maskAny(err) } diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index a3c51ff34..b28b032f6 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -33,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" ) // upgradeDecision is the result of an upgrade check. @@ -204,8 +205,7 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, // podNeedsUpgrading decides if an upgrade of the pod is needed (to comply with // the given spec) and if that is allowed. 
func podNeedsUpgrading(p v1.Pod, spec api.DeploymentSpec, images api.ImageInfoList) upgradeDecision { - if len(p.Spec.Containers) == 1 { - c := p.Spec.Containers[0] + if c, found := k8sutil.GetContainerByName(&p, k8sutil.ServerContainerName); found { specImageInfo, found := images.GetByImage(spec.GetImage()) if !found { return upgradeDecision{UpgradeNeeded: false} @@ -249,14 +249,13 @@ func podNeedsUpgrading(p v1.Pod, spec api.DeploymentSpec, images api.ImageInfoLi // When true is returned, a reason for the rotation is already returned. func podNeedsRotation(p v1.Pod, apiObject metav1.Object, spec api.DeploymentSpec, group api.ServerGroup, agents api.MemberStatusList, id string) (bool, string) { - // Check number of containers - if len(p.Spec.Containers) != 1 { - return true, "Number of containers changed" - } // Check image pull policy - c := p.Spec.Containers[0] - if c.ImagePullPolicy != spec.GetImagePullPolicy() { - return true, "Image pull policy changed" + if c, found := k8sutil.GetContainerByName(&p, k8sutil.ServerContainerName); found { + if c.ImagePullPolicy != spec.GetImagePullPolicy() { + return true, "Image pull policy changed" + } + } else { + return true, "Server container not found" } // Check arguments /*expectedArgs := createArangodArgs(apiObject, spec, group, agents, id) diff --git a/pkg/deployment/resources/context.go b/pkg/deployment/resources/context.go index 3bea8c3de..8bc827930 100644 --- a/pkg/deployment/resources/context.go +++ b/pkg/deployment/resources/context.go @@ -58,6 +58,8 @@ type Context interface { UpdateStatus(status api.DeploymentStatus, force ...bool) error // GetKubeCli returns the kubernetes client GetKubeCli() kubernetes.Interface + // GetLifecycleImage returns the image name containing the lifecycle helper (== name of operator image) + GetLifecycleImage() string // GetNamespace returns the namespace that contains the deployment GetNamespace() string // createEvent creates a given event. 
diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index ddcbe5b26..f5b89f16e 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -321,6 +321,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server apiObject := r.context.GetAPIObject() ns := r.context.GetNamespace() status := r.context.GetStatus() + lifecycleImage := r.context.GetLifecycleImage() // Update pod name role := group.AsRole() @@ -379,7 +380,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server engine := spec.GetStorageEngine().AsArangoArgument() requireUUID := group == api.ServerGroupDBServers && m.IsInitialized finalizers := r.createPodFinalizers(group) - if err := k8sutil.CreateArangodPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, m.PersistentVolumeClaimName, info.ImageID, spec.GetImagePullPolicy(), + if err := k8sutil.CreateArangodPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, m.PersistentVolumeClaimName, info.ImageID, lifecycleImage, spec.GetImagePullPolicy(), engine, requireUUID, args, env, finalizers, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil { return maskAny(err) } @@ -402,7 +403,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server if group == api.ServerGroupSyncWorkers { affinityWithRole = api.ServerGroupDBServers.AsRole() } - if err := k8sutil.CreateArangoSyncPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, info.ImageID, spec.Sync.GetImagePullPolicy(), args, env, livenessProbe, affinityWithRole); err != nil { + if err := k8sutil.CreateArangoSyncPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, info.ImageID, lifecycleImage, spec.Sync.GetImagePullPolicy(), args, env, livenessProbe, affinityWithRole); err != nil { return maskAny(err) } log.Debug().Str("pod-name", 
m.PodName).Msg("Created pod") diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 5fdeb85a9..fa0d75bd4 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -66,6 +66,7 @@ type Config struct { Namespace string PodName string ServiceAccount string + LifecycleImage string EnableDeployment bool EnableStorage bool AllowChaos bool diff --git a/pkg/operator/operator_deployment.go b/pkg/operator/operator_deployment.go index d62d32090..62cd54182 100644 --- a/pkg/operator/operator_deployment.go +++ b/pkg/operator/operator_deployment.go @@ -203,6 +203,7 @@ func (o *Operator) handleDeploymentEvent(event *Event) error { func (o *Operator) makeDeploymentConfigAndDeps(apiObject *api.ArangoDeployment) (deployment.Config, deployment.Dependencies) { cfg := deployment.Config{ ServiceAccount: o.Config.ServiceAccount, + LifecycleImage: o.Config.LifecycleImage, AllowChaos: o.Config.AllowChaos, } deps := deployment.Dependencies{ diff --git a/pkg/util/k8sutil/container.go b/pkg/util/k8sutil/container.go new file mode 100644 index 000000000..b018eff97 --- /dev/null +++ b/pkg/util/k8sutil/container.go @@ -0,0 +1,38 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package k8sutil + +import ( + "k8s.io/api/core/v1" +) + +// GetContainerByName returns the container in the given pod with the given name. +// Returns false if not found. +func GetContainerByName(p *v1.Pod, name string) (v1.Container, bool) { + for _, c := range p.Spec.Containers { + if c.Name == name { + return c, true + } + } + return v1.Container{}, false +} diff --git a/pkg/util/k8sutil/images.go b/pkg/util/k8sutil/images.go new file mode 100644 index 000000000..f5f2327b6 --- /dev/null +++ b/pkg/util/k8sutil/images.go @@ -0,0 +1,38 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package k8sutil + +import "strings" + +const ( + dockerPullableImageIDPrefix = "docker-pullable://" +) + +// ConvertImageID2Image converts a ImageID from a ContainerStatus to an Image that can be used +// in a Container specification. 
+func ConvertImageID2Image(imageID string) string { + if strings.HasPrefix(imageID, dockerPullableImageIDPrefix) { + return imageID[len(dockerPullableImageIDPrefix):] + } + return imageID +} diff --git a/pkg/util/k8sutil/pods.go b/pkg/util/k8sutil/pods.go index 8f692ddd7..0735b4a99 100644 --- a/pkg/util/k8sutil/pods.go +++ b/pkg/util/k8sutil/pods.go @@ -24,23 +24,30 @@ package k8sutil import ( "fmt" + "os" "path/filepath" "strings" "time" + "github.com/arangodb/kube-arangodb/pkg/util/constants" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" ) const ( + InitDataContainerName = "init-data" + InitLifecycleContainerName = "init-lifecycle" + ServerContainerName = "server" alpineImage = "alpine" arangodVolumeName = "arangod-data" tlsKeyfileVolumeName = "tls-keyfile" + lifecycleVolumeName = "lifecycle" rocksdbEncryptionVolumeName = "rocksdb-encryption" ArangodVolumeMountDir = "/data" RocksDBEncryptionVolumeMountDir = "/secrets/rocksdb/encryption" TLSKeyfileVolumeMountDir = "/secrets/tls" + LifecycleVolumeMountDir = "/lifecycle/tools" ) // EnvValue is a helper structure for environment variable sources. @@ -147,6 +154,13 @@ func CreateTLSKeyfileSecretName(deploymentName, role, id string) string { return CreatePodName(deploymentName, role, id, "-tls-keyfile") } +// lifecycleVolumeMounts creates a volume mount structure for shared lifecycle emptyDir. +func lifecycleVolumeMounts() []v1.VolumeMount { + return []v1.VolumeMount{ + {Name: lifecycleVolumeName, MountPath: LifecycleVolumeMountDir}, + } +} + // arangodVolumeMounts creates a volume mount structure for arangod. func arangodVolumeMounts() []v1.VolumeMount { return []v1.VolumeMount{ @@ -207,12 +221,14 @@ func arangodInitContainer(name, id, engine string, requireUUID bool) v1.Containe } // arangodContainer creates a container configured to run `arangod`. 
-func arangodContainer(name string, image string, imagePullPolicy v1.PullPolicy, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig) v1.Container { +func arangodContainer(image string, imagePullPolicy v1.PullPolicy, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig, + lifecycle *v1.Lifecycle, lifecycleEnvVars []v1.EnvVar) v1.Container { c := v1.Container{ Command: append([]string{"/usr/sbin/arangod"}, args...), - Name: name, + Name: ServerContainerName, Image: image, ImagePullPolicy: imagePullPolicy, + Lifecycle: lifecycle, Ports: []v1.ContainerPort{ { Name: "server", @@ -231,17 +247,23 @@ func arangodContainer(name string, image string, imagePullPolicy v1.PullPolicy, if readinessProbe != nil { c.ReadinessProbe = readinessProbe.Create() } + if lifecycle != nil { + c.Env = append(c.Env, lifecycleEnvVars...) + c.VolumeMounts = append(c.VolumeMounts, lifecycleVolumeMounts()...) + } return c } // arangosyncContainer creates a container configured to run `arangosync`. -func arangosyncContainer(name string, image string, imagePullPolicy v1.PullPolicy, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig) v1.Container { +func arangosyncContainer(image string, imagePullPolicy v1.PullPolicy, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, + lifecycle *v1.Lifecycle, lifecycleEnvVars []v1.EnvVar) v1.Container { c := v1.Container{ Command: append([]string{"/usr/sbin/arangosync"}, args...), - Name: name, + Name: ServerContainerName, Image: image, ImagePullPolicy: imagePullPolicy, + Lifecycle: lifecycle, Ports: []v1.ContainerPort{ { Name: "server", @@ -256,10 +278,74 @@ func arangosyncContainer(name string, image string, imagePullPolicy v1.PullPolic if livenessProbe != nil { c.LivenessProbe = livenessProbe.Create() } + if lifecycle != nil { + c.Env = append(c.Env, lifecycleEnvVars...) 
+ c.VolumeMounts = append(c.VolumeMounts, lifecycleVolumeMounts()...) + } return c } +// newLifecycle creates a lifecycle structure with preStop handler. +func newLifecycle() (*v1.Lifecycle, []v1.EnvVar, []v1.Volume, error) { + binaryPath, err := os.Executable() + if err != nil { + return nil, nil, nil, maskAny(err) + } + exePath := filepath.Join(LifecycleVolumeMountDir, filepath.Base(binaryPath)) + lifecycle := &v1.Lifecycle{ + PreStop: &v1.Handler{ + Exec: &v1.ExecAction{ + Command: append([]string{exePath}, "lifecycle", "preStop"), + }, + }, + } + envVars := []v1.EnvVar{ + v1.EnvVar{ + Name: constants.EnvOperatorPodName, + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + v1.EnvVar{ + Name: constants.EnvOperatorPodNamespace, + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, + }, + } + vols := []v1.Volume{ + v1.Volume{ + Name: lifecycleVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + }, + } + return lifecycle, envVars, vols, nil +} + +// initLifecycleContainer creates an init-container to copy the lifecycle binary +// to a shared volume. +func initLifecycleContainer(image string) (v1.Container, error) { + binaryPath, err := os.Executable() + if err != nil { + return v1.Container{}, maskAny(err) + } + c := v1.Container{ + Command: append([]string{binaryPath}, "lifecycle", "copy", "--target", LifecycleVolumeMountDir), + Name: InitLifecycleContainerName, + Image: image, + ImagePullPolicy: v1.PullIfNotPresent, + VolumeMounts: lifecycleVolumeMounts(), + } + return c, nil +} + // newPod creates a basic Pod for given settings. 
func newPod(deploymentName, ns, role, id, podName string, finalizers []string) v1.Pod { hostname := CreatePodHostName(deploymentName, role, id) @@ -282,7 +368,7 @@ func newPod(deploymentName, ns, role, id, podName string, finalizers []string) v // If the pod already exists, nil is returned. // If another error occurs, that error is returned. func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, - role, id, podName, pvcName, image string, imagePullPolicy v1.PullPolicy, + role, id, podName, pvcName, image, lifecycleImage string, imagePullPolicy v1.PullPolicy, engine string, requireUUID bool, args []string, env map[string]EnvValue, finalizers []string, livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig, @@ -290,8 +376,24 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy // Prepare basic pod p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, finalizers) + // Add lifecycle container + var lifecycle *v1.Lifecycle + var lifecycleEnvVars []v1.EnvVar + var lifecycleVolumes []v1.Volume + if lifecycleImage != "" { + c, err := initLifecycleContainer(lifecycleImage) + if err != nil { + return maskAny(err) + } + p.Spec.InitContainers = append(p.Spec.InitContainers, c) + lifecycle, lifecycleEnvVars, lifecycleVolumes, err = newLifecycle() + if err != nil { + return maskAny(err) + } + } + // Add arangod container - c := arangodContainer("arangod", image, imagePullPolicy, args, env, livenessProbe, readinessProbe) + c := arangodContainer(image, imagePullPolicy, args, env, livenessProbe, readinessProbe, lifecycle, lifecycleEnvVars) if tlsKeyfileSecretName != "" { c.VolumeMounts = append(c.VolumeMounts, tlsKeyfileVolumeMounts()...) 
} @@ -352,6 +454,9 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy p.Spec.Volumes = append(p.Spec.Volumes, vol) } + // Lifecycle volumes (if any) + p.Spec.Volumes = append(p.Spec.Volumes, lifecycleVolumes...) + // Add (anti-)affinity p.Spec.Affinity = createAffinity(deployment.GetName(), role, !developmentMode, "") @@ -364,15 +469,34 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy // CreateArangoSyncPod creates a Pod that runs `arangosync`. // If the pod already exists, nil is returned. // If another error occurs, that error is returned. -func CreateArangoSyncPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, podName, image string, imagePullPolicy v1.PullPolicy, +func CreateArangoSyncPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, podName, image, lifecycleImage string, imagePullPolicy v1.PullPolicy, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, affinityWithRole string) error { // Prepare basic pod p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, nil) + // Add lifecycle container + var lifecycle *v1.Lifecycle + var lifecycleEnvVars []v1.EnvVar + var lifecycleVolumes []v1.Volume + if lifecycleImage != "" { + c, err := initLifecycleContainer(lifecycleImage) + if err != nil { + return maskAny(err) + } + p.Spec.InitContainers = append(p.Spec.InitContainers, c) + lifecycle, lifecycleEnvVars, lifecycleVolumes, err = newLifecycle() + if err != nil { + return maskAny(err) + } + } + // Add arangosync container - c := arangosyncContainer("arangosync", image, imagePullPolicy, args, env, livenessProbe) + c := arangosyncContainer(image, imagePullPolicy, args, env, livenessProbe, lifecycle, lifecycleEnvVars) p.Spec.Containers = append(p.Spec.Containers, c) + // Lifecycle volumes (if any) + p.Spec.Volumes = append(p.Spec.Volumes, lifecycleVolumes...) 
+ // Add (anti-)affinity p.Spec.Affinity = createAffinity(deployment.GetName(), role, !developmentMode, affinityWithRole) From 29db9bd9e5c85828ee76b8f1312d3f3f99b186b3 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 11 May 2018 15:26:19 +0200 Subject: [PATCH 03/11] Set (increase) default termination grace period for pods --- pkg/apis/deployment/v1alpha/server_group.go | 16 ++++++++++++++++ pkg/deployment/images.go | 11 +++++------ pkg/deployment/resources/pod_creator.go | 3 ++- pkg/util/k8sutil/pods.go | 5 ++++- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pkg/apis/deployment/v1alpha/server_group.go b/pkg/apis/deployment/v1alpha/server_group.go index 8d8725693..894ba0b1e 100644 --- a/pkg/apis/deployment/v1alpha/server_group.go +++ b/pkg/apis/deployment/v1alpha/server_group.go @@ -22,6 +22,8 @@ package v1alpha +import time "time" + type ServerGroup int const ( @@ -85,6 +87,20 @@ func (g ServerGroup) AsRoleAbbreviated() string { } } +// DefaultTerminationGracePeriod returns the default period between SIGTERM & SIGKILL for a server in the given group. +func (g ServerGroup) DefaultTerminationGracePeriod() time.Duration { + switch g { + case ServerGroupSingle: + return time.Minute + case ServerGroupAgents: + return time.Minute + case ServerGroupDBServers: + return time.Hour + default: + return time.Second * 30 + } +} + // IsArangod returns true when the groups runs servers of type `arangod`. 
func (g ServerGroup) IsArangod() bool { switch g { diff --git a/pkg/deployment/images.go b/pkg/deployment/images.go index c95a9ab5c..084d5906b 100644 --- a/pkg/deployment/images.go +++ b/pkg/deployment/images.go @@ -26,7 +26,7 @@ import ( "context" "crypto/sha1" "fmt" - "strings" + "time" "github.com/rs/zerolog" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -117,10 +117,8 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, ima log.Warn().Msg("Empty list of ContainerStatuses") return true, nil } - imageID := pod.Status.ContainerStatuses[0].ImageID - if strings.HasPrefix(imageID, dockerPullableImageIDPrefix) { - imageID = imageID[len(dockerPullableImageIDPrefix):] - } else if imageID == "" { + imageID := k8sutil.ConvertImageID2Image(pod.Status.ContainerStatuses[0].ImageID) + if imageID == "" { // Fall back to specified image imageID = image } @@ -166,7 +164,8 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, ima "--server.authentication=false", fmt.Sprintf("--server.endpoint=tcp://[::]:%d", k8sutil.ArangoPort), } - if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, "", ib.Spec.GetImagePullPolicy(), "", false, args, nil, nil, nil, nil, "", ""); err != nil { + terminationGracePeriod := time.Second * 30 + if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, "", ib.Spec.GetImagePullPolicy(), "", false, terminationGracePeriod, args, nil, nil, nil, nil, "", ""); err != nil { log.Debug().Err(err).Msg("Failed to create image ID pod") return true, maskAny(err) } diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index f5b89f16e..6ac1b92ec 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -379,9 +379,10 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server } engine := 
spec.GetStorageEngine().AsArangoArgument() requireUUID := group == api.ServerGroupDBServers && m.IsInitialized + terminationGracePeriod := group.DefaultTerminationGracePeriod() finalizers := r.createPodFinalizers(group) if err := k8sutil.CreateArangodPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, m.PersistentVolumeClaimName, info.ImageID, lifecycleImage, spec.GetImagePullPolicy(), - engine, requireUUID, args, env, finalizers, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil { + engine, requireUUID, terminationGracePeriod, args, env, finalizers, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil { return maskAny(err) } log.Debug().Str("pod-name", m.PodName).Msg("Created pod") diff --git a/pkg/util/k8sutil/pods.go b/pkg/util/k8sutil/pods.go index 0735b4a99..9816ad72b 100644 --- a/pkg/util/k8sutil/pods.go +++ b/pkg/util/k8sutil/pods.go @@ -24,6 +24,7 @@ package k8sutil import ( "fmt" + "math" "os" "path/filepath" "strings" @@ -369,12 +370,14 @@ func newPod(deploymentName, ns, role, id, podName string, finalizers []string) v // If another error occurs, that error is returned. 
func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, podName, pvcName, image, lifecycleImage string, imagePullPolicy v1.PullPolicy, - engine string, requireUUID bool, + engine string, requireUUID bool, terminationGracePeriod time.Duration, args []string, env map[string]EnvValue, finalizers []string, livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig, tlsKeyfileSecretName, rocksdbEncryptionSecretName string) error { // Prepare basic pod p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, finalizers) + terminationGracePeriodSeconds := int64(math.Ceil(terminationGracePeriod.Seconds())) + p.Spec.TerminationGracePeriodSeconds = &terminationGracePeriodSeconds // Add lifecycle container var lifecycle *v1.Lifecycle From 264c05efface0e4c9303e860d811b144e66f3e14 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 11 May 2018 15:29:23 +0200 Subject: [PATCH 04/11] Set termination grace period also on arangosync --- pkg/deployment/resources/pod_creator.go | 4 ++-- pkg/util/k8sutil/pods.go | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index 6ac1b92ec..004071846 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -322,6 +322,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server ns := r.context.GetNamespace() status := r.context.GetStatus() lifecycleImage := r.context.GetLifecycleImage() + terminationGracePeriod := group.DefaultTerminationGracePeriod() // Update pod name role := group.AsRole() @@ -379,7 +380,6 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server } engine := spec.GetStorageEngine().AsArangoArgument() requireUUID := group == api.ServerGroupDBServers && m.IsInitialized - terminationGracePeriod := group.DefaultTerminationGracePeriod() finalizers := 
r.createPodFinalizers(group) if err := k8sutil.CreateArangodPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, m.PersistentVolumeClaimName, info.ImageID, lifecycleImage, spec.GetImagePullPolicy(), engine, requireUUID, terminationGracePeriod, args, env, finalizers, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil { @@ -404,7 +404,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server if group == api.ServerGroupSyncWorkers { affinityWithRole = api.ServerGroupDBServers.AsRole() } - if err := k8sutil.CreateArangoSyncPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, info.ImageID, lifecycleImage, spec.Sync.GetImagePullPolicy(), args, env, livenessProbe, affinityWithRole); err != nil { + if err := k8sutil.CreateArangoSyncPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, info.ImageID, lifecycleImage, spec.Sync.GetImagePullPolicy(), terminationGracePeriod, args, env, livenessProbe, affinityWithRole); err != nil { return maskAny(err) } log.Debug().Str("pod-name", m.PodName).Msg("Created pod") diff --git a/pkg/util/k8sutil/pods.go b/pkg/util/k8sutil/pods.go index 9816ad72b..a844abb0c 100644 --- a/pkg/util/k8sutil/pods.go +++ b/pkg/util/k8sutil/pods.go @@ -473,9 +473,11 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy // If the pod already exists, nil is returned. // If another error occurs, that error is returned. 
func CreateArangoSyncPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, podName, image, lifecycleImage string, imagePullPolicy v1.PullPolicy, - args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, affinityWithRole string) error { + terminationGracePeriod time.Duration, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, affinityWithRole string) error { // Prepare basic pod p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, nil) + terminationGracePeriodSeconds := int64(math.Ceil(terminationGracePeriod.Seconds())) + p.Spec.TerminationGracePeriodSeconds = &terminationGracePeriodSeconds // Add lifecycle container var lifecycle *v1.Lifecycle From e2bdb9f2e579465774b9e3096d36f7481f4b2485 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 11 May 2018 16:56:20 +0200 Subject: [PATCH 05/11] Prevent deleting PVCs while their member still exists --- docs/design/lifecycle_hooks.md | 15 ++-- .../v1alpha/deployment_status_members.go | 16 ++++ .../deployment/v1alpha/member_status_list.go | 11 +++ pkg/deployment/context_impl.go | 18 +++++ pkg/deployment/deployment_inspector.go | 4 + pkg/deployment/resources/context.go | 2 + pkg/deployment/resources/pod_creator.go | 2 +- pkg/deployment/resources/pod_finalizers.go | 21 ++++- pkg/deployment/resources/pvc_finalizers.go | 78 ++++++++++++++++++ pkg/deployment/resources/pvc_inspector.go | 80 +++++++++++++++++++ pkg/deployment/resources/pvcs.go | 9 ++- pkg/util/constants/constants.go | 3 +- pkg/util/k8sutil/finalizers.go | 25 ++++++ pkg/util/k8sutil/pvc.go | 12 ++- 14 files changed, 279 insertions(+), 17 deletions(-) create mode 100644 pkg/deployment/resources/pvc_finalizers.go create mode 100644 pkg/deployment/resources/pvc_inspector.go diff --git a/docs/design/lifecycle_hooks.md b/docs/design/lifecycle_hooks.md index 030a00134..58060f3ae 100644 --- a/docs/design/lifecycle_hooks.md +++ b/docs/design/lifecycle_hooks.md @@ -1,11 
+1,12 @@ # Lifecycle hooks -The ArangoDB operator expects full control of the `Pods` is creates. -Therefore it takes measures to prevent the removal of those `Pods` -until it is say to do so. +The ArangoDB operator expects full control of the `Pods` and `PersistentVolumeClaims` it creates. +Therefore it takes measures to prevent the removal of those resources +until it is safe to do so. To achieve this, the server containers in the `Pods` have -a `preStop` hook configured and finalizers are added to the `Pods`. +a `preStop` hook configured and finalizers are added to the `Pods` +and `PersistentVolumeClaims`. The `preStop` hook executes a binary that waits until all finalizers of the current pod have been removed. @@ -13,9 +14,9 @@ Until this `preStop` hook terminates, Kubernetes will not send a `TERM` signal to the processes inside the container, which ensures that the server remains running until it is safe to stop them. -The operator performs all actions needed when a delete of a `Pod` have been -triggered. E.g. for a dbserver it cleans out the server before it removes -the finalizers. +The operator performs all actions needed when a delete of a `Pod` or +`PersistentVolumeClaim` has been triggered. +E.g. for a dbserver it cleans out the server if the `Pod` and `PersistentVolumeClaim` are being deleted. ## Lifecycle init-container diff --git a/pkg/apis/deployment/v1alpha/deployment_status_members.go b/pkg/apis/deployment/v1alpha/deployment_status_members.go index b13d9a932..0b8184075 100644 --- a/pkg/apis/deployment/v1alpha/deployment_status_members.go +++ b/pkg/apis/deployment/v1alpha/deployment_status_members.go @@ -121,6 +121,22 @@ func (ds DeploymentStatusMembers) MemberStatusByPodName(podName string) (MemberS return MemberStatus{}, 0, false } +// MemberStatusByPVCName returns a reference to the element in the given set of lists that has the given PVC name. +// If no such element exists, an empty element and false is returned. 
+func (ds DeploymentStatusMembers) MemberStatusByPVCName(pvcName string) (MemberStatus, ServerGroup, bool) { + if result, found := ds.Single.ElementByPVCName(pvcName); found { + return result, ServerGroupSingle, true + } + if result, found := ds.Agents.ElementByPVCName(pvcName); found { + return result, ServerGroupAgents, true + } + if result, found := ds.DBServers.ElementByPVCName(pvcName); found { + return result, ServerGroupDBServers, true + } + // Note: Other server groups do not have PVC's so we can skip them. + return MemberStatus{}, 0, false +} + // UpdateMemberStatus updates the given status in the given group. func (ds *DeploymentStatusMembers) UpdateMemberStatus(status MemberStatus, group ServerGroup) error { var err error diff --git a/pkg/apis/deployment/v1alpha/member_status_list.go b/pkg/apis/deployment/v1alpha/member_status_list.go index fff418e25..18b38165a 100644 --- a/pkg/apis/deployment/v1alpha/member_status_list.go +++ b/pkg/apis/deployment/v1alpha/member_status_list.go @@ -63,6 +63,17 @@ func (l MemberStatusList) ElementByPodName(podName string) (MemberStatus, bool) return MemberStatus{}, false } +// ElementByPVCName returns the element in the given list that has the given PVC name and true. +// If no such element exists, an empty element and false is returned. +func (l MemberStatusList) ElementByPVCName(pvcName string) (MemberStatus, bool) { + for i, x := range l { + if x.PersistentVolumeClaimName == pvcName { + return l[i], true + } + } + return MemberStatus{}, false +} + // Add a member to the list. // Returns an AlreadyExistsError if the ID of the given member already exists. 
func (l *MemberStatusList) Add(m MemberStatus) error { diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index 81ffed117..f3e56ad16 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -196,6 +196,24 @@ func (d *Deployment) GetOwnedPods() ([]v1.Pod, error) { return myPods, nil } +// GetOwnedPVCs returns a list of all PVCs owned by the deployment. +func (d *Deployment) GetOwnedPVCs() ([]v1.PersistentVolumeClaim, error) { + // Get all current PVCs + log := d.deps.Log + pvcs, err := d.deps.KubeCli.CoreV1().PersistentVolumeClaims(d.apiObject.GetNamespace()).List(k8sutil.DeploymentListOpt(d.apiObject.GetName())) + if err != nil { + log.Debug().Err(err).Msg("Failed to list PVCs") + return nil, maskAny(err) + } + myPVCs := make([]v1.PersistentVolumeClaim, 0, len(pvcs.Items)) + for _, p := range pvcs.Items { + if d.isOwnerOf(&p) { + myPVCs = append(myPVCs, p) + } + } + return myPVCs, nil +} + // GetTLSKeyfile returns the keyfile encoded TLS certificate+key for // the given member. 
func (d *Deployment) GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) { diff --git a/pkg/deployment/deployment_inspector.go b/pkg/deployment/deployment_inspector.go index 7bf75de0d..e4c5132be 100644 --- a/pkg/deployment/deployment_inspector.go +++ b/pkg/deployment/deployment_inspector.go @@ -84,6 +84,10 @@ func (d *Deployment) inspectDeployment(lastInterval time.Duration) time.Duration hasError = true d.CreateEvent(k8sutil.NewErrorEvent("Pod inspection failed", err, d.apiObject)) } + if err := d.resources.InspectPVCs(ctx); err != nil { + hasError = true + d.CreateEvent(k8sutil.NewErrorEvent("PVC inspection failed", err, d.apiObject)) + } // Check members for resilience if err := d.resilience.CheckMemberFailure(); err != nil { diff --git a/pkg/deployment/resources/context.go b/pkg/deployment/resources/context.go index 8bc827930..42ca317ca 100644 --- a/pkg/deployment/resources/context.go +++ b/pkg/deployment/resources/context.go @@ -67,6 +67,8 @@ type Context interface { CreateEvent(evt *v1.Event) // GetOwnedPods returns a list of all pods owned by the deployment. GetOwnedPods() ([]v1.Pod, error) + // GetOwnedPVCs returns a list of all PVCs owned by the deployment. + GetOwnedPVCs() ([]v1.PersistentVolumeClaim, error) // CleanupPod deletes a given pod with force and explicit UID. // If the pod does not exist, the error is ignored. 
CleanupPod(p v1.Pod) error diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index 004071846..44d2ff1c3 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -307,7 +307,7 @@ func (r *Resources) createReadinessProbe(spec api.DeploymentSpec, group api.Serv func (r *Resources) createPodFinalizers(group api.ServerGroup) []string { switch group { case api.ServerGroupDBServers: - return []string{constants.FinalizerDrainDBServer} + return []string{constants.FinalizerPodDrainDBServer} default: return nil } diff --git a/pkg/deployment/resources/pod_finalizers.go b/pkg/deployment/resources/pod_finalizers.go index bdae9cc25..69d98f4ef 100644 --- a/pkg/deployment/resources/pod_finalizers.go +++ b/pkg/deployment/resources/pod_finalizers.go @@ -28,6 +28,7 @@ import ( "github.com/rs/zerolog" "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" "github.com/arangodb/kube-arangodb/pkg/util/constants" @@ -40,9 +41,9 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu var removalList []string for _, f := range p.ObjectMeta.GetFinalizers() { switch f { - case constants.FinalizerDrainDBServer: + case constants.FinalizerPodDrainDBServer: log.Debug().Msg("Inspecting drain dbserver finalizer") - if err := r.inspectFinalizerDrainDBServer(ctx, log, p, memberStatus); err == nil { + if err := r.inspectFinalizerPodDrainDBServer(ctx, log, p, memberStatus); err == nil { removalList = append(removalList, f) } else { log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") @@ -60,9 +61,9 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu return nil } -// inspectFinalizerDrainDBServer checks the finalizer condition for drain-dbserver. 
+// inspectFinalizerPodDrainDBServer checks the finalizer condition for drain-dbserver. // It returns nil if the finalizer can be removed. -func (r *Resources) inspectFinalizerDrainDBServer(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus) error { +func (r *Resources) inspectFinalizerPodDrainDBServer(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus) error { // Inspect member phase if memberStatus.Phase.IsFailed() { log.Debug().Msg("Pod is already failed, safe to remove drain dbserver finalizer") @@ -74,6 +75,18 @@ func (r *Resources) inspectFinalizerDrainDBServer(ctx context.Context, log zerol log.Debug().Msg("Entire deployment is being deleted, safe to remove drain dbserver finalizer") return nil } + // Check PVC + pvcs := r.context.GetKubeCli().CoreV1().PersistentVolumeClaims(apiObject.GetNamespace()) + pvc, err := pvcs.Get(memberStatus.PersistentVolumeClaimName, metav1.GetOptions{}) + if err != nil { + log.Warn().Err(err).Msg("Failed to get PVC for member") + return maskAny(err) + } + if !k8sutil.IsPersistentVolumeClaimMarkedForDeletion(pvc) { + log.Debug().Msg("PVC is not being deleted, so it is safe to remove drain dbserver finalizer") + return nil + } + log.Debug().Msg("PVC is being deleted, so we will cleanout the dbserver first") // Inspect cleaned out state c, err := r.context.GetDatabaseClient(ctx) if err != nil { diff --git a/pkg/deployment/resources/pvc_finalizers.go b/pkg/deployment/resources/pvc_finalizers.go new file mode 100644 index 000000000..ff0cb7327 --- /dev/null +++ b/pkg/deployment/resources/pvc_finalizers.go @@ -0,0 +1,78 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package resources + +import ( + "context" + "fmt" + + "github.com/rs/zerolog" + "k8s.io/api/core/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/arangodb/kube-arangodb/pkg/util/constants" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" +) + +// runPVCFinalizers goes through the list of PVC finalizers to see if they can be removed. +func (r *Resources) runPVCFinalizers(ctx context.Context, p *v1.PersistentVolumeClaim, memberStatus api.MemberStatus) error { + log := r.log.With().Str("pvc-name", p.GetName()).Logger() + var removalList []string + for _, f := range p.ObjectMeta.GetFinalizers() { + switch f { + case constants.FinalizerPVCMemberExists: + log.Debug().Msg("Inspecting member exists finalizer") + if err := r.inspectFinalizerPVCMemberExists(ctx, log, p, memberStatus); err == nil { + removalList = append(removalList, f) + } else { + log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") + } + } + } + // Remove finalizers (if needed) + if len(removalList) > 0 { + kubecli := r.context.GetKubeCli() + if err := k8sutil.RemovePVCFinalizers(log, kubecli, p, removalList); err != nil { + log.Debug().Err(err).Msg("Failed to update PVC (to remove finalizers)") + return maskAny(err) + } + } + return nil +} + +// inspectFinalizerPVCMemberExists checks the finalizer condition for member-exists. +// It returns nil if the finalizer can be removed. 
+func (r *Resources) inspectFinalizerPVCMemberExists(ctx context.Context, log zerolog.Logger, p *v1.PersistentVolumeClaim, memberStatus api.MemberStatus) error { + // Inspect member phase + if memberStatus.Phase.IsFailed() { + log.Debug().Msg("Member is already failed, safe to remove member-exists finalizer") + return nil + } + // Inspect deployment deletion state + apiObject := r.context.GetAPIObject() + if apiObject.GetDeletionTimestamp() != nil { + log.Debug().Msg("Entire deployment is being deleted, safe to remove member-exists finalizer") + return nil + } + return maskAny(fmt.Errorf("Member still exists")) +} diff --git a/pkg/deployment/resources/pvc_inspector.go b/pkg/deployment/resources/pvc_inspector.go new file mode 100644 index 000000000..959c7ebcd --- /dev/null +++ b/pkg/deployment/resources/pvc_inspector.go @@ -0,0 +1,80 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package resources + +import ( + "context" + + "github.com/arangodb/kube-arangodb/pkg/metrics" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" +) + +var ( + inspectedPVCCounter = metrics.MustRegisterCounter("deployment", "inspected_pvcs", "Number of PVC inspections") +) + +// InspectPVCs lists all PVCs that belong to the given deployment and updates +// the member status of the deployment accordingly. +func (r *Resources) InspectPVCs(ctx context.Context) error { + log := r.log + + pvcs, err := r.context.GetOwnedPVCs() + if err != nil { + log.Debug().Err(err).Msg("Failed to get owned PVCs") + return maskAny(err) + } + + // Update member status from all PVCs found + status := r.context.GetStatus() + for _, p := range pvcs { + // PVC belongs to this deployment, update metric + inspectedPVCCounter.Inc() + + // Find member status + memberStatus, _, found := status.Members.MemberStatusByPVCName(p.GetName()) + if !found { + log.Debug().Str("pvc", p.GetName()).Msg("no memberstatus found for PVC") + if k8sutil.IsPersistentVolumeClaimMarkedForDeletion(&p) && len(p.GetFinalizers()) > 0 { + // Strange, pvc belongs to us, but we have no member for it. + // Remove all finalizers, so it can be removed. + log.Warn().Msg("PVC belongs to this deployment, but we don't know the member. Removing all finalizers") + kubecli := r.context.GetKubeCli() + if err := k8sutil.RemovePVCFinalizers(log, kubecli, &p, p.GetFinalizers()); err != nil { + log.Debug().Err(err).Msg("Failed to update PVC (to remove all finalizers)") + return maskAny(err) + } + } + continue + } + + if k8sutil.IsPersistentVolumeClaimMarkedForDeletion(&p) { + // Process finalizers + if err := r.runPVCFinalizers(ctx, &p, memberStatus); err != nil { + // Only log here, since we'll be called to try again. 
+ log.Warn().Err(err).Msg("Failed to run PVC finalizers") + } + } + } + + return nil +} diff --git a/pkg/deployment/resources/pvcs.go b/pkg/deployment/resources/pvcs.go index abb2b135b..72ddf243e 100644 --- a/pkg/deployment/resources/pvcs.go +++ b/pkg/deployment/resources/pvcs.go @@ -25,9 +25,15 @@ package resources import ( api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/arangodb/kube-arangodb/pkg/util/constants" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" ) +// createPVCFinalizers creates a list of finalizers for a PVC created for the given group. +func (r *Resources) createPVCFinalizers(group api.ServerGroup) []string { + return []string{constants.FinalizerPVCMemberExists} +} + // EnsurePVCs creates all PVC's listed in member status func (r *Resources) EnsurePVCs() error { kubecli := r.context.GetKubeCli() @@ -44,7 +50,8 @@ func (r *Resources) EnsurePVCs() error { storageClassName := spec.GetStorageClassName() role := group.AsRole() resources := spec.Resources - if err := k8sutil.CreatePersistentVolumeClaim(kubecli, m.PersistentVolumeClaimName, deploymentName, ns, storageClassName, role, resources, owner); err != nil { + finalizers := r.createPVCFinalizers(group) + if err := k8sutil.CreatePersistentVolumeClaim(kubecli, m.PersistentVolumeClaimName, deploymentName, ns, storageClassName, role, resources, finalizers, owner); err != nil { return maskAny(err) } } diff --git a/pkg/util/constants/constants.go b/pkg/util/constants/constants.go index a5e50be2d..437a26160 100644 --- a/pkg/util/constants/constants.go +++ b/pkg/util/constants/constants.go @@ -39,5 +39,6 @@ const ( SecretTLSKeyfile = "tls.keyfile" // Key in Secret.data used to store a PEM encoded TLS certificate in the format used by ArangoDB (`--ssl.keyfile`) - FinalizerDrainDBServer = "dbserver.database.arangodb.com/drain" // Finalizer adds to DBServers, indicating the need for draining that dbserver + FinalizerPodDrainDBServer = 
"dbserver.database.arangodb.com/drain" // Finalizer added to DBServers, indicating the need for draining that dbserver + FinalizerPVCMemberExists = "pvc.database.arangodb.com/member-exists" // Finalizer added to PVCs, indicating the need to keep is as long as its member exists ) diff --git a/pkg/util/k8sutil/finalizers.go b/pkg/util/k8sutil/finalizers.go index 1e541a0cb..ec4ea3b6d 100644 --- a/pkg/util/k8sutil/finalizers.go +++ b/pkg/util/k8sutil/finalizers.go @@ -58,6 +58,31 @@ func RemovePodFinalizers(log zerolog.Logger, kubecli kubernetes.Interface, p *v1 return nil } +// RemovePVCFinalizers removes the given finalizers from the given PVC. +func RemovePVCFinalizers(log zerolog.Logger, kubecli kubernetes.Interface, p *v1.PersistentVolumeClaim, finalizers []string) error { + pvcs := kubecli.CoreV1().PersistentVolumeClaims(p.GetNamespace()) + getFunc := func() (metav1.Object, error) { + result, err := pvcs.Get(p.GetName(), metav1.GetOptions{}) + if err != nil { + return nil, maskAny(err) + } + return result, nil + } + updateFunc := func(updated metav1.Object) error { + updatedPVC := updated.(*v1.PersistentVolumeClaim) + result, err := pvcs.Update(updatedPVC) + if err != nil { + return maskAny(err) + } + *p = *result + return nil + } + if err := removeFinalizers(log, finalizers, getFunc, updateFunc); err != nil { + return maskAny(err) + } + return nil +} + // removeFinalizers is a helper used to remove finalizers from an object. // The functions tries to get the object using the provided get function, // then remove the given finalizers and update the update using the given update function. diff --git a/pkg/util/k8sutil/pvc.go b/pkg/util/k8sutil/pvc.go index cfed48981..d366ee5ae 100644 --- a/pkg/util/k8sutil/pvc.go +++ b/pkg/util/k8sutil/pvc.go @@ -28,6 +28,11 @@ import ( "k8s.io/client-go/kubernetes" ) +// IsPersistentVolumeClaimMarkedForDeletion returns true if the pod has been marked for deletion. 
+func IsPersistentVolumeClaimMarkedForDeletion(pvc *v1.PersistentVolumeClaim) bool { + return pvc.DeletionTimestamp != nil +} + // CreatePersistentVolumeClaimName returns the name of the persistent volume claim for a member with // a given id in a deployment with a given name. func CreatePersistentVolumeClaimName(deploymentName, role, id string) string { @@ -37,13 +42,14 @@ func CreatePersistentVolumeClaimName(deploymentName, role, id string) string { // CreatePersistentVolumeClaim creates a persistent volume claim with given name and configuration. // If the pvc already exists, nil is returned. // If another error occurs, that error is returned. -func CreatePersistentVolumeClaim(kubecli kubernetes.Interface, pvcName, deploymentName, ns, storageClassName, role string, resources v1.ResourceRequirements, owner metav1.OwnerReference) error { +func CreatePersistentVolumeClaim(kubecli kubernetes.Interface, pvcName, deploymentName, ns, storageClassName, role string, resources v1.ResourceRequirements, finalizers []string, owner metav1.OwnerReference) error { labels := LabelsForDeployment(deploymentName, role) volumeMode := v1.PersistentVolumeFilesystem pvc := &v1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ - Name: pvcName, - Labels: labels, + Name: pvcName, + Labels: labels, + Finalizers: finalizers, }, Spec: v1.PersistentVolumeClaimSpec{ AccessModes: []v1.PersistentVolumeAccessMode{ From 0c2b903e56880da2857216ea021c1ce023f7d2cc Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Fri, 11 May 2018 17:34:36 +0200 Subject: [PATCH 06/11] Remove finalizers upon delete of deployment to prevent orphans --- pkg/apis/deployment/v1alpha/deployment.go | 11 +++-- pkg/deployment/cleanup.go | 59 +++++++++++++++++++++++ pkg/deployment/deployment.go | 8 +++ 3 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 pkg/deployment/cleanup.go diff --git a/pkg/apis/deployment/v1alpha/deployment.go b/pkg/apis/deployment/v1alpha/deployment.go index d5ccd5663..a47b63278 
100644 --- a/pkg/apis/deployment/v1alpha/deployment.go +++ b/pkg/apis/deployment/v1alpha/deployment.go @@ -50,11 +50,14 @@ type ArangoDeployment struct { // AsOwner creates an OwnerReference for the given deployment func (d *ArangoDeployment) AsOwner() metav1.OwnerReference { + trueVar := true return metav1.OwnerReference{ - APIVersion: SchemeGroupVersion.String(), - Kind: ArangoDeploymentResourceKind, - Name: d.Name, - UID: d.UID, + APIVersion: SchemeGroupVersion.String(), + Kind: ArangoDeploymentResourceKind, + Name: d.Name, + UID: d.UID, + Controller: &trueVar, + BlockOwnerDeletion: &trueVar, } } diff --git a/pkg/deployment/cleanup.go b/pkg/deployment/cleanup.go new file mode 100644 index 000000000..99ad9a497 --- /dev/null +++ b/pkg/deployment/cleanup.go @@ -0,0 +1,59 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package deployment + +import ( + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" +) + +// removePodFinalizers removes all finalizers from all pods owned by us. 
+func (d *Deployment) removePodFinalizers() error { + log := d.deps.Log + kubecli := d.GetKubeCli() + pods, err := d.GetOwnedPods() + if err != nil { + return maskAny(err) + } + for _, p := range pods { + if err := k8sutil.RemovePodFinalizers(log, kubecli, &p, p.GetFinalizers()); err != nil { + log.Warn().Err(err).Msg("Failed to remove pod finalizers") + } + } + return nil +} + +// removePVCFinalizers removes all finalizers from all PVCs owned by us. +func (d *Deployment) removePVCFinalizers() error { + log := d.deps.Log + kubecli := d.GetKubeCli() + pvcs, err := d.GetOwnedPVCs() + if err != nil { + return maskAny(err) + } + for _, p := range pvcs { + if err := k8sutil.RemovePVCFinalizers(log, kubecli, &p, p.GetFinalizers()); err != nil { + log.Warn().Err(err).Msg("Failed to remove PVC finalizers") + } + } + return nil +} diff --git a/pkg/deployment/deployment.go b/pkg/deployment/deployment.go index b6f1d9123..4dbe57a1e 100644 --- a/pkg/deployment/deployment.go +++ b/pkg/deployment/deployment.go @@ -220,6 +220,14 @@ func (d *Deployment) run() { for { select { case <-d.stopCh: + // Remove finalizers from created resources + log.Info().Msg("Deployment removed, removing finalizers to prevent orphaned resources") + if err := d.removePodFinalizers(); err != nil { + log.Warn().Err(err).Msg("Failed to remove Pod finalizers") + } + if err := d.removePVCFinalizers(); err != nil { + log.Warn().Err(err).Msg("Failed to remove PVC finalizers") + } // We're being stopped. 
return From b7e07d5f384ba418da1b941a653fcb455ccab510 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 14 May 2018 10:50:06 +0200 Subject: [PATCH 07/11] Prevent multiple-agent delete --- pkg/deployment/resources/context.go | 2 + pkg/deployment/resources/pod_creator.go | 2 + pkg/deployment/resources/pod_finalizers.go | 52 ++++++++++++++++++++++ pkg/deployment/resources/pvc_finalizers.go | 11 +++-- pkg/deployment/resources/pvc_inspector.go | 4 +- pkg/util/constants/constants.go | 5 ++- 6 files changed, 69 insertions(+), 7 deletions(-) diff --git a/pkg/deployment/resources/context.go b/pkg/deployment/resources/context.go index 42ca317ca..571754caa 100644 --- a/pkg/deployment/resources/context.go +++ b/pkg/deployment/resources/context.go @@ -72,6 +72,8 @@ type Context interface { // CleanupPod deletes a given pod with force and explicit UID. // If the pod does not exist, the error is ignored. CleanupPod(p v1.Pod) error + // GetAgencyClients returns a client connection for every agency member. + GetAgencyClients(ctx context.Context, predicate func(memberID string) bool) ([]driver.Connection, error) // GetDatabaseClient returns a cached client for the entire database (cluster coordinators or single server), // creating one if needed. GetDatabaseClient(ctx context.Context) (driver.Client, error) diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index 44d2ff1c3..e46b532ff 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -306,6 +306,8 @@ func (r *Resources) createReadinessProbe(spec api.DeploymentSpec, group api.Serv // createPodFinalizers creates a list of finalizers for a pod created for the given group. 
func (r *Resources) createPodFinalizers(group api.ServerGroup) []string { switch group { + case api.ServerGroupAgents: + return []string{constants.FinalizerPodAgencyServing} case api.ServerGroupDBServers: return []string{constants.FinalizerPodDrainDBServer} default: diff --git a/pkg/deployment/resources/pod_finalizers.go b/pkg/deployment/resources/pod_finalizers.go index 69d98f4ef..901c16e4c 100644 --- a/pkg/deployment/resources/pod_finalizers.go +++ b/pkg/deployment/resources/pod_finalizers.go @@ -30,6 +30,7 @@ import ( "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/arangodb/go-driver/agency" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" "github.com/arangodb/kube-arangodb/pkg/util/constants" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" @@ -41,6 +42,13 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu var removalList []string for _, f := range p.ObjectMeta.GetFinalizers() { switch f { + case constants.FinalizerPodAgencyServing: + log.Debug().Msg("Inspecting agency-serving finalizer") + if err := r.inspectFinalizerPodAgencyServing(ctx, log, p, memberStatus); err == nil { + removalList = append(removalList, f) + } else { + log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") + } case constants.FinalizerPodDrainDBServer: log.Debug().Msg("Inspecting drain dbserver finalizer") if err := r.inspectFinalizerPodDrainDBServer(ctx, log, p, memberStatus); err == nil { @@ -61,6 +69,50 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu return nil } +// inspectFinalizerPodAgencyServing checks the finalizer condition for agency-serving. +// It returns nil if the finalizer can be removed. 
+func (r *Resources) inspectFinalizerPodAgencyServing(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus) error { + // Inspect member phase + if memberStatus.Phase.IsFailed() { + log.Debug().Msg("Pod is already failed, safe to remove agency serving finalizer") + return nil + } + // Inspect deployment deletion state + apiObject := r.context.GetAPIObject() + if apiObject.GetDeletionTimestamp() != nil { + log.Debug().Msg("Entire deployment is being deleted, safe to remove agency serving finalizer") + return nil + } + // Check PVC + pvcs := r.context.GetKubeCli().CoreV1().PersistentVolumeClaims(apiObject.GetNamespace()) + pvc, err := pvcs.Get(memberStatus.PersistentVolumeClaimName, metav1.GetOptions{}) + if err != nil { + log.Warn().Err(err).Msg("Failed to get PVC for member") + return maskAny(err) + } + if !k8sutil.IsPersistentVolumeClaimMarkedForDeletion(pvc) { + log.Debug().Msg("PVC is not being deleted, so it is safe to remove agency serving finalizer") + return nil + } + log.Debug().Msg("PVC is being deleted, so we will check agency serving status first") + // Inspect agency state + agencyConns, err := r.context.GetAgencyClients(ctx, func(id string) bool { return id != memberStatus.ID }) + if err != nil { + log.Debug().Err(err).Msg("Failed to create member client") + return maskAny(err) + } + if len(agencyConns) == 0 { + log.Debug().Err(err).Msg("No more remaining agents, we cannot delete this one") + return maskAny(fmt.Errorf("No more remaining agents")) + } + if err := agency.AreAgentsHealthy(ctx, agencyConns); err != nil { + log.Debug().Err(err).Msg("Remaining agents are not healthy") + return maskAny(err) + } + // Remaining agents are healthy, we can remove this one + return nil +} + // inspectFinalizerPodDrainDBServer checks the finalizer condition for drain-dbserver. // It returns nil if the finalizer can be removed. 
func (r *Resources) inspectFinalizerPodDrainDBServer(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus) error { diff --git a/pkg/deployment/resources/pvc_finalizers.go b/pkg/deployment/resources/pvc_finalizers.go index ff0cb7327..0381c469d 100644 --- a/pkg/deployment/resources/pvc_finalizers.go +++ b/pkg/deployment/resources/pvc_finalizers.go @@ -35,14 +35,14 @@ import ( ) // runPVCFinalizers goes through the list of PVC finalizers to see if they can be removed. -func (r *Resources) runPVCFinalizers(ctx context.Context, p *v1.PersistentVolumeClaim, memberStatus api.MemberStatus) error { +func (r *Resources) runPVCFinalizers(ctx context.Context, p *v1.PersistentVolumeClaim, group api.ServerGroup, memberStatus api.MemberStatus) error { log := r.log.With().Str("pvc-name", p.GetName()).Logger() var removalList []string for _, f := range p.ObjectMeta.GetFinalizers() { switch f { case constants.FinalizerPVCMemberExists: log.Debug().Msg("Inspecting member exists finalizer") - if err := r.inspectFinalizerPVCMemberExists(ctx, log, p, memberStatus); err == nil { + if err := r.inspectFinalizerPVCMemberExists(ctx, log, p, group, memberStatus); err == nil { removalList = append(removalList, f) } else { log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") @@ -62,7 +62,7 @@ func (r *Resources) runPVCFinalizers(ctx context.Context, p *v1.PersistentVolume // inspectFinalizerPVCMemberExists checks the finalizer condition for member-exists. // It returns nil if the finalizer can be removed. 
-func (r *Resources) inspectFinalizerPVCMemberExists(ctx context.Context, log zerolog.Logger, p *v1.PersistentVolumeClaim, memberStatus api.MemberStatus) error { +func (r *Resources) inspectFinalizerPVCMemberExists(ctx context.Context, log zerolog.Logger, p *v1.PersistentVolumeClaim, group api.ServerGroup, memberStatus api.MemberStatus) error { // Inspect member phase if memberStatus.Phase.IsFailed() { log.Debug().Msg("Member is already failed, safe to remove member-exists finalizer") @@ -74,5 +74,10 @@ func (r *Resources) inspectFinalizerPVCMemberExists(ctx context.Context, log zer log.Debug().Msg("Entire deployment is being deleted, safe to remove member-exists finalizer") return nil } + // We do allow to rebuild agents + if group == api.ServerGroupAgents && memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) { + log.Debug().Msg("Rebuilding terminated agents is allowed, safe to remove member-exists finalizer") + return nil + } return maskAny(fmt.Errorf("Member still exists")) } diff --git a/pkg/deployment/resources/pvc_inspector.go b/pkg/deployment/resources/pvc_inspector.go index 959c7ebcd..e04d49022 100644 --- a/pkg/deployment/resources/pvc_inspector.go +++ b/pkg/deployment/resources/pvc_inspector.go @@ -51,7 +51,7 @@ func (r *Resources) InspectPVCs(ctx context.Context) error { inspectedPVCCounter.Inc() // Find member status - memberStatus, _, found := status.Members.MemberStatusByPVCName(p.GetName()) + memberStatus, group, found := status.Members.MemberStatusByPVCName(p.GetName()) if !found { log.Debug().Str("pvc", p.GetName()).Msg("no memberstatus found for PVC") if k8sutil.IsPersistentVolumeClaimMarkedForDeletion(&p) && len(p.GetFinalizers()) > 0 { @@ -69,7 +69,7 @@ func (r *Resources) InspectPVCs(ctx context.Context) error { if k8sutil.IsPersistentVolumeClaimMarkedForDeletion(&p) { // Process finalizers - if err := r.runPVCFinalizers(ctx, &p, memberStatus); err != nil { + if err := r.runPVCFinalizers(ctx, &p, group, memberStatus); err != nil { // 
Only log here, since we'll be called to try again. log.Warn().Err(err).Msg("Failed to run PVC finalizers") } diff --git a/pkg/util/constants/constants.go b/pkg/util/constants/constants.go index 437a26160..6560ea519 100644 --- a/pkg/util/constants/constants.go +++ b/pkg/util/constants/constants.go @@ -39,6 +39,7 @@ const ( SecretTLSKeyfile = "tls.keyfile" // Key in Secret.data used to store a PEM encoded TLS certificate in the format used by ArangoDB (`--ssl.keyfile`) - FinalizerPodDrainDBServer = "dbserver.database.arangodb.com/drain" // Finalizer added to DBServers, indicating the need for draining that dbserver - FinalizerPVCMemberExists = "pvc.database.arangodb.com/member-exists" // Finalizer added to PVCs, indicating the need to keep is as long as its member exists + FinalizerPodDrainDBServer = "dbserver.database.arangodb.com/drain" // Finalizer added to DBServers, indicating the need for draining that dbserver + FinalizerPodAgencyServing = "agent.database.arangodb.com/agency-serving" // Finalizer added to Agents, indicating the need for keeping enough agents alive + FinalizerPVCMemberExists = "pvc.database.arangodb.com/member-exists" // Finalizer added to PVCs, indicating the need to keep it as long as its member exists ) From 191c4c3c20b68fdb72864c68c0caee3f9120b054 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 14 May 2018 10:56:01 +0200 Subject: [PATCH 08/11] Documented finalizers --- ...e_hooks.md => lifecycle_hooks_and_finalizers.md} | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) rename docs/design/{lifecycle_hooks.md => lifecycle_hooks_and_finalizers.md} (67%) diff --git a/docs/design/lifecycle_hooks.md b/docs/design/lifecycle_hooks_and_finalizers.md similarity index 67% rename from docs/design/lifecycle_hooks.md rename to docs/design/lifecycle_hooks_and_finalizers.md index 58060f3ae..c6acfa737 100644 --- a/docs/design/lifecycle_hooks.md +++ b/docs/design/lifecycle_hooks_and_finalizers.md @@ -1,4 +1,4 @@ -# Lifecycle hooks 
+# Lifecycle hooks & Finalizers

The ArangoDB operator expects full control of the `Pods` and `PersistentVolumeClaims` it creates. Therefore it takes measures to prevent the removal of those resources
@@ -24,3 +24,14 @@ Because the binary that is called in the `preStop` hook is not part of a standar ArangoDB docker image, it has to be brought into the filesystem of a `Pod`. This is done by an initial container that copies the binary to an `emptyDir` volume that is shared between the init-container and the server container. + +## Finalizers + +The ArangoDB operator adds the following finalizers to `Pods`. + +- `dbserver.database.arangodb.com/drain`: Added to DBServers, removed only when the dbserver can be restarted or is completely drained +- `agent.database.arangodb.com/agency-serving`: Added to Agents, removed only when enough agents are left to keep the agency serving + +The ArangoDB operator adds the following finalizers to `PersistentVolumeClaims`. + +- `pvc.database.arangodb.com/member-exists`: removed only when its member no longer exists or can be safely rebuilt From e2f2c60d1c88d419c0b6f9d0441644fa39484340 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 14 May 2018 11:59:18 +0200 Subject: [PATCH 09/11] Always remove cleaned out server --- pkg/apis/deployment/v1alpha/conditions.go | 3 +++ .../reconcile/action_cleanout_member.go | 11 +++++++++++ pkg/deployment/reconcile/plan_builder.go | 10 ++++++++++ pkg/deployment/resources/pod_creator.go | 3 +++ pkg/deployment/resources/pod_finalizers.go | 13 +++++++++---- pkg/deployment/resources/pod_inspector.go | 6 +++++- pkg/deployment/resources/pvc_finalizers.go | 16 ++++++++++++---- 7 files changed, 53 insertions(+), 9 deletions(-) diff --git a/pkg/apis/deployment/v1alpha/conditions.go b/pkg/apis/deployment/v1alpha/conditions.go index b4b023803..77a19d17e 100644 --- a/pkg/apis/deployment/v1alpha/conditions.go +++ b/pkg/apis/deployment/v1alpha/conditions.go @@ -37,6 +37,9 @@ const ( 
ConditionTypeTerminated ConditionType = "Terminated" // ConditionTypeAutoUpgrade indicates that the member has to be started with `--database.auto-upgrade` once. ConditionTypeAutoUpgrade ConditionType = "AutoUpgrade" + // ConditionTypeCleanedOut indicates that the member (dbserver) has been cleaned out. + // Always check in combination with ConditionTypeTerminated. + ConditionTypeCleanedOut ConditionType = "CleanedOut" // ConditionTypePodSchedulingFailure indicates that one or more pods belonging to the deployment cannot be schedule. ConditionTypePodSchedulingFailure ConditionType = "PodSchedulingFailure" // ConditionTypeSecretsChanged indicates that the value of one of more secrets used by diff --git a/pkg/deployment/reconcile/action_cleanout_member.go b/pkg/deployment/reconcile/action_cleanout_member.go index 6292a5c52..eb9d9fbc6 100644 --- a/pkg/deployment/reconcile/action_cleanout_member.go +++ b/pkg/deployment/reconcile/action_cleanout_member.go @@ -82,6 +82,11 @@ func (a *actionCleanoutMember) Start(ctx context.Context) (bool, error) { // Returns true if the action is completely finished, false otherwise. func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, error) { log := a.log + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + // We wanted to remove and it is already gone. 
All ok + return true, nil + } c, err := a.actionCtx.GetDatabaseClient(ctx) if err != nil { log.Debug().Err(err).Msg("Failed to create member client") @@ -101,5 +106,11 @@ func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, error) return false, nil } // Cleanout completed + if m.Conditions.Update(api.ConditionTypeCleanedOut, true, "CleanedOut", "") { + if a.actionCtx.UpdateMember(m); err != nil { + return false, maskAny(err) + } + } + // Cleanout completed return true, nil } diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index b28b032f6..091756568 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -109,6 +109,16 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, return nil }) + // Check for cleaned out dbserver in created state + for _, m := range status.Members.DBServers { + if len(plan) == 0 && m.Phase == api.MemberPhaseCreated && m.Conditions.IsTrue(api.ConditionTypeCleanedOut) { + plan = append(plan, + api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, m.ID), + api.NewAction(api.ActionTypeAddMember, api.ServerGroupDBServers, ""), + ) + } + } + // Check for scale up/down if len(plan) == 0 { switch spec.GetMode() { diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index e46b532ff..58fb3c436 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -437,6 +437,9 @@ func (r *Resources) EnsurePods() error { if m.Phase != api.MemberPhaseNone { continue } + if m.Conditions.IsTrue(api.ConditionTypeCleanedOut) { + continue + } spec := r.context.GetSpec() if err := r.createPodForMember(spec, group, groupSpec, m, status); err != nil { return maskAny(err) diff --git a/pkg/deployment/resources/pod_finalizers.go b/pkg/deployment/resources/pod_finalizers.go index 901c16e4c..5626d6caa 100644 --- a/pkg/deployment/resources/pod_finalizers.go 
+++ b/pkg/deployment/resources/pod_finalizers.go @@ -37,7 +37,7 @@ import ( ) // runPodFinalizers goes through the list of pod finalizers to see if they can be removed. -func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatus api.MemberStatus) error { +func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatus api.MemberStatus, updateMember func(api.MemberStatus) error) error { log := r.log.With().Str("pod-name", p.GetName()).Logger() var removalList []string for _, f := range p.ObjectMeta.GetFinalizers() { @@ -51,7 +51,7 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu } case constants.FinalizerPodDrainDBServer: log.Debug().Msg("Inspecting drain dbserver finalizer") - if err := r.inspectFinalizerPodDrainDBServer(ctx, log, p, memberStatus); err == nil { + if err := r.inspectFinalizerPodDrainDBServer(ctx, log, p, memberStatus, updateMember); err == nil { removalList = append(removalList, f) } else { log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") @@ -115,7 +115,7 @@ func (r *Resources) inspectFinalizerPodAgencyServing(ctx context.Context, log ze // inspectFinalizerPodDrainDBServer checks the finalizer condition for drain-dbserver. // It returns nil if the finalizer can be removed. 
-func (r *Resources) inspectFinalizerPodDrainDBServer(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus) error { +func (r *Resources) inspectFinalizerPodDrainDBServer(ctx context.Context, log zerolog.Logger, p *v1.Pod, memberStatus api.MemberStatus, updateMember func(api.MemberStatus) error) error { // Inspect member phase if memberStatus.Phase.IsFailed() { log.Debug().Msg("Pod is already failed, safe to remove drain dbserver finalizer") @@ -155,7 +155,12 @@ func (r *Resources) inspectFinalizerPodDrainDBServer(ctx context.Context, log ze return maskAny(err) } if cleanedOut { - // All done + // Cleanout completed + if memberStatus.Conditions.Update(api.ConditionTypeCleanedOut, true, "CleanedOut", "") { + if err := updateMember(memberStatus); err != nil { + return maskAny(err) + } + } log.Debug().Msg("Server is cleaned out. Save to remove drain dbserver finalizer") return nil } diff --git a/pkg/deployment/resources/pod_inspector.go b/pkg/deployment/resources/pod_inspector.go index c5928cbc0..f5d3c1548 100644 --- a/pkg/deployment/resources/pod_inspector.go +++ b/pkg/deployment/resources/pod_inspector.go @@ -136,7 +136,11 @@ func (r *Resources) InspectPods(ctx context.Context) error { } if k8sutil.IsPodMarkedForDeletion(&p) { // Process finalizers - if err := r.runPodFinalizers(ctx, &p, memberStatus); err != nil { + if err := r.runPodFinalizers(ctx, &p, memberStatus, func(m api.MemberStatus) error { + updateMemberStatusNeeded = true + memberStatus = m + return nil + }); err != nil { // Only log here, since we'll be called to try again. 
log.Warn().Err(err).Msg("Failed to run pod finalizers") } diff --git a/pkg/deployment/resources/pvc_finalizers.go b/pkg/deployment/resources/pvc_finalizers.go index 0381c469d..a964a4090 100644 --- a/pkg/deployment/resources/pvc_finalizers.go +++ b/pkg/deployment/resources/pvc_finalizers.go @@ -74,10 +74,18 @@ func (r *Resources) inspectFinalizerPVCMemberExists(ctx context.Context, log zer log.Debug().Msg("Entire deployment is being deleted, safe to remove member-exists finalizer") return nil } - // We do allow to rebuild agents - if group == api.ServerGroupAgents && memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) { - log.Debug().Msg("Rebuilding terminated agents is allowed, safe to remove member-exists finalizer") - return nil + // We do allow to rebuild agents & dbservers + switch group { + case api.ServerGroupAgents: + if memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) { + log.Debug().Msg("Rebuilding terminated agents is allowed, safe to remove member-exists finalizer") + return nil + } + case api.ServerGroupDBServers: + if memberStatus.Conditions.IsTrue(api.ConditionTypeCleanedOut) { + log.Debug().Msg("Removing cleanedout dbservers is allowed, safe to remove member-exists finalizer") + return nil + } } return maskAny(fmt.Errorf("Member still exists")) } From cb587c7795a70b4f91ff2dc66213f17443b6051b Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 14 May 2018 14:52:16 +0200 Subject: [PATCH 10/11] Fixed resilience PVC test wrt finalizers --- tests/resilience_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/resilience_test.go b/tests/resilience_test.go index 5144d7250..7958e25a8 100644 --- a/tests/resilience_test.go +++ b/tests/resilience_test.go @@ -166,8 +166,9 @@ func TestResiliencePVC(t *testing.T) { // Delete one pvc after the other apiObject.ForeachServerGroup(func(group api.ServerGroup, spec api.ServerGroupSpec, status *api.MemberStatusList) error { - if group == api.ServerGroupCoordinators 
{ + if group != api.ServerGroupAgents { // Coordinators have no PVC + // DBServers will be cleaned out and create a new member return nil } for _, m := range *status { @@ -179,6 +180,10 @@ func TestResiliencePVC(t *testing.T) { if err := kubecli.CoreV1().PersistentVolumeClaims(ns).Delete(m.PersistentVolumeClaimName, &metav1.DeleteOptions{}); err != nil { t.Fatalf("Failed to delete pvc %s: %v", m.PersistentVolumeClaimName, err) } + // Now delete the pod as well, otherwise the PVC will only have a deletion timestamp but its finalizers will stay on. + if err := kubecli.CoreV1().Pods(ns).Delete(m.PodName, &metav1.DeleteOptions{}); err != nil { + t.Fatalf("Failed to delete pod %s: %v", m.PodName, err) + } // Wait for pvc to return with different UID op := func() error { pvc, err := kubecli.CoreV1().PersistentVolumeClaims(ns).Get(m.PersistentVolumeClaimName, metav1.GetOptions{}) From bd90071045c35319691014c89ad4ae765765edf4 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Tue, 15 May 2018 10:17:02 +0200 Subject: [PATCH 11/11] Typo --- docs/design/lifecycle_hooks_and_finalizers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design/lifecycle_hooks_and_finalizers.md b/docs/design/lifecycle_hooks_and_finalizers.md index c6acfa737..d30b4723d 100644 --- a/docs/design/lifecycle_hooks_and_finalizers.md +++ b/docs/design/lifecycle_hooks_and_finalizers.md @@ -6,7 +6,7 @@ until it is safe to do so. To achieve this, the server containers in the `Pods` have a `preStop` hook configured and finalizers are added to the `Pods` -ands `PersistentVolumeClaims`. +and `PersistentVolumeClaims`. The `preStop` hook executes a binary that waits until all finalizers of the current pod have been removed.