Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: finalizers #136

Merged
merged 15 commits into from
May 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions docs/design/lifecycle_hooks_and_finalizers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Lifecycle hooks & Finalizers

The ArangoDB operator expects full control of the `Pods` and `PersistentVolumeClaims` it creates.
Therefore it takes measures to prevent the removal of those resources
until it is safe to do so.

To achieve this, the server containers in the `Pods` have
a `preStop` hook configured and finalizers are added to the `Pods`
and `PersistentVolumeClaims`.

The `preStop` hook executes a binary that waits until all finalizers of
the current pod have been removed.
Until this `preStop` hook terminates, Kubernetes will not send a `TERM` signal
to the processes inside the container, which ensures that the server remains running
until it is safe to stop it.

The operator performs all actions needed when a delete of a `Pod` or
`PersistentVolumeClaim` has been triggered.
E.g. for a dbserver it cleans out the server if the `Pod` and `PersistentVolumeClaim` are being deleted.

## Lifecycle init-container

Because the binary that is called in the `preStop` hook is not part of a standard
ArangoDB docker image, it has to be brought into the filesystem of a `Pod`.
This is done by an init-container that copies the binary to an `emptyDir` volume that
is shared between the init-container and the server container.

## Finalizers

The ArangoDB operator adds the following finalizers to `Pods`.

- `dbserver.database.arangodb.com/drain`: Added to DBServers, removed only when the dbserver can be restarted or is completely drained
- `agent.database.arangodb.com/agency-serving`: Added to Agents, removed only when enough agents are left to keep the agency serving

The ArangoDB operator adds the following finalizers to `PersistentVolumeClaims`.

- `pvc.database.arangodb.com/member-exists`: removed only when its member no longer exists or can be safely rebuilt
150 changes: 150 additions & 0 deletions lifecycle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//

package main

import (
"io"
"os"
"path/filepath"
"time"

"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/arangodb/kube-arangodb/pkg/util/constants"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)

// Commands of the hidden `lifecycle` command group, together with the
// options filled in by their flags.
var (
	// cmdLifecycle is the parent of all lifecycle sub-commands.
	// Invoked on its own it runs cmdUsage.
	cmdLifecycle = &cobra.Command{
		Use:    "lifecycle",
		Hidden: true,
		Run:    cmdUsage,
	}

	// cmdLifecyclePreStop runs cmdLifecyclePreStopRun.
	cmdLifecyclePreStop = &cobra.Command{
		Use:    "preStop",
		Hidden: true,
		Run:    cmdLifecyclePreStopRun,
	}

	// cmdLifecycleCopy runs cmdLifecycleCopyRun.
	cmdLifecycleCopy = &cobra.Command{
		Use:    "copy",
		Hidden: true,
		Run:    cmdLifecycleCopyRun,
	}

	// lifecycleCopyOptions holds the flag values of the `lifecycle copy` command.
	lifecycleCopyOptions struct {
		// TargetDir is the directory the executable is copied to.
		TargetDir string
	}
)

// init wires the lifecycle command group into the main command and
// registers the flags of the `copy` sub-command.
func init() {
	cmdMain.AddCommand(cmdLifecycle)
	cmdLifecycle.AddCommand(cmdLifecyclePreStop, cmdLifecycleCopy)

	copyFlags := cmdLifecycleCopy.Flags()
	copyFlags.StringVar(&lifecycleCopyOptions.TargetDir, "target", "", "Target directory to copy the executable to")
}

// cmdLifecyclePreStopRun waits until all finalizers of the current pod have been removed.
//
// The pod is identified by the namespace and name found in the environment
// variables named by constants.EnvOperatorPodNamespace and
// constants.EnvOperatorPodName. The pod is fetched once per second.
// The function returns as soon as the pod has no finalizers left or can no
// longer be found. A fatal message is logged when one of the environment
// variables is missing, when no Kubernetes client can be created, or when
// too many consecutive requests for the pod fail.
func cmdLifecyclePreStopRun(cmd *cobra.Command, args []string) {
	// Number of consecutive failed pod requests that is tolerated.
	const maxRecentErrors = 20

	cliLog.Info().Msgf("Starting arangodb-operator, lifecycle preStop, version %s build %s", projectVersion, projectBuild)

	// Get environment
	namespace := os.Getenv(constants.EnvOperatorPodNamespace)
	if len(namespace) == 0 {
		cliLog.Fatal().Msgf("%s environment variable missing", constants.EnvOperatorPodNamespace)
	}
	name := os.Getenv(constants.EnvOperatorPodName)
	if len(name) == 0 {
		cliLog.Fatal().Msgf("%s environment variable missing", constants.EnvOperatorPodName)
	}

	// Create kubernetes client
	kubecli, err := k8sutil.NewKubeClient()
	if err != nil {
		cliLog.Fatal().Err(err).Msg("Failed to create Kubernetes client")
	}

	pods := kubecli.CoreV1().Pods(namespace)
	recentErrors := 0
	for {
		p, err := pods.Get(name, metav1.GetOptions{})
		switch {
		case k8sutil.IsNotFound(err):
			cliLog.Warn().Msg("Pod not found")
			return
		case err != nil:
			recentErrors++
			cliLog.Error().Err(err).Msg("Failed to get pod")
			if recentErrors > maxRecentErrors {
				cliLog.Fatal().Err(err).Msg("Too many recent errors")
				return
			}
		default:
			// We got our pod, so earlier failures are no longer "recent".
			// Without this reset, sporadic errors spread over a long wait
			// would add up and abort the hook prematurely.
			recentErrors = 0
			finalizerCount := len(p.GetFinalizers())
			if finalizerCount == 0 {
				// No more finalizers, we're done
				cliLog.Info().Msg("All finalizers gone, we can stop now")
				return
			}
			cliLog.Info().Msgf("Waiting for %d more finalizers to be removed", finalizerCount)
		}
		// Wait a bit before asking again
		time.Sleep(time.Second)
	}
}

// cmdLifecycleCopyRun copies the currently running executable into the
// directory given by the `--target` flag and makes the copy executable.
//
// The copy keeps the base name of the running executable. Every failure
// (locating, opening, creating, copying, closing or chmod-ing) is logged
// as a fatal message.
func cmdLifecycleCopyRun(cmd *cobra.Command, args []string) {
	cliLog.Info().Msgf("Starting arangodb-operator, lifecycle copy, version %s build %s", projectVersion, projectBuild)

	exePath, err := os.Executable()
	if err != nil {
		cliLog.Fatal().Err(err).Msg("Failed to get executable path")
	}

	// Open source
	rd, err := os.Open(exePath)
	if err != nil {
		cliLog.Fatal().Err(err).Msg("Failed to open executable file")
	}
	defer rd.Close()

	// Open target
	targetPath := filepath.Join(lifecycleCopyOptions.TargetDir, filepath.Base(exePath))
	wr, err := os.Create(targetPath)
	if err != nil {
		cliLog.Fatal().Err(err).Msg("Failed to create target file")
	}

	if _, err := io.Copy(wr, rd); err != nil {
		wr.Close()
		cliLog.Fatal().Err(err).Msg("Failed to copy")
	}

	// Close the target explicitly instead of deferring it: a write error
	// may only surface on Close, and an incomplete binary must not be
	// reported as a successful copy.
	if err := wr.Close(); err != nil {
		cliLog.Fatal().Err(err).Msg("Failed to close target file")
	}

	// Set file mode
	if err := os.Chmod(targetPath, 0755); err != nil {
		cliLog.Fatal().Err(err).Msg("Failed to chmod")
	}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for the record (one more time): LOL about this trick.

19 changes: 13 additions & 6 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
return operator.Config{}, operator.Dependencies{}, maskAny(err)
}

serviceAccount, err := getMyPodServiceAccount(kubecli, namespace, name)
image, serviceAccount, err := getMyPodInfo(kubecli, namespace, name)
if err != nil {
return operator.Config{}, operator.Dependencies{}, maskAny(fmt.Errorf("Failed to get my pod's service account: %s", err))
}
Expand All @@ -213,6 +213,7 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
Namespace: namespace,
PodName: name,
ServiceAccount: serviceAccount,
LifecycleImage: image,
EnableDeployment: operatorOptions.enableDeployment,
EnableStorage: operatorOptions.enableStorage,
AllowChaos: chaosOptions.allowed,
Expand All @@ -231,9 +232,10 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
return cfg, deps, nil
}

// getMyPodServiceAccount looks up the service account of the pod with given name in given namespace
func getMyPodServiceAccount(kubecli kubernetes.Interface, namespace, name string) (string, error) {
var sa string
// getMyPodInfo looks up the image & service account of the pod with given name in given namespace
// Returns image, serviceAccount, error.
func getMyPodInfo(kubecli kubernetes.Interface, namespace, name string) (string, string, error) {
var image, sa string
op := func() error {
pod, err := kubecli.CoreV1().Pods(namespace).Get(name, metav1.GetOptions{})
if err != nil {
Expand All @@ -244,12 +246,17 @@ func getMyPodServiceAccount(kubecli kubernetes.Interface, namespace, name string
return maskAny(err)
}
sa = pod.Spec.ServiceAccountName
image = k8sutil.ConvertImageID2Image(pod.Status.ContainerStatuses[0].ImageID)
if image == "" {
// Fallback in case we don't know the id.
image = pod.Spec.Containers[0].Image
}
return nil
}
if err := retry.Retry(op, time.Minute*5); err != nil {
return "", maskAny(err)
return "", "", maskAny(err)
}
return sa, nil
return image, sa, nil
}

func createRecorder(log zerolog.Logger, kubecli kubernetes.Interface, name, namespace string) record.EventRecorder {
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/deployment/v1alpha/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ const (
ConditionTypeTerminated ConditionType = "Terminated"
// ConditionTypeAutoUpgrade indicates that the member has to be started with `--database.auto-upgrade` once.
ConditionTypeAutoUpgrade ConditionType = "AutoUpgrade"
// ConditionTypeCleanedOut indicates that the member (dbserver) has been cleaned out.
// Always check in combination with ConditionTypeTerminated.
ConditionTypeCleanedOut ConditionType = "CleanedOut"
// ConditionTypePodSchedulingFailure indicates that one or more pods belonging to the deployment cannot be schedule.
ConditionTypePodSchedulingFailure ConditionType = "PodSchedulingFailure"
// ConditionTypeSecretsChanged indicates that the value of one of more secrets used by
Expand Down
11 changes: 7 additions & 4 deletions pkg/apis/deployment/v1alpha/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,14 @@ type ArangoDeployment struct {

// AsOwner creates an OwnerReference for the given deployment
func (d *ArangoDeployment) AsOwner() metav1.OwnerReference {
trueVar := true
return metav1.OwnerReference{
APIVersion: SchemeGroupVersion.String(),
Kind: ArangoDeploymentResourceKind,
Name: d.Name,
UID: d.UID,
APIVersion: SchemeGroupVersion.String(),
Kind: ArangoDeploymentResourceKind,
Name: d.Name,
UID: d.UID,
Controller: &trueVar,
BlockOwnerDeletion: &trueVar,
}
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/apis/deployment/v1alpha/deployment_status_members.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,22 @@ func (ds DeploymentStatusMembers) MemberStatusByPodName(podName string) (MemberS
return MemberStatus{}, 0, false
}

// MemberStatusByPVCName looks for the member with the given PVC name in the
// Single, Agents and DBServers lists (in that order).
// It returns the member status, the group it was found in and true.
// If no such member exists, an empty status, group 0 and false are returned.
func (ds DeploymentStatusMembers) MemberStatusByPVCName(pvcName string) (MemberStatus, ServerGroup, bool) {
	member, found := ds.Single.ElementByPVCName(pvcName)
	if found {
		return member, ServerGroupSingle, true
	}

	member, found = ds.Agents.ElementByPVCName(pvcName)
	if found {
		return member, ServerGroupAgents, true
	}

	member, found = ds.DBServers.ElementByPVCName(pvcName)
	if found {
		return member, ServerGroupDBServers, true
	}

	// Note: Other server groups do not have PVC's so we can skip them.
	return MemberStatus{}, 0, false
}

// UpdateMemberStatus updates the given status in the given group.
func (ds *DeploymentStatusMembers) UpdateMemberStatus(status MemberStatus, group ServerGroup) error {
var err error
Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/deployment/v1alpha/member_status_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ func (l MemberStatusList) ElementByPodName(podName string) (MemberStatus, bool)
return MemberStatus{}, false
}

// ElementByPVCName searches the list for the first member whose
// PersistentVolumeClaimName equals pvcName and returns it together with true.
// When there is no such member, an empty MemberStatus and false are returned.
func (l MemberStatusList) ElementByPVCName(pvcName string) (MemberStatus, bool) {
	for _, member := range l {
		if member.PersistentVolumeClaimName != pvcName {
			continue
		}
		return member, true
	}
	return MemberStatus{}, false
}

// Add a member to the list.
// Returns an AlreadyExistsError if the ID of the given member already exists.
func (l *MemberStatusList) Add(m MemberStatus) error {
Expand Down
16 changes: 16 additions & 0 deletions pkg/apis/deployment/v1alpha/server_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

package v1alpha

import time "time"

type ServerGroup int

const (
Expand Down Expand Up @@ -85,6 +87,20 @@ func (g ServerGroup) AsRoleAbbreviated() string {
}
}

// DefaultTerminationGracePeriod yields the default amount of time between
// SIGTERM and SIGKILL for a server of group g: one hour for dbservers,
// one minute for single servers and agents, and 30 seconds for any other group.
func (g ServerGroup) DefaultTerminationGracePeriod() time.Duration {
	switch g {
	case ServerGroupDBServers:
		return time.Hour
	case ServerGroupSingle, ServerGroupAgents:
		return time.Minute
	}
	return 30 * time.Second
}

// IsArangod returns true when the groups runs servers of type `arangod`.
func (g ServerGroup) IsArangod() bool {
switch g {
Expand Down
59 changes: 59 additions & 0 deletions pkg/deployment/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//

package deployment

import (
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)

// removePodFinalizers strips every finalizer from each pod returned by
// GetOwnedPods. A failure to list the pods is returned to the caller;
// a failure on an individual pod is only logged as a warning, after which
// the remaining pods are still processed.
func (d *Deployment) removePodFinalizers() error {
	logger := d.deps.Log
	cli := d.GetKubeCli()

	ownedPods, listErr := d.GetOwnedPods()
	if listErr != nil {
		return maskAny(listErr)
	}

	for _, pod := range ownedPods {
		finalizers := pod.GetFinalizers()
		removeErr := k8sutil.RemovePodFinalizers(logger, cli, &pod, finalizers)
		if removeErr == nil {
			continue
		}
		logger.Warn().Err(removeErr).Msg("Failed to remove pod finalizers")
	}
	return nil
}

// removePVCFinalizers strips every finalizer from each PVC returned by
// GetOwnedPVCs. A failure to list the PVCs is returned to the caller;
// a failure on an individual PVC is only logged as a warning, after which
// the remaining PVCs are still processed.
func (d *Deployment) removePVCFinalizers() error {
	logger := d.deps.Log
	cli := d.GetKubeCli()

	ownedPVCs, listErr := d.GetOwnedPVCs()
	if listErr != nil {
		return maskAny(listErr)
	}

	for _, pvc := range ownedPVCs {
		finalizers := pvc.GetFinalizers()
		removeErr := k8sutil.RemovePVCFinalizers(logger, cli, &pvc, finalizers)
		if removeErr == nil {
			continue
		}
		logger.Warn().Err(removeErr).Msg("Failed to remove PVC finalizers")
	}
	return nil
}
Loading