Skip to content

Commit

Permalink
[Bugfix] Wait for ImageStatus (#1602)
Browse files Browse the repository at this point in the history
  • Loading branch information
ajanikow authored Feb 26, 2024
1 parent b085209 commit 3e0f90f
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- (Feature) Extract Scheduler API
- (Bugfix) Fix Image Discovery
- (Bugfix) Fix Resources Copy mechanism to prevent invalid pod creation
- (Bugfix) Wait for ImageStatus in ImageDiscover

## [1.2.38](https://github.com/arangodb/kube-arangodb/tree/1.2.38) (2024-02-22)
- (Feature) Extract GRPC Server
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,15 @@ Flags:
--deployment.feature.upgrade-version-check-v2 Enable initContainer with pre version check based by Operator - Required ArangoDB 3.8.0 or higher
--features-config-map-name string Name of the Feature Map ConfigMap (default "arangodb-operator-feature-config-map")
-h, --help help for arangodb_operator
--image.discovery.status Discover Operator Image from Pod Status by default. When disabled Pod Spec is used. (default true)
--image.discovery.timeout duration Timeout for image discovery process (default 1m0s)
--internal.scaling-integration Enable Scaling Integration
--kubernetes.burst int Burst for the k8s API (default 30)
--kubernetes.max-batch-size int Size of batch during objects read (default 256)
--kubernetes.qps float32 Number of queries per second for k8s API (default 15)
--log.format string Set log format. Allowed values: 'pretty', 'JSON'. If empty, default format is used (default "pretty")
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [debug])
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [info])
--log.sampling If true, operator will try to minimize duplication of logging events (default true)
--memory-limit uint Define memory limit for hard shutdown and the dump of goroutines. Used for testing
--metrics.excluded-prefixes stringArray List of the excluded metrics prefixes
--mode.single Enable single mode in Operator. WARNING: There should be only one replica of Operator, otherwise Operator can take unexpected actions
Expand Down
39 changes: 35 additions & 4 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ var (
backupArangoD time.Duration
backupUploadArangoD time.Duration
}
operatorImageDiscovery struct {
timeout time.Duration
defaultStatusDiscovery bool
}
operatorReconciliationRetry struct {
delay time.Duration
count int
Expand Down Expand Up @@ -241,6 +245,8 @@ func init() {
f.IntVar(&operatorBackup.concurrentUploads, "backup-concurrent-uploads", globals.DefaultBackupConcurrentUploads, "Number of concurrent uploads per deployment")
f.Uint64Var(&memoryLimit.hardLimit, "memory-limit", 0, "Define memory limit for hard shutdown and the dump of goroutines. Used for testing")
f.StringArrayVar(&metricsOptions.excludedMetricPrefixes, "metrics.excluded-prefixes", nil, "List of the excluded metrics prefixes")
f.BoolVar(&operatorImageDiscovery.defaultStatusDiscovery, "image.discovery.status", true, "Discover Operator Image from Pod Status by default. When disabled Pod Spec is used.")
f.DurationVar(&operatorImageDiscovery.timeout, "image.discovery.timeout", time.Minute, "Timeout for image discovery process")
if err := features.Init(&cmdMain); err != nil {
panic(err.Error())
}
Expand Down Expand Up @@ -584,6 +590,20 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
// getMyPodInfo looks up the image & service account of the pod with given name in given namespace
// Returns image, serviceAccount, error.
func getMyPodInfo(kubecli kubernetes.Interface, namespace, name string) (string, string, error) {
if image, sa, ok := getMyPodInfoWrap(kubecli, namespace, name, getMyImageInfoFunc(operatorImageDiscovery.defaultStatusDiscovery)); ok {
return image, sa, nil
}

logger.Warn("Unable to discover image, fallback to second method")

if image, sa, ok := getMyPodInfoWrap(kubecli, namespace, name, getMyImageInfoFunc(!operatorImageDiscovery.defaultStatusDiscovery)); ok {
return image, sa, nil
}

return "", "", errors.Errorf("Unable to discover image")
}

func getMyPodInfoWrap(kubecli kubernetes.Interface, namespace, name string, imageFunc func(in *core.Pod) (string, bool)) (string, string, bool) {
var image, sa string
op := func() error {
pod, err := kubecli.CoreV1().Pods(namespace).Get(context.Background(), name, meta.GetOptions{})
Expand All @@ -595,15 +615,26 @@ func getMyPodInfo(kubecli kubernetes.Interface, namespace, name string) (string,
return errors.WithStack(err)
}
sa = pod.Spec.ServiceAccountName
if image, err = k8sutil.GetArangoDBImageIDFromPod(pod, shared.ServerContainerName, shared.OperatorContainerName, constants.MyContainerNameEnv.GetOrDefault(shared.OperatorContainerName)); err != nil {
if i, ok := imageFunc(pod); !ok {
return errors.Wrap(err, "failed to get image ID from pod")
} else {
image = i
}
return nil
}
if err := retry.Retry(op, time.Minute*5); err != nil {
return "", "", errors.WithStack(err)
if err := retry.Retry(op, operatorImageDiscovery.timeout/2); err == nil {
return image, sa, true
}
return "", "", false
}

func getMyImageInfoFunc(status bool) func(pod *core.Pod) (string, bool) {
return func(pod *core.Pod) (string, bool) {
if status {
return k8sutil.GetArangoDBImageIDFromContainerStatuses(pod.Status.ContainerStatuses, shared.ServerContainerName, shared.OperatorContainerName, constants.MyContainerNameEnv.GetOrDefault(shared.OperatorContainerName))
}
return k8sutil.GetArangoDBImageIDFromContainers(pod.Spec.Containers, shared.ServerContainerName, shared.OperatorContainerName, constants.MyContainerNameEnv.GetOrDefault(shared.OperatorContainerName))
}
return image, sa, nil
}

func createRecorder(kubecli kubernetes.Interface, name, namespace string) record.EventRecorder {
Expand Down
15 changes: 12 additions & 3 deletions pkg/util/k8sutil/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@ func GetArangoDBImageIDFromPod(pod *core.Pod, names ...string) (string, error) {
if image, ok := GetArangoDBImageIDFromContainerStatuses(pod.Status.ContainerStatuses, names...); ok {
return image, nil
}

if image, ok := GetArangoDBImageIDFromContainers(pod.Spec.Containers, names...); ok {
return image, nil
}

if cs := pod.Status.ContainerStatuses; len(cs) > 0 {
if image := cs[0].ImageID; image != "" {
return ConvertImageID2Image(image), nil
if disc := ConvertImageID2Image(image); disc != "" {
return disc, nil
}
}
}
if cs := pod.Spec.Containers; len(cs) > 0 {
Expand All @@ -75,7 +78,11 @@ func GetArangoDBImageIDFromPod(pod *core.Pod, names ...string) (string, error) {
func GetArangoDBImageIDFromContainerStatuses(containers []core.ContainerStatus, names ...string) (string, bool) {
for _, name := range names {
if id := container.GetContainerStatusIDByName(containers, name); id != -1 {
return ConvertImageID2Image(containers[id].ImageID), true
if image := containers[id].ImageID; image != "" {
if disc := ConvertImageID2Image(image); disc != "" {
return disc, true
}
}
}
}

Expand All @@ -86,7 +93,9 @@ func GetArangoDBImageIDFromContainerStatuses(containers []core.ContainerStatus,
func GetArangoDBImageIDFromContainers(containers []core.Container, names ...string) (string, bool) {
for _, name := range names {
if id := container.GetContainerIDByName(containers, name); id != -1 {
return containers[id].Image, true
if image := containers[id].Image; image != "" {
return image, true
}
}
}

Expand Down

0 comments on commit 3e0f90f

Please sign in to comment.