diff --git a/go.mod b/go.mod index 769e8a99f..fb708e460 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/Azure/go-autorest/autorest v0.11.17 github.com/Azure/go-autorest/autorest/adal v0.9.5 github.com/Azure/go-autorest/autorest/to v0.3.0 - github.com/gardener/machine-controller-manager v0.36.0 + github.com/gardener/machine-controller-manager v0.37.0 github.com/golang/mock v1.4.4 github.com/golang/protobuf v1.3.2 // indirect github.com/onsi/ginkgo v1.12.0 diff --git a/go.sum b/go.sum index 807f26400..1556987cd 100644 --- a/go.sum +++ b/go.sum @@ -6,8 +6,6 @@ github.com/Azure/azure-sdk-for-go v42.2.0+incompatible h1:ezf8BQIvXYn+LSf+rDqOVy github.com/Azure/azure-sdk-for-go v42.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/azure-sdk-for-go v50.0.0+incompatible h1:kFIPXbg+knN0rsmsj3jIuoxOYCsevOwvwUgwICmrIwA= github.com/Azure/azure-sdk-for-go v50.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= -github.com/Azure/azure-sdk-for-go v51.0.0+incompatible h1:p7blnyJSjJqf5jflHbSGhIhEpXIgIFmYZNg5uwqweso= -github.com/Azure/azure-sdk-for-go v51.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= @@ -81,8 +79,8 @@ github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTg github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/gardener/machine-controller-manager v0.36.0 h1:bGtmkz0si1zzRSUCR8+Fpku+oTim66iJeTp01os1Pz4= -github.com/gardener/machine-controller-manager v0.36.0/go.mod h1:Be9VDEXC8fF62inu5kyq5pnzmBmaJOczDMYFQdhGDWk= +github.com/gardener/machine-controller-manager v0.37.0 h1:am2FNCmBNQyNwsagsqH/tEYdDADzPH5a0UCClX3V6JA= +github.com/gardener/machine-controller-manager v0.37.0/go.mod h1:Be9VDEXC8fF62inu5kyq5pnzmBmaJOczDMYFQdhGDWk= github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/metrics/metrics.go b/vendor/github.com/gardener/machine-controller-manager/pkg/metrics/metrics.go new file mode 100644 index 000000000..29ffca485 --- /dev/null +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/metrics/metrics.go @@ -0,0 +1,340 @@ +/* +Copyright (c) 2017 SAP SE or an SAP affiliate company. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +const ( + namespace = "mcm" + machineSubsystem = "machine" + machinesetSubsystem = "machine_set" + machinedeploymentSubsystem = "machine_deployment" + cloudAPISubsystem = "cloud_api" +) + +var ( + // MachineControllerFrozenDesc is a metric about MachineController's frozen status + MachineControllerFrozenDesc = prometheus.NewDesc("mcm_machine_controller_frozen", "Frozen status of the machine controller manager.", nil, nil) + // MachineCountDesc is a metric about machine count of the mcm manages + MachineCountDesc = prometheus.NewDesc("mcm_machine_items_total", "Count of machines currently managed by the mcm.", nil, nil) + + //MachineCSPhase Current status phase of the Machines currently managed by the mcm. + MachineCSPhase = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machineSubsystem, + Name: "current_status_phase", + Help: "Current status phase of the Machines currently managed by the mcm.", + }, []string{"name", "namespace"}) + + //MachineInfo Information of the Machines currently managed by the mcm. + MachineInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machineSubsystem, + Name: "info", + Help: "Information of the Machines currently managed by the mcm.", + }, []string{"name", "namespace", "createdAt", + "spec_provider_id", "spec_class_api_group", "spec_class_kind", "spec_class_name"}) + + // MachineStatusCondition Information of the mcm managed Machines' status conditions + MachineStatusCondition = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machineSubsystem, + Name: "status_condition", + Help: "Information of the mcm managed Machines' status conditions.", + }, []string{"name", "namespace", "condition"}) + + // MachineSetCountDesc Count of machinesets currently managed by the mcm + MachineSetCountDesc = prometheus.NewDesc("mcm_machine_set_items_total", "Count of machinesets currently managed by the mcm.", nil, nil) + + // MachineSetInfo Information of the Machinesets currently managed by the mcm. + MachineSetInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "info", + Help: "Information of the Machinesets currently managed by the mcm.", + }, []string{"name", "namespace", "createdAt", + "spec_machine_class_api_group", "spec_machine_class_kind", "spec_machine_class_name"}) + + // MachineSetInfoSpecReplicas Count of the Machinesets Spec Replicas. + MachineSetInfoSpecReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "info_spec_replicas", + Help: "Count of the Machinesets Spec Replicas.", + }, []string{"name", "namespace"}) + + // MachineSetInfoSpecMinReadySeconds Information of the Machinesets currently managed by the mcm. + MachineSetInfoSpecMinReadySeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "info_spec_min_ready_seconds", + Help: "Information of the Machinesets currently managed by the mcm.", + }, []string{"name", "namespace"}) + + // MachineSetStatusCondition Information of the mcm managed Machinesets' status conditions. + MachineSetStatusCondition = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "status_condition", + Help: "Information of the mcm managed Machinesets' status conditions.", + }, []string{"name", "namespace", "condition"}) + + // MachineSetStatusFailedMachines Information of the mcm managed Machinesets' failed machines. + MachineSetStatusFailedMachines = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "failed_machines", + Help: "Information of the mcm managed Machinesets' failed machines.", + }, []string{"name", "namespace", "failed_machine_name", "failed_machine_provider_id", "failed_machine_owner_ref", + "failed_machine_last_operation_state", + "failed_machine_last_operation_machine_operation_type"}) + + // MachineSetStatusAvailableReplicas Information of the mcm managed Machinesets' status for available replicas. + MachineSetStatusAvailableReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "status_available_replicas", + Help: "Information of the mcm managed Machinesets' status for available replicas.", + }, []string{"name", "namespace"}) + + // MachineSetStatusFullyLabelledReplicas Information of the mcm managed Machinesets' status for fully labelled replicas. + MachineSetStatusFullyLabelledReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "status_fully_labelled_replicas", + Help: "Information of the mcm managed Machinesets' status for fully labelled replicas.", + }, []string{"name", "namespace"}) + + // MachineSetStatusReadyReplicas Information of the mcm managed Machinesets' status for ready replicas + MachineSetStatusReadyReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "status_ready_replicas", + Help: "Information of the mcm managed Machinesets' status for ready replicas.", + }, []string{"name", "namespace"}) + + // MachineSetStatusReplicas Information of the mcm managed Machinesets' status for replicas. + MachineSetStatusReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinesetSubsystem, + Name: "status_replicas", + Help: "Information of the mcm managed Machinesets' status for replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentCountDesc Count of machinedeployments currently managed by the mcm. + MachineDeploymentCountDesc = prometheus.NewDesc("mcm_machine_deployment_items_total", "Count of machinedeployments currently managed by the mcm.", nil, nil) + + // MachineDeploymentInfo Information of the Machinedeployments currently managed by the mcm. + MachineDeploymentInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info", + Help: "Information of the Machinedeployments currently managed by the mcm.", + }, []string{"name", "namespace", "createdAt", "spec_strategy_type"}) + + // MachineDeploymentInfoSpecPaused Information of the Machinedeployments paused status. + MachineDeploymentInfoSpecPaused = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_paused", + Help: "Information of the Machinedeployments paused status.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecReplicas Information of the Machinedeployments spec replicas. + MachineDeploymentInfoSpecReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_replicas", + Help: "Information of the Machinedeployments spec replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecMinReadySeconds Information of the Machinedeployments spec min ready seconds. + MachineDeploymentInfoSpecMinReadySeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_min_ready_seconds", + Help: "Information of the Machinedeployments spec min ready seconds.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecRollingUpdateMaxSurge Information of the Machinedeployments spec rolling update max surge. + MachineDeploymentInfoSpecRollingUpdateMaxSurge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_rolling_update_max_surge", + Help: "Information of the Machinedeployments spec rolling update max surge.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecRollingUpdateMaxUnavailable Information of the Machinedeployments spec rolling update max unavailable. + MachineDeploymentInfoSpecRollingUpdateMaxUnavailable = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_rolling_update_max_unavailable", + Help: "Information of the Machinedeployments spec rolling update max unavailable.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecRevisionHistoryLimit Information of the Machinedeployments spec revision history limit. + MachineDeploymentInfoSpecRevisionHistoryLimit = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_revision_history_limit", + Help: "Information of the Machinedeployments spec revision history limit.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecProgressDeadlineSeconds Information of the Machinedeployments spec deadline seconds. + MachineDeploymentInfoSpecProgressDeadlineSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_progress_deadline_seconds", + Help: "Information of the Machinedeployments spec deadline seconds.", + }, []string{"name", "namespace"}) + + // MachineDeploymentInfoSpecRollbackToRevision Information of the Machinedeployments spec rollback to revision. + MachineDeploymentInfoSpecRollbackToRevision = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "info_spec_rollback_to_revision", + Help: "Information of the Machinedeployments spec rollback to revision.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusCondition Information of the mcm managed Machinedeployments' status conditions. + MachineDeploymentStatusCondition = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_condition", + Help: "Information of the mcm managed Machinedeployments' status conditions.", + }, []string{"name", "namespace", "condition"}) + + // MachineDeploymentStatusAvailableReplicas Count of the mcm managed Machinedeployments available replicas. + MachineDeploymentStatusAvailableReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_available_replicas", + Help: "Count of the mcm managed Machinedeployments available replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusUnavailableReplicas Count of the mcm managed Machinedeployments unavailable replicas. + MachineDeploymentStatusUnavailableReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_unavailable_replicas", + Help: "Count of the mcm managed Machinedeployments unavailable replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusReadyReplicas Count of the mcm managed Machinedeployments ready replicas. + MachineDeploymentStatusReadyReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_ready_replicas", + Help: "Count of the mcm managed Machinedeployments ready replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusUpdatedReplicas Count of the mcm managed Machinedeployments updated replicas. + MachineDeploymentStatusUpdatedReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_updated_replicas", + Help: "Count of the mcm managed Machinedeployments updated replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusCollisionCount Mcm managed Machinedeployments collision count. + MachineDeploymentStatusCollisionCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_collision_count", + Help: "Mcm managed Machinedeployments collision count.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusReplicas Count of the mcm managed Machinedeployments replicas. + MachineDeploymentStatusReplicas = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "status_replicas", + Help: "Count of the mcm managed Machinedeployments replicas.", + }, []string{"name", "namespace"}) + + // MachineDeploymentStatusFailedMachines Information of the mcm managed Machinedeployments' failed machines. + MachineDeploymentStatusFailedMachines = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: machinedeploymentSubsystem, + Name: "failed_machines", + Help: "Information of the mcm managed Machinedeployments' failed machines.", + }, []string{"name", "namespace", "failed_machine_name", "failed_machine_provider_id", "failed_machine_owner_ref", + "failed_machine_last_operation_state", + "failed_machine_last_operation_machine_operation_type"}) + + // APIRequestCount Number of Cloud Service API requests, partitioned by provider, and service. + APIRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "requests_total", + Help: "Number of Cloud Service API requests, partitioned by provider, and service.", + }, []string{"provider", "service"}, + ) + + // APIFailedRequestCount Number of Failed Cloud Service API requests, partitioned by provider, and service. + APIFailedRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: cloudAPISubsystem, + Name: "requests_failed_total", + Help: "Number of Failed Cloud Service API requests, partitioned by provider, and service.", + }, []string{"provider", "service"}, + ) + + // ScrapeFailedCounter is a Prometheus metric, which counts errors during metrics collection. + ScrapeFailedCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Name: "scrape_failure_total", + Help: "Total count of scrape failures.", + }, []string{"kind"}) +) + +func init() { + prometheus.MustRegister(ScrapeFailedCounter) + prometheus.MustRegister(MachineInfo) + prometheus.MustRegister(MachineStatusCondition) + prometheus.MustRegister(MachineCSPhase) + prometheus.MustRegister(MachineSetInfo) + prometheus.MustRegister(MachineSetInfoSpecReplicas) + prometheus.MustRegister(MachineSetInfoSpecMinReadySeconds) + prometheus.MustRegister(MachineSetStatusAvailableReplicas) + prometheus.MustRegister(MachineSetStatusFullyLabelledReplicas) + prometheus.MustRegister(MachineSetStatusReadyReplicas) + prometheus.MustRegister(MachineSetStatusReplicas) + prometheus.MustRegister(MachineSetStatusCondition) + prometheus.MustRegister(MachineSetStatusFailedMachines) + prometheus.MustRegister(MachineDeploymentInfo) + prometheus.MustRegister(MachineDeploymentInfoSpecPaused) + prometheus.MustRegister(MachineDeploymentInfoSpecReplicas) + prometheus.MustRegister(MachineDeploymentInfoSpecRevisionHistoryLimit) + prometheus.MustRegister(MachineDeploymentInfoSpecMinReadySeconds) + prometheus.MustRegister(MachineDeploymentInfoSpecRollingUpdateMaxSurge) + prometheus.MustRegister(MachineDeploymentInfoSpecRollingUpdateMaxUnavailable) + prometheus.MustRegister(MachineDeploymentInfoSpecProgressDeadlineSeconds) + prometheus.MustRegister(MachineDeploymentInfoSpecRollbackToRevision) + prometheus.MustRegister(MachineDeploymentStatusCondition) + prometheus.MustRegister(MachineDeploymentStatusAvailableReplicas) + prometheus.MustRegister(MachineDeploymentStatusUnavailableReplicas) + prometheus.MustRegister(MachineDeploymentStatusReadyReplicas) + prometheus.MustRegister(MachineDeploymentStatusUpdatedReplicas) + prometheus.MustRegister(MachineDeploymentStatusCollisionCount) + prometheus.MustRegister(MachineDeploymentStatusReplicas) + prometheus.MustRegister(MachineDeploymentStatusFailedMachines) + prometheus.MustRegister(APIRequestCount) + prometheus.MustRegister(APIFailedRequestCount) +} diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/app/app.go b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/app/app.go index ae8927533..cc2e8e455 100644 --- a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/app/app.go +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/app/app.go @@ -261,6 +261,7 @@ func StartControllers(s *options.MCServer, targetCoreInformerFactory.Core().V1().PersistentVolumes(), controlCoreInformerFactory.Core().V1().Secrets(), targetCoreInformerFactory.Core().V1().Nodes(), + targetCoreInformerFactory.Policy().V1beta1().PodDisruptionBudgets(), machineSharedInformers.MachineClasses(), machineSharedInformers.Machines(), recorder, diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/drain/drain.go b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/drain/drain.go index 6a0c52113..05f622e0f 100644 --- a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/drain/drain.go +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/drain/drain.go @@ -35,7 +35,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/util/provider/driver" api "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1" - policy "k8s.io/api/policy/v1beta1" + policyv1beta1 "k8s.io/api/policy/v1beta1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" @@ -43,6 +43,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" corelisters "k8s.io/client-go/listers/core/v1" + policylisters "k8s.io/client-go/listers/policy/v1beta1" "k8s.io/klog" ) @@ -63,6 +64,7 @@ type Options struct { Driver driver.Driver pvcLister corelisters.PersistentVolumeClaimLister pvLister corelisters.PersistentVolumeLister + pdbLister policylisters.PodDisruptionBudgetLister drainStartedOn time.Time drainEndedOn time.Time } @@ -133,6 +135,7 @@ func NewDrainOptions( driver driver.Driver, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, + pdbLister policylisters.PodDisruptionBudgetLister, ) *Options { return &Options{ @@ -151,6 +154,7 @@ func NewDrainOptions( Driver: driver, pvcLister: pvcLister, pvLister: pvLister, + pdbLister: pdbLister, } } @@ -335,7 +339,7 @@ func (o *Options) evictPod(pod *api.Pod, policyGroupVersion string) error { gracePeriodSeconds := int64(o.GracePeriodSeconds) deleteOptions.GracePeriodSeconds = &gracePeriodSeconds } - eviction := &policy.Eviction{ + eviction := &policyv1beta1.Eviction{ TypeMeta: metav1.TypeMeta{ APIVersion: policyGroupVersion, Kind: EvictionKind, @@ -631,7 +635,18 @@ func (o *Options) evictPodsWithPVInternal(attemptEvict bool, pods []*corev1.Pod, if attemptEvict && apierrors.IsTooManyRequests(err) { // Pod eviction failed because of PDB violation, we will retry one we are done with this list. - klog.V(3).Info("Pod ", pod.Namespace, "/", pod.Name, " from node ", pod.Spec.NodeName, " couldn't be evicted. This may also occur due to PDB violation. Will be retried. Error:", err) + klog.V(3).Infof("Pod %s/%s couldn't be evicted from node %s. This may also occur due to PDB violation. Will be retried. Error: %v", pod.Namespace, pod.Name, pod.Spec.NodeName, err) + + pdb := getPdbForPod(o.pdbLister, pod) + if pdb != nil { + if isMisconfiguredPdb(pdb) { + pdbErr := fmt.Errorf("error while evicting pod %q: pod disruption budget %s/%s is misconfigured and requires zero voluntary evictions", + pod.Name, pdb.Namespace, pdb.Name) + returnCh <- pdbErr + continue + } + } + retryPods = append(retryPods, pod) continue } else if apierrors.IsNotFound(err) { @@ -840,6 +855,18 @@ func (o *Options) evictPodWithoutPVInternal(attemptEvict bool, pod *corev1.Pod, return } else if attemptEvict && apierrors.IsTooManyRequests(err) { // Pod couldn't be evicted because of PDB violation + klog.V(3).Infof("Pod %s/%s couldn't be evicted from node %s. This may also occur due to PDB violation. Will be retried. Error: %v", pod.Namespace, pod.Name, pod.Spec.NodeName, err) + + pdb := getPdbForPod(o.pdbLister, pod) + if pdb != nil { + if isMisconfiguredPdb(pdb) { + pdbErr := fmt.Errorf("error while evicting pod %q: pod disruption budget %s/%s is misconfigured and requires zero voluntary evictions", + pod.Name, pdb.Namespace, pdb.Name) + returnCh <- pdbErr + return + } + } + time.Sleep(PodEvictionRetryInterval) } else { returnCh <- fmt.Errorf("error when evicting pod %q: %v scheduled on node %v", pod.Name, err, pod.Spec.NodeName) @@ -956,3 +983,27 @@ func (o *Options) RunCordonOrUncordon(desired bool) error { } return nil } + +func getPdbForPod(pdbLister policylisters.PodDisruptionBudgetLister, pod *corev1.Pod) *policyv1beta1.PodDisruptionBudget { + // GetPodPodDisruptionBudgets returns an error only if no PodDisruptionBudgets are found. + // We don't return that as an error to the caller. + pdbs, err := pdbLister.GetPodPodDisruptionBudgets(pod) + if err != nil { + klog.V(4).Infof("No PodDisruptionBudgets found for pod %s/%s.", pod.Namespace, pod.Name) + return nil + } + + if len(pdbs) > 1 { + klog.Warningf("Pod %s/%s matches multiple PodDisruptionBudgets. Chose %q arbitrarily.", pod.Namespace, pod.Name, pdbs[0].Name) + } + + return pdbs[0] +} + +func isMisconfiguredPdb(pdb *policyv1beta1.PodDisruptionBudget) bool { + if pdb.ObjectMeta.Generation != pdb.Status.ObservedGeneration { + return false + } + + return pdb.Status.ExpectedPods > 0 && pdb.Status.CurrentHealthy >= pdb.Status.ExpectedPods && pdb.Status.PodDisruptionsAllowed == 0 +} diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/driver/fake.go b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/driver/fake.go index 31dd156bf..83b123434 100644 --- a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/driver/fake.go +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/driver/fake.go @@ -25,6 +25,9 @@ import ( "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status" ) +// VMs is the map to hold the VM data +type VMs map[string]string + // FakeDriver is a fake driver returned when none of the actual drivers match type FakeDriver struct { VMExists bool @@ -32,13 +35,31 @@ type FakeDriver struct { NodeName string LastKnownState string Err error + fakeVMs VMs } // NewFakeDriver returns a new fakedriver object -func NewFakeDriver(fakeDriver *FakeDriver) Driver { +func NewFakeDriver(vmExists bool, providerID, nodeName, lastKnownState string, err error, fakeVMs VMs) Driver { + fakeDriver := &FakeDriver{ + VMExists: vmExists, + ProviderID: providerID, + NodeName: nodeName, + LastKnownState: lastKnownState, + Err: err, + fakeVMs: make(VMs), + } + if providerID != "" && nodeName != "" { + _ = fakeDriver.AddMachine(providerID, nodeName) + } return fakeDriver } +// AddMachine makes a call to the driver to create the machine. +func (d *FakeDriver) AddMachine(machineID, machineName string) error { + d.fakeVMs[machineID] = machineName + return nil +} + // CreateMachine makes a call to the driver to create the machine. func (d *FakeDriver) CreateMachine(ctx context.Context, createMachineRequest *CreateMachineRequest) (*CreateMachineResponse, error) { if d.Err == nil { @@ -56,6 +77,7 @@ func (d *FakeDriver) CreateMachine(ctx context.Context, createMachineRequest *Cr // DeleteMachine make a call to the driver to delete the machine. func (d *FakeDriver) DeleteMachine(ctx context.Context, deleteMachineRequest *DeleteMachineRequest) (*DeleteMachineResponse, error) { d.VMExists = false + delete(d.fakeVMs, deleteMachineRequest.Machine.Spec.ProviderID) return &DeleteMachineResponse{ LastKnownState: d.LastKnownState, }, d.Err @@ -80,7 +102,7 @@ func (d *FakeDriver) GetMachineStatus(ctx context.Context, getMachineStatusReque // ListMachines have to list machines func (d *FakeDriver) ListMachines(ctx context.Context, listMachinesRequest *ListMachinesRequest) (*ListMachinesResponse, error) { return &ListMachinesResponse{ - MachineList: map[string]string{}, + MachineList: d.fakeVMs, }, d.Err } diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/controller.go b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/controller.go index fdab3212e..df153bcd7 100644 --- a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/controller.go +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/controller.go @@ -35,9 +35,11 @@ import ( runtimeutil "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" coreinformers "k8s.io/client-go/informers/core/v1" + policyinformers "k8s.io/client-go/informers/policy/v1beta1" "k8s.io/client-go/kubernetes" typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" corelisters "k8s.io/client-go/listers/core/v1" + policylisters "k8s.io/client-go/listers/policy/v1beta1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" @@ -72,6 +74,7 @@ func NewController( pvInformer coreinformers.PersistentVolumeInformer, secretInformer coreinformers.SecretInformer, nodeInformer coreinformers.NodeInformer, + pdbInformer policyinformers.PodDisruptionBudgetInformer, machineClassInformer machineinformers.MachineClassInformer, machineInformer machineinformers.MachineInformer, recorder record.EventRecorder, @@ -115,12 +118,14 @@ func NewController( controller.pvcLister = pvcInformer.Lister() controller.pvLister = pvInformer.Lister() controller.secretLister = secretInformer.Lister() + controller.pdbLister = pdbInformer.Lister() controller.machineClassLister = machineClassInformer.Lister() controller.nodeLister = nodeInformer.Lister() controller.machineLister = machineInformer.Lister() // Controller syncs controller.secretSynced = secretInformer.Informer().HasSynced + controller.pdbSynced = pdbInformer.Informer().HasSynced controller.machineClassSynced = machineClassInformer.Informer().HasSynced controller.nodeSynced = nodeInformer.Informer().HasSynced controller.machineSynced = machineInformer.Informer().HasSynced @@ -207,6 +212,7 @@ type controller struct { pvLister corelisters.PersistentVolumeLister secretLister corelisters.SecretLister nodeLister corelisters.NodeLister + pdbLister policylisters.PodDisruptionBudgetLister machineClassLister machinelisters.MachineClassLister machineLister machinelisters.MachineLister // queues @@ -218,6 +224,7 @@ type controller struct { machineSafetyAPIServerQueue workqueue.RateLimitingInterface // syncs secretSynced cache.InformerSynced + pdbSynced cache.InformerSynced nodeSynced cache.InformerSynced machineClassSynced cache.InformerSynced machineSynced cache.InformerSynced @@ -237,7 +244,7 @@ func (c *controller) Run(workers int, stopCh <-chan struct{}) { defer c.machineSafetyOrphanVMsQueue.ShutDown() defer c.machineSafetyAPIServerQueue.ShutDown() - if !cache.WaitForCacheSync(stopCh, c.secretSynced, c.nodeSynced, c.machineClassSynced, c.machineSynced) { + if !cache.WaitForCacheSync(stopCh, c.secretSynced, c.pdbSynced, c.nodeSynced, c.machineClassSynced, c.machineSynced) { runtimeutil.HandleError(fmt.Errorf("Timed out waiting for caches to sync")) return } diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_safety.go b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_safety.go index d71a20f0e..8f8263f1c 100644 --- a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_safety.go +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_safety.go @@ -219,7 +219,7 @@ func (c *controller) checkMachineClass(machineClass *v1alpha1.MachineClass) (mac // If machine exists and machine object is still been processed by the machine controller if err == nil && - machine.Status.CurrentStatus.Phase == "" { + (machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff) { klog.V(3).Infof("SafetyController: Machine object %q is being processed by machine controller, hence skipping", machine.Name) continue } diff --git a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_util.go b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_util.go index e1b46db67..7544c97ac 100644 --- a/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_util.go +++ b/vendor/github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller/machine_util.go @@ -1007,6 +1007,7 @@ func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest c.driver, c.pvcLister, c.pvLister, + c.pdbLister, ) err = drainOptions.RunDrain() if err == nil { diff --git a/vendor/modules.txt b/vendor/modules.txt index 7a63fd9c6..96b745e42 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -36,7 +36,7 @@ github.com/davecgh/go-spew/spew github.com/evanphx/json-patch # github.com/form3tech-oss/jwt-go v3.2.2+incompatible github.com/form3tech-oss/jwt-go -# github.com/gardener/machine-controller-manager v0.36.0 +# github.com/gardener/machine-controller-manager v0.37.0 github.com/gardener/machine-controller-manager/pkg/apis/machine github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1 github.com/gardener/machine-controller-manager/pkg/apis/machine/validation @@ -53,6 +53,7 @@ github.com/gardener/machine-controller-manager/pkg/client/listers/machine/v1alph github.com/gardener/machine-controller-manager/pkg/fakeclient github.com/gardener/machine-controller-manager/pkg/features github.com/gardener/machine-controller-manager/pkg/handlers +github.com/gardener/machine-controller-manager/pkg/metrics github.com/gardener/machine-controller-manager/pkg/options github.com/gardener/machine-controller-manager/pkg/util/client/leaderelectionconfig github.com/gardener/machine-controller-manager/pkg/util/client/metrics/prometheus