diff --git a/artifacts/deploy/karmada-scheduler.yaml b/artifacts/deploy/karmada-scheduler.yaml index 1dbedec23987..3f1e9fda2829 100644 --- a/artifacts/deploy/karmada-scheduler.yaml +++ b/artifacts/deploy/karmada-scheduler.yaml @@ -28,7 +28,7 @@ spec: - --kubeconfig=/etc/kubeconfig - --bind-address=0.0.0.0 - --secure-port=10351 - - --failover=true + - --feature-gates=Failover=true - --enable-scheduler-estimator=true - --v=4 volumeMounts: diff --git a/charts/templates/karmada_scheduler.yaml b/charts/templates/karmada_scheduler.yaml index eca2ec6e1066..a6240c367905 100644 --- a/charts/templates/karmada_scheduler.yaml +++ b/charts/templates/karmada_scheduler.yaml @@ -55,7 +55,7 @@ spec: - --kubeconfig=/etc/kubeconfig - --bind-address=0.0.0.0 - --secure-port=10351 - - --failover=true + - --feature-gates=Failover=true volumeMounts: {{- include "karmada.kubeconfig.volumeMount" . | nindent 12 }} resources: diff --git a/cmd/scheduler/app/options/options.go b/cmd/scheduler/app/options/options.go index ba7d409ea047..15bbfb03071d 100644 --- a/cmd/scheduler/app/options/options.go +++ b/cmd/scheduler/app/options/options.go @@ -8,6 +8,7 @@ import ( "k8s.io/client-go/tools/leaderelection/resourcelock" componentbaseconfig "k8s.io/component-base/config" + "github.com/karmada-io/karmada/pkg/features" "github.com/karmada-io/karmada/pkg/util" ) @@ -33,9 +34,6 @@ type Options struct { // SecurePort is the port that the server serves at. SecurePort int - // Failover indicates if scheduler should reschedule on cluster failure. - Failover bool - // KubeAPIQPS is the QPS to use while talking with karmada-apiserver. KubeAPIQPS float32 // KubeAPIBurst is the burst to allow while talking with karmada-apiserver. @@ -76,10 +74,10 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&o.Master, "master", o.Master, "The address of the Kubernetes API server. Overrides any value in KubeConfig. Only required if out-of-cluster.") fs.StringVar(&o.BindAddress, "bind-address", defaultBindAddress, "The IP address on which to listen for the --secure-port port.") fs.IntVar(&o.SecurePort, "secure-port", defaultPort, "The secure port on which to serve HTTPS.") - fs.BoolVar(&o.Failover, "failover", false, "Reschedule on cluster failure.") fs.Float32Var(&o.KubeAPIQPS, "kube-api-qps", 40.0, "QPS to use while talking with karmada-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") fs.IntVar(&o.KubeAPIBurst, "kube-api-burst", 60, "Burst to use while talking with karmada-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") fs.BoolVar(&o.EnableSchedulerEstimator, "enable-scheduler-estimator", false, "Enable calling cluster scheduler estimator for adjusting replicas.") fs.DurationVar(&o.SchedulerEstimatorTimeout.Duration, "scheduler-estimator-timeout", 3*time.Second, "Specifies the timeout period of calling the scheduler estimator service.") fs.IntVar(&o.SchedulerEstimatorPort, "scheduler-estimator-port", defaultEstimatorPort, "The secure port on which to connect the accurate scheduler estimator.") + features.FeatureGate.AddFlag(fs) } diff --git a/cmd/scheduler/app/scheduler.go b/cmd/scheduler/app/scheduler.go index f763f715dc0c..e4c1752a2b5a 100644 --- a/cmd/scheduler/app/scheduler.go +++ b/cmd/scheduler/app/scheduler.go @@ -66,7 +66,6 @@ func run(opts *options.Options, stopChan <-chan struct{}) error { cancel() }() - scheduler.Failover = opts.Failover sched := scheduler.NewScheduler(dynamicClientSet, karmadaClient, kubeClientSet, opts) if !opts.LeaderElection.LeaderElect { sched.Run(ctx) diff --git a/pkg/features/features.go b/pkg/features/features.go new file mode 100644 index 000000000000..2071e71ee28b --- /dev/null +++ b/pkg/features/features.go @@ -0,0 +1,24 @@ +package features + +import ( + "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/component-base/featuregate" +) + +const ( + // Failover indicates if scheduler should reschedule on cluster failure. + Failover featuregate.Feature = "Failover" +) + +var ( + // FeatureGate is a shared global scheduler FeatureGate. + FeatureGate featuregate.MutableFeatureGate = featuregate.NewFeatureGate() + + defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ + Failover: {Default: false, PreRelease: featuregate.Alpha}, + } +) + +func init() { + runtime.Must(FeatureGate.Add(defaultFeatureGates)) +} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index f95431ab47f2..9e14234bf214 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -24,6 +24,7 @@ import ( policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" estimatorclient "github.com/karmada-io/karmada/pkg/estimator/client" + "github.com/karmada-io/karmada/pkg/features" karmadaclientset "github.com/karmada-io/karmada/pkg/generated/clientset/versioned" informerfactory "github.com/karmada-io/karmada/pkg/generated/informers/externalversions" clusterlister "github.com/karmada-io/karmada/pkg/generated/listers/cluster/v1alpha1" @@ -68,10 +69,6 @@ const ( scheduleSuccessMessage = "the binding has been scheduled" ) -// Failover indicates if the scheduler should performs re-scheduler in case of cluster failure. -// TODO(RainbowMango): Remove the temporary solution by introducing feature flag -var Failover bool - // Scheduler is the scheduler schema, which is used to schedule a specific resource to specific clusters type Scheduler struct { DynamicClient dynamic.Interface @@ -412,7 +409,7 @@ func (s *Scheduler) scheduleNext() bool { klog.Infof("Reschedule binding(%s) as replicas scaled down or scaled up", keys) metrics.BindingSchedule(string(ScaleSchedule), metrics.SinceInSeconds(start), err) case FailoverSchedule: - if Failover { + if features.FeatureGate.Enabled(features.Failover) { err = s.rescheduleOne(keys) klog.Infof("Reschedule binding(%s) as cluster failure", keys) metrics.BindingSchedule(string(FailoverSchedule), metrics.SinceInSeconds(start), err) @@ -554,9 +551,9 @@ func (s *Scheduler) updateCluster(_, newObj interface{}) { // Check if cluster becomes failure if meta.IsStatusConditionPresentAndEqual(newCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) { - klog.Infof("Found cluster(%s) failure and failover flag is %v", newCluster.Name, Failover) + klog.Infof("Found cluster(%s) failure and failover flag is %v", newCluster.Name, features.FeatureGate.Enabled(features.Failover)) - if Failover { // Trigger reschedule on cluster failure only when flag is true. + if features.FeatureGate.Enabled(features.Failover) { // Trigger reschedule on cluster failure only when flag is true. s.enqueueAffectedBinding(newCluster.Name) s.enqueueAffectedClusterBinding(newCluster.Name) return