Skip to content

Commit

Permalink
Introduce feature gates for the scheduler component
Browse files Browse the repository at this point in the history
Signed-off-by: iawia002 <[email protected]>
Signed-off-by: Xinzhao Xu <[email protected]>
  • Loading branch information
iawia002 committed Nov 5, 2021
1 parent 51c911a commit 98ee6ce
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 14 deletions.
2 changes: 1 addition & 1 deletion artifacts/deploy/karmada-scheduler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ spec:
- --kubeconfig=/etc/kubeconfig
- --bind-address=0.0.0.0
- --secure-port=10351
- --failover=true
- --feature-gates=Failover=true
- --enable-scheduler-estimator=true
- --v=4
volumeMounts:
Expand Down
2 changes: 1 addition & 1 deletion charts/templates/karmada_scheduler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ spec:
- --kubeconfig=/etc/kubeconfig
- --bind-address=0.0.0.0
- --secure-port=10351
- --failover=true
- --feature-gates=Failover=true
volumeMounts:
{{- include "karmada.kubeconfig.volumeMount" . | nindent 12 }}
resources:
Expand Down
6 changes: 2 additions & 4 deletions cmd/scheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"k8s.io/client-go/tools/leaderelection/resourcelock"
componentbaseconfig "k8s.io/component-base/config"

"github.com/karmada-io/karmada/pkg/features"
"github.com/karmada-io/karmada/pkg/util"
)

Expand All @@ -33,9 +34,6 @@ type Options struct {
// SecurePort is the port that the server serves at.
SecurePort int

// Failover indicates if scheduler should reschedule on cluster failure.
Failover bool

// KubeAPIQPS is the QPS to use while talking with karmada-apiserver.
KubeAPIQPS float32
// KubeAPIBurst is the burst to allow while talking with karmada-apiserver.
Expand Down Expand Up @@ -76,10 +74,10 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&o.Master, "master", o.Master, "The address of the Kubernetes API server. Overrides any value in KubeConfig. Only required if out-of-cluster.")
fs.StringVar(&o.BindAddress, "bind-address", defaultBindAddress, "The IP address on which to listen for the --secure-port port.")
fs.IntVar(&o.SecurePort, "secure-port", defaultPort, "The secure port on which to serve HTTPS.")
fs.BoolVar(&o.Failover, "failover", false, "Reschedule on cluster failure.")
fs.Float32Var(&o.KubeAPIQPS, "kube-api-qps", 40.0, "QPS to use while talking with karmada-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.")
fs.IntVar(&o.KubeAPIBurst, "kube-api-burst", 60, "Burst to use while talking with karmada-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.")
fs.BoolVar(&o.EnableSchedulerEstimator, "enable-scheduler-estimator", false, "Enable calling cluster scheduler estimator for adjusting replicas.")
fs.DurationVar(&o.SchedulerEstimatorTimeout.Duration, "scheduler-estimator-timeout", 3*time.Second, "Specifies the timeout period of calling the scheduler estimator service.")
fs.IntVar(&o.SchedulerEstimatorPort, "scheduler-estimator-port", defaultEstimatorPort, "The secure port on which to connect the accurate scheduler estimator.")
features.FeatureGate.AddFlag(fs)
}
1 change: 0 additions & 1 deletion cmd/scheduler/app/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ func run(opts *options.Options, stopChan <-chan struct{}) error {
cancel()
}()

scheduler.Failover = opts.Failover
sched := scheduler.NewScheduler(dynamicClientSet, karmadaClient, kubeClientSet, opts)
if !opts.LeaderElection.LeaderElect {
sched.Run(ctx)
Expand Down
24 changes: 24 additions & 0 deletions pkg/features/features.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package features

import (
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/component-base/featuregate"
)

// Feature names that can be toggled on the shared FeatureGate, for example
// with the command-line argument --feature-gates=Failover=true.
const (
// Failover indicates if scheduler should reschedule on cluster failure.
// The scheduler consults this gate before enqueueing the bindings affected
// by a cluster whose Ready condition is false, and before rescheduling a
// binding for a FailoverSchedule event.
Failover featuregate.Feature = "Failover"
)

var (
// FeatureGate is a shared global scheduler FeatureGate.
// Its command-line flag is registered through FeatureGate.AddFlag, and
// individual features are queried with FeatureGate.Enabled.
FeatureGate featuregate.MutableFeatureGate = featuregate.NewFeatureGate()

// defaultFeatureGates holds the spec (default state and pre-release stage)
// of every feature that init adds to FeatureGate.
defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
// Failover is an alpha feature and is disabled by default.
Failover: {Default: false, PreRelease: featuregate.Alpha},
}
)

func init() {
runtime.Must(FeatureGate.Add(defaultFeatureGates))
}
11 changes: 4 additions & 7 deletions pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
estimatorclient "github.com/karmada-io/karmada/pkg/estimator/client"
"github.com/karmada-io/karmada/pkg/features"
karmadaclientset "github.com/karmada-io/karmada/pkg/generated/clientset/versioned"
informerfactory "github.com/karmada-io/karmada/pkg/generated/informers/externalversions"
clusterlister "github.com/karmada-io/karmada/pkg/generated/listers/cluster/v1alpha1"
Expand Down Expand Up @@ -68,10 +69,6 @@ const (
scheduleSuccessMessage = "the binding has been scheduled"
)

// Failover indicates if the scheduler should perform rescheduling in case of cluster failure.
// TODO(RainbowMango): Remove the temporary solution by introducing feature flag
var Failover bool

// Scheduler is the scheduler schema, which is used to schedule a specific resource to specific clusters
type Scheduler struct {
DynamicClient dynamic.Interface
Expand Down Expand Up @@ -412,7 +409,7 @@ func (s *Scheduler) scheduleNext() bool {
klog.Infof("Reschedule binding(%s) as replicas scaled down or scaled up", keys)
metrics.BindingSchedule(string(ScaleSchedule), metrics.SinceInSeconds(start), err)
case FailoverSchedule:
if Failover {
if features.FeatureGate.Enabled(features.Failover) {
err = s.rescheduleOne(keys)
klog.Infof("Reschedule binding(%s) as cluster failure", keys)
metrics.BindingSchedule(string(FailoverSchedule), metrics.SinceInSeconds(start), err)
Expand Down Expand Up @@ -554,9 +551,9 @@ func (s *Scheduler) updateCluster(_, newObj interface{}) {

// Check if cluster becomes failure
if meta.IsStatusConditionPresentAndEqual(newCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) {
klog.Infof("Found cluster(%s) failure and failover flag is %v", newCluster.Name, Failover)
klog.Infof("Found cluster(%s) failure and failover flag is %v", newCluster.Name, features.FeatureGate.Enabled(features.Failover))

if Failover { // Trigger reschedule on cluster failure only when flag is true.
if features.FeatureGate.Enabled(features.Failover) { // Trigger reschedule on cluster failure only when flag is true.
s.enqueueAffectedBinding(newCluster.Name)
s.enqueueAffectedClusterBinding(newCluster.Name)
return
Expand Down

0 comments on commit 98ee6ce

Please sign in to comment.