Skip to content

Commit

Permalink
WIP: sched: support for HA mode
Browse files Browse the repository at this point in the history
WIP TBD

Signed-off-by: Francesco Romani <[email protected]>
  • Loading branch information
ffromani committed Mar 19, 2024
1 parent 1f994c8 commit 641e628
Show file tree
Hide file tree
Showing 18 changed files with 207 additions and 52 deletions.
2 changes: 2 additions & 0 deletions pkg/commands/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ func NewDeploySchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
},
Args: cobra.NoArgs,
Expand Down
4 changes: 4 additions & 0 deletions pkg/commands/remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ func NewRemoveCommand(env *deployer.Environment, commonOpts *options.Options) *c
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
if err != nil {
// intentionally keep going to remove as much as possible
Expand Down Expand Up @@ -166,6 +168,8 @@ func NewRemoveSchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
},
Args: cobra.NoArgs,
Expand Down
4 changes: 4 additions & 0 deletions pkg/commands/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ func NewRenderSchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
CacheResyncPeriod: commonOpts.SchedResyncPeriod,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}
schedObjs, err := schedManifests.Render(env.Log, renderOpts)
if err != nil {
Expand Down Expand Up @@ -186,6 +188,8 @@ func RenderManifests(env *deployer.Environment, commonOpts *options.Options) err
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}

schedObjs, err := schedManifests.Render(env.Log, schedRenderOpts)
Expand Down
3 changes: 2 additions & 1 deletion pkg/commands/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ func InitFlags(flags *pflag.FlagSet, commonOpts *options.Options, internalOpts *
flags.StringVar(&commonOpts.SchedProfileName, "sched-profile-name", schedmanifests.DefaultProfileName, "inject scheduler profile name.")
flags.DurationVar(&commonOpts.SchedResyncPeriod, "sched-resync-period", schedmanifests.DefaultResyncPeriod, "inject scheduler resync period.")
flags.IntVar(&commonOpts.SchedVerbose, "sched-verbose", schedmanifests.DefaultVerbose, "set the scheduler verbosiness.")
flags.BoolVar(&commonOpts.SchedCtrlPlaneAffinity, "sched-ctrlplane-affinity", true, "toggle the scheduler control plane affinity.")
flags.BoolVar(&commonOpts.SchedCtrlPlaneAffinity, "sched-ctrlplane-affinity", schedmanifests.DefaultCtrlPlaneAffinity, "toggle the scheduler control plane affinity.")
flags.StringVar(&commonOpts.SchedLeaderElectResource, "sched-leader-elect-resource", schedmanifests.DefaultLeaderElectResource, "leader election resource namespaced name \"namespace/name\"")
}

func PostSetupOptions(env *deployer.Environment, commonOpts *options.Options, internalOpts *internalOptions) error {
Expand Down
2 changes: 2 additions & 0 deletions pkg/deploy/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ func OnCluster(env *deployer.Environment, commonOpts *options.Options) error {
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}); err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/deployer/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func Deploy(env *deployer.Environment, opts options.Scheduler) error {
env = env.WithName("SCD")
env.Log.Info("deploying topology-aware-scheduling scheduler plugin")

mf, err := schedmanifests.GetManifests(opts.Platform, "")
mf, err := schedmanifests.GetManifests(opts.Platform, "tas-scheduler")
if err != nil {
return err
}
Expand Down
8 changes: 6 additions & 2 deletions pkg/manifests/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,15 +148,19 @@ func Role(component, subComponent, namespace string) (*rbacv1.Role, error) {
return role, nil
}

func RoleBinding(component, subComponent, namespace string) (*rbacv1.RoleBinding, error) {
func RoleBinding(component, subComponent, roleName, namespace string) (*rbacv1.RoleBinding, error) {
if err := validateComponent(component); err != nil {
return nil, err
}
if err := validateSubComponent(component, subComponent); err != nil {
return nil, err
}

obj, err := loadObject(filepath.Join("yaml", component, subComponent, "rolebinding.yaml"))
fileName := "rolebinding.yaml"
if roleName != "" {
fileName = "rolebinding_" + roleName + ".yaml"
}
obj, err := loadObject(filepath.Join("yaml", component, subComponent, fileName))
if err != nil {
return nil, err
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/manifests/manifests_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func TestGetRole(t *testing.T) {
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
expectError: true,
expectError: false,
},
{
component: ComponentSchedulerPlugin,
Expand Down Expand Up @@ -159,6 +159,7 @@ func TestGetRoleBinding(t *testing.T) {
type testCase struct {
component string
subComponent string
roleName string
expectError bool
}

Expand All @@ -174,6 +175,7 @@ func TestGetRoleBinding(t *testing.T) {
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: "authread",
expectError: false,
},
{
Expand All @@ -189,7 +191,7 @@ func TestGetRoleBinding(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.component, func(t *testing.T) {
obj, err := RoleBinding(tc.component, tc.subComponent, "")
obj, err := RoleBinding(tc.component, tc.subComponent, tc.roleName, "")
if tc.expectError {
if err == nil || obj != nil {
t.Fatalf("nil err or non-nil obj=%v", obj)
Expand Down
2 changes: 1 addition & 1 deletion pkg/manifests/rte/rte.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func GetManifests(plat platform.Platform, version platform.Version, namespace st
if err != nil {
return mf, err
}
mf.RoleBinding, err = manifests.RoleBinding(manifests.ComponentResourceTopologyExporter, "", namespace)
mf.RoleBinding, err = manifests.RoleBinding(manifests.ComponentResourceTopologyExporter, "", "", namespace)
if err != nil {
return mf, err
}
Expand Down
98 changes: 69 additions & 29 deletions pkg/manifests/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package sched
import (
"encoding/json"
"fmt"
"strings"
"time"

"github.com/go-logr/logr"
Expand All @@ -39,9 +40,11 @@ import (
)

const (
DefaultProfileName = "topology-aware-scheduler"
DefaultResyncPeriod = 5 * time.Second
DefaultVerbose = 4
DefaultProfileName = "topology-aware-scheduler"
DefaultResyncPeriod = 5 * time.Second
DefaultVerbose = 4
DefaultCtrlPlaneAffinity = true
DefaultLeaderElectResource = manifests.LeaderElectionDefaultNamespace + "/" + manifests.LeaderElectionDefaultName
)

const (
Expand All @@ -59,12 +62,14 @@ type Manifests struct {
RBController *rbacv1.RoleBinding
DPController *appsv1.Deployment
// scheduler proper
SAScheduler *corev1.ServiceAccount
CRScheduler *rbacv1.ClusterRole
CRBScheduler *rbacv1.ClusterRoleBinding
RBScheduler *rbacv1.RoleBinding
DPScheduler *appsv1.Deployment
ConfigMap *corev1.ConfigMap
SAScheduler *corev1.ServiceAccount
CRScheduler *rbacv1.ClusterRole
RSchedulerElect *rbacv1.Role
CRBScheduler *rbacv1.ClusterRoleBinding
RBSchedulerAuth *rbacv1.RoleBinding
RBSchedulerElect *rbacv1.RoleBinding
DPScheduler *appsv1.Deployment
ConfigMap *corev1.ConfigMap
// internal fields
plat platform.Platform
}
Expand All @@ -73,19 +78,21 @@ func (mf Manifests) Clone() Manifests {
return Manifests{
plat: mf.plat,
// objects
Crd: mf.Crd.DeepCopy(),
Namespace: mf.Namespace.DeepCopy(),
SAController: mf.SAController.DeepCopy(),
CRController: mf.CRController.DeepCopy(),
CRBController: mf.CRBController.DeepCopy(),
DPController: mf.DPController.DeepCopy(),
RBController: mf.RBController.DeepCopy(),
SAScheduler: mf.SAScheduler.DeepCopy(),
CRScheduler: mf.CRScheduler.DeepCopy(),
CRBScheduler: mf.CRBScheduler.DeepCopy(),
DPScheduler: mf.DPScheduler.DeepCopy(),
ConfigMap: mf.ConfigMap.DeepCopy(),
RBScheduler: mf.RBScheduler.DeepCopy(),
Crd: mf.Crd.DeepCopy(),
Namespace: mf.Namespace.DeepCopy(),
SAController: mf.SAController.DeepCopy(),
CRController: mf.CRController.DeepCopy(),
CRBController: mf.CRBController.DeepCopy(),
DPController: mf.DPController.DeepCopy(),
RBController: mf.RBController.DeepCopy(),
SAScheduler: mf.SAScheduler.DeepCopy(),
CRScheduler: mf.CRScheduler.DeepCopy(),
RSchedulerElect: mf.RSchedulerElect.DeepCopy(),
CRBScheduler: mf.CRBScheduler.DeepCopy(),
RBSchedulerAuth: mf.RBSchedulerAuth.DeepCopy(),
RBSchedulerElect: mf.RBSchedulerElect.DeepCopy(),
DPScheduler: mf.DPScheduler.DeepCopy(),
ConfigMap: mf.ConfigMap.DeepCopy(),
}
}

Expand All @@ -104,16 +111,18 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest
Cache: manifests.NewConfigCacheParams(),
}

params.LeaderElection, err = leaderElectionParamsFromOpts(opts)
if err != nil {
return ret, err
}

if len(opts.CacheParamsConfigData) > 0 {
err = yaml.Unmarshal([]byte(opts.CacheParamsConfigData), params.Cache)
if err != nil {
return ret, err
}
}

// always override
params.Cache.ResyncPeriodSeconds = newInt64(int64(opts.CacheResyncPeriod.Seconds()))

if len(opts.ScoringStratConfigData) > 0 {
params.ScoringStrategy = &manifests.ScoringStrategyParams{}
err = yaml.Unmarshal([]byte(opts.ScoringStratConfigData), params.ScoringStrategy)
Expand All @@ -122,6 +131,9 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest
}
}

// always override
params.Cache.ResyncPeriodSeconds = newInt64(int64(opts.CacheResyncPeriod.Seconds()))

err = schedupdate.SchedulerConfig(ret.ConfigMap, DefaultProfileName, &params)
if err != nil {
return ret, err
Expand All @@ -140,7 +152,8 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest

ret.SAScheduler.Namespace = ret.Namespace.Name
rbacupdate.ClusterRoleBinding(ret.CRBScheduler, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBScheduler, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBSchedulerElect, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBSchedulerAuth, ret.SAScheduler.Name, ret.Namespace.Name)
ret.DPScheduler.Namespace = ret.Namespace.Name
ret.ConfigMap.Namespace = ret.Namespace.Name

Expand All @@ -155,7 +168,9 @@ func (mf Manifests) ToObjects() []client.Object {
mf.CRScheduler,
mf.CRBScheduler,
mf.ConfigMap,
mf.RBScheduler,
mf.RSchedulerElect,
mf.RBSchedulerAuth,
mf.RBSchedulerElect,
mf.DPScheduler,
mf.SAController,
mf.CRController,
Expand Down Expand Up @@ -199,7 +214,15 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
if err != nil {
return mf, err
}
mf.RBScheduler, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, namespace)
mf.RSchedulerElect, err = manifests.Role(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, namespace)
if err != nil {
return mf, err
}
mf.RBSchedulerElect, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, "leaderelect", namespace)
if err != nil {
return mf, err
}
mf.RBSchedulerAuth, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, "authread", namespace)
if err != nil {
return mf, err
}
Expand All @@ -220,7 +243,7 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
if err != nil {
return mf, err
}
mf.RBController, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginController, namespace)
mf.RBController, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginController, "", namespace)
if err != nil {
return mf, err
}
Expand All @@ -232,6 +255,23 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
return mf, nil
}

func leaderElectionParamsFromOpts(opts options.Scheduler) (*manifests.LeaderElectionParams, error) {
if !opts.LeaderElection {
return nil, nil
}
tokens := strings.Split(opts.LeaderElectionResource, "/")
if len(tokens) != 2 {
return nil, fmt.Errorf("malformed leader election resource: %q", opts.LeaderElectionResource)
}
leap := manifests.LeaderElectionParams{
LeaderElect: true,
ResourceNamespace: tokens[0],
ResourceName: tokens[1],
}
manifests.SetDefaultsLeaderElection(&leap)
return &leap, nil
}

func newInt32(value int32) *int32 {
return &value
}
Expand Down
21 changes: 21 additions & 0 deletions pkg/manifests/schedparams.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ const (
ScoringStrategyLeastAllocated = "LeastAllocated"
)

const (
LeaderElectionDefaultName = "nrtmatch-scheduler"
LeaderElectionDefaultNamespace = "tas-scheduler"
)

func ValidateForeignPodsDetectMode(value string) error {
switch value {
case ForeignPodsDetectNone:
Expand Down Expand Up @@ -136,10 +141,26 @@ func ValidateScoringStrategyType(value string) error {
}
}

type LeaderElectionParams struct {
LeaderElect bool `json:"leaderElect,omitempty"`
ResourceName string `json:"resourceName,omitempty"`
ResourceNamespace string `json:"resourceNamespace,omitempty"`
}

func SetDefaultsLeaderElection(lep *LeaderElectionParams) {
if lep.ResourceName == "" {
lep.ResourceName = LeaderElectionDefaultName
}
if lep.ResourceNamespace == "" {
lep.ResourceNamespace = LeaderElectionDefaultNamespace
}
}

type ConfigParams struct {
ProfileName string // can't be empty, so no need for pointer
Cache *ConfigCacheParams
ScoringStrategy *ScoringStrategyParams
LeaderElection *LeaderElectionParams
}

func DecodeSchedulerProfilesFromData(data []byte) ([]ConfigParams, error) {
Expand Down
14 changes: 0 additions & 14 deletions pkg/manifests/yaml/sched/scheduler/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,6 @@ rules:
- apiGroups: ["", "events.k8s.io"]
resources: ["events"]
verbs: ["create", "patch", "update"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resourceNames: ["kube-scheduler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["endpoints"]
verbs: ["create"]
- apiGroups: [""]
resourceNames: ["kube-scheduler"]
resources: ["endpoints"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch"]
Expand Down
19 changes: 19 additions & 0 deletions pkg/manifests/yaml/sched/scheduler/role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: topology-aware-scheduler-leader-elect
rules:
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resourceNames: ["nrtmatch-scheduler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["endpoints"]
verbs: ["create"]
- apiGroups: [""]
resourceNames: ["nrtmatch-scheduler"]
resources: ["endpoints"]
verbs: ["get", "update"]
Loading

0 comments on commit 641e628

Please sign in to comment.