Skip to content

Commit

Permalink
Merge pull request #298 from ffromani/sched-ha-support
Browse files Browse the repository at this point in the history
sched: enable HA support
  • Loading branch information
ffromani authored Jul 29, 2024
2 parents f81b881 + b8be008 commit 6908d45
Show file tree
Hide file tree
Showing 17 changed files with 317 additions and 67 deletions.
2 changes: 2 additions & 0 deletions pkg/commands/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ func NewDeploySchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
},
Args: cobra.NoArgs,
Expand Down
4 changes: 4 additions & 0 deletions pkg/commands/remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ func NewRemoveCommand(env *deployer.Environment, commonOpts *options.Options) *c
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
if err != nil {
// intentionally keep going to remove as much as possible
Expand Down Expand Up @@ -166,6 +168,8 @@ func NewRemoveSchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
},
Args: cobra.NoArgs,
Expand Down
4 changes: 4 additions & 0 deletions pkg/commands/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ func NewRenderSchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
CacheResyncPeriod: commonOpts.SchedResyncPeriod,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}
schedObjs, err := schedManifests.Render(env.Log, renderOpts)
if err != nil {
Expand Down Expand Up @@ -186,6 +188,8 @@ func RenderManifests(env *deployer.Environment, commonOpts *options.Options) err
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}

schedObjs, err := schedManifests.Render(env.Log, schedRenderOpts)
Expand Down
3 changes: 2 additions & 1 deletion pkg/commands/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ func InitFlags(flags *pflag.FlagSet, commonOpts *options.Options, internalOpts *
flags.StringVar(&commonOpts.SchedProfileName, "sched-profile-name", schedmanifests.DefaultProfileName, "inject scheduler profile name.")
flags.DurationVar(&commonOpts.SchedResyncPeriod, "sched-resync-period", schedmanifests.DefaultResyncPeriod, "inject scheduler resync period.")
flags.IntVar(&commonOpts.SchedVerbose, "sched-verbose", schedmanifests.DefaultVerbose, "set the scheduler verbosiness.")
flags.BoolVar(&commonOpts.SchedCtrlPlaneAffinity, "sched-ctrlplane-affinity", true, "toggle the scheduler control plane affinity.")
flags.BoolVar(&commonOpts.SchedCtrlPlaneAffinity, "sched-ctrlplane-affinity", schedmanifests.DefaultCtrlPlaneAffinity, "toggle the scheduler control plane affinity.")
flags.StringVar(&commonOpts.SchedLeaderElectResource, "sched-leader-elect-resource", schedmanifests.DefaultLeaderElectResource, "leader election resource namespaced name \"namespace/name\"")
}

func PostSetupOptions(env *deployer.Environment, commonOpts *options.Options, internalOpts *internalOptions) error {
Expand Down
2 changes: 2 additions & 0 deletions pkg/deploy/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ func OnCluster(env *deployer.Environment, commonOpts *options.Options) error {
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}); err != nil {
return err
}
Expand Down
26 changes: 23 additions & 3 deletions pkg/manifests/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ const (
SubComponentNodeFeatureDiscoveryTopologyUpdater = "topologyupdater"
)

// Role names accepted by RoleBinding through its roleName argument.
// A non-empty role name makes RoleBinding load "rolebinding_<roleName>.yaml"
// instead of the plain "rolebinding.yaml"; validateRoleName rejects any
// value not listed here.
const (
// RoleNameAuthReader selects the "rolebinding_authreader.yaml" manifest.
RoleNameAuthReader = "authreader"
// RoleNameLeaderElect selects the "rolebinding_leaderelect.yaml" manifest.
RoleNameLeaderElect = "leaderelect"
)

const (
ContainerNameRTE = "resource-topology-exporter"
ContainerNameNFDTopologyUpdater = "nfd-topology-updater"
Expand Down Expand Up @@ -148,15 +153,23 @@ func Role(component, subComponent, namespace string) (*rbacv1.Role, error) {
return role, nil
}

func RoleBinding(component, subComponent, namespace string) (*rbacv1.RoleBinding, error) {
func RoleBinding(component, subComponent, roleName, namespace string) (*rbacv1.RoleBinding, error) {
if err := validateComponent(component); err != nil {
return nil, err
}
if err := validateSubComponent(component, subComponent); err != nil {
return nil, err
}

obj, err := loadObject(filepath.Join("yaml", component, subComponent, "rolebinding.yaml"))
var fileName string
if roleName == "" {
fileName = "rolebinding.yaml"
} else {
if err := validateRoleName(roleName); err != nil {
return nil, err
}
fileName = "rolebinding_" + roleName + ".yaml"
}
obj, err := loadObject(filepath.Join("yaml", component, subComponent, fileName))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -469,6 +482,13 @@ func validateSubComponent(component, subComponent string) error {
return fmt.Errorf("unknown subComponent %q for component: %q", subComponent, component)
}

// validateRoleName checks that roleName is one of the role names this package
// knows about (RoleNameAuthReader, RoleNameLeaderElect). It returns nil for a
// known name and a descriptive error for anything else, including the empty
// string.
func validateRoleName(roleName string) error {
	switch roleName {
	case RoleNameAuthReader, RoleNameLeaderElect:
		return nil
	default:
		return fmt.Errorf("unknown roleName %q", roleName)
	}
}

func Service(component, subComponent, namespace string) (*corev1.Service, error) {
if err := validateComponent(component); err != nil {
return nil, err
Expand Down
24 changes: 22 additions & 2 deletions pkg/manifests/manifests_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func TestGetRole(t *testing.T) {
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
expectError: true,
expectError: false,
},
{
component: ComponentSchedulerPlugin,
Expand Down Expand Up @@ -159,6 +159,7 @@ func TestGetRoleBinding(t *testing.T) {
type testCase struct {
component string
subComponent string
roleName string
expectError bool
}

Expand All @@ -171,9 +172,28 @@ func TestGetRoleBinding(t *testing.T) {
component: ComponentAPI,
expectError: true,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginController,
roleName: "",
expectError: false,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: "",
expectError: true,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: RoleNameAuthReader,
expectError: false,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: RoleNameLeaderElect,
expectError: false,
},
{
Expand All @@ -189,7 +209,7 @@ func TestGetRoleBinding(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.component, func(t *testing.T) {
obj, err := RoleBinding(tc.component, tc.subComponent, "")
obj, err := RoleBinding(tc.component, tc.subComponent, tc.roleName, "")
if tc.expectError {
if err == nil || obj != nil {
t.Fatalf("nil err or non-nil obj=%v", obj)
Expand Down
2 changes: 1 addition & 1 deletion pkg/manifests/rte/rte.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func GetManifests(plat platform.Platform, version platform.Version, namespace st
if err != nil {
return mf, err
}
mf.RoleBinding, err = manifests.RoleBinding(manifests.ComponentResourceTopologyExporter, "", namespace)
mf.RoleBinding, err = manifests.RoleBinding(manifests.ComponentResourceTopologyExporter, "", "", namespace)
if err != nil {
return mf, err
}
Expand Down
108 changes: 83 additions & 25 deletions pkg/manifests/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package sched
import (
"encoding/json"
"fmt"
"strings"
"time"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -61,12 +62,14 @@ type Manifests struct {
RBController *rbacv1.RoleBinding
DPController *appsv1.Deployment
// scheduler proper
SAScheduler *corev1.ServiceAccount
CRScheduler *rbacv1.ClusterRole
CRBScheduler *rbacv1.ClusterRoleBinding
RBScheduler *rbacv1.RoleBinding
DPScheduler *appsv1.Deployment
ConfigMap *corev1.ConfigMap
SAScheduler *corev1.ServiceAccount
CRScheduler *rbacv1.ClusterRole
RSchedulerElect *rbacv1.Role
CRBScheduler *rbacv1.ClusterRoleBinding
RBSchedulerAuth *rbacv1.RoleBinding
RBSchedulerElect *rbacv1.RoleBinding
DPScheduler *appsv1.Deployment
ConfigMap *corev1.ConfigMap
// internal fields
plat platform.Platform
}
Expand All @@ -75,19 +78,21 @@ func (mf Manifests) Clone() Manifests {
return Manifests{
plat: mf.plat,
// objects
Crd: mf.Crd.DeepCopy(),
Namespace: mf.Namespace.DeepCopy(),
SAController: mf.SAController.DeepCopy(),
CRController: mf.CRController.DeepCopy(),
CRBController: mf.CRBController.DeepCopy(),
DPController: mf.DPController.DeepCopy(),
RBController: mf.RBController.DeepCopy(),
SAScheduler: mf.SAScheduler.DeepCopy(),
CRScheduler: mf.CRScheduler.DeepCopy(),
CRBScheduler: mf.CRBScheduler.DeepCopy(),
DPScheduler: mf.DPScheduler.DeepCopy(),
ConfigMap: mf.ConfigMap.DeepCopy(),
RBScheduler: mf.RBScheduler.DeepCopy(),
Crd: mf.Crd.DeepCopy(),
Namespace: mf.Namespace.DeepCopy(),
SAController: mf.SAController.DeepCopy(),
CRController: mf.CRController.DeepCopy(),
CRBController: mf.CRBController.DeepCopy(),
DPController: mf.DPController.DeepCopy(),
RBController: mf.RBController.DeepCopy(),
SAScheduler: mf.SAScheduler.DeepCopy(),
CRScheduler: mf.CRScheduler.DeepCopy(),
RSchedulerElect: mf.RSchedulerElect.DeepCopy(),
CRBScheduler: mf.CRBScheduler.DeepCopy(),
RBSchedulerAuth: mf.RBSchedulerAuth.DeepCopy(),
RBSchedulerElect: mf.RBSchedulerElect.DeepCopy(),
DPScheduler: mf.DPScheduler.DeepCopy(),
ConfigMap: mf.ConfigMap.DeepCopy(),
}
}

Expand All @@ -100,12 +105,19 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest
ret.DPScheduler.Spec.Replicas = newInt32(replicas)
ret.DPController.Spec.Replicas = newInt32(replicas)

var err error
params := manifests.ConfigParams{
ProfileName: opts.ProfileName,
Cache: manifests.NewConfigCacheParams(),
}

leap, ok, err := leaderElectionParamsFromOpts(opts)
if err != nil {
return ret, err
}
if ok {
params.LeaderElection = &leap
}

if len(opts.CacheParamsConfigData) > 0 {
err = yaml.Unmarshal([]byte(opts.CacheParamsConfigData), params.Cache)
if err != nil {
Expand All @@ -131,7 +143,9 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest

schedupdate.SchedulerDeployment(ret.DPScheduler, opts.PullIfNotPresent, opts.CtrlPlaneAffinity, opts.Verbose)
schedupdate.ControllerDeployment(ret.DPController, opts.PullIfNotPresent, opts.CtrlPlaneAffinity)
if mf.plat == platform.OpenShift {
if opts.Namespace != "" {
ret.Namespace.Name = opts.Namespace
} else if mf.plat == platform.OpenShift {
ret.Namespace.Name = NamespaceOpenShift
}

Expand All @@ -140,9 +154,12 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest
rbacupdate.RoleBinding(ret.RBController, ret.SAController.Name, ret.Namespace.Name)
ret.DPController.Namespace = ret.Namespace.Name

rbacupdate.Role(ret.RSchedulerElect, ret.Namespace.Name)

ret.SAScheduler.Namespace = ret.Namespace.Name
rbacupdate.ClusterRoleBinding(ret.CRBScheduler, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBScheduler, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBSchedulerElect, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBSchedulerAuth, ret.SAScheduler.Name, ret.Namespace.Name)
ret.DPScheduler.Namespace = ret.Namespace.Name
ret.ConfigMap.Namespace = ret.Namespace.Name

Expand All @@ -157,7 +174,9 @@ func (mf Manifests) ToObjects() []client.Object {
mf.CRScheduler,
mf.CRBScheduler,
mf.ConfigMap,
mf.RBScheduler,
mf.RSchedulerElect,
mf.RBSchedulerAuth,
mf.RBSchedulerElect,
mf.DPScheduler,
mf.SAController,
mf.CRController,
Expand Down Expand Up @@ -201,7 +220,15 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
if err != nil {
return mf, err
}
mf.RBScheduler, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, namespace)
mf.RSchedulerElect, err = manifests.Role(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, namespace)
if err != nil {
return mf, err
}
mf.RBSchedulerElect, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, manifests.RoleNameLeaderElect, namespace)
if err != nil {
return mf, err
}
mf.RBSchedulerAuth, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, manifests.RoleNameAuthReader, namespace)
if err != nil {
return mf, err
}
Expand All @@ -222,7 +249,7 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
if err != nil {
return mf, err
}
mf.RBController, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginController, namespace)
mf.RBController, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginController, "", namespace)
if err != nil {
return mf, err
}
Expand All @@ -234,6 +261,37 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
return mf, nil
}

// leaderElectionParamsFromOpts builds the leader election parameters to inject
// in the scheduler configuration out of the given scheduler options.
//
// The boolean result tells whether leader election was requested at all: when
// opts.LeaderElection is false it returns the zero parameters, false and a nil
// error, and the caller is expected to leave leader election unconfigured.
//
// When leader election is requested, the parameters are seeded with
// manifests.SetDefaultsLeaderElection, LeaderElect is forced to true, and
// opts.LeaderElectionResource is parsed as "namespace/name". Either side of the
// separator may be empty, in which case the corresponding seeded value is left
// untouched. Any value which does not contain exactly one "/" (including the
// empty string) is reported as malformed; in that case the boolean result is
// still true and the returned parameters must not be used.
func leaderElectionParamsFromOpts(opts options.Scheduler) (manifests.LeaderElectionParams, bool, error) {
	leap := manifests.LeaderElectionParams{}
	if !opts.LeaderElection {
		return leap, false, nil
	}

	manifests.SetDefaultsLeaderElection(&leap)
	leap.LeaderElect = true

	// strings.Split with a non-empty separator yields a single token only when
	// the separator is missing, and that token is always the whole input: so
	// anything but exactly two tokens cannot be a "namespace/name" pair.
	tokens := strings.Split(opts.LeaderElectionResource, "/")
	if len(tokens) != 2 {
		return leap, true, fmt.Errorf("malformed leader election resource: %q", opts.LeaderElectionResource)
	}

	// empty components are allowed and mean "keep the default"
	if namespace := tokens[0]; namespace != "" {
		leap.ResourceNamespace = namespace
	}
	if name := tokens[1]; name != "" {
		leap.ResourceName = name
	}
	return leap, true, nil
}

// newInt32 returns a pointer to a freshly allocated int32 holding value.
// Each call yields a distinct pointer, so callers can safely mutate the result.
func newInt32(value int32) *int32 {
	ptr := new(int32)
	*ptr = value
	return ptr
}
Expand Down
Loading

0 comments on commit 6908d45

Please sign in to comment.