Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sched: enable HA support #298

Merged
merged 2 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/commands/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ func NewDeploySchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
},
Args: cobra.NoArgs,
Expand Down
4 changes: 4 additions & 0 deletions pkg/commands/remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ func NewRemoveCommand(env *deployer.Environment, commonOpts *options.Options) *c
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
if err != nil {
// intentionally keep going to remove as much as possible
Expand Down Expand Up @@ -166,6 +168,8 @@ func NewRemoveSchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
})
},
Args: cobra.NoArgs,
Expand Down
4 changes: 4 additions & 0 deletions pkg/commands/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ func NewRenderSchedulerPluginCommand(env *deployer.Environment, commonOpts *opti
CacheResyncPeriod: commonOpts.SchedResyncPeriod,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}
schedObjs, err := schedManifests.Render(env.Log, renderOpts)
if err != nil {
Expand Down Expand Up @@ -186,6 +188,8 @@ func RenderManifests(env *deployer.Environment, commonOpts *options.Options) err
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}

schedObjs, err := schedManifests.Render(env.Log, schedRenderOpts)
Expand Down
3 changes: 2 additions & 1 deletion pkg/commands/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ func InitFlags(flags *pflag.FlagSet, commonOpts *options.Options, internalOpts *
flags.StringVar(&commonOpts.SchedProfileName, "sched-profile-name", schedmanifests.DefaultProfileName, "inject scheduler profile name.")
flags.DurationVar(&commonOpts.SchedResyncPeriod, "sched-resync-period", schedmanifests.DefaultResyncPeriod, "inject scheduler resync period.")
flags.IntVar(&commonOpts.SchedVerbose, "sched-verbose", schedmanifests.DefaultVerbose, "set the scheduler verbosiness.")
flags.BoolVar(&commonOpts.SchedCtrlPlaneAffinity, "sched-ctrlplane-affinity", true, "toggle the scheduler control plane affinity.")
flags.BoolVar(&commonOpts.SchedCtrlPlaneAffinity, "sched-ctrlplane-affinity", schedmanifests.DefaultCtrlPlaneAffinity, "toggle the scheduler control plane affinity.")
flags.StringVar(&commonOpts.SchedLeaderElectResource, "sched-leader-elect-resource", schedmanifests.DefaultLeaderElectResource, "leader election resource namespaced name \"namespace/name\"")
}

func PostSetupOptions(env *deployer.Environment, commonOpts *options.Options, internalOpts *internalOptions) error {
Expand Down
2 changes: 2 additions & 0 deletions pkg/deploy/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ func OnCluster(env *deployer.Environment, commonOpts *options.Options) error {
Verbose: commonOpts.SchedVerbose,
ScoringStratConfigData: commonOpts.SchedScoringStratConfigData,
CacheParamsConfigData: commonOpts.SchedCacheParamsConfigData,
LeaderElection: commonOpts.Replicas > 1,
LeaderElectionResource: commonOpts.SchedLeaderElectResource,
}); err != nil {
return err
}
Expand Down
26 changes: 23 additions & 3 deletions pkg/manifests/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ const (
SubComponentNodeFeatureDiscoveryTopologyUpdater = "topologyupdater"
)

// Role names accepted by validateRoleName. When a non-empty role name is
// passed to RoleBinding, it selects the manifest file
// "rolebinding_<roleName>.yaml" instead of the plain "rolebinding.yaml".
const (
// RoleNameAuthReader selects the "authreader" rolebinding manifest.
RoleNameAuthReader = "authreader"
// RoleNameLeaderElect selects the "leaderelect" rolebinding manifest.
RoleNameLeaderElect = "leaderelect"
)

const (
ContainerNameRTE = "resource-topology-exporter"
ContainerNameNFDTopologyUpdater = "nfd-topology-updater"
Expand Down Expand Up @@ -148,15 +153,23 @@ func Role(component, subComponent, namespace string) (*rbacv1.Role, error) {
return role, nil
}

func RoleBinding(component, subComponent, namespace string) (*rbacv1.RoleBinding, error) {
func RoleBinding(component, subComponent, roleName, namespace string) (*rbacv1.RoleBinding, error) {
if err := validateComponent(component); err != nil {
return nil, err
}
if err := validateSubComponent(component, subComponent); err != nil {
return nil, err
}

obj, err := loadObject(filepath.Join("yaml", component, subComponent, "rolebinding.yaml"))
var fileName string
if roleName == "" {
fileName = "rolebinding.yaml"
} else {
if err := validateRoleName(roleName); err != nil {
return nil, err
}
fileName = "rolebinding_" + roleName + ".yaml"
}
obj, err := loadObject(filepath.Join("yaml", component, subComponent, fileName))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -469,6 +482,13 @@ func validateSubComponent(component, subComponent string) error {
return fmt.Errorf("unknown subComponent %q for component: %q", subComponent, component)
}

// validateRoleName checks roleName against the role names known to this
// package (RoleNameAuthReader and RoleNameLeaderElect). It returns nil for a
// known name and an error naming the offending value otherwise. The empty
// string is not a known role name.
func validateRoleName(roleName string) error {
	switch roleName {
	case RoleNameAuthReader, RoleNameLeaderElect:
		return nil
	default:
		return fmt.Errorf("unknown roleName %q", roleName)
	}
}

func Service(component, subComponent, namespace string) (*corev1.Service, error) {
if err := validateComponent(component); err != nil {
return nil, err
Expand Down
24 changes: 22 additions & 2 deletions pkg/manifests/manifests_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func TestGetRole(t *testing.T) {
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
expectError: true,
expectError: false,
},
{
component: ComponentSchedulerPlugin,
Expand Down Expand Up @@ -159,6 +159,7 @@ func TestGetRoleBinding(t *testing.T) {
type testCase struct {
component string
subComponent string
roleName string
expectError bool
}

Expand All @@ -171,9 +172,28 @@ func TestGetRoleBinding(t *testing.T) {
component: ComponentAPI,
expectError: true,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginController,
roleName: "",
expectError: false,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: "",
expectError: true,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: RoleNameAuthReader,
expectError: false,
},
{
component: ComponentSchedulerPlugin,
subComponent: SubComponentSchedulerPluginScheduler,
roleName: RoleNameLeaderElect,
expectError: false,
},
{
Expand All @@ -189,7 +209,7 @@ func TestGetRoleBinding(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.component, func(t *testing.T) {
obj, err := RoleBinding(tc.component, tc.subComponent, "")
obj, err := RoleBinding(tc.component, tc.subComponent, tc.roleName, "")
if tc.expectError {
if err == nil || obj != nil {
t.Fatalf("nil err or non-nil obj=%v", obj)
Expand Down
2 changes: 1 addition & 1 deletion pkg/manifests/rte/rte.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func GetManifests(plat platform.Platform, version platform.Version, namespace st
if err != nil {
return mf, err
}
mf.RoleBinding, err = manifests.RoleBinding(manifests.ComponentResourceTopologyExporter, "", namespace)
mf.RoleBinding, err = manifests.RoleBinding(manifests.ComponentResourceTopologyExporter, "", "", namespace)
if err != nil {
return mf, err
}
Expand Down
108 changes: 83 additions & 25 deletions pkg/manifests/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package sched
import (
"encoding/json"
"fmt"
"strings"
"time"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -61,12 +62,14 @@ type Manifests struct {
RBController *rbacv1.RoleBinding
DPController *appsv1.Deployment
// scheduler proper
SAScheduler *corev1.ServiceAccount
CRScheduler *rbacv1.ClusterRole
CRBScheduler *rbacv1.ClusterRoleBinding
RBScheduler *rbacv1.RoleBinding
DPScheduler *appsv1.Deployment
ConfigMap *corev1.ConfigMap
SAScheduler *corev1.ServiceAccount
CRScheduler *rbacv1.ClusterRole
RSchedulerElect *rbacv1.Role
CRBScheduler *rbacv1.ClusterRoleBinding
RBSchedulerAuth *rbacv1.RoleBinding
RBSchedulerElect *rbacv1.RoleBinding
DPScheduler *appsv1.Deployment
ConfigMap *corev1.ConfigMap
// internal fields
plat platform.Platform
}
Expand All @@ -75,19 +78,21 @@ func (mf Manifests) Clone() Manifests {
return Manifests{
plat: mf.plat,
// objects
Crd: mf.Crd.DeepCopy(),
Namespace: mf.Namespace.DeepCopy(),
SAController: mf.SAController.DeepCopy(),
CRController: mf.CRController.DeepCopy(),
CRBController: mf.CRBController.DeepCopy(),
DPController: mf.DPController.DeepCopy(),
RBController: mf.RBController.DeepCopy(),
SAScheduler: mf.SAScheduler.DeepCopy(),
CRScheduler: mf.CRScheduler.DeepCopy(),
CRBScheduler: mf.CRBScheduler.DeepCopy(),
DPScheduler: mf.DPScheduler.DeepCopy(),
ConfigMap: mf.ConfigMap.DeepCopy(),
RBScheduler: mf.RBScheduler.DeepCopy(),
Crd: mf.Crd.DeepCopy(),
Namespace: mf.Namespace.DeepCopy(),
SAController: mf.SAController.DeepCopy(),
CRController: mf.CRController.DeepCopy(),
CRBController: mf.CRBController.DeepCopy(),
DPController: mf.DPController.DeepCopy(),
RBController: mf.RBController.DeepCopy(),
SAScheduler: mf.SAScheduler.DeepCopy(),
CRScheduler: mf.CRScheduler.DeepCopy(),
RSchedulerElect: mf.RSchedulerElect.DeepCopy(),
CRBScheduler: mf.CRBScheduler.DeepCopy(),
RBSchedulerAuth: mf.RBSchedulerAuth.DeepCopy(),
RBSchedulerElect: mf.RBSchedulerElect.DeepCopy(),
DPScheduler: mf.DPScheduler.DeepCopy(),
ConfigMap: mf.ConfigMap.DeepCopy(),
}
}

Expand All @@ -100,12 +105,19 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest
ret.DPScheduler.Spec.Replicas = newInt32(replicas)
ret.DPController.Spec.Replicas = newInt32(replicas)

var err error
params := manifests.ConfigParams{
ProfileName: opts.ProfileName,
Cache: manifests.NewConfigCacheParams(),
}

leap, ok, err := leaderElectionParamsFromOpts(opts)
if err != nil {
return ret, err
}
if ok {
params.LeaderElection = &leap
}

if len(opts.CacheParamsConfigData) > 0 {
err = yaml.Unmarshal([]byte(opts.CacheParamsConfigData), params.Cache)
if err != nil {
Expand All @@ -131,7 +143,9 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest

schedupdate.SchedulerDeployment(ret.DPScheduler, opts.PullIfNotPresent, opts.CtrlPlaneAffinity, opts.Verbose)
schedupdate.ControllerDeployment(ret.DPController, opts.PullIfNotPresent, opts.CtrlPlaneAffinity)
if mf.plat == platform.OpenShift {
if opts.Namespace != "" {
ret.Namespace.Name = opts.Namespace
} else if mf.plat == platform.OpenShift {
ret.Namespace.Name = NamespaceOpenShift
}

Expand All @@ -140,9 +154,12 @@ func (mf Manifests) Render(logger logr.Logger, opts options.Scheduler) (Manifest
rbacupdate.RoleBinding(ret.RBController, ret.SAController.Name, ret.Namespace.Name)
ret.DPController.Namespace = ret.Namespace.Name

rbacupdate.Role(ret.RSchedulerElect, ret.Namespace.Name)

ret.SAScheduler.Namespace = ret.Namespace.Name
rbacupdate.ClusterRoleBinding(ret.CRBScheduler, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBScheduler, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBSchedulerElect, ret.SAScheduler.Name, ret.Namespace.Name)
rbacupdate.RoleBinding(ret.RBSchedulerAuth, ret.SAScheduler.Name, ret.Namespace.Name)
ret.DPScheduler.Namespace = ret.Namespace.Name
ret.ConfigMap.Namespace = ret.Namespace.Name

Expand All @@ -157,7 +174,9 @@ func (mf Manifests) ToObjects() []client.Object {
mf.CRScheduler,
mf.CRBScheduler,
mf.ConfigMap,
mf.RBScheduler,
mf.RSchedulerElect,
mf.RBSchedulerAuth,
mf.RBSchedulerElect,
mf.DPScheduler,
mf.SAController,
mf.CRController,
Expand Down Expand Up @@ -201,7 +220,15 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
if err != nil {
return mf, err
}
mf.RBScheduler, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, namespace)
mf.RSchedulerElect, err = manifests.Role(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, namespace)
if err != nil {
return mf, err
}
mf.RBSchedulerElect, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, manifests.RoleNameLeaderElect, namespace)
if err != nil {
return mf, err
}
mf.RBSchedulerAuth, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginScheduler, manifests.RoleNameAuthReader, namespace)
if err != nil {
return mf, err
}
Expand All @@ -222,7 +249,7 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
if err != nil {
return mf, err
}
mf.RBController, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginController, namespace)
mf.RBController, err = manifests.RoleBinding(manifests.ComponentSchedulerPlugin, manifests.SubComponentSchedulerPluginController, "", namespace)
if err != nil {
return mf, err
}
Expand All @@ -234,6 +261,37 @@ func GetManifests(plat platform.Platform, namespace string) (Manifests, error) {
return mf, nil
}

// leaderElectionParamsFromOpts builds the leader election parameters
// requested by opts.
//
// The boolean result mirrors opts.LeaderElection. When leader election is
// disabled, the zero-value parameters are returned with false and a nil
// error. When it is enabled, the parameters are initialized with
// manifests.SetDefaultsLeaderElection, LeaderElect is set, and
// opts.LeaderElectionResource (expected as "namespace/name") overrides the
// resource namespace and/or name; an empty half leaves the value set by the
// defaults untouched.
//
// An error is returned when opts.LeaderElectionResource does not contain
// exactly one "/" separator (this includes the empty string). In that case
// the defaulted parameters and true are still returned alongside the error,
// so callers must check the error first.
func leaderElectionParamsFromOpts(opts options.Scheduler) (manifests.LeaderElectionParams, bool, error) {
	leap := manifests.LeaderElectionParams{}
	if !opts.LeaderElection {
		return leap, false, nil
	}

	manifests.SetDefaultsLeaderElection(&leap)
	leap.LeaderElect = true

	// With a non-empty separator strings.Split returns the whole input as the
	// only token when the separator is absent, so anything other than exactly
	// two tokens means the value is not of the form "namespace/name".
	tokens := strings.Split(opts.LeaderElectionResource, "/")
	if len(tokens) != 2 {
		return leap, true, fmt.Errorf("malformed leader election resource: %q", opts.LeaderElectionResource)
	}

	if namespace := tokens[0]; namespace != "" {
		leap.ResourceNamespace = namespace
	}
	if name := tokens[1]; name != "" {
		leap.ResourceName = name
	}
	return leap, true, nil
}

// newInt32 returns a pointer to a newly allocated int32 holding value. Each
// call yields a distinct pointer, so callers may mutate the result freely.
func newInt32(value int32) *int32 {
	ptr := new(int32)
	*ptr = value
	return ptr
}
Expand Down
Loading
Loading