feat: topology-aware leader election config
The leader election config queries the cluster for its topology: if we are in a single-node (SNO) topology we use one set of values, otherwise we use the conventional defaults for an HA cluster (a condensed sketch of the wiring is included below, after the changed-file summary).

Signed-off-by: ehila <[email protected]>

upkeep: ran go mod tidy/vendor/verify

Signed-off-by: ehila <[email protected]>

upkeep: spelling fix

Signed-off-by: ehila <[email protected]>

feat: moved SNO logic to utils

moved the single-node (SNO) detection logic into the cluster.go file
added a check for a Kubernetes or OpenShift environment
upkeep: re-organized imports

Signed-off-by: ehila <[email protected]>

feat: added rbac for operator

Signed-off-by: ehila <[email protected]>

refactor: updated to use client-go leader election

updated to use the client-go leader election struct instead of openshift/api
updated wording to remove SNO acronym

Signed-off-by: ehila <[email protected]>
eggfoobar committed Jan 18, 2022
1 parent 0a84c95 commit e61462b
Showing 25 changed files with 318 additions and 379 deletions.
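For orientation, here is a condensed sketch of the wiring this commit introduces, pulled from the main.go and pkg/leaderelection changes below. It omits the other manager options (metrics, health probes, namespace) and is illustrative rather than a complete main.go.

// Condensed sketch of the new wiring (illustrative; see the full main.go diff below).
restConfig := ctrl.GetConfigOrDie()
kubeClient, err := client.New(restConfig, client.Options{Scheme: scheme})
if err != nil {
	setupLog.Error(err, "couldn't create client")
	os.Exit(1)
}

// Returns single-node timings when the cluster reports a SingleReplica control-plane
// topology (OpenShift) or has exactly one node (plain Kubernetes); HA defaults otherwise.
le := leaderelection.GetLeaderElectionConfig(kubeClient, enableLeaderElection)

mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
	Scheme:           scheme,
	LeaderElection:   enableLeaderElection,
	LeaseDuration:    &le.LeaseDuration,
	RenewDeadline:    &le.RenewDeadline,
	RetryPeriod:      &le.RetryPeriod,
	LeaderElectionID: "a56def2a.openshift.io",
})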
2 changes: 1 addition & 1 deletion controllers/sriovoperatorconfig_controller.go
@@ -80,7 +80,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
if errors.IsNotFound(err) {
singleNode, err := utils.IsSingleNodeCluster(r.Client)
if err != nil {
return reconcile.Result{}, fmt.Errorf("Couldn't check the anount of nodes in the cluster")
return reconcile.Result{}, fmt.Errorf("Couldn't check the amount of nodes in the cluster")
}

// Default Config object not found, create it.
3 changes: 3 additions & 0 deletions deploy/clusterrole.yaml
@@ -30,6 +30,9 @@ rules:
- apiGroups: ["machineconfiguration.openshift.io"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
8 changes: 8 additions & 0 deletions deploy/role.yaml
@@ -48,6 +48,14 @@ rules:
- rolebindings
verbs:
- '*'
- apiGroups:
- config.openshift.io
resources:
- infrastructures
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
3 changes: 3 additions & 0 deletions deployment/sriov-network-operator/templates/clusterrole.yaml
@@ -32,6 +32,9 @@ rules:
- apiGroups: ["machineconfiguration.openshift.io"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
8 changes: 8 additions & 0 deletions deployment/sriov-network-operator/templates/role.yaml
@@ -51,6 +51,14 @@ rules:
- rolebindings
verbs:
- '*'
- apiGroups:
- config.openshift.io
resources:
- infrastructures
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
1 change: 0 additions & 1 deletion go.mod
@@ -3,7 +3,6 @@ module github.com/k8snetworkplumbingwg/sriov-network-operator
go 1.16

require (
cloud.google.com/go v0.58.0 // indirect
github.com/Masterminds/sprig/v3 v3.2.2
github.com/blang/semver v3.5.1+incompatible
github.com/cenkalti/backoff v2.2.1+incompatible
50 changes: 3 additions & 47 deletions go.sum

Large diffs are not rendered by default.

39 changes: 21 additions & 18 deletions main.go
@@ -36,14 +36,14 @@ import (
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/controllers"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/leaderelection"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
//+kubebuilder:scaffold:imports
)
@@ -77,22 +77,34 @@ func main() {
opts.BindFlags(flag.CommandLine)
flag.Parse()

restConfig := ctrl.GetConfigOrDie()
kubeClient, err := client.New(restConfig, client.Options{Scheme: scheme})
if err != nil {
setupLog.Error(err, "couldn't create client")
os.Exit(1)
}

le := leaderelection.GetLeaderElectionConfig(kubeClient, enableLeaderElection)

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
namespace := os.Getenv("NAMESPACE")
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: metricsAddr,
Port: 9443,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaseDuration: &le.LeaseDuration,
RenewDeadline: &le.RenewDeadline,
RetryPeriod: &le.RetryPeriod,
LeaderElectionID: "a56def2a.openshift.io",
Namespace: namespace,
})
if err != nil {
setupLog.Error(err, "unable to start manager")
os.Exit(1)
}
mgrGlobal, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgrGlobal, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: "0",
})
@@ -144,14 +156,14 @@ func main()
// +kubebuilder:scaffold:builder

// Create a default SriovNetworkNodePolicy
err = createDefaultPolicy(ctrl.GetConfigOrDie())
err = createDefaultPolicy(kubeClient)
if err != nil {
setupLog.Error(err, "unable to create default SriovNetworkNodePolicy")
os.Exit(1)
}

// Create default SriovOperatorConfig
err = createDefaultOperatorConfig(ctrl.GetConfigOrDie())
err = createDefaultOperatorConfig(kubeClient)
if err != nil {
setupLog.Error(err, "unable to create default SriovOperatorConfig")
os.Exit(1)
@@ -194,12 +206,8 @@ func initNicIdMap() error {
return nil
}

func createDefaultPolicy(cfg *rest.Config) error {
func createDefaultPolicy(c client.Client) error {
logger := setupLog.WithName("createDefaultPolicy")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("Couldn't create client: %v", err)
}
policy := &sriovnetworkv1.SriovNetworkNodePolicy{
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
NumVfs: 0,
@@ -209,7 +217,7 @@ func createDefaultPolicy(cfg *rest.Config) error {
}
name := "default"
namespace := os.Getenv("NAMESPACE")
err = c.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, policy)
err := c.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, policy)
if err != nil {
if errors.IsNotFound(err) {
logger.Info("Create a default SriovNetworkNodePolicy")
@@ -226,16 +234,11 @@ func createDefaultPolicy(cfg *rest.Config) error {
return nil
}

func createDefaultOperatorConfig(cfg *rest.Config) error {
func createDefaultOperatorConfig(c client.Client) error {
logger := setupLog.WithName("createDefaultOperatorConfig")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("Couldn't create client: %v", err)
}

singleNode, err := utils.IsSingleNodeCluster(c)
if err != nil {
return fmt.Errorf("Couldn't check the anount of nodes in the cluster")
return fmt.Errorf("Couldn't check the amount of nodes in the cluster")
}

enableAdmissionController := os.Getenv("ENABLE_ADMISSION_CONTROLLER") == "true"
50 changes: 50 additions & 0 deletions pkg/leaderelection/leaderelection.go
@@ -0,0 +1,50 @@
package leaderelection

import (
"time"

"github.com/golang/glog"
"k8s.io/client-go/tools/leaderelection"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
)

const (
// Defaults follow conventions
// https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability
// Impl Calculations: https://github.com/openshift/library-go/commit/7e7d216ed91c3119800219c9194e5e57113d059a
defaultLeaseDuration = 137 * time.Second
defaultRenewDeadline = 107 * time.Second
defaultRetryPeriod = 26 * time.Second
)

func GetLeaderElectionConfig(c client.Client, enabled bool) (defaultConfig leaderelection.LeaderElectionConfig) {
defaultConfig = leaderelection.LeaderElectionConfig{
LeaseDuration: defaultLeaseDuration,
RenewDeadline: defaultRenewDeadline,
RetryPeriod: defaultRetryPeriod,
}

if enabled {
isSingleNode, err := utils.IsSingleNodeCluster(c)
if err != nil {
glog.Warningf("unable to get cluster infrastructure status, using HA cluster values for leader election: %v", err)
return
}
if isSingleNode {
return leaderElectionSingleNodeConfig(defaultConfig)
}
}
return
}

// Default leader election for Single Node environments
// Impl Calculations:
// https://github.com/openshift/library-go/commit/2612981f3019479805ac8448b997266fc07a236a#diff-61dd95c7fd45fa18038e825205fbfab8a803f1970068157608b6b1e9e6c27248R127
func leaderElectionSingleNodeConfig(config leaderelection.LeaderElectionConfig) leaderelection.LeaderElectionConfig {
config.LeaseDuration = 270 * time.Second
config.RenewDeadline = 240 * time.Second
config.RetryPeriod = 60 * time.Second
return config
}
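A quick way to exercise the single-node branch is a unit test against controller-runtime's fake client. The sketch below is an illustration and not part of this commit: it assumes the fake client builder (sigs.k8s.io/controller-runtime/pkg/client/fake) and configv1.AddToScheme from openshift/api are available, and it reuses the CLUSTER_TYPE variable and utils.ClusterTypeOpenshift constant referenced in pkg/utils/cluster.go.

// leaderelection_test.go (illustrative sketch only, not part of this commit)
package leaderelection

import (
	"os"
	"testing"
	"time"

	configv1 "github.com/openshift/api/config/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"

	"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
)

func TestGetLeaderElectionConfigSingleNode(t *testing.T) {
	// Pretend we are on OpenShift with a single-replica control plane.
	os.Setenv("CLUSTER_TYPE", utils.ClusterTypeOpenshift)

	scheme := runtime.NewScheme()
	_ = configv1.AddToScheme(scheme)
	infra := &configv1.Infrastructure{
		ObjectMeta: metav1.ObjectMeta{Name: "cluster"},
		Status: configv1.InfrastructureStatus{
			ControlPlaneTopology: configv1.SingleReplicaTopologyMode,
		},
	}
	c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(infra).Build()

	got := GetLeaderElectionConfig(c, true)
	if got.LeaseDuration != 270*time.Second {
		t.Fatalf("expected single-node lease duration, got %v", got.LeaseDuration)
	}
}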
29 changes: 28 additions & 1 deletion pkg/utils/cluster.go
@@ -2,14 +2,30 @@ package utils

import (
"context"
"fmt"
"os"

"github.com/golang/glog"

configv1 "github.com/openshift/api/config/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
// default Infrastructure resource name for Openshift
infraResourceName = "cluster"
)

func IsSingleNodeCluster(c client.Client) (bool, error) {
if os.Getenv("CLUSTER_TYPE") == ClusterTypeOpenshift {
return openshiftSingleNodeClusterStatus(c)
}
return k8sSingleNodeClusterStatus(c)
}

func k8sSingleNodeClusterStatus(c client.Client) (bool, error) {
nodeList := &corev1.NodeList{}
err := c.List(context.TODO(), nodeList)
if err != nil {
@@ -21,6 +37,17 @@ func IsSingleNodeCluster(c client.Client) (bool, error) {
glog.Infof("IsSingleNodeCluster(): one node found in the cluster")
return true, nil
}

return false, nil
}

func openshiftSingleNodeClusterStatus(c client.Client) (bool, error) {
infra := &configv1.Infrastructure{}
err := c.Get(context.TODO(), types.NamespacedName{Name: infraResourceName}, infra)
if err != nil {
return false, err
}
if infra == nil {
return false, fmt.Errorf("getting resource Infrastructure (name: %s) succeeded but object was nil", infraResourceName)
}
return infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode, nil
}
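One easy-to-miss prerequisite for the OpenShift path: the client handed to IsSingleNodeCluster must be built with a scheme that can decode configv1.Infrastructure, and the operator needs the infrastructures RBAC added above. The diff does not show where the scheme registration lives, so the following is a hedged sketch of what that registration typically looks like, not a claim about this repository's main.go.

// Hedged sketch (assumed, not shown in this diff): the scheme used to build the
// controller-runtime client must know about configv1.Infrastructure, otherwise the
// Get call above fails with a "no kind is registered" scheme error.
import (
	configv1 "github.com/openshift/api/config/v1"
	"k8s.io/apimachinery/pkg/runtime"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)

var scheme = runtime.NewScheme()

func init() {
	utilruntime.Must(clientgoscheme.AddToScheme(scheme)) // core Kubernetes types (NodeList, etc.)
	utilruntime.Must(configv1.AddToScheme(scheme))       // OpenShift config API (Infrastructure)
}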
4 changes: 0 additions & 4 deletions vendor/google.golang.org/grpc/README.md

Some generated files are not rendered by default.

17 changes: 8 additions & 9 deletions vendor/google.golang.org/grpc/clientconn.go

Some generated files are not rendered by default.

18 changes: 4 additions & 14 deletions vendor/google.golang.org/grpc/credentials/tls.go

Some generated files are not rendered by default.

12 changes: 0 additions & 12 deletions vendor/google.golang.org/grpc/dialoptions.go

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion vendor/google.golang.org/grpc/go.mod

Some generated files are not rendered by default.
