Skip to content

Commit

Permalink
Merge pull request #220 from eggfoobar/sno_leader_election_config
Browse files Browse the repository at this point in the history
HA/SNO leader election config
  • Loading branch information
adrianchiris authored Jan 20, 2022
2 parents 4baf8e5 + e61462b commit dec7774
Show file tree
Hide file tree
Showing 84 changed files with 4,558 additions and 2,011 deletions.
2 changes: 1 addition & 1 deletion controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
if errors.IsNotFound(err) {
singleNode, err := utils.IsSingleNodeCluster(r.Client)
if err != nil {
return reconcile.Result{}, fmt.Errorf("Couldn't check the anount of nodes in the cluster")
return reconcile.Result{}, fmt.Errorf("Couldn't check the amount of nodes in the cluster")
}

// Default Config object not found, create it.
Expand Down
3 changes: 3 additions & 0 deletions deploy/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ rules:
- apiGroups: ["machineconfiguration.openshift.io"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
8 changes: 8 additions & 0 deletions deploy/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ rules:
- rolebindings
verbs:
- '*'
- apiGroups:
- config.openshift.io
resources:
- infrastructures
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
Expand Down
3 changes: 3 additions & 0 deletions deployment/sriov-network-operator/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ rules:
- apiGroups: ["machineconfiguration.openshift.io"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
8 changes: 8 additions & 0 deletions deployment/sriov-network-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ rules:
- rolebindings
verbs:
- '*'
- apiGroups:
- config.openshift.io
resources:
- infrastructures
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ module github.com/k8snetworkplumbingwg/sriov-network-operator
go 1.16

require (
cloud.google.com/go v0.58.0 // indirect
github.com/Masterminds/sprig/v3 v3.2.2
github.com/blang/semver v3.5.1+incompatible
github.com/cenkalti/backoff v2.2.1+incompatible
Expand All @@ -19,6 +18,7 @@ require (
github.com/k8snetworkplumbingwg/network-attachment-definition-client v0.0.0-20200626054723-37f83d1996bc
github.com/onsi/ginkgo v1.14.1
github.com/onsi/gomega v1.10.2
github.com/openshift/api v0.0.0-20210325163602-e37aaed4c278
github.com/openshift/client-go v0.0.0-20200827190008-3062137373b5
github.com/openshift/machine-config-operator v0.0.1-0.20201023110058-6c8bd9b2915c
github.com/pkg/errors v0.9.1
Expand Down
57 changes: 9 additions & 48 deletions go.sum

Large diffs are not rendered by default.

39 changes: 21 additions & 18 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ import (
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/controllers"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/leaderelection"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
//+kubebuilder:scaffold:imports
)
Expand Down Expand Up @@ -77,22 +77,34 @@ func main() {
opts.BindFlags(flag.CommandLine)
flag.Parse()

restConfig := ctrl.GetConfigOrDie()
kubeClient, err := client.New(restConfig, client.Options{Scheme: scheme})
if err != nil {
setupLog.Error(err, "couldn't create client")
os.Exit(1)
}

le := leaderelection.GetLeaderElectionConfig(kubeClient, enableLeaderElection)

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
namespace := os.Getenv("NAMESPACE")
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: metricsAddr,
Port: 9443,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaseDuration: &le.LeaseDuration,
RenewDeadline: &le.RenewDeadline,
RetryPeriod: &le.RetryPeriod,
LeaderElectionID: "a56def2a.openshift.io",
Namespace: namespace,
})
if err != nil {
setupLog.Error(err, "unable to start manager")
os.Exit(1)
}
mgrGlobal, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgrGlobal, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: "0",
})
Expand Down Expand Up @@ -144,14 +156,14 @@ func main() {
// +kubebuilder:scaffold:builder

// Create a default SriovNetworkNodePolicy
err = createDefaultPolicy(ctrl.GetConfigOrDie())
err = createDefaultPolicy(kubeClient)
if err != nil {
setupLog.Error(err, "unable to create default SriovNetworkNodePolicy")
os.Exit(1)
}

// Create default SriovOperatorConfig
err = createDefaultOperatorConfig(ctrl.GetConfigOrDie())
err = createDefaultOperatorConfig(kubeClient)
if err != nil {
setupLog.Error(err, "unable to create default SriovOperatorConfig")
os.Exit(1)
Expand Down Expand Up @@ -194,12 +206,8 @@ func initNicIdMap() error {
return nil
}

func createDefaultPolicy(cfg *rest.Config) error {
func createDefaultPolicy(c client.Client) error {
logger := setupLog.WithName("createDefaultPolicy")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("Couldn't create client: %v", err)
}
policy := &sriovnetworkv1.SriovNetworkNodePolicy{
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
NumVfs: 0,
Expand All @@ -209,7 +217,7 @@ func createDefaultPolicy(cfg *rest.Config) error {
}
name := "default"
namespace := os.Getenv("NAMESPACE")
err = c.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, policy)
err := c.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, policy)
if err != nil {
if errors.IsNotFound(err) {
logger.Info("Create a default SriovNetworkNodePolicy")
Expand All @@ -226,16 +234,11 @@ func createDefaultPolicy(cfg *rest.Config) error {
return nil
}

func createDefaultOperatorConfig(cfg *rest.Config) error {
func createDefaultOperatorConfig(c client.Client) error {
logger := setupLog.WithName("createDefaultOperatorConfig")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("Couldn't create client: %v", err)
}

singleNode, err := utils.IsSingleNodeCluster(c)
if err != nil {
return fmt.Errorf("Couldn't check the anount of nodes in the cluster")
return fmt.Errorf("Couldn't check the amount of nodes in the cluster")
}

enableAdmissionController := os.Getenv("ENABLE_ADMISSION_CONTROLLER") == "true"
Expand Down
50 changes: 50 additions & 0 deletions pkg/leaderelection/leaderelection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package leaderelection

import (
"time"

"github.com/golang/glog"
"k8s.io/client-go/tools/leaderelection"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
)

const (
// Default leader election timings. GetLeaderElectionConfig returns these
// values unless leader election is enabled and the cluster is detected as
// single node.
// The defaults follow the conventions documented at
// https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability
// Implementation calculations: https://github.com/openshift/library-go/commit/7e7d216ed91c3119800219c9194e5e57113d059a

// defaultLeaseDuration is the default value for LeaderElectionConfig.LeaseDuration.
defaultLeaseDuration = 137 * time.Second
// defaultRenewDeadline is the default value for LeaderElectionConfig.RenewDeadline.
defaultRenewDeadline = 107 * time.Second
// defaultRetryPeriod is the default value for LeaderElectionConfig.RetryPeriod.
defaultRetryPeriod = 26 * time.Second
)

// GetLeaderElectionConfig returns the leader election timings (lease duration,
// renew deadline and retry period) the operator should use.
//
// The result always starts from the package defaults. When enabled is true, c
// is used to ask utils.IsSingleNodeCluster whether the cluster is single node;
// if so, the single node timings are returned instead. If that check fails, a
// warning is logged and the defaults are returned. When enabled is false, c is
// not used at all.
func GetLeaderElectionConfig(c client.Client, enabled bool) (defaultConfig leaderelection.LeaderElectionConfig) {
	defaultConfig = leaderelection.LeaderElectionConfig{
		LeaseDuration: defaultLeaseDuration,
		RenewDeadline: defaultRenewDeadline,
		RetryPeriod:   defaultRetryPeriod,
	}

	// Nothing to tune when leader election is switched off.
	if !enabled {
		return defaultConfig
	}

	singleNode, err := utils.IsSingleNodeCluster(c)
	switch {
	case err != nil:
		// Best effort: fall back to the defaults rather than failing startup.
		glog.Warningf("unable to get cluster infrastructure status, using HA cluster values for leader election: %v", err)
	case singleNode:
		defaultConfig = leaderElectionSingleNodeConfig(defaultConfig)
	}
	return defaultConfig
}

// leaderElectionSingleNodeConfig returns a copy of config whose LeaseDuration,
// RenewDeadline and RetryPeriod are replaced by the values used for Single Node
// environments. All other fields of config are passed through untouched.
// Impl Calculations:
// https://github.com/openshift/library-go/commit/2612981f3019479805ac8448b997266fc07a236a#diff-61dd95c7fd45fa18038e825205fbfab8a803f1970068157608b6b1e9e6c27248R127
func leaderElectionSingleNodeConfig(config leaderelection.LeaderElectionConfig) leaderelection.LeaderElectionConfig {
	const (
		singleNodeLeaseDuration = 270 * time.Second
		singleNodeRenewDeadline = 240 * time.Second
		singleNodeRetryPeriod   = 60 * time.Second
	)

	// config was received by value, so the caller's struct is not modified.
	config.LeaseDuration, config.RenewDeadline, config.RetryPeriod =
		singleNodeLeaseDuration, singleNodeRenewDeadline, singleNodeRetryPeriod
	return config
}
29 changes: 28 additions & 1 deletion pkg/utils/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,30 @@ package utils

import (
"context"
"fmt"
"os"

"github.com/golang/glog"

configv1 "github.com/openshift/api/config/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
// infraResourceName is the name of the Infrastructure resource that is
// fetched on Openshift to read the control plane topology.
infraResourceName = "cluster"
)

// IsSingleNodeCluster reports whether the cluster is a single node cluster.
//
// The detection strategy depends on the CLUSTER_TYPE environment variable: when
// it equals ClusterTypeOpenshift the answer comes from
// openshiftSingleNodeClusterStatus, otherwise from k8sSingleNodeClusterStatus.
// The result and error of the selected helper are returned unchanged.
func IsSingleNodeCluster(c client.Client) (bool, error) {
	clusterType := os.Getenv("CLUSTER_TYPE")
	if clusterType != ClusterTypeOpenshift {
		return k8sSingleNodeClusterStatus(c)
	}
	return openshiftSingleNodeClusterStatus(c)
}

func k8sSingleNodeClusterStatus(c client.Client) (bool, error) {
nodeList := &corev1.NodeList{}
err := c.List(context.TODO(), nodeList)
if err != nil {
Expand All @@ -21,6 +37,17 @@ func IsSingleNodeCluster(c client.Client) (bool, error) {
glog.Infof("IsSingleNodeCluster(): one node found in the cluster")
return true, nil
}

return false, nil
}

// openshiftSingleNodeClusterStatus reports whether an Openshift cluster runs a
// single replica control plane.
//
// It fetches the Infrastructure resource named infraResourceName through c (the
// lookup key sets no namespace) and returns true when its
// Status.ControlPlaneTopology equals configv1.SingleReplicaTopologyMode.
//
// If the lookup fails it returns false together with the error, wrapped with
// the resource name for context; the underlying error remains reachable through
// errors.Is / errors.As / errors.Unwrap.
func openshiftSingleNodeClusterStatus(c client.Client) (bool, error) {
	// infra is the address of a composite literal and therefore never nil, so
	// no nil check is required once Get has succeeded.
	infra := &configv1.Infrastructure{}
	key := types.NamespacedName{Name: infraResourceName}
	if err := c.Get(context.TODO(), key, infra); err != nil {
		return false, fmt.Errorf("getting resource Infrastructure (name: %s): %w", infraResourceName, err)
	}
	return infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode, nil
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit dec7774

Please sign in to comment.