Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HA/SNO leader election config #220

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
if errors.IsNotFound(err) {
singleNode, err := utils.IsSingleNodeCluster(r.Client)
if err != nil {
return reconcile.Result{}, fmt.Errorf("Couldn't check the anount of nodes in the cluster")
return reconcile.Result{}, fmt.Errorf("Couldn't check the amount of nodes in the cluster")
}

// Default Config object not found, create it.
Expand Down
3 changes: 3 additions & 0 deletions deploy/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ rules:
- apiGroups: ["machineconfiguration.openshift.io"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
8 changes: 8 additions & 0 deletions deploy/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ rules:
- rolebindings
verbs:
- '*'
- apiGroups:
- config.openshift.io
resources:
- infrastructures
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
Expand Down
3 changes: 3 additions & 0 deletions deployment/sriov-network-operator/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ rules:
- apiGroups: ["machineconfiguration.openshift.io"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["config.openshift.io"]
resources: ["infrastructures"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
8 changes: 8 additions & 0 deletions deployment/sriov-network-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ rules:
- rolebindings
verbs:
- '*'
- apiGroups:
- config.openshift.io
resources:
- infrastructures
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ module github.com/k8snetworkplumbingwg/sriov-network-operator
go 1.16

require (
cloud.google.com/go v0.58.0 // indirect
github.com/Masterminds/sprig/v3 v3.2.2
github.com/blang/semver v3.5.1+incompatible
github.com/cenkalti/backoff v2.2.1+incompatible
Expand All @@ -19,6 +18,7 @@ require (
github.com/k8snetworkplumbingwg/network-attachment-definition-client v0.0.0-20200626054723-37f83d1996bc
github.com/onsi/ginkgo v1.14.1
github.com/onsi/gomega v1.10.2
github.com/openshift/api v0.0.0-20210325163602-e37aaed4c278
github.com/openshift/client-go v0.0.0-20200827190008-3062137373b5
github.com/openshift/machine-config-operator v0.0.1-0.20201023110058-6c8bd9b2915c
github.com/pkg/errors v0.9.1
Expand Down
57 changes: 9 additions & 48 deletions go.sum

Large diffs are not rendered by default.

39 changes: 21 additions & 18 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ import (
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/controllers"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/leaderelection"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
//+kubebuilder:scaffold:imports
)
Expand Down Expand Up @@ -77,22 +77,34 @@ func main() {
opts.BindFlags(flag.CommandLine)
flag.Parse()

restConfig := ctrl.GetConfigOrDie()
kubeClient, err := client.New(restConfig, client.Options{Scheme: scheme})
if err != nil {
setupLog.Error(err, "couldn't create client")
os.Exit(1)
}

le := leaderelection.GetLeaderElectionConfig(kubeClient, enableLeaderElection)

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
namespace := os.Getenv("NAMESPACE")
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: metricsAddr,
Port: 9443,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaseDuration: &le.LeaseDuration,
RenewDeadline: &le.RenewDeadline,
RetryPeriod: &le.RetryPeriod,
LeaderElectionID: "a56def2a.openshift.io",
Namespace: namespace,
})
if err != nil {
setupLog.Error(err, "unable to start manager")
os.Exit(1)
}
mgrGlobal, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
mgrGlobal, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: "0",
})
Expand Down Expand Up @@ -144,14 +156,14 @@ func main() {
// +kubebuilder:scaffold:builder

// Create a default SriovNetworkNodePolicy
err = createDefaultPolicy(ctrl.GetConfigOrDie())
err = createDefaultPolicy(kubeClient)
if err != nil {
setupLog.Error(err, "unable to create default SriovNetworkNodePolicy")
os.Exit(1)
}

// Create default SriovOperatorConfig
err = createDefaultOperatorConfig(ctrl.GetConfigOrDie())
err = createDefaultOperatorConfig(kubeClient)
if err != nil {
setupLog.Error(err, "unable to create default SriovOperatorConfig")
os.Exit(1)
Expand Down Expand Up @@ -194,12 +206,8 @@ func initNicIdMap() error {
return nil
}

func createDefaultPolicy(cfg *rest.Config) error {
func createDefaultPolicy(c client.Client) error {
logger := setupLog.WithName("createDefaultPolicy")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("Couldn't create client: %v", err)
}
policy := &sriovnetworkv1.SriovNetworkNodePolicy{
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
NumVfs: 0,
Expand All @@ -209,7 +217,7 @@ func createDefaultPolicy(cfg *rest.Config) error {
}
name := "default"
namespace := os.Getenv("NAMESPACE")
err = c.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, policy)
err := c.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, policy)
if err != nil {
if errors.IsNotFound(err) {
logger.Info("Create a default SriovNetworkNodePolicy")
Expand All @@ -226,16 +234,11 @@ func createDefaultPolicy(cfg *rest.Config) error {
return nil
}

func createDefaultOperatorConfig(cfg *rest.Config) error {
func createDefaultOperatorConfig(c client.Client) error {
logger := setupLog.WithName("createDefaultOperatorConfig")
c, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("Couldn't create client: %v", err)
}

singleNode, err := utils.IsSingleNodeCluster(c)
if err != nil {
return fmt.Errorf("Couldn't check the anount of nodes in the cluster")
return fmt.Errorf("Couldn't check the amount of nodes in the cluster")
}

enableAdmissionController := os.Getenv("ENABLE_ADMISSION_CONTROLLER") == "true"
Expand Down
50 changes: 50 additions & 0 deletions pkg/leaderelection/leaderelection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package leaderelection

import (
"time"

"github.com/golang/glog"
"k8s.io/client-go/tools/leaderelection"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
)

const (
// Default leader election timings used to populate the configuration built by
// GetLeaderElectionConfig. The values follow the OpenShift high-availability conventions:
// https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability
// Implementation calculations: https://github.com/openshift/library-go/commit/7e7d216ed91c3119800219c9194e5e57113d059a

// defaultLeaseDuration is the default LeaderElectionConfig.LeaseDuration.
defaultLeaseDuration = 137 * time.Second
// defaultRenewDeadline is the default LeaderElectionConfig.RenewDeadline.
defaultRenewDeadline = 107 * time.Second
// defaultRetryPeriod is the default LeaderElectionConfig.RetryPeriod.
defaultRetryPeriod = 26 * time.Second
)

// GetLeaderElectionConfig builds the leader election timings for the operator.
//
// The result always starts from the package defaults (defaultLeaseDuration,
// defaultRenewDeadline, defaultRetryPeriod). When enabled is true, the cluster
// is queried through c via utils.IsSingleNodeCluster:
//   - if the query fails, a warning is logged and the defaults are returned;
//   - if the cluster is single-node, the single-node timings are returned;
//   - otherwise the defaults are returned.
//
// When enabled is false the cluster is not queried at all.
func GetLeaderElectionConfig(c client.Client, enabled bool) (defaultConfig leaderelection.LeaderElectionConfig) {
	defaultConfig = leaderelection.LeaderElectionConfig{
		LeaseDuration: defaultLeaseDuration,
		RenewDeadline: defaultRenewDeadline,
		RetryPeriod:   defaultRetryPeriod,
	}

	// Leader election is disabled: the defaults are returned untouched.
	if !enabled {
		return defaultConfig
	}

	singleNode, err := utils.IsSingleNodeCluster(c)
	switch {
	case err != nil:
		// Best effort: fall back to the defaults rather than failing startup.
		glog.Warningf("unable to get cluster infrastructure status, using HA cluster values for leader election: %v", err)
	case singleNode:
		defaultConfig = leaderElectionSingleNodeConfig(defaultConfig)
	}
	return defaultConfig
}

// leaderElectionSingleNodeConfig returns a copy of config whose LeaseDuration,
// RenewDeadline and RetryPeriod are replaced with the values used for Single
// Node environments. All other fields of config are carried over unchanged.
// Impl Calculations:
// https://github.com/openshift/library-go/commit/2612981f3019479805ac8448b997266fc07a236a#diff-61dd95c7fd45fa18038e825205fbfab8a803f1970068157608b6b1e9e6c27248R127
func leaderElectionSingleNodeConfig(config leaderelection.LeaderElectionConfig) leaderelection.LeaderElectionConfig {
	const (
		singleNodeLeaseDuration = 270 * time.Second
		singleNodeRenewDeadline = 240 * time.Second
		singleNodeRetryPeriod   = 60 * time.Second
	)

	// config is received by value, so the caller's struct is not modified.
	tuned := config
	tuned.LeaseDuration = singleNodeLeaseDuration
	tuned.RenewDeadline = singleNodeRenewDeadline
	tuned.RetryPeriod = singleNodeRetryPeriod
	return tuned
}
29 changes: 28 additions & 1 deletion pkg/utils/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,30 @@ package utils

import (
"context"
"fmt"
"os"

"github.com/golang/glog"

configv1 "github.com/openshift/api/config/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
// infraResourceName is the default name of the OpenShift Infrastructure
// resource; it is the object name looked up by openshiftSingleNodeClusterStatus.
infraResourceName = "cluster"
)

// IsSingleNodeCluster reports whether the cluster reachable through c is a
// single-node cluster.
//
// The detection strategy is chosen from the CLUSTER_TYPE environment variable:
// when it equals ClusterTypeOpenshift the result comes from
// openshiftSingleNodeClusterStatus, otherwise from k8sSingleNodeClusterStatus.
// The error of the selected helper is returned as-is.
func IsSingleNodeCluster(c client.Client) (bool, error) {
	switch os.Getenv("CLUSTER_TYPE") {
	case ClusterTypeOpenshift:
		return openshiftSingleNodeClusterStatus(c)
	default:
		return k8sSingleNodeClusterStatus(c)
	}
}

func k8sSingleNodeClusterStatus(c client.Client) (bool, error) {
nodeList := &corev1.NodeList{}
err := c.List(context.TODO(), nodeList)
if err != nil {
Expand All @@ -21,6 +37,17 @@ func IsSingleNodeCluster(c client.Client) (bool, error) {
glog.Infof("IsSingleNodeCluster(): one node found in the cluster")
return true, nil
}

return false, nil
}

// openshiftSingleNodeClusterStatus reports whether an OpenShift cluster is
// single-node. It fetches the Infrastructure object named infraResourceName
// through c and returns true when its Status.ControlPlaneTopology equals
// configv1.SingleReplicaTopologyMode.
//
// If the lookup fails, false is returned together with the client error,
// wrapped with the name of the resource that was requested.
func openshiftSingleNodeClusterStatus(c client.Client) (bool, error) {
	infra := &configv1.Infrastructure{}
	// The lookup key carries a name only; no namespace is set.
	if err := c.Get(context.TODO(), types.NamespacedName{Name: infraResourceName}, infra); err != nil {
		return false, fmt.Errorf("getting resource Infrastructure (name: %s): %w", infraResourceName, err)
	}
	// infra is the address of a composite literal and is never reassigned, so
	// it cannot be nil here and no nil guard is required.
	return infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode, nil
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading