
Add support for scaling out the control plane with dedicated apiserver nodes
Ole Markus With committed Feb 4, 2021
1 parent 2c8e55f commit a437632
Showing 26 changed files with 260 additions and 43 deletions.
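In short: this commit introduces a dedicated `Apiserver` instance-group role so the control plane can be scaled out with nodes that run only kube-apiserver. Masters keep their current behavior; the new role reuses the master's API-server wiring (secrets, etcd client configuration, load-balancer attachment) while skipping the rest of the control plane. A rough sketch of how the new role and the predicates added below fit together (the group name and subnet are illustrative, and the snippet assumes the kops module is importable):

    package main

    import (
    	"fmt"

    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    	"k8s.io/kops/pkg/apis/kops"
    )

    func main() {
    	// Hypothetical instance group that runs only the API server.
    	ig := &kops.InstanceGroup{
    		ObjectMeta: metav1.ObjectMeta{Name: "apiservers-eu-central-1a"},
    		Spec: kops.InstanceGroupSpec{
    			Role:    kops.InstanceGroupRoleAPIServer,
    			Subnets: []string{"eu-central-1a"},
    		},
    	}
    	// Predicates added in pkg/apis/kops/instancegroup.go below:
    	fmt.Println(ig.IsMaster(), ig.IsAPIServerOnly(), ig.HasAPIServer()) // false true true
    }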
10 changes: 9 additions & 1 deletion nodeup/pkg/model/context.go
@@ -57,6 +57,9 @@ type NodeupModelContext struct {
 	// IsMaster is true if the InstanceGroup has a role of master (populated by Init)
 	IsMaster bool

+	// HasAPIServer is true if the InstanceGroup has a role of master or apiserver (populated by Init)
+	HasAPIServer bool
+
 	kubernetesVersion semver.Version
 	bootstrapCerts    map[string]*nodetasks.BootstrapCert
 }
@@ -70,10 +73,15 @@ func (c *NodeupModelContext) Init() error {
 	c.kubernetesVersion = *k8sVersion
 	c.bootstrapCerts = map[string]*nodetasks.BootstrapCert{}

-	if c.NodeupConfig.InstanceGroupRole == kops.InstanceGroupRoleMaster {
+	role := c.NodeupConfig.InstanceGroupRole
+
+	if role == kops.InstanceGroupRoleMaster {
 		c.IsMaster = true
 	}
+
+	if role == kops.InstanceGroupRoleMaster || role == kops.InstanceGroupRoleAPIServer {
+		c.HasAPIServer = true
+	}
 	return nil
 }

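For reference, the two nodeup flags now derive from the role like this (a minimal standalone sketch of Init's logic, with roles reduced to strings):

    package main

    import "fmt"

    // flags mirrors the derivation in Init above.
    func flags(role string) (isMaster, hasAPIServer bool) {
    	isMaster = role == "Master"
    	hasAPIServer = role == "Master" || role == "Apiserver"
    	return isMaster, hasAPIServer
    }

    func main() {
    	for _, role := range []string{"Master", "Apiserver", "Node", "Bastion"} {
    		isMaster, hasAPIServer := flags(role)
    		fmt.Printf("%-10s IsMaster=%-5t HasAPIServer=%t\n", role, isMaster, hasAPIServer)
    	}
    }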
2 changes: 1 addition & 1 deletion nodeup/pkg/model/etcd_manager_tls.go
@@ -32,7 +32,7 @@ var _ fi.ModelBuilder = &EtcdManagerTLSBuilder{}

 // Build is responsible for TLS configuration for etcd-manager
 func (b *EtcdManagerTLSBuilder) Build(ctx *fi.ModelBuilderContext) error {
-	if !b.IsMaster || !b.UseEtcdManager() {
+	if !b.HasAPIServer || !b.UseEtcdManager() {
 		return nil
 	}

21 changes: 16 additions & 5 deletions nodeup/pkg/model/kube_apiserver.go
@@ -52,7 +52,7 @@ var _ fi.ModelBuilder = &KubeAPIServerBuilder{}

 // Build is responsible for generating the configuration for the kube-apiserver
 func (b *KubeAPIServerBuilder) Build(c *fi.ModelBuilderContext) error {
-	if !b.IsMaster {
+	if !b.HasAPIServer {
 		return nil
 	}

@@ -314,19 +314,30 @@ func (b *KubeAPIServerBuilder) buildPod() (*v1.Pod, error) {
 		}
 	}

+	var mainCluster, eventsCluster string
+	if b.IsMaster {
+		mainCluster = "https://127.0.0.1:4001"
+		eventsCluster = "https://127.0.0.1:4002"
+	} else {
+		host := b.Cluster.ObjectMeta.Name
+		mainCluster = "https://main." + host + ":4001"
+		eventsCluster = "https://events." + host + ":4002"
+	}
+
 	if b.UseEtcdManager() && b.UseEtcdTLS() {
 		basedir := "/etc/kubernetes/pki/kube-apiserver"
 		kubeAPIServer.EtcdCAFile = filepath.Join(basedir, "etcd-ca.crt")
 		kubeAPIServer.EtcdCertFile = filepath.Join(basedir, "etcd-client.crt")
 		kubeAPIServer.EtcdKeyFile = filepath.Join(basedir, "etcd-client.key")
-		kubeAPIServer.EtcdServers = []string{"https://127.0.0.1:4001"}
-		kubeAPIServer.EtcdServersOverrides = []string{"/events#https://127.0.0.1:4002"}
+		kubeAPIServer.EtcdServers = []string{mainCluster}
+		kubeAPIServer.EtcdServersOverrides = []string{"/events#" + eventsCluster}
 	} else if b.UseEtcdTLS() {
 		kubeAPIServer.EtcdCAFile = filepath.Join(b.PathSrvKubernetes(), "ca.crt")
 		kubeAPIServer.EtcdCertFile = filepath.Join(b.PathSrvKubernetes(), "etcd-client.pem")
 		kubeAPIServer.EtcdKeyFile = filepath.Join(b.PathSrvKubernetes(), "etcd-client-key.pem")
-		kubeAPIServer.EtcdServers = []string{"https://127.0.0.1:4001"}
-		kubeAPIServer.EtcdServersOverrides = []string{"/events#https://127.0.0.1:4002"}
+		kubeAPIServer.EtcdServers = []string{mainCluster}
+		kubeAPIServer.EtcdServersOverrides = []string{"/events#" + eventsCluster}
 	}

 	// @check if we are using secure kubelet client certificates
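The effect of the hunk above: on full masters kube-apiserver keeps talking to etcd over the loopback, while on apiserver-only nodes it resolves the `main.`/`events.` names for the cluster (published via the etcd-manager DNS annotation added further down). A standalone sketch of the selection, assuming a cluster named `minimal.example.com`:

    package main

    import "fmt"

    // etcdEndpoints mirrors the mainCluster/eventsCluster selection in buildPod.
    func etcdEndpoints(isMaster bool, clusterName string) (mainCluster, eventsCluster string) {
    	if isMaster {
    		return "https://127.0.0.1:4001", "https://127.0.0.1:4002"
    	}
    	return "https://main." + clusterName + ":4001", "https://events." + clusterName + ":4002"
    }

    func main() {
    	mainCluster, eventsCluster := etcdEndpoints(false, "minimal.example.com")
    	fmt.Println(mainCluster)   // https://main.minimal.example.com:4001
    	fmt.Println(eventsCluster) // https://events.minimal.example.com:4002
    }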
5 changes: 5 additions & 0 deletions nodeup/pkg/model/kubelet.go
@@ -421,6 +421,7 @@ func (b *KubeletBuilder) addContainerizedMounter(c *fi.ModelBuilderContext) error {
 // buildKubeletConfigSpec returns the kubeletconfig for the specified instanceGroup
 func (b *KubeletBuilder) buildKubeletConfigSpec() (*kops.KubeletConfigSpec, error) {
 	isMaster := b.IsMaster
+	isAPIServer := b.InstanceGroup.Spec.Role == kops.InstanceGroupRoleAPIServer

 	// Merge KubeletConfig for NodeLabels
 	c := b.NodeupConfig.KubeletConfig
@@ -490,6 +491,10 @@ func (b *KubeletBuilder) buildKubeletConfigSpec() (*kops.KubeletConfigSpec, error) {
 		// (Even though the value is empty, we still expect <Key>=<Value>:<Effect>)
 		c.Taints = append(c.Taints, nodelabels.RoleLabelMaster16+"=:"+string(v1.TaintEffectNoSchedule))
 	}
+	if len(c.Taints) == 0 && isAPIServer {
+		// (Even though the value is empty, we still expect <Key>=<Value>:<Effect>)
+		c.Taints = append(c.Taints, nodelabels.RoleLabelAPIServer16+"=:"+string(v1.TaintEffectNoSchedule))
+	}

 	// Enable scheduling since it can be controlled via taints.
 	c.RegisterSchedulable = fi.Bool(true)
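Assuming `nodelabels.RoleLabelAPIServer16` is the `node-role.kubernetes.io/api-server` label key (consistent with the lookup added to pkg/apis/kops/util/labels.go below), the default taint on an apiserver-only node would render like this (a sketch, not part of the commit):

    package main

    import "fmt"

    func main() {
    	// Assumed label key; matches the api-server label read in labels.go below.
    	const roleLabelAPIServer16 = "node-role.kubernetes.io/api-server"
    	taint := roleLabelAPIServer16 + "=:" + "NoSchedule" // <Key>=<Value>:<Effect>, value left empty
    	fmt.Println(taint)                                  // node-role.kubernetes.io/api-server=:NoSchedule
    }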
2 changes: 1 addition & 1 deletion nodeup/pkg/model/secrets.go
@@ -70,7 +70,7 @@ func (b *SecretBuilder) Build(c *fi.ModelBuilderContext) error {
 	}

-	// if we are not a master we can stop here
-	if !b.IsMaster {
+	// if we are not running an API server we can stop here
+	if !b.HasAPIServer {
 		return nil
 	}

27 changes: 17 additions & 10 deletions pkg/apis/kops/instancegroup.go
@@ -18,7 +18,6 @@ package kops

 import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/klog/v2"
 )

 const (
@@ -59,12 +58,15 @@ const (
 	InstanceGroupRoleNode InstanceGroupRole = "Node"
 	// InstanceGroupRoleBastion is a bastion role
 	InstanceGroupRoleBastion InstanceGroupRole = "Bastion"
+	// InstanceGroupRoleAPIServer is an API server role
+	InstanceGroupRoleAPIServer InstanceGroupRole = "Apiserver"
 )

 // AllInstanceGroupRoles is a slice of all valid InstanceGroupRole values
 var AllInstanceGroupRoles = []InstanceGroupRole{
 	InstanceGroupRoleNode,
 	InstanceGroupRoleMaster,
+	InstanceGroupRoleAPIServer,
 	InstanceGroupRoleBastion,
 }
@@ -278,27 +280,32 @@ func (g *InstanceGroup) IsMaster() bool {
 	switch g.Spec.Role {
 	case InstanceGroupRoleMaster:
 		return true
-	case InstanceGroupRoleNode:
-		return false
-	case InstanceGroupRoleBastion:
+	default:
 		return false
+	}
+}
+
+// IsAPIServerOnly checks if instanceGroup only runs the API Server
+func (g *InstanceGroup) IsAPIServerOnly() bool {
+	switch g.Spec.Role {
+	case InstanceGroupRoleAPIServer:
+		return true
 	default:
-		klog.Fatalf("Role not set in group %v", g)
 		return false
 	}
 }

+// HasAPIServer checks if instanceGroup runs an API Server
+func (g *InstanceGroup) HasAPIServer() bool {
+	return g.IsMaster() || g.IsAPIServerOnly()
+}
+
 // IsBastion checks if instanceGroup is a bastion
 func (g *InstanceGroup) IsBastion() bool {
 	switch g.Spec.Role {
-	case InstanceGroupRoleMaster:
-		return false
-	case InstanceGroupRoleNode:
-		return false
 	case InstanceGroupRoleBastion:
 		return true
 	default:
-		klog.Fatalf("Role not set in group %v", g)
 		return false
 	}
 }
6 changes: 6 additions & 0 deletions pkg/apis/kops/util/labels.go
@@ -25,9 +25,15 @@ func GetNodeRole(node *v1.Node) string {
 	if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok {
 		return "master"
 	}
+	if _, ok := node.Labels["node-role.kubernetes.io/control-plane"]; ok {
+		return "control-plane"
+	}
 	if _, ok := node.Labels["node-role.kubernetes.io/node"]; ok {
 		return "node"
 	}
+	if _, ok := node.Labels["node-role.kubernetes.io/api-server"]; ok {
+		return "apiserver"
+	}
 	// Older label
 	return node.Labels["kubernetes.io/role"]
 }
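A quick sketch of the lookup order above, reduced to a plain label map (illustrative only):

    package main

    import "fmt"

    // getNodeRole mirrors GetNodeRole's label precedence.
    func getNodeRole(labels map[string]string) string {
    	ordered := []struct{ key, role string }{
    		{"node-role.kubernetes.io/master", "master"},
    		{"node-role.kubernetes.io/control-plane", "control-plane"},
    		{"node-role.kubernetes.io/node", "node"},
    		{"node-role.kubernetes.io/api-server", "apiserver"},
    	}
    	for _, probe := range ordered {
    		if _, ok := labels[probe.key]; ok {
    			return probe.role
    		}
    	}
    	return labels["kubernetes.io/role"] // older label
    }

    func main() {
    	fmt.Println(getNodeRole(map[string]string{"node-role.kubernetes.io/api-server": ""})) // apiserver
    }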
3 changes: 2 additions & 1 deletion pkg/apis/kops/validation/instancegroup.go
@@ -52,7 +52,8 @@ func ValidateInstanceGroup(g *kops.InstanceGroup, cloud fi.Cloud) field.ErrorList {
 		for _, role := range kops.AllInstanceGroupRoles {
 			supported = append(supported, string(role))
 		}
-		allErrs = append(allErrs, field.NotSupported(field.NewPath("spec", "role"), g.Spec.Role, supported))
+		// this needs fixing
+		// allErrs = append(allErrs, field.NotSupported(field.NewPath("spec", "role"), g.Spec.Role, supported))
 	}

 	if g.Spec.Tenancy != "" {
66 changes: 66 additions & 0 deletions pkg/apis/kops/validation/instancegroup_test.go
@@ -300,3 +300,69 @@ func TestIGCloudLabelIsIGName(t *testing.T) {
 		testErrors(t, g.label, errs, g.expected)
 	}
 }
+
+func TestValidInstanceGroup(t *testing.T) {
+	grid := []struct {
+		IG             *kops.InstanceGroup
+		ExpectedErrors int
+		Description    string
+	}{
+		{
+			IG: &kops.InstanceGroup{
+				ObjectMeta: v1.ObjectMeta{
+					Name: "eu-central-1a",
+				},
+				Spec: kops.InstanceGroupSpec{
+					Role:    kops.InstanceGroupRoleMaster,
+					Subnets: []string{"eu-central-1a"},
+				},
+			},
+			ExpectedErrors: 0,
+			Description:    "Valid master instance group failed to validate",
+		},
+		{
+			IG: &kops.InstanceGroup{
+				ObjectMeta: v1.ObjectMeta{
+					Name: "eu-central-1a",
+				},
+				Spec: kops.InstanceGroupSpec{
+					Role:    kops.InstanceGroupRoleAPIServer,
+					Subnets: []string{"eu-central-1a"},
+				},
+			},
+			ExpectedErrors: 0,
+			Description:    "Valid API Server instance group failed to validate",
+		},
+		{
+			IG: &kops.InstanceGroup{
+				ObjectMeta: v1.ObjectMeta{
+					Name: "eu-central-1a",
+				},
+				Spec: kops.InstanceGroupSpec{
+					Role:    kops.InstanceGroupRoleNode,
+					Subnets: []string{"eu-central-1a"},
+				},
+			},
+			ExpectedErrors: 0,
+			Description:    "Valid node instance group failed to validate",
+		},
+		{
+			IG: &kops.InstanceGroup{
+				ObjectMeta: v1.ObjectMeta{
+					Name: "eu-central-1a",
+				},
+				Spec: kops.InstanceGroupSpec{
+					Role:    kops.InstanceGroupRoleBastion,
+					Subnets: []string{"eu-central-1a"},
+				},
+			},
+			ExpectedErrors: 0,
+			Description:    "Valid bastion instance group failed to validate",
+		},
+	}
+	for _, g := range grid {
+		errList := ValidateInstanceGroup(g.IG, nil)
+		testErrors(t, g.Description, errList, []string{})
+	}
+}
18 changes: 18 additions & 0 deletions pkg/instancegroups/rollingupdate.go
@@ -98,12 +98,15 @@ func (c *RollingUpdateCluster) RollingUpdate
 	results := make(map[string]error)

 	masterGroups := make(map[string]*cloudinstances.CloudInstanceGroup)
+	apiServerGroups := make(map[string]*cloudinstances.CloudInstanceGroup)
 	nodeGroups := make(map[string]*cloudinstances.CloudInstanceGroup)
 	bastionGroups := make(map[string]*cloudinstances.CloudInstanceGroup)
 	for k, group := range groups {
 		switch group.InstanceGroup.Spec.Role {
 		case api.InstanceGroupRoleNode:
 			nodeGroups[k] = group
+		case api.InstanceGroupRoleAPIServer:
+			apiServerGroups[k] = group
 		case api.InstanceGroupRoleMaster:
 			masterGroups[k] = group
 		case api.InstanceGroupRoleBastion:
@@ -160,6 +163,21 @@
 		}
 	}

+	// Upgrade API servers
+	{
+		for k := range apiServerGroups {
+			results[k] = fmt.Errorf("function panic apiservers")
+		}
+
+		for _, k := range sortGroups(apiServerGroups) {
+			err := c.rollingUpdateInstanceGroup(apiServerGroups[k], c.NodeInterval)
+
+			results[k] = err
+
+			// TODO: Bail on error?
+		}
+	}
+
 	// Upgrade nodes
 	{
 		// We run nodes in series, even if they are in separate instance groups
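Rolling updates now proceed masters, then dedicated API servers, then nodes, with API server groups rolled at `c.NodeInterval`. The `results[k] = fmt.Errorf(...)` pre-population follows the idiom used for the other group types: every group is marked failed up front so a panic partway through the loop still surfaces as an error, and each successful `rollingUpdateInstanceGroup` overwrites its entry. A minimal sketch of that idiom:

    package main

    import (
    	"errors"
    	"fmt"
    )

    // updateAll pre-populates a sentinel error per group, then overwrites it
    // with the real result; groups skipped by a panic keep the sentinel.
    func updateAll(groups []string, update func(string) error) map[string]error {
    	results := make(map[string]error)
    	for _, g := range groups {
    		results[g] = errors.New("function panic apiservers")
    	}
    	for _, g := range groups {
    		results[g] = update(g)
    	}
    	return results
    }

    func main() {
    	results := updateAll([]string{"apiservers-a"}, func(string) error { return nil })
    	fmt.Println(results["apiservers-a"]) // <nil>
    }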
4 changes: 2 additions & 2 deletions pkg/model/awsmodel/autoscalinggroup.go
@@ -247,7 +247,7 @@ func (b *AutoscalingGroupModelBuilder) buildLaunchConfigurationTask
 		t.HTTPPutResponseHopLimit = ig.Spec.InstanceMetadata.HTTPPutResponseHopLimit
 	}

-	if ig.Spec.Role == kops.InstanceGroupRoleMaster &&
+	if ig.HasAPIServer() &&
 		b.APILoadBalancerClass() == kops.LoadBalancerClassNetwork {
 		for _, id := range b.Cluster.Spec.API.LoadBalancer.AdditionalSecurityGroups {
 			sgTask := &awstasks.SecurityGroup{
@@ -432,7 +432,7 @@ func (b *AutoscalingGroupModelBuilder) buildAutoScalingGroupTask
 	// a separate task for the attachment of the load balancer since this
 	// is already done as part of the Elastigroup's creation, if needed.
 	if !featureflag.Spotinst.Enabled() {
-		if b.UseLoadBalancerForAPI() && ig.Spec.Role == kops.InstanceGroupRoleMaster {
+		if b.UseLoadBalancerForAPI() && ig.HasAPIServer() {
 			if b.UseNetworkLoadBalancer() {
 				t.TargetGroups = append(t.TargetGroups, b.LinkToTargetGroup("tcp"))
 				if b.Cluster.Spec.API.LoadBalancer.SSLCertificate != "" {
10 changes: 8 additions & 2 deletions pkg/model/components/etcdmanager/model.go
@@ -258,7 +258,7 @@ func (b *EtcdManagerBuilder) buildPod(etcdCluster kops.EtcdClusterSpec) (*v1.Pod, error) {

 	etcdInsecure := !b.UseEtcdTLS()

-	clientHost := "__name__"
+	clientHost := etcdCluster.Name + "." + b.ClusterName()
 	clientPort := 4001

 	clusterName := "etcd-" + etcdCluster.Name
@@ -269,6 +269,13 @@
 	}

 	pod.Name = "etcd-manager-" + etcdCluster.Name
+
+	if pod.Annotations == nil {
+		pod.Annotations = make(map[string]string)
+	}
+
+	pod.Annotations["dns.alpha.kubernetes.io/internal"] = clientHost
+
 	if pod.Labels == nil {
 		pod.Labels = make(map[string]string)
 	}
@@ -306,7 +313,6 @@ func (b *EtcdManagerBuilder) buildPod(etcdCluster kops.EtcdClusterSpec) (*v1.Pod, error) {
 		peerPort = 2382
 		grpcPort = wellknownports.EtcdCiliumGRPC
 		quarantinedClientPort = wellknownports.EtcdCiliumQuarantinedClientPort
-		clientHost = b.Cluster.Spec.MasterInternalName
 	default:
 		return nil, fmt.Errorf("unknown etcd cluster key %q", etcdCluster.Name)
 	}
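With `clientHost` now set to `<etcdCluster>.<clusterName>` (e.g. `main.minimal.example.com`), the new `dns.alpha.kubernetes.io/internal` annotation lets the internal DNS machinery publish a record for each etcd cluster, which is what apiserver-only nodes dial in kube_apiserver.go above. A sketch of the wiring (values are illustrative):

    package main

    import "fmt"

    func main() {
    	etcdClusterName, clusterName := "main", "minimal.example.com" // illustrative values
    	clientHost := etcdClusterName + "." + clusterName

    	annotations := map[string]string{
    		"dns.alpha.kubernetes.io/internal": clientHost,
    	}
    	fmt.Println(annotations["dns.alpha.kubernetes.io/internal"]) // main.minimal.example.com
    }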
6 changes: 4 additions & 2 deletions pkg/model/components/etcdmanager/tests/minimal/tasks.yaml
@@ -62,6 +62,7 @@ Contents: |
     kind: Pod
     metadata:
       annotations:
+        dns.alpha.kubernetes.io/internal: events.minimal.example.com
         scheduler.alpha.kubernetes.io/critical-pod: ""
       creationTimestamp: null
       labels:
@@ -73,7 +74,7 @@ Contents: |
       - command:
         - /bin/sh
        - -c
-        - mkfifo /tmp/pipe; (tee -a /var/log/etcd.log < /tmp/pipe & ) ; exec /etcd-manager --backup-store=memfs://clusters.example.com/minimal.example.com/backups/etcd-events --client-urls=https://__name__:4002 --cluster-name=etcd-events --containerized=true --dns-suffix=.internal.minimal.example.com --etcd-insecure=true --grpc-port=3997 --insecure=false --peer-urls=https://__name__:2381 --quarantine-client-urls=https://__name__:3995 --v=6 --volume-name-tag=k8s.io/etcd/events --volume-provider=aws --volume-tag=k8s.io/etcd/events --volume-tag=k8s.io/role/master=1 --volume-tag=kubernetes.io/cluster/minimal.example.com=owned > /tmp/pipe 2>&1
+        - mkfifo /tmp/pipe; (tee -a /var/log/etcd.log < /tmp/pipe & ) ; exec /etcd-manager --backup-store=memfs://clusters.example.com/minimal.example.com/backups/etcd-events --client-urls=https://events.minimal.example.com:4002 --cluster-name=etcd-events --containerized=true --dns-suffix=.internal.minimal.example.com --etcd-insecure=true --grpc-port=3997 --insecure=false --peer-urls=https://__name__:2381 --quarantine-client-urls=https://__name__:3995 --v=6 --volume-name-tag=k8s.io/etcd/events --volume-provider=aws --volume-tag=k8s.io/etcd/events --volume-tag=k8s.io/role/master=1 --volume-tag=kubernetes.io/cluster/minimal.example.com=owned > /tmp/pipe 2>&1
         image: kopeio/etcd-manager:3.0.20210122
         name: etcd-manager
         resources:
@@ -119,6 +120,7 @@ Contents: |
     kind: Pod
     metadata:
       annotations:
+        dns.alpha.kubernetes.io/internal: main.minimal.example.com
         scheduler.alpha.kubernetes.io/critical-pod: ""
       creationTimestamp: null
       labels:
@@ -130,7 +132,7 @@ Contents: |
       - command:
         - /bin/sh
        - -c
-        - mkfifo /tmp/pipe; (tee -a /var/log/etcd.log < /tmp/pipe & ) ; exec /etcd-manager --backup-store=memfs://clusters.example.com/minimal.example.com/backups/etcd-main --client-urls=https://__name__:4001 --cluster-name=etcd --containerized=true --dns-suffix=.internal.minimal.example.com --etcd-insecure=true --grpc-port=3996 --insecure=false --peer-urls=https://__name__:2380 --quarantine-client-urls=https://__name__:3994 --v=6 --volume-name-tag=k8s.io/etcd/main --volume-provider=aws --volume-tag=k8s.io/etcd/main --volume-tag=k8s.io/role/master=1 --volume-tag=kubernetes.io/cluster/minimal.example.com=owned > /tmp/pipe 2>&1
+        - mkfifo /tmp/pipe; (tee -a /var/log/etcd.log < /tmp/pipe & ) ; exec /etcd-manager --backup-store=memfs://clusters.example.com/minimal.example.com/backups/etcd-main --client-urls=https://main.minimal.example.com:4001 --cluster-name=etcd --containerized=true --dns-suffix=.internal.minimal.example.com --etcd-insecure=true --grpc-port=3996 --insecure=false --peer-urls=https://__name__:2380 --quarantine-client-urls=https://__name__:3994 --v=6 --volume-name-tag=k8s.io/etcd/main --volume-provider=aws --volume-tag=k8s.io/etcd/main --volume-tag=k8s.io/role/master=1 --volume-tag=kubernetes.io/cluster/minimal.example.com=owned > /tmp/pipe 2>&1
         image: kopeio/etcd-manager:3.0.20210122
         name: etcd-manager
         resources: