Skip to content

Commit

Permalink
Implemented Cloud Provider initialization and discovery logic.
Browse files Browse the repository at this point in the history
  • Loading branch information
ellistarn committed Jan 28, 2021
1 parent 48f5646 commit 13a6bf4
Show file tree
Hide file tree
Showing 10 changed files with 391 additions and 102 deletions.
8 changes: 3 additions & 5 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func main() {
Port: options.WebhookPort,
})

cloudProviderFactory := registry.NewFactory(cloudprovider.Options{Client: manager.GetClient()})
cloudProviderFactory := registry.NewFactory(cloudprovider.Options{Client: manager.GetClient(), Config: manager.GetConfig()})
metricsProducerFactory := &producers.Factory{Client: manager.GetClient(), CloudProviderFactory: cloudProviderFactory}
metricsClientFactory := metricsclients.NewFactoryOrDie(options.PrometheusURI)
autoscalerFactory := autoscaler.NewFactoryOrDie(metricsClientFactory, manager.GetRESTMapper(), manager.GetConfig())
Expand All @@ -74,10 +74,8 @@ func main() {
&scalablenodegroupv1alpha1.Controller{CloudProvider: cloudProviderFactory},
&metricsproducerv1alpha1.Controller{ProducerFactory: metricsProducerFactory},
&provisionerv1alpha1.Controller{
Client: manager.GetClient(),
Allocator: &allocation.GreedyAllocator{
Capacity: cloudProviderFactory.Capacity(),
},
Client: manager.GetClient(),
Allocator: &allocation.GreedyAllocator{Capacity: cloudProviderFactory.Capacity()},
},
).Start(controllerruntime.SetupSignalHandler()); err != nil {
zap.S().Panicf("Unable to start manager, %w", err)
Expand Down
7 changes: 7 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,10 @@ rules:
- get
- list
- watch
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
- update
89 changes: 61 additions & 28 deletions docs/aws/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,56 @@ This command will create an IAM Policy with access to all of the resources for a
```
aws iam create-policy --policy-name Karpenter --policy-document "$(cat <<-EOM
{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"eks:DescribeNodegroup",
"eks:UpdateNodegroupConfig"
],
"Effect": "Allow",
"Resource": "*"
},
{
"Action": [
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:UpdateAutoScalingGroup"
],
"Effect": "Allow",
"Resource": "*"
},
{
"Action": [
"sqs:GetQueueAttributes",
"sqs:GetQueueUrl"
],
"Effect": "Allow",
"Resource": "*"
}
]
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"eks:DescribeNodegroup",
"eks:UpdateNodegroupConfig"
],
"Effect": "Allow",
"Resource": "*"
},
{
"Action": [
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:UpdateAutoScalingGroup"
],
"Effect": "Allow",
"Resource": "*"
},
{
"Action": [
"sqs:GetQueueAttributes",
"sqs:GetQueueUrl",
"sqs:ReceiveMessage",
"sqs:DeleteMessage",
"sqs:SendMessage"
],
"Effect": "Allow",
"Resource": "*"
},
{
"Action": [
"ec2:DescribeLaunchTemplates",
"ec2:CreateLaunchTemplate",
"ec2:CreateFleet",
"ec2:RunInstances",
"ec2:CreateTags",
"ec2:DescribeSubnets",
"eks:DescribeCluster",
"iam:GetRole",
"iam:CreateRole",
"iam:AddRoleToInstanceProfile",
"iam:PassRole",
"iam:GetInstanceProfile",
"iam:CreateInstanceProfile",
"iam:AttachRolePolicy"
],
"Effect": "Allow",
"Resource": "*"
}
]
}
EOM
)"
Expand Down Expand Up @@ -73,7 +96,17 @@ kubectl delete pods -n karpenter -l control-plane=karpenter
```

### Cleanup
```
```bash
eksctl delete iamserviceaccount --cluster ${CLUSTER_NAME} --name default --namespace karpenter
aws iam delete-policy --policy-arn arn:aws:iam::${AWS_ACCOUNT_ID}:policy/Karpenter

# Remove Karpenter generated resources
aws iam remove-role-from-instance-profile --instance-profile-name KarpenterNodeRole --role-name KarpenterNodeRole
aws iam delete-instance-profile --instance-profile-name KarpenterNodeRole
aws iam detach-role-policy --role-name KarpenterNodeRole --policy-arn arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
aws iam detach-role-policy --role-name KarpenterNodeRole --policy-arn arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy
aws iam detach-role-policy --role-name KarpenterNodeRole --policy-arn arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
aws iam detach-role-policy --role-name KarpenterNodeRole --policy-arn arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
aws iam delete-role --role-name KarpenterNodeRole
aws ec2 delete-launch-template --launch-template-name KarpenterLaunchTemplate
```
23 changes: 23 additions & 0 deletions docs/examples/provisioner/provisioner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,26 @@ kind: Provisioner
metadata:
name: example
spec: {}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: example
spec:
selector:
matchLabels:
app: example
template:
metadata:
labels:
app: example
spec:
nodeSelector:
karpenter.sh/instance: test
containers:
- name: example
image: k8s.gcr.io/pause
resources:
limits:
memory: "128Mi"
cpu: "500m"
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/go-logr/zapr v0.2.0
github.com/onsi/ginkgo v1.14.2
github.com/onsi/gomega v1.10.3
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.8.0
github.com/prometheus/common v0.14.0
go.uber.org/multierr v1.6.0
Expand Down
94 changes: 35 additions & 59 deletions pkg/cloudprovider/aws/capacity.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,84 +17,60 @@ package aws
import (
"context"
"fmt"
"strconv"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/aws/aws-sdk-go/service/eks/eksiface"
"github.com/aws/aws-sdk-go/service/iam/iamiface"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/utils/log"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/rest"
)

type Capacity struct {
ec2Iface ec2iface.EC2API
EC2API ec2iface.EC2API
LaunchTemplate *ec2.LaunchTemplate
ZonalSubnets map[string]*ec2.Subnet
}

// var singleton *Capacity

// NewCapacity constructs a Capacity client for AWS
func NewCapacity(client ec2iface.EC2API) *Capacity {
return &Capacity{ec2Iface: client}
func NewCapacity(EC2API ec2iface.EC2API, EKSAPI eksiface.EKSAPI, IAMAPI iamiface.IAMAPI, config *rest.Config) *Capacity {
initialization := NewInitialization(EC2API, EKSAPI, IAMAPI, config)
return &Capacity{
EC2API: EC2API,
LaunchTemplate: initialization.LaunchTemplate,
ZonalSubnets: initialization.ZonalSubnets,
}
}

// Create a set of nodes given the constraints
func (cp *Capacity) Create(ctx context.Context, constraints *cloudprovider.CapacityConstraints) error {
// TODO Convert contraints to the Node types and select the launch template
func (c *Capacity) Create(ctx context.Context, constraints *cloudprovider.CapacityConstraints) error {
// TODO, select a zone more intelligently
var zone string
for zone = range c.ZonalSubnets {
}

// Create the desired number of instances based on constraints
// create instances using EC2 fleet API
// TODO remove hard coded values
output, err := cp.ec2Iface.CreateFleetWithContext(ctx, &ec2.CreateFleetInput{
LaunchTemplateConfigs: []*ec2.FleetLaunchTemplateConfigRequest{
{
LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{
LaunchTemplateId: aws.String("lt-02f427483e1be00f5"),
Version: aws.String("$Latest"),
},
Overrides: []*ec2.FleetLaunchTemplateOverridesRequest{
{
InstanceType: aws.String("m5.large"),
SubnetId: aws.String("subnet-03216d5a693377033"),
AvailabilityZone: aws.String("us-east-2a"),
},
},
},
},
if _, err := c.EC2API.CreateFleetWithContext(context.TODO(), &ec2.CreateFleetInput{
Type: aws.String(ec2.FleetTypeInstant),
TargetCapacitySpecification: &ec2.TargetCapacitySpecificationRequest{
DefaultTargetCapacityType: aws.String(ec2.DefaultTargetCapacityTypeOnDemand),
OnDemandTargetCapacity: aws.Int64(1),
TotalTargetCapacity: aws.Int64(1),
},
Type: aws.String(ec2.FleetTypeInstant),
})
if err != nil {
return fmt.Errorf("failed to create fleet %w", err)
LaunchTemplateConfigs: []*ec2.FleetLaunchTemplateConfigRequest{{
LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{
LaunchTemplateName: c.LaunchTemplate.LaunchTemplateName,
Version: aws.String("$Default"),
},
Overrides: []*ec2.FleetLaunchTemplateOverridesRequest{{
AvailabilityZone: aws.String(zone),
InstanceType: aws.String("m5.large"),
SubnetId: c.ZonalSubnets[zone].SubnetId,
}},
}},
}); err != nil {
return fmt.Errorf("creating fleet, %w", err)
}
// TODO Get instanceID from the output
_ = output
// _ = cfg.instanceID
zap.S().Infof("Successfully created a node in zone %v", constraints.Zone)
return nil
}

// calculateResourceListOrDie queries EC2 API and gets the CPU & Mem for a list of instance types
func calculateResourceListOrDie(client ec2iface.EC2API, instanceType []*string) map[string]v1.ResourceList {
output, err := client.DescribeInstanceTypes(
&ec2.DescribeInstanceTypesInput{
InstanceTypes: instanceType,
},
)
if err != nil {
log.PanicIfError(err, "Describe instance type request failed")
}
var instanceTypes = map[string]v1.ResourceList{}
for _, instance := range output.InstanceTypes {
resourceList := v1.ResourceList{
v1.ResourceCPU: resource.MustParse(strconv.FormatInt(*instance.VCpuInfo.DefaultVCpus, 10)),
v1.ResourceMemory: resource.MustParse(strconv.FormatInt(*instance.MemoryInfo.SizeInMiB, 10)),
}
instanceTypes[*instance.InstanceType] = resourceList
}
return instanceTypes
}
25 changes: 16 additions & 9 deletions pkg/cloudprovider/aws/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,38 @@ import (
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/aws/aws-sdk-go/service/eks"
"github.com/aws/aws-sdk-go/service/eks/eksiface"
"github.com/aws/aws-sdk-go/service/iam"
"github.com/aws/aws-sdk-go/service/iam/iamiface"
"github.com/aws/aws-sdk-go/service/sqs"
"github.com/aws/aws-sdk-go/service/sqs/sqsiface"
"github.com/awslabs/karpenter/pkg/apis/autoscaling/v1alpha1"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/cloudprovider/fake"
"github.com/awslabs/karpenter/pkg/utils/log"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
)

type Factory struct {
AutoscalingClient autoscalingiface.AutoScalingAPI
SQSClient sqsiface.SQSAPI
EKSClient eksiface.EKSAPI
EC2Client ec2iface.EC2API
SQSAPI sqsiface.SQSAPI
EKSAPI eksiface.EKSAPI
EC2API ec2iface.EC2API
IAMAPI iamiface.IAMAPI
Client client.Client
Config *rest.Config
}

func NewFactory(options cloudprovider.Options) *Factory {
sess := withRegion(session.Must(session.NewSession()))
return &Factory{
AutoscalingClient: autoscaling.New(sess),
EKSClient: eks.New(sess),
SQSClient: sqs.New(sess),
EC2Client: ec2.New(sess),
EKSAPI: eks.New(sess),
SQSAPI: sqs.New(sess),
EC2API: ec2.New(sess),
IAMAPI: iam.New(sess),
Client: options.Client,
Config: options.Config,
}
}

Expand All @@ -57,7 +64,7 @@ func (f *Factory) NodeGroupFor(spec *v1alpha1.ScalableNodeGroupSpec) cloudprovid
case v1alpha1.AWSEC2AutoScalingGroup:
return NewAutoScalingGroup(spec.ID, f.AutoscalingClient)
case v1alpha1.AWSEKSNodeGroup:
return NewManagedNodeGroup(spec.ID, f.EKSClient, f.AutoscalingClient, f.Client)
return NewManagedNodeGroup(spec.ID, f.EKSAPI, f.AutoscalingClient, f.Client)
default:
return fake.NewNotImplementedFactory().NodeGroupFor(spec)
}
Expand All @@ -66,14 +73,14 @@ func (f *Factory) NodeGroupFor(spec *v1alpha1.ScalableNodeGroupSpec) cloudprovid
func (f *Factory) QueueFor(spec *v1alpha1.QueueSpec) cloudprovider.Queue {
switch spec.Type {
case v1alpha1.AWSSQSQueueType:
return NewSQSQueue(spec.ID, f.SQSClient)
return NewSQSQueue(spec.ID, f.SQSAPI)
default:
return fake.NewNotImplementedFactory().QueueFor(spec)
}
}

func (f *Factory) Capacity() cloudprovider.Capacity {
return NewCapacity(f.EC2Client)
return NewCapacity(f.EC2API, f.EKSAPI, f.IAMAPI, f.Config)
}

func withRegion(sess *session.Session) *session.Session {
Expand Down
Loading

0 comments on commit 13a6bf4

Please sign in to comment.