Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…into ami-controller
  • Loading branch information
engedaam committed Apr 30, 2024
2 parents efc9a67 + e6fa442 commit f220c7d
Show file tree
Hide file tree
Showing 74 changed files with 794 additions and 378 deletions.
2 changes: 1 addition & 1 deletion .github/actions/e2e/cleanup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-eksctl
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/install-karpenter/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-helm
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/install-prometheus/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-helm
Expand Down
4 changes: 4 additions & 0 deletions .github/actions/e2e/run-tests-private-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ runs:
- kubectl delete ec2nodeclass --all
- kubectl delete deployment --all
- PRIVATE_CLUSTER=$CLUSTER_NAME TEST_SUITE=$SUITE ENABLE_METRICS=$ENABLE_METRICS METRICS_REGION=$METRICS_REGION GIT_REF="$(git rev-parse HEAD)" CLUSTER_NAME=$CLUSTER_NAME CLUSTER_ENDPOINT="$(aws eks describe-cluster --name $CLUSTER_NAME --query "cluster.endpoint" --output text)" INTERRUPTION_QUEUE=$CLUSTER_NAME make e2etests
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/application --retention-in-days 30
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/dataplane --retention-in-days 30
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/host --retention-in-days 30
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/performance --retention-in-days 30
post_build:
commands:
# Describe karpenter pods
Expand Down
17 changes: 11 additions & 6 deletions .github/actions/e2e/setup-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ inputs:
default: "1.29"
eksctl_version:
description: "Version of eksctl to install"
default: v0.169.0
default: v0.175.0
ip_family:
description: "IP Family of the cluster. Valid values are IPv4 or IPv6"
default: "IPv4"
Expand All @@ -50,7 +50,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-eksctl
Expand Down Expand Up @@ -152,11 +152,9 @@ runs:
minSize: 2
maxSize: 2
iam:
withAddonPolicies:
cloudWatch: true
instanceRolePermissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary"
taints:
- key: CriticalAddonsOnly
value: "true"
effect: NoSchedule
cloudWatch:
clusterLogging:
enableTypes: ["*"]
Expand All @@ -175,6 +173,8 @@ runs:
$KARPENTER_IAM
withOIDC: true
addons:
- name: amazon-cloudwatch-observability
permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary"
- name: vpc-cni
permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary"
- name: coredns
Expand Down Expand Up @@ -211,6 +211,11 @@ runs:
else
eksctl ${cmd} cluster -f clusterconfig.yaml
fi
# Add the taint only after all necessary pods have been scheduled onto the managed node group nodes:
# amazon-cloudwatch-observability pods do not tolerate CriticalAddonsOnly=true:NoSchedule, and
# the amazon-cloudwatch-observability addon does not allow adding tolerations to its pods through its advanced configuration
kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all
- name: tag oidc provider of the cluster
if: always()
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/slack/notify/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
with:
ref: ${{ inputs.git_ref }}
- id: get-run-name
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/upgrade-crds/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ runs:
role-to-assume: arn:aws:iam::${{ inputs.account_id }}:role/${{ inputs.role }}
aws-region: ${{ inputs.region }}
role-duration-seconds: 21600
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
with:
ref: ${{ inputs.git_ref }}
- name: install-karpenter
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/approval-comment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
mkdir -p /tmp/artifacts
{ echo "$REVIEW_BODY"; echo "$PULL_REQUEST_NUMBER"; echo "$COMMIT_ID"; } >> /tmp/artifacts/metadata.txt
cat /tmp/artifacts/metadata.txt
- uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
- uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
with:
name: artifacts
path: /tmp/artifacts
11 changes: 10 additions & 1 deletion .github/workflows/e2e-upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
eksctl_version: v0.169.0
eksctl_version: v0.175.0
ip_family: IPv4 # Set the value to IPv6 if IPv6 suite, else IPv4
git_ref: ${{ inputs.from_git_ref }}
ecr_account_id: ${{ vars.SNAPSHOT_ACCOUNT_ID }}
Expand Down Expand Up @@ -135,6 +135,15 @@ jobs:
url: ${{ secrets.SLACK_WEBHOOK_URL }}
suite: Upgrade
git_ref: ${{ inputs.to_git_ref }}
- name: add log retention policy
if: ${{ inputs.workflow_trigger != 'private_cluster' }}
env:
CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
run: |
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
if: failure() || cancelled()
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
eksctl_version: v0.169.0
eksctl_version: v0.175.0
ip_family: ${{ contains(inputs.suite, 'IPv6') && 'IPv6' || 'IPv4' }} # Set the value to IPv6 if IPv6 suite, else IPv4
private_cluster: ${{ inputs.workflow_trigger == 'private_cluster' }}
git_ref: ${{ inputs.git_ref }}
Expand Down Expand Up @@ -187,6 +187,15 @@ jobs:
suite: ${{ inputs.suite }}
git_ref: ${{ inputs.git_ref }}
workflow_trigger: ${{ inputs.workflow_trigger }}
- name: add log retention policy
if: ${{ inputs.workflow_trigger != 'private_cluster' }}
env:
CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
run: |
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
if: (failure() || cancelled()) && inputs.workflow_trigger != 'private_cluster'
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ require (
github.com/Pallinder/go-randomdata v1.2.0
github.com/PuerkitoBio/goquery v1.9.1
github.com/avast/retry-go v3.0.0+incompatible
github.com/aws/aws-sdk-go v1.51.21
github.com/aws/aws-sdk-go v1.51.25
github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881
github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647
github.com/go-logr/zapr v1.3.0
github.com/imdario/mergo v0.3.16
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/onsi/ginkgo/v2 v2.17.1
github.com/onsi/gomega v1.32.0
github.com/onsi/gomega v1.33.0
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pelletier/go-toml/v2 v2.2.1
github.com/prometheus/client_golang v1.19.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go v1.51.21 h1:UrT6JC9R9PkYYXDZBV0qDKTualMr+bfK2eboTknMgbs=
github.com/aws/aws-sdk-go v1.51.21/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
github.com/aws/aws-sdk-go v1.51.25 h1:DjTT8mtmsachhV6yrXR8+yhnG6120dazr720nopRsls=
github.com/aws/aws-sdk-go v1.51.25/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881 h1:m9rhsGhdepdQV96tZgfy68oU75AWAjOH8u65OefTjwA=
github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881/go.mod h1:+Mk5k0b6HpKobxNq+B56DOhZ+I/NiPhd5MIBhQMSTSs=
github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647 h1:8yRBVsjGmI7qQsPWtIrbWP+XfwHO9Wq7gdLVzjqiZFs=
Expand Down Expand Up @@ -272,8 +272,8 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/onsi/ginkgo/v2 v2.17.1 h1:V++EzdbhI4ZV4ev0UTIj0PzhzOcReJFyJaLjtSF55M8=
github.com/onsi/ginkgo/v2 v2.17.1/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs=
github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk=
github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg=
github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE=
github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg=
Expand Down
9 changes: 2 additions & 7 deletions pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,6 @@ spec:
format: int64
type: integer
volumeSize:
allOf:
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$
anyOf:
- type: integer
- type: string
description: |-
VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or
a volume size. The following are the supported volumes sizes for each volume
Expand All @@ -190,7 +184,8 @@ spec:
* standard: 1-1,024
x-kubernetes-int-or-string: true
pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$
type: string
volumeType:
description: |-
VolumeType of the block device.
Expand Down
3 changes: 2 additions & 1 deletion pkg/apis/v1beta1/ec2nodeclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ type BlockDevice struct {
// + TODO: Add the CEL resources.quantity type after k8s 1.29
// + https://github.com/kubernetes/apiserver/commit/b137c256373aec1c5d5810afbabb8932a19ecd2a#diff-838176caa5882465c9d6061febd456397a3e2b40fb423ed36f0cabb1847ecb4dR190
// +kubebuilder:validation:Pattern:="^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$"
// +kubebuilder:validation:XIntOrString
// +kubebuilder:validation:Schemaless
// +kubebuilder:validation:Type:=string
// +optional
VolumeSize *resource.Quantity `json:"volumeSize,omitempty" hash:"string"`
// VolumeType of the block device.
Expand Down
14 changes: 5 additions & 9 deletions pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *corev
if err != nil {
return "", fmt.Errorf("calculating ami drift, %w", err)
}
securitygroupDrifted, err := c.areSecurityGroupsDrifted(ctx, instance, nodeClass)
securitygroupDrifted, err := c.areSecurityGroupsDrifted(instance, nodeClass)
if err != nil {
return "", fmt.Errorf("calculating securitygroup drift, %w", err)
}
Expand Down Expand Up @@ -83,7 +83,7 @@ func (c *CloudProvider) isAMIDrifted(ctx context.Context, nodeClaim *corev1beta1
if len(nodeClass.Status.AMIs) == 0 {
return "", fmt.Errorf("no amis exist given constraints")
}
mappedAMIs := amifamily.MapToInstanceTypes([]*cloudprovider.InstanceType{nodeInstanceType}, nodeClass.Status.AMIs)
mappedAMIs := amifamily.MapToInstanceTypes(nodeClass.Status.AMIs, []*cloudprovider.InstanceType{nodeInstanceType})
if !lo.Contains(lo.Keys(mappedAMIs), instance.ImageID) {
return AMIDrift, nil
}
Expand Down Expand Up @@ -114,14 +114,10 @@ func (c *CloudProvider) isSubnetDrifted(ctx context.Context, instance *instance.

// Checks if the security groups are drifted, by comparing the security groups recorded in the EC2NodeClass status
// to the ec2 instance security groups
func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securitygroup, err := c.securityGroupProvider.List(ctx, nodeClass)
if err != nil {
return "", err
}
securityGroupIds := sets.New(lo.Map(securitygroup, func(sg *ec2.SecurityGroup, _ int) string { return aws.StringValue(sg.GroupId) })...)
func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securityGroupIds := sets.New(lo.Map(nodeClass.Status.SecurityGroups, func(sg v1beta1.SecurityGroup, _ int) string { return sg.ID })...)
if len(securityGroupIds) == 0 {
return "", fmt.Errorf("no security groups are discovered")
return "", fmt.Errorf("no security groups are present in the status")
}

if !securityGroupIds.Equal(sets.New(ec2Instance.SecurityGroupIDs...)) {
Expand Down
Loading

0 comments on commit f220c7d

Please sign in to comment.