Skip to content

Commit

Permalink
Merge pull request #5 from gitpod-io/kylos101/tolerate-duplicate-subnets
Browse files Browse the repository at this point in the history
Tolerate two subnets and Introduce Cleanup command
  • Loading branch information
kylos101 authored Jul 17, 2024
2 parents ab3e7cf + b9ffb31 commit 9106ae5
Show file tree
Hide file tree
Showing 5 changed files with 262 additions and 85 deletions.
19 changes: 17 additions & 2 deletions gitpod-network-check/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
```

2. Set up AWS credentials

`gitpod-network-check` needs access to the AWS account you are planning to use to deploy Gitpod in. Much like AWS CLI, `gitpod-network-check` uses the available AWS profile in your terminal to authenticate against the account. This means that you can rely on any locally available [AWS profiles](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) or just set the right environment variables in your terminal for the CLI to use:
```
export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
Expand All @@ -44,7 +44,7 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
To start the diagnosis, run the command: `./gitpod-network-check diagnose`

```
```console
./gitpod-network-check diagnose
INFO[0000] ✅ Main Subnets are valid
INFO[0000] ✅ Pod Subnets are valid
Expand Down Expand Up @@ -77,3 +77,18 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
INFO[0191] ✅ S3 is available
```

3. Clean up after network diagnosis

Diagnosis is designed to clean up before it finishes. However, if the process terminates unexpectedly, you can clean up the AWS resources it created like so:

```console
./gitpod-network-check clean
INFO[0000] ✅ Main Subnets are valid
INFO[0000] ✅ Pod Subnets are valid
INFO[0000] ✅ Instances terminated
INFO[0000] Cleaning up: Waiting for 2 minutes so network interfaces are deleted
INFO[0121] ✅ Role 'GitpodNetworkCheck' deleted
INFO[0121] ✅ Instance profile deleted
INFO[0122] ✅ Security group 'sg-0a6119dcb6a564fc1' deleted
INFO[0122] ✅ Security group 'sg-07373362953212e54' deleted
```
124 changes: 41 additions & 83 deletions gitpod-network-check/cmd/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ import (
"encoding/base64"
"errors"
"fmt"
"slices"
"sort"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/ec2"
"github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/aws/aws-sdk-go-v2/service/iam"
Expand All @@ -23,28 +23,6 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
)

// gitpodRoleName is the fixed name "GitpodNetworkCheck".
// NOTE(review): presumably the name of the IAM role created for the network
// check — confirm against createIAMRoleAndAttachPolicy.
const gitpodRoleName = "GitpodNetworkCheck"
// gitpodInstanceProfile is the fixed name "GitpodNetworkCheck" (same value as
// gitpodRoleName).
// NOTE(review): presumably the name of the IAM instance profile created for
// the network check — confirm against createInstanceProfileAndAttachRole.
const gitpodInstanceProfile = "GitpodNetworkCheck"

// networkCheckTag is a single-element IAM tag list holding the tag
// "gitpod.io/network-check" = "true".
// NOTE(review): presumably attached to the IAM resources this tool creates so
// they can be identified later — confirm at the use sites.
var networkCheckTag = []iam_types.Tag{
{
Key: aws.String("gitpod.io/network-check"),
Value: aws.String("true"),
},
}

// initAwsConfig loads the default AWS SDK configuration for the given region.
// It returns whatever config.LoadDefaultConfig returns, including its error.
func initAwsConfig(ctx context.Context, region string) (aws.Config, error) {
	withRegion := config.WithRegion(region)
	return config.LoadDefaultConfig(ctx, withRegion)
}

// Package-level bookkeeping of the AWS resources created during a run.
// cleanup reads these variables to decide what to terminate and delete.
var (
// InstanceIds holds the EC2 instance IDs that cleanup terminates.
InstanceIds []string
// SecurityGroups holds the security group IDs that cleanup deletes;
// launchInstances appends to it after each group is created.
SecurityGroups []string
// Roles holds the IAM role names that cleanup detaches from the instance
// profile and deletes.
Roles []string
// InstanceProfile is the name of the IAM instance profile that cleanup deletes.
InstanceProfile string
)

var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
PersistentPreRunE: validateSubnets,
Use: "diagnose",
Expand All @@ -71,13 +49,21 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
return fmt.Errorf("❌ error creating IAM role and attaching policy: %v", err)
}
Roles = append(Roles, *role.RoleName)
log.Info("✅ IAM role created and policy attached")

instanceProfile, err := createInstanceProfileAndAttachRole(cmd.Context(), iamClient, *role.RoleName)
if err != nil {
return fmt.Errorf("❌ failed to create instance profile: %v", err)
}
InstanceProfile = aws.ToString(instanceProfile.InstanceProfileName)

allSubnets := slices.Concat(networkConfig.MainSubnets, networkConfig.PodSubnets)
slices.Sort(allSubnets)
distinctSubnets := slices.Compact(allSubnets)
if len(distinctSubnets) < len(allSubnets) {
log.Infof("ℹ️ Found duplicate subnets. We'll test each subnet '%v' only once.", distinctSubnets)
}

log.Infof("ℹ️ Launching EC2 instances in Main subnets")
mainInstanceIds, err := launchInstances(cmd.Context(), ec2Client, networkConfig.MainSubnets, instanceProfile.Arn)
if err != nil {
Expand Down Expand Up @@ -270,9 +256,13 @@ func validateSubnets(cmd *cobra.Command, args []string) error {
func launchInstances(ctx context.Context, ec2Client *ec2.Client, subnets []string, profileArn *string) ([]string, error) {
var instanceIds []string
for _, subnet := range subnets {
if _, ok := Subnets[subnet]; ok {
log.Warnf("An EC2 instance was already created for subnet '%v', skipping", subnet)
continue
}
secGroup, err := createSecurityGroups(ctx, ec2Client, subnet)
if err != nil {
return nil, fmt.Errorf("❌ failed to create security group: %v", err)
return nil, fmt.Errorf("❌ failed to create security group for subnet '%v': %v", subnet, err)
}
SecurityGroups = append(SecurityGroups, secGroup)
instanceId, err := launchInstanceInSubnet(ctx, ec2Client, subnet, secGroup, profileArn)
Expand All @@ -281,6 +271,10 @@ func launchInstances(ctx context.Context, ec2Client *ec2.Client, subnets []strin
}

instanceIds = append(instanceIds, instanceId)
if Subnets == nil {
Subnets = make(map[string]bool)
}
Subnets[subnet] = true
}

return instanceIds, nil
Expand Down Expand Up @@ -312,6 +306,17 @@ func launchInstanceInSubnet(ctx context.Context, ec2Client *ec2.Client, subnetID
IamInstanceProfile: &types.IamInstanceProfileSpecification{
Arn: instanceProfileName,
},
TagSpecifications: []types.TagSpecification{
{
ResourceType: types.ResourceTypeInstance,
Tags: []types.Tag{
{
Key: aws.String("gitpod.io/network-check"),
Value: aws.String("true"),
},
},
},
},
}

var result *ec2.RunInstancesOutput
Expand Down Expand Up @@ -450,6 +455,17 @@ func createSecurityGroups(ctx context.Context, svc *ec2.Client, subnetID string)
Description: aws.String("EC2 security group allowing all HTTPS outgoing traffic"),
GroupName: aws.String(fmt.Sprintf("EC2-security-group-nc-%s", subnetID)),
VpcId: vpcID,
TagSpecifications: []types.TagSpecification{
{
ResourceType: types.ResourceTypeSecurityGroup,
Tags: []types.Tag{
{
Key: aws.String("gitpod.io/network-check"),
Value: aws.String("true"),
},
},
},
},
}

createSGOutput, err := svc.CreateSecurityGroup(ctx, createSGInput)
Expand All @@ -458,7 +474,7 @@ func createSecurityGroups(ctx context.Context, svc *ec2.Client, subnetID string)
}

sgID := createSGOutput.GroupId
log.Infof("ℹ️ Created security group with ID: %s", *sgID)
log.Infof("ℹ️ Created security group with ID: %s", *sgID)

// Authorize HTTPS outbound traffic
authorizeEgressInput := &ec2.AuthorizeSecurityGroupEgressInput{
Expand Down Expand Up @@ -486,64 +502,6 @@ func createSecurityGroups(ctx context.Context, svc *ec2.Client, subnetID string)
return *sgID, nil
}

// cleanup removes the AWS resources recorded in the package-level
// InstanceIds, Roles, InstanceProfile and SecurityGroups variables.
//
// The function is best-effort: every failed AWS call is logged as a warning
// (asking the user to clean up manually where applicable) and the remaining
// steps still run. Nothing is returned to the caller.
//
// Steps, in order:
//  1. Terminate all recorded EC2 instances.
//  2. For each recorded role: detach the AmazonSSMManagedInstanceCore policy,
//     remove the role from the instance profile and delete the role. Then
//     delete the instance profile itself.
//  3. If any security groups were recorded, wait one minute so network
//     interfaces are deleted, then delete each security group.
//
// ctx is passed to every AWS call; svc is the EC2 client and iamsvc the IAM
// client used for the deletions.
func cleanup(ctx context.Context, svc *ec2.Client, iamsvc *iam.Client) {
	if len(InstanceIds) > 0 {
		_, err := svc.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
			InstanceIds: InstanceIds,
		})
		if err != nil {
			log.WithError(err).WithField("instanceIds", InstanceIds).Warnf("Failed to cleanup instances, please cleanup manually")
		}
	}

	if len(Roles) > 0 {
		for _, role := range Roles {
			_, err := iamsvc.DetachRolePolicy(ctx, &iam.DetachRolePolicyInput{
				PolicyArn: aws.String("arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"),
				RoleName:  aws.String(role),
			})
			if err != nil {
				log.WithError(err).WithField("rolename", role).Warnf("Failed to detach policy from role, please cleanup manually")
			}

			_, err = iamsvc.RemoveRoleFromInstanceProfile(ctx, &iam.RemoveRoleFromInstanceProfileInput{
				RoleName:            aws.String(role),
				InstanceProfileName: aws.String(InstanceProfile),
			})
			if err != nil {
				log.WithError(err).WithField("roleName", role).WithField("profileName", InstanceProfile).Warnf("Failed to remove role from instance profile")
			}

			_, err = iamsvc.DeleteRole(ctx, &iam.DeleteRoleInput{RoleName: aws.String(role)})
			if err != nil {
				log.WithError(err).WithField("rolename", role).Warnf("Failed to cleanup role, please cleanup manually")
			}
		}

		// The instance profile can only be deleted once the roles above have
		// been removed from it, so this runs after the loop.
		_, err := iamsvc.DeleteInstanceProfile(ctx, &iam.DeleteInstanceProfileInput{
			InstanceProfileName: aws.String(InstanceProfile),
		})
		if err != nil {
			log.WithError(err).WithField("instanceProfile", InstanceProfile).Warnf("Failed to clean up instance profile, please cleanup manually")
		}
	}

	// The wait below exists only for the benefit of security group deletion;
	// skip it entirely when there is nothing left to delete.
	if len(SecurityGroups) == 0 {
		return
	}

	log.Info("Cleaning up: Waiting for 1 minute so network interfaces are deleted")
	select {
	case <-time.After(time.Minute):
	case <-ctx.Done():
		// Stop waiting when the context is cancelled. The deletions below are
		// still attempted so that each security group left behind is reported.
	}

	for _, sg := range SecurityGroups {
		_, err := svc.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{
			GroupId: aws.String(sg),
		})
		if err != nil {
			log.WithError(err).WithField("securityGroup", sg).Warnf("Failed to clean up security group, please cleanup manually")
		}
	}
}

func createIAMRoleAndAttachPolicy(ctx context.Context, svc *iam.Client) (*iam_types.Role, error) {
// Define the trust relationship
trustPolicy := `{
Expand Down
26 changes: 26 additions & 0 deletions gitpod-network-check/cmd/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package cmd

import (
"github.com/aws/aws-sdk-go-v2/service/ec2"
"github.com/aws/aws-sdk-go-v2/service/iam"
"github.com/spf13/cobra"
)

// cleanCommand is the `clean` subcommand. After validateSubnets has run as the
// persistent pre-run hook, it loads the AWS configuration for the configured
// region, builds EC2 and IAM clients from it and hands them to cleanup.
// The only error it returns is a failure to load the AWS configuration.
var cleanCommand = &cobra.Command{ // nolint:gochecknoglobals
	Use:               "clean",
	Short:             "Explicitly cleans up after the network check diagnosis",
	SilenceUsage:      false,
	PersistentPreRunE: validateSubnets,
	RunE: func(cmd *cobra.Command, _ []string) error {
		ctx := cmd.Context()

		awsCfg, err := initAwsConfig(ctx, networkConfig.AwsRegion)
		if err != nil {
			return err
		}

		cleanup(ctx, ec2.NewFromConfig(awsCfg), iam.NewFromConfig(awsCfg))
		return nil
	},
}
Loading

0 comments on commit 9106ae5

Please sign in to comment.