diff --git a/gitpod-network-check/README.md b/gitpod-network-check/README.md
index 3b30524..2df3015 100644
--- a/gitpod-network-check/README.md
+++ b/gitpod-network-check/README.md
@@ -17,7 +17,7 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
    ```
 
 2. Set up AWS credentials
-   
+
    `gitpod-network-check` needs access to the AWS account you are planning to use to deploy Gitpod in. Much like AWS CLI, `gitpod-network-check` uses the available AWS profile in your terminal to authenticate against the account. This means that you can rely on any locally available [AWS profiles](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) or just set the right environment variables in your terminal for the CLI to use:
    ```
    export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
@@ -44,7 +44,7 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
 
    To start the diagnosis, the the command: `./gitpod-network-check diagnose`
 
-   ```
+   ```console
    ./gitpod-network-check diagnose
    INFO[0000] ✅ Main Subnets are valid
    INFO[0000] ✅ Pod Subnets are valid
@@ -77,3 +77,18 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
    INFO[0191] ✅ S3 is available
    ```
 
+3. Clean up after network diagnosis
+
+   Diagnosis is designed to do clean-up before it finishes. However, if the process terminates unexpectedly, you may clean up AWS resources it creates like so:
+
+   ```console
+   ./gitpod-network-check clean
+   INFO[0000] ✅ Main Subnets are valid
+   INFO[0000] ✅ Pod Subnets are valid
+   INFO[0000] ✅ Instances terminated
+   INFO[0000] Cleaning up: Waiting for 2 minutes so network interfaces are deleted
+   INFO[0121] ✅ Role 'GitpodNetworkCheck' deleted
+   INFO[0121] ✅ Instance profile deleted
+   INFO[0122] ✅ Security group 'sg-0a6119dcb6a564fc1' deleted
+   INFO[0122] ✅ Security group 'sg-07373362953212e54' deleted
+   ```
diff --git a/gitpod-network-check/cmd/checks.go b/gitpod-network-check/cmd/checks.go
index 734668b..d9c4129 100644
--- a/gitpod-network-check/cmd/checks.go
+++ b/gitpod-network-check/cmd/checks.go
@@ -5,12 +5,12 @@ import (
 	"encoding/base64"
 	"errors"
 	"fmt"
+	"slices"
 	"sort"
 	"strings"
 	"time"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
-	"github.com/aws/aws-sdk-go-v2/config"
 	"github.com/aws/aws-sdk-go-v2/service/ec2"
 	"github.com/aws/aws-sdk-go-v2/service/ec2/types"
 	"github.com/aws/aws-sdk-go-v2/service/iam"
@@ -23,28 +23,6 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 )
 
-const gitpodRoleName = "GitpodNetworkCheck"
-const gitpodInstanceProfile = "GitpodNetworkCheck"
-
-var networkCheckTag = []iam_types.Tag{
-	{
-		Key:   aws.String("gitpod.io/network-check"),
-		Value: aws.String("true"),
-	},
-}
-
-func initAwsConfig(ctx context.Context, region string) (aws.Config, error) {
-	return config.LoadDefaultConfig(ctx, config.WithRegion(region))
-}
-
-// this will be useful when we are cleaning up things at the end
-var (
-	InstanceIds     []string
-	SecurityGroups  []string
-	Roles           []string
-	InstanceProfile string
-)
-
 var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
 	PersistentPreRunE: validateSubnets,
 	Use:               "diagnose",
@@ -71,6 +49,7 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
 			return fmt.Errorf("❌ error creating IAM role and attaching policy: %v", err)
 		}
 		Roles = append(Roles, *role.RoleName)
+		log.Info("✅ IAM role created and policy attached")
 
 		instanceProfile, err := createInstanceProfileAndAttachRole(cmd.Context(), iamClient, *role.RoleName)
 		if err != nil {
@@ -78,6 +57,13 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
 		}
 		InstanceProfile = aws.ToString(instanceProfile.InstanceProfileName)
 
+		allSubnets := slices.Concat(networkConfig.MainSubnets, networkConfig.PodSubnets)
+		slices.Sort(allSubnets)
+		distinctSubnets := slices.Compact(allSubnets)
+		if len(distinctSubnets) < len(allSubnets) {
+			log.Infof("ℹ️ Found duplicate subnets. We'll test each subnet '%v' only once.", distinctSubnets)
+		}
+
 		log.Infof("ℹ️ Launching EC2 instances in Main subnets")
 		mainInstanceIds, err := launchInstances(cmd.Context(), ec2Client, networkConfig.MainSubnets, instanceProfile.Arn)
 		if err != nil {
@@ -270,9 +256,13 @@ func validateSubnets(cmd *cobra.Command, args []string) error {
 func launchInstances(ctx context.Context, ec2Client *ec2.Client, subnets []string, profileArn *string) ([]string, error) {
 	var instanceIds []string
 	for _, subnet := range subnets {
+		if _, ok := Subnets[subnet]; ok {
+			log.Warnf("An EC2 instance was already created for subnet '%v', skipping", subnet)
+			continue
+		}
 		secGroup, err := createSecurityGroups(ctx, ec2Client, subnet)
 		if err != nil {
-			return nil, fmt.Errorf("❌ failed to create security group: %v", err)
+			return nil, fmt.Errorf("❌ failed to create security group for subnet '%v': %v", subnet, err)
 		}
 		SecurityGroups = append(SecurityGroups, secGroup)
 		instanceId, err := launchInstanceInSubnet(ctx, ec2Client, subnet, secGroup, profileArn)
@@ -281,6 +271,10 @@ func launchInstances(ctx context.Context, ec2Client *ec2.Client, subnets []strin
 		}
 
 		instanceIds = append(instanceIds, instanceId)
+		if Subnets == nil {
+			Subnets = make(map[string]bool)
+		}
+		Subnets[subnet] = true
 	}
 
 	return instanceIds, nil
@@ -312,6 +306,17 @@ func launchInstanceInSubnet(ctx context.Context, ec2Client *ec2.Client, subnetID
 		IamInstanceProfile: &types.IamInstanceProfileSpecification{
 			Arn: instanceProfileName,
 		},
+		TagSpecifications: []types.TagSpecification{
+			{
+				ResourceType: types.ResourceTypeInstance,
+				Tags: []types.Tag{
+					{
+						Key:   aws.String("gitpod.io/network-check"),
+						Value: aws.String("true"),
+					},
+				},
+			},
+		},
 	}
 
 	var result *ec2.RunInstancesOutput
@@ -450,6 +455,17 @@ func createSecurityGroups(ctx context.Context, svc *ec2.Client, subnetID string)
 		Description: aws.String("EC2 security group allowing all HTTPS outgoing traffic"),
 		GroupName:   aws.String(fmt.Sprintf("EC2-security-group-nc-%s", subnetID)),
 		VpcId:       vpcID,
+		TagSpecifications: []types.TagSpecification{
+			{
+				ResourceType: types.ResourceTypeSecurityGroup,
+				Tags: []types.Tag{
+					{
+						Key:   aws.String("gitpod.io/network-check"),
+						Value: aws.String("true"),
+					},
+				},
+			},
+		},
 	}
 
 	createSGOutput, err := svc.CreateSecurityGroup(ctx, createSGInput)
@@ -458,7 +474,7 @@ func createSecurityGroups(ctx context.Context, svc *ec2.Client, subnetID string)
 	}
 
 	sgID := createSGOutput.GroupId
-	log.Infof("ℹ️ Created security group with ID: %s", *sgID)
+	log.Infof("ℹ️ Created security group with ID: %s", *sgID)
 
 	// Authorize HTTPS outbound traffic
 	authorizeEgressInput := &ec2.AuthorizeSecurityGroupEgressInput{
@@ -486,64 +502,6 @@ func createSecurityGroups(ctx context.Context, svc *ec2.Client, subnetID string)
 	return *sgID, nil
 }
 
-func cleanup(ctx context.Context, svc *ec2.Client, iamsvc *iam.Client) {
-	if len(InstanceIds) > 0 {
-		_, err := svc.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
-			InstanceIds: InstanceIds,
-		})
-		if err != nil {
-			log.WithError(err).WithField("instanceIds", InstanceIds).Warnf("Failed to cleanup instances, please cleanup manually")
-		}
-	}
-	if len(Roles) > 0 {
-		for _, role := range Roles {
-			_, err := iamsvc.DetachRolePolicy(ctx, &iam.DetachRolePolicyInput{PolicyArn: aws.String("arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"), RoleName: aws.String(role)})
-			if err != nil {
-				log.WithError(err).WithField("rolename", role).Warnf("Failed to cleanup role, please cleanup manually")
-			}
-
-			_, err = iamsvc.RemoveRoleFromInstanceProfile(ctx, &iam.RemoveRoleFromInstanceProfileInput{
-				RoleName:            aws.String(role),
-				InstanceProfileName: aws.String(InstanceProfile),
-			})
-			if err != nil {
-				log.WithError(err).WithField("roleName", role).WithField("profileName", InstanceProfile).Warnf("Failed to remove role from instance profile")
-			}
-
-			_, err = iamsvc.DeleteRole(ctx, &iam.DeleteRoleInput{RoleName: aws.String(role)})
-			if err != nil {
-				log.WithError(err).WithField("rolename", role).Warnf("Failed to cleanup role, please cleanup manaullay")
-			}
-		}
-
-		_, err := iamsvc.DeleteInstanceProfile(ctx, &iam.DeleteInstanceProfileInput{
-			InstanceProfileName: aws.String(InstanceProfile),
-		})
-
-		if err != nil {
-			log.WithError(err).WithField("instanceProfile", InstanceProfile).Warnf("Failed to clean up instance profile, please cleanup manually")
-		}
-	}
-
-	log.Info("Cleaning up: Waiting for 1 minute so network interfaces are deleted")
-	time.Sleep(time.Minute)
-
-	if len(SecurityGroups) > 0 {
-		for _, sg := range SecurityGroups {
-			deleteSGInput := &ec2.DeleteSecurityGroupInput{
-				GroupId: aws.String(sg),
-			}
-
-			_, err := svc.DeleteSecurityGroup(ctx, deleteSGInput)
-			if err != nil {
-				log.WithError(err).WithField("securityGroup", sg).Warnf("Failed to clean up security group, please cleanup manually")
-			}
-
-		}
-
-	}
-}
-
 func createIAMRoleAndAttachPolicy(ctx context.Context, svc *iam.Client) (*iam_types.Role, error) {
 	// Define the trust relationship
 	trustPolicy := `{
diff --git a/gitpod-network-check/cmd/cleanup.go b/gitpod-network-check/cmd/cleanup.go
new file mode 100644
index 0000000..c6b04cb
--- /dev/null
+++ b/gitpod-network-check/cmd/cleanup.go
@@ -0,0 +1,27 @@
+package cmd
+
+import (
+	"github.com/aws/aws-sdk-go-v2/service/ec2"
+	"github.com/aws/aws-sdk-go-v2/service/iam"
+	"github.com/spf13/cobra"
+)
+
+// cleanCommand is the "clean" subcommand; it runs cleanup against the configured AWS region.
+var cleanCommand = &cobra.Command{ // nolint:gochecknoglobals
+	PersistentPreRunE: validateSubnets,
+	Use:               "clean",
+	Short:             "Explicitly cleans up after the network check diagnosis",
+	SilenceUsage:      false,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		cfg, err := initAwsConfig(cmd.Context(), networkConfig.AwsRegion)
+		if err != nil {
+			return err
+		}
+
+		ec2Client := ec2.NewFromConfig(cfg)
+		iamClient := iam.NewFromConfig(cfg)
+
+		cleanup(cmd.Context(), ec2Client, iamClient)
+		return nil
+	},
+}
diff --git a/gitpod-network-check/cmd/common.go b/gitpod-network-check/cmd/common.go
new file mode 100644
index 0000000..0cc6190
--- /dev/null
+++ b/gitpod-network-check/cmd/common.go
@@ -0,0 +1,186 @@
+package cmd
+
+import (
+	"context"
+	"time"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/service/ec2"
+	"github.com/aws/aws-sdk-go-v2/service/ec2/types"
+	"github.com/aws/aws-sdk-go-v2/service/iam"
+	iam_types "github.com/aws/aws-sdk-go-v2/service/iam/types"
+	log "github.com/sirupsen/logrus"
+)
+
+// this will be useful when we are cleaning up things at the end
+var (
+	InstanceIds     []string
+	SecurityGroups  []string
+	Roles           []string
+	InstanceProfile string
+	Subnets         map[string]bool
+)
+
+const gitpodRoleName = "GitpodNetworkCheck"
+const gitpodInstanceProfile = "GitpodNetworkCheck"
+
+var networkCheckTag = []iam_types.Tag{
+	{
+		Key:   aws.String("gitpod.io/network-check"),
+		Value: aws.String("true"),
+	},
+}
+
+// initAwsConfig loads the default AWS configuration with the given region applied.
+func initAwsConfig(ctx context.Context, region string) (aws.Config, error) {
+	return config.LoadDefaultConfig(ctx, config.WithRegion(region))
+}
+
+// cleanup removes, on a best-effort basis, the AWS resources created by the
+// network check: EC2 instances, the IAM role and instance profile, and the
+// security groups. Resources recorded in the package-level variables are used
+// when present; otherwise they are looked up by the gitpod.io/network-check
+// tag (instances, security groups) or by the GitpodNetworkCheck instance
+// profile name (roles). Failures are logged and do not stop later steps.
+func cleanup(ctx context.Context, svc *ec2.Client, iamsvc *iam.Client) {
+	if len(InstanceIds) == 0 {
+		instances, err := svc.DescribeInstances(ctx, &ec2.DescribeInstancesInput{
+			Filters: []types.Filter{
+				{
+					Name:   aws.String("tag:gitpod.io/network-check"),
+					Values: []string{"true"},
+				},
+				{
+					Name:   aws.String("instance-state-name"),
+					Values: []string{"pending", "running", "shutting-down", "stopping", "stopped"},
+				},
+			},
+		})
+		// instances is nil when the call fails, so it is only read on success.
+		if err != nil {
+			log.WithError(err).Warn("Failed to list instances, please cleanup manually")
+		} else {
+			if len(instances.Reservations) == 0 {
+				log.Info("No instances found.")
+			}
+
+			for _, r := range instances.Reservations {
+				for _, i := range r.Instances {
+					InstanceIds = append(InstanceIds, *i.InstanceId)
+				}
+			}
+		}
+	}
+
+	if len(InstanceIds) > 0 {
+		_, err := svc.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
+			InstanceIds: InstanceIds,
+		})
+		if err != nil {
+			log.WithError(err).WithField("instanceIds", InstanceIds).Warnf("Failed to cleanup instances, please cleanup manually")
+		} else {
+			log.Info("✅ Instances terminated")
+		}
+
+		// Give the instances' network interfaces time to go away before the security groups are deleted below.
+		log.Info("Cleaning up: Waiting for 2 minutes so network interfaces are deleted")
+		time.Sleep(2 * time.Minute)
+	}
+
+	if len(Roles) == 0 {
+		paginator := iam.NewListInstanceProfilesPaginator(iamsvc, &iam.ListInstanceProfilesInput{})
+		for paginator.HasMorePages() {
+			output, err := paginator.NextPage(ctx)
+			if err != nil {
+				log.WithError(err).Warn("Failed to list roles, please cleanup manually")
+				break
+			}
+
+			for _, ip := range output.InstanceProfiles {
+				if *ip.InstanceProfileName != gitpodInstanceProfile {
+					continue
+				}
+				InstanceProfile = *ip.InstanceProfileName
+				for _, role := range ip.Roles {
+					Roles = append(Roles, *role.RoleName)
+				}
+			}
+		}
+		if len(Roles) == 0 {
+			log.Info("No roles found.")
+		}
+	}
+
+	if len(Roles) > 0 {
+		for _, role := range Roles {
+			_, err := iamsvc.DetachRolePolicy(ctx, &iam.DetachRolePolicyInput{PolicyArn: aws.String("arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"), RoleName: aws.String(role)})
+			if err != nil {
+				log.WithError(err).WithField("rolename", role).Warnf("Failed to cleanup role, please cleanup manually")
+			}
+
+			_, err = iamsvc.RemoveRoleFromInstanceProfile(ctx, &iam.RemoveRoleFromInstanceProfileInput{
+				RoleName:            aws.String(role),
+				InstanceProfileName: aws.String(InstanceProfile),
+			})
+			if err != nil {
+				log.WithError(err).WithField("roleName", role).WithField("profileName", InstanceProfile).Warnf("Failed to remove role from instance profile")
+			}
+
+			_, err = iamsvc.DeleteRole(ctx, &iam.DeleteRoleInput{RoleName: aws.String(role)})
+			if err != nil {
+				log.WithError(err).WithField("rolename", role).Warnf("Failed to cleanup role, please cleanup manaullay")
+				continue
+			}
+
+			log.Infof("✅ Role '%v' deleted", role)
+		}
+
+		_, err := iamsvc.DeleteInstanceProfile(ctx, &iam.DeleteInstanceProfileInput{
+			InstanceProfileName: aws.String(InstanceProfile),
+		})
+		if err != nil {
+			log.WithError(err).WithField("instanceProfile", InstanceProfile).Warnf("Failed to clean up instance profile, please cleanup manually")
+		} else {
+			log.Info("✅ Instance profile deleted")
+		}
+	}
+
+	if len(SecurityGroups) == 0 {
+		securityGroups, err := svc.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
+			Filters: []types.Filter{
+				{
+					Name:   aws.String("tag:gitpod.io/network-check"),
+					Values: []string{"true"},
+				},
+			},
+		})
+		// securityGroups is nil when the call fails, so it is only read on success.
+		if err != nil {
+			log.WithError(err).Error("Failed to list security groups, please cleanup manually")
+		} else {
+			if len(securityGroups.SecurityGroups) == 0 {
+				log.Info("No security groups found.")
+			}
+
+			for _, sg := range securityGroups.SecurityGroups {
+				SecurityGroups = append(SecurityGroups, *sg.GroupId)
+			}
+		}
+	}
+
+	if len(SecurityGroups) > 0 {
+		for _, sg := range SecurityGroups {
+			deleteSGInput := &ec2.DeleteSecurityGroupInput{
+				GroupId: aws.String(sg),
+			}
+
+			_, err := svc.DeleteSecurityGroup(ctx, deleteSGInput)
+			if err != nil {
+				log.WithError(err).WithField("securityGroup", sg).Warnf("Failed to clean up security group, please cleanup manually")
+				continue
+			}
+			log.Infof("✅ Security group '%v' deleted", sg)
+		}
+	}
+}
diff --git a/gitpod-network-check/cmd/root.go b/gitpod-network-check/cmd/root.go
index 7e7f4b2..4207956 100644
--- a/gitpod-network-check/cmd/root.go
+++ b/gitpod-network-check/cmd/root.go
@@ -126,5 +126,6 @@ func readConfigFile() *viper.Viper {
 
 func Execute() error {
 	networkCheckCmd.AddCommand(checkCommand)
+	networkCheckCmd.AddCommand(cleanCommand)
 	return networkCheckCmd.Execute()
 }