Skip to content

Commit

Permalink
Merge pull request #4092 from cnmcavoy/cnmcavoy/speedy-provision-and-…
Browse files Browse the repository at this point in the history
…termination

Modify AWSMachine reconciliation behavior to terminate and create instances without blocking
  • Loading branch information
k8s-ci-robot authored Feb 28, 2023
2 parents 64827bf + f5d1d45 commit 8d64bc6
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 94 deletions.
20 changes: 13 additions & 7 deletions controllers/awscluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ func TestAWSClusterReconcilerIntegrationTests(t *testing.T) {
mockedDeleteVPCCallsForNonExistentVPC(m)
mockedDeleteLBCalls(true, ev2, e)
mockedDescribeInstanceCall(m)
mockedDeleteInstanceCalls(m)
mockedDeleteInstanceAndAwaitTerminationCalls(m)
}
expect(ec2Mock.EXPECT(), elbv2Mock.EXPECT(), elbMock.EXPECT())

Expand Down Expand Up @@ -347,7 +347,7 @@ func TestAWSClusterReconcilerIntegrationTests(t *testing.T) {
mockedDeleteVPCCalls(m)
mockedDescribeInstanceCall(m)
mockedDeleteLBCalls(true, ev2, e)
mockedDeleteInstanceCalls(m)
mockedDeleteInstanceAndAwaitTerminationCalls(m)
mockedDeleteSGCalls(m)
}
expect(ec2Mock.EXPECT(), elbv2Mock.EXPECT(), elbMock.EXPECT())
Expand Down Expand Up @@ -497,19 +497,25 @@ func mockedDescribeInstanceCall(m *mocks.MockEC2APIMockRecorder) {
}, nil)
}

func mockedDeleteInstanceCalls(m *mocks.MockEC2APIMockRecorder) {
func mockedDeleteInstanceAndAwaitTerminationCalls(m *mocks.MockEC2APIMockRecorder) {
m.TerminateInstances(
gomock.Eq(&ec2.TerminateInstancesInput{
InstanceIds: aws.StringSlice([]string{"id-1"}),
}),
).
Return(nil, nil)
).Return(nil, nil)
m.WaitUntilInstanceTerminated(
gomock.Eq(&ec2.DescribeInstancesInput{
InstanceIds: aws.StringSlice([]string{"id-1"}),
}),
).
Return(nil)
).Return(nil)
}

func mockedDeleteInstanceCalls(m *mocks.MockEC2APIMockRecorder) {
m.TerminateInstances(
gomock.Eq(&ec2.TerminateInstancesInput{
InstanceIds: aws.StringSlice([]string{"id-1"}),
}),
).Return(nil, nil)
}

func mockedCreateVPCCalls(m *mocks.MockEC2APIMockRecorder) {
Expand Down
19 changes: 12 additions & 7 deletions controllers/awsmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws"
ignTypes "github.com/flatcar/ignition/config/v2_3/types"
Expand Down Expand Up @@ -340,8 +341,14 @@ func (r *AWSMachineReconciler) reconcileDelete(machineScope *scope.MachineScope,
// This decision is based on the ec2-instance-lifecycle graph at
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html
switch instance.State {
case infrav1.InstanceStateShuttingDown, infrav1.InstanceStateTerminated:
case infrav1.InstanceStateShuttingDown:
machineScope.Info("EC2 instance is shutting down or already terminated", "instance-id", instance.ID)
// requeue reconciliation until we observe termination (or the instance can no longer be looked up)
return ctrl.Result{RequeueAfter: time.Minute}, nil
case infrav1.InstanceStateTerminated:
machineScope.Info("EC2 instance terminated successfully", "instance-id", instance.ID)
controllerutil.RemoveFinalizer(machineScope.AWSMachine, infrav1.MachineFinalizer)
return ctrl.Result{}, nil
default:
machineScope.Info("Terminating EC2 instance", "instance-id", instance.ID)

Expand All @@ -352,7 +359,7 @@ func (r *AWSMachineReconciler) reconcileDelete(machineScope *scope.MachineScope,
return ctrl.Result{}, err
}

if err := ec2Service.TerminateInstanceAndWait(instance.ID); err != nil {
if err := ec2Service.TerminateInstance(instance.ID); err != nil {
machineScope.Error(err, "failed to terminate instance")
conditions.MarkFalse(machineScope.AWSMachine, infrav1.InstanceReadyCondition, "DeletingFailed", clusterv1.ConditionSeverityWarning, err.Error())
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "FailedTerminate", "Failed to terminate instance %q: %v", instance.ID, err)
Expand Down Expand Up @@ -391,12 +398,10 @@ func (r *AWSMachineReconciler) reconcileDelete(machineScope *scope.MachineScope,

machineScope.Info("EC2 instance successfully terminated", "instance-id", instance.ID)
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeNormal, "SuccessfulTerminate", "Terminated instance %q", instance.ID)
}

// Instance is deleted so remove the finalizer.
controllerutil.RemoveFinalizer(machineScope.AWSMachine, infrav1.MachineFinalizer)

return ctrl.Result{}, nil
// requeue reconciliation until we observe termination (or the instance can no longer be looked up)
return ctrl.Result{RequeueAfter: time.Minute}, nil
}
}

// findInstance queries the EC2 apis and retrieves the instance if it exists.
Expand Down
2 changes: 0 additions & 2 deletions controllers/awsmachine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,6 @@ func mockedCreateInstanceCalls(m *mocks.MockEC2APIMockRecorder) {
},
},
}, nil)
m.WaitUntilInstanceRunningWithContext(gomock.Any(), gomock.Any(), gomock.Any()).
Return(nil)
m.DescribeNetworkInterfaces(gomock.Eq(&ec2.DescribeNetworkInterfacesInput{Filters: []*ec2.Filter{
{
Name: aws.String("attachment.instance-id"),
Expand Down
30 changes: 15 additions & 15 deletions controllers/awsmachine_controller_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ func TestAWSMachineReconciler(t *testing.T) {
secretSvc.EXPECT().UserData(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil).Times(1)
instance.State = infrav1.InstanceStatePending
_, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs, cs)

g.Expect(ms.AWSMachine.Status.InstanceState).To(PointTo(Equal(infrav1.InstanceStatePending)))
g.Expect(ms.AWSMachine.Status.Ready).To(Equal(false))
g.Expect(buf.String()).To(ContainSubstring(("EC2 instance state changed")))
Expand All @@ -410,6 +411,7 @@ func TestAWSMachineReconciler(t *testing.T) {
secretSvc.EXPECT().UserData(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil).Times(1)
instance.State = infrav1.InstanceStateRunning
_, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs, cs)

g.Expect(ms.AWSMachine.Status.InstanceState).To(PointTo(Equal(infrav1.InstanceStateRunning)))
g.Expect(ms.AWSMachine.Status.Ready).To(Equal(true))
g.Expect(buf.String()).To(ContainSubstring(("EC2 instance state changed")))
Expand Down Expand Up @@ -1081,7 +1083,7 @@ func TestAWSMachineReconciler(t *testing.T) {

instance.State = infrav1.InstanceStateRunning
secretSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()
_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})

Expand All @@ -1094,7 +1096,7 @@ func TestAWSMachineReconciler(t *testing.T) {

ms.AWSMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.UpdateMachineError)
secretSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()
_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})
t.Run("should not attempt to delete the secret if InsecureSkipSecretsManager is set on CloudInit", func(t *testing.T) {
Expand All @@ -1107,7 +1109,7 @@ func TestAWSMachineReconciler(t *testing.T) {
ms.AWSMachine.Spec.CloudInit.InsecureSkipSecretsManager = true

secretSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(0)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()

_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})
Expand Down Expand Up @@ -1167,7 +1169,7 @@ func TestAWSMachineReconciler(t *testing.T) {

instance.State = infrav1.InstanceStateRunning
secretSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()
_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})

Expand All @@ -1180,7 +1182,7 @@ func TestAWSMachineReconciler(t *testing.T) {

ms.AWSMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.UpdateMachineError)
secretSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()
_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})
})
Expand Down Expand Up @@ -1348,7 +1350,7 @@ func TestAWSMachineReconciler(t *testing.T) {

instance.State = infrav1.InstanceStateRunning
objectStoreSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()

_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})
Expand All @@ -1365,7 +1367,7 @@ func TestAWSMachineReconciler(t *testing.T) {
ms.AWSMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.UpdateMachineError)

objectStoreSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()

_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})
Expand Down Expand Up @@ -1429,7 +1431,7 @@ func TestAWSMachineReconciler(t *testing.T) {

instance.State = infrav1.InstanceStateRunning
objectStoreSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()
_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})

Expand All @@ -1444,7 +1446,7 @@ func TestAWSMachineReconciler(t *testing.T) {
// TODO: This seems to have no effect on the test result.
ms.AWSMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.UpdateMachineError)
objectStoreSvc.EXPECT().Delete(gomock.Any()).Return(nil).Times(1)
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil).AnyTimes()
_, _ = reconciler.reconcileDelete(ms, cs, cs, cs, cs)
})
})
Expand Down Expand Up @@ -1534,9 +1536,8 @@ func TestAWSMachineReconciler(t *testing.T) {
_, err := reconciler.reconcileDelete(ms, cs, cs, cs, cs)
g.Expect(err).To(BeNil())
g.Expect(buf.String()).To(ContainSubstring("EC2 instance is shutting down or already terminated"))
g.Expect(ms.AWSMachine.Finalizers).To(ConsistOf(metav1.FinalizerDeleteDependents))
})
t.Run("should ignore instances in terminated down state", func(t *testing.T) {
t.Run("should ignore instances in terminated state", func(t *testing.T) {
g := NewWithT(t)
awsMachine := getAWSMachine()
setup(t, g, awsMachine)
Expand All @@ -1553,7 +1554,7 @@ func TestAWSMachineReconciler(t *testing.T) {

_, err := reconciler.reconcileDelete(ms, cs, cs, cs, cs)
g.Expect(err).To(BeNil())
g.Expect(buf.String()).To(ContainSubstring("EC2 instance is shutting down or already terminated"))
g.Expect(buf.String()).To(ContainSubstring("EC2 instance terminated successfully"))
g.Expect(ms.AWSMachine.Finalizers).To(ConsistOf(metav1.FinalizerDeleteDependents))
})
t.Run("instance not shutting down yet", func(t *testing.T) {
Expand All @@ -1572,7 +1573,7 @@ func TestAWSMachineReconciler(t *testing.T) {
getRunningInstance(t, g)

expected := errors.New("can't reach AWS to terminate machine")
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(expected)
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(expected)

buf := new(bytes.Buffer)
klog.SetOutput(buf)
Expand All @@ -1585,7 +1586,7 @@ func TestAWSMachineReconciler(t *testing.T) {
t.Run("when instance can be shut down", func(t *testing.T) {
terminateInstance := func(t *testing.T, g *WithT) {
t.Helper()
ec2Svc.EXPECT().TerminateInstanceAndWait(gomock.Any()).Return(nil)
ec2Svc.EXPECT().TerminateInstance(gomock.Any()).Return(nil)
secretSvc.EXPECT().Delete(gomock.Any()).Return(nil).AnyTimes()
}

Expand Down Expand Up @@ -1663,7 +1664,6 @@ func TestAWSMachineReconciler(t *testing.T) {

_, err := reconciler.reconcileDelete(ms, cs, cs, cs, cs)
g.Expect(err).To(BeNil())
g.Expect(ms.AWSMachine.Finalizers).To(ConsistOf(metav1.FinalizerDeleteDependents))
})

t.Run("should fail to detach control plane ELB from instance", func(t *testing.T) {
Expand Down
2 changes: 0 additions & 2 deletions pkg/cloud/services/ec2/bastion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,6 @@ func TestServiceReconcileBastion(t *testing.T) {
},
},
}, nil)
m.WaitUntilInstanceRunningWithContext(gomock.Any(), gomock.Any(), gomock.Any()).
Return(nil)
},
bastionEnabled: true,
expectError: false,
Expand Down
17 changes: 0 additions & 17 deletions pkg/cloud/services/ec2/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,12 @@ limitations under the License.
package ec2

import (
"context"
"encoding/base64"
"fmt"
"sort"
"strings"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/pkg/errors"
"k8s.io/utils/pointer"
Expand All @@ -34,7 +31,6 @@ import (
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/filter"
awslogs "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/logs"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
Expand Down Expand Up @@ -591,19 +587,6 @@ func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instan
return nil, errors.Errorf("no instance returned for reservation %v", out.GoString())
}

waitTimeout := 1 * time.Minute
s.scope.Debug("Waiting for instance to be in running state", "instance-id", *out.Instances[0].InstanceId, "timeout", waitTimeout.String())
ctx, cancel := context.WithTimeout(aws.BackgroundContext(), waitTimeout)
defer cancel()

if err := s.EC2Client.WaitUntilInstanceRunningWithContext(
ctx,
&ec2.DescribeInstancesInput{InstanceIds: []*string{out.Instances[0].InstanceId}},
request.WithWaiterLogger(awslogs.NewWrapLogr(s.scope.GetLogger())),
); err != nil {
s.scope.Debug("Could not determine if Machine is running. Machine state might be unavailable until next renconciliation.")
}

return s.SDKToInstance(out.Instances[0])
}

Expand Down
Loading

0 comments on commit 8d64bc6

Please sign in to comment.