Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱Add handleRateLimit method #1387

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 36 additions & 30 deletions pkg/services/hcloud/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,23 @@ func (s *Service) Reconcile(ctx context.Context) (res reconcile.Result, err erro
return res, nil
}

// implements setting rate limit on hcloudmachine.
func handleRateLimit(hm *infrav1.HCloudMachine, err error, functionName string, errMsg string) error {
yrs147 marked this conversation as resolved.
Show resolved Hide resolved
yrs147 marked this conversation as resolved.
Show resolved Hide resolved
// returns error if not a rate limit exceeded error
if !hcloud.IsError(err, hcloud.ErrorCodeRateLimitExceeded) {
return fmt.Errorf("%s: %w", errMsg, err)
}

// does not return error if machine is running and does not have a deletion timestamp
if hm.Status.Ready && hm.DeletionTimestamp.IsZero() {
return nil
}

// check for a rate limit exceeded error if the machine is not running or if machine has a deletion timestamp
hcloudutil.HandleRateLimitExceeded(hm, err, functionName)
return fmt.Errorf("%s: %w", errMsg, err)
}

// Delete implements delete method of server.
func (s *Service) Delete(ctx context.Context) (res reconcile.Result, err error) {
server, err := s.findServer(ctx)
Expand Down Expand Up @@ -246,12 +263,11 @@ func (s *Service) reconcileNetworkAttachment(ctx context.Context, server *hcloud
ID: s.scope.HetznerCluster.Status.Network.ID,
},
}); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "AttachServerToNetwork")
// check if network status is old and server is in fact already attached
if hcloud.IsError(err, hcloud.ErrorCodeServerAlreadyAttached) {
return nil
}
return fmt.Errorf("failed to attach server to network: %w", err)
return handleRateLimit(s.scope.HCloudMachine, err, "AttachServerToNetwork", "failed to attach server to network")
}

return nil
Expand Down Expand Up @@ -308,11 +324,11 @@ func (s *Service) reconcileLoadBalancerAttachment(ctx context.Context, server *h
}

if err := s.scope.HCloudClient.AddTargetServerToLoadBalancer(ctx, opts, loadBalancer); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "AddTargetServerToLoadBalancer")
if hcloud.IsError(err, hcloud.ErrorCodeTargetAlreadyDefined) {
return reconcile.Result{}, nil
}
return reconcile.Result{}, fmt.Errorf("failed to add server %s with ID %d as target to load balancer: %w", server.Name, server.ID, err)
errMsg := fmt.Sprintf("failed to add server %s with ID %d as target to load balancer", server.Name, server.ID)
return reconcile.Result{}, handleRateLimit(s.scope.HCloudMachine, err, "AddTargetServerToLoadBalancer", errMsg)
}

record.Eventf(
Expand Down Expand Up @@ -429,8 +445,7 @@ func (s *Service) createServer(ctx context.Context) (*hcloud.Server, error) {
// get all ssh keys that are stored in HCloud API
sshKeysAPI, err := s.scope.HCloudClient.ListSSHKeys(ctx, hcloud.SSHKeyListOpts{})
if err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "ListSSHKeys")
return nil, fmt.Errorf("failed listing ssh heys from hcloud: %w", err)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "ListSSHKeys", "failed listing ssh keys from hcloud")
}

// find matching keys and store them
Expand Down Expand Up @@ -479,8 +494,8 @@ func (s *Service) createServer(ctx context.Context) (*hcloud.Server, error) {
s.scope.Name(),
err,
)
err = errors.Join(errServerCreateNotPossible, err)
return nil, fmt.Errorf("failed to create HCloud server %s: %w", s.scope.HCloudMachine.Name, err)
errMsg := fmt.Sprintf("failed to create HCloud server %s", s.scope.HCloudMachine.Name)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "CreateServer", errMsg)
}

// set ssh keys to status
Expand All @@ -497,8 +512,7 @@ func (s *Service) getServerImage(ctx context.Context) (*hcloud.Image, error) {
// Get server type so we can filter for images with correct architecture
serverType, err := s.scope.HCloudClient.GetServerType(ctx, string(s.scope.HCloudMachine.Spec.Type))
if err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "GetServerType")
return nil, fmt.Errorf("failed to get server type in HCloud: %w", err)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "GetServerType", "failed to get server type in HCloud")
}
if serverType == nil {
conditions.MarkFalse(
Expand All @@ -522,8 +536,7 @@ func (s *Service) getServerImage(ctx context.Context) (*hcloud.Image, error) {

images, err := s.scope.HCloudClient.ListImages(ctx, listOpts)
if err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "ListImages")
return nil, fmt.Errorf("failed to list images by label in HCloud: %w", err)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "ListImages", "failed to list images by label in HCloud")
}

// query for an existing image by name.
Expand All @@ -533,8 +546,7 @@ func (s *Service) getServerImage(ctx context.Context) (*hcloud.Image, error) {
}
imagesByName, err := s.scope.HCloudClient.ListImages(ctx, listOpts)
if err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "ListImages")
return nil, fmt.Errorf("failed to list images by name in HCloud: %w", err)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "ListImages", "failed to list images by name in HCloud")
}

images = append(images, imagesByName...)
Expand Down Expand Up @@ -577,12 +589,11 @@ func (s *Service) handleServerStatusOff(ctx context.Context, server *hcloud.Serv
if time.Now().Before(serverAvailableCondition.LastTransitionTime.Time.Add(serverOffTimeout)) {
// Not yet timed out, try again to power on
if err := s.scope.HCloudClient.PowerOnServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "PowerOnServer")
if hcloud.IsError(err, hcloud.ErrorCodeLocked) {
// if server is locked, we just retry again
return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}
return reconcile.Result{}, fmt.Errorf("failed to power on server: %w", err)
return reconcile.Result{}, handleRateLimit(s.scope.HCloudMachine, err, "PowerOnServer", "failed to power on server")
}
} else {
// Timed out. Set failure reason
Expand All @@ -592,12 +603,11 @@ func (s *Service) handleServerStatusOff(ctx context.Context, server *hcloud.Serv
} else {
// No condition set yet. Try to power server on.
if err := s.scope.HCloudClient.PowerOnServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "PowerOnServer")
if hcloud.IsError(err, hcloud.ErrorCodeLocked) {
// if server is locked, we just retry again
return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}
return reconcile.Result{}, fmt.Errorf("failed to power on server: %w", err)
return reconcile.Result{}, handleRateLimit(s.scope.HCloudMachine, err, "PowerOnServer", "failed to power on server")
}
conditions.MarkFalse(
s.scope.HCloudMachine,
Expand All @@ -619,8 +629,7 @@ func (s *Service) handleDeleteServerStatusRunning(ctx context.Context, server *h

if s.scope.HasServerAvailableCondition() {
if err := s.scope.HCloudClient.ShutdownServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "ShutdownServer")
return reconcile.Result{}, fmt.Errorf("failed to shutdown server: %w", err)
return reconcile.Result{}, handleRateLimit(s.scope.HCloudMachine, err, "ShutdownServer", "failed to shutdown server")
}

conditions.MarkFalse(s.scope.HCloudMachine,
Expand All @@ -635,9 +644,8 @@ func (s *Service) handleDeleteServerStatusRunning(ctx context.Context, server *h

// timeout for shutdown has been reached - delete server
if err := s.scope.HCloudClient.DeleteServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "DeleteServer")
record.Warnf(s.scope.HCloudMachine, "FailedDeleteHCloudServer", "Failed to delete HCloud server %s", s.scope.Name())
return reconcile.Result{}, fmt.Errorf("failed to delete server: %w", err)
return reconcile.Result{}, handleRateLimit(s.scope.HCloudMachine, err, "DeleteServer", "failed to delete server")
}

record.Eventf(s.scope.HCloudMachine, "HCloudServerDeleted", "HCloud server %s deleted", s.scope.Name())
Expand All @@ -647,9 +655,8 @@ func (s *Service) handleDeleteServerStatusRunning(ctx context.Context, server *h
func (s *Service) handleDeleteServerStatusOff(ctx context.Context, server *hcloud.Server) (res reconcile.Result, err error) {
// server is off and can be deleted
if err := s.scope.HCloudClient.DeleteServer(ctx, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "DeleteServer")
record.Warnf(s.scope.HCloudMachine, "FailedDeleteHCloudServer", "Failed to delete HCloud server %s", s.scope.Name())
return reconcile.Result{}, fmt.Errorf("failed to delete server: %w", err)
return reconcile.Result{}, handleRateLimit(s.scope.HCloudMachine, err, "DeleteServer", "failed to delete server")
}

record.Eventf(s.scope.HCloudMachine, "HCloudServerDeleted", "HCloud server %s deleted", s.scope.Name())
Expand All @@ -660,12 +667,12 @@ func (s *Service) deleteServerOfLoadBalancer(ctx context.Context, server *hcloud
lb := &hcloud.LoadBalancer{ID: s.scope.HetznerCluster.Status.ControlPlaneLoadBalancer.ID}

if err := s.scope.HCloudClient.DeleteTargetServerOfLoadBalancer(ctx, lb, server); err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "DeleteTargetServerOfLoadBalancer")
// do not return an error in case the target was not found
if strings.Contains(err.Error(), "load_balancer_target_not_found") {
return nil
}
return fmt.Errorf("failed to delete server %s with ID %d as target of load balancer %s with ID %d: %w", server.Name, server.ID, lb.Name, lb.ID, err)
errMsg := fmt.Sprintf("failed to delete server %s with ID %d as target of load balancer %s with ID %d", server.Name, server.ID, lb.Name, lb.ID)
return handleRateLimit(s.scope.HCloudMachine, err, "DeleteTargetServerOfLoadBalancer", errMsg)
}
record.Eventf(
s.scope.HetznerCluster,
Expand All @@ -685,8 +692,8 @@ func (s *Service) findServer(ctx context.Context) (*hcloud.Server, error) {
if err == nil {
server, err = s.scope.HCloudClient.GetServer(ctx, serverID)
if err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "GetServer")
return nil, fmt.Errorf("failed to get server %d: %w", serverID, err)
errMsg := fmt.Sprintf("failed to get server %d", serverID)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "GetServer", errMsg)
}

// if server has been found, return it
Expand All @@ -702,8 +709,7 @@ func (s *Service) findServer(ctx context.Context) (*hcloud.Server, error) {

servers, err := s.scope.HCloudClient.ListServers(ctx, opts)
if err != nil {
hcloudutil.HandleRateLimitExceeded(s.scope.HCloudMachine, err, "ListServers")
return nil, fmt.Errorf("failed to list servers: %w", err)
return nil, handleRateLimit(s.scope.HCloudMachine, err, "ListServers", "failed to list servers")
}

if len(servers) > 1 {
Expand Down
104 changes: 104 additions & 0 deletions pkg/services/hcloud/server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ package server

import (
"context"
"fmt"
"time"

"github.com/hetznercloud/hcloud-go/v2/hcloud"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand Down Expand Up @@ -339,3 +341,105 @@ var _ = Describe("Test ValidateLabels", func() {
}),
)
})

var _ = Describe("Test handleRateLimit", func() {
yrs147 marked this conversation as resolved.
Show resolved Hide resolved
type testCaseHandleRateLimit struct {
hm *infrav1.HCloudMachine
err error
functionName string
errMsg string
expectError error
expectCondition bool
}

DescribeTable("Test handleRateLimit",
func(tc testCaseHandleRateLimit) {
err := handleRateLimit(tc.hm, tc.err, tc.functionName, tc.errMsg)
if tc.expectError != nil {
Expect(err).To(MatchError(tc.expectError))
} else {
Expect(err).To(BeNil())
}
if tc.expectCondition {
Expect(isPresentAndFalseWithReason(tc.hm, infrav1.HetznerAPIReachableCondition, infrav1.RateLimitExceededReason)).To(BeTrue())
} else {
Expect(conditions.Get(tc.hm, infrav1.HetznerAPIReachableCondition)).To(BeNil())
}
},
Entry("machine not ready, rate limit exceeded error", testCaseHandleRateLimit{
hm: &infrav1.HCloudMachine{
Status: infrav1.HCloudMachineStatus{Ready: false},
},
err: hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded},
functionName: "TestFunction",
errMsg: "Test error message",
expectError: fmt.Errorf("Test error message: %w", hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded}),
expectCondition: true,
}),
Entry("machine has deletion timestamp, rate limit exceeded error", testCaseHandleRateLimit{
hm: &infrav1.HCloudMachine{
ObjectMeta: metav1.ObjectMeta{
DeletionTimestamp: &metav1.Time{Time: time.Now()},
},
Status: infrav1.HCloudMachineStatus{Ready: true},
},
err: hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded},
functionName: "TestFunction",
errMsg: "Test error message",
expectError: fmt.Errorf("Test error message: %w", hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded}),
expectCondition: true,
}),
Entry("machine not ready, has deletion timestamp, rate limit exceeded error", testCaseHandleRateLimit{
hm: &infrav1.HCloudMachine{
ObjectMeta: metav1.ObjectMeta{
DeletionTimestamp: &metav1.Time{Time: time.Now()},
},
Status: infrav1.HCloudMachineStatus{Ready: false},
},
err: hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded},
functionName: "TestFunction",
errMsg: "Test error message",
expectError: fmt.Errorf("Test error message: %w", hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded}),
expectCondition: true,
}),
Entry("machine ready, rate limit exceeded error", testCaseHandleRateLimit{
hm: &infrav1.HCloudMachine{
Status: infrav1.HCloudMachineStatus{Ready: true},
},
err: hcloud.Error{Code: hcloud.ErrorCodeRateLimitExceeded},
functionName: "TestFunction",
errMsg: "Test error message",
expectError: nil,
expectCondition: false,
}),
Entry("machine ready, other error", testCaseHandleRateLimit{
hm: &infrav1.HCloudMachine{
Status: infrav1.HCloudMachineStatus{Ready: true},
},
err: hcloud.Error{Code: hcloud.ErrorCodeResourceUnavailable},
functionName: "TestFunction",
errMsg: "Test error message",
expectError: fmt.Errorf("Test error message: %w", hcloud.Error{Code: hcloud.ErrorCodeResourceUnavailable}),
expectCondition: false,
}),
Entry("machine not ready, other error", testCaseHandleRateLimit{
hm: &infrav1.HCloudMachine{
Status: infrav1.HCloudMachineStatus{Ready: false},
},
err: hcloud.Error{Code: hcloud.ErrorCodeConflict},
functionName: "TestFunction",
errMsg: "Test conflict error message",
expectError: fmt.Errorf("Test conflict error message: %w", hcloud.Error{Code: hcloud.ErrorCodeConflict}),
expectCondition: false,
}),
)
})

func isPresentAndFalseWithReason(getter conditions.Getter, condition clusterv1.ConditionType, reason string) bool {
if !conditions.Has(getter, condition) {
return false
}
objectCondition := conditions.Get(getter, condition)
return objectCondition.Status == corev1.ConditionFalse &&
objectCondition.Reason == reason
}