Skip to content

Commit

Permalink
Keep polling Hive ClusterDeployment if we encounter a transient conne…
Browse files Browse the repository at this point in the history
…ction failure (#4028)
  • Loading branch information
kimorris27 authored Dec 20, 2024
1 parent bfa6a19 commit 15920e1
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
18 changes: 16 additions & 2 deletions pkg/hive/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"sort"
"strings"

hivev1 "github.com/openshift/hive/apis/hive/v1"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -163,7 +164,7 @@ func (hr *clusterManager) Delete(ctx context.Context, doc *api.OpenShiftClusterD
func (hr *clusterManager) IsClusterDeploymentReady(ctx context.Context, doc *api.OpenShiftClusterDocument) (bool, error) {
cd, err := hr.GetClusterDeployment(ctx, doc)
if err != nil {
return false, err
return false, hr.handleClusterDeploymentGetError(err)
}

if len(cd.Status.Conditions) == 0 {
Expand All @@ -190,7 +191,7 @@ func (hr *clusterManager) IsClusterDeploymentReady(ctx context.Context, doc *api
func (hr *clusterManager) IsClusterInstallationComplete(ctx context.Context, doc *api.OpenShiftClusterDocument) (bool, error) {
cd, err := hr.GetClusterDeployment(ctx, doc)
if err != nil {
return false, err
return false, hr.handleClusterDeploymentGetError(err)
}

if cd.Spec.Installed {
Expand Down Expand Up @@ -223,6 +224,19 @@ func (hr *clusterManager) GetClusterDeployment(ctx context.Context, doc *api.Ope
return cd, nil
}

// handleClusterDeploymentGetError is intended to take in an error value returned by hr.GetClusterDeployment()
// and apply some special handling: if we encounter a transient connection error, return nil so that the RP continues
// polling the ClusterDeployment. Otherwise, return the error that was passed in.
//
// This allows CI to be resilient to temporary VPN failures that occur while the RP polls the Hive ClusterDeployment.
func (hr *clusterManager) handleClusterDeploymentGetError(err error) error {
if err != nil && strings.Contains(err.Error(), "http2: client connection lost") {
return nil
}

return err
}

func (hr *clusterManager) ResetCorrelationData(ctx context.Context, doc *api.OpenShiftClusterDocument) error {
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
cd, err := hr.GetClusterDeployment(ctx, doc)
Expand Down
18 changes: 18 additions & 0 deletions pkg/hive/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

hivev1 "github.com/openshift/hive/apis/hive/v1"
"github.com/sirupsen/logrus"
"go.uber.org/mock/gomock"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/hive/failure"
"github.com/Azure/ARO-RP/pkg/util/cmp"
mock_env "github.com/Azure/ARO-RP/pkg/util/mocks/env"
"github.com/Azure/ARO-RP/pkg/util/uuid"
uuidfake "github.com/Azure/ARO-RP/pkg/util/uuid/fake"
utilerror "github.com/Azure/ARO-RP/test/util/error"
Expand Down Expand Up @@ -112,13 +114,21 @@ func TestIsClusterDeploymentReady(t *testing.T) {
},
} {
t.Run(tt.name, func(t *testing.T) {
controller := gomock.NewController(t)
defer controller.Finish()

fakeClientBuilder := fake.NewClientBuilder()
if tt.cd != nil {
fakeClientBuilder.WithRuntimeObjects(tt.cd)
}

mockEnv := mock_env.NewMockInterface(controller)
mockEnv.EXPECT().IsCI().AnyTimes().Return(true)

c := clusterManager{
hiveClientset: fakeClientBuilder.Build(),
log: logrus.NewEntry(logrus.StandardLogger()),
env: mockEnv,
}

result, err := c.IsClusterDeploymentReady(context.Background(), doc)
Expand Down Expand Up @@ -348,6 +358,9 @@ func TestIsClusterInstallationComplete(t *testing.T) {
},
} {
t.Run(tt.name, func(t *testing.T) {
controller := gomock.NewController(t)
defer controller.Finish()

fakeClientBuilder := fake.NewClientBuilder()
if tt.cd != nil {
fakeClientBuilder = fakeClientBuilder.WithRuntimeObjects(tt.cd)
Expand All @@ -357,9 +370,14 @@ func TestIsClusterInstallationComplete(t *testing.T) {
} else {
fakeClientBuilder = fakeClientBuilder.WithRuntimeObjects(makeClusterProvision(""))
}

mockEnv := mock_env.NewMockInterface(controller)
mockEnv.EXPECT().IsCI().AnyTimes().Return(true)

c := clusterManager{
hiveClientset: fakeClientBuilder.Build(),
log: logrus.NewEntry(logrus.StandardLogger()),
env: mockEnv,
}

result, err := c.IsClusterInstallationComplete(context.Background(), doc)
Expand Down

0 comments on commit 15920e1

Please sign in to comment.