Skip to content

Commit

Permalink
fix: GcpNfsVolume to recover from timout during creation (kyma-projec…
Browse files Browse the repository at this point in the history
  • Loading branch information
abalaie authored Sep 6, 2024
1 parent f1ed036 commit c5899fc
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 5 deletions.
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ configMapGenerator:
- GCP_CLIENT_RENEW_DURATION=5m
- GCP_RETRY_WAIT_DURATION=5s
- GCP_OPERATION_WAIT_DURATION=5s
- GCP_API_TIMEOUT_DURATION=3s
- GCP_API_TIMEOUT_DURATION=8s
name: manager-env

secretGenerator:
Expand Down
3 changes: 2 additions & 1 deletion pkg/kcp/provider/gcp/client/gcpConstants.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const fileBackupPattern = "projects/%s/locations/%s/backups/%s"

const GcpRetryWaitTime = time.Second * 3
const GcpOperationWaitTime = time.Second * 5
const GcpApiTimeout = time.Second * 3
const GcpApiTimeout = time.Second * 8

type GcpConfig struct {
GcpRetryWaitTime time.Duration
Expand Down Expand Up @@ -95,6 +95,7 @@ type FilestoreState string
const (
READY FilestoreState = "READY"
DELETING FilestoreState = "DELETING"
ERROR FilestoreState = "ERROR"
)

type OperationType int
Expand Down
17 changes: 17 additions & 0 deletions pkg/kcp/provider/gcp/nfsinstance/checkNUpdateState.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,24 @@ func checkNUpdateState(ctx context.Context, st composed.State) (error, context.C
//If filestore doesn't exist, add it.
state.operation = client.ADD
state.curState = client.Creating
} else if state.fsInstance.State == string(client.ERROR) {
state.curState = v1beta1.ErrorState
} else if !state.doesFilestoreMatch() {
//If the filestore exists, but does not match, update it.
state.operation = client.MODIFY
state.curState = client.Updating
} else if state.fsInstance.State == string(client.READY) {
state.curState = v1beta1.ReadyState
} else {
//If the filestore exists but is not READY or in ERROR, it is in a transient state.
return composed.StopWithRequeueDelay(state.gcpConfig.GcpRetryWaitTime), ctx
}
}

//Update State Info
prevState := nfsInstance.Status.State
nfsInstance.Status.State = state.curState
logger.Info("State Info", "curState", state.curState, "Operation", state.operation)

if state.curState == v1beta1.ReadyState {
nfsInstance.Status.Hosts = state.fsInstance.Networks[0].IpAddresses
Expand All @@ -63,6 +69,17 @@ func checkNUpdateState(ctx context.Context, st composed.State) (error, context.C
SuccessError(composed.StopAndForget).
Run(ctx, state)
} else if prevState != state.curState {
if state.curState == v1beta1.ErrorState {
return composed.UpdateStatus(nfsInstance).
SetExclusiveConditions(metav1.Condition{
Type: v1beta1.ConditionTypeError,
Status: metav1.ConditionTrue,
Reason: v1beta1.ReasonGcpError,
Message: state.fsInstance.StatusMessage,
}).
SuccessError(composed.StopWithRequeueDelay(state.gcpConfig.GcpRetryWaitTime)).
Run(ctx, state)
}
return composed.UpdateStatus(nfsInstance).
RemoveConditions(v1beta1.ConditionTypeReady).
SuccessError(composed.StopWithRequeue).
Expand Down
76 changes: 76 additions & 0 deletions pkg/kcp/provider/gcp/nfsinstance/checkNUpdateState_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,82 @@ func (suite *checkNUpdateStateSuite) TestCheckNUpdateStateReady() {
assert.Equal(suite.T(), v1beta1.ReasonReady, updatedObject.Status.Conditions[0].Reason)
}

func (suite *checkNUpdateStateSuite) TestCheckNUpdateStateError() {
fakeHttpServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.Fail(suite.T(), "unexpected request: "+r.URL.String())
}))
gcpNfsInstance := getGcpNfsInstanceWithoutStatus()

factory, err := newTestStateFactory(fakeHttpServer, gcpNfsInstance)
assert.Nil(suite.T(), err)

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

//Get state object with GcpNfsVolume

testState, err := factory.newStateWith(ctx, gcpNfsInstance, "")
testState.fsInstance = &file.Instance{
Name: "test-gcp-nfs-volume-2",
State: string(client.ERROR),
StatusMessage: "Some error",
}
assert.Nil(suite.T(), err)
defer testState.FakeHttpServer.Close()
err, _ = checkNUpdateState(ctx, testState.State)
assert.NotNil(suite.T(), err)
assert.Equal(suite.T(), composed.StopWithRequeueDelay(testState.State.gcpConfig.GcpRetryWaitTime), err)
assert.Equal(suite.T(), v1beta1.ErrorState, testState.State.curState)
assert.Equal(suite.T(), v1beta1.ErrorState, testState.State.ObjAsNfsInstance().Status.State)

updatedObject := &v1beta1.NfsInstance{}
err = factory.kcpCluster.K8sClient().Get(ctx, types.NamespacedName{Name: gcpNfsInstance.Name, Namespace: gcpNfsInstance.Namespace}, updatedObject)
assert.Nil(suite.T(), err)
assert.Equal(suite.T(), v1beta1.ErrorState, updatedObject.Status.State)
// validate status conditions
assert.Equal(suite.T(), v1beta1.ConditionTypeError, updatedObject.Status.Conditions[0].Type)
assert.Equal(suite.T(), metav1.ConditionTrue, updatedObject.Status.Conditions[0].Status)
assert.Equal(suite.T(), v1beta1.ReasonGcpError, updatedObject.Status.Conditions[0].Reason)
}

func (suite *checkNUpdateStateSuite) TestCheckNUpdateFilestoreStateTransient() {
fakeHttpServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.Fail(suite.T(), "unexpected request: "+r.URL.String())
}))
gcpNfsInstance := getGcpNfsInstanceWithoutStatus()

factory, err := newTestStateFactory(fakeHttpServer, gcpNfsInstance)
assert.Nil(suite.T(), err)

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

//Get state object with GcpNfsVolume

testState, err := factory.newStateWith(ctx, gcpNfsInstance, "")
testState.fsInstance = &file.Instance{
Name: "test-gcp-nfs-volume-2",
State: "somethingElse",
Networks: []*file.NetworkConfig{
{
IpAddresses: []string{"10.2.74.33"},
},
},
FileShares: []*file.FileShareConfig{
{
CapacityGb: int64(gcpNfsInstance.Spec.Instance.Gcp.CapacityGb),
},
},
}
assert.Nil(suite.T(), err)
defer testState.FakeHttpServer.Close()
err, _ = checkNUpdateState(ctx, testState.State)
assert.NotNil(suite.T(), err)
assert.Equal(suite.T(), composed.StopWithRequeueDelay(testState.State.gcpConfig.GcpRetryWaitTime), err)
assert.Equal(suite.T(), gcpNfsInstance.Status.State, testState.State.curState)
assert.Equal(suite.T(), client.NONE, testState.State.operation)
}

func TestCheckNUpdateState(t *testing.T) {
suite.Run(t, new(checkNUpdateStateSuite))
}
2 changes: 1 addition & 1 deletion pkg/kcp/provider/gcp/nfsinstance/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func newState(nfsInstanceState types.State, fc client.FilestoreClient, gcpConfig

func (s State) doesFilestoreMatch() bool {
nfsInstance := s.ObjAsNfsInstance()
return s.fsInstance != nil &&
return s.fsInstance != nil && len(s.fsInstance.FileShares) > 0 &&
s.fsInstance.FileShares[0].CapacityGb == int64(nfsInstance.Spec.Instance.Gcp.CapacityGb)
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/kcp/provider/gcp/nfsinstance/syncNfsInstance.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ func syncNfsInstance(ctx context.Context, st composed.State) (error, context.Con
operation, err = state.filestoreClient.PatchFilestoreInstance(ctx, project, location, name, mask, state.toInstance())
case client.DELETE:
operation, err = state.filestoreClient.DeleteFilestoreInstance(ctx, project, location, name)
case client.NONE:
//If the operation is not ADD, MODIFY or DELETE, continue.
return nil, nil
}

if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions pkg/skr/gcpnfsvolume/deletePersistenceVolume.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func deletePersistenceVolume(ctx context.Context, st composed.State) (error, con
if state.PV.Status.Phase != "Released" && state.PV.Status.Phase != "Available" {
// Only PV in Released or Available state can be deleted
state.ObjAsGcpNfsVolume().Status.State = cloudresourcesv1beta1.GcpNfsVolumeError
return composed.PatchStatus(state.ObjAsGcpNfsVolume()).
return composed.UpdateStatus(state.ObjAsGcpNfsVolume()).
SetExclusiveConditions(metav1.Condition{
Type: cloudresourcesv1beta1.ConditionTypeError,
Status: metav1.ConditionTrue,
Expand All @@ -41,7 +41,7 @@ func deletePersistenceVolume(ctx context.Context, st composed.State) (error, con
for i := range state.ObjAsGcpNfsVolume().Status.Conditions {
condition := (state.ObjAsGcpNfsVolume().Status.Conditions)[i]
if condition.Type == cloudresourcesv1beta1.ConditionTypeError && condition.Reason == cloudresourcesv1beta1.ConditionReasonPVNotReadyForDeletion {
return composed.PatchStatus(state.ObjAsGcpNfsVolume()).
return composed.UpdateStatus(state.ObjAsGcpNfsVolume()).
RemoveConditions(cloudresourcesv1beta1.ConditionTypeError).
ErrorLogMessage("Error removing conditionType Error").
OnUpdateSuccess(func(ctx context.Context) (error, context.Context) {
Expand Down

0 comments on commit c5899fc

Please sign in to comment.