Skip to content

Commit

Permalink
feat: Add detailed work request errors when work-request is failed fo…
Browse files Browse the repository at this point in the history
…r both Loadbalancer and Machines (#378)
  • Loading branch information
sindhusri16 authored Sep 5, 2024
1 parent ca7c47a commit 1b44dcd
Show file tree
Hide file tree
Showing 19 changed files with 600 additions and 132 deletions.
File renamed without changes.
6 changes: 6 additions & 0 deletions api/v1beta2/ocicluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ type ClientOverrides struct {
// +optional
// +nullable
ContainerEngineClientUrl *string `json:"containerEngineClientUrl,omitempty"`

// WorkrequestClientUrl allows the default work request SDK client URL to be changed.
//
// +optional
// +nullable
WorkrequestClientUrl *string `json:"workrequestClientUrl,omitempty"`
}

// GetConditions returns the list of conditions for an OCICluster API object.
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 24 additions & 6 deletions cloud/ociutil/ociutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ import (

lb "github.com/oracle/cluster-api-provider-oci/cloud/services/loadbalancer"
nlb "github.com/oracle/cluster-api-provider-oci/cloud/services/networkloadbalancer"
wrs "github.com/oracle/cluster-api-provider-oci/cloud/services/workrequests"

"github.com/oracle/oci-go-sdk/v65/common"
"github.com/oracle/oci-go-sdk/v65/core"
"github.com/oracle/oci-go-sdk/v65/loadbalancer"
"github.com/oracle/oci-go-sdk/v65/networkloadbalancer"
"github.com/oracle/oci-go-sdk/v65/workrequests"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -61,10 +63,25 @@ func IsNotFound(err error) bool {
return ok && serviceErr.GetHTTPStatusCode() == http.StatusNotFound
}

func FetchErrorsOnFailedWorkRequest(ctx context.Context, workRequestClient wrs.Client, workRequestId *string) (done bool, err error) {
resp, err := workRequestClient.ListWorkRequestErrors(ctx, workrequests.ListWorkRequestErrorsRequest{
WorkRequestId: workRequestId,
})
if err != nil {
return false, errors.Wrapf(err, "Failed to fetch work-request-errors for failed workrequest: %s", *workRequestId)
}
final_err := errors.Errorf("WorkRequest %s failed", *workRequestId)
for _, wr_err := range resp.Items {
final_err = errors.Errorf("%s, %s", *wr_err.Message, final_err.Error())
}
return false, final_err
}

// AwaitNLBWorkRequest waits for the LB work request to either succeed, fail. See k8s.io/apimachinery/pkg/util/wait
func AwaitNLBWorkRequest(ctx context.Context, networkLoadBalancerClient nlb.NetworkLoadBalancerClient, workRequestId *string) (*networkloadbalancer.WorkRequest, error) {
func AwaitNLBWorkRequest(ctx context.Context, networkLoadBalancerClient nlb.NetworkLoadBalancerClient, workRequestClient wrs.Client, workRequestId *string) (*networkloadbalancer.WorkRequest, error) {
var wr *networkloadbalancer.WorkRequest
err := wait.PollWithContext(ctx, WorkRequestPollInterval, WorkRequestTimeout, func(ctx context.Context) (done bool, err error) {
immediate := true
err := wait.PollUntilContextTimeout(ctx, WorkRequestPollInterval, WorkRequestTimeout, immediate, func(ctx context.Context) (done bool, err error) {
twr, err := networkLoadBalancerClient.GetWorkRequest(ctx, networkloadbalancer.GetWorkRequestRequest{
WorkRequestId: workRequestId,
})
Expand All @@ -76,17 +93,18 @@ func AwaitNLBWorkRequest(ctx context.Context, networkLoadBalancerClient nlb.Netw
wr = &twr.WorkRequest
return true, nil
case networkloadbalancer.OperationStatusFailed:
return false, errors.Errorf("WorkRequest %s failed", *workRequestId)
return FetchErrorsOnFailedWorkRequest(ctx, workRequestClient, workRequestId)
}
return false, nil
})
return wr, err
}

// AwaitLBWorkRequest waits for the LBaaS work request to either succeed, fail. See k8s.io/apimachinery/pkg/util/wait
func AwaitLBWorkRequest(ctx context.Context, loadBalancerClient lb.LoadBalancerClient, workRequestId *string) (*loadbalancer.WorkRequest, error) {
func AwaitLBWorkRequest(ctx context.Context, loadBalancerClient lb.LoadBalancerClient, workRequestClient wrs.Client, workRequestId *string) (*loadbalancer.WorkRequest, error) {
var wr *loadbalancer.WorkRequest
err := wait.PollWithContext(ctx, WorkRequestPollInterval, WorkRequestTimeout, func(ctx context.Context) (done bool, err error) {
immediate := true
err := wait.PollUntilContextTimeout(ctx, WorkRequestPollInterval, WorkRequestTimeout, immediate, func(ctx context.Context) (done bool, err error) {
twr, err := loadBalancerClient.GetWorkRequest(ctx, loadbalancer.GetWorkRequestRequest{
WorkRequestId: workRequestId,
})
Expand All @@ -98,7 +116,7 @@ func AwaitLBWorkRequest(ctx context.Context, loadBalancerClient lb.LoadBalancerC
wr = &twr.WorkRequest
return true, nil
case loadbalancer.WorkRequestLifecycleStateFailed:
return false, errors.Errorf("WorkRequest %s failed", *workRequestId)
return FetchErrorsOnFailedWorkRequest(ctx, workRequestClient, workRequestId)
}
return false, nil
})
Expand Down
26 changes: 24 additions & 2 deletions cloud/scope/clients.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ import (
lb "github.com/oracle/cluster-api-provider-oci/cloud/services/loadbalancer"
nlb "github.com/oracle/cluster-api-provider-oci/cloud/services/networkloadbalancer"
"github.com/oracle/cluster-api-provider-oci/cloud/services/vcn"
wr "github.com/oracle/cluster-api-provider-oci/cloud/services/workrequests"
"github.com/oracle/cluster-api-provider-oci/version"
"github.com/oracle/oci-go-sdk/v65/common"
"github.com/oracle/oci-go-sdk/v65/containerengine"
"github.com/oracle/oci-go-sdk/v65/core"
"github.com/oracle/oci-go-sdk/v65/identity"
"github.com/oracle/oci-go-sdk/v65/loadbalancer"
"github.com/oracle/oci-go-sdk/v65/networkloadbalancer"
"github.com/oracle/oci-go-sdk/v65/workrequests"
"github.com/pkg/errors"
"k8s.io/klog/v2/klogr"
)
Expand All @@ -52,6 +54,7 @@ type OCIClients struct {
LoadBalancerClient lb.LoadBalancerClient
IdentityClient identityClient.Client
ContainerEngineClient containerEngineClient.Client
WorkRequestsClient wr.Client
BaseClient base.BaseClient
}

Expand Down Expand Up @@ -161,11 +164,11 @@ func (c *ClientProvider) createClients(region string) (OCIClients, error) {
if err != nil {
return OCIClients{}, err
}
baseClient, err := c.createBaseClient(region, c.ociAuthConfigProvider, c.Logger)
workrequestsClt, err := c.createWorkrequestsClient(region, c.ociAuthConfigProvider, c.Logger)
if err != nil {
return OCIClients{}, err
}

baseClient, err := c.createBaseClient(region, c.ociAuthConfigProvider, c.Logger)
if err != nil {
return OCIClients{}, err
}
Expand All @@ -178,6 +181,7 @@ func (c *ClientProvider) createClients(region string) (OCIClients, error) {
ComputeClient: computeClient,
ComputeManagementClient: computeManagementClient,
ContainerEngineClient: containerEngineClt,
WorkRequestsClient: workrequestsClt,
BaseClient: baseClient,
}, err
}
Expand Down Expand Up @@ -308,6 +312,24 @@ func (c *ClientProvider) createContainerEngineClient(region string, ociAuthConfi
return &containerEngineClt, nil
}

func (c *ClientProvider) createWorkrequestsClient(region string, ociAuthConfigProvider common.ConfigurationProvider, logger *logr.Logger) (*workrequests.WorkRequestClient, error) {
workrequestsClt, err := workrequests.NewWorkRequestClientWithConfigurationProvider(ociAuthConfigProvider)
if err != nil {
logger.Error(err, "unable to create OCI WorkRequests client")
return nil, err
}
workrequestsClt.SetRegion(region)
dispatcher := workrequestsClt.HTTPClient
workrequestsClt.HTTPClient = metrics.NewHttpRequestDispatcherWrapper(dispatcher, region)

if c.ociClientOverrides != nil && c.ociClientOverrides.WorkrequestClientUrl != nil {
workrequestsClt.Host = *c.ociClientOverrides.WorkrequestClientUrl
}
workrequestsClt.Interceptor = setVersionHeader()

return &workrequestsClt, nil
}

func (c *ClientProvider) createBaseClient(region string, ociAuthConfigProvider common.ConfigurationProvider, logger *logr.Logger) (base.BaseClient, error) {
baseClient, err := base.NewBaseClient(ociAuthConfigProvider, logger)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions cloud/scope/clients_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/oracle/cluster-api-provider-oci/cloud/services/vcn"
"github.com/oracle/oci-go-sdk/v65/loadbalancer"
"github.com/oracle/oci-go-sdk/v65/networkloadbalancer"
"github.com/oracle/oci-go-sdk/v65/workrequests"
"k8s.io/klog/v2/klogr"
)

Expand All @@ -40,6 +41,7 @@ type MockOCIClients struct {
NetworkLoadBalancerClient *networkloadbalancer.NetworkLoadBalancerClient
LoadBalancerClient *loadbalancer.LoadBalancerClient
IdentityClient identity.Client
WorkRequestsClient *workrequests.WorkRequestClient
}

var (
Expand All @@ -54,6 +56,7 @@ func MockNewClientProvider(mockClients MockOCIClients) (*ClientProvider, error)
LoadBalancerClient: mockClients.LoadBalancerClient,
IdentityClient: mockClients.IdentityClient,
ComputeClient: mockClients.ComputeClient,
WorkRequestsClient: mockClients.WorkRequestsClient,
}}

authConfig, err := MockAuthConfig()
Expand Down
2 changes: 2 additions & 0 deletions cloud/scope/clients_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ func TestClients_NewClientProviderWithClientOverrides(t *testing.T) {
NetworkLoadBalancerClientUrl: common.String("NetworkLoadBalancerClientUrl"),
IdentityClientUrl: common.String("IdentityClientUrl"),
ContainerEngineClientUrl: common.String("ContainerEngineClientUrl"),
WorkrequestClientUrl: common.String("WorkrequestClientUrl"),
}

clientProvider, err := NewClientProvider(ClientProviderParams{
Expand Down Expand Up @@ -108,6 +109,7 @@ func TestClients_NewClientProviderWithMissingOverrides(t *testing.T) {
LoadBalancerClientUrl: common.String("LoadBalancerClientUrl"),
//NetworkLoadBalancerClientUrl is missing,
//IdentityClientUrl is missing,
//WorkrequestClientUrl is missing,
ContainerEngineClientUrl: common.String("ContainerEngineClientUrl"),
}

Expand Down
4 changes: 4 additions & 0 deletions cloud/scope/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
lb "github.com/oracle/cluster-api-provider-oci/cloud/services/loadbalancer"
nlb "github.com/oracle/cluster-api-provider-oci/cloud/services/networkloadbalancer"
"github.com/oracle/cluster-api-provider-oci/cloud/services/vcn"
wr "github.com/oracle/cluster-api-provider-oci/cloud/services/workrequests"
"github.com/oracle/oci-go-sdk/v65/common"
"github.com/oracle/oci-go-sdk/v65/identity"
"github.com/pkg/errors"
Expand All @@ -55,6 +56,7 @@ type ClusterScopeParams struct {
NetworkLoadBalancerClient nlb.NetworkLoadBalancerClient
LoadBalancerClient lb.LoadBalancerClient
IdentityClient identityClient.Client
WorkRequestClient wr.Client
// RegionIdentifier Identifier as specified here https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm
RegionIdentifier string
OCIAuthConfigProvider common.ConfigurationProvider
Expand All @@ -73,6 +75,7 @@ type ClusterScope struct {
NetworkLoadBalancerClient nlb.NetworkLoadBalancerClient
LoadBalancerClient lb.LoadBalancerClient
IdentityClient identityClient.Client
WorkRequestClient wr.Client
// RegionIdentifier Identifier as specified here https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm
RegionIdentifier string
ClientProvider *ClientProvider
Expand Down Expand Up @@ -104,6 +107,7 @@ func NewClusterScope(params ClusterScopeParams) (*ClusterScope, error) {
NetworkLoadBalancerClient: params.NetworkLoadBalancerClient,
LoadBalancerClient: params.LoadBalancerClient,
IdentityClient: params.IdentityClient,
WorkRequestClient: params.WorkRequestClient,
RegionIdentifier: params.RegionIdentifier,
ClientProvider: params.ClientProvider,
OCIClusterAccessor: params.OCIClusterAccessor,
Expand Down
6 changes: 4 additions & 2 deletions cloud/scope/drg_rpc_attachment_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,8 @@ func (s *ClusterScope) waitForRPCToBeProvisioned(ctx context.Context, rpc *core.
return errors.New("invalid RPC lifecycle state")
}

err := wait.PollWithContext(ctx, PollInterval, RequestTimeout, func(ctx context.Context) (done bool, err error) {
immediate := true
err := wait.PollUntilContextTimeout(ctx, PollInterval, RequestTimeout, immediate, func(ctx context.Context) (done bool, err error) {
rpc, err := s.getRPC(ctx, rpc.Id, vcnClient)
if err != nil {
return true, err
Expand All @@ -271,7 +272,8 @@ func (s *ClusterScope) waitForRPCToBeProvisioned(ctx context.Context, rpc *core.
}

func (s *ClusterScope) waitForRPCToBeDeleted(ctx context.Context, rpcId *string, vcnClient vcn.Client) error {
err := wait.PollWithContext(ctx, PollInterval, RequestTimeout, func(ctx context.Context) (done bool, err error) {
immediate := true
err := wait.PollUntilContextTimeout(ctx, PollInterval, RequestTimeout, immediate, func(ctx context.Context) (done bool, err error) {
rpc, err := s.getRPC(ctx, rpcId, vcnClient)
if err != nil {
if ociutil.IsNotFound(err) {
Expand Down
6 changes: 3 additions & 3 deletions cloud/scope/load_balancer_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (s *ClusterScope) DeleteApiServerLB(ctx context.Context) error {
s.Logger.Error(err, "failed to delete apiserver lb")
return errors.Wrap(err, "failed to delete apiserver lb")
}
_, err = ociutil.AwaitLBWorkRequest(ctx, s.LoadBalancerClient, lbResponse.OpcWorkRequestId)
_, err = ociutil.AwaitLBWorkRequest(ctx, s.LoadBalancerClient, s.WorkRequestClient, lbResponse.OpcWorkRequestId)
if err != nil {
return errors.Wrap(err, "work request to delete lb failed")
}
Expand Down Expand Up @@ -129,7 +129,7 @@ func (s *ClusterScope) UpdateLB(ctx context.Context, lb infrastructurev1beta2.Lo
s.Logger.Error(err, "failed to reconcile the apiserver LB, failed to generate update lb workrequest")
return errors.Wrap(err, "failed to reconcile the apiserver LB, failed to generate update lb workrequest")
}
_, err = ociutil.AwaitLBWorkRequest(ctx, s.LoadBalancerClient, lbResponse.OpcWorkRequestId)
_, err = ociutil.AwaitLBWorkRequest(ctx, s.LoadBalancerClient, s.WorkRequestClient, lbResponse.OpcWorkRequestId)
if err != nil {
s.Logger.Error(err, "failed to reconcile the apiserver LB, failed to update lb")
return errors.Wrap(err, "failed to reconcile the apiserver LB, failed to update lb")
Expand Down Expand Up @@ -202,7 +202,7 @@ func (s *ClusterScope) CreateLB(ctx context.Context, lb infrastructurev1beta2.Lo
return nil, nil, errors.Wrap(err, "failed to create apiserver lb, failed to create work request")
}

wr, err := ociutil.AwaitLBWorkRequest(ctx, s.LoadBalancerClient, lbResponse.OpcWorkRequestId)
wr, err := ociutil.AwaitLBWorkRequest(ctx, s.LoadBalancerClient, s.WorkRequestClient, lbResponse.OpcWorkRequestId)
if err != nil {
return nil, nil, errors.Wrap(err, "awaiting load balancer")
}
Expand Down
Loading

0 comments on commit 1b44dcd

Please sign in to comment.