Skip to content

Commit

Permalink
corrected recording of erroneous metrics for driver and API requests,…
Browse files Browse the repository at this point in the history
…created convenience functions to record API and Driver metrics
  • Loading branch information
unmarshall committed Feb 15, 2024
1 parent 78c7bbf commit e5cb64a
Show file tree
Hide file tree
Showing 12 changed files with 61 additions and 38 deletions.
3 changes: 1 addition & 2 deletions pkg/azure/access/helpers/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package helpers

import (
"context"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
Expand All @@ -32,7 +31,7 @@ const (
// DeleteDisk deletes disk for passed in resourceGroup and diskName.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func DeleteDisk(ctx context.Context, client *armcompute.DisksClient, resourceGroup, diskName string) (err error) {
defer instrument.RecordAzAPIMetric(err, diskDeleteServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(diskDeleteServiceLabel, &err)
var poller *runtime.Poller[armcompute.DisksClientDeleteResponse]
poller, err = client.BeginDelete(ctx, resourceGroup, diskName, nil)
if err != nil {
Expand Down
5 changes: 2 additions & 3 deletions pkg/azure/access/helpers/marketplaceagreement.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package helpers

import (
"context"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
Expand All @@ -33,7 +32,7 @@ const (
// GetAgreementTerms fetches the agreement terms for the purchase plan.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func GetAgreementTerms(ctx context.Context, mktPlaceAgreementAccess *armmarketplaceordering.MarketplaceAgreementsClient, purchasePlan armcompute.PurchasePlan) (agreementTerms *armmarketplaceordering.AgreementTerms, err error) {
defer instrument.RecordAzAPIMetric(err, mktPlaceAgreementGetServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(mktPlaceAgreementGetServiceLabel, &err)
resp, err := mktPlaceAgreementAccess.Get(ctx, armmarketplaceordering.OfferTypeVirtualmachine, *purchasePlan.Publisher, *purchasePlan.Product, *purchasePlan.Name, nil)
if err != nil {
errors.LogAzAPIError(err, "Failed to get marketplace agreement for PurchasePlan: %+v", purchasePlan)
Expand All @@ -46,7 +45,7 @@ func GetAgreementTerms(ctx context.Context, mktPlaceAgreementAccess *armmarketpl
// AcceptAgreement updates the agreementTerms as accepted.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func AcceptAgreement(ctx context.Context, mktPlaceAgreementAccess *armmarketplaceordering.MarketplaceAgreementsClient, purchasePlan armcompute.PurchasePlan, existingAgreement armmarketplaceordering.AgreementTerms) (err error) {
defer instrument.RecordAzAPIMetric(err, mktPlaceAgreementCreateServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(mktPlaceAgreementCreateServiceLabel, &err)
updatedAgreement := existingAgreement
updatedAgreement.Properties.Accepted = to.Ptr(true)
_, err = mktPlaceAgreementAccess.Create(ctx, armmarketplaceordering.OfferTypeVirtualmachine, *purchasePlan.Publisher, *purchasePlan.Product, *purchasePlan.Name, updatedAgreement, nil)
Expand Down
10 changes: 6 additions & 4 deletions pkg/azure/access/helpers/nic.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (

// labels used for recording prometheus metrics
const (
subnetGetServiceLabel = "subnet_get"
nicGetServiceLabel = "nic_get"
nicDeleteServiceLabel = "nic_delete"
nicCreateServiceLabel = "nic_create"
Expand All @@ -40,7 +39,8 @@ const (
// DeleteNIC deletes the NIC identified by a resourceGroup and nicName.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func DeleteNIC(ctx context.Context, client *armnetwork.InterfacesClient, resourceGroup, nicName string) (err error) {
defer instrument.RecordAzAPIMetric(err, nicDeleteServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(nicDeleteServiceLabel, &err)

var poller *runtime.Poller[armnetwork.InterfacesClientDeleteResponse]
delCtx, cancelFn := context.WithTimeout(ctx, defaultDeleteNICTimeout)
defer cancelFn()
Expand All @@ -61,7 +61,8 @@ func DeleteNIC(ctx context.Context, client *armnetwork.InterfacesClient, resourc
// GetNIC fetches a NIC identified by resourceGroup and nic name.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func GetNIC(ctx context.Context, client *armnetwork.InterfacesClient, resourceGroup, nicName string) (nic *armnetwork.Interface, err error) {
defer instrument.RecordAzAPIMetric(err, nicGetServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(nicGetServiceLabel, &err)

resp, err := client.Get(ctx, resourceGroup, nicName, nil)
if err != nil {
if errors.IsNotFoundAzAPIError(err) {
Expand All @@ -76,7 +77,8 @@ func GetNIC(ctx context.Context, client *armnetwork.InterfacesClient, resourceGr
// CreateNIC creates a NIC given the resourceGroup, nic name and NIC creation parameters.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func CreateNIC(ctx context.Context, nicAccess *armnetwork.InterfacesClient, resourceGroup string, nicParams armnetwork.Interface, nicName string) (nic *armnetwork.Interface, err error) {
defer instrument.RecordAzAPIMetric(err, nicCreateServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(nicCreateServiceLabel, &err)

var (
poller *runtime.Poller[armnetwork.InterfacesClientCreateOrUpdateResponse]
creationResp armnetwork.InterfacesClientCreateOrUpdateResponse
Expand Down
4 changes: 2 additions & 2 deletions pkg/azure/access/helpers/resourcegraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package helpers
import (
"context"
"fmt"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph"
Expand All @@ -37,7 +36,8 @@ type MapperFn[T any] func(map[string]interface{}) *T
// The result of the query are then mapped using a mapperFn and the result or an error is returned.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func QueryAndMap[T any](ctx context.Context, client *armresourcegraph.Client, subscriptionID string, mapperFn MapperFn[T], queryTemplate string, templateArgs ...any) (results []T, err error) {
defer instrument.RecordAzAPIMetric(err, resourceGraphQueryServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(resourceGraphQueryServiceLabel, &err)

query := fmt.Sprintf(queryTemplate, templateArgs...)
resources, err := client.Resources(ctx,
armresourcegraph.QueryRequest{
Expand Down
4 changes: 2 additions & 2 deletions pkg/azure/access/helpers/resourcegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package helpers

import (
"context"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
"github.com/gardener/machine-controller-manager-provider-azure/pkg/azure/access/errors"
Expand All @@ -30,7 +29,8 @@ const (
// ResourceGroupExists checks if the given resourceGroup exists.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func ResourceGroupExists(ctx context.Context, client *armresources.ResourceGroupsClient, resourceGroup string) (exists bool, err error) {
defer instrument.RecordAzAPIMetric(err, resourceGroupExistsServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(resourceGroupExistsServiceLabel, &err)

resp, err := client.CheckExistence(ctx, resourceGroup, nil)
if err != nil {
if errors.IsNotFoundAzAPIError(err) {
Expand Down
6 changes: 4 additions & 2 deletions pkg/azure/access/helpers/subnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,20 @@ package helpers

import (
"context"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
"github.com/gardener/machine-controller-manager-provider-azure/pkg/azure/access/errors"
"github.com/gardener/machine-controller-manager-provider-azure/pkg/azure/instrument"
)

const subnetGetServiceLabel = "subnet_get"

// GetSubnet fetches a Subnet resource given a resourceGroup, virtualNetworkName and subnetName.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func GetSubnet(ctx context.Context, subnetAccess *armnetwork.SubnetsClient, resourceGroup, virtualNetworkName, subnetName string) (subnet *armnetwork.Subnet, err error) {
var subnetResp armnetwork.SubnetsClientGetResponse
defer instrument.RecordAzAPIMetric(err, subnetGetServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(subnetGetServiceLabel, &err)

subnetResp, err = subnetAccess.Get(ctx, resourceGroup, virtualNetworkName, subnetName, nil)
if err != nil {
errors.LogAzAPIError(err, "Failed to GET Subnet for [resourceGroup: %s, virtualNetworkName: %s, subnetName: %s]", resourceGroup, virtualNetworkName, subnetName)
Expand Down
12 changes: 8 additions & 4 deletions pkg/azure/access/helpers/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ const (
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func GetVirtualMachine(ctx context.Context, vmClient *armcompute.VirtualMachinesClient, resourceGroup, vmName string) (vm *armcompute.VirtualMachine, err error) {
var getResp armcompute.VirtualMachinesClientGetResponse
defer instrument.RecordAzAPIMetric(err, vmGetServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(vmGetServiceLabel, &err)

getResp, err = vmClient.Get(ctx, resourceGroup, vmName, nil)
if err != nil {
if errors.IsNotFoundAzAPIError(err) {
Expand All @@ -61,7 +62,8 @@ func GetVirtualMachine(ctx context.Context, vmClient *armcompute.VirtualMachines
// If cascade delete is set for associated NICs and Disks then these resources will also be deleted along with the VM.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func DeleteVirtualMachine(ctx context.Context, vmAccess *armcompute.VirtualMachinesClient, resourceGroup, vmName string) (err error) {
defer instrument.RecordAzAPIMetric(err, vmDeleteServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(vmDeleteServiceLabel, &err)

delCtx, cancelFn := context.WithTimeout(ctx, defaultDeleteVMTimeout)
defer cancelFn()
poller, err := vmAccess.BeginDelete(delCtx, resourceGroup, vmName, nil)
Expand All @@ -80,7 +82,8 @@ func DeleteVirtualMachine(ctx context.Context, vmAccess *armcompute.VirtualMachi
// CreateVirtualMachine creates a Virtual Machine given a resourceGroup and virtual machine creation parameters.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func CreateVirtualMachine(ctx context.Context, vmAccess *armcompute.VirtualMachinesClient, resourceGroup string, vmCreationParams armcompute.VirtualMachine) (vm *armcompute.VirtualMachine, err error) {
defer instrument.RecordAzAPIMetric(err, vmCreateServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(vmCreateServiceLabel, &err)

createCtx, cancelFn := context.WithTimeout(ctx, defaultCreateVMTimeout)
defer cancelFn()
vmName := *vmCreationParams.Name
Expand All @@ -101,7 +104,8 @@ func CreateVirtualMachine(ctx context.Context, vmAccess *armcompute.VirtualMachi
// SetCascadeDeleteForNICsAndDisks sets cascade deletion for NICs and Disks (OSDisk and DataDisks) associated to passed virtual machine.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func SetCascadeDeleteForNICsAndDisks(ctx context.Context, vmClient *armcompute.VirtualMachinesClient, resourceGroup string, vmName string, vmUpdateParams *armcompute.VirtualMachineUpdate) (err error) {
defer instrument.RecordAzAPIMetric(err, vmUpdateServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(vmUpdateServiceLabel, &err)

updCtx, cancelFn := context.WithTimeout(ctx, defaultUpdateVMTimeout)
defer cancelFn()
poller, err := vmClient.BeginUpdate(updCtx, resourceGroup, vmName, *vmUpdateParams, nil)
Expand Down
4 changes: 2 additions & 2 deletions pkg/azure/access/helpers/vmimage.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package helpers

import (
"context"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/gardener/machine-controller-manager-provider-azure/pkg/azure/access/errors"
Expand All @@ -28,7 +27,8 @@ const vmImageGetServiceLabel = "virtual_machine_image_get"
// GetVMImage fetches the VM Image given a location and image reference.
// NOTE: All calls to this Azure API are instrumented as prometheus metric.
func GetVMImage(ctx context.Context, vmImagesAccess *armcompute.VirtualMachineImagesClient, location string, imageRef armcompute.ImageReference) (vmImage *armcompute.VirtualMachineImage, err error) {
defer instrument.RecordAzAPIMetric(err, vmImageGetServiceLabel, time.Now())
defer instrument.APIMetricRecorderFn(vmImageGetServiceLabel, &err)

resp, err := vmImagesAccess.Get(ctx, location, *imageRef.Publisher, *imageRef.Offer, *imageRef.SKU, *imageRef.Version, nil)
if err != nil {
errors.LogAzAPIError(err, "Failed to get VM Image [Location: %s, ImageRef: %+v]", location, imageRef)
Expand Down
5 changes: 4 additions & 1 deletion pkg/azure/api/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"strings"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
Expand All @@ -26,7 +28,8 @@ const providerAzure = "Azure"
// If it is not then it will return an error indicating that this provider implementation cannot fulfill the request.
func ValidateMachineClassProvider(mcc *v1alpha1.MachineClass) error {
if mcc.Provider != providerAzure {
return field.Invalid(field.NewPath("provider"), mcc.Provider, fmt.Sprintf("Request for provider %s cannot be fulfilled. Only %s provider is supported.", mcc.Provider, providerAzure))
err := field.Invalid(field.NewPath("provider"), mcc.Provider, fmt.Sprintf("Request for provider %s cannot be fulfilled. Only %s provider is supported.", mcc.Provider, providerAzure))
return status.Error(codes.InvalidArgument, fmt.Sprintf("error validating provider %v", err))
}
return nil
}
Expand Down
18 changes: 18 additions & 0 deletions pkg/azure/instrument/instrument.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,21 @@ func RecordDriverAPIMetric(err error, operation string, invocationTime time.Time
operation,
).Observe(elapsed.Seconds())
}

// APIMetricRecorderFn returns a function that can be used to record a prometheus metric for Azure API calls.
// NOTE: a pointer to an error (which itself is a fat interface pointer) is necessary to enable the callers of this function to enclose this call into a `defer` statement.
func APIMetricRecorderFn(azServiceName string, err *error) func() {
invocationTime := time.Now()
return func() {
RecordAzAPIMetric(*err, azServiceName, invocationTime)
}
}

// DriverAPIMetricRecorderFn returns a function that can be used to record a prometheus metric for driver API calls.
// NOTE: a pointer to an error (which itself is a fat interface pointer) is necessary to enable the callers of this function to enclose this call into a `defer` statement.
func DriverAPIMetricRecorderFn(operation string, err *error) func() {
invocationTime := time.Now()
return func() {
RecordDriverAPIMetric(*err, operation, invocationTime)
}
}
15 changes: 9 additions & 6 deletions pkg/azure/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"context"
"fmt"
"strings"
"time"

"github.com/gardener/machine-controller-manager-provider-azure/pkg/azure/instrument"
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
Expand Down Expand Up @@ -53,7 +52,7 @@ func NewDefaultDriver(accessFactory access.Factory) driver.Driver {
}

func (d defaultDriver) ListMachines(ctx context.Context, req *driver.ListMachinesRequest) (resp *driver.ListMachinesResponse, err error) {
defer instrument.RecordDriverAPIMetric(err, listMachinesOperationLabel, time.Now())
defer instrument.DriverAPIMetricRecorderFn(listMachinesOperationLabel, &err)
providerSpec, connectConfig, err := helpers.ExtractProviderSpecAndConnectConfig(req.MachineClass, req.Secret)
if err != nil {
return
Expand All @@ -67,7 +66,8 @@ func (d defaultDriver) ListMachines(ctx context.Context, req *driver.ListMachine
}

func (d defaultDriver) CreateMachine(ctx context.Context, req *driver.CreateMachineRequest) (resp *driver.CreateMachineResponse, err error) {
defer instrument.RecordDriverAPIMetric(err, createMachineOperationLabel, time.Now())
defer instrument.DriverAPIMetricRecorderFn(createMachineOperationLabel, &err)

providerSpec, connectConfig, err := helpers.ExtractProviderSpecAndConnectConfig(req.MachineClass, req.Secret)
if err != nil {
return
Expand Down Expand Up @@ -99,7 +99,8 @@ func (d defaultDriver) CreateMachine(ctx context.Context, req *driver.CreateMach
}

func (d defaultDriver) DeleteMachine(ctx context.Context, req *driver.DeleteMachineRequest) (resp *driver.DeleteMachineResponse, err error) {
defer instrument.RecordDriverAPIMetric(err, deleteMachineOperationLabel, time.Now())
defer instrument.DriverAPIMetricRecorderFn(deleteMachineOperationLabel, &err)

providerSpec, connectConfig, err := helpers.ExtractProviderSpecAndConnectConfig(req.MachineClass, req.Secret)
if err != nil {
return
Expand Down Expand Up @@ -164,7 +165,8 @@ func (d defaultDriver) DeleteMachine(ctx context.Context, req *driver.DeleteMach
}

func (d defaultDriver) GetMachineStatus(ctx context.Context, req *driver.GetMachineStatusRequest) (resp *driver.GetMachineStatusResponse, err error) {
defer instrument.RecordDriverAPIMetric(err, getMachineStatusOperationLabel, time.Now())
defer instrument.DriverAPIMetricRecorderFn(getMachineStatusOperationLabel, &err)

providerSpec, connectConfig, err := helpers.ExtractProviderSpecAndConnectConfig(req.MachineClass, req.Secret)
if err != nil {
return nil, err
Expand Down Expand Up @@ -195,7 +197,8 @@ func (d defaultDriver) GetMachineStatus(ctx context.Context, req *driver.GetMach
}

func (d defaultDriver) GetVolumeIDs(_ context.Context, request *driver.GetVolumeIDsRequest) (resp *driver.GetVolumeIDsResponse, err error) {
defer instrument.RecordDriverAPIMetric(err, getVolumeIDsOperationLabel, time.Now())
defer instrument.DriverAPIMetricRecorderFn(getVolumeIDsOperationLabel, &err)

var volumeIDs []string

if request.PVSpecs != nil {
Expand Down
13 changes: 3 additions & 10 deletions pkg/azure/provider/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ import (
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status"
. "github.com/onsi/gomega"
. "github.com/onsi/gomega/gstruct"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/validation/field"
)

const (
Expand Down Expand Up @@ -495,17 +493,12 @@ func TestDeleteMachineWhenProviderIsNotAzure(t *testing.T) {
Secret: fakes.CreateProviderSecret(),
})
g.Expect(err).ToNot(BeNil())
var validationErr *field.Error
g.Expect(errors.As(err, &validationErr)).Should(BeTrue())
g.Expect(validationErr).To(
PointTo(MatchFields(IgnoreExtras, Fields{
"Type": Equal(field.ErrorTypeInvalid),
"Field": Equal("provider"),
})))
var statusErr *status.Status
g.Expect(errors.As(err, &statusErr)).To(BeTrue())
g.Expect(statusErr.Code()).To(Equal(codes.InvalidArgument))
}

func TestGetMachineStatus(t *testing.T) {

const testErrorCode = "test-error-code"
testInternalServerError := testhelp.InternalServerError(testErrorCode)

Expand Down

0 comments on commit e5cb64a

Please sign in to comment.