diff --git a/pkg/cloud/services/compute/availabilityzone.go b/pkg/cloud/services/compute/availabilityzone.go index 16f8c29813..a7df1dd08f 100644 --- a/pkg/cloud/services/compute/availabilityzone.go +++ b/pkg/cloud/services/compute/availabilityzone.go @@ -20,11 +20,14 @@ import ( "fmt" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/availabilityzones" + + "sigs.k8s.io/cluster-api-provider-openstack/pkg/metrics" ) func (s *Service) GetAvailabilityZones() ([]availabilityzones.AvailabilityZone, error) { + mc := metrics.NewMetricPrometheusContext("availability_zone", "list") allPages, err := availabilityzones.List(s.computeClient).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, fmt.Errorf("error getting availability zone list: %v", err) } availabilityZoneList, err := availabilityzones.ExtractAvailabilityZones(allPages) diff --git a/pkg/cloud/services/compute/instance.go b/pkg/cloud/services/compute/instance.go index 2e24d379c2..ab3e275989 100644 --- a/pkg/cloud/services/compute/instance.go +++ b/pkg/cloud/services/compute/instance.go @@ -330,8 +330,9 @@ func applyServerGroupID(opts servers.CreateOptsBuilder, serverGroupID string) se } func (s *Service) getTrunkSupport() (bool, error) { + mc := metrics.NewMetricPrometheusContext("network_extension", "list") allPages, err := netext.List(s.networkClient).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return false, err } @@ -357,8 +358,9 @@ func (s *Service) getSecurityGroups(securityGroupParams []infrav1.SecurityGroupP } listOpts.Name = sg.Name listOpts.ID = sg.UUID + mc := metrics.NewMetricPrometheusContext("security_group", "list") pages, err := groups.List(s.networkClient, listOpts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } @@ -422,11 +424,12 @@ func (s *Service) getServerNetworks(networkParams []infrav1.NetworkParam) ([]inf } func (s *Service) getOrCreatePort(eventObject runtime.Object, clusterName string, 
portName string, net infrav1.Network, instanceSecurityGroups *[]string) (*ports.Port, error) { + mc := metrics.NewMetricPrometheusContext("port", "list") allPages, err := ports.List(s.networkClient, ports.ListOpts{ Name: portName, NetworkID: net.ID, }).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, fmt.Errorf("searching for existing port for server: %v", err) } existingPorts, err := ports.ExtractPorts(allPages) @@ -491,8 +494,8 @@ func (s *Service) getOrCreatePort(eventObject runtime.Object, clusterName string if len(fixedIPs) > 0 { createOpts.FixedIPs = fixedIPs } - mc := metrics.NewMetricPrometheusContext("port", "create") + mc = metrics.NewMetricPrometheusContext("port", "create") port, err := ports.Create(s.networkClient, portsbinding.CreateOptsExt{ CreateOptsBuilder: createOpts, HostID: portOpts.HostID, @@ -509,11 +512,12 @@ func (s *Service) getOrCreatePort(eventObject runtime.Object, clusterName string } func (s *Service) getOrCreateTrunk(eventObject runtime.Object, clusterName, trunkName, portID string) (*trunks.Trunk, error) { + mc := metrics.NewMetricPrometheusContext("trunk", "list") allPages, err := trunks.List(s.networkClient, trunks.ListOpts{ Name: trunkName, PortID: portID, }).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, fmt.Errorf("searching for existing trunk for server: %v", err) } trunkList, err := trunks.ExtractTrunks(allPages) @@ -530,8 +534,8 @@ func (s *Service) getOrCreateTrunk(eventObject runtime.Object, clusterName, trun PortID: portID, Description: names.GetDescription(clusterName), } - mc := metrics.NewMetricPrometheusContext("trunk", "create") + mc = metrics.NewMetricPrometheusContext("trunk", "create") trunk, err := trunks.Create(s.networkClient, trunkCreateOpts).Extract() if mc.ObserveRequest(err) != nil { record.Warnf(eventObject, "FailedCreateTrunk", "Failed to create trunk %s: %v", trunkName, err) @@ -566,8 +570,9 @@ func (s *Service) getImageID(imageName string) 
(string, error) { Name: imageName, } + mc := metrics.NewMetricPrometheusContext("image", "list") pages, err := images.List(s.imagesClient, opts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return "", err } @@ -608,8 +613,9 @@ func (s *Service) DeleteInstance(eventObject runtime.Object, instanceName string return nil } + mc := metrics.NewMetricPrometheusContext("server_os_interface", "list") allInterfaces, err := attachinterfaces.List(s.computeClient, instance.ID).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return err } instanceInterfaces, err := attachinterfaces.ExtractInterfaces(allInterfaces) @@ -699,9 +705,9 @@ func (s *Service) deleteAttachInterface(eventObject runtime.Object, instanceID, return nil } - mc := metrics.NewMetricPrometheusContext("router_interface", "delete") + mc := metrics.NewMetricPrometheusContext("server_os_interface", "delete") err = attachinterfaces.Delete(s.computeClient, instanceID, portID).ExtractErr() - if mc.ObserveRequest(err) != nil { + if mc.ObserveRequestIgnoreNotFound(err) != nil { if capoerrors.IsNotFound(err) { return nil } @@ -725,8 +731,9 @@ func (s *Service) deleteTrunk(eventObject runtime.Object, portID string) error { listOpts := trunks.ListOpts{ PortID: port.ID, } + mc := metrics.NewMetricPrometheusContext("trunk", "list") trunkList, err := trunks.List(s.networkClient, listOpts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return err } trunkInfo, err := trunks.ExtractTrunks(trunkList) @@ -738,7 +745,8 @@ func (s *Service) deleteTrunk(eventObject runtime.Object, portID string) error { } err = util.PollImmediate(retryIntervalTrunkDelete, timeoutTrunkDelete, func() (bool, error) { - if err := trunks.Delete(s.networkClient, trunkInfo[0].ID).ExtractErr(); err != nil { + mc := metrics.NewMetricPrometheusContext("trunk", "delete") + if err := trunks.Delete(s.networkClient, trunkInfo[0].ID).ExtractErr(); mc.ObserveRequest(err) != nil { if capoerrors.IsRetryable(err) 
{ return false, nil } @@ -759,8 +767,9 @@ func (s *Service) getPort(portID string) (port *ports.Port, err error) { if portID == "" { return nil, fmt.Errorf("portID should be specified to get detail") } + mc := metrics.NewMetricPrometheusContext("port", "get") port, err = ports.Get(s.networkClient, portID).Extract() - if err != nil { + if mc.ObserveRequestIgnoreNotFound(err) != nil { if capoerrors.IsNotFound(err) { return nil, nil } @@ -808,8 +817,9 @@ func (s *Service) GetInstance(resourceID string) (instance *infrav1.Instance, er if resourceID == "" { return nil, fmt.Errorf("resourceId should be specified to get detail") } + mc := metrics.NewMetricPrometheusContext("server", "get") server, err := servers.Get(s.computeClient, resourceID).Extract() - if err != nil { + if mc.ObserveRequestIgnoreNotFound(err) != nil { if capoerrors.IsNotFound(err) { return nil, nil } @@ -835,8 +845,9 @@ func (s *Service) InstanceExists(name string) (instance *infrav1.Instance, err e listOpts = servers.ListOpts{} } + mc := metrics.NewMetricPrometheusContext("server", "list") allPages, err := servers.List(s.computeClient, listOpts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, fmt.Errorf("get server list: %v", err) } serverList, err := servers.ExtractServers(allPages) diff --git a/pkg/cloud/services/loadbalancer/loadbalancer.go b/pkg/cloud/services/loadbalancer/loadbalancer.go index 8e0d3ec7a7..1582631d7e 100644 --- a/pkg/cloud/services/loadbalancer/loadbalancer.go +++ b/pkg/cloud/services/loadbalancer/loadbalancer.go @@ -422,8 +422,9 @@ func getLoadBalancerName(clusterName string) string { } func (s *Service) checkIfLbExists(name string) (*loadbalancers.LoadBalancer, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer", "list") allPages, err := loadbalancers.List(s.loadbalancerClient, loadbalancers.ListOpts{Name: name}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } lbList, err := 
loadbalancers.ExtractLoadBalancers(allPages) @@ -437,8 +438,9 @@ func (s *Service) checkIfLbExists(name string) (*loadbalancers.LoadBalancer, err } func (s *Service) checkIfListenerExists(name string) (*listeners.Listener, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer_listener", "list") allPages, err := listeners.List(s.loadbalancerClient, listeners.ListOpts{Name: name}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } listenerList, err := listeners.ExtractListeners(allPages) @@ -452,8 +454,9 @@ func (s *Service) checkIfListenerExists(name string) (*listeners.Listener, error } func (s *Service) checkIfPoolExists(name string) (*pools.Pool, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer_pool", "list") allPages, err := pools.List(s.loadbalancerClient, pools.ListOpts{Name: name}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } poolList, err := pools.ExtractPools(allPages) @@ -467,8 +470,9 @@ func (s *Service) checkIfPoolExists(name string) (*pools.Pool, error) { } func (s *Service) checkIfMonitorExists(name string) (*monitors.Monitor, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer_healthmonitor", "list") allPages, err := monitors.List(s.loadbalancerClient, monitors.ListOpts{Name: name}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } monitorList, err := monitors.ExtractMonitors(allPages) @@ -482,8 +486,9 @@ func (s *Service) checkIfMonitorExists(name string) (*monitors.Monitor, error) { } func (s *Service) checkIfLbMemberExists(poolID, name string) (*pools.Member, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer_pool", "list") allPages, err := pools.ListMembers(s.loadbalancerClient, poolID, pools.ListMembersOpts{Name: name}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } lbMemberList, err := pools.ExtractMembers(allPages) @@ -507,8 +512,9 @@ var backoff 
= wait.Backoff{ func (s *Service) waitForLoadBalancerActive(id string) error { s.logger.Info("Waiting for load balancer", "id", id, "targetStatus", "ACTIVE") return wait.ExponentialBackoff(backoff, func() (bool, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer", "get") lb, err := loadbalancers.Get(s.loadbalancerClient, id).Extract() - if err != nil { + if mc.ObserveRequest(err) != nil { return false, err } return lb.ProvisioningStatus == "ACTIVE", nil @@ -518,8 +524,9 @@ func (s *Service) waitForLoadBalancerActive(id string) error { func (s *Service) waitForListener(id, target string) error { s.logger.Info("Waiting for load balancer listener", "id", id, "targetStatus", target) return wait.ExponentialBackoff(backoff, func() (bool, error) { + mc := metrics.NewMetricPrometheusContext("loadbalancer_listener", "get") _, err := listeners.Get(s.loadbalancerClient, id).Extract() - if err != nil { + if mc.ObserveRequest(err) != nil { return false, err } // The listener resource has no Status attribute, so a successful Get is the best we can do diff --git a/pkg/cloud/services/networking/floatingip.go b/pkg/cloud/services/networking/floatingip.go index fad4877fb5..6a1147d3b4 100644 --- a/pkg/cloud/services/networking/floatingip.go +++ b/pkg/cloud/services/networking/floatingip.go @@ -71,8 +71,9 @@ func (s *Service) GetOrCreateFloatingIP(openStackCluster *infrav1.OpenStackClust } func (s *Service) checkIfFloatingIPExists(ip string) (*floatingips.FloatingIP, error) { + mc := metrics.NewMetricPrometheusContext("floating_ip", "list") allPages, err := floatingips.List(s.client, floatingips.ListOpts{FloatingIP: ip}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } fpList, err := floatingips.ExtractFloatingIPs(allPages) @@ -86,8 +87,9 @@ func (s *Service) checkIfFloatingIPExists(ip string) (*floatingips.FloatingIP, e } func (s *Service) GetFloatingIPByPortID(portID string) (*floatingips.FloatingIP, error) { + mc := 
metrics.NewMetricPrometheusContext("floating_ip", "list") allPages, err := floatingips.List(s.client, floatingips.ListOpts{PortID: portID}).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return nil, err } fpList, err := floatingips.ExtractFloatingIPs(allPages) @@ -186,8 +188,9 @@ func (s *Service) DisassociateFloatingIP(openStackCluster *infrav1.OpenStackClus func (s *Service) waitForFloatingIP(id, target string) error { s.logger.Info("Waiting for floating IP", "id", id, "targetStatus", target) return wait.ExponentialBackoff(backoff, func() (bool, error) { + mc := metrics.NewMetricPrometheusContext("floating_ip", "get") fip, err := floatingips.Get(s.client, id).Extract() - if err != nil { + if mc.ObserveRequest(err) != nil { return false, err } return fip.Status == target, nil diff --git a/pkg/cloud/services/networking/network.go b/pkg/cloud/services/networking/network.go index aec4038f09..97507e8ae8 100644 --- a/pkg/cloud/services/networking/network.go +++ b/pkg/cloud/services/networking/network.go @@ -66,8 +66,9 @@ func (s *Service) ReconcileExternalNetwork(openStackCluster *infrav1.OpenStackCl External: &iTrue, } + mc := metrics.NewMetricPrometheusContext("network", "list") allPages, err := networks.List(s.client, listOpts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return err } networkList, err := networks.ExtractNetworks(allPages) @@ -180,11 +181,12 @@ func (s *Service) ReconcileSubnet(openStackCluster *infrav1.OpenStackCluster, cl subnetName := getSubnetName(clusterName) s.logger.Info("Reconciling subnet", "name", subnetName) + mc := metrics.NewMetricPrometheusContext("subnet", "list") allPages, err := subnets.List(s.client, subnets.ListOpts{ NetworkID: openStackCluster.Status.Network.ID, CIDR: openStackCluster.Spec.NodeCIDR, }).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return err } @@ -255,8 +257,9 @@ func (s *Service) getNetworkByID(networkID string) (networks.Network, error) { ID: networkID, 
} + mc := metrics.NewMetricPrometheusContext("network", "list") allPages, err := networks.List(s.client, opts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return networks.Network{}, err } @@ -279,8 +282,9 @@ func (s *Service) getNetworkByName(networkName string) (networks.Network, error) Name: networkName, } + mc := metrics.NewMetricPrometheusContext("network", "list") allPages, err := networks.List(s.client, opts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return networks.Network{}, err } @@ -303,11 +307,12 @@ func (s *Service) GetNetworksByFilter(opts networks.ListOptsBuilder) ([]networks if opts == nil { return nil, fmt.Errorf("no Filters were passed") } + mc := metrics.NewMetricPrometheusContext("network", "list") pager := networks.List(s.client, opts) var nets []networks.Network err := pager.EachPage(func(page pagination.Page) (bool, error) { networkList, err := networks.ExtractNetworks(page) - if err != nil { + if mc.ObserveRequest(err) != nil { return false, err } else if len(networkList) == 0 { return false, fmt.Errorf("no networks could be found with the filters provided") @@ -339,11 +344,12 @@ func (s *Service) GetSubnetsByFilter(opts subnets.ListOptsBuilder) ([]subnets.Su if opts == nil { return []subnets.Subnet{}, fmt.Errorf("no Filters were passed") } + mc := metrics.NewMetricPrometheusContext("subnet", "list") pager := subnets.List(s.client, opts) var snets []subnets.Subnet err := pager.EachPage(func(page pagination.Page) (bool, error) { subnetList, err := subnets.ExtractSubnets(page) - if err != nil { + if mc.ObserveRequest(err) != nil { return false, err } else if len(subnetList) == 0 { return false, fmt.Errorf("no subnets could be found with the filters provided") diff --git a/pkg/cloud/services/networking/router.go b/pkg/cloud/services/networking/router.go index 8e65abdc2a..ae0b094593 100644 --- a/pkg/cloud/services/networking/router.go +++ b/pkg/cloud/services/networking/router.go @@ -48,10 +48,11 @@ 
func (s *Service) ReconcileRouter(openStackCluster *infrav1.OpenStackCluster, cl routerName := getRouterName(clusterName) s.logger.Info("Reconciling router", "name", routerName) + mc := metrics.NewMetricPrometheusContext("router", "list") allPages, err := routers.List(s.client, routers.ListOpts{ Name: routerName, }).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return err } @@ -109,7 +110,7 @@ INTERFACE_LOOP: // ... and create a router interface for our subnet. if createInterface { s.logger.V(4).Info("Creating RouterInterface", "routerID", router.ID, "subnetID", openStackCluster.Status.Network.Subnet.ID) - mc := metrics.NewMetricPrometheusContext("router_interface", "create") + mc := metrics.NewMetricPrometheusContext("server_os_interface", "create") routerInterface, err := routers.AddInterface(s.client, router.ID, routers.AddInterfaceOpts{ SubnetID: openStackCluster.Status.Network.Subnet.ID, }).Extract() @@ -206,11 +207,11 @@ func (s *Service) DeleteRouter(openStackCluster *infrav1.OpenStackCluster, clust } if subnet.ID != "" { - mc := metrics.NewMetricPrometheusContext("router_interface", "delete") + mc := metrics.NewMetricPrometheusContext("server_os_interface", "delete") _, err = routers.RemoveInterface(s.client, router.ID, routers.RemoveInterfaceOpts{ SubnetID: subnet.ID, }).Extract() - if mc.ObserveRequest(err) != nil { + if mc.ObserveRequestIgnoreNotFound(err) != nil { if !capoerrors.IsNotFound(err) { return fmt.Errorf("unable to remove router interface: %v", err) } @@ -232,10 +233,11 @@ func (s *Service) DeleteRouter(openStackCluster *infrav1.OpenStackCluster, clust } func (s *Service) getRouterInterfaces(routerID string) ([]ports.Port, error) { + mc := metrics.NewMetricPrometheusContext("port", "list") allPages, err := ports.List(s.client, ports.ListOpts{ DeviceID: routerID, }).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return []ports.Port{}, err } @@ -264,10 +266,11 @@ func (s *Service) getRouter(clusterName 
string) (routers.Router, subnets.Subnet, } func (s *Service) getRouterByName(routerName string) (routers.Router, error) { + mc := metrics.NewMetricPrometheusContext("router", "list") allPages, err := routers.List(s.client, routers.ListOpts{ Name: routerName, }).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return routers.Router{}, err } @@ -290,8 +293,9 @@ func (s *Service) getSubnetByName(subnetName string) (subnets.Subnet, error) { Name: subnetName, } + mc := metrics.NewMetricPrometheusContext("subnet", "list") allPages, err := subnets.List(s.client, opts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return subnets.Subnet{}, err } diff --git a/pkg/cloud/services/networking/securitygroups.go b/pkg/cloud/services/networking/securitygroups.go index ff2d834290..a53639457d 100644 --- a/pkg/cloud/services/networking/securitygroups.go +++ b/pkg/cloud/services/networking/securitygroups.go @@ -481,8 +481,9 @@ func (s *Service) getSecurityGroupByName(name string) (*infrav1.SecurityGroup, e } s.logger.V(6).Info("Attempting to fetch security group with", "name", name) + mc := metrics.NewMetricPrometheusContext("group", "list") allPages, err := groups.List(s.client, opts).AllPages() - if err != nil { + if mc.ObserveRequest(err) != nil { return &infrav1.SecurityGroup{}, err } diff --git a/pkg/cloud/services/provider/provider.go b/pkg/cloud/services/provider/provider.go index dcf20cd3a7..beacf6b779 100644 --- a/pkg/cloud/services/provider/provider.go +++ b/pkg/cloud/services/provider/provider.go @@ -34,6 +34,7 @@ import ( "sigs.k8s.io/yaml" infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha4" + "sigs.k8s.io/cluster-api-provider-openstack/pkg/metrics" ) const ( @@ -184,8 +185,9 @@ func GetProjectID(client *gophercloud.ProviderClient, name string) (string, erro } jsonResp := projects{} + mc := metrics.NewMetricPrometheusContext("project", "get") resp, err := c.Get(c.ServiceURL("auth", "projects"), &jsonResp, 
&gophercloud.RequestOpts{OkCodes: []int{200}}) - if err != nil { + if mc.ObserveRequest(err) != nil { return "", err } defer resp.Body.Close() diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 7882b99e59..0f0d9d0167 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -22,6 +22,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/controller-runtime/pkg/metrics" + + capoerrors "sigs.k8s.io/cluster-api-provider-openstack/pkg/utils/errors" ) type OpenstackPrometheusMetrics struct { @@ -50,6 +52,15 @@ func (mc *MetricPrometheusContext) ObserveRequest(err error) error { return mc.Observe(apiRequestPrometheusMetrics, err) } +// ObserveRequestIgnoreNotFound observes the request like ObserveRequest, except that an err matching capoerrors.IsNotFound is observed as a success (nil) and then returned to the caller unchanged. +func (mc *MetricPrometheusContext) ObserveRequestIgnoreNotFound(err error) error { + if capoerrors.IsNotFound(err) { + _ = mc.ObserveRequest(nil) + return err + } + return mc.ObserveRequest(err) +} + // Observe records the request latency and counts the errors. func (mc *MetricPrometheusContext) Observe(om *OpenstackPrometheusMetrics, err error) error { if om == nil {