diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 1b16a6c0..f4b0f2da 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -33,6 +33,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/spf13/pflag" @@ -41,6 +42,7 @@ import ( "github.com/operator-framework/catalogd/internal/version" corecontrollers "github.com/operator-framework/catalogd/pkg/controllers/core" "github.com/operator-framework/catalogd/pkg/features" + catalogdmetrics "github.com/operator-framework/catalogd/pkg/metrics" "github.com/operator-framework/catalogd/pkg/profile" "github.com/operator-framework/catalogd/pkg/storage" @@ -124,24 +126,32 @@ func main() { os.Exit(1) } - if err := os.MkdirAll(storageDir, 0700); err != nil { - setupLog.Error(err, "unable to create storage directory for catalogs") - } - localStorage := storage.LocalDir{RootDir: storageDir} - shutdownTimeout := 30 * time.Second - catalogServer := server.Server{ - Kind: "catalogs", - Server: &http.Server{ - Addr: catalogServerAddr, - Handler: localStorage.StorageServerHandler(), - ReadTimeout: 5 * time.Second, - WriteTimeout: 10 * time.Second, - }, - ShutdownTimeout: &shutdownTimeout, - } - if err := mgr.Add(&catalogServer); err != nil { - setupLog.Error(err, "unable to start catalog server") - os.Exit(1) + var localStorage storage.Instance + if features.CatalogdFeatureGate.Enabled(features.HTTPServer) { + metrics.Registry.MustRegister(catalogdmetrics.RequestDurationMetric) + + if err := os.MkdirAll(storageDir, 0700); err != nil { + setupLog.Error(err, "unable to create storage directory for catalogs") + os.Exit(1) + } + + localStorage = storage.LocalDir{RootDir: storageDir} + shutdownTimeout := 30 * time.Second + catalogServer := server.Server{ + Kind: "catalogs", + Server: &http.Server{ + Addr: catalogServerAddr, + Handler: 
catalogdmetrics.AddMetricsToHandler(localStorage.StorageServerHandler()), + ReadTimeout: 5 * time.Second, + WriteTimeout: 10 * time.Second, + }, + ShutdownTimeout: &shutdownTimeout, + } + + if err := mgr.Add(&catalogServer); err != nil { + setupLog.Error(err, "unable to start catalog server") + os.Exit(1) + } } if err = (&corecontrollers.CatalogReconciler{ diff --git a/go.mod b/go.mod index 97a9969b..a6e9782a 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/operator-framework/operator-registry v1.27.1 + github.com/prometheus/client_golang v1.14.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.1 k8s.io/api v0.26.1 @@ -59,7 +60,6 @@ require ( github.com/operator-framework/api v0.17.4-0.20230223191600-0131a6301e42 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go new file mode 100644 index 00000000..c30aed58 --- /dev/null +++ b/pkg/metrics/metrics.go @@ -0,0 +1,40 @@ +package metrics + +import ( + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + RequestDurationMetricName = "catalogd_http_request_duration_seconds" +) + +// Sets up the necessary metrics for calculating the Apdex Score. +// If using Grafana for visualization connected to a Prometheus data +// source that is scraping these metrics, you can create a panel that +// uses the following queries + expressions for calculating the Apdex Score where T = 0.5: +// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}) +// Query B: 
sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) +// Query C: sum(catalogd_http_request_duration_seconds_count) +// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C +var ( + RequestDurationMetric = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: RequestDurationMetricName, + Help: "Histogram of request duration in seconds", + // Create a bucket for each 100 ms up to 1s and ensure each of those values multiplied by 4 also exists as a bucket. + // Include a 10s bucket to capture very long-running requests. This allows us to easily + // calculate Apdex Scores up to a T of 1 second, but using various mathematical formulas we + // should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets + // will allow us to more easily calculate health indicators other than the Apdex Score. + Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10}, + }, + []string{"code"}, + ) +) + +func AddMetricsToHandler(handler http.Handler) http.Handler { + return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler) +}