Skip to content

Commit

Permalink
some renames
Browse files Browse the repository at this point in the history
Signed-off-by: Alan Clucas <[email protected]>
  • Loading branch information
Joibel committed Jun 24, 2024
1 parent c9e6d25 commit ae9c924
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 109 deletions.
1 change: 0 additions & 1 deletion .spelling
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ parameterizing
params
pprof
pre-commit
prometheus
rc2
repo
roadmap
Expand Down
12 changes: 6 additions & 6 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,10 @@ type MySQLConfig struct {
Options map[string]string `json:"options,omitempty"`
}

// MetricOptions are options for an individual named metric to change their behaviour
type MetricOption struct {
// Disable disables the emission of this metric completely
Disable bool `json:"disable,omitempty"`
// MetricModifier holds modifiers for an individual named metric to change its behaviour
type MetricModifier struct {
// Disabled disables the emission of this metric completely
Disabled bool `json:"disabled,omitempty"`
// DisabledAttributes lists the labels (attributes) to remove from this metric to save on cardinality
DisabledAttributes []string `json:"disabledAttributes"`
// HistogramBuckets allow configuring of the buckets used in a histogram
Expand Down Expand Up @@ -280,8 +280,8 @@ type MetricsConfig struct {
IgnoreErrors bool `json:"ignoreErrors,omitempty"`
// Secure is a flag that starts the metrics servers using TLS, defaults to true
Secure *bool `json:"secure,omitempty"`
// Configure metrics by name
Options map[string]MetricOption `json:"options,omitempty"`
// Modifiers configure metrics by name
Modifiers map[string]MetricModifier `json:"modifiers,omitempty"`
// Temporality configures the temporality of the opentelemetry metrics.
// Valid values are Cumulative and Delta, defaulting to cumulative.
// This has no effect on prometheus metrics, which are always cumulative
Expand Down
31 changes: 16 additions & 15 deletions docs/metrics-3.6.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,34 @@ Metrics have changed in 3.6.

You can now retrieve metrics over the OpenTelemetry Protocol using the [OpenTelemetry collector](https://opentelemetry.io/docs/collector/), and this is the recommended mechanism.

These notes explain the differences in using the prometheus `/metrics` endpoint to scrape metrics for a minimal effort upgrade. It is not recommended you follow this guide blindly, the new metrics have been introduced because they add value, and so they should be worth collecting and using.

## TLS

The Prometheus `/metrics` endpoint now has TLS enabled by default.

To disable this set `metricsConfig.secure` to `false`.
These notes explain the differences in using the Prometheus `/metrics` endpoint to scrape metrics for a minimal-effort upgrade. It is not recommended that you follow this guide blindly: the new metrics have been introduced because they add value, and so they should be worth collecting and using.

## New metrics

The following are new metrics:

* `build_info`
* `total_count`
* `pods_total_count`
* `controller_build_info`
* `cronworkflows_triggered_total`
* `workflowtemplate_triggered_total`
* `workflowtemplate_runtime`
* `k8s_request_duration`
* `pods_total_count`
* `pod_pending_count`
* `queue_duration`
* `queue_longest_running`
* `queue_retries`
* `queue_unfinished_work`
* `pod_pending`
* `total_count`
* `workflowtemplate_runtime`
* `workflowtemplate_triggered_total`

and can be disabled with

```yaml
metricsConfig:
options:
metricsConfig: |
modifiers:
build_info:
disabled: true
...
```

## Renamed metrics
Expand All @@ -52,6 +47,12 @@ If you are using these metrics in your recording rules, dashboards, or alerts, y

## Custom metrics

Custom metric names and labels must be valid prometheus and OpenTelemetry names now. This prevents the use of `:`, which was usable in earlier versions of workflows
Custom metric names and labels must be valid Prometheus and OpenTelemetry names now. This prevents the use of `:`, which was usable in earlier versions of workflows.

Custom metrics, as defined by a workflow, could be defined as one type (say counter) in one workflow, and then as a histogram of the same name in a different workflow. This would work in 3.5 if the first usage of the metric had reached TTL and been deleted. This will no longer work in 3.6, and custom metrics may not be redefined. It doesn't really make sense to change a metric in this way, and the OpenTelemetry SDK prevents you from doing so.

## TLS

The Prometheus `/metrics` endpoint now has TLS enabled by default.

To disable this, set `metricsConfig.secure` to `false`.
176 changes: 110 additions & 66 deletions docs/metrics.md

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions workflow/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1353,12 +1353,12 @@ func (wfc *WorkflowController) getMaxStackDepth() int {

func (wfc *WorkflowController) getMetricsServerConfig() *metrics.Config {
// Metrics config
options := make(map[string]metrics.MetricOption)
for name, option := range wfc.Config.MetricsConfig.Options {
options[name] = metrics.MetricOption{
Disable: option.Disable,
DisabledAttributes: option.DisabledAttributes,
HistogramBuckets: option.HistogramBuckets,
modifiers := make(map[string]metrics.Modifier)
for name, modifier := range wfc.Config.MetricsConfig.Modifiers {
modifiers[name] = metrics.Modifier{
Disabled: modifier.Disabled,
DisabledAttributes: modifier.DisabledAttributes,
HistogramBuckets: modifier.HistogramBuckets,
}
}

Expand All @@ -1370,7 +1370,7 @@ func (wfc *WorkflowController) getMetricsServerConfig() *metrics.Config {
IgnoreErrors: wfc.Config.MetricsConfig.IgnoreErrors,
// Default to true for 3.6
Secure: wfc.Config.MetricsConfig.GetSecure(true),
Options: options,
Modifiers: modifiers,
Temporality: wfc.Config.MetricsConfig.Temporality,
}
return &metricsConfig
Expand Down
2 changes: 1 addition & 1 deletion workflow/metrics/instrument.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func (m *Metrics) createInstrument(instType instrumentType, name, desc, unit str
}

func (m *Metrics) buckets(name string, defaultBuckets []float64) []float64 {
if opts, ok := m.config.Options[name]; ok {
if opts, ok := m.config.Modifiers[name]; ok {
if len(opts.HistogramBuckets) > 0 {
buckets := opts.HistogramBuckets
sort.Float64s(buckets)
Expand Down
4 changes: 2 additions & 2 deletions workflow/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ type Config struct {
TTL time.Duration
IgnoreErrors bool
Secure bool
DefaultOptions MetricOption
Options map[string]MetricOption
DefaultOptions Modifier
Modifiers map[string]Modifier
Temporality wfconfig.MetricsTemporality
}

Expand Down
15 changes: 8 additions & 7 deletions workflow/metrics/options.go → workflow/metrics/modifiers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@ import (
metricsdk "go.opentelemetry.io/otel/sdk/metric"
)

type MetricOption struct {
Disable bool
// Modifier holds options to change the behaviour for a single metric
type Modifier struct {
Disabled bool
DisabledAttributes []string
HistogramBuckets []float64
}

// view creates an OpenTelemetry 'view' which disables whole metrics or aggregates across labels
func view(config *Config) metricsdk.Option {
views := make([]metricsdk.View, 0)
for metric, opt := range config.Options {
if opt.Disable {
for metric, modifier := range config.Modifiers {
if modifier.Disabled {
views = append(views, metricsdk.NewView(metricsdk.Instrument{Name: metric},
metricsdk.Stream{Aggregation: metricsdk.AggregationDrop{}}))
} else if len(opt.DisabledAttributes) > 0 {
keys := make([]attribute.Key, len(opt.DisabledAttributes))
for i, key := range opt.DisabledAttributes {
} else if len(modifier.DisabledAttributes) > 0 {
keys := make([]attribute.Key, len(modifier.DisabledAttributes))
for i, key := range modifier.DisabledAttributes {
keys[i] = attribute.Key(key)
}
views = append(views, metricsdk.NewView(metricsdk.Instrument{Name: metric},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import (
func TestViewDisable(t *testing.T) {
// Same metric as TestMetrics, but disabled by a view
m, te, err := createTestMetrics(&Config{
Options: map[string]MetricOption{
Modifiers: map[string]Modifier{
nameOperationDuration: {
Disable: true,
Disabled: true,
},
},
})
Expand All @@ -27,7 +27,7 @@ func TestViewDisable(t *testing.T) {
func TestViewDisabledAttributes(t *testing.T) {
// Disable the error cause label
m, te, err := createTestMetrics(&Config{
Options: map[string]MetricOption{
Modifiers: map[string]Modifier{
nameErrorCount: {
DisabledAttributes: []string{labelErrorCause},
},
Expand All @@ -53,7 +53,7 @@ func TestViewHistogramBuckets(t *testing.T) {
// Same metric as TestMetrics, but buckets changed
bounds := []float64{1.0, 3.0, 5.0, 10.0}
m, te, err := createTestMetrics(&Config{
Options: map[string]MetricOption{
Modifiers: map[string]Modifier{
nameOperationDuration: {
HistogramBuckets: bounds,
},
Expand Down

0 comments on commit ae9c924

Please sign in to comment.